# S3FS

Instead of using MinIO, why not just use S3FS and some conventions from FastAI v2?

The idea is that there could be a clean-enough way to store things either locally or remotely.

In [1]:
import os
from pathlib import Path
import shutil

import s3fs

In [2]:
s3 = s3fs.S3FileSystem()

The interface might be:

* touch
* remove
* put
* mkdir
* open

Then the namespace can be either s3fs or os. I have too much defensive code below.

In [29]:
path = Path('/tmp/thunk.txt')

In [16]:
path.exists()

False

In [15]:
path.unlink()

FileNotFoundError: [Errno 2] No such file or directory: '/tmp/thunk.txt'

In [32]:
!touch /tmp/thunk.txt

In [33]:
remove(path)

In [31]:
os.path.exists(path)

False

In [21]:
os.unlink(path)

In [22]:
connector = os

In [24]:
hasattr(connector, 'path') and hasattr(connector.path, 'exists')

True

In [25]:
connector = None

In [26]:
hasattr(connector, 'path') and hasattr(connector.path, 'exists')

False

In [27]:
def listify(o):
    """Coerce ``o`` into a list.

    * ``None`` becomes an empty list.
    * A list is returned as-is (the same object, not a copy).
    * A ``str`` or ``dict`` is wrapped as a single element instead of being
      iterated.
    * Any other iterable is materialised with ``list(o)``.
    * Everything else is wrapped in a one-element list.
    """
    # Imported here because the visible import cell does not bring Iterable
    # into scope; without it every input that reached the isinstance check
    # below raised NameError.
    from collections.abc import Iterable

    if o is None:
        return []
    if isinstance(o, list):
        return o
    if isinstance(o, (str, dict)):
        return [o]
    if isinstance(o, Iterable):
        return list(o)
    return [o]

def has_method(o, methods):
    """Check that a chain of attribute names resolves, starting from ``o``.

    ``methods`` is normalised with ``listify``, so either a single name or a
    list of names may be passed. Each name is looked up on the object that
    the previous lookup produced; the answer is False as soon as one name is
    missing and True when the whole chain resolves.
    """
    current = o
    for name in listify(methods):
        if hasattr(current, name):
            current = getattr(current, name)
        else:
            return False
    return True

def remove(path, connector=None, **kw):
    """Delete ``path`` locally or through a connector such as ``os`` or s3fs.

    Args:
        path: Location to delete (``str`` or ``Path``).
        connector: Object or namespace that performs the deletion. ``None``
            means the local filesystem, handled with ``pathlib``.
        **kw: Accepted for signature compatibility; not used.

    Returns:
        ``False`` when an existence check reports that ``path`` is missing.
        Otherwise whatever the underlying delete call returns (``None`` for
        ``Path.unlink`` and ``os.unlink``).

    Raises:
        AttributeError: If ``connector`` provides none of ``unlink``, ``rm``
            or ``remove``.
    """
    if connector is None:
        local = Path(path)
        if not local.exists():
            return False
        return local.unlink()

    # Connectors receive a plain string, matching connector_mkdir.
    target = str(path)

    # os-style namespaces keep exists() under .path; filesystem objects expose
    # it directly. Consult whichever is present, in the original order.
    path_exists = getattr(getattr(connector, 'path', None), 'exists', None)
    if path_exists is not None and not path_exists(target):
        return False
    direct_exists = getattr(connector, 'exists', None)
    if direct_exists is not None and not direct_exists(target):
        return False

    # 'unlink' is tried first so connectors that already worked keep their
    # behaviour; 'rm' and 'remove' cover connectors that name the operation
    # differently instead of failing with AttributeError.
    for name in ('unlink', 'rm', 'remove'):
        delete = getattr(connector, name, None)
        if callable(delete):
            return delete(target)
    raise AttributeError(
        f"{connector!r} has no 'unlink', 'rm' or 'remove' method"
    )
    
def connector_mkdir(connector, path, **kw):
    """Make a directory through something like ``os`` or an s3fs filesystem.

    The connector's ``mkdir`` is first called with ``exist_ok=True``. If the
    connector rejects that keyword (``os.mkdir`` does), the call is retried
    without it and an already-existing path is treated as success, so both
    kinds of connector behave the same when the directory is already there.

    Args:
        connector: Object or namespace exposing ``mkdir(path, ...)``.
        path: Directory to create; converted to ``str`` before the call.
        **kw: Accepted for signature compatibility; not used.

    Returns:
        Whatever the connector's ``mkdir`` returns, or ``None`` when the
        fallback call found the path already present.

    Raises:
        TypeError: Any ``TypeError`` from ``mkdir`` that does not mention
            ``exist_ok`` is re-raised unchanged.
    """
    path = str(path)
    try:
        return connector.mkdir(path, exist_ok=True)
    except TypeError as e:
        # Only a rejected exist_ok keyword triggers the fallback below.
        if 'exist_ok' not in str(e):
            raise

    try:
        return connector.mkdir(path)
    except FileExistsError:
        # Emulate exist_ok=True for connectors that lack the keyword.
        return None

def combine_directory(root='', content_type=None, create=False, connector=None, **kw):
    """Build the directory path ``root[/content_type]`` for a content collection.

    When ``create`` is true the directory is also made: through
    ``connector_mkdir`` if a connector is given, otherwise locally with
    ``Path.mkdir(parents=True, exist_ok=True)``. The resulting ``Path`` is
    returned either way. Extra keyword arguments are accepted and ignored.
    """
    base = Path(root)
    directory = base if content_type is None else base / content_type

    if not create:
        return directory

    if connector is None:
        directory.mkdir(parents=True, exist_ok=True)
    else:
        connector_mkdir(connector, directory)
    return directory

def combine_path(root='', content_type=None, filename=None, **kw):
    """Build the path ``root[/content_type][/filename]`` for a piece of content.

    The directory part comes from ``combine_directory``, which also receives
    any extra keyword arguments (such as ``create``). ``filename`` is appended
    only when it is not None.
    """
    directory = combine_directory(root=root, content_type=content_type, **kw)
    if filename is None:
        return directory
    return directory / filename

    
def store(content, root='', content_type=None, filename=None, connector=None, mode='w', **kw):
    """Write ``content`` to the path built from root/content_type/filename.

    Args:
        content: Data passed to the opened file's ``write``.
        root: Leading path component.
        content_type: Optional sub-directory under ``root``.
        filename: Optional file name appended to the directory.
        connector: How the destination is opened. ``None`` uses the built-in
            ``open``; a callable is invoked as ``connector(path, mode)``; any
            other object is expected to provide ``open(path, mode)`` and is
            given the path as a string.
        mode: File mode. Defaults to ``'w'`` because the file is written to;
            the previous default of ``'r'`` made the write fail.
        **kw: Accepted for signature compatibility; not used.

    Returns:
        ``True`` once the content has been written.
    """
    path = combine_path(root=root, content_type=content_type, filename=filename)

    if connector is None:
        # No connector means the local filesystem.
        handle = open(path, mode)
    elif callable(connector):
        # Original calling convention: connector is itself an open() function.
        handle = connector(path, mode)
    else:
        # Namespace-style connector, as used by the other helpers.
        handle = connector.open(str(path), mode)

    with handle as f:
        f.write(content)
    return True

In [4]:
# Sanity checks: combine_path joins only the components that are supplied,
# and omitted components (root, content_type) contribute nothing to the path.
assert combine_path(root='/tmp', content_type='slips', filename='foo.txt') == Path('/tmp/slips/foo.txt')
assert combine_path(filename='foo.txt') == Path('foo.txt')
assert combine_path(content_type='slips', filename='foo.txt') == Path('slips/foo.txt')

In [5]:
path = str(combine_path(root='/tmp', content_type='slips'))
path

'/tmp/slips'

In [10]:
# Try mkdir with exist_ok first. If the TypeError mentions 'exist_ok', the
# keyword was rejected (see the TypeError recorded at the end of this
# notebook), so retry with a plain mkdir; any other TypeError is re-raised.
try:
    os.mkdir(path, exist_ok=True)
except TypeError as e:
    if 'exist_ok' in str(e):
        os.mkdir(path)
    else:
        raise

In [11]:
!ls /tmp

com.apple.launchd.VQGbts87n7
com.apple.launchd.qfigViTXDH
fctvpnctl.sock
fctvpnctl.sock_501
foo
hydra.log
powerlog
slips
tests
tunnelblick-downscript-needs-to-be-run.txt


This could possibly work, using pathlib, os, shutil, and s3fs somewhat interchangeably. However, there are enough odd differences between them that maybe MinIO is a better approach. MinIO, it turns out, uses s3fs as well.

In [54]:
s3.ls('drichards-data', refresh=True)

['drichards-data/building.tgz',
 'drichards-data/public_space.tgz',
 'drichards-data/room.tgz',
 'drichards-data/room_bed',
 'drichards-data/room_bed.tgz',
 'drichards-data/room_television.tgz']

In [41]:
alt = combine_path(root='drichards-data', content_type='slips')
alt

PosixPath('drichards-data/slips')

In [42]:
# s3.mkdir(str(alt), exist_ok=True)

In [45]:
content = "something"
alt = combine_path(root='drichards-data', content_type='slips', filename='1.a.2.some_value.txt')

In [46]:
# with s3.open(str(alt), 'w') as f:
#     f.write(content)

In [50]:
# s3.rm(str(alt))

In [52]:
alt2 = combine_path(root='drichards-data', content_type='slips')
alt2

PosixPath('drichards-data/slips')

In [53]:
# s3.rmdir(str(alt2))

In [28]:
# combine_path(root='/tmp', content_type='z/bar/baz', filename='foo.txt', create=True)

PosixPath('/tmp/z/bar/baz/foo.txt')

In [29]:
z = Path('/tmp/z')

In [17]:
path = combine_path(root='/tmp', content_type='z/bar/baz', filename='thunk.txt')

In [18]:
# Path(path).mkdir(parents=True, exist_ok=True)

In [22]:
# !rm -rf /tmp/z

In [11]:
path

'/tmp/foo/bar/baz/thunk.txt'

In [12]:
p = Path(path)

In [15]:
# p.parent.mkdir(parents=True, exist_ok=True)

NotADirectoryError: [Errno 20] Not a directory: '/tmp/foo/bar/baz'

In [16]:
!ls /tmp

com.apple.launchd.VQGbts87n7
com.apple.launchd.qfigViTXDH
fctvpnctl.sock
fctvpnctl.sock_501
foo
hydra.log
powerlog
tests
tunnelblick-downscript-needs-to-be-run.txt


In [9]:
# os.mkdir(path, exist_ok=True)

TypeError: 'exist_ok' is an invalid keyword argument for mkdir()