Merge pull request #9 from dask/path_corrections
Path corrections
martindurant committed Mar 22, 2016
2 parents 3b3e1a6 + 04a422c commit 2c1bea5
Showing 2 changed files with 34 additions and 5 deletions.
35 changes: 30 additions & 5 deletions s3fs/core.py
@@ -148,7 +148,12 @@ def open(self, path, mode='rb', block_size=5*1024**2):
         return S3File(self, path, mode, block_size=block_size)
 
     def _ls(self, path, refresh=False):
-        """ List files below path
+        """ List files in given bucket, or list of buckets.
+
+        Listing is cached unless `refresh=True`.
+
+        Note: only your buckets associated with the login will be listed by
+        `ls('')`, not any public buckets (even if already accessed).
 
         Parameters
         ----------
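The expanded docstring pins down two behaviours: listings are cached until `refresh=True`, and `ls('')` enumerates only the buckets owned by the current credentials, never public buckets. A usage sketch, assuming the usual `S3FileSystem` constructor and made-up bucket names:

    from s3fs import S3FileSystem

    s3 = S3FileSystem()              # credentials picked up from the environment
    s3.ls('')                        # buckets you own; cached after the first call
    s3.ls('some-public-bucket')      # a readable public bucket lists fine, but...
    assert 'some-public-bucket' not in s3.ls('')   # ...never shows up in ls('')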
@@ -160,7 +165,9 @@ def _ls(self, path, refresh=False):
         refresh : bool (=False)
             if False, look in local cache for file details first
         """
-        path = path.lstrip('s3://').lstrip('/')
+        if path.startswith('s3://'):
+            path = path[len('s3://'):]
+        path = path.rstrip('/')
         bucket, key = split_path(path)
         if bucket not in self.dirs or refresh:
             if bucket == '':
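This hunk is the heart of the PR. `str.lstrip('s3://')` does not remove a prefix; it strips any leading run of the characters 's', '3', ':' and '/', so bucket names that happen to start with one of those get eaten. A quick demonstration of the bug and the fix:

    >>> 's3://s3-logs/key'.lstrip('s3://')   # strips a character set, not a prefix
    '-logs/key'
    >>> p = 's3://s3-logs/key'
    >>> p[len('s3://'):] if p.startswith('s3://') else p   # the replacement logic
    's3-logs/key'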
@@ -173,6 +180,7 @@ def _ls(self, path, refresh=False):
                     f['Key'] = f['Name']
                     f['Size'] = 0
                     del f['Name']
+                self.dirs[''] = files
             else:
                 try:
                     files = self.s3.list_objects(Bucket=bucket).get('Contents', [])
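The added line stores the root listing under the empty-string key, so bucket enumeration goes through the same `self.dirs` cache as per-bucket listings and is only refetched when `refresh=True`. Reduced to its essentials, the pattern is (names here are illustrative, not s3fs API):

    cache = {}

    def fetch_from_s3(bucket):
        # hypothetical stand-in for the boto3 list_buckets/list_objects calls
        return []

    def listing(bucket, refresh=False):
        if bucket not in cache or refresh:
            cache[bucket] = fetch_from_s3(bucket)
        return cache[bucket]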
@@ -187,7 +195,9 @@ def _ls(self, path, refresh=False):
 
     def ls(self, path, detail=False):
         """ List single "directory" with or without details """
-        path = path.lstrip('s3://').rstrip('/')
+        if path.startswith('s3://'):
+            path = path[len('s3://'):]
+        path = path.rstrip('/')
         files = self._ls(path)
         if path:
             pattern = re.compile(path + '/[^/]*.$')
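The regex that `ls` builds from the normalised path matches direct children only: `[^/]*` forbids a deeper '/' in the key, while the final `.` allows exactly one more character, which may itself be a '/', so zero-byte directory-marker keys still match. For instance:

    >>> import re
    >>> pattern = re.compile('mybucket/test' + '/[^/]*.$')
    >>> bool(pattern.match('mybucket/test/file.txt'))   # direct child
    True
    >>> bool(pattern.match('mybucket/test/sub/'))       # directory marker
    True
    >>> bool(pattern.match('mybucket/test/sub/file'))   # deeper key is excluded
    False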
@@ -203,6 +213,11 @@ def ls(self, path, detail=False):
         return [f['Key'] for f in files]
 
     def info(self, path):
+        """ Detail on the specific file pointed to by path.
+
+        NB: path has trailing '/' stripped to work as `ls` does, so key
+        names that genuinely end in '/' will fail.
+        """
         if path.startswith('s3://'):
             path = path[len('s3://'):]
         path = path.rstrip('/')
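The NB in the new docstring deserves a concrete case: because the trailing '/' is stripped before lookup, a key whose name literally ends in '/' cannot be addressed through `info`. With illustrative names and `s3` as above:

    s3.info('mybucket/data/file.csv')   # detail dict for that key
    s3.info('mybucket/data/')           # looked up as 'mybucket/data', so a key
                                        # actually named 'data/' is unreachable here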
@@ -215,6 +230,8 @@ def info(self, path):
 
     def walk(self, path):
         """ Return all entries below path """
+        if path.startswith('s3://'):
+            path = path[len('s3://'):]
         return [f['Key'] for f in self._ls(path) if f['Key'].rstrip('/'
                 ).startswith(path.rstrip('/') + '/')]
 
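`walk` gains the same 's3://' handling, so full URLs now work here too. It differs from `ls` in depth: `ls` returns one "directory" level, `walk` returns every key under the path. A hedged comparison over an assumed layout:

    # assumed keys: mybucket/top, mybucket/nested/a, mybucket/nested/b
    s3.ls('s3://mybucket')      # direct entries of the bucket only
    s3.walk('s3://mybucket')    # all three keys, however deeply nested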
@@ -225,7 +242,9 @@ def glob(self, path):
         Note that the bucket part of the path must not contain a "*"
         """
         path0 = path
-        path = path.lstrip('s3://').lstrip('/')
+        if path.startswith('s3://'):
+            path = path[len('s3://'):]
+        path = path.rstrip('/')
         bucket, key = split_path(path)
         if "*" in bucket:
             raise ValueError('Bucket cannot contain a "*"')
@@ -260,6 +279,11 @@ def du(self, path, total=False, deep=False):
             return {p['Key']: p['Size'] for p in files}
 
     def exists(self, path):
+        """ Does such a file exist?
+        """
+        if path.startswith('s3://'):
+            path = path[len('s3://'):]
+        path = path.rstrip('/')
         if split_path(path)[1]:
             return bool(self.ls(path))
         else:
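`exists` now normalises the path identically before branching: a 'bucket/key' path is probed with `ls`, while a bare bucket falls into the else-branch elided below (presumably a check against the bucket list). In use:

    s3.exists('s3://mybucket/some/key')   # True iff ls() returns matching entries
    s3.exists('mybucket')                 # bucket-level check via the elided branch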
@@ -355,11 +379,12 @@ def rm(self, path, recursive=False):
                 raise IOError('Delete key failed', (bucket, key))
             self._ls(path, refresh=True)
         else:
-            if recursive or not self.s3.list_objects(Bucket=bucket).get('Contents'):
+            if not self.s3.list_objects(Bucket=bucket).get('Contents'):
                 try:
                     self.s3.delete_bucket(Bucket=bucket)
                 except ClientError:
                     raise IOError('Delete bucket failed', bucket)
+                self.dirs.pop(bucket, None)
                 self._ls('', refresh=True)
             else:
                 raise IOError('Not empty', path)
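Dropping `recursive or` fixes a trap: S3's DeleteBucket call only ever succeeds on an empty bucket, so short-circuiting on `recursive` merely turned the inevitable ClientError into 'Delete bucket failed'. The added `self.dirs.pop(bucket, None)` also evicts the stale cache entry. To remove a non-empty bucket, the keys must go first; a sketch using only the methods shown above (bucket name made up):

    for key in s3.walk('mybucket'):   # delete every key under the bucket
        s3.rm(key)
    s3.rm('mybucket')                 # bucket is now empty, so this succeeds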
4 changes: 4 additions & 0 deletions s3fs/tests/test_s3fs.py
@@ -177,6 +177,7 @@ def test_du(s3):
 
     assert s3.du(test_bucket_name + '/test/', total=True) ==\
         sum(map(len, files.values()))
+    assert s3.du(test_bucket_name) == s3.du('s3://'+test_bucket_name)
 
 
 def test_s3_ls(s3):
@@ -200,6 +201,7 @@ def test_s3_glob(s3):
     assert fn in s3.glob(test_bucket_name+'/nested/*')
     assert fn in s3.glob(test_bucket_name+'/nested/file*')
     assert fn in s3.glob(test_bucket_name+'/*/*')
+    assert all(f in s3.walk(test_bucket_name) for f in s3.glob(test_bucket_name+'/nested/*'))
     with pytest.raises(ValueError):
         s3.glob('*')
 
@@ -372,6 +374,8 @@ def test_new_bucket(s3):
     s3.rm('new/temp')
     s3.rmdir('new')
     assert not s3.exists('new')
+    with pytest.raises((IOError, OSError)):
+        s3.ls('new')
 
 def test_write_small(s3):
     with s3.open(test_bucket_name+'/test', 'wb') as f:
