First-stab attempt at fixing test/interface errors.
Updates `ls` so a prefix search returns full paths rather than prefix-stripped names; this still needs to be verified. Should this be glob-like? Fixes the error from dask.bytes when a read-only file is flushed. Fixes up the listing so entries carry a "path"
attribute.
asford committed Feb 14, 2018
1 parent 193a02a commit 94d9bfc
Showing 2 changed files with 72 additions and 47 deletions.
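
As a quick illustration of the intended `ls` behaviour described above, here is a minimal sketch (not part of the commit); the project and bucket names are assumptions, and any readable bucket with an object under a "nested" prefix would do:

    from gcsfs import GCSFileSystem

    # Illustrative names only; assumes credentials are available.
    gcs = GCSFileSystem(project='my-project')
    gcs.touch('my-bucket/nested/file1')

    # With or without a trailing slash, ls should return the same full paths.
    assert sorted(gcs.ls('my-bucket/nested')) == sorted(gcs.ls('my-bucket/nested/'))

    # detail=True entries now carry a 'path' attribute alongside the metadata.
    for entry in gcs.ls('my-bucket/nested', detail=True):
        print(entry['path'], entry['size'])
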
87 changes: 53 additions & 34 deletions gcsfs/core.py
@@ -414,6 +414,8 @@ def buckets(self):
@classmethod
def _process_object(self, bucket, object_metadata):
object_metadata["size"] = int(object_metadata.get("size", 0))
object_metadata["path"] = posixpath.join(bucket, object_metadata["name"])

return object_metadata

def _get_object(self, path):
@@ -444,6 +446,7 @@ def _get_object(self, path):
return result


@_tracemethod
def _maybe_get_cached_listing(self, path):
logger.debug("_maybe_get_cached_listing: %s", path)
if path in self._listing_cache:
@@ -461,6 +464,7 @@ def _maybe_get_cached_listing(self, path):

return None

@_tracemethod
def _list_objects(self, path):
path = norm_path(path)

@@ -474,13 +478,11 @@ def _list_objects(self, path):
self._listing_cache[path] = (retrieved_time, listing)
return listing

@_tracemethod
def _do_list_objects(self, path, max_results = None):
"""Return depaginated object listing for the given {bucket}/{prefix}/ path."""
logger.debug("_list_objects(%s, max_results=%s)", path, max_results)
bucket, prefix = split_path(path)
if prefix:
assert prefix.endswith("/")
else:
if not prefix:
prefix = None

prefixes = []
@@ -598,43 +600,57 @@ def rmdir(self, bucket):
@_tracemethod
def ls(self, path, detail=False):
"""List objects under the given '/{bucket}/{prefix} path."""
path = norm_path(path)

if path in ['/', '']:
out = self.buckets
return self.buckets
elif path.endswith("/"):
return self._ls(path, detail)
else:
if not path.endswith("/"):
path = path + "/"
combined_listing = self._ls(path, detail) + self._ls(path + "/", detail)
if detail:
combined_entries = dict((l["path"],l) for l in combined_listing )
combined_entries.pop(path+"/", None)
return list(combined_entries.values())
else:
return list(set(combined_listing) - {path + "/"})

listing = self._list_objects(path)
bucket, key = split_path(path)
def _ls(self, path, detail=False):
listing = self._list_objects(path)
bucket, key = split_path(path)

if not detail:
result = []
if not detail:
result = []

# Convert item listing into list of 'item' and 'subdir/'
# entries. Items may be of form "key/", in which case there
# will be duplicate entries in prefix and item_names.
item_names = [
f["name"][len(key):] for f in listing["items"]
if f["name"][len(key):]
]
prefixes = [p for p in listing["prefixes"]]
# Convert item listing into list of 'item' and 'subdir/'
# entries. Items may be of form "key/", in which case there
# will be duplicate entries in prefix and item_names.
item_names = [
f["name"] for f in listing["items"] if f["name"]
]
prefixes = [p for p in listing["prefixes"]]

return list(set(item_names + prefixes))
logger.debug("path: %s item_names: %s prefixes: %s", path, item_names, prefixes)

else:
item_details = listing["items"]
return [
posixpath.join(bucket, n) for n in set(item_names + prefixes)
]

pseudodirs = [{
'bucket': bucket,
'name': prefix,
'kind': 'storage#object',
'size': 0,
'storageClass': 'DIRECTORY',
}
for prefix in listing["prefixes"]
]
else:
item_details = listing["items"]

return item_details + pseudodirs
pseudodirs = [{
'bucket': bucket,
'name': prefix,
'path': bucket + "/" + prefix,
'kind': 'storage#object',
'size': 0,
'storageClass': 'DIRECTORY',
}
for prefix in listing["prefixes"]
]

return item_details + pseudodirs

@_tracemethod
def walk(self, path, detail=False):
@@ -676,7 +692,7 @@ def du(self, path, total=False, deep=False):
files = [f for f in self.ls(path, True)]
if total:
return sum(f['size'] for f in files)
return {f['name']: f['size'] for f in files}
return {f['path']: f['size'] for f in files}

@_tracemethod
def glob(self, path):
@@ -735,7 +751,7 @@ def info(self, path):
# Return a pseudo dir for the bucket root
return {
'bucket': bucket,
'name': "/",
'name': bucket + "/",
'kind': 'storage#object',
'size': 0,
'storageClass': 'DIRECTORY',
@@ -1098,6 +1114,9 @@ def flush(self, force=False):
blocks are allowed to be. Disallows further writing to this file.
"""
if self.mode not in {'wb', 'ab'}:
if self.mode == "rb" and not force:
return

raise ValueError('Flush on a file not in write mode')
if self.closed:
raise ValueError('Flush on closed file')
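
For the dask.bytes flush fix above, a minimal sketch of the intended behaviour (not part of the diff), assuming `gcs` is a `GCSFileSystem` instance and an object already exists at the illustrative path: flushing a read-only handle without `force` is now a silent no-op, while `force=True` still raises.

    # Illustrative path; any existing readable object will do.
    f = gcs.open('my-bucket/temp', 'rb')
    f.flush()                 # no-op for a read-only handle, no ValueError
    try:
        f.flush(force=True)   # still rejected for files not opened for writing
    except ValueError:
        pass
    f.close()
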
32 changes: 19 additions & 13 deletions gcsfs/tests/test_core.py
@@ -67,15 +67,6 @@ def test_ls2(token_restore):
gcs.touch(fn)
assert fn in gcs.ls(TEST_BUCKET+'/test')

@my_vcr.use_cassette(match=['all'])
def test_list_bucket_multipage(token_restore):
with gcs_maker() as gcs:
gcs.touch(a)
gcs.touch(b)
gcs.touch(c)
dirs=gcs._list_bucket(TEST_BUCKET, max_results=2)
assert len(dirs) == 3

@my_vcr.use_cassette(match=['all'])
def test_pickle(token_restore):
import pickle
@@ -103,13 +94,16 @@ def test_pickle(token_restore):
def test_ls_touch(token_restore):
with gcs_maker() as gcs:
assert not gcs.exists(TEST_BUCKET+'/tmp/test')

gcs.touch(a)
gcs.touch(b)
L = gcs.ls(TEST_BUCKET+'/tmp/test', True)
assert set(d['name'] for d in L) == set([a, b])

L = gcs.ls(TEST_BUCKET+'/tmp/test', False)
assert set(L) == set([a, b])

L_d = gcs.ls(TEST_BUCKET+'/tmp/test', True)
assert set(d['path'] for d in L_d) == set([a, b])


@my_vcr.use_cassette(match=['all'])
def test_rm(token_restore):
@@ -179,11 +173,13 @@ def test_du(token_restore):
def test_ls(token_restore):
with gcs_maker(True) as gcs:
fn = TEST_BUCKET+'/nested/file1'
gcs.touch(fn)

assert fn not in gcs.ls(TEST_BUCKET+'/')
assert fn in gcs.ls(TEST_BUCKET+'/nested/')
assert fn in gcs.ls(TEST_BUCKET+'/nested')
assert gcs.ls('gcs://'+TEST_BUCKET+'/nested/') == gcs.ls(
TEST_BUCKET+'/nested')
assert list(sorted(gcs.ls('gcs://'+TEST_BUCKET+'/nested/'))) == \
list(sorted(gcs.ls(TEST_BUCKET+'/nested')))


@my_vcr.use_cassette(match=['all'])
@@ -395,6 +391,16 @@ def test_read_block(token_restore):
assert gcs.read_block(path, 5, None) == gcs.read_block(path, 5, 1000)


@my_vcr.use_cassette(match=['all'])
def test_empty_flush(token_restore):
with gcs_maker() as gcs:
gcs.touch(TEST_BUCKET+'/temp')
handle = gcs.open(TEST_BUCKET+'/temp', 'rb')
handle.flush()

with pytest.raises(ValueError):
handle.write("abc")

@my_vcr.use_cassette(match=['all'])
def test_write_fails(token_restore):
with gcs_maker() as gcs:
