First-stab attempt at fixing test/interface errors.
Updates `ls` so a prefix search returns full paths rather than prefix-stripped names; this still needs to be verified. Should this be glob-like? Fixes the error from dask.bytes when a read-only file is flushed. Fixes up the listing so entries carry a "path"
attribute.
asford committed Feb 14, 2018
1 parent 193a02a commit 94d9bfc
Showing 2 changed files with 72 additions and 47 deletions.
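
As a quick illustration of the intended `ls` behaviour described above, here is a minimal sketch (not part of the commit); the project and bucket names are assumptions, and any readable bucket with an object under a "nested" prefix would do:

    from gcsfs import GCSFileSystem

    # Illustrative names only; assumes credentials are available.
    gcs = GCSFileSystem(project='my-project')
    gcs.touch('my-bucket/nested/file1')

    # With or without a trailing slash, ls should return the same full paths.
    assert sorted(gcs.ls('my-bucket/nested')) == sorted(gcs.ls('my-bucket/nested/'))

    # detail=True entries now carry a 'path' attribute alongside the metadata.
    for entry in gcs.ls('my-bucket/nested', detail=True):
        print(entry['path'], entry['size'])
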
87 changes: 53 additions & 34 deletions gcsfs/core.py
@@ -414,6 +414,8 @@ def buckets(self):
@classmethod
def _process_object(self, bucket, object_metadata):
object_metadata["size"] = int(object_metadata.get("size", 0))
object_metadata["path"] = posixpath.join(bucket, object_metadata["name"])

return object_metadata

def _get_object(self, path):
@@ -444,6 +446,7 @@ def _get_object(self, path):
return result


@_tracemethod
def _maybe_get_cached_listing(self, path):
logger.debug("_maybe_get_cached_listing: %s", path)
if path in self._listing_cache:
@@ -461,6 +464,7 @@ def _maybe_get_cached_listing(self, path):

return None

@_tracemethod
def _list_objects(self, path):
path = norm_path(path)

@@ -474,13 +478,11 @@ def _list_objects(self, path):
self._listing_cache[path] = (retrieved_time, listing)
return listing

@_tracemethod
def _do_list_objects(self, path, max_results = None):
"""Return depaginated object listing for the given {bucket}/{prefix}/ path."""
logger.debug("_list_objects(%s, max_results=%s)", path, max_results)
bucket, prefix = split_path(path)
if prefix:
assert prefix.endswith("/")
else:
if not prefix:
prefix = None

prefixes = []
@@ -598,43 +600,57 @@ def rmdir(self, bucket):
@_tracemethod
def ls(self, path, detail=False):
"""List objects under the given '/{bucket}/{prefix} path."""
path = norm_path(path)

if path in ['/', '']:
out = self.buckets
return self.buckets
elif path.endswith("/"):
return self._ls(path, detail)
else:
if not path.endswith("/"):
path = path + "/"
combined_listing = self._ls(path, detail) + self._ls(path + "/", detail)
if detail:
combined_entries = dict((l["path"],l) for l in combined_listing )
combined_entries.pop(path+"/", None)
return list(combined_entries.values())
else:
return list(set(combined_listing) - {path + "/"})

listing = self._list_objects(path)
bucket, key = split_path(path)
def _ls(self, path, detail=False):
listing = self._list_objects(path)
bucket, key = split_path(path)

if not detail:
result = []
if not detail:
result = []

# Convert item listing into list of 'item' and 'subdir/'
# entries. Items may be of form "key/", in which case there
# will be duplicate entries in prefix and item_names.
item_names = [
f["name"][len(key):] for f in listing["items"]
if f["name"][len(key):]
]
prefixes = [p for p in listing["prefixes"]]
# Convert item listing into list of 'item' and 'subdir/'
# entries. Items may be of form "key/", in which case there
# will be duplicate entries in prefix and item_names.
item_names = [
f["name"] for f in listing["items"] if f["name"]
]
prefixes = [p for p in listing["prefixes"]]

return list(set(item_names + prefixes))
logger.debug("path: %s item_names: %s prefixes: %s", path, item_names, prefixes)

else:
item_details = listing["items"]
return [
posixpath.join(bucket, n) for n in set(item_names + prefixes)
]

pseudodirs = [{
'bucket': bucket,
'name': prefix,
'kind': 'storage#object',
'size': 0,
'storageClass': 'DIRECTORY',
}
for prefix in listing["prefixes"]
]
else:
item_details = listing["items"]

return item_details + pseudodirs
pseudodirs = [{
'bucket': bucket,
'name': prefix,
'path': bucket + "/" + prefix,
'kind': 'storage#object',
'size': 0,
'storageClass': 'DIRECTORY',
}
for prefix in listing["prefixes"]
]

return item_details + pseudodirs

@_tracemethod
def walk(self, path, detail=False):
@@ -676,7 +692,7 @@ def du(self, path, total=False, deep=False):
files = [f for f in self.ls(path, True)]
if total:
return sum(f['size'] for f in files)
return {f['name']: f['size'] for f in files}
return {f['path']: f['size'] for f in files}

@_tracemethod
def glob(self, path):
@@ -735,7 +751,7 @@ def info(self, path):
# Return a pseudo dir for the bucket root
return {
'bucket': bucket,
'name': "/",
'name': bucket + "/",
'kind': 'storage#object',
'size': 0,
'storageClass': 'DIRECTORY',
@@ -1098,6 +1114,9 @@ def flush(self, force=False):
blocks are allowed to be. Disallows further writing to this file.
"""
if self.mode not in {'wb', 'ab'}:
if self.mode == "rb" and not force:
return

raise ValueError('Flush on a file not in write mode')
if self.closed:
raise ValueError('Flush on closed file')
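
For the dask.bytes flush fix above, a minimal sketch of the intended behaviour (not part of the diff), assuming `gcs` is a `GCSFileSystem` instance and an object already exists at the illustrative path: flushing a read-only handle without `force` is now a silent no-op, while `force=True` still raises.

    # Illustrative path; any existing readable object will do.
    f = gcs.open('my-bucket/temp', 'rb')
    f.flush()                 # no-op for a read-only handle, no ValueError
    try:
        f.flush(force=True)   # still rejected for files not opened for writing
    except ValueError:
        pass
    f.close()
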
32 changes: 19 additions & 13 deletions gcsfs/tests/test_core.py
@@ -67,15 +67,6 @@ def test_ls2(token_restore):
gcs.touch(fn)
assert fn in gcs.ls(TEST_BUCKET+'/test')

@my_vcr.use_cassette(match=['all'])
def test_list_bucket_multipage(token_restore):
with gcs_maker() as gcs:
gcs.touch(a)
gcs.touch(b)
gcs.touch(c)
dirs=gcs._list_bucket(TEST_BUCKET, max_results=2)
assert len(dirs) == 3

@my_vcr.use_cassette(match=['all'])
def test_pickle(token_restore):
import pickle
@@ -103,13 +94,16 @@ def test_pickle(token_restore):
def test_ls_touch(token_restore):
with gcs_maker() as gcs:
assert not gcs.exists(TEST_BUCKET+'/tmp/test')

gcs.touch(a)
gcs.touch(b)
L = gcs.ls(TEST_BUCKET+'/tmp/test', True)
assert set(d['name'] for d in L) == set([a, b])

L = gcs.ls(TEST_BUCKET+'/tmp/test', False)
assert set(L) == set([a, b])

L_d = gcs.ls(TEST_BUCKET+'/tmp/test', True)
assert set(d['path'] for d in L_d) == set([a, b])


@my_vcr.use_cassette(match=['all'])
def test_rm(token_restore):
@@ -179,11 +173,13 @@ def test_du(token_restore):
def test_ls(token_restore):
with gcs_maker(True) as gcs:
fn = TEST_BUCKET+'/nested/file1'
gcs.touch(fn)

assert fn not in gcs.ls(TEST_BUCKET+'/')
assert fn in gcs.ls(TEST_BUCKET+'/nested/')
assert fn in gcs.ls(TEST_BUCKET+'/nested')
assert gcs.ls('gcs://'+TEST_BUCKET+'/nested/') == gcs.ls(
TEST_BUCKET+'/nested')
assert list(sorted(gcs.ls('gcs://'+TEST_BUCKET+'/nested/'))) == \
list(sorted(gcs.ls(TEST_BUCKET+'/nested')))


@my_vcr.use_cassette(match=['all'])
@@ -395,6 +391,16 @@ def test_read_block(token_restore):
assert gcs.read_block(path, 5, None) == gcs.read_block(path, 5, 1000)


@my_vcr.use_cassette(match=['all'])
def test_empty_flush(token_restore):
with gcs_maker() as gcs:
gcs.touch(TEST_BUCKET+'/temp')
handle = gcs.open(TEST_BUCKET+'/temp', 'rb')
handle.flush()

with pytest.raises(ValueError):
handle.write("abc")

@my_vcr.use_cassette(match=['all'])
def test_write_fails(token_restore):
with gcs_maker() as gcs:
