Skip to content

Commit

Permalink
Merge pull request #139 from davidbrochart/put_recursive
Browse files: browse the repository at this point in the history
Add GCSFileSystem.put(recursive=True)
  • Loading branch information
martindurant committed Mar 18, 2019
2 parents 1912942 + a9ce574 commit 0847583
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 109 deletions.
76 changes: 47 additions & 29 deletions gcsfs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -918,28 +918,28 @@ def get(self, rpath, lpath, blocksize=5 * 2 ** 20, recursive=False):
blocksize: int
Chunks in which the data is fetched
recursive: bool
If true, recursively download files in subdirectories. "rpath" must
be a directory.
If true, recursively download files in subdirectories.
"""
if recursive:
if not rpath.endswith('/'):
rpath += '/'
subpaths = [key[len(rpath):] for key in self.walk(rpath)]
else:
subpaths = ['']
for subpath in subpaths:
if subpath:
lsubpath = os.path.join(lpath, subpath)
rsubpath = os.path.join(rpath, subpath)
ldirname = os.path.dirname(lsubpath)
if not os.path.exists(ldirname):
# python2 doesn't have exist_ok argument in makedirs
os.makedirs(ldirname)
rpaths = self.walk(rpath)
rootdir = os.path.basename(rpath.rstrip('/'))
if os.path.isdir(lpath):
# copy rpath inside lpath directory
lpath2 = os.path.join(lpath, rootdir)
else:
rsubpath = rpath
lsubpath = lpath
with self.open(rsubpath, 'rb', block_size=blocksize) as f1:
with open(lsubpath, 'wb') as f2:
# copy rpath as lpath directory
lpath2 = lpath
lpaths = [os.path.join(lpath2, path[len(rpath):].lstrip('/')) for path in rpaths]
for lpath in lpaths:
dirname = os.path.dirname(lpath)
if not os.path.isdir(dirname):
os.makedirs(dirname)
else:
rpaths = [rpath]
lpaths = [lpath]
for rpath, lpath in zip(rpaths, lpaths):
with self.open(rpath, 'rb', block_size=blocksize) as f1:
with open(lpath, 'wb') as f2:
while True:
d = f1.read(blocksize)
if not d:
Expand All @@ -948,8 +948,8 @@ def get(self, rpath, lpath, blocksize=5 * 2 ** 20, recursive=False):

@_tracemethod
def put(self, lpath, rpath, blocksize=5 * 2 ** 20, acl=None,
metadata=None):
"""Upload local file to remote
metadata=None, recursive=False):
"""Upload local files to remote
Parameters
----------
Expand All @@ -963,15 +963,33 @@ def put(self, lpath, rpath, blocksize=5 * 2 ** 20, acl=None,
Optional access control to apply to the created object
metadata: None or dict
Gets added to object metadata on server
recursive: bool
If true, recursively upload files in subdirectories
"""
with self.open(rpath, 'wb', block_size=blocksize, acl=acl,
metadata=metadata) as f1:
with open(lpath, 'rb') as f2:
while True:
d = f2.read(blocksize)
if not d:
break
f1.write(d)
if recursive:
lpaths = []
for dirname, subdirlist, filelist in os.walk(lpath):
lpaths += [os.path.join(dirname, filename) for filename in filelist]
rootdir = os.path.basename(lpath.rstrip('/'))
if self.exists(rpath):
# copy lpath inside rpath directory
rpath2 = os.path.join(rpath, rootdir)
else:
# copy lpath as rpath directory
rpath2 = rpath
rpaths = [os.path.join(rpath2, path[len(lpath):].lstrip('/')) for path in lpaths]
else:
lpaths = [lpath]
rpaths = [rpath]
for lpath, rpath in zip(lpaths, rpaths):
with self.open(rpath, 'wb', block_size=blocksize, acl=acl,
metadata=metadata) as f1:
with open(lpath, 'rb') as f2:
while True:
d = f2.read(blocksize)
if not d:
break
f1.write(d)

def getxattr(self, path, attr):
"""Get user-defined metadata attribute"""
Expand Down
159 changes: 90 additions & 69 deletions gcsfs/tests/recordings/test_get_put_recursive.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -674,13 +674,34 @@ interactions:
X-Goog-Metageneration: ['1']
X-Goog-Storage-Class: [STANDARD]
status: {code: 206, message: Partial Content}
- request:
body: null
headers:
Accept: ['*/*']
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
Content-Length: ['0']
method: GET
uri: https://www.googleapis.com/storage/v1/b/gcsfs-testing/o/temp_dir
response:
body: {string: "{\n \"error\": {\n \"errors\": [\n {\n \"domain\": \"global\",\n
\ \"reason\": \"notFound\",\n \"message\": \"No such object: gcsfs-testing/temp_dir\"\n
\ }\n ],\n \"code\": 404,\n \"message\": \"No such object: gcsfs-testing/temp_dir\"\n
}\n}\n"}
headers:
Cache-Control: ['private, max-age=0']
Content-Length: ['237']
Content-Type: [application/json; charset=UTF-8]
Server: [UploadServer]
Vary: [Origin, X-Origin]
status: {code: 404, message: Not Found}
- request:
body: '--==0==
Content-Type: application/json; charset=UTF-8
{"name": "temp"}
{"name": "temp_dir/accounts.1.json"}
--==0==
Expand All @@ -706,14 +727,64 @@ interactions:
method: POST
uri: https://www.googleapis.com/upload/storage/v1/b/gcsfs-testing/o?uploadType=multipart
response:
body: {string: "{\n \"kind\": \"storage#object\",\n \"id\": \"gcsfs-testing/temp/1541774718062883\",\n
\"selfLink\": \"https://www.googleapis.com/storage/v1/b/gcsfs-testing/o/temp\",\n
\"name\": \"temp\",\n \"bucket\": \"gcsfs-testing\",\n \"generation\": \"1541774718062883\",\n
body: {string: "{\n \"kind\": \"storage#object\",\n \"id\": \"gcsfs-testing/temp_dir/accounts.1.json/1541774718062883\",\n
\"selfLink\": \"https://www.googleapis.com/storage/v1/b/gcsfs-testing/o/temp_dir/accounts.1.json\",\n
\"name\": \"temp_dir/accounts.1.json\",\n \"bucket\": \"gcsfs-testing\",\n \"generation\": \"1541774718062883\",\n
\"metageneration\": \"1\",\n \"contentType\": \"application/octet-stream\",\n
\"timeCreated\": \"2018-11-09T14:45:18.062Z\",\n \"updated\": \"2018-11-09T14:45:18.062Z\",\n
\"storageClass\": \"STANDARD\",\n \"timeStorageClassUpdated\": \"2018-11-09T14:45:18.062Z\",\n
\"size\": \"133\",\n \"md5Hash\": \"xK7pmJz/Oj5HGIyfQpYTig==\",\n \"mediaLink\":
\"https://www.googleapis.com/download/storage/v1/b/gcsfs-testing/o/temp?generation=1541774718062883&alt=media\",\n
\"https://www.googleapis.com/download/storage/v1/b/gcsfs-testing/o/temp_dir/accounts.1.json?generation=1541774718062883&alt=media\",\n
\"crc32c\": \"6wJAgQ==\",\n \"etag\": \"CKOik7PGx94CEAE=\"\n}\n"}
headers:
Cache-Control: ['no-cache, no-store, max-age=0, must-revalidate']
Content-Length: ['723']
Content-Type: [application/json; charset=UTF-8]
ETag: [CKOik7PGx94CEAE=]
Pragma: [no-cache]
Server: [UploadServer]
Vary: [Origin, X-Origin]
status: {code: 200, message: OK}
- request:
body: '--==0==
Content-Type: application/json; charset=UTF-8
{"name": "temp_dir/accounts.2.json"}
--==0==
Content-Type: application/octet-stream
{"amount": 500, "name": "Alice"}
{"amount": 600, "name": "Bob"}
{"amount": 700, "name": "Charlie"}
{"amount": 800, "name": "Dennis"}
--==0==--'
headers:
Accept: ['*/*']
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
Content-Length: ['263']
Content-Type: [multipart/related; boundary="==0=="]
method: POST
uri: https://www.googleapis.com/upload/storage/v1/b/gcsfs-testing/o?uploadType=multipart
response:
body: {string: "{\n \"kind\": \"storage#object\",\n \"id\": \"gcsfs-testing/temp_dir/accounts.2.json/1541774718062883\",\n
\"selfLink\": \"https://www.googleapis.com/storage/v1/b/gcsfs-testing/o/temp_dir/accounts.2.json\",\n
\"name\": \"temp_dir/accounts.2.json\",\n \"bucket\": \"gcsfs-testing\",\n \"generation\": \"1541774718062883\",\n
\"metageneration\": \"1\",\n \"contentType\": \"application/octet-stream\",\n
\"timeCreated\": \"2018-11-09T14:45:18.062Z\",\n \"updated\": \"2018-11-09T14:45:18.062Z\",\n
\"storageClass\": \"STANDARD\",\n \"timeStorageClassUpdated\": \"2018-11-09T14:45:18.062Z\",\n
\"size\": \"133\",\n \"md5Hash\": \"xK7pmJz/Oj5HGIyfQpYTig==\",\n \"mediaLink\":
\"https://www.googleapis.com/download/storage/v1/b/gcsfs-testing/o/temp_dir/accounts.2.json?generation=1541774718062883&alt=media\",\n
\"crc32c\": \"6wJAgQ==\",\n \"etag\": \"CKOik7PGx94CEAE=\"\n}\n"}
headers:
Cache-Control: ['no-cache, no-store, max-age=0, must-revalidate']
Expand All @@ -731,18 +802,18 @@ interactions:
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
method: GET
uri: https://www.googleapis.com/storage/v1/b/gcsfs-testing/o/?delimiter=%2F&prefix=temp
uri: https://www.googleapis.com/storage/v1/b/gcsfs-testing/o/?delimiter=%2F&prefix=temp_dir%2Faccounts.1.json
response:
body: {string: "{\n \"kind\": \"storage#objects\",\n \"items\": [\n {\n \"kind\":
\"storage#object\",\n \"id\": \"gcsfs-testing/temp/1541774718062883\",\n
\ \"selfLink\": \"https://www.googleapis.com/storage/v1/b/gcsfs-testing/o/temp\",\n
\ \"name\": \"temp\",\n \"bucket\": \"gcsfs-testing\",\n \"generation\":
\"storage#object\",\n \"id\": \"gcsfs-testing/temp_dir/accounts.1.json/1541774718062883\",\n
\ \"selfLink\": \"https://www.googleapis.com/storage/v1/b/gcsfs-testing/o/temp_dir/accounts.1.json\",\n
\ \"name\": \"temp_dir/accounts.1.json\",\n \"bucket\": \"gcsfs-testing\",\n \"generation\":
\"1541774718062883\",\n \"metageneration\": \"1\",\n \"contentType\":
\"application/octet-stream\",\n \"timeCreated\": \"2018-11-09T14:45:18.062Z\",\n
\ \"updated\": \"2018-11-09T14:45:18.062Z\",\n \"storageClass\": \"STANDARD\",\n
\ \"timeStorageClassUpdated\": \"2018-11-09T14:45:18.062Z\",\n \"size\":
\"133\",\n \"md5Hash\": \"xK7pmJz/Oj5HGIyfQpYTig==\",\n \"mediaLink\":
\"https://www.googleapis.com/download/storage/v1/b/gcsfs-testing/o/temp?generation=1541774718062883&alt=media\",\n
\"https://www.googleapis.com/download/storage/v1/b/gcsfs-testing/o/temp_dir/accounts.1.json?generation=1541774718062883&alt=media\",\n
\ \"crc32c\": \"6wJAgQ==\",\n \"etag\": \"CKOik7PGx94CEAE=\"\n }\n ]\n}\n"}
headers:
Cache-Control: ['private, max-age=0, must-revalidate, no-transform']
Expand All @@ -758,7 +829,7 @@ interactions:
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
method: GET
uri: https://www.googleapis.com/storage/v1/b/gcsfs-testing/o/?delimiter=%2F&prefix=temp%2F
uri: https://www.googleapis.com/storage/v1/b/gcsfs-testing/o/?delimiter=%2F&prefix=temp_dir%2Faccounts.1.json%2F
response:
body: {string: "{\n \"kind\": \"storage#objects\"\n}\n"}
headers:
Expand All @@ -775,7 +846,7 @@ interactions:
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
method: GET
uri: https://www.googleapis.com/download/storage/v1/b/gcsfs-testing/o/temp?alt=media
uri: https://www.googleapis.com/download/storage/v1/b/gcsfs-testing/o/temp_dir%2Faccounts.1.json?alt=media
response:
body: {string: '{"amount": 100, "name": "Alice"}

Expand All @@ -800,75 +871,25 @@ interactions:
X-Goog-Metageneration: ['1']
X-Goog-Storage-Class: [STANDARD]
status: {code: 200, message: OK}
- request:
body: '--==0==
Content-Type: application/json; charset=UTF-8
{"name": "temp"}
--==0==
Content-Type: application/octet-stream
{"amount": 500, "name": "Alice"}
{"amount": 600, "name": "Bob"}
{"amount": 700, "name": "Charlie"}
{"amount": 800, "name": "Dennis"}
--==0==--'
headers:
Accept: ['*/*']
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
Content-Length: ['263']
Content-Type: [multipart/related; boundary="==0=="]
method: POST
uri: https://www.googleapis.com/upload/storage/v1/b/gcsfs-testing/o?uploadType=multipart
response:
body: {string: "{\n \"kind\": \"storage#object\",\n \"id\": \"gcsfs-testing/temp/1541774718062883\",\n
\"selfLink\": \"https://www.googleapis.com/storage/v1/b/gcsfs-testing/o/temp\",\n
\"name\": \"temp\",\n \"bucket\": \"gcsfs-testing\",\n \"generation\": \"1541774718062883\",\n
\"metageneration\": \"1\",\n \"contentType\": \"application/octet-stream\",\n
\"timeCreated\": \"2018-11-09T14:45:18.062Z\",\n \"updated\": \"2018-11-09T14:45:18.062Z\",\n
\"storageClass\": \"STANDARD\",\n \"timeStorageClassUpdated\": \"2018-11-09T14:45:18.062Z\",\n
\"size\": \"133\",\n \"md5Hash\": \"xK7pmJz/Oj5HGIyfQpYTig==\",\n \"mediaLink\":
\"https://www.googleapis.com/download/storage/v1/b/gcsfs-testing/o/temp?generation=1541774718062883&alt=media\",\n
\"crc32c\": \"6wJAgQ==\",\n \"etag\": \"CKOik7PGx94CEAE=\"\n}\n"}
headers:
Cache-Control: ['no-cache, no-store, max-age=0, must-revalidate']
Content-Length: ['723']
Content-Type: [application/json; charset=UTF-8]
ETag: [CKOik7PGx94CEAE=]
Pragma: [no-cache]
Server: [UploadServer]
Vary: [Origin, X-Origin]
status: {code: 200, message: OK}
- request:
body: null
headers:
Accept: ['*/*']
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
method: GET
uri: https://www.googleapis.com/storage/v1/b/gcsfs-testing/o/?delimiter=%2F&prefix=temp
uri: https://www.googleapis.com/storage/v1/b/gcsfs-testing/o/?delimiter=%2F&prefix=temp_dir%2Faccounts.2.json
response:
body: {string: "{\n \"kind\": \"storage#objects\",\n \"items\": [\n {\n \"kind\":
\"storage#object\",\n \"id\": \"gcsfs-testing/temp/1541774718062883\",\n
\ \"selfLink\": \"https://www.googleapis.com/storage/v1/b/gcsfs-testing/o/temp\",\n
\ \"name\": \"temp\",\n \"bucket\": \"gcsfs-testing\",\n \"generation\":
\"storage#object\",\n \"id\": \"gcsfs-testing/temp_dir/accounts.2.json/1541774718062883\",\n
\ \"selfLink\": \"https://www.googleapis.com/storage/v1/b/gcsfs-testing/o/temp_dir/accounts.2.json\",\n
\ \"name\": \"temp_dir/accounts.2.json\",\n \"bucket\": \"gcsfs-testing\",\n \"generation\":
\"1541774718062883\",\n \"metageneration\": \"1\",\n \"contentType\":
\"application/octet-stream\",\n \"timeCreated\": \"2018-11-09T14:45:18.062Z\",\n
\ \"updated\": \"2018-11-09T14:45:18.062Z\",\n \"storageClass\": \"STANDARD\",\n
\ \"timeStorageClassUpdated\": \"2018-11-09T14:45:18.062Z\",\n \"size\":
\"133\",\n \"md5Hash\": \"xK7pmJz/Oj5HGIyfQpYTig==\",\n \"mediaLink\":
\"https://www.googleapis.com/download/storage/v1/b/gcsfs-testing/o/temp?generation=1541774718062883&alt=media\",\n
\"https://www.googleapis.com/download/storage/v1/b/gcsfs-testing/o/temp_dir/accounts.2.json?generation=1541774718062883&alt=media\",\n
\ \"crc32c\": \"6wJAgQ==\",\n \"etag\": \"CKOik7PGx94CEAE=\"\n }\n ]\n}\n"}
headers:
Cache-Control: ['private, max-age=0, must-revalidate, no-transform']
Expand All @@ -884,7 +905,7 @@ interactions:
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
method: GET
uri: https://www.googleapis.com/storage/v1/b/gcsfs-testing/o/?delimiter=%2F&prefix=temp%2F
uri: https://www.googleapis.com/storage/v1/b/gcsfs-testing/o/?delimiter=%2F&prefix=temp_dir%2Faccounts.2.json%2F
response:
body: {string: "{\n \"kind\": \"storage#objects\"\n}\n"}
headers:
Expand All @@ -901,7 +922,7 @@ interactions:
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
method: GET
uri: https://www.googleapis.com/download/storage/v1/b/gcsfs-testing/o/temp?alt=media
uri: https://www.googleapis.com/download/storage/v1/b/gcsfs-testing/o/temp_dir%2Faccounts.2.json?alt=media
response:
body: {string: '{"amount": 500, "name": "Alice"}

Expand Down
27 changes: 16 additions & 11 deletions gcsfs/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,19 +312,24 @@ def test_get_put(token_restore):
def test_get_put_recursive(token_restore):
with gcs_maker(True) as gcs:
with tempdir() as dn:
gcs.get(TEST_BUCKET+'/test', dn, recursive=True)
gcs.get(TEST_BUCKET+'/test/', dn+'/temp_dir', recursive=True)
# there is now in local directory:
# dn+'/temp_dir/accounts.1.json'
# dn+'/temp_dir/accounts.2.json'
data1 = files['test/accounts.1.json']
data2 = files['test/accounts.2.json']
assert open(dn+'/accounts.1.json', 'rb').read() == data1
assert open(dn+'/accounts.2.json', 'rb').read() == data2
gcs.put(dn+'/accounts.1.json', TEST_BUCKET+'/temp')
assert gcs.du(TEST_BUCKET+'/temp')[
TEST_BUCKET+'/temp'] == len(data1)
assert gcs.cat(TEST_BUCKET+'/temp') == data1
gcs.put(dn+'/accounts.2.json', TEST_BUCKET+'/temp')
assert gcs.du(TEST_BUCKET+'/temp')[
TEST_BUCKET+'/temp'] == len(data2)
assert gcs.cat(TEST_BUCKET+'/temp') == data2
assert open(dn+'/temp_dir/accounts.1.json', 'rb').read() == data1
assert open(dn+'/temp_dir/accounts.2.json', 'rb').read() == data2
gcs.put(dn+'/temp_dir', TEST_BUCKET+'/temp_dir', recursive=True)
# there is now in remote directory:
# TEST_BUCKET+'/temp_dir/accounts.1.json'
# TEST_BUCKET+'/temp_dir/accounts.2.json'
assert gcs.du(TEST_BUCKET+'/temp_dir/accounts.1.json')[
TEST_BUCKET+'/temp_dir/accounts.1.json'] == len(data1)
assert gcs.cat(TEST_BUCKET+'/temp_dir/accounts.1.json') == data1
assert gcs.du(TEST_BUCKET+'/temp_dir/accounts.2.json')[
TEST_BUCKET+'/temp_dir/accounts.2.json'] == len(data2)
assert gcs.cat(TEST_BUCKET+'/temp_dir/accounts.2.json') == data2


@my_vcr.use_cassette(match=['all'])
Expand Down

0 comments on commit 0847583

Please sign in to comment.