diff --git a/bin/kbucket-download b/bin/kbucket-download
new file mode 100755
index 0000000..e63ed55
--- /dev/null
+++ b/bin/kbucket-download
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+
+import sys
+from kbucket import client as kb
+import argparse
+import os
+
+def print_usage():
+    """Print a short usage summary for this command line tool."""
+    print('Usage:')
+    print('kbucket-download kbucket://[share-id]/[path] [output_file]')
+    print('kbucket-download sha1://[sha1]/[path] [output_file]')
+
+def download(path,output_path,args):
+    """Download a single file from kbucket to output_path.
+
+    Calls sys.exit(-1) if output_path exists and --overwrite was not given,
+    or if the file cannot be found. A file that resolves to an http(s) URL
+    and exceeds args.max_file_size_mb is skipped with a message.
+    """
+    if os.path.exists(output_path):
+        if not args.overwrite:
+            print('Cannot download file... output file already exists. Use --overwrite flag to force.')
+            sys.exit(-1)
+    path0=kb.findFile(path)
+    if not path0:
+        print('Unable to find file: '+path)
+        sys.exit(-1)
+    # The size limit is only checked when findFile resolved to a remote URL
+    if is_url(path0):
+        size0=kb.getFileSize(path)
+        if size0>args.max_file_size_mb*1024*1024:
+            print('Ignoring large file ({} > {}): {}'.format(size0/(1024*1024),args.max_file_size_mb,output_path))
+            return
+    kb.realizeFile(path,target_path=output_path)
+
+def is_url(path):
+    """Return True if path starts with http:// or https://."""
+    return (path.startswith('http://') or path.startswith('https://'))
+
+def download_dir(path,output_path,args):
+    """Recursively download a kbucket directory into output_path.
+
+    Calls sys.exit(-1) if the directory cannot be read, or if output_path
+    already exists and --overwrite was not given.
+    """
+    try:
+        dd=kb.readDir(path)
+    except Exception:
+        dd=None
+    if dd is None:
+        print('Unable to read directory: '+path)
+        sys.exit(-1)
+    if os.path.exists(output_path):
+        if not args.overwrite:
+            print('Cannot download directory... output directory already exists. Use --overwrite flag to force.')
+            sys.exit(-1)
+    else:
+        os.mkdir(output_path)
+    # readDir returns dict(files={name: info}, dirs={name: info}); iterate the names
+    for fname in dd['files']:
+        print(output_path+'/'+fname)
+        download(path+'/'+fname,output_path+'/'+fname,args)
+    for dname in dd['dirs']:
+        print(output_path+'/'+dname+'/')
+        download_dir(path+'/'+dname,output_path+'/'+dname,args)
+
+if __name__== "__main__":
+    parser = argparse.ArgumentParser(description = 'Download a file or directory from kbucket')
+    parser.add_argument('path', help='The path of the file (or directory) on kbucket')
+    parser.add_argument('output_path', help='The path of the destination file on the local computer')
+    parser.add_argument('--dir', action='store_true', help='Download a directory rather than a file')
+    parser.add_argument('--overwrite', action='store_true', help='Allow overwriting existing files')
+    parser.add_argument('--max_file_size_mb', help='Ignore files larger than this size in megabytes', type=float, default=10)
+
+    args = parser.parse_args()
+
+    if args.dir:
+        download_dir(args.path,args.output_path,args)
+    else:
+        download(args.path,args.output_path,args)
+
+
diff --git a/kbucket/kbucketclient.py b/kbucket/kbucketclient.py
index 1b9c25c..0773793 100644
--- a/kbucket/kbucketclient.py
+++ b/kbucket/kbucketclient.py
@@ -55,46 +55,63 @@ def getFileSize(self, path=None,*,sha1=None,share_ids=None,key=None,collection=N
     def moveFileToCache(self,path):
         return self._sha1_cache.moveFileToCache(path)
 
-    def readDir(self,path):
+    def readDir(self,path,recursive=False,include_sha1=True):
+        """Return dict(files={...},dirs={...}) for a kbucket:// or local directory (None if the kbucket read fails)."""
         if path.startswith('kbucket://'):
             list=path.split('/')
             share_id=_filter_share_id(list[2])
             path0='/'.join(list[3:])
-            obj=self._read_kbucket_dir(share_id=share_id,path=path0)
-            if not obj:
-                return None
-            ret=KBucketClientDirectory()
-            for a in obj['files']:
-                ff=KBucketClientDirectoryFile()
-                ff.name=a['name']
-                ff.size=a['size']
-                ff.path=path+'/'+ff.name
-                ff.sha1=a['prv']['original_checksum']
-                ret.files.append(ff)
-            for a in obj['dirs']:
-                ff=KBucketClientDirectoryDir()
-                ff.name=a['name']
-                ff.path=path+'/'+ff.name
-                ret.dirs.append(ff)
-            return ret
+            ret=self._read_kbucket_dir(share_id=share_id,path=path0,recursive=recursive,include_sha1=include_sha1)
         else:
-            ret=KBucketClientDirectory()
+            ret=self._read_file_system_dir(path=path,recursive=recursive,include_sha1=include_sha1)
+        return ret
+
+    def _read_file_system_dir(self,*,path,recursive,include_sha1):
+        """List a local directory as dict(files={name:dict(size[,sha1])},dirs={name:{...}}), descending when recursive."""
+        ret=dict(
+            files={},
+            dirs={}
+        )
         list=os.listdir(path)
-        for fname in list:
-            if os.path.isfile(fname):
-                ff=KBucketClientDirectoryFile()
-                ff.name=fname
-                ff.path=path+'/'+ff.name
-                ff.size=os.path.getsize(ff.path)
-                ff.sha1=None
-                ret.files.append(ff)
-            elif os.path.isdir(fname):
-                ff=KBucketClientDirectoryDir()
-                ff.name=fname
-                ff.path=path+'/'+ff.name
-                ret.dirs.append(ff)
+        for name0 in list:
+            path0=path+'/'+name0
+            if os.path.isfile(path0):
+                ret['files'][name0]=dict(
+                    size=os.path.getsize(path0)
+                )
+                if include_sha1:
+                    ret['files'][name0]['sha1']=self.computeFileSha1(path0)
+            elif os.path.isdir(path0):
+                ret['dirs'][name0]={}
+                if recursive:
+                    ret['dirs'][name0]=self._read_file_system_dir(path=path0,recursive=recursive,include_sha1=include_sha1)
         return ret
 
+    def _read_kbucket_dir(self,*,share_id,path,recursive,include_sha1):
+        """List a directory on a kbucket share in the same dict form; returns None if the readdir request is unsuccessful."""
+        url=self._config['url']+'/'+share_id+'/api/readdir/'+path
+        obj=_http_get_json(url)
+        if not obj['success']:
+            return None
+
+        ret=dict(
+            files={},
+            dirs={}
+        )
+        for file0 in obj['files']:
+            name0=file0['name']
+            ret['files'][name0]=dict(
+                size=file0['size']
+            )
+            if include_sha1:
+                ret['files'][name0]['sha1']=file0['prv']['original_checksum']
+        for dir0 in obj['dirs']:
+            name0=dir0['name']
+            ret['dirs'][name0]={}
+            if recursive:
+                ret['dirs'][name0]=self._read_kbucket_dir(share_id=share_id,path=path+'/'+name0,recursive=recursive,include_sha1=include_sha1)
+        return ret
+
     def computeFileSha1(self,path):
         if path.startswith('sha1://'):
             list=path.split('/')
@@ -106,6 +123,11 @@ def computeFileSha1(self,path):
         else:
             return self._sha1_cache.computeFileSha1(path)
 
+    def computeDirHash(self,path):
+        """Return _sha1_of_object() applied to the recursive readDir listing (with per-file sha1) of path."""
+        dd=self.readDir(path=path,recursive=True,include_sha1=True)
+        return _sha1_of_object(dd)
+
     def uploadFile(self,path,share_id=None,upload_token=None):
         if not share_id:
             share_id=self._config['upload_share_id']
@@ -197,7 +219,7 @@ def _find_file_helper(self,*,path,sha1,share_ids,key,collection,local=True):
         if key is not None:
             sha1=pairio.get(key=key,collection=collection)
             if not sha1:
-                raise Exception('Unable to find file SHA-1 for this key.')
+                return (None,None,None)
         if path is not None:
             if sha1 is not None:
                 raise Exception('Cannot specify both path and sha1 in find file')
@@ -257,13 +279,6 @@ def _find_in_share(self,*,sha1,share_id):
                 return (url0,size0)
         return (None,None)
 
-    def _read_kbucket_dir(self,*,share_id,path):
-        url=self._config['url']+'/'+share_id+'/api/readdir/'+path
-        obj=_http_get_json(url)
-        if not obj['success']:
-            return None
-        return obj
-
     def _get_cas_upload_url_for_share(self,share_id):
         node_info=self.getNodeInfo(share_id)
         if not node_info:
@@ -272,41 +287,19 @@ def _get_cas_upload_url_for_share(self,share_id):
 
 class KBucketClientDirectory:
     def __init__(self):
-        self.files=[]
-        self.dirs=[]
+        self.files=dict()
+        self.dirs=dict()
     def toDict(self):
         ret=dict(
-            files=[],
-            dirs=[]
+            files={},
+            dirs={}
         )
-        for file in self.files:
-            ret['files'].append(file.toDict())
-        for dir in self.dirs:
-            ret['dirs'].append(dir.toDict())
+        for name in self.files:
+            ret['files'][name]=self.files[name].toDict()
+        for name in self.dirs:
+            ret['dirs'][name]=self.dirs[name].toDict()
         return ret
 
-class KBucketClientDirectoryFile:
-    def __init__(self):
-        self.name=''
-        self.path=''
-        self.size=None
-        self.sha1=None
-    def toDict(self):
-        return dict(
-            name=self.name,
-            size=self.size,
-            sha1=self.sha1
-        )
-
-class KBucketClientDirectoryDir:
-    def __init__(self):
-        self.name=''
-        self.path=''
-    def toDict(self):
-        return dict(
-            name=self.name
-        )
-
 
 def _http_get_json(url):
     return json.load(urllib.request.urlopen(url))
diff --git a/setup.py b/setup.py
index 520143e..3ffc241 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@
 
 setuptools.setup(
     name=pkg_name,
-    version="0.11.5",
+    version="0.11.6",
     author="Jeremy Magland",
     author_email="jmagland@flatironinstitute.org",
     description="Python client for kbucket",