Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
magland committed Oct 23, 2018
1 parent f3100d0 commit b6bec40
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 71 deletions.
72 changes: 72 additions & 0 deletions bin/kbucket-download
@@ -0,0 +1,72 @@
#!/usr/bin/env python

import sys
from kbucket import client as kb
import argparse
import os

# Module-level parser carrying three generic options (-a flag, -b string,
# -c integer). The ``__main__`` section further down rebinds ``parser`` to the
# parser that is actually used for the command line.
parser = argparse.ArgumentParser(
    description='Download a file or directory from kbucket',
)

# argparse defaults already give: store_true -> default False, the default
# action is "store", and dest is derived from the option letter.
parser.add_argument('-a', action='store_true')
parser.add_argument('-b')
parser.add_argument('-c', type=int)

def print_usage():
    """Print the supported command-line invocation forms to stdout."""
    usage_lines = (
        'Usage:',
        'kbucket-download kbucket://[share-id]/[path] [output_file]',
        'kbucket-download sha1://[sha1]/[path] [output_file]',
    )
    for line in usage_lines:
        print(line)

def download(path, output_path, args):
    """Download a single kbucket file to ``output_path``.

    Parameters:
        path: kbucket/sha1 path of the file, as given on the command line.
        output_path: local destination path.
        args: parsed command-line namespace; ``args.overwrite`` and
            ``args.max_file_size_mb`` are read.

    Exits the process with status -1 when the destination already exists and
    ``--overwrite`` was not given, or when the file cannot be located.
    Files that resolve to a URL and exceed ``args.max_file_size_mb`` are
    reported and skipped (the function returns without downloading).
    """
    if os.path.exists(output_path) and not args.overwrite:
        # This is the single-file code path, so the message talks about a
        # file (the original text was copied from the directory variant).
        print('Cannot download file... output file already exists. Use --overwrite flag to force.')
        sys.exit(-1)

    path0 = kb.findFile(path)
    if not path0:
        print('Unable to find file: '+path)
        sys.exit(-1)

    # The size limit is only applied when findFile() resolved to an
    # http(s) URL; anything else is passed straight to realizeFile().
    if is_url(path0):
        size0 = kb.getFileSize(path)
        if size0 > args.max_file_size_mb*1024*1024:
            print('Ignoring large file ({} > {}): {}'.format(size0/(1024*1024), args.max_file_size_mb, output_path))
            return

    kb.realizeFile(path, target_path=output_path)

def is_url(path):
    """Return True when ``path`` begins with an http:// or https:// scheme."""
    # str.startswith accepts a tuple of alternative prefixes.
    return path.startswith(('http://', 'https://'))

def _entry_names(listing, kind):
    """Return the entry names stored under ``kind`` ('files' or 'dirs').

    Accepts both listing shapes that appear in the client code: a plain dict
    (or object) whose ``files``/``dirs`` are dicts keyed by entry name, and
    an object whose ``files``/``dirs`` are lists of entries with ``.name``.
    """
    entries = listing[kind] if isinstance(listing, dict) else getattr(listing, kind)
    if isinstance(entries, dict):
        return list(entries)
    return [entry.name for entry in entries]


def download_dir(path, output_path, args):
    """Recursively download a kbucket directory into ``output_path``.

    Parameters:
        path: kbucket path of the directory.
        output_path: local destination directory; created when missing.
        args: parsed command-line namespace; ``args.overwrite`` is read here
            and the namespace is forwarded to ``download``.

    Exits the process with status -1 when the directory cannot be read, or
    when ``output_path`` already exists and ``--overwrite`` was not given.
    Each file and sub-directory destination is printed before it is fetched.
    """
    try:
        listing = kb.readDir(path)
    except Exception:
        # Only ordinary errors are treated as "unreadable"; Ctrl-C and
        # SystemExit must still propagate.
        listing = None
    if listing is None:
        # readDir() can also signal failure by returning None.
        print('Unable to read directory: '+path)
        sys.exit(-1)

    if os.path.exists(output_path):
        if not args.overwrite:
            print('Cannot download directory... output directory already exists. Use --overwrite flag to force.')
            sys.exit(-1)
    else:
        os.mkdir(output_path)

    for name in _entry_names(listing, 'files'):
        print(output_path+'/'+name)
        download(path+'/'+name, output_path+'/'+name, args)
    for name in _entry_names(listing, 'dirs'):
        print(output_path+'/'+name+'/')
        download_dir(path+'/'+name, output_path+'/'+name, args)

if __name__ == "__main__":
    # Command-line interface: two positionals plus three optional switches.
    parser = argparse.ArgumentParser(
        description='Download a file or directory from kbucket',
    )
    parser.add_argument(
        'path',
        help='The path of the file (or directory) on kbucket',
    )
    parser.add_argument(
        'output_path',
        help='The path of the destination file on the local computer',
    )
    parser.add_argument(
        '--dir',
        action='store_true',
        help='Download a directory rather than a file',
    )
    parser.add_argument(
        '--overwrite',
        action='store_true',
        help='Allow overwriting existing files',
    )
    parser.add_argument(
        '--max_file_size_mb',
        help='Ignore files larger than this size in megabytes',
        type=float,
        default=10,
    )

    args = parser.parse_args()

    # --dir selects the recursive directory download; otherwise fetch one file.
    handler = download_dir if args.dir else download
    handler(args.path, args.output_path, args)


129 changes: 59 additions & 70 deletions kbucket/kbucketclient.py
Expand Up @@ -55,46 +55,60 @@ def getFileSize(self, path=None,*,sha1=None,share_ids=None,key=None,collection=N
def moveFileToCache(self,path):
return self._sha1_cache.moveFileToCache(path)

def readDir(self, path, recursive=False, include_sha1=True):
    """List the contents of a kbucket or local directory.

    Parameters:
        path: either ``kbucket://<share-id>/<sub/path>`` or a local
            directory path.
        recursive: forwarded to the reader; when true sub-directories are
            listed as well.
        include_sha1: forwarded to the reader; when true each file entry
            carries a ``sha1`` value.

    Returns whatever the selected reader returns: ``_read_kbucket_dir`` for
    ``kbucket://`` paths, ``_read_file_system_dir`` otherwise.
    """
    if path.startswith('kbucket://'):
        # 'kbucket://<share>/<a>/<b>' splits into
        # ['kbucket:', '', '<share>', '<a>', '<b>'].
        parts = path.split('/')
        share_id = _filter_share_id(parts[2])
        path0 = '/'.join(parts[3:])
        return self._read_kbucket_dir(
            share_id=share_id,
            path=path0,
            recursive=recursive,
            include_sha1=include_sha1,
        )
    return self._read_file_system_dir(
        path=path,
        recursive=recursive,
        include_sha1=include_sha1,
    )

def _read_file_system_dir(self, *, path, recursive, include_sha1):
    """Describe a local directory as nested ``files``/``dirs`` dicts.

    Parameters:
        path: local directory to list.
        recursive: when true, each sub-directory entry holds its own
            listing; otherwise it is an empty dict.
        include_sha1: when true, each file entry gets a ``sha1`` key from
            ``self.computeFileSha1``.

    Returns ``{'files': {name: {'size': ..., ['sha1': ...]}},
    'dirs': {name: {...}}}``. Entries that are neither regular files nor
    directories are left out.
    """
    ret = dict(
        files={},
        dirs={}
    )
    # Sorted so the resulting dict has a stable key order regardless of the
    # order the operating system reports entries in.
    for name0 in sorted(os.listdir(path)):
        # Test the full path: the bare name would be resolved against the
        # current working directory instead of ``path``.
        path0 = os.path.join(path, name0)
        if os.path.isfile(path0):
            ret['files'][name0] = dict(
                size=os.path.getsize(path0)
            )
            if include_sha1:
                ret['files'][name0]['sha1'] = self.computeFileSha1(path0)
        elif os.path.isdir(path0):
            ret['dirs'][name0] = {}
            if recursive:
                ret['dirs'][name0] = self._read_file_system_dir(
                    path=path0,
                    recursive=recursive,
                    include_sha1=include_sha1,
                )
    return ret

def _read_kbucket_dir(self, *, share_id, path, recursive, include_sha1):
    """Describe a kbucket share directory as nested ``files``/``dirs`` dicts.

    Parameters:
        share_id: id of the share to query.
        path: directory path inside the share ('' for the share root).
        recursive: when true, each sub-directory entry holds its own
            listing; otherwise it is an empty dict.
        include_sha1: when true, each file entry gets a ``sha1`` key taken
            from the entry's ``prv.original_checksum`` field.

    Returns ``{'files': {name: {'size': ..., ['sha1': ...]}},
    'dirs': {name: {...}}}``, or None when the readdir response reports
    ``success`` as false. With ``recursive`` set, a sub-directory whose own
    request fails is stored as None.
    """
    url = self._config['url']+'/'+share_id+'/api/readdir/'+path
    obj = _http_get_json(url)
    if not obj['success']:
        return None

    ret = dict(
        files={},
        dirs={}
    )
    for file0 in obj['files']:
        name0 = file0['name']
        ret['files'][name0] = dict(
            size=file0['size']
        )
        if include_sha1:
            ret['files'][name0]['sha1'] = file0['prv']['original_checksum']
    for dir0 in obj['dirs']:
        name0 = dir0['name']
        ret['dirs'][name0] = {}
        if recursive:
            # Recurse through the bound method with every keyword-only
            # argument; avoid a leading '/' when listing the share root.
            sub_path = path+'/'+name0 if path else name0
            ret['dirs'][name0] = self._read_kbucket_dir(
                share_id=share_id,
                path=sub_path,
                recursive=recursive,
                include_sha1=include_sha1,
            )
    return ret

def computeFileSha1(self,path):
if path.startswith('sha1://'):
list=path.split('/')
Expand All @@ -106,6 +120,10 @@ def computeFileSha1(self,path):
else:
return self._sha1_cache.computeFileSha1(path)

def computeDirHash(self, path):
    """Return ``_sha1_of_object`` applied to the full recursive listing of ``path``.

    The listing is obtained from ``readDir`` with ``recursive=True`` and
    ``include_sha1=True``.
    """
    return _sha1_of_object(
        self.readDir(path=path, recursive=True, include_sha1=True)
    )

def uploadFile(self,path,share_id=None,upload_token=None):
if not share_id:
share_id=self._config['upload_share_id']
Expand Down Expand Up @@ -197,7 +215,7 @@ def _find_file_helper(self,*,path,sha1,share_ids,key,collection,local=True):
if key is not None:
sha1=pairio.get(key=key,collection=collection)
if not sha1:
raise Exception('Unable to find file SHA-1 for this key.')
return (None,None,None)
if path is not None:
if sha1 is not None:
raise Exception('Cannot specify both path and sha1 in find file')
Expand Down Expand Up @@ -257,13 +275,6 @@ def _find_in_share(self,*,sha1,share_id):
return (url0,size0)
return (None,None)

def _read_kbucket_dir(self, *, share_id, path):
    """Fetch the raw readdir JSON for ``path`` inside share ``share_id``.

    Returns the decoded response, or None when its ``success`` field is
    false.

    NOTE(review): a definition with the same name and extra keyword-only
    parameters appears earlier in this listing; confirm which one is meant
    to remain.
    """
    response = _http_get_json(
        self._config['url']+'/'+share_id+'/api/readdir/'+path
    )
    if response['success']:
        return response
    return None

def _get_cas_upload_url_for_share(self,share_id):
node_info=self.getNodeInfo(share_id)
if not node_info:
Expand All @@ -272,41 +283,19 @@ def _get_cas_upload_url_for_share(self,share_id):

class KBucketClientDirectory:
    """A directory listing with name-keyed file and sub-directory entries."""

    def __init__(self):
        # Both map entry name -> entry object exposing toDict().
        self.files = dict()
        self.dirs = dict()

    def toDict(self):
        """Return ``{'files': {...}, 'dirs': {...}}`` built from each entry's toDict()."""
        ret = dict(
            files={},
            dirs={}
        )
        for name in self.files:
            ret['files'][name] = self.files[name].toDict()
        for name in self.dirs:
            # Look the entry up by name; a bare ``dir`` would be the builtin.
            ret['dirs'][name] = self.dirs[name].toDict()
        return ret

class KBucketClientDirectoryFile:
    """A file entry of a directory listing: name, path, size and sha1."""

    def __init__(self):
        self.name, self.path = '', ''
        self.size = self.sha1 = None

    def toDict(self):
        """Return the name, size and sha1 as a dict (``path`` is not included)."""
        return {
            'name': self.name,
            'size': self.size,
            'sha1': self.sha1,
        }

class KBucketClientDirectoryDir:
    """A sub-directory entry of a directory listing: name and path."""

    def __init__(self):
        self.name, self.path = '', ''

    def toDict(self):
        """Return the name as a one-key dict (``path`` is not included)."""
        return {'name': self.name}

def _http_get_json(url):
    """Open ``url`` and return its body decoded as JSON.

    Errors from ``urllib.request.urlopen`` and from JSON decoding propagate
    to the caller.
    """
    # The context manager closes the response once it has been parsed,
    # instead of leaving the connection open until garbage collection.
    with urllib.request.urlopen(url) as response:
        return json.load(response)

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -4,7 +4,7 @@

setuptools.setup(
name=pkg_name,
version="0.11.5",
version="0.11.6",
author="Jeremy Magland",
author_email="jmagland@flatironinstitute.org",
description="Python client for kbucket",
Expand Down

0 comments on commit b6bec40

Please sign in to comment.