Permalink
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
381 lines (283 sloc) 8.89 KB
#
# Collective Knowledge (dataset)
#
# See CK LICENSE.txt for licensing details
# See CK COPYRIGHT.txt for copyright details
#
# Developer: Grigori Fursin, Grigori.Fursin@cTuning.org, http://fursin.net
#
cfg={} # Will be updated by CK (meta description of this module)
work={} # Will be updated by CK (temporary data)
ck=None # Will be updated by CK (initialized CK kernel)
# Local settings
##############################################################################
# Initialize module
def init(i):
    """
    Initialize module. No work is performed; success is always reported.

    Input:  {}

    Output: {
              return       - return code =  0, if successful
                                         >  0, if error
              (error)      - error text if return > 0
            }
    """

    status = {'return': 0}
    return status
##############################################################################
# Import all files to meta
def import_all_files(i):
    """
    Record every regular file found in the entry directory in the
    'dataset_files' list of the entry meta and save the meta.

    Input:  {
              data_uoa     - entry to process
              (repo_uoa)   - repository of the entry
            }

    Output: {
              return       - return code =  0, if successful
                                         >  0, if error
              (error)      - error text if return > 0
            }
    """

    import os

    entry = i['data_uoa']
    repo = i.get('repo_uoa', '')

    # Load the entry to obtain its meta and its directory
    loaded = ck.access({'action': 'load',
                        'module_uoa': work['self_module_uid'],
                        'data_uoa': entry,
                        'repo_uoa': repo})
    if loaded['return'] > 0:
        return loaded

    entry_uid = loaded['data_uid']
    meta = loaded['dict']
    entry_path = loaded['path']

    # Extend the (possibly new) list in place; sub-directories are skipped
    known = meta.setdefault('dataset_files', [])
    for name in os.listdir(entry_path):
        if name in known:
            continue
        if os.path.isfile(os.path.join(entry_path, name)):
            known.append(name)

    # Write the meta back, replacing the stored dictionary
    saved = ck.access({'action': 'update',
                       'module_uoa': work['self_module_uid'],
                       'data_uoa': entry_uid,
                       'repo_uoa': repo,
                       'dict': meta,
                       'substitute': 'yes',
                       'sort_keys': 'yes'})
    if saved['return'] > 0:
        return saved

    return {'return': 0}
##############################################################################
# TBD: generate new data sets to cover unseen behavior
# See https://scholar.google.com/citations?view_op=view_citation&citation_for_view=IwcnpkwAAAAJ:hqOjcs7Dif8C
# http://arxiv.org/abs/1506.06256
def generate(i):
    """
    Placeholder (TBD): only prints a notice and echoes the input as JSON.

    Input:  {
            }

    Output: {
              return       - return code =  0, if successful
                                         >  0, if error
              (error)      - error text if return > 0
            }
    """

    import json

    print('TBD: generate new data sets to cover unseen behavior')

    # Header lines first, then the input dictionary pretty-printed
    for header in ('', 'Command line: ', ''):
        ck.out(header)
    ck.out(json.dumps(i, indent=2))

    return {'return': 0}
##############################################################################
# TBD: prune data sets to find minimal representative data set covering behavior
# See https://scholar.google.com/citations?view_op=view_citation&citation_for_view=IwcnpkwAAAAJ:hqOjcs7Dif8C
# http://arxiv.org/abs/1506.06256
def prune(i):
    """
    Placeholder (TBD): only prints a notice and echoes the input as JSON.

    Input:  {
            }

    Output: {
              return       - return code =  0, if successful
                                         >  0, if error
              (error)      - error text if return > 0
            }
    """

    import json

    print('prune data sets to find minimal representative data set covering behavior')

    # Header lines first, then the input dictionary pretty-printed
    for header in ('', 'Command line: ', ''):
        ck.out(header)
    ck.out(json.dumps(i, indent=2))

    return {'return': 0}
##############################################################################
# Check the size of all data sets and, if below the threshold, add the tag "small" -
# needed to avoid sending huge files during collaborative experiments (crowdtuning) via mobile devices
def check_size(i):
    """
    Sum the sizes of the files listed in 'dataset_files' of every matching
    dataset entry and add the tag "small" to entries below the limit.

    Input:  {
              (repo_uoa)   - repository UOA
              (data_uoa)   - dataset UOA (can be wildcards)
              (limit)      - size limit in bytes (to consider small). By default=500000
            }

    Output: {
              return       - return code =  0, if successful
                                         >  0, if error
              (error)      - error text if return > 0
            }
    """

    import os

    # Validate the limit up front so that a bad value is reported in the
    # usual {'return','error'} form instead of raising from int()
    raw_limit = i.get('limit', '')
    if raw_limit == '':
        raw_limit = 500000
    try:
        limit = int(raw_limit)
    except (TypeError, ValueError):
        return {'return': 1,
                'error': 'limit must be an integer (got "' + str(raw_limit) + '")'}

    found = ck.access({'action': 'search',
                       'repo_uoa': i.get('repo_uoa', ''),
                       'module_uoa': work['self_module_uid'],
                       'data_uoa': i.get('data_uoa', '')})
    if found['return'] > 0:
        return found

    for q in found['lst']:
        ck.out('Processing ' + q['data_uoa'] + ' ...')

        # The same request dictionary is reused for the update below
        ii = {'action': 'load',
              'module_uoa': q['module_uid'],
              'repo_uoa': q['repo_uid'],
              'data_uoa': q['data_uid']}
        loaded = ck.access(ii)
        if loaded['return'] > 0:
            return loaded

        dd = loaded['dict']
        p = loaded['path']
        tags = dd.get('tags', [])

        # Only files that are listed in the meta AND exist on disk count
        sz = 0
        for df in dd.get('dataset_files', []):
            pp = os.path.join(p, df)
            if os.path.isfile(pp):
                sz += os.path.getsize(pp)

        note = ''
        if sz < limit:
            note = ' (SMALL)'

            # Rewrite the entry only when the tag is actually new
            if 'small' not in tags:
                tags.append('small')
                dd['tags'] = tags

                ii['action'] = 'update'
                ii['dict'] = dd
                ii['sort_keys'] = 'yes'
                ii['ignore_update'] = 'yes'
                updated = ck.access(ii)
                if updated['return'] > 0:
                    return updated

        ck.out(' Size: ' + str(sz) + note)

    return {'return': 0}
##############################################################################
# add file to a given dataset
def add_file_to(i):
    """
    Copy a file into an existing dataset entry and register its name in
    the 'dataset_files' list of the entry meta.

    Input:  {
              data_uoa     - dataset entry to add file to
              (repo_uoa)   - repository of the entry
              file         - file to add (may include a directory part;
                             only the base name is stored in the entry)
            }

    Output: {
              return       - return code =  0, if successful
                                         >  0, if error
              (error)      - error text if return > 0
            }
    """

    import shutil
    import os

    o = i.get('out', '')

    duoa = i.get('data_uoa', '')
    muoa = i.get('module_uoa', '')
    ruoa = i.get('repo_uoa', '')

    fn = i.get('file', '')

    if duoa == '' or fn == '':
        return {'return': 1, 'error': 'usage - ck add_file_to dataset:{dataset UOA} --file={filename}'}

    # Report a missing source file in the usual error form before touching the entry
    if not os.path.isfile(fn):
        return {'return': 1, 'error': 'file ' + fn + ' not found'}

    # Load entry
    r = ck.access({'action': 'load',
                   'module_uoa': muoa,
                   'data_uoa': duoa,
                   'repo_uoa': ruoa})
    if r['return'] > 0:
        return r

    p = r['path']
    d = r['dict']

    # Copy file. Only the base name is used inside the entry: joining the
    # entry path with an absolute source path would yield the source itself.
    fn1 = os.path.basename(fn)
    pn = os.path.join(p, fn1)

    if o == 'con':
        ck.out('Copying file ' + fn + ' to ' + pn + ' ...')

    shutil.copyfile(fn, pn)

    # Adding to dataset list (without creating duplicates)
    df = d.get('dataset_files', [])
    if fn1 not in df:
        df.append(fn1)
    d['dataset_files'] = df

    # Updating entry
    r = ck.access({'action': 'update',
                   'module_uoa': muoa,
                   'data_uoa': duoa,
                   'repo_uoa': ruoa,
                   'dict': d,
                   'sort_keys': 'yes'})
    if r['return'] > 0:
        return r

    return {'return': 0}
##############################################################################
# add dataset
def add(i):
    """
    Create a dataset entry, making sure it has tags and, when a file is
    given, registering that file in the meta and copying it into the entry.

    Input:  {
              (tags)       - use tags (string; tags separated by comma)
              (file)       - add file
              (dict)       - initial meta of the entry
            }

    Output: {
              return       - return code =  0, if successful
                                         >  0, if error
              (error)      - error text if return > 0

              Otherwise the output of the ck.access() call that creates the entry
            }
    """

    import os
    import shutil

    o = i.get('out', '')

    d = i.get('dict', {})

    # Check the file first, so that no entry is created (and no question is
    # asked) when the file to add does not exist
    fn = i.get('file', '')
    if fn != '' and not os.path.isfile(fn):
        return {'return': 1, 'error': 'file ' + fn + ' not found'}

    # Check tags
    xtags = d.get('tags', [])
    if len(xtags) == 0:
        tags = i.get('tags', '').strip()
        if tags == '' and o == 'con':
            rx = ck.inp({'text': 'Enter tags for your data set separated by comma (such as image,jpeg): '})
            if rx['return'] > 0:
                return rx
            tags = rx['string'].strip()

        # 'dataset' is always the first tag; user tags follow without duplicates
        xtags = ['dataset']
        for t in tags.split(','):
            t1 = t.strip()
            if t1 != '' and t1 not in xtags:
                xtags.append(t1)

    d['tags'] = xtags

    # Check files ('file' is optional: register nothing when it is absent)
    fn1 = ''
    if fn != '':
        fn1 = os.path.basename(fn)

        df = d.get('dataset_files', [])
        if fn1 not in df:
            df.append(fn1)
        d['dataset_files'] = df

    # Create entry by forwarding the (augmented) input to the CK kernel.
    # NOTE(review): 'common_func' presumably selects the kernel's common
    # implementation of the action instead of this function - confirm.
    i['dict'] = d
    i['common_func'] = 'yes'
    i['sort_keys'] = 'yes'

    r = ck.access(i)
    if r['return'] > 0:
        return r

    # Copy file
    if fn != '':
        pn = os.path.join(r['path'], fn1)

        if o == 'con':
            ck.out('')
            ck.out('Copying file ' + fn + ' to ' + pn + ' ...')

        shutil.copyfile(fn, pn)

    return r