Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
296 lines (229 sloc) 7.74 KB
#
# Collective Knowledge (Unified modeling using R)
#
# See CK LICENSE.txt for licensing details
# See CK COPYRIGHT.txt for copyright details
#
# Developer: Grigori Fursin
#
cfg={} # Will be updated by CK (meta description of this module)
work={} # Will be updated by CK (temporal data)
ck=None # Will be updated by CK (initialized CK kernel)
# Local settings
##############################################################################
# Initialize module
def init(i):
"""
Input: {}
Output: {
return - return code = 0, if successful
> 0, if error
(error) - error text if return > 0
}
"""
return {'return':0}
##############################################################################
# build model
def build(i):
"""
Input: {
model_name - model name
(model_file) - model output file, otherwise generated as tmp file
features_table - features table (in experiment module format)
features_keys - features flat keys
characteristics_table - characteristics table (in experiment module format)
characteristics_keys - characteristics flat keys
(keep_temp_files) - if 'yes', keep temp files
}
Output: {
return - return code = 0, if successful
> 0, if error
(error) - error text if return > 0
model_input_file - temp input file (csv) - can be deleted if not keep_temp_files
model_file - output model file
}
"""
import tempfile
import os
o=i.get('out','')
mn=i['model_name']
mf=i.get('model_file','')
mf1=i['model_file']+'.r.obj'
mf4=i['model_file']+'.r.ft.txt'
ftable=i['features_table']
fkeys=i['features_keys']
ctable=i['characteristics_table']
ckeys=i['characteristics_keys']
lftable=len(ftable)
lctable=len(ctable)
# Enumerate features
s=''
fk=1
for fx in sorted(fkeys): # Needed to be sorted
s+='V'+str(fk)+') '+fx
s+='\n'
fk+=1
if s!='':
r=ck.save_text_file({'text_file':mf4, 'string':s})
if r['return']>0: return r
if o=='con':
ck.out('*******************************************************')
ck.out('Feature key convertion:')
ck.out('')
ck.out(s)
if lftable!=lctable:
return {'return':1, 'error':'length of feature table ('+str(lftable)+') is not the same as length of characteristics table ('+str(lctable)+')'}
# if len(ckeys)>1:
# return {'return':1, 'error':'currently we support only modeling for 1 characteristic'}
ktf=i.get('keep_temp_files','')
# First convert to CSV for R ***********************************
# Prepare common table from features and characteristics
dim=[]
for q in range(0, lftable):
vv=[]
for v in ftable[q]:
vv.append(v)
for v in ctable[q]:
vv.append(v)
dim.append(vv)
# Prepare common keys
keys=[]
for q in fkeys:
keys.append(q)
for q in ckeys:
keys.append(q)
# Prepare temporary CSV file
if ktf=='yes' and mf!='':
fn1=mf+'.build.in.csv'
else:
fd1, fn1=tempfile.mkstemp(suffix='.tmp', prefix='ck-')
os.close(fd1)
os.remove(fn1)
if ktf=='yes' and o=='con':
ck.out('')
ck.out(' Temporary CSV file = '+fn1)
ck.out('')
ii={'action':'convert_table_to_csv',
'module_uoa':cfg['module_deps']['experiment'],
'table':dim,
'keys':keys,
'file_name':fn1,
'csv_no_header':'yes',
'csv_separator':';',
'csv_decimal_mark':'.'
}
r=ck.access(ii)
if r['return']>0: return r
# Prepare (temporary) out model file
fn2=mf
if fn2=='' or i.get('web','')=='yes':
fd2, fn2=tempfile.mkstemp(suffix='.tmp', prefix='ck-')
os.close(fd2)
os.remove(fn2)
else:
fn2=mf1
if os.path.isfile(fn2): os.remove(fn2)
# Calling R
p=work['path']
model_code=cfg['model_code_build'].replace('$#model_name#$',mn)
pmc=os.path.join(p, model_code)
cmd='R --vanilla --args '+fn1+' '+fn2+' < '+pmc
os.system(cmd)
if ktf=='yes' and o=='con':
ck.out('')
ck.out(' Executed command:')
ck.out(cmd)
ck.out('')
if ktf!='yes' and os.path.isfile(fn1): os.remove(fn1)
if not os.path.isfile(fn2):
if ktf=='yes' and o=='con':
ck.out('')
ck.out(' Temporary input CSV file = '+fn1)
ck.out('')
return {'return':1, 'error':'model was not created'}
return {'return':0, 'model_input_file':fn1, 'model_file':fn2}
##############################################################################
# validate model
def validate(i):
"""
Input: {
model_name - model name:
earth
lm
nnet
party
randomforest
rpart
svm
model_file - file with model (object) code
features_table - features table (in experiment module format)
features_keys - features flat keys
(keep_temp_files) - if 'yes', keep temp files
}
Output: {
return - return code = 0, if successful
> 0, if error
(error) - error text if return > 0
prediction_table - experiment table with predictions
}
"""
import tempfile
import os
import csv
import sys
mn=i['model_name']
mf=i['model_file']
mf1=i['model_file']+'.r.obj'
ftable=i['features_table']
fkeys=i['features_keys']
ktf=i.get('keep_temp_files','')
lftable=len(ftable)
# First convert to CSV for R ***********************************
# Prepare temporary CSV file
if ktf=='yes':
fn1=mf+'.validate.in.csv'
else:
fd1, fn1=tempfile.mkstemp(suffix='.tmp', prefix='ck-')
os.close(fd1)
os.remove(fn1)
ii={'action':'convert_table_to_csv',
'module_uoa':cfg['module_deps']['experiment'],
'table':ftable,
'keys':fkeys,
'file_name':fn1,
'csv_no_header':'yes',
'csv_separator':';',
'csv_decimal_mark':'.'
}
r=ck.access(ii)
if r['return']>0: return r
# Prepare temporary out file
if ktf=='yes':
fn2=mf+'.validate.out.csv'
if os.path.isfile(fn2): os.remove(fn2)
else:
fd2, fn2=tempfile.mkstemp(suffix='.tmp', prefix='ck-')
os.close(fd2)
os.remove(fn2)
# Calling R
p=work['path']
model_code=cfg['model_code_predict'].replace('$#model_name#$',mn)
pmc=os.path.join(p, model_code)
cmd='R --vanilla --args '+mf1+' '+fn1+' '+fn2+' < '+pmc
print (cmd)
os.system(cmd)
if ktf!='yes' and os.path.isfile(fn1): os.remove(fn1)
if not os.path.isfile(fn2):
return {'return':1, 'error':'output prediction file was not created'}
# Parse CSV and convert to experiment format
# Read predictions
pr=[]
f=open(fn2, 'r')
c=csv.DictReader(f, delimiter=',')
for a in c:
k=list(a.keys())
if len(k)>0:
pr.append([a[k[1]]])
f.close()
if ktf!='yes': os.remove(fn2)
return {'return':0, 'prediction_table':pr}