In [1]:
from MultiClassClassification import TEClassification

In [2]:
import itertools
import numpy as np
import multiprocessing as mp

In [3]:
def get_model_stats(iters,metrics):
    """Summarise per-run metrics grouped by the parameter value that was varied.

    Parameters
    ----------
    iters : iterable of indexable
        One entry per run; element 0 of each entry is used as the grouping key.
    metrics : iterable of indexable
        One entry per run, paired positionally with ``iters``. Element 0 is
        collected under 'precs' and element 1 under 'accs'.

    Returns
    -------
    dict
        Maps each grouping key to a dict holding, in this order:
        'prec_mean', 'prec_worst', 'prec_median',
        'accs_mean', 'accs_worst', 'accs_median'.
        "worst" is the minimum observed value.
    """
    # Pass 1: bucket the raw scores by parameter value (first-seen order).
    grouped = {}
    for run_id, scores in zip(iters, metrics):
        bucket = grouped.setdefault(run_id[0], {'precs': [], 'accs': []})
        bucket['precs'].append(scores[0])
        bucket['accs'].append(scores[1])

    # Pass 2: reduce each bucket to its summary statistics.
    summary = {}
    for param, bucket in grouped.items():
        prec_vals = bucket['precs']
        acc_vals = bucket['accs']
        summary[param] = {
            'prec_mean': np.mean(prec_vals),
            'prec_worst': min(prec_vals),
            'prec_median': np.median(prec_vals),
            'accs_mean': np.mean(acc_vals),
            'accs_worst': min(acc_vals),
            'accs_median': np.median(acc_vals),
        }

    return summary

In [4]:
def save_model_results(filename,stat_dict):
    """Write a dict of per-parameter statistics to a comma-separated text file.

    The first line is a header: 'model_param' followed by the statistic names
    taken from the first entry of ``stat_dict``. Each following line holds one
    parameter value and its statistics rounded to 3 decimal places.

    Parameters
    ----------
    filename : str or path-like
        Destination file; opened in text write mode (existing content is
        replaced).
    stat_dict : dict
        Maps a parameter value to a dict of ``{statistic_name: number}``.
        An empty dict produces a header-only file.

    Returns
    -------
    None
    """
    # Column names come from the first entry; an empty input yields no columns
    # instead of raising IndexError.
    first_row = next(iter(stat_dict.values()), {})
    columns = list(first_row.keys())

    with open(filename,'w') as f:
        f.write(','.join(['model_param'] + columns) + '\n')

        for model_param, row in stat_dict.items():
            cells = [str(model_param)]
            # Look values up by header name so every value lands under its own
            # column even if this row's dict was built in a different order.
            # A statistic missing from the row is written as an empty field.
            for col in columns:
                cells.append(str(round(row[col],3)) if col in row else '')
            f.write(','.join(cells) + '\n')

    return
                
            

In [5]:
%%time
if __name__=='__main__':
    # Sequence and label files 
    enz_file = '../data/SeqFile/EnzymeSequence.csv'
    label_file = '../data/LabelFiles/EnzymeLabelsMultiClass.csv'

    # Feature dir for iFeature,kernel,pssm 
    ifeatdatadir = '../featEngg/ifeatMethods/data/featvec/'
    kerneldatadir = '../featEngg/kernelMethods/data/featvec/'
    pssmdatadir = '../featEngg/pssmMethods/data/featvec/'
    featdirs = [ifeatdatadir,kerneldatadir,pssmdatadir]
    
    # Candidate values for each hyperparameter; every candidate is evaluated
    # once per seed in iters_to_run.
    pca_comps_try = [25,40,55,75]
    n_models_try = [11,17,27,35]
    test_sizes_try = [0.3,0.25,0.2]
    iters_to_run = range(5)

    def multi_func_pca(n_comp,rs):
        """Build a TEClassification with pca_components=n_comp and random_seed=rs.

        Returns the tuple (te_i.precision, te_i.en.acc).
        """
        te_i = TEClassification(enz_file,label_file,featdirs,random_seed=rs,pca_components=n_comp)
        return te_i.precision,te_i.en.acc 
    
    def multi_func_nmodels(n_mod,rs):
        """Build a TEClassification with n_models=n_mod and random_seed=rs.

        Returns the tuple (te_i.precision, te_i.en.acc).
        """
        te_i = TEClassification(enz_file,label_file,featdirs,random_seed=rs,n_models=n_mod)
        return te_i.precision,te_i.en.acc    

    def multi_func_test_size(ts,rs):
        """Build a TEClassification with test_fraction=ts and random_seed=rs.

        Returns the tuple (te_i.precision, te_i.en.acc).
        """
        te_i = TEClassification(enz_file,label_file,featdirs,random_seed=rs,test_fraction=ts)
        return te_i.precision,te_i.en.acc    

    # Every (candidate value, seed) pair to evaluate for each sweep.
    pca_iterations = list(itertools.product(pca_comps_try,iters_to_run))
    nmod_iterations = list(itertools.product(n_models_try,iters_to_run))
    ts_iterations = list(itertools.product(test_sizes_try,iters_to_run))

    # The context manager shuts the worker processes down when the block exits,
    # so re-running this cell does not accumulate idle workers. starmap blocks
    # until all results are available, so nothing is lost on exit.
    with mp.Pool(mp.cpu_count()) as pool:
        pca_metrics = list(pool.starmap(multi_func_pca,pca_iterations))
        nmod_metrics = list(pool.starmap(multi_func_nmodels,nmod_iterations))
        ts_metrics = list(pool.starmap(multi_func_test_size,ts_iterations))

    # Collapse the per-seed results into mean / worst / median per candidate.
    pca_mod_stats = get_model_stats(pca_iterations,pca_metrics)
    nmod_mod_stats = get_model_stats(nmod_iterations,nmod_metrics)
    ts_mod_stats = get_model_stats(ts_iterations,ts_metrics)
    
    save_model_results('../data/SimResults/EnsembleResults/GeneralizedOptimizationPCA.csv',pca_mod_stats)
    save_model_results('../data/SimResults/EnsembleResults/GeneralizedOptimizationNModels.csv',nmod_mod_stats)
    save_model_results('../data/SimResults/EnsembleResults/GeneralizedOptimizationTS.csv',ts_mod_stats)
                           

CPU times: user 102 ms, sys: 88.7 ms, total: 191 ms
Wall time: 2min 9s
