In [1]:
import functools
import itertools
import multiprocessing as mp
import os

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from mySVM.model import SVM

In [2]:
# Directory of feature-vector files; each file is evaluated downstream as a
# separate model.
feature_dir = 'Data/FeatureVectors/iFeatureVectors/'
# os.scandir yields entries in arbitrary, filesystem-dependent order and also
# returns sub-directories and stray files (e.g. notebook checkpoints). Keep
# only regular .csv files and sort them so the task list is reproducible.
feature_files = sorted(
    entry.name
    for entry in os.scandir(feature_dir)
    if entry.is_file() and entry.name.endswith('.csv')
)

In [3]:
# Location of the label table that is joined onto every feature file.
label_dir = "Data/Labels/"
# File name of the label table inside `label_dir`.
label_file = "Labels_Classification.csv"

In [4]:
@functools.lru_cache(maxsize=None)
def _load_dataset(filename):
    """Read one feature file, join it with the labels and return ``(X, y)``.

    The result is cached per process: `model_evaluate` is called once per
    (file, seed) pair, so without the cache every call would re-read and
    re-merge the same two CSV files.

    Parameters
    ----------
    filename : str
        Name of a feature file inside `feature_dir`.

    Returns
    -------
    tuple of numpy.ndarray
        ``X`` holds every merged column except the first (the join key) and
        the last; ``y`` holds the last merged column.
    """
    features = pd.read_csv(os.path.join(feature_dir, filename), header=None)
    labels = pd.read_csv(os.path.join(label_dir, label_file), header=None)
    # Inner join on column 0: rows present in only one table are dropped.
    merged = features.merge(labels, on=0)
    X = merged.iloc[:, 1:-1].values
    y = merged.iloc[:, -1].values
    return X, y


def model_evaluate(filename,random_seed):
    """Train and score one SVM on a single train/test split of one feature file.

    Parameters
    ----------
    filename : str
        Name of a feature file inside `feature_dir`.
    random_seed : int
        Passed to `train_test_split` as ``random_state``; it determines which
        rows land in the 25% test partition.

    Returns
    -------
    The ``acc_test`` attribute of the fitted `SVM` object (presumably the
    test-set accuracy -- confirm against `mySVM.model`).
    """
    X, y = _load_dataset(filename)
    n_features = X.shape[1]
    # Cap the value passed as `pca_comp` at 40; narrower feature sets use 75%
    # of their column count instead.
    n_comp = int(0.75 * n_features) if n_features < 40 else 40
    # The split is driven only by the row count and the seed, so dropping the
    # unused name arrays from the call leaves X/y partitions unchanged.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=random_seed)
    svm = SVM(X_train, X_test, y_train, y_test,
              verbose=False, optimize=False, pca_comp=n_comp)
    return svm.acc_test
 

In [5]:
# Show how many CPUs the machine reports; the worker pool is sized from this.
mp.cpu_count()

24

In [6]:
# One worker process per reported CPU.
pool = mp.Pool(processes=mp.cpu_count())

In [7]:
# Every (feature file, seed) pairing: each file is evaluated on 10000
# different train/test splits (seeds 0..9999), files varying slowest.
iterable = [(fname, seed) for fname in feature_files for seed in range(10000)]

In [8]:
%%time
model_accs = list(pool.starmap(model_evaluate,iterable))

CPU times: user 2.45 s, sys: 1.62 s, total: 4.07 s
Wall time: 48min 30s


In [9]:
# Regroup the flat accuracy list into {model name: {seed: accuracy}}.
mydict = {}

for (fname, seed), acc in zip(iterable, model_accs):
    # The model name is the file name with every '.csv' occurrence removed.
    mydict.setdefault(fname.replace('.csv', ''), {})[seed] = acc

In [10]:
def get_model_stats(mname):
    """Summarise the per-seed accuracies recorded for one model.

    Looks up ``mydict[mname]`` (a ``{seed: accuracy}`` mapping) and returns a
    dict with the keys ``'min'``, ``'mean'``, ``'max'`` and ``'std'``, in that
    order. ``'std'`` is `np.std` with its default settings.
    """
    scores = list(mydict[mname].values())
    summary = {}
    summary['min'] = min(scores)
    summary['mean'] = np.mean(scores)
    summary['max'] = max(scores)
    summary['std'] = np.std(scores)
    return summary

In [11]:
# Spot-check the summary statistics for a single model ('Geary').
get_model_stats('Geary')

{'min': 0.3103448275862069,
 'mean': 0.6733517241379312,
 'max': 0.9310344827586207,
 'std': 0.07768239370995797}

In [13]:
# Summary statistics for every model, keyed by model name.
results_dict = {name: get_model_stats(name) for name in mydict}
    

In [16]:
# A dict of dicts becomes one column per model; transpose so that each model
# is a row and each statistic a column.
results_df = pd.DataFrame(results_dict).transpose()

In [17]:
# Persist the summary table, with model names written as the index column.
# Create the output directory first so that a missing folder cannot make the
# write fail after the long simulation has finished.
results_path = 'Data/SimResults/iFeatResults.csv'
os.makedirs(os.path.dirname(results_path), exist_ok=True)
results_df.to_csv(results_path, index=True)

In [18]:
# Display the per-model summary table (one row per model).
results_df

Unnamed: 0,min,mean,max,std
Geary,0.310345,0.673352,0.931034,0.077682
GTPC,0.310345,0.678107,0.931034,0.074774
APAAC,0.344828,0.618321,0.896552,0.078522
Moran,0.310345,0.674614,0.931034,0.07775
CKSAAGP,0.310345,0.659,0.931034,0.079622
GDPC,0.275862,0.618234,0.896552,0.081038
CKSAAP,0.344828,0.675783,0.931034,0.07837
KSCTriad,0.310345,0.675966,0.931034,0.07812
PAAC,0.310345,0.615145,0.896552,0.078536
AAC,0.310345,0.618986,0.896552,0.077558
