In [None]:
import csv
import time
import math
import pandas as pd
import numpy as np
import os
import gc
from scipy import stats
from sklearn.feature_selection import SelectFromModel
pd.set_option("display.max_rows", None, "display.max_columns", None)

In [None]:
#models
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import RidgeClassifierCV
from sklearn.svm import SVC

In [None]:
#scalers
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import QuantileTransformer
from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import normalize

In [None]:
#tools
from sklearn.feature_selection import VarianceThreshold
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.inspection import permutation_importance
#import shap

In [None]:
#dimensional reduction & feature selection
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import f_classif

In [None]:
# Load the internal (code) metrics and the external (repository) metrics and
# join them on the project name.
data = pd.read_csv("GitHub-Java-InternalMetricsData.csv", sep=",", encoding='utf-8')
data1 = pd.read_csv("GitHub-Java-ExternalMetricsData.csv", encoding='utf-8')
data = pd.merge(data, data1, left_on='project', right_on='name')
del data1
gc.collect()

# Collect every column that cannot be converted to float; those columns are
# excluded from the modelling frame.
cols_to_remove = []

for col in data.columns:
    try:
        data[col].astype(float)
    except ValueError:
        cols_to_remove.append(col)

# Keep only the numeric columns, coerce them to numeric dtypes and drop every
# row that still contains a missing value.
df = data[[col for col in data.columns if col not in cols_to_remove]]
df = df.apply(pd.to_numeric)
df = df.dropna(axis=0)
del data
# The original referenced gc.collect without calling it, so nothing was freed.
gc.collect()
print(len(df.index))
print(len(df.columns))

In [None]:
def data_preprocessing(df, split):
    """Build the feature matrix and binary target for a given fork threshold.

    The target column ``reusability`` is 1 when ``forks > split`` and 0
    otherwise. Features whose Spearman correlation with the target is
    undefined (NaN) are removed, and so is every feature whose two-sample
    t-test p-value against the target (rounded to 5 decimals) exceeds 0.05.

    The input frame is not modified: all work happens on a copy, so the
    function can be called repeatedly with different ``split`` values.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric frame that contains a ``forks`` column.
    split : number
        Fork-count threshold separating the two classes.

    Returns
    -------
    (X, y) : tuple
        ``X`` is the filtered feature frame (without ``forks`` and
        ``reusability``); ``y`` is the ``reusability`` Series.
    """
    # Work on a copy so the caller's frame keeps its original columns.
    df = df.copy()

    # Split high and low on the target variable.
    conditions = [(df['forks'] <= split), (df['forks'] > split)]
    values = [False, True]
    df['reusability'] = np.select(conditions, values)

    # Remove features whose Spearman correlation with the target is NaN.
    # The columns are selected by label; integer positions on a label-indexed
    # Series are deprecated in pandas.
    s_corr = df.corr(method='spearman')
    target_corr = s_corr['reusability']
    undefined_cols = target_corr.index[target_corr.isna()]
    df = df.drop(columns=undefined_cols)

    print(len(df.columns))

    y = df['reusability']
    X = df.drop(['reusability', 'forks'], axis=1)

    # Remove all insignificant features (t-test p-value above 0.05).
    r_array = [col for col in X if stats.ttest_ind(X[col], y)[1].round(5) > 0.05]
    X = X.drop(columns=r_array)

    print(len(X.columns))
    print(len(X.index))

    return X, y

In [None]:
# Build the features and binary target with a fork-count threshold of 17:
# rows with more than 17 forks form the positive class.
X_17, y_17 = data_preprocessing(df, 17)

In [None]:
class CLF():
    """Train and evaluate one classification pipeline configuration.

    On construction the data is split into train and test parts, a
    scikit-learn pipeline is assembled in the order
    variance threshold -> preprocessing -> dimensionality reduction ->
    feature selection -> model, the pipeline is fitted on the training part
    and predictions for the test part are stored in ``y_pred``.

    Parameters
    ----------
    X, y : feature frame and target.
    classifier : str
        One of 'rf', 'knn', 'mlp', 'dt', 'sgd', 'r', 'svm'.
    preprocess_method : str or None
        One of 'ss', 'mms', 'mas', 'rs', 'pty', 'ptb', 'qtu', 'qtn', 'n';
        any other value means no preprocessing.
    d_reduction : int
        1 enables PCA with ``params['f']`` components; anything else disables it.
    f_selection : str or None
        'kb' (SelectKBest with ``params['kbest_f']``) or 'rf'
        (SelectFromModel on a random forest with ``params['fs_n']`` trees).
    seed : int
        Random state used for the split and for every seeded component.
    vt : bool
        Whether to prepend a VarianceThreshold step.
    **params
        Model/selection hyper-parameters ('n', 'l', 'gamma', 'f', ...).
    """

    def __init__(self, X, y, classifier, preprocess_method = None, d_reduction = 0, f_selection = None, seed = 27033074, vt = False, **params):

        self.X = X
        self.classifier = classifier
        self.pm = preprocess_method
        self.dr = d_reduction
        self.fs = f_selection
        self.vt = vt
        self.p = params
        self.seed = seed
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, random_state=seed)
        # The seed is forwarded explicitly; previously a custom seed only
        # affected the train/test split and every component used the default.
        self.clf = make_pipeline(self.variance_t(), self.preprocess(seed), self.dim_reduction(seed),
                                 self.fea_selection(seed), self.model(seed))
        self.clf.fit(self.X_train, self.y_train)
        self.y_pred = self.clf.predict(self.X_test)

    def variance_t(self, t = 0.8 * (1 - 0.8)):
        """Return a VarianceThreshold step when ``vt`` is enabled, else None."""
        if self.vt == True:
            return VarianceThreshold(threshold = t)

        return None

    def preprocess(self, seed = 27033074):
        """Return the preprocessing step selected by ``self.pm`` (or None).

        For 'n' no pipeline step is returned; instead the train and test
        matrices are replaced by column-wise L2-normalised arrays.
        """
        if self.pm == 'ss':
            return StandardScaler()
        elif self.pm == 'mms':
            return MinMaxScaler()
        elif self.pm == 'mas':
            return MaxAbsScaler()
        elif self.pm == 'rs':
            return RobustScaler()
        elif self.pm == 'pty':
            return PowerTransformer()
        elif self.pm == 'ptb':
            return PowerTransformer(method = 'box-cox')
        elif self.pm == 'qtu':
            return QuantileTransformer(random_state = seed)
        elif self.pm == 'qtn':
            return QuantileTransformer(output_distribution = 'normal', random_state = seed)
        elif self.pm == 'n':
            # Scale both splits by the TRAINING column norms. Normalising the
            # test split with its own norms (as before) puts train and test
            # on different scales and uses test-set statistics.
            train = np.asarray(self.X_train, dtype=float)
            norms = np.linalg.norm(train, axis=0)
            norms[norms == 0] = 1.0  # leave all-zero columns untouched
            self.X_train = train / norms
            self.X_test = np.asarray(self.X_test, dtype=float) / norms
            return None
        else:
            return None

    def dim_reduction(self, seed = 27033074):
        """Return a PCA step with ``p['f']`` components when ``dr == 1``, else None."""
        if self.dr == 1:
            return PCA(n_components=self.p['f'], random_state=seed)
        return None

    def fea_selection(self, seed = 27033074):
        """Return the feature-selection step chosen by ``self.fs`` (or None)."""
        if self.fs == 'kb':
            return SelectKBest(self.p['kbest_f'], k=self.p['f'])
        elif self.fs == 'rf':
            return SelectFromModel(RandomForestClassifier(n_estimators = self.p['fs_n'], n_jobs = -1, random_state=seed),
                                   threshold=-np.inf, max_features=self.p['f'])
        return None

    def model(self, seed = 27033074):
        """Return the final estimator chosen by ``self.classifier``.

        Raises
        ------
        ValueError
            If the classifier key is unknown. Returning None here would only
            fail later, at prediction time, with a less helpful error.
        """
        if self.classifier == 'rf':
            return RandomForestClassifier(n_estimators = self.p['n'], n_jobs = -1, random_state=seed)
        elif self.classifier == 'knn':
            return KNeighborsClassifier(n_neighbors = self.p['n'], n_jobs = -1)
        elif self.classifier == 'mlp':
            return MLPClassifier(random_state = seed, max_iter=10000)
        elif self.classifier == 'dt':
            return DecisionTreeClassifier(random_state = seed)
        elif self.classifier == 'sgd':
            return SGDClassifier(loss = self.p['l'], n_jobs = -1, random_state = seed)
        elif self.classifier == 'r':
            return RidgeClassifierCV(cv = self.p['n'])
        elif self.classifier == 'svm':
            return SVC(gamma = self.p['gamma'], random_state = seed)
        raise ValueError("Unknown classifier {!r}; expected one of "
                         "'rf', 'knn', 'mlp', 'dt', 'sgd', 'r', 'svm'".format(self.classifier))

    def results_array(self):
        """Return one result row, in the column order used by ``write_result``."""
        return [self.get_acc(), self.get_f1(), self.get_pre(), self.get_rec(), self.get_model(), self.num_of_features(), self.get_preprocess(), self.fs, self.dr, self.vt]

    def write_result(self, filename):
        """Append the result row to ``<filename>.csv``, writing a header for a new file."""
        path = filename + '.csv'
        needs_header = not os.path.isfile(path)

        # newline='' is required by the csv module to avoid blank rows on
        # platforms that translate line endings.
        with open(path, 'a', newline='') as csv_file:
            writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            if needs_header:
                writer.writerow(['Accuracy', 'F1-Score', 'Precision', 'Recall', 'Model', 'Num_Features', 'Preprocessing', 'Fea_Selection', 'Dim_Reduction', 'Var_Threshold'])
            writer.writerow(self.results_array())

        return

    def num_of_features(self):
        """Return ``p['f']`` when given, otherwise the number of input columns."""
        if 'f' in self.p:
            return self.p['f']
        return len(self.X.columns)

    def get_preprocess(self):
        """Return the preprocessing step of the pipeline ('normalize' for 'n')."""
        if self.pm == 'n':
            return 'normalize'
        else:
            return self.clf[1]

    def get_model(self):
        """Return the fitted final estimator."""
        return self.clf[-1]

    def get_model_params(self):
        """Return the parameters of the final estimator."""
        return self.clf[-1].get_params()

    def __str__(self):
        return str(self.clf.steps)

    def _positive_class_report(self):
        """Return the classification-report entry of the class labelled '1'."""
        report = classification_report(self.y_test, self.y_pred, output_dict=True, zero_division=0)
        return report['1']

    def get_acc(self):
        """Return the test accuracy rounded to 3 decimals."""
        return round(self.clf.score(self.X_test, self.y_test), 3)

    def get_f1(self):
        """Return the F1-score of class '1' rounded to 3 decimals."""
        return round(self._positive_class_report()['f1-score'], 3)

    def get_pre(self):
        """Return the precision of class '1' rounded to 3 decimals."""
        return round(self._positive_class_report()['precision'], 3)

    def get_rec(self):
        """Return the recall of class '1' rounded to 3 decimals."""
        return round(self._positive_class_report()['recall'], 3)

    def get_cmatrix(self):
        """Return the confusion matrix of the test predictions."""
        return confusion_matrix(self.y_test, self.y_pred)

    def p_importance(self, train_or_test, filename, seed = 27033074):
        """Compute permutation importance and write it to ``PI-<filename>.csv``.

        ``train_or_test`` selects the split ('train' or 'test'). Rows are
        written in descending order of mean importance as
        (feature name, mean, std).
        """
        print('Computing permutation importance...')
        start = time.time()
        if train_or_test == 'train':
            result = permutation_importance(self.clf, self.X_train, self.y_train, random_state=seed, n_jobs=-1)
        elif train_or_test == 'test':
            result = permutation_importance(self.clf, self.X_test, self.y_test, random_state=seed, n_jobs=-1)
        else:
            print('Input train or test!')
            return

        print('Process completed at ' + str(round((time.time() - start)/60, 3)) + ' min')

        with open("PI-"+ filename +  ".csv", 'w', newline='') as csv_file:
            writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            for i in result.importances_mean.argsort()[::-1]:
                writer.writerow([self.X.columns[i], round(result.importances_mean[i], 6), round(result.importances_std[i], 6)])

        return

    def _model_feature_names(self):
        """Return the names of the features that actually reach the final model.

        Walks the pipeline steps before the model: selector-like steps
        (anything exposing ``get_support``) filter the names, and steps
        exposing ``n_components_`` (PCA) replace them with PC1..PCk.
        Other steps are assumed to keep the columns unchanged.
        """
        names = np.asarray(self.X.columns, dtype=object)
        for _, step in self.clf.steps[:-1]:
            if step is None or isinstance(step, str):
                continue  # disabled / passthrough step
            if hasattr(step, 'get_support'):
                names = names[step.get_support()]
            elif hasattr(step, 'n_components_'):
                names = np.asarray(['PC{}'.format(i + 1) for i in range(step.n_components_)], dtype=object)
        return names

    def _write_importance_table(self, scores):
        """Pair ``scores`` with the model's feature names and save them, sorted, as FI-*.csv."""
        table = pd.DataFrame({"Importance": list(scores), "Feature_Name": list(self._model_feature_names())})
        table = table.sort_values(by=['Importance'], ascending=False)
        table.to_csv('FI-' + str(self.get_model()) + "_" + str(self.get_preprocess()) +  ".csv", index=False)

    def f_importance(self):
        """Save the model's ``feature_importances_`` with matching feature names.

        The names are derived from the fitted pipeline, so they stay aligned
        when VarianceThreshold, PCA or feature selection changed the columns.
        """
        self._write_importance_table(self.clf[-1].feature_importances_)
        return

    def coefs(self):
        """Save the model's coefficients (``coef_``) with matching feature names.

        Falls back to ``feature_importances_`` when the model has no
        ``coef_``. ``coef_`` is flattened, which assumes a binary problem
        with a single coefficient row.
        """
        model = self.clf[-1]
        if hasattr(model, 'coef_'):
            scores = np.ravel(model.coef_)
        else:
            scores = model.feature_importances_
        self._write_importance_table(scores)
        return
    

Model setting evaluation for:
1. Preprocessing Method
2. With or without Variance Threshold

In [None]:
# Sweep every classifier configuration over every preprocessing option
# (no variance threshold); each run appends one row to test.csv.
preprocess_options = [None, 'ss', 'mms', 'mas', 'rs', 'pty', 'qtu', 'qtn', 'n']

# (classifier key, model hyper-parameters), in the order the runs are written.
# NOTE(review): recent scikit-learn releases appear to name the 'log' loss
# 'log_loss' — confirm against the installed version.
classifier_settings = [
    ('rf', {'n': 500}),
    ('mlp', {}),
    ('svm', {'gamma': 'auto'}),
    ('knn', {'n': 10}),
    ('sgd', {'l': 'huber'}),
    ('sgd', {'l': 'log'}),
    ('sgd', {'l': 'perceptron'}),
    ('sgd', {'l': 'hinge'}),
    ('dt', {}),
    ('r', {'n': None}),
]

for classifier, model_params in classifier_settings:
    for preprocess_method in preprocess_options:
        CLF(X_17, y_17, classifier, preprocess_method, **model_params).write_result('test')

In [None]:
# Same sweep with the variance threshold enabled ('n' is not part of this
# sweep); each run appends one row to test_vt.csv.
vt_preprocess_options = [None, 'ss', 'mms', 'mas', 'rs', 'pty', 'qtu', 'qtn']

# (classifier key, model hyper-parameters), in the order the runs are written.
vt_classifier_settings = [
    ('rf', {'n': 500}),
    ('mlp', {}),
    ('svm', {'gamma': 'auto'}),
    ('knn', {'n': 10}),
    ('sgd', {'l': 'huber'}),
    ('sgd', {'l': 'log'}),
    ('sgd', {'l': 'perceptron'}),
    ('sgd', {'l': 'hinge'}),
    ('dt', {}),
    ('r', {'n': None}),
]

for classifier, model_params in vt_classifier_settings:
    for preprocess_method in vt_preprocess_options:
        CLF(X_17, y_17, classifier, preprocess_method, vt=True, **model_params).write_result('test_vt')

In [None]:
# Best preprocessing / variance-threshold setting per model; each run appends
# one row to best_pvt_setting.csv.
# Entries are (classifier key, preprocessing key, remaining keyword arguments).
best_pvt_settings = [
    ('rf', 'pty', {'n': 500}),
    ('mlp', 'mas', {'vt': True}),
    ('svm', 'pty', {'gamma': 'auto', 'vt': True}),
    ('knn', 'pty', {'n': 10, 'vt': True}),
    ('sgd', 'qtu', {'l': 'huber'}),
    ('sgd', 'pty', {'l': 'log'}),
    ('sgd', 'pty', {'l': 'perceptron', 'vt': True}),
    ('sgd', 'qtu', {'l': 'hinge', 'vt': True}),
    ('dt', 'mas', {}),
    ('r', 'qtu', {'n': None, 'vt': True}),
]

for classifier, preprocess_method, options in best_pvt_settings:
    CLF(X_17, y_17, classifier, preprocess_method, **options).write_result('best_pvt_setting')

For each best-setting model, evaluate:
1. The feature reduction method (PCA, SelectKBest with f_classif, random-forest-based selection)
2. The number of features retained

In [None]:
# PCA sweep: for each best setting, run the baseline without PCA and then PCA
# with a decreasing number of components; rows are appended to test_PCA.csv.
pca_component_counts = [400, 300, 200, 100, 50]

# Entries are (classifier key, preprocessing key, remaining keyword arguments).
pca_base_settings = [
    ('rf', 'pty', {'n': 500}),
    ('mlp', 'mas', {'n': 500, 'vt': True}),
    ('svm', 'pty', {'gamma': 'auto', 'vt': True}),
    ('knn', 'pty', {'n': 10, 'vt': True}),
    ('sgd', 'qtu', {'l': 'huber'}),
    ('sgd', 'pty', {'l': 'log'}),
    ('sgd', 'pty', {'l': 'perceptron', 'vt': True}),
    ('sgd', 'qtu', {'l': 'hinge', 'vt': True}),
    ('dt', 'mas', {}),
    ('r', 'qtu', {'n': None, 'vt': True}),
]

for classifier, preprocess_method, options in pca_base_settings:
    # Baseline: d_reduction keeps its default of 0 (no PCA).
    CLF(X_17, y_17, classifier, preprocess_method, **options).write_result('test_PCA')
    for n_components in pca_component_counts:
        CLF(X_17, y_17, classifier, preprocess_method, d_reduction=1, f=n_components,
            **options).write_result('test_PCA')

In [None]:
# SelectKBest (f_classif) sweep: for each best setting, run the baseline
# without feature selection and then keep a decreasing number of features;
# rows are appended to test_KBf_classif.csv.
kbest_feature_counts = [400, 300, 200, 100, 50]

# Entries are (classifier key, preprocessing key, remaining keyword arguments).
kbest_base_settings = [
    ('rf', 'pty', {'n': 500}),
    ('mlp', 'mas', {'n': 500, 'vt': True}),
    ('svm', 'pty', {'gamma': 'auto', 'vt': True}),
    ('knn', 'pty', {'n': 10, 'vt': True}),
    ('sgd', 'qtu', {'l': 'huber'}),
    ('sgd', 'pty', {'l': 'log'}),
    ('sgd', 'pty', {'l': 'perceptron', 'vt': True}),
    ('sgd', 'qtu', {'l': 'hinge', 'vt': True}),
    ('dt', 'mas', {}),
]

for classifier, preprocess_method, options in kbest_base_settings:
    # Baseline: no dimensionality reduction and no feature selection.
    CLF(X_17, y_17, classifier, preprocess_method, **options).write_result('test_KBf_classif')
    for n_features in kbest_feature_counts:
        CLF(X_17, y_17, classifier, preprocess_method, f_selection='kb', kbest_f=f_classif,
            f=n_features, **options).write_result('test_KBf_classif')


CLF(X_17, y_17, 'r', 'qtu', n=None, vt=True).write_result('test_KBf_classif')
CLF(X_17, y_17, 'r','qtu', n=None, f_selection='kb', kbest_f=f_classif, f = 400, vt=True).write_result('test_KBf_classif')
CLF(X_17, y_17, 'r','qtu', n=None, f_selection='kb', kbest_f=f_classif, f = 300, vt=True).write_result('test_KBf_classif')
CLF(X_17, y_17, 'r','qtu', n=None, f_selection='kb', kbest_f=f_classif, f = 200, vt=True).write_result('test_KBf_classif')
CLF(X_17, y_17, 'r','qtu', n=None, f_selection='kb', kbest_f=f_classif, f = 100, vt=True).write_result('test_KBf_classif')
CLF(X_17, y_17, 'r','qtu', n=None, f_selection='kb', kbest_f=f_classif, f = 50, vt=True).write_result('test_KBf_classif')

In [None]:
# Feature-count sweep using f_selection='rf' with fs_n=500.
# Each configuration below is run once without the f_selection arguments and
# then once per entry of rf_feature_counts (largest first). Every run is
# written under the 'test_rf' name, in the order listed.
rf_feature_counts = (400, 300, 200, 100, 50)

# Columns: third positional CLF argument, fourth positional CLF argument,
# keyword options shared by all runs of the configuration, and keyword options
# passed to the baseline run only (the first three baselines pass d_reduction=0).
# NOTE(review): the two strings look like classifier and scaler keys --
# confirm against the CLF definition.
rf_sweep_configs = (
    ('rf', 'pty', {'n': 500}, {'d_reduction': 0}),
    ('mlp', 'mas', {'n': 500, 'vt': True}, {'d_reduction': 0}),
    ('svm', 'pty', {'gamma': 'auto', 'vt': True}, {'d_reduction': 0}),
    ('knn', 'pty', {'n': 10, 'vt': True}, {}),
    ('sgd', 'qtu', {'l': 'huber'}, {}),
    ('sgd', 'pty', {'l': 'log'}, {}),
    ('sgd', 'pty', {'l': 'perceptron', 'vt': True}, {}),
    ('sgd', 'qtu', {'l': 'hinge', 'vt': True}, {}),
    ('dt', 'mas', {}, {}),
    ('r', 'qtu', {'n': None, 'vt': True}, {}),
)

for rf_clf_key, rf_scaler_key, rf_clf_opts, rf_baseline_only in rf_sweep_configs:
    # Baseline run for this configuration (no f_selection arguments).
    CLF(
        X_17, y_17, rf_clf_key, rf_scaler_key,
        **rf_baseline_only, **rf_clf_opts
    ).write_result('test_rf')

    # Same configuration with f_selection='rf' at each feature count.
    for rf_n_keep in rf_feature_counts:
        CLF(
            X_17, y_17, rf_clf_key, rf_scaler_key,
            f_selection='rf', fs_n=500, f=rf_n_keep,
            **rf_clf_opts
        ).write_result('test_rf')

In [None]:
# Best feature-reduction method and intensity for each model, along with its
# pvt setting (presumably the `vt` flag -- confirm). One row per model; every
# run is written under the 'optimal_model_setting' name, in the order listed.
optimal_run_configs = (
    ('rf', 'pty', dict(f_selection='rf', fs_n=500, f=50, n=500)),
    ('mlp', 'mas', dict(f_selection='rf', fs_n=500, f=50, n=500, vt=True)),
    ('svm', 'pty', dict(f_selection='rf', fs_n=500, f=50, gamma='auto', vt=True)),
    ('knn', 'pty', dict(f_selection='kb', kbest_f=f_classif, f=50, n=10, vt=True)),
    ('sgd', 'qtu', dict(l='huber', f_selection='rf', fs_n=500, f=400)),
    ('sgd', 'pty', dict(l='log', f_selection='kb', kbest_f=f_classif, f=50)),
    ('sgd', 'pty', dict(l='perceptron', vt=True)),
    ('sgd', 'qtu', dict(l='hinge', f_selection='kb', kbest_f=f_classif, f=200, vt=True)),
    ('dt', 'mas', dict(f_selection='kb', kbest_f=f_classif, f=400)),
    ('r', 'qtu', dict(n=None, f_selection='rf', fs_n=500, f=200, vt=True)),
)

for optimal_clf_key, optimal_scaler_key, optimal_opts in optimal_run_configs:
    CLF(X_17, y_17, optimal_clf_key, optimal_scaler_key, **optimal_opts).write_result('optimal_model_setting')

In [None]:

# Runs without any f_selection or vt arguments, written under the same
# 'optimal_model_setting' name as the tuned configurations, in the order listed.
plain_run_configs = (
    ('knn', 'pty', {'n': 10}),
    ('sgd', 'qtu', {'l': 'huber'}),
    ('sgd', 'pty', {'l': 'log'}),
    ('sgd', 'pty', {'l': 'perceptron'}),
    ('sgd', 'qtu', {'l': 'hinge'}),
    ('dt', 'mas', {}),
    ('r', 'qtu', {'n': None}),
)

for plain_clf_key, plain_scaler_key, plain_opts in plain_run_configs:
    CLF(X_17, y_17, plain_clf_key, plain_scaler_key, **plain_opts).write_result('optimal_model_setting')

Compute permutation importance for each of the optimal models:

In [None]:
# Build one CLF per model and call p_importance on it. Each CLF is constructed
# with the same arguments as the tuned run of that model written to
# 'optimal_model_setting' earlier in this notebook.
# NOTE(review): the first p_importance argument ('test') presumably selects the
# data split and the second looks like an output label -- confirm against the
# CLF.p_importance definition.
# The instances stay bound to the notebook-level names a..j after this cell.

# SGD, 'qtu', l='huber' with f_selection='rf' (f = 400).
a = CLF(X_17, y_17, 'sgd','qtu', l='huber', f_selection='rf', fs_n=500, f = 400)
a.p_importance('test', 'huber')
# SGD, 'pty', l='log' with f_selection='kb' / f_classif (f = 50).
b = CLF(X_17, y_17, 'sgd','pty', l='log', f_selection='kb', kbest_f=f_classif, f = 50)
b.p_importance('test', 'log')
# SGD, 'pty', l='perceptron' with vt=True and no f_selection arguments.
c = CLF(X_17, y_17, 'sgd','pty', l='perceptron', vt=True)
c.p_importance('test', 'perceptron')
# SGD, 'qtu', l='hinge' with f_selection='kb' / f_classif (f = 200) and vt=True.
d = CLF(X_17, y_17, 'sgd','qtu', l='hinge', f_selection='kb', kbest_f=f_classif, f = 200, vt=True)
d.p_importance('test', 'hinge')
# 'dt', 'mas' with f_selection='kb' / f_classif (f = 400).
e = CLF(X_17, y_17, 'dt','mas', f_selection='kb', kbest_f=f_classif, f = 400)
e.p_importance('test', 'dt')
# 'rf', 'pty' with f_selection='rf' (f = 50), n=500.
# Note: this binds the global name `f`; the `f = ...` inside the calls is a
# keyword argument and is unaffected.
f = CLF(X_17, y_17, 'rf','pty', f_selection='rf', fs_n=500, f = 50, n=500)
f.p_importance('test', 'rf')
# 'mlp', 'mas' with f_selection='rf' (f = 50), n=500, vt=True.
g = CLF(X_17, y_17, 'mlp', 'mas', f_selection='rf', fs_n=500, f = 50, n=500, vt=True)
g.p_importance('test', 'mlp')
# 'svm', 'pty' with f_selection='rf' (f = 50), gamma='auto', vt=True.
h = CLF(X_17, y_17, 'svm','pty', f_selection='rf', fs_n=500, f = 50, gamma='auto', vt=True)
h.p_importance('test', 'svm')
# 'r', 'qtu' with f_selection='rf' (f = 200), n=None, vt=True.
i = CLF(X_17, y_17, 'r','qtu', n=None, f_selection='rf', fs_n=500, f = 200, vt=True)
i.p_importance('test', 'r')
# 'knn', 'pty' with f_selection='kb' / f_classif (f = 50), n=10, vt=True.
j = CLF(X_17, y_17, 'knn','pty', f_selection='kb', kbest_f=f_classif, f = 50, n=10, vt=True)
j.p_importance('test', 'knn')