In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import scipy as sp
import random
import math

from numpy import mean, std
from scipy import stats
import scipy as sp

from matplotlib import pyplot
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV,  cross_val_score, RepeatedStratifiedKFold, cross_validate
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, BaggingClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import get_scorer, make_scorer, confusion_matrix, classification_report, recall_score, precision_score, accuracy_score, fbeta_score, roc_curve, roc_auc_score, f1_score, confusion_matrix, mean_squared_error, log_loss
from sklearn import svm, datasets
from sklearn.naive_bayes import BernoulliNB
from sklearn.datasets import make_classification
from sklearn.exceptions import ConvergenceWarning
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder, RobustScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.dummy import DummyClassifier
from sklearn import svm, datasets
from sklearn.svm import SVC
from sklearn.exceptions import ConvergenceWarning
from sklearn.naive_bayes import BernoulliNB

from warnings import simplefilter
import imblearn
from imblearn.over_sampling import SMOTE

from sklearn.base import clone
from sklearn.preprocessing import FunctionTransformer



simplefilter("ignore", category = ConvergenceWarning)

In [2]:
class KidronClassifier:
    
    def __init__(self, min_rows = None, min_columns = None, max_estimators = None, estimators = None, base_estimator = None, random_state = None):

        self.min_rows = min_rows
        self.min_columns = min_columns
        self.max_estimators = max_estimators
        self.estimators = estimators
        self.base_estimator = base_estimator 
        self.random_state = random_state 
        #self.fitted_estimators = None
        #self.fitted_columns = None        
        return
             
    def get_params(self, deep = True):
        return {"min_rows": self.min_rows, 
                "min_columns": self.min_columns,
                "max_estimators": self.max_estimators,
                "estimators": self.estimators,
                "base_estimator": self.base_estimator,
                "random_state": self.random_state}
    
    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    #build training data for a single model
    def even_data(self, dict_X, length, width, seed):     
        
        labels = dict_X.keys()          
        X = pd.concat([dict_X[label].sample(n = length, random_state = seed) for label in labels]) #length
        
        random.seed(seed)
        columns = random.sample(range(len(X.columns)), width)
        X = X.iloc[:, columns]
        
        y = []
        for label in labels:
            y.extend([label] * length)
        
        return X, y, columns

    #fit 
    def fit(self, X, y):

        #label encoding
        self.le = LabelEncoder()
        self.le.fit(y)
        
        #X = pd.DataFrame(X)
        
        length = X.shape[0] #len(X);
        width  = X.shape[1] #len(X.columns)
        col = min([int(width  ** 0.5) + 1, width])
        row = min([int(length ** 0.5) + 1, length])            
        estimators = row * col
        
        if(self.max_estimators):
            estimators = min([estimators, self.max_estimators])
            
        if(self.estimators is None):
            if(self.base_estimator):
                self.estimators = [clone(self.base_estimator) for i in range(estimators)]
            else:
                self.estimators = [LogisticRegression() for i in range(estimators)]  #default
                                
        dict_X = {}
        for label in self.le.classes_:
            dict_X[label] = X[y == label]
            
        rows = min([len(dict_X[label]) for label in dict_X.keys()])  #maximal length available  
        rows = min(row, length)
        if(self.min_rows):
            rows = max([self.min_rows, rows])
            
        cols = col
        if(self.min_columns):
            cols = max([self.min_columns, cols])
        
        seed, self.fitted_estimators, self.fitted_columns = 0, [], []
        #print(len(self.estimators))
        
        for estimator in self.estimators:

            #get a balanced data
            X_train, y_train, fitted_columns = self.even_data(dict_X, rows, cols, seed)
            
            #fit
            fitted_estimator = estimator.fit(X_train, y_train)
            
            #save fitted
            self.fitted_estimators.append(fitted_estimator)
            self.fitted_columns.append(fitted_columns)
            
            seed += 1
        return
    
    #predict_proba
    def predict_proba(self, X):

        length = len(X)
        labels = len(self.le.classes_)
        
        pred = []
        
        # Predict 'soft' voting with probabilities
        predict_proba = []
        for fitted_estimator, fitted_columns in zip(self.fitted_estimators, self.fitted_columns):             
            fitted_X = X.iloc[:, fitted_columns]
            predict_proba.append(np.asarray(fitted_estimator.predict_proba(fitted_X)).reshape(length * labels, 1))
            
        predict_proba = np.concatenate(predict_proba, axis = 1)
        predict_proba = np.average(predict_proba, axis = 1).reshape(length, labels)
            
        # Convert integer predictions to original labels:
        return predict_proba
    
    #predict
    def predict(self, X):
            
        proba = self.predict_proba(X)
        
        # Convert integer predictions to original labels:
        return self.le.inverse_transform(np.argmax(proba, axis = 1))
    
    #predict, old slow version
    def predict_slow(self, X):

        pred = []
        for i in range(len(X)):

            # Predict 'soft' voting with probabilities
            x = X[i : i + 1]
            predict_proba = np.asarray([estimator.predict_proba(x) for estimator in self.fitted_estimators])
            predict_proba = np.average(predict_proba, axis = 0)
        
            #the indice of the average
            pred.append(np.argmax(predict_proba, axis = 1)[0])        

        # Convert integer predictions to original labels:
        return self.le.inverse_transform(pred)

In [3]:
path = "/Users/yaeerk/Documents/NAYA/classification/"
jobs = 8

In [4]:
df = pd.read_csv(path + 'PROTECT_and_RISK_shared_DEGs_NOT NORM.csv', index_col = 0)

In [5]:
X = df.drop('Diagnosis', axis = 1)
y = df.Diagnosis
y = LabelEncoder().fit_transform(y)
numerical_cols = X.columns.to_list()

In [6]:
tab = "  "
testsizes = [0.08, 0.1, 0.15, 0.2]
#randomstates = [132, 400, 1440, 1600, 2500, 3333, 4567]
randomstates = [250, 650, 850, 1050, 1250, 1850, 2050]

In [7]:
#bagging parameters
n_estimators = [2, 10, 20, 30]
max_samples = [0.6, 0.8, 1.0]
bootstrap = [True, False]
bootstrap_features = [True, False]

#cross validation
cv = 5

#scores we want
scores = ['f1', 'accuracy', 'f2']

results = pd.DataFrame(columns = ["score", "test score", "train score", "test variance", "train variance", "test rmse", "train rmse", "test log_loss", "train log_loss", "test size", "random state", "estimator", "estimator params"])
results.head()

Unnamed: 0,score,test score,train score,test variance,train variance,test rmse,train rmse,test log_loss,train log_loss,test size,random state,estimator,estimator params


In [8]:
#random forest
max_depths = [10, 20, 30]
min_samples_leafs = [3, 10]

for testsize in testsizes:
  print(f"test size: {testsize}")
    
  for randomstate in randomstates:
        print(tab * 2 + f"random state: {randomstate}")
        random.seed(randomstate)
    
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = testsize, random_state = randomstate)
                     
        smote = SMOTE(random_state = randomstate)
        X_train, y_train = smote.fit_resample(X_train, y_train)

        robustscaler = RobustScaler(quantile_range = (1, 99))
        robustscaler.fit(X_train)
                                    
        X_train = robustscaler.transform(X_train)
        X_test  = robustscaler.transform(X_test)

        best_avg_scores = {score : [None] for score in scores}
        
        # Run Grid search for each classifier
        for max_depth in max_depths:
            for min_samples_leaf in min_samples_leafs:
                    
                rf = RandomForestClassifier(max_depth = max_depth, min_samples_leaf = min_samples_leaf, random_state = randomstate)
                cv_results = cross_validate(rf, X_train, y_train, cv = cv, scoring = scores, return_train_score = True, n_jobs = jobs)
 
                for score in scores:
                    avg_score_test = np.mean(cv_results['test_' + score])
                    var_score_test = np.var(cv_results['test_' + score])
                    avg_score_train = np.mean(cv_results['train_' + score])
                    var_score_train = np.var(cv_results['train_' + score])

                    if(best_avg_scores[score][0] is None or avg_score_test > best_avg_scores[score][0]):
                        best_avg_scores[score] = [avg_score_test, var_score_test, avg_score_train, var_score_train, max_depth, min_samples_leaf]

        for score in scores: 
            
            print(tab * 3 + str(score))
            print(tab * 4 + f"CV score: {best_avg_scores[score][0]} using:" + ','.join([str(p) for p in best_avg_scores[score][4:]]))
            print(tab * 5 + f"train score: {best_avg_scores[score][2]} with variance: {best_avg_scores[score][3]}")
            print(tab * 5 + f"test  score: {best_avg_scores[score][0]} with variance: {best_avg_scores[score][1]}")

            rf = RandomForestClassifier(max_depth = best_avg_scores[score][4], min_samples_leaf = best_avg_scores[score][5], random_state = randomstate)
            
            rf.fit(X_train, y_train)            
            y_train_pred, y_test_pred = rf.predict(X_train), rf.predict(X_test)                          
            rmse_train, rmse_test = math.sqrt(mean_squared_error(y_train, y_train_pred)), math.sqrt(mean_squared_error(y_test, y_test_pred))                    
            log_loss_train, log_loss_test = log_loss(y_train, y_train_pred), log_loss(y_test, y_test_pred)        

            score_train, score_test = get_scorer(score)(rf, X_train, y_train), get_scorer(score)(rf, X_test, y_test)
            
            print(tab * 4 + f"Refitted train score: {score_train},  RMSE: {rmse_train}, Log-Loss:{log_loss_train}")
            print(tab * 4 + f"Refitted test  score: {score_test},  RMSE: {rmse_test}, Log-Loss:{log_loss_test}")
            
            n = len(results)
            results.at[n, 'score'] = score
            results.at[n, 'test score'] = best_avg_scores[score][0]
            results.at[n, 'train score'] = best_avg_scores[score][2]
            results.at[n, 'test variance'] = best_avg_scores[score][1]
            results.at[n, 'train variance'] = best_avg_scores[score][3]
            results.at[n, 'test rmse'] = rmse_test
            results.at[n, 'train rmse'] = rmse_train
            results.at[n, 'test log_loss'] = log_loss_test
            results.at[n, 'train log_loss'] = log_loss_train
            results.at[n, 'test size'] = testsize
            results.at[n, 'random state'] = randomstate
            results.at[n, 'estimator'] = "Smote/RandomForestClassifier"
            results.at[n, 'estimator params'] = ','.join([str(p) for p in best_avg_scores[score][4:]])

test size: 0.08
    random state: 250
      f1
        CV score: 0.9773180459387356 using:10,3
          train score: 0.9934004809337067 with variance: 4.900485401730748e-06
          test  score: 0.9773180459387356 with variance: 0.0003723410952958266
        Refitted train score: 0.9934065934065934,  RMSE: 0.08093341918275387, Log-Loss:0.22623652660421945
        Refitted test  score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626413e-16
      accuracy
        CV score: 0.9780936454849499 using:10,3
          train score: 0.9934470898289185 with variance: 4.795623726079488e-06
          test  score: 0.9780936454849499 with variance: 0.00033865758247236897
        Refitted train score: 0.9934497816593887,  RMSE: 0.08093341918275387, Log-Loss:0.22623652660421945
        Refitted test  score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626413e-16
    random state: 650
      f1
        CV score: 0.9752685184053371 using:10,3
          train score: 0.9945533382166358 with variance: 5.971925030038383e-

        Refitted train score: 0.9933920704845814,  RMSE: 0.08128917219051073, Log-Loss:0.2282297999663711
        Refitted test  score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626415e-16
    random state: 1050
      f1
        CV score: 0.9734119336369027 using:10,10
          train score: 0.9860350146812433 with variance: 1.5780569271654763e-05
          test  score: 0.9734119336369027 with variance: 0.0002582524766246141
        Refitted train score: 0.9820627802690582,  RMSE: 0.1327446623199944, Log-Loss:0.6086127999103214
        Refitted test  score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626415e-16
      accuracy
        CV score: 0.9736019536019537 using:10,10
          train score: 0.9862364907819453 with variance: 1.506177140460222e-05
          test  score: 0.9736019536019537 with variance: 0.000269749732020795
        Refitted train score: 0.9823788546255506,  RMSE: 0.1327446623199944, Log-Loss:0.6086127999103214
        Refitted test  score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221

        Refitted train score: 0.9929906542056075,  RMSE: 0.08372183582789214, Log-Loss:0.2420942270671319
        Refitted test  score: 0.9387755102040817,  RMSE: 0.24743582965269675, Log-Loss:2.1146189629537164
    random state: 2050
      f1
        CV score: 0.9716155896069288 using:10,3
          train score: 0.9926604471831594 with variance: 2.3141343432462886e-06
          test  score: 0.9716155896069288 with variance: 0.0008542779608175916
        Refitted train score: 0.9926650366748166,  RMSE: 0.08533201859828615, Log-Loss:0.25149594462313707
        Refitted test  score: 0.9885057471264368,  RMSE: 0.14285714285714285, Log-Loss:0.7048893059661414
      accuracy
        CV score: 0.973258889215398 using:10,3
          train score: 0.9927180620797642 with variance: 2.215156908177756e-06
          test  score: 0.973258889215398 with variance: 0.0007359670975351553
        Refitted train score: 0.9927184466019418,  RMSE: 0.08533201859828615, Log-Loss:0.25149594462313707
        Re

In [9]:
results.tail()

Unnamed: 0,score,test score,train score,test variance,train variance,test rmse,train rmse,test log_loss,train log_loss,test size,random state,estimator,estimator params
51,accuracy,0.974489,0.99171,0.000453633,2.43434e-06,0.124035,0.0874818,0.531378,0.264327,0.2,1250,Smote/RandomForestClassifier,103
52,f1,0.982531,0.992627,0.000100312,2.30015e-06,0.214834,0.0855399,1.5941,0.252723,0.2,1850,Smote/RandomForestClassifier,103
53,accuracy,0.982927,0.992683,9.51814e-05,2.23081e-06,0.214834,0.0855399,1.5941,0.252723,0.2,1850,Smote/RandomForestClassifier,103
54,f1,0.97233,0.981381,0.000911546,3.90103e-05,0.0,0.144338,9.99201e-16,0.719558,0.2,2050,Smote/RandomForestClassifier,1010
55,accuracy,0.974026,0.981772,0.000742115,3.64666e-05,0.0,0.144338,9.99201e-16,0.719558,0.2,2050,Smote/RandomForestClassifier,1010


In [10]:
#kidronclassifier
max_estimators = [8, 10, 12]
base_estimators = [LogisticRegression(), KNeighborsClassifier(), DecisionTreeClassifier(), SVC()]
min_rows = [28, 30, 32]
min_columns = [500, 600, 700]
    
for testsize in testsizes:
  print(f"test size: {testsize}")
    
  for randomstate in randomstates:
        print(tab * 2 + f"random state: {randomstate}")
        random.seed(randomstate)
    
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = testsize, random_state = randomstate)
           
        #no need for treating imbalance here, the classifier does it on its own
        #smote = SMOTE(random_state = randomstate)
        #X_train, y_train = smote.fit_resample(X_train, y_train)

        robustscaler = RobustScaler(quantile_range = (1, 99))
        robustscaler.fit(X_train)
                                    
        X_train = pd.DataFrame(robustscaler.transform(X_train))
        X_test  = pd.DataFrame(robustscaler.transform(X_test))

        #best_scores = {score : [None] for score in scores}
        best_avg_scores = {score : [None] for score in scores}
        
        # Run Grid
        for max_estimator in max_estimators:
            for base_estimator in base_estimators:
                for min_row in min_rows:
                    for min_column in min_columns:
       
                        kc = KidronClassifier(max_estimators = max_estimator, base_estimator = base_estimator, min_rows = min_row, min_columns = min_column, random_state = randomstate)                           
                        cv_results = cross_validate(kc, X_train, y_train, cv = cv, scoring = scores, return_train_score = True, n_jobs = jobs)
 
                        for score in scores:
                            avg_score_test = np.mean(cv_results['test_' + score])
                            var_score_test = np.var(cv_results['test_' + score])
                            avg_score_train = np.mean(cv_results['train_' + score])
                            var_score_train = np.var(cv_results['train_' + score])
                        
                            if(best_avg_scores[score][0] is None or avg_score_test > best_avg_scores[score][0]):
                                best_avg_scores[score] = [avg_score_test, var_score_test, avg_score_train, var_score_train, max_estimator, base_estimator, min_row, min_column]

                    
            
        for score in scores: 
            
            print(tab * 3 + str(score))
            print(tab * 4 + f"CV score: {best_avg_scores[score][0]} using:" + ','.join([str(p) for p in best_avg_scores[score][4:]]))
            print(tab * 5 + f"train score: {best_avg_scores[score][2]} with variance: {best_avg_scores[score][3]}")
            print(tab * 5 + f"test  score: {best_avg_scores[score][0]} with variance: {best_avg_scores[score][1]}")

            kc = KidronClassifier(max_estimators = best_avg_scores[score][4], base_estimator = best_avg_scores[score][5], min_rows = best_avg_scores[score][6], min_columns = best_avg_scores[score][7], random_state = randomstate)
            
            kc.fit(X_train, y_train)            
            y_train_pred, y_test_pred = kc.predict(X_train), kc.predict(X_test)                          
            rmse_train, rmse_test = math.sqrt(mean_squared_error(y_train, y_train_pred)), math.sqrt(mean_squared_error(y_test, y_test_pred))                    
            log_loss_train, log_loss_test = log_loss(y_train, y_train_pred), log_loss(y_test, y_test_pred)        

            score_train, score_test = get_scorer(score)(kc, X_train, y_train), get_scorer(score)(kc, X_test, y_test)
            
            print(tab * 4 + f"Refitted train score: {score_train},  RMSE: {rmse_train}, Log-Loss:{log_loss_train}")
            print(tab * 4 + f"Refitted test  score: {score_test},  RMSE: {rmse_test}, Log-Loss:{log_loss_test}")

            n = len(results)
            results.at[n, 'score'] = score
            results.at[n, 'test score'] = best_avg_scores[score][0]
            results.at[n, 'train score'] = best_avg_scores[score][2]
            results.at[n, 'test variance'] = best_avg_scores[score][1]
            results.at[n, 'train variance'] = best_avg_scores[score][3]
            results.at[n, 'test rmse'] = rmse_test
            results.at[n, 'train rmse'] = rmse_train
            results.at[n, 'test log_loss'] = log_loss_test
            results.at[n, 'train log_loss'] = log_loss_train
            results.at[n, 'test size'] = testsize
            results.at[n, 'random state'] = randomstate
            results.at[n, 'estimator'] = "KidronClassifier"
            results.at[n, 'estimator params'] = ','.join([str(p) for p in best_avg_scores[score][4:]])

test size: 0.08
    random state: 250
      f1
        CV score: 0.9733546481436166 using:10,KNeighborsClassifier(),32,500
          train score: 0.9698470197837992 with variance: 3.468482372025526e-05
          test  score: 0.9733546481436166 with variance: 0.0007747467626388868
        Refitted train score: 0.9686098654708519,  RMSE: 0.21674839277792138, Log-Loss:1.6226297621683892
        Refitted test  score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626413e-16
      accuracy
        CV score: 0.9595480225988702 using:10,KNeighborsClassifier(),32,500
          train score: 0.9546886537041595 with variance: 8.087072544002419e-05
          test  score: 0.9595480225988702 with variance: 0.0017919307989402796
        Refitted train score: 0.9530201342281879,  RMSE: 0.21674839277792138, Log-Loss:1.6226297621683892
        Refitted test  score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626413e-16
    random state: 650
      f1
        CV score: 0.9776133941483485 using:8,DecisionTreeClassifier(),3

      f1
        CV score: 0.9732103253286333 using:10,DecisionTreeClassifier(),28,600
          train score: 0.9757537137601542 with variance: 3.5746349442639016e-05
          test  score: 0.9732103253286333 with variance: 0.00044559184501957006
        Refitted train score: 0.9751693002257337,  RMSE: 0.19442398845107403, Log-Loss:1.3055894857182742
        Refitted test  score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626415e-16
      accuracy
        CV score: 0.9587375803623612 using:10,DecisionTreeClassifier(),28,600
          train score: 0.963053130087317 with variance: 8.591860590902434e-05
          test  score: 0.9587375803623612 with variance: 0.0010242590915658665
        Refitted train score: 0.9553264604810997,  RMSE: 0.211361158964698, Log-Loss:1.5429693922125056
        Refitted test  score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626415e-16
    random state: 1050
      f1
        CV score: 0.9734234932667182 using:8,DecisionTreeClassifier(),30,500
          train score: 0.976

      f1
        CV score: 0.9775400943803909 using:8,DecisionTreeClassifier(),30,500
          train score: 0.9786502052491347 with variance: 3.564571667975878e-05
          test  score: 0.9775400943803909 with variance: 0.00022349720827057827
        Refitted train score: 0.9705882352941176,  RMSE: 0.2088931871468374, Log-Loss:1.507146606323376
        Refitted test  score: 0.9873417721518987,  RMSE: 0.14285714285714285, Log-Loss:0.7048893059661413
      accuracy
        CV score: 0.9672727272727272 using:8,DecisionTreeClassifier(),30,500
          train score: 0.9681818181818181 with variance: 7.438016528925615e-05
          test  score: 0.9672727272727272 with variance: 0.00044958677685950443
        Refitted train score: 0.9636363636363636,  RMSE: 0.19069251784911845, Log-Loss:1.2559555052694804
        Refitted test  score: 0.9795918367346939,  RMSE: 0.14285714285714285, Log-Loss:0.7048893059661413
    random state: 1850
      f1
        CV score: 0.9783269963714158 using:10,Deci

        Refitted train score: 0.9652509652509652,  RMSE: 0.18641092980036, Log-Loss:1.2001891411358934
        Refitted test  score: 0.9692307692307692,  RMSE: 0.17541160386140583, Log-Loss:1.062731581381868
    random state: 2050
      f1
        CV score: 0.9706130699651101 using:12,DecisionTreeClassifier(),32,700
          train score: 0.978609269573546 with variance: 3.6815558818623244e-05
          test  score: 0.9706130699651101 with variance: 0.0009020769625173528
        Refitted train score: 0.976,  RMSE: 0.18641092980036, Log-Loss:1.2001891411358934
        Refitted test  score: 0.9824561403508771,  RMSE: 0.17541160386140583, Log-Loss:1.062743882880794
      accuracy
        CV score: 0.9615384615384617 using:10,DecisionTreeClassifier(),32,700
          train score: 0.9720039018952062 with variance: 8.793168483076878e-05
          test  score: 0.9615384615384617 with variance: 0.0010355029585798813
        Refitted train score: 0.9652509652509652,  RMSE: 0.18641092980036, Log

In [11]:
results.tail()

Unnamed: 0,score,test score,train score,test variance,train variance,test rmse,train rmse,test log_loss,train log_loss,test size,random state,estimator,estimator params
107,accuracy,0.961538,0.964284,0.000591716,6.17416e-05,0.175412,0.186411,1.06274,1.20019,0.2,1250,KidronClassifier,"12,DecisionTreeClassifier(),28,600"
108,f1,0.979801,0.979449,0.000164634,2.2327e-05,0.214834,0.196494,1.5941,1.33354,0.2,1850,KidronClassifier,"10,DecisionTreeClassifier(),32,500"
109,accuracy,0.969231,0.969124,0.000384615,4.18983e-05,0.175412,0.186411,1.06273,1.20019,0.2,1850,KidronClassifier,"10,DecisionTreeClassifier(),30,500"
110,f1,0.970613,0.978609,0.000902077,3.68156e-05,0.175412,0.186411,1.06274,1.20019,0.2,2050,KidronClassifier,"12,DecisionTreeClassifier(),32,700"
111,accuracy,0.961538,0.972004,0.0010355,8.79317e-05,0.175412,0.186411,1.06274,1.20019,0.2,2050,KidronClassifier,"10,DecisionTreeClassifier(),32,700"


In [12]:
#logistic parameters
C = [1.0, 0.001, 1.5]

for testsize in testsizes:
  print(f"test size: {testsize}")
    
  for randomstate in randomstates:
        print(tab * 2 + f"random state: {randomstate}")
        random.seed(randomstate)
            
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = testsize, random_state = randomstate)
                     
        smote = SMOTE(random_state = randomstate)
        X_train, y_train = smote.fit_resample(X_train, y_train)

        robustscaler = RobustScaler(quantile_range = (1, 99))
        robustscaler.fit(X_train)
                                    
        X_train = robustscaler.transform(X_train)
        X_test  = robustscaler.transform(X_test)

        best_avg_scores = {score : [None] for score in scores}
        
        # Run Grid search for each classifier
        for c in C:            
            lr = LogisticRegression(C = c, solver = 'liblinear', max_iter = 200, class_weight = 'balanced', random_state = randomstate)

            for b in bootstrap:
                for bf in bootstrap_features:
                    for n_estimator in n_estimators:
                        for max_sample in max_samples:

                            bc = BaggingClassifier(base_estimator = lr, bootstrap = b, bootstrap_features = bf, n_estimators = n_estimator, max_samples = max_sample, n_jobs = jobs, random_state = randomstate)
                            cv_results = cross_validate(bc, X_train, y_train, cv = cv, scoring = scores, return_train_score = True, n_jobs = jobs)
                                
                            for score in scores:
                                avg_score_test = np.mean(cv_results['test_' + score])
                                var_score_test = np.var(cv_results['test_' + score])
                                avg_score_train = np.mean(cv_results['train_' + score])
                                var_score_train = np.var(cv_results['train_' + score])

                                if(best_avg_scores[score][0] is None or avg_score_test > best_avg_scores[score][0]):
                                    best_avg_scores[score] = [avg_score_test, var_score_test, avg_score_train, var_score_train, c, b, bf, n_estimator, max_sample]

            
            for score in scores: 
            
                print(tab * 3 + str(score))
                print(tab * 4 + f"CV score: {best_avg_scores[score][0]} using:" + ','.join([str(p) for p in best_avg_scores[score][4:]]))
                print(tab * 5 + f"train score: {best_avg_scores[score][2]} with variance: {best_avg_scores[score][3]}")
                print(tab * 5 + f"test  score: {best_avg_scores[score][0]} with variance: {best_avg_scores[score][1]}")

                lr = LogisticRegression(C = best_avg_scores[score][4], solver = 'liblinear', max_iter = 200, class_weight = 'balanced', random_state = randomstate)
                bc = BaggingClassifier(base_estimator = lr, bootstrap = best_avg_scores[score][5], bootstrap_features = best_avg_scores[score][6], n_estimators = best_avg_scores[score][7], max_samples = best_avg_scores[score][8], n_jobs = jobs, random_state = randomstate)
                    
                bc.fit(X_train, y_train)            
                y_train_pred, y_test_pred = bc.predict(X_train), bc.predict(X_test)                          
                rmse_train, rmse_test = math.sqrt(mean_squared_error(y_train, y_train_pred)), math.sqrt(mean_squared_error(y_test, y_test_pred))                    
                log_loss_train, log_loss_test = log_loss(y_train, y_train_pred), log_loss(y_test, y_test_pred)        

                score_train, score_test = get_scorer(score)(bc, X_train, y_train), get_scorer(score)(bc, X_test, y_test)

                print(tab * 4 + f"Refitted train score: {score_train},  RMSE: {rmse_train}, Log-Loss:{log_loss_train}")
                print(tab * 4 + f"Refitted test  score: {score_test},  RMSE: {rmse_test}, Log-Loss:{log_loss_test}") 

                n = len(results)
                results.at[n, 'score'] = score
                results.at[n, 'test score'] = best_avg_scores[score][0]
                results.at[n, 'train score'] = best_avg_scores[score][2]
                results.at[n, 'test variance'] = best_avg_scores[score][1]
                results.at[n, 'train variance'] = best_avg_scores[score][3]
                results.at[n, 'test rmse'] = rmse_test
                results.at[n, 'train rmse'] = rmse_train
                results.at[n, 'test log_loss'] = log_loss_test
                results.at[n, 'train log_loss'] = log_loss_train
                results.at[n, 'test size'] = testsize
                results.at[n, 'random state'] = randomstate
                results.at[n, 'estimator'] = "Smote/Bagging/LogisticRegression"
                results.at[n, 'estimator params'] = ','.join([str(p) for p in best_avg_scores[score][4:]])

test size: 0.08
    random state: 250
      f1
        CV score: 0.9773180459387356 using:1.0,False,True,10,1.0
          train score: 1.0 with variance: 0.0
          test  score: 0.9773180459387356 with variance: 0.0003723410952958266
        Refitted train score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626413e-16
        Refitted test  score: 0.9743589743589743,  RMSE: 0.19611613513818404, Log-Loss:1.328414476727335
      accuracy
        CV score: 0.9780936454849499 using:1.0,False,True,10,1.0
          train score: 1.0 with variance: 0.0
          test  score: 0.9780936454849499 with variance: 0.00033865758247236897
        Refitted train score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626413e-16
        Refitted test  score: 0.9615384615384616,  RMSE: 0.19611613513818404, Log-Loss:1.328414476727335
      f1
        CV score: 0.9773180459387356 using:1.0,False,True,10,1.0
          train score: 1.0 with variance: 0.0
          test  score: 0.9773180459387356 with variance: 0.000372341095

      f1
        CV score: 0.9732699520842003 using:1.0,True,True,10,1.0
          train score: 0.9972811278284563 with variance: 5.907381172626802e-06
          test  score: 0.9732699520842003 with variance: 0.00013942752365760298
        Refitted train score: 0.9956521739130434,  RMSE: 0.0657951694959769, Log-Loss:0.14951851253208187
        Refitted test  score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626413e-16
      accuracy
        CV score: 0.9740532959326789 using:1.0,True,True,10,0.6
          train score: 0.9918816377352963 with variance: 2.9536769165154275e-06
          test  score: 0.9740532959326789 with variance: 0.00012006261418597692
        Refitted train score: 0.9891774891774892,  RMSE: 0.10403129732205987, Log-Loss:0.3737962813302032
        Refitted test  score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626413e-16
      f1
        CV score: 0.9753747335987277 using:1.5,False,True,2,1.0
          train score: 1.0 with variance: 0.0
          test  score: 0.9753747335987277 

      f1
        CV score: 0.9749662023949976 using:1.0,False,True,2,1.0
          train score: 1.0 with variance: 0.0
          test  score: 0.9749662023949976 with variance: 0.00023529053565001526
        Refitted train score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626413e-16
        Refitted test  score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626413e-16
      accuracy
        CV score: 0.9758241758241759 using:1.0,False,True,2,0.6
          train score: 0.9961432506887054 with variance: 2.6106292071731672e-05
          test  score: 0.9758241758241759 with variance: 0.00026083806303586505
        Refitted train score: 0.9955947136563876,  RMSE: 0.0663723311599972, Log-Loss:0.1521531999775811
        Refitted test  score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626413e-16
test size: 0.1
    random state: 250
      f1
        CV score: 0.9791437979862441 using:1.0,False,True,2,1.0
          train score: 1.0 with variance: 0.0
          test  score: 0.9791437979862441 with variance: 0.00041203

        Refitted train score: 0.9889867841409692,  RMSE: 0.10494387004027837, Log-Loss:0.38038299994395114
        Refitted test  score: 0.9696969696969697,  RMSE: 0.17407765595569785, Log-Loss:1.0466538179497245
    random state: 1050
      f1
        CV score: 0.9775264061780915 using:1.0,False,True,2,0.8
          train score: 0.9933670202781203 with variance: 1.4120296003622557e-05
          test  score: 0.9775264061780915 with variance: 0.00030412714288742337
        Refitted train score: 0.9911504424778761,  RMSE: 0.09386465089278642, Log-Loss:0.30430816118298093
        Refitted test  score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626415e-16
      accuracy
        CV score: 0.977997557997558 using:1.0,False,True,2,0.8
          train score: 0.9933929706656979 with variance: 1.3948901837872361e-05
          test  score: 0.977997557997558 with variance: 0.0002892857515568147
        Refitted train score: 0.9911894273127754,  RMSE: 0.09386465089278642, Log-Loss:0.30430816118298093
   

        Refitted train score: 0.9977272727272727,  RMSE: 0.04767312946227961, Log-Loss:0.07849721907934347
        Refitted test  score: 0.9696969696969697,  RMSE: 0.17407765595569785, Log-Loss:1.046629587724567
      f1
        CV score: 0.9789839991392428 using:1.0,False,True,2,0.8
          train score: 0.9977175417175417 with variance: 4.567723327186159e-06
          test  score: 0.9789839991392428 with variance: 0.00019001351774717692
        Refitted train score: 0.9977220956719818,  RMSE: 0.04767312946227961, Log-Loss:0.07849721907934347
        Refitted test  score: 0.9824561403508771,  RMSE: 0.17407765595569785, Log-Loss:1.046629587724567
      accuracy
        CV score: 0.9795454545454545 using:1.0,True,True,10,1.0
          train score: 0.9948863636363636 with variance: 1.29132231404966e-06
          test  score: 0.9795454545454545 with variance: 0.0001756198347107435
        Refitted train score: 0.9977272727272727,  RMSE: 0.04767312946227961, Log-Loss:0.07849721907934347
 

      f1
        CV score: 0.9781225743309372 using:1.0,True,True,2,1.0
          train score: 0.9916981573487321 with variance: 1.5805831610485575e-05
          test  score: 0.9781225743309372 with variance: 0.0002191520425110363
        Refitted train score: 0.990521327014218,  RMSE: 0.09690031662230185, Log-Loss:0.32430775957662705
        Refitted test  score: 0.9722222222222222,  RMSE: 0.20203050891044214, Log-Loss:1.40976229361738
      accuracy
        CV score: 0.9788782489740082 using:1.0,True,True,2,1.0
          train score: 0.9917871312747973 with variance: 1.5117549187091787e-05
          test  score: 0.9788782489740082 with variance: 0.00018798976721729426
        Refitted train score: 0.9906103286384976,  RMSE: 0.09690031662230185, Log-Loss:0.32430775957662705
        Refitted test  score: 0.9591836734693877,  RMSE: 0.20203050891044214, Log-Loss:1.40976229361738
      f1
        CV score: 0.9781225743309372 using:1.0,True,True,2,1.0
          train score: 0.9916981573487

        Refitted train score: 0.9906542056074766,  RMSE: 0.09667364890456635, Log-Loss:0.3227923027561756
        Refitted test  score: 0.9183673469387755,  RMSE: 0.2857142857142857, Log-Loss:2.819508268919857
      f1
        CV score: 0.978485370051635 using:1.0,True,True,10,0.6
          train score: 0.9905639013211192 with variance: 4.898208913603691e-06
          test  score: 0.978485370051635 with variance: 0.00013854286089526514
        Refitted train score: 0.9905660377358491,  RMSE: 0.09667364890456635, Log-Loss:0.3227923027561756
        Refitted test  score: 0.9411764705882354,  RMSE: 0.2857142857142857, Log-Loss:2.819508268919857
      accuracy
        CV score: 0.9790150478796169 using:1.0,True,True,10,0.6
          train score: 0.990655209452202 with variance: 4.762095769662352e-06
          test  score: 0.9790150478796169 with variance: 0.00013006637834722204
        Refitted train score: 0.9906542056074766,  RMSE: 0.09667364890456635, Log-Loss:0.3227923027561756
       

        Refitted train score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626413e-16
        Refitted test  score: 0.9538461538461539,  RMSE: 0.21483446221182986, Log-Loss:1.5941096735717277
    random state: 850
      f1
        CV score: 0.9744197363817616 using:1.0,True,False,2,0.6
          train score: 0.9860878653794838 with variance: 1.4528372829677859e-05
          test  score: 0.9744197363817616 with variance: 6.927364942653582e-05
        Refitted train score: 0.98989898989899,  RMSE: 0.1, Log-Loss:0.34538776394910775
        Refitted test  score: 0.9791666666666666,  RMSE: 0.17541160386140583, Log-Loss:1.062731581381868
      accuracy
        CV score: 0.975 using:1.0,True,False,2,0.6
          train score: 0.9862500000000001 with variance: 1.406250000000001e-05
          test  score: 0.975 with variance: 6.250000000000011e-05
        Refitted train score: 0.99,  RMSE: 0.1, Log-Loss:0.34538776394910775
        Refitted test  score: 0.9692307692307692,  RMSE: 0.17541160386140583, Lo

      f1
        CV score: 0.9774050632911393 using:1.0,True,True,2,0.6
          train score: 0.9895155874079944 with variance: 1.0095973808490853e-05
          test  score: 0.9774050632911393 with variance: 0.0001528841531805793
        Refitted train score: 0.9851485148514851,  RMSE: 0.12097167578182678, Log-Loss:0.505445508218206
        Refitted test  score: 0.9647058823529412,  RMSE: 0.21483446221182986, Log-Loss:1.5940973720728018
      accuracy
        CV score: 0.978048780487805 using:1.0,True,True,2,0.6
          train score: 0.9896341463414634 with variance: 9.666864961332556e-06
          test  score: 0.978048780487805 with variance: 0.0001427721594289116
        Refitted train score: 0.9853658536585366,  RMSE: 0.12097167578182678, Log-Loss:0.505445508218206
        Refitted test  score: 0.9538461538461539,  RMSE: 0.21483446221182986, Log-Loss:1.5940973720728018
      f1
        CV score: 0.9774050632911393 using:1.0,True,True,2,0.6
          train score: 0.9895155874079944

In [13]:
results.tail()

Unnamed: 0,score,test score,train score,test variance,train variance,test rmse,train rmse,test log_loss,train log_loss,test size,random state,estimator,estimator params
275,accuracy,0.974026,0.990888,0.00053972,5.87585e-06,0.124035,0.0883883,0.531366,0.269836,0.2,2050,Smote/Bagging/LogisticRegression,"1.0,True,True,2,0.8"
276,f1,0.972859,0.990837,0.000628125,5.99955e-06,0.124035,0.0883883,0.531366,0.269836,0.2,2050,Smote/Bagging/LogisticRegression,"1.0,True,True,2,0.8"
277,accuracy,0.974026,0.990888,0.00053972,5.87585e-06,0.124035,0.0883883,0.531366,0.269836,0.2,2050,Smote/Bagging/LogisticRegression,"1.0,True,True,2,0.8"
278,f1,0.972859,0.990837,0.000628125,5.99955e-06,0.124035,0.0883883,0.531366,0.269836,0.2,2050,Smote/Bagging/LogisticRegression,"1.0,True,True,2,0.8"
279,accuracy,0.974026,0.990888,0.00053972,5.87585e-06,0.124035,0.0883883,0.531366,0.269836,0.2,2050,Smote/Bagging/LogisticRegression,"1.0,True,True,2,0.8"


In [14]:
#bagging svc
kernels = ['linear', 'rbf']
C = [0.001, 1, 1.5]

for testsize in testsizes:
  print(f"test size: {testsize}")
    
  for randomstate in randomstates:
        print(tab * 2 + f"random state: {randomstate}")
        random.seed(randomstate)
        
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = testsize, random_state = randomstate)
                     
        smote = SMOTE(random_state = randomstate)
        X_train, y_train = smote.fit_resample(X_train, y_train)

        robustscaler = RobustScaler(quantile_range = (1, 99))
        robustscaler.fit(X_train)
                                    
        X_train = robustscaler.transform(X_train)
        X_test  = robustscaler.transform(X_test)

        best_avg_scores = {score : [None] for score in scores}
        
        # Run Grid search for each classifier
        for kernel in kernels:
            for c in C:
                    
                svc = SVC(kernel = kernel, C = c, max_iter = 200, class_weight = 'balanced', random_state = randomstate)

                for b in bootstrap:
                    for bf in bootstrap_features:
                        for n_estimator in n_estimators:
                            for max_sample in max_samples:

                                random.seed(randomstate)
                                bc = BaggingClassifier(base_estimator = svc, bootstrap = b, bootstrap_features = bf, n_estimators = n_estimator, max_samples = max_sample, n_jobs = jobs, random_state = randomstate)
                                cv_results = cross_validate(bc, X_train, y_train, cv = cv, scoring = scores, return_train_score = True, n_jobs = jobs)

                                for score in scores:
                                    avg_score_test = np.mean(cv_results['test_' + score])
                                    var_score_test = np.var(cv_results['test_' + score])
                                    avg_score_train = np.mean(cv_results['train_' + score])
                                    var_score_train = np.var(cv_results['train_' + score])

                                    if(best_avg_scores[score][0] is None or avg_score_test > best_avg_scores[score][0]):
                                        best_avg_scores[score] = [avg_score_test, var_score_test, avg_score_train, var_score_train, b, bf, n_estimator, max_sample, kernel, c]


        for score in scores: 
            
            print(tab * 3 + str(score))
            print(tab * 4 + f"CV score: {best_avg_scores[score][0]} using:" + ','.join([str(p) for p in best_avg_scores[score][4:]]))
            print(tab * 5 + f"train score: {best_avg_scores[score][2]} with variance: {best_avg_scores[score][3]}")
            print(tab * 5 + f"test  score: {best_avg_scores[score][0]} with variance: {best_avg_scores[score][1]}")

            svc = SVC(kernel = best_avg_scores[score][8], C = best_avg_scores[score][9], max_iter = 200, class_weight = 'balanced', random_state = randomstate)
            bc = BaggingClassifier(base_estimator = svc, bootstrap = best_avg_scores[score][4], bootstrap_features = best_avg_scores[score][5], n_estimators = best_avg_scores[score][6], max_samples = best_avg_scores[score][7], n_jobs = jobs, random_state = randomstate)
            
            bc.fit(X_train, y_train)            
            y_train_pred, y_test_pred = bc.predict(X_train), bc.predict(X_test)                          
            rmse_train, rmse_test = math.sqrt(mean_squared_error(y_train, y_train_pred)), math.sqrt(mean_squared_error(y_test, y_test_pred))                    
            log_loss_train, log_loss_test = log_loss(y_train, y_train_pred), log_loss(y_test, y_test_pred)        

            score_train, score_test = get_scorer(score)(bc, X_train, y_train), get_scorer(score)(bc, X_test, y_test)
            
            print(tab * 4 + f"Refitted train score: {score_train},  RMSE: {rmse_train}, Log-Loss:{log_loss_train}")
            print(tab * 4 + f"Refitted test  score: {score_test},  RMSE: {rmse_test}, Log-Loss:{log_loss_test}")            
            
            n = len(results)
            results.at[n, 'score'] = score
            results.at[n, 'test score'] = best_avg_scores[score][0]
            results.at[n, 'train score'] = best_avg_scores[score][2]
            results.at[n, 'test variance'] = best_avg_scores[score][1]
            results.at[n, 'train variance'] = best_avg_scores[score][3]
            results.at[n, 'test rmse'] = rmse_test
            results.at[n, 'train rmse'] = rmse_train
            results.at[n, 'test log_loss'] = log_loss_test
            results.at[n, 'train log_loss'] = log_loss_train
            results.at[n, 'test size'] = testsize
            results.at[n, 'random state'] = randomstate
            results.at[n, 'estimator'] = "Smote/Bagging/SVC"
            results.at[n, 'estimator params'] = ','.join([str(p) for p in best_avg_scores[score][4:]])

test size: 0.08
    random state: 250
      f1
        CV score: 0.9842174554534106 using:False,True,10,1.0,linear,1
          train score: 1.0 with variance: 0.0
          test  score: 0.9842174554534106 with variance: 0.0002881491974344508
        Refitted train score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626413e-16
        Refitted test  score: 0.9743589743589743,  RMSE: 0.19611613513818404, Log-Loss:1.328414476727335
      accuracy
        CV score: 0.9846631629240324 using:False,True,10,1.0,linear,1
          train score: 1.0 with variance: 0.0
          test  score: 0.9846631629240324 with variance: 0.0002698778194737806
        Refitted train score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626413e-16
        Refitted test  score: 0.9615384615384616,  RMSE: 0.19611613513818404, Log-Loss:1.328414476727335
    random state: 650
      f1
        CV score: 0.9842668442668442 using:False,True,30,1.0,linear,1
          train score: 1.0 with variance: 0.0
          test  score: 0.9842668442

      f1
        CV score: 0.9821178821178822 using:False,False,20,0.8,linear,1
          train score: 1.0 with variance: 0.0
          test  score: 0.9821178821178822 with variance: 0.0001283581553311823
        Refitted train score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626413e-16
        Refitted test  score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626415e-16
      accuracy
        CV score: 0.9823931623931624 using:True,True,30,0.6,linear,1
          train score: 0.9873338782429691 with variance: 4.87714624817187e-06
          test  score: 0.9823931623931624 with variance: 7.696571506095349e-05
        Refitted train score: 0.9889867841409692,  RMSE: 0.10494387004027837, Log-Loss:0.38038299994395114
        Refitted test  score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626415e-16
    random state: 1250
      f1
        CV score: 0.9838502673796793 using:False,True,20,1.0,linear,1
          train score: 1.0 with variance: 0.0
          test  score: 0.9838502673796793 with variance: 0.000194

      f1
        CV score: 0.9773128106461441 using:False,True,30,1.0,linear,1
          train score: 1.0 with variance: 0.0
          test  score: 0.9773128106461441 with variance: 0.0005029489717641053
        Refitted train score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626413e-16
        Refitted test  score: 0.9767441860465116,  RMSE: 0.20203050891044214, Log-Loss:1.40976229361738
      accuracy
        CV score: 0.9781369379958861 using:False,True,30,1.0,linear,1
          train score: 1.0 with variance: 0.0
          test  score: 0.9781369379958861 with variance: 0.00044051466576007866
        Refitted train score: 1.0,  RMSE: 0.0, Log-Loss:9.992007221626413e-16
        Refitted test  score: 0.9591836734693877,  RMSE: 0.20203050891044214, Log-Loss:1.40976229361738
test size: 0.2
    random state: 250
      f1
        CV score: 0.987079587079587 using:False,True,2,1.0,linear,1
          train score: 1.0 with variance: 0.0
          test  score: 0.987079587079587 with variance: 0.000

In [15]:
results.tail()

Unnamed: 0,score,test score,train score,test variance,train variance,test rmse,train rmse,test log_loss,train log_loss,test size,random state,estimator,estimator params
331,accuracy,0.979585,0.996811,0.000298975,8.13991e-06,0.175412,0.0714286,1.06274,0.176218,0.2,1250,Smote/Bagging/SVC,"True,False,10,1.0,linear,1"
332,f1,0.985062,1.0,8.78144e-05,0.0,0.248069,0.0,2.1255,9.99201e-16,0.2,1850,Smote/Bagging/SVC,"False,False,2,1.0,linear,1"
333,accuracy,0.985366,1.0,8.32838e-05,0.0,0.248069,0.0,2.1255,9.99201e-16,0.2,1850,Smote/Bagging/SVC,"False,False,2,1.0,linear,1"
334,f1,0.975831,0.998028,0.000338689,6.91955e-06,0.124035,0.0,0.531366,9.99201e-16,0.2,2050,Smote/Bagging/SVC,"False,True,10,0.6,linear,1"
335,accuracy,0.976623,0.992187,0.000499241,1.95338e-05,0.124035,0.0721688,0.531366,0.179889,0.2,2050,Smote/Bagging/SVC,"True,False,10,0.8,linear,1"


In [16]:
results.to_csv(path + "classification-results-4.csv")