In [1]:
import numpy as np
import pandas as pd
import random

# Getting Data

## For Split

In [2]:
import os

# Location of the MRI export. Set the MRI_DATA_CSV environment variable to run this notebook on
# another machine; when it is unset, the original local path is used unchanged.
MRI_DATA_CSV = os.environ.get(
    'MRI_DATA_CSV',
    r'C:\Users\caron\OneDrive - University of North Carolina at Chapel Hill\Honors Project\CSV Files\MRI Data.csv',
)
df = pd.read_csv(MRI_DATA_CSV)

# Keep the SDx label column plus the feature columns from L_LatVent through R_insula_surfavg
# (label-based column slice, both ends inclusive), then drop every row with a missing value.
temp = pd.concat([df.loc[0:, ['SDx']], df.loc[0:, 'L_LatVent': 'R_insula_surfavg']], axis=1, sort=False)
temp = temp.dropna(how='any')

# df1: single-column label frame; df2: feature frame. Both contain only the complete rows.
df1 = temp.loc[0:, ['SDx']]
df2 = temp.loc[0:, 'L_LatVent': 'R_insula_surfavg']

## For Cross Val

In [3]:
# Fixed seed so the shuffle (and therefore the train/test membership and every score computed
# from it) is identical on each Restart & Run All. Without it, .sample(frac=1) reshuffles
# differently on every execution.
SEED = 42
# Number of shuffled rows used for training; the remaining rows form the hold-out set.
N_TRAIN = 523

# Label + feature columns, complete rows only, shuffled, with a fresh 0..n-1 index.
temp2 = (
    pd.concat([df.loc[0:, ['SDx']], df.loc[0:, 'L_LatVent': 'R_insula_surfavg']], axis=1, sort=False)
    .dropna(how='any')
    .sample(frac=1, random_state=SEED)
    .reset_index(drop=True)
)

# Positional split: first N_TRAIN rows train, the rest test.
train = temp2.iloc[:N_TRAIN]
test = temp2.iloc[N_TRAIN:]

# Separate the label column (kept as a one-column frame) from the feature block.
df1_train = train.loc[:, ['SDx']]
df2_train = train.loc[:, 'L_LatVent': 'R_insula_surfavg']
df1_test = test.loc[:, ['SDx']]
df2_test = test.loc[:, 'L_LatVent': 'R_insula_surfavg']

## To numpy array

In [4]:
# Feature frames become 2-D arrays as-is.
x, x_train, x_test = (np.array(features) for features in (df2, df2_train, df2_test))
# Label frames have a single column, so they are flattened to 1-D vectors.
y, y_train, y_test = (np.array(labels).ravel() for labels in (df1, df1_train, df1_test))

# Data Preprocessing and Normalization Models

In [5]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import QuantileTransformer
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import PowerTransformer

In [6]:
# One default-configured instance of each preprocessing step compared in this notebook.
minMax, robust, standard = MinMaxScaler(), RobustScaler(), StandardScaler()
quantile, normal, power = QuantileTransformer(), Normalizer(), PowerTransformer()

# SVC Models

In [7]:
from sklearn.svm import SVC, LinearSVC

In [8]:
# Kernel SVC with probability estimates enabled (predict_proba is called later in the notebook)
# and a default-configured linear SVC.
svc, lsvc = SVC(probability=True), LinearSVC()

# Pipelines

In [9]:
from sklearn.pipeline import Pipeline

In [36]:
from sklearn.base import clone

# Every pipeline step is an independent, unfitted clone of the template estimators defined above.
# Previously all pipelines referenced the very same `svc` / scaler objects, so calling
# set_params() or fit() on one pipeline silently changed the estimator inside all the others.
pl1 = Pipeline([('minMax', clone(minMax)), ('svc', clone(svc))])
# MinMaxScaler followed by LinearSVC.
pl1b = Pipeline([('minMax', clone(minMax)), ('lsvc', clone(lsvc))])
# Same steps as pl1; the kernel is not fixed here, it is chosen through the parameter grid
# that this pipeline is searched with.
pl1c = Pipeline([('minMax', clone(minMax)), ('svc', clone(svc))])
pl2 = Pipeline([('robust', clone(robust)), ('svc', clone(svc))])
pl3 = Pipeline([('standard', clone(standard)), ('svc', clone(svc))])
pl4 = Pipeline([('quantile', clone(quantile)), ('svc', clone(svc))])
pl5 = Pipeline([('normal', clone(normal)), ('svc', clone(svc))])
pl6 = Pipeline([('power', clone(power)), ('svc', clone(svc))])
# All six preprocessing steps chained in sequence before the SVC.
plall = Pipeline([
    ('minMax', clone(minMax)),
    ('robust', clone(robust)),
    ('standard', clone(standard)),
    ('quantile', clone(quantile)),
    ('normal', clone(normal)),
    ('power', clone(power)),
    ('svc', clone(svc)),
])

# Grid Search Cross Validation

In [37]:
from sklearn.model_selection import GridSearchCV

## pl1 (minMax)

* Transform features by scaling each feature to a given range.

In [32]:
# super hunky test w/ 5 folds and many parameters
#parameters1 = {'svc__kernel':('linear', 'poly', 'rbf'), 'svc__C':(.01, .02, .03, .04, .05, .06, .07, .08, .09, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 'svc__degree':(0.5, 1, 1.5, 2, 2.5, 3), 'svc__gamma':('auto','scale')}
#cv1 = GridSearchCV(pl1, parameters1, iid = False, cv=5)
#cv1.fit(x, y)

#print('best score:', end=" ")
#print(cv1.best_score_)
#print('\nbest params:', end=" ")
#print(cv1.best_params_)
#print('\nbest index:', end=" ")
#print(cv1.best_index_)
#print(cv1.best_estimator_)
#print(pd.DataFrame(data=cv1.cv_results_))

In [13]:
# 10-fold grid search over C and gamma for the MinMaxScaler + SVC pipeline.
# The `iid` argument is no longer passed: it was deprecated in scikit-learn 0.22 and removed in
# 0.24, where supplying it raises TypeError. From 0.22 on, the default is the unweighted mean
# over folds, which is what iid=False requested.
parameters1 = {'svc__C': (1.5, 1.6, 1.7, 1.8, 1.9), 'svc__gamma': ('auto', 'scale')}
cv1 = GridSearchCV(pl1, parameters1, cv=10)
cv1.fit(x, y)

print('best score:', cv1.best_score_)
print('\nbest params:', cv1.best_params_)
print('\nbest index:', cv1.best_index_)

best score: 0.8941664888509548

best params: {'svc__C': 1.7, 'svc__gamma': 'scale'}

best index: 5


In [14]:
# narrower test w/ 10 folds & fewer, more precise parameters
#parameters1 = {'svc__C':(0.75, 0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83, 0.84, 0.85)}
#cv1 = GridSearchCV(pl1, parameters1, iid = False, cv=10)
#cv1.fit(x, y)

#print('best score:', end=" ")
#print(cv1.best_score_)
#print('\nbest params:', end=" ")
#print(cv1.best_params_)
#print('\nbest index:', end=" ")
#print(cv1.best_index_)
#print('\nbest estimator:')
#print(cv1.best_estimator_)
#print(pd.DataFrame(data=cv1.cv_results_))

**Best MinMaxScaler/SVC = 'svc__C': 0.78, 'svc__gamma': 'scale', 'kernel':'rbf'**

## pl1b (minMax w/ lsvc)

* Transform features by scaling each feature to a given range.
* Apparently, LinearSVC is better suited to large data sets (tens of thousands of samples)

In [42]:
# 10-fold grid search over C for the MinMaxScaler + LinearSVC pipeline.
# The `iid` argument is no longer passed: it was deprecated in scikit-learn 0.22 and removed in
# 0.24, where supplying it raises TypeError. From 0.22 on, the default is the unweighted mean
# over folds, which is what iid=False requested.
# The candidate order is kept as-is because best_index_ refers to positions in this grid.
parameters1b = {'lsvc__C': (.1, .3, .2, .4, .5)}
cv1b = GridSearchCV(pl1b, parameters1b, cv=10)
cv1b.fit(x, y)

print('best score:', cv1b.best_score_)
print('\nbest params:', cv1b.best_params_)
print('\nbest index:', cv1b.best_index_)

best score: 0.8807135744514385

best params: {'lsvc__C': 0.2}

best index: 2


## pl1c (minMax w/ linear kernel only)

* Sigmoid and linear for kernel: will always use linear because sigmoid is worse
* For surface model

In [43]:
# 10-fold grid search comparing the sigmoid and linear kernels over a range of C values
# for the MinMaxScaler + SVC pipeline.
# The `iid` argument is no longer passed: it was deprecated in scikit-learn 0.22 and removed in
# 0.24, where supplying it raises TypeError. From 0.22 on, the default is the unweighted mean
# over folds, which is what iid=False requested.
# The candidate order is kept as-is because best_index_ refers to positions in this grid.
parameters1c = {'svc__kernel': ('sigmoid', 'linear'), 'svc__C': (0.7, 0.8, 0.4, 0.9, 1, 1.1, 1.2, 1.3)}
cv1c = GridSearchCV(pl1c, parameters1c, cv=10)
cv1c.fit(x, y)

print('best score:', cv1c.best_score_)
print('\nbest params:', cv1c.best_params_)
print('\nbest index:', cv1c.best_index_)

best score: 0.890356965041431

best params: {'svc__C': 0.4}

best index: 2


## pl2 (robust)

* Scale features using statistics that are robust to outliers.
* use pl1 to narrow down to rbf for all other normalizers/processors
* made decision based on scores to use rbf for the rest

In [16]:
# 10-fold grid search over C and gamma for the RobustScaler + SVC pipeline.
# The `iid` argument is no longer passed: it was deprecated in scikit-learn 0.22 and removed in
# 0.24, where supplying it raises TypeError. From 0.22 on, the default is the unweighted mean
# over folds, which is what iid=False requested.
parameters2 = {'svc__C': (1.8, 1.9, 2, 2.1, 2.2), 'svc__gamma': ('auto', 'scale')}
cv2 = GridSearchCV(pl2, parameters2, cv=10)
cv2.fit(x, y)

print('best score:', cv2.best_score_)
print('\nbest params:', cv2.best_params_)
print('\nbest index:', cv2.best_index_)

best score: 0.8884522031366691

best params: {'svc__C': 2, 'svc__gamma': 'auto'}

best index: 4


## pl3 (standard)

* 	Standardize features by removing the mean and scaling to unit variance
* use pl1 to narrow down to rbf

In [17]:
# 10-fold grid search over C and gamma for the StandardScaler + SVC pipeline.
# The `iid` argument is no longer passed: it was deprecated in scikit-learn 0.22 and removed in
# 0.24, where supplying it raises TypeError. From 0.22 on, the default is the unweighted mean
# over folds, which is what iid=False requested.
parameters3 = {'svc__C': (1.4, 1.5, 1.6, 1.7, 1.8), 'svc__gamma': ('auto', 'scale')}
cv3 = GridSearchCV(pl3, parameters3, cv=10)
cv3.fit(x, y)

print('best score:', cv3.best_score_)
print('\nbest params:', cv3.best_params_)
print('\nbest index:', cv3.best_index_)

best score: 0.8894045840890501

best params: {'svc__C': 1.6, 'svc__gamma': 'auto'}

best index: 4


## pl4 (quantile)

* Transform features using quantiles information.
* use pl1 to narrow down to rbf

In [18]:
# 10-fold grid search over C and gamma for the QuantileTransformer + SVC pipeline.
# The `iid` argument is no longer passed: it was deprecated in scikit-learn 0.22 and removed in
# 0.24, where supplying it raises TypeError. From 0.22 on, the default is the unweighted mean
# over folds, which is what iid=False requested.
parameters4 = {'svc__C': (5, 6, 7, 8, 9), 'svc__gamma': ('auto', 'scale')}
cv4 = GridSearchCV(pl4, parameters4, cv=10)
cv4.fit(x, y)

print('best score:', cv4.best_score_)
print('\nbest params:', cv4.best_params_)
print('\nbest index:', cv4.best_index_)

  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantile

  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantile

  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))


best score: 0.870255876809275

best params: {'svc__C': 7, 'svc__gamma': 'auto'}

best index: 4


## pl5 (normal)

* Normalize samples individually to unit norm.
* use pl1 to narrow down to rbf

In [19]:
# 10-fold grid search over C and gamma for the Normalizer + SVC pipeline.
# The `iid` argument is no longer passed: it was deprecated in scikit-learn 0.22 and removed in
# 0.24, where supplying it raises TypeError. From 0.22 on, the default is the unweighted mean
# over folds, which is what iid=False requested.
parameters5 = {'svc__C': (14, 15, 16, 17, 18), 'svc__gamma': ('auto', 'scale')}
cv5 = GridSearchCV(pl5, parameters5, cv=10)
cv5.fit(x, y)

print('best score:', cv5.best_score_)
print('\nbest params:', cv5.best_params_)
print('\nbest index:', cv5.best_index_)

best score: 0.8912172374551016

best params: {'svc__C': 16, 'svc__gamma': 'scale'}

best index: 5


## pl6 (power)

* Apply a power transform featurewise to make data more Gaussian-like.
* use pl1 to narrow down to rbf

In [20]:
# 10-fold grid search over C and gamma for the PowerTransformer + SVC pipeline.
# The `iid` argument is no longer passed: it was deprecated in scikit-learn 0.22 and removed in
# 0.24, where supplying it raises TypeError. From 0.22 on, the default is the unweighted mean
# over folds, which is what iid=False requested.
parameters6 = {'svc__C': (1.3, 1.4, 1.5, 1.6, 1.7), 'svc__gamma': ('auto', 'scale')}
cv6 = GridSearchCV(pl6, parameters6, cv=10)
cv6.fit(x, y)

print('best score:', cv6.best_score_)
print('\nbest params:', cv6.best_params_)
print('\nbest index:', cv6.best_index_)

  loglike = -n_samples / 2 * np.log(x_trans.var())
  loglike = -n_samples / 2 * np.log(x_trans.var())
  loglike = -n_samples / 2 * np.log(x_trans.var())
  loglike = -n_samples / 2 * np.log(x_trans.var())
  loglike = -n_samples / 2 * np.log(x_trans.var())
  loglike = -n_samples / 2 * np.log(x_trans.var())
  loglike = -n_samples / 2 * np.log(x_trans.var())
  loglike = -n_samples / 2 * np.log(x_trans.var())
  loglike = -n_samples / 2 * np.log(x_trans.var())
  loglike = -n_samples / 2 * np.log(x_trans.var())


best score: 0.8865474412319072

best params: {'svc__C': 1.5, 'svc__gamma': 'auto'}

best index: 4


# plall (all)

* Has all normalization models in one

In [16]:
# 5-fold grid search over kernel, C, degree and gamma for the pipeline that chains every
# preprocessing step before the SVC.
# The `iid` argument is no longer passed: it was deprecated in scikit-learn 0.22 and removed in
# 0.24, where supplying it raises TypeError. From 0.22 on, the default is the unweighted mean
# over folds, which is what iid=False requested.
parametersall = {
    'svc__kernel': ('rbf', 'linear', 'poly'),
    'svc__C': (.1, 1, 10, 100),
    'svc__degree': (1, 2, 3),
    'svc__gamma': ('auto', 'scale'),
}
cvall = GridSearchCV(plall, parametersall, cv=5)
cvall.fit(x, y)

print('best score:', cvall.best_score_)
print('\nbest params:', cvall.best_params_)
print('\nbest index:', cvall.best_index_)

  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantile

  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantile

  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantile

  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantile

  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantile

  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantile

  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantile

  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantile

  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantile

  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantiles, n_samples))
  % (self.n_quantile

best score: 0.821179084073821

best params: {'svc__C': 1, 'svc__degree': 1, 'svc__gamma': 'auto', 'svc__kernel': 'poly'}

best index: 20


# Cross val comparison tool

* interchange pipelines and parameters

In [45]:
# for testing parameter combinations
# note that this is for one dataset so cv best parameters may not be same as here
pltest = Pipeline([('minMax', minMax), ('svc', svc)])
pltest.set_params(svc__C=1.7, svc__degree=1, svc__gamma='scale', svc__kernel='rbf')
pltest.fit(x_train, y_train)
pltest.score(x_test, y_test)

0.9137931034482759

# predict_proba

In [22]:
# 10-fold grid search on the MinMaxScaler + SVC pipeline; the refit best estimator is then used
# for probability prediction.
# The `iid` argument is no longer passed: it was deprecated in scikit-learn 0.22 and removed in
# 0.24, where supplying it raises TypeError. From 0.22 on, the default is the unweighted mean
# over folds, which is what iid=False requested.
parameters = {'svc__C': (.78, .79), 'svc__gamma': ('auto', 'scale')}
cv = GridSearchCV(pl1, parameters, cv=10)
cv.fit(x, y)

# Class probabilities for the single feature row at position 1000. `.iloc` makes the positional
# selection explicit (a plain integer slice on the frame is also positional).
cv.predict_proba(np.array(df2.iloc[1000:1001]))

array([[5.45553992e-07, 9.99999454e-01]])

* **99.9% chance of positive SDx**

# Trials

minMax Test 1: best score: 0.8941664888509548 {'svc__C': 1.7, 'svc__gamma': 'scale'}

minMax Test 2: best score: 0.8941664888509548 {'svc__C': 1.7, 'svc__gamma': 'scale'}

minMax Test 3: best score: 0.8941664888509548 {'svc__C': 1.7, 'svc__gamma': 'scale'}

minMax Test 4: best score: 0.8941664888509548 {'svc__C': 1.7, 'svc__gamma': 'scale'}

minMax Test 5: best score: 0.8941664888509548 {'svc__C': 1.7, 'svc__gamma': 'scale'}

minMax w/lsvc Test 1: best score: 0.8807135744514385 {'lsvc__C': 0.2}

minMax w/lsvc Test 2: best score: 0.8807135744514385 {'lsvc__C': 0.2}

minMax w/lsvc Test 3: best score: 0.8807135744514385 {'lsvc__C': 0.2}

minMax w/lsvc Test 4: best score: 0.8807135744514385 {'lsvc__C': 0.2}

minMax w/lsvc Test 5: best score: 0.8807135744514385 {'lsvc__C': 0.2}

-

-

minMax SVC w/ linear kernel: best score: 0.890356965041431 {'svc__C': 0.4}

-

-


robust Test 1: best score: 0.8884522031366691 {'svc__C': 2, 'svc__gamma': 'auto'}

robust Test 2: best score: 0.8884522031366691 {'svc__C': 2, 'svc__gamma': 'auto'}

robust Test 3: best score: 0.8884522031366691 {'svc__C': 2, 'svc__gamma': 'auto'}

robust Test 4: best score: 0.8884522031366691 {'svc__C': 2, 'svc__gamma': 'auto'}

robust Test 5: best score: 0.8884522031366691 {'svc__C': 2, 'svc__gamma': 'auto'}

standard Test 1: best score: 0.8894045840890501 {'svc__C': 1.6, 'svc__gamma': 'auto'}

standard Test 2: best score: 0.8894045840890501 {'svc__C': 1.6, 'svc__gamma': 'auto'}

standard Test 3: best score: 0.8894045840890501 {'svc__C': 1.6, 'svc__gamma': 'auto'}

standard Test 4: best score: 0.8894045840890501 {'svc__C': 1.6, 'svc__gamma': 'auto'}

standard Test 5: best score: 0.8894045840890501 {'svc__C': 1.6, 'svc__gamma': 'auto'}

quantile Test 1: best score: 0.870255876809275 {'svc__C': 7, 'svc__gamma': 'auto'}

quantile Test 2: best score: 0.870255876809275 {'svc__C': 7, 'svc__gamma': 'auto'}

quantile Test 3: best score: 0.870255876809275 {'svc__C': 7, 'svc__gamma': 'auto'}

quantile Test 4: best score: 0.870255876809275 {'svc__C': 7, 'svc__gamma': 'auto'}

quantile Test 5: best score: 0.870255876809275 {'svc__C': 7, 'svc__gamma': 'auto'}

normal Test 1: best score: 0.8912172374551016 {'svc__C': 16, 'svc__gamma': 'scale'}

normal Test 2: best score: 0.8912172374551016 {'svc__C': 16, 'svc__gamma': 'scale'}

normal Test 3: best score: 0.8912172374551016 {'svc__C': 16, 'svc__gamma': 'scale'}

normal Test 4: best score: 0.8912172374551016 {'svc__C': 16, 'svc__gamma': 'scale'}

normal Test 5: best score: 0.8912172374551016 {'svc__C': 16, 'svc__gamma': 'scale'}

power Test 1: best score: 0.8865474412319072 {'svc__C': 1.5, 'svc__gamma': 'auto'}

power Test 2: best score: 0.8865474412319072 {'svc__C': 1.5, 'svc__gamma': 'auto'}

power Test 3: best score: 0.8865474412319072 {'svc__C': 1.5, 'svc__gamma': 'auto'}

power Test 4: best score: 0.8865474412319072 {'svc__C': 1.5, 'svc__gamma': 'auto'}

power Test 5: best score: 0.8865474412319072 {'svc__C': 1.5, 'svc__gamma': 'auto'}

1

1
* how do we normalize by site or sex?