In [172]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np 

import pandas as pd
import sklearn as sklearn
import scipy.stats as stats
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import StratifiedKFold, ParameterGrid
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neighbors import KNeighborsClassifier

from utils.randomized_search import logUnifD
from utils.base_set import X_train, y_train, seed
from utils.estimators import DecisionTree

### Decision Tree

In [173]:
model1 = DecisionTree()

In [186]:
k = 5
n = 30
N = X_train.shape[1]
param = {
    'criterion': ["gini","entropy"],
    'max_depth': stats.randint(1, N)
}

In [187]:
classifier_Dtree = RandomizedSearchCV(estimator = model1.estimator, 
                                param_distributions = param,
                                cv = k,
                                n_iter = n,
                                scoring = "roc_auc"
                               )

In [188]:
classifier_Dtree.fit(X_train,y_train)

RandomizedSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_iter=30,
                   param_distributions={'criterion': ['gini', 'entropy'],
                                        'max_depth': <scipy.stats._distn_infrastructure.rv_frozen object at 0x76e8b128bbb0>},
                   scoring='roc_auc')

In [189]:
columns_to_keep = ['param_max_depth','param_criterion','mean_test_score','rank_test_score']
pd.DataFrame(classifier_Dtree.cv_results_)[columns_to_keep]

Unnamed: 0,param_max_depth,param_criterion,mean_test_score,rank_test_score
0,61,entropy,0.569399,21
1,97,entropy,0.585493,6
2,77,entropy,0.57797,13
3,24,entropy,0.588711,4
4,60,gini,0.54929,29
5,81,gini,0.572988,17
6,33,gini,0.552418,28
7,97,entropy,0.588728,3
8,141,entropy,0.569338,22
9,102,entropy,0.612159,2


### KNN 

In [190]:
model2 = KNeighborsClassifier()
K = len(X_train)
n = 100
param = {
    'n_neighbors': logUnifD(10,K/2)
}

In [191]:
classifier_KNN = RandomizedSearchCV(estimator = model2, 
                                param_distributions = param,
                                cv = k,
                                n_iter = n,
                                scoring = "roc_auc", 
                                random_state = seed)

In [192]:
classifier_KNN.fit(X_train,y_train)

RandomizedSearchCV(cv=5, estimator=KNeighborsClassifier(), n_iter=100,
                   param_distributions={'n_neighbors': <utils.randomized_search.logUnifD object at 0x76e8b12899c0>},
                   random_state=8241, scoring='roc_auc')

In [193]:
columns_to_keep = ['param_n_neighbors','mean_test_score','rank_test_score']
pd.DataFrame(classifier_KNN.cv_results_)[columns_to_keep]

Unnamed: 0,param_n_neighbors,mean_test_score,rank_test_score
0,14,0.813417,3
1,166,0.769625,83
2,153,0.772029,82
3,34,0.805245,35
4,93,0.784418,76
...,...,...,...
95,38,0.802264,39
96,15,0.810649,18
97,159,0.769436,84
98,173,0.767385,86


### SVM

In [194]:
model3 = svm.SVC()
n = 100
param = {
    'C': stats.expon(scale=100), 
    'gamma': stats.expon(scale=.1),
    'kernel': ["rbf","linear","poly","sigmoid"]
        }

In [195]:
classifier_SVM = RandomizedSearchCV(estimator = model3, 
                                param_distributions = param,
                                cv = k,
                                n_iter = n,
                                scoring = "roc_auc", 
                                random_state = seed
                               )

In [196]:
classifier_SVM.fit(X_train,y_train)

RandomizedSearchCV(cv=5, estimator=SVC(), n_iter=100,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x76e8b08225c0>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x76e8b0823970>,
                                        'kernel': ['rbf', 'linear', 'poly',
                                                   'sigmoid']},
                   random_state=8241, scoring='roc_auc')

In [197]:
columns_to_keep = ['param_C','param_gamma','param_kernel','mean_test_score','rank_test_score']
pd.DataFrame(classifier_SVM.cv_results_)[columns_to_keep]

Unnamed: 0,param_C,param_gamma,param_kernel,mean_test_score,rank_test_score
0,12.587597,0.234105,sigmoid,0.616457,64
1,239.733755,0.041652,poly,0.791603,1
2,143.974508,0.081293,sigmoid,0.612601,68
3,15.163046,0.069363,sigmoid,0.614574,65
4,148.743757,0.468783,rbf,0.500000,82
...,...,...,...,...,...
95,132.631485,0.075499,poly,0.791603,1
96,61.801592,0.09716,poly,0.791603,1
97,16.13484,0.00918,sigmoid,0.633043,61
98,60.395524,0.117131,poly,0.791603,1


### LDA 

In [166]:
model4 = LinearDiscriminantAnalysis()
param = {
    'solver': ["svd","lsqr"], 
    'n_components' : [0,1]
}

In [167]:
classifier_LDA = RandomizedSearchCV(estimator = model4,
                                   param_distributions=param,
                                   cv = k, 
                                   scoring= "roc_auc",
                                   random_state=seed)

In [168]:
classifier_LDA.fit(X_train,y_train)



RandomizedSearchCV(cv=5, estimator=LinearDiscriminantAnalysis(),
                   param_distributions={'n_components': [0, 1],
                                        'solver': ['svd', 'lsqr']},
                   random_state=8241, scoring='roc_auc')

In [169]:
columns_to_keep = ['param_solver','param_n_components','mean_test_score','rank_test_score']
pd.DataFrame(classifier_LDA.cv_results_)[columns_to_keep]

Unnamed: 0,param_solver,param_n_components,mean_test_score,rank_test_score
0,svd,0,0.716569,1
1,lsqr,0,0.716569,1
2,svd,1,0.716569,1
3,lsqr,1,0.716569,1


### Naive Bayes 