<h1 align = 'center'> Hypertuning SVC Model </h1>
<h3 align = 'center'> Max Butler and Josh Jaeger </h3> 

In [13]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn import neighbors
from sklearn import model_selection
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import BernoulliNB

In [14]:
train_X_scale = pd.read_csv(r"../competition/train_X_scale.csv",index_col = 0)
train_y = pd.read_csv(r"../competition/train_y.csv",index_col = 0)

In [15]:
train_y = train_y['class'].to_numpy()

<h3> Tuning Random Forest </h3>

In [16]:
max_features = [7,9,13,15,17]
n_estimators = range(150,200,10)
criterion = ['gini', 'entropy']
bootstrap = [True, False]
param_grid = dict(max_features = max_features, n_estimators = n_estimators, criterion = criterion, bootstrap = bootstrap,
                  random_state = [1984])

In [17]:
rf = RandomForestClassifier()

In [18]:
gridrf = model_selection.GridSearchCV(estimator=rf, param_grid = param_grid, scoring = 'roc_auc', cv=5, n_jobs=-2)

In [19]:
gridrf.fit(train_X_scale, train_y)

GridSearchCV(cv=5, estimator=RandomForestClassifier(), n_jobs=-2,
             param_grid={'bootstrap': [True, False],
                         'criterion': ['gini', 'entropy'],
                         'max_features': [7, 9, 13, 15, 17],
                         'n_estimators': range(150, 200, 10),
                         'random_state': [1984]},
             scoring='roc_auc')

In [20]:
print("The best parameters are %s with a score of %0.2f"
      % (gridrf.best_params_, gridrf.best_score_))

The best parameters are {'bootstrap': True, 'criterion': 'entropy', 'max_features': 7, 'n_estimators': 180, 'random_state': 1984} with a score of 0.99


<h3> Tune SVC </h3>

In [21]:
parameters = {'kernel':['linear','rbf','poly'],
              'C':[0.001,0.005,0.01,0.025,0.05,1,5,10,100],
              'gamma':[0.01,0.02,0.03,0.04,0.05,0.10,0.2,0.3,0.4,0.5]}

In [22]:
svc = SVC(random_state=1984)

In [23]:
grid_svc = model_selection.GridSearchCV(svc, parameters, scoring="roc_auc", cv = 5, n_jobs=-2)

In [24]:
grid_svc.fit(train_X_scale, train_y)

GridSearchCV(cv=5, estimator=SVC(random_state=1984), n_jobs=-2,
             param_grid={'C': [0.001, 0.005, 0.01, 0.025, 0.05, 1, 5, 10, 100],
                         'gamma': [0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.2, 0.3,
                                   0.4, 0.5],
                         'kernel': ['linear', 'rbf', 'poly']},
             scoring='roc_auc')

In [25]:
print("The best parameters are %s with a score of %0.2f"
      % (grid_svc.best_params_, grid_svc.best_score_))

The best parameters are {'C': 100, 'gamma': 0.4, 'kernel': 'rbf'} with a score of 0.97


In [26]:
## Test to see if there is improvement with a higher C
parameters = {'kernel':['rbf'],
              'C':[100, 200, 300, 400],
              'gamma':[0.3,0.35,0.4,0.45]}

In [27]:
grid_svc = model_selection.GridSearchCV(svc, parameters, scoring="roc_auc", cv = 5, n_jobs=-2)

In [28]:
grid_svc.fit(train_X_scale, train_y)

GridSearchCV(cv=5, estimator=SVC(random_state=1984), n_jobs=-2,
             param_grid={'C': [100, 200, 300, 400],
                         'gamma': [0.3, 0.35, 0.4, 0.45], 'kernel': ['rbf']},
             scoring='roc_auc')

In [29]:
print("The best parameters are %s with a score of %0.2f"
      % (grid_svc.best_params_, grid_svc.best_score_))

The best parameters are {'C': 400, 'gamma': 0.3, 'kernel': 'rbf'} with a score of 0.97


<h3> Tune Neural Network </h3>

In [30]:
ann = MLPClassifier(solver = 'sgd', max_iter=1500, early_stopping=True, tol =2e-4, random_state=1984)

In [31]:
parameters = {'activation':['identity','logistic','tanh','relu'], 'solver':['lbfgs', 'sgd', 'adam']}

In [32]:
grid_ann = model_selection.GridSearchCV(ann, parameters, scoring="roc_auc", cv=5, n_jobs=-2)

In [33]:
grid_ann.fit(train_X_scale, train_y)

GridSearchCV(cv=5,
             estimator=MLPClassifier(early_stopping=True, max_iter=1500,
                                     random_state=1984, solver='sgd',
                                     tol=0.0002),
             n_jobs=-2,
             param_grid={'activation': ['identity', 'logistic', 'tanh', 'relu'],
                         'solver': ['lbfgs', 'sgd', 'adam']},
             scoring='roc_auc')

In [34]:
print("The best parameters are %s with a score of %0.2f"
      % (grid_ann.best_params_, grid_ann.best_score_))

The best parameters are {'activation': 'identity', 'solver': 'lbfgs'} with a score of 0.97
