### Import Modules

In [22]:
import pandas as pd
import numpy as np

from common7 import file_exists, record_results
from common7 import X_adasyn_mean, y_adasyn, X_resampled_mean, y_resampled, X_smoted_mean, y_smoted, mean_train_scaled, median_train_scaled, y_train
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import BaggingClassifier

### Optimize Parameters

In [23]:
obs = y_adasyn.shape
unique, counts = np.unique(y_adasyn, return_counts=True)
bal = dict(zip(unique, counts))

In [24]:
params = {
    'n_estimators': [1, 2, 4, 8, 16, 32, 64, 100, 200],
    'max_samples': [1, 5, 10, 20, 40, 80, 150, 300],
    'max_features': [1, 2, 4, 6, 8, 10, 15, 20],
    'bootstrap': [True, False],
    'random_state': [200]
}

grid = GridSearchCV(BaggingClassifier(), param_grid=params, scoring='roc_auc', cv = 10)
grid.fit(X_adasyn_mean, y_adasyn)
grid.best_estimator_

BaggingClassifier(base_estimator=None, bootstrap=True,
         bootstrap_features=False, max_features=20, max_samples=300,
         n_estimators=200, n_jobs=None, oob_score=False, random_state=200,
         verbose=0, warm_start=False)

In [25]:
auc_score = roc_auc_score(y_adasyn, grid.predict_proba(X_adasyn_mean)[:, 1])
grid.best_params_
grid.best_score_

0.9263390256896061

In [28]:
results = {
    'Model': 'Bagging', 
    'Hyperparameters': grid.best_params_, 
    'Target': 'coup',
    'Features': 23,
    'Observations': obs,
    'Train Balance': bal,
    'Train_AUC': auc_score, 
    'CV_AUC': grid.best_score_,
    'Notes': 'Round 4: Missing data imputed with global mean. Classes balanced via ADASYN.'
}

In [29]:
record_results(results)

Unnamed: 0,Model,Hyperparameters,Target,Features,Observations,Train Balance,Train_AUC,CV_AUC,Notes
6,Decision Tree,"{'criterion': 'gini', 'max_depth': 5, 'min_sam...",coup,23,8050,"{0: 4025, 1: 4025}",0.91187,0.91886,Round 2: Missing data imputed with global mean...
7,Decision Tree,"{'criterion': 'gini', 'max_depth': 5, 'min_sam...",coup,23,8050,"{0: 4025, 1: 4025}",0.93524,0.93022,Round 3: Missing data imputed with global mean...
8,Decision Tree,"{'criterion': 'gini', 'max_depth': 5, 'min_sam...",coup,23,8061,"{0: 4025, 1: 4036}",0.90218,0.85401,Round 4: Missing data imputed with global mean...
9,Decision Tree,"{'criterion': 'entropy', 'max_depth': 5, 'min_...",coup,23,4110,"{0: 4025, 1: 85}",0.87664,0.81481,Round 5: Missing data imputed with global medi...
10,Gradient Boosting,"{'learning_rate': 0.02, 'max_depth': 3, 'n_est...",coup,23,4110,"{0: 4025, 1: 85}",0.973399,0.831625,Round 1: Missing data imputed with global mean...
11,Bagging,"{'bootstrap': False, 'max_features': 6, 'max_s...",coup,23,"(4110,)","{0: 4025, 1: 85}",0.955437,0.829117,Round 1: Missing data imputed with global mean...
12,Bagging,"{'bootstrap': False, 'max_features': 15, 'max_...",coup,23,"(8050,)","{0: 4025, 1: 4025}",0.998049,0.997005,Round 2: Missing data imputed with global mean...
13,Gradient Boosting,"{'learning_rate': 0.04, 'max_depth': 7, 'n_est...",coup,23,"(8050,)","{0: 4025, 1: 4025}",0.999947,0.999437,Round 2: Missing data imputed with global mean...
14,Bagging,"{'bootstrap': False, 'max_features': 10, 'max_...",coup,23,"(8050,)","{0: 4025, 1: 4025}",0.98869,0.985494,Round 3: Missing data imputed with global mean...
15,Bagging,"{'bootstrap': True, 'max_features': 20, 'max_s...",coup,23,"(8061,)","{0: 4025, 1: 4036}",0.991649,0.926339,Round 4: Missing data imputed with global mean...


Unnamed: 0,Model,Hyperparameters,Target,Features,Observations,Train Balance,Train_AUC,CV_AUC,Notes
0,Logistic Regression,"{'C': 0.01, 'penalty': 'l2', 'random_state': 200}",coup,23,4110,"{0: 4025, 1: 85}",0.89451,0.86035,Round 1: Missing data imputed with global mean...
1,Logistic Regression,"{'C': 0.1, 'penalty': 'l1', 'random_state': 200}",coup,23,8050,"{0: 4025, 1: 4025}",0.87572,0.87396,Round 2: Missing data imputed with global mean...
2,Logistic Regression,"{'C': 0.1, 'penalty': 'l2', 'random_state': 200}",coup,23,8050,"{0: 4025, 1: 4025}",0.88211,0.88026,Round 3: Missing data imputed with global mean...
3,Logistic Regression,"{'C': 0.001, 'penalty': 'l2', 'random_state': ...",coup,23,8061,"{0: 4025, 1: 4036}",0.85387,0.82355,Round 4: Missing data imputed with global mean...
4,Logistic Regression,"{'C': 0.01, 'penalty': 'l2', 'random_state': 200}",coup,23,4110,"{0: 4025, 1: 85}",0.86035,0.843267,Round 5: Missing data imputed with global medi...
5,Decision Tree,"{'criterion': 'entropy', 'max_depth': 5, 'min_...",coup,23,4110,"{0: 4025, 1: 85}",0.87664,0.81481,Round 1: Missing data imputed with global mean...
6,Decision Tree,"{'criterion': 'gini', 'max_depth': 5, 'min_sam...",coup,23,8050,"{0: 4025, 1: 4025}",0.91187,0.91886,Round 2: Missing data imputed with global mean...
7,Decision Tree,"{'criterion': 'gini', 'max_depth': 5, 'min_sam...",coup,23,8050,"{0: 4025, 1: 4025}",0.93524,0.93022,Round 3: Missing data imputed with global mean...
8,Decision Tree,"{'criterion': 'gini', 'max_depth': 5, 'min_sam...",coup,23,8061,"{0: 4025, 1: 4036}",0.90218,0.85401,Round 4: Missing data imputed with global mean...
9,Decision Tree,"{'criterion': 'entropy', 'max_depth': 5, 'min_...",coup,23,4110,"{0: 4025, 1: 85}",0.87664,0.81481,Round 5: Missing data imputed with global medi...
