### Import Modules

In [1]:
import pandas as pd
import numpy as np

from common7 import file_exists, record_results
from common7 import X_adasyn_mean, y_adasyn, X_resampled_mean, y_resampled, X_smoted_mean, y_smoted, mean_train_scaled, median_train_scaled, y_train
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

### Optimize parameters

In [2]:
# Class balance of the resampled target: each distinct label mapped to the
# number of times it occurs.
unique, counts = np.unique(y_resampled, return_counts=True)
bal = {label: n_rows for label, n_rows in zip(unique, counts)}

# Shape tuple of the resampled target (not a bare integer); stored under
# 'Observations' in the results record built further down.
obs = y_resampled.shape

In [None]:
# Hyperparameter search space for the decision tree:
# 2 criteria x 2 splitters x 4 depths x 10 split fractions x 10 leaf fractions
# = 1,600 candidate settings.
params = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth': [2, 3, 4, 5],
    # Floats here are read by scikit-learn as fractions of the training samples.
    'min_samples_split': np.linspace(0.01, 0.1, 10, endpoint=True),
    # Single-value entry: fixes the seed for every candidate and keeps it
    # visible in grid.best_params_.
    'random_state': [200],
    'min_samples_leaf': np.linspace(0.01, 0.25, 10, endpoint=True)
}

# 10-fold CV over 1,600 candidates is 16,000 fits (plus the final refit).
# n_jobs=-1 spreads them across all available cores; because the seed is fixed
# in the grid, the selected parameters and scores are the same as a serial run.
grid = GridSearchCV(
    DecisionTreeClassifier(),
    param_grid=params,
    scoring='roc_auc',
    cv=10,
    n_jobs=-1,
)
grid.fit(X_resampled_mean, y_resampled)

# Show the refit best estimator as the cell output.
grid.best_estimator_

In [None]:
# Training AUC: score the fitted search object on the same resampled data it
# was fitted on, using the second column of predict_proba
# (presumably the positive class — confirm the label encoding).
auc_score = roc_auc_score(y_resampled, grid.predict_proba(X_resampled_mean)[:, 1])

# A notebook cell renders only its final expression, so a bare
# `grid.best_params_` followed by another line is silently discarded.
# Ending with a tuple shows both the winning parameters and their CV score.
grid.best_params_, grid.best_score_

In [None]:
# Summary record for this run, assembled from ordered (key, value) pairs.
# It combines fixed descriptive fields with values computed in earlier cells:
# grid.best_params_, obs, bal, auc_score and grid.best_score_.
results = dict([
    ('Model', 'Decision Tree'),
    ('Hyperparameters', grid.best_params_),
    ('Target', 'coup'),
    ('Features', 23),  # hard-coded feature count
    ('Observations', obs),
    ('Train Balance', bal),
    ('Train_AUC', auc_score),
    ('CV_AUC', grid.best_score_),
    ('Notes', 'Round 2: Missing data imputed with global mean. Classes balanced via resampling.'),
])

In [None]:
# Pass the summary dict to the project helper imported from common7.
# NOTE(review): what the helper does with it (where or how it is stored) is not
# visible in this notebook — confirm before re-running to avoid duplicate entries.
record_results(results)