### Import Modules

In [5]:
import pandas as pd
import numpy as np

from common7 import file_exists, record_results
from common7 import X_adasyn_mean, y_adasyn, X_resampled_mean, y_resampled, X_smoted_mean, y_smoted, mean_train_scaled, median_train_scaled, y_train
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

### Optimize Parameters

In [6]:
# Snapshot of the training labels used below: their shape, and how many
# rows fall into each class.
obs = y_resampled.shape

# With return_counts=True, np.unique yields the sorted distinct labels
# together with the number of occurrences of each.
unique, counts = np.unique(y_resampled, return_counts=True)
bal = {label: count for label, count in zip(unique, counts)}

In [7]:
# Hyperparameter grid for the KNN classifier.
#
# Only `n_neighbors` and `weights` are searched. `algorithm` and `leaf_size`
# choose and tune the neighbour-search data structure; per the scikit-learn
# docs they influence query speed and memory, not the fitted model, so
# crossing them into the grid multiplied the work 28x (672 candidates x 10
# folds, run serially) without being able to improve AUC. The estimator's
# defaults (algorithm='auto', leaf_size=30) are used instead.
params = {
    # Odd k from 1 to 23 -- presumably odd to avoid tied votes between two classes.
    'n_neighbors': np.arange(1, 25, 2),
    'weights': ['uniform', 'distance'],
}

# NOTE(review): the inputs appear to have been class-balanced by resampling
# before this cross-validation. If that resampling duplicated or synthesised
# rows, near-copies of a row can land in both the training and validation
# folds and inflate the CV AUC -- confirm how common7 builds these arrays.
grid = GridSearchCV(
    KNeighborsClassifier(),
    param_grid=params,
    scoring='roc_auc',
    cv=10,
    n_jobs=-1,  # evaluate candidates/folds in parallel on all available cores
)
grid.fit(X_resampled_mean, y_resampled)
grid.best_estimator_

KeyboardInterrupt: 

In [None]:
# AUC of the refit best estimator scored on the same data it was trained on.
# This is optimistic by construction; read it alongside the cross-validated
# score shown below rather than as a measure of generalisation.
auc_score = roc_auc_score(y_resampled, grid.predict_proba(X_resampled_mean)[:, 1])

# A notebook cell only displays its final expression, so a bare
# `grid.best_params_` on an earlier line would be silently discarded.
# Emit both values as one tuple so the winning parameters and the
# cross-validated AUC are both visible.
grid.best_params_, grid.best_score_

In [None]:
# Assemble the summary record for this experiment in three steps:
# what was run, what it was trained on, and how it scored.
results = {
    'Model': 'K-Nearest Neighbors',
    'Hyperparameters': grid.best_params_,
    'Target': 'coup',
    'Features': 23,
}

# Training-set description computed earlier in the notebook.
results.update({'Observations': obs, 'Train Balance': bal})

# In-sample AUC next to the cross-validated AUC of the best candidate.
results.update({'Train_AUC': auc_score, 'CV_AUC': grid.best_score_})

results['Notes'] = 'Round 2: Missing data imputed with global mean. Classes balanced via resampling.'

In [None]:
# Hand the summary dict to the project helper imported from common7.
# NOTE(review): presumably this persists the run to a shared results log --
# confirm against common7 before re-running, in case it appends duplicates.
record_results(results)