# Model Selection with Grid Search (Logistic, SVM, Neural Net)

This notebook demonstrates `grid_search_cv` on three supervised models: logistic regression, a support vector machine, and a neural network.
Each parameter grid is kept small and uses only values compatible with the model implementations.


In [1]:
import cuanalytics as ca


In [2]:
# Load the breast cancer data and hold out 20% of the rows as a test split
# (random_state is fixed so the split can be reproduced).
df = ca.load_breast_cancer_data()
train, test = ca.split_data(df, test_size=0.2, random_state=42)

# Scale both splits, excluding the 'diagnosis' column. The scaler returned for
# the training split is passed back in for the test split, so the test data is
# transformed with the scaler obtained from the training data.
train_scaled, scaler = ca.scale_data(train, exclude_cols=['diagnosis'])
# Bind the unused second return value to a named throwaway rather than `_`:
# assigning `_` in IPython stops it from tracking the last cell output.
test_scaled, _test_scaler = ca.scale_data(test, exclude_cols=['diagnosis'], scaler=scaler)
train.shape, test.shape


((455, 31), (114, 31))

## Logistic Regression (tuning C)


In [None]:
# Candidate values for C, the only hyperparameter searched for this model.
logit_grid = dict(C=[0.1, 1.0, 10.0])

# Grid search over the unscaled training split with k=5, stratify_on set to
# the 'diagnosis' column, and 'accuracy' as the refit metric.
logit_results = ca.grid_search_cv(
    ca.fit_logit, train,
    formula='diagnosis ~ .', param_grid=logit_grid,
    k=5, stratify_on='diagnosis', refit='accuracy',
)

# Show the winning parameter combination alongside its score.
(logit_results['best_params'], logit_results['best_score'])


In [None]:
# Take the model stored under 'best_model' in the search results and score it
# on the held-out test split; the report is displayed as the cell output.
logit_best = logit_results['best_model']
logit_test_report = logit_best.score(test)
logit_test_report


## Support Vector Machine (tuning C)


In [None]:
# Candidate values for C, the only hyperparameter searched for this model.
svm_grid = dict(C=[0.1, 1.0, 10.0])

# Grid search over the unscaled training split with k=5, stratify_on set to
# the 'diagnosis' column, and 'accuracy' as the refit metric.
svm_results = ca.grid_search_cv(
    ca.fit_svm, train,
    formula='diagnosis ~ .', param_grid=svm_grid,
    k=5, stratify_on='diagnosis', refit='accuracy',
)

# Show the winning parameter combination alongside its score.
(svm_results['best_params'], svm_results['best_score'])


In [None]:
# Take the model stored under 'best_model' in the search results and score it
# on the held-out test split; the report is displayed as the cell output.
svm_best = svm_results['best_model']
svm_test_report = svm_best.score(test)
svm_test_report


## Neural Network (tuning hidden layers)

Neural networks tend to train better on scaled features, so we run the search on `train_scaled` and evaluate the best model on `test_scaled`.


In [None]:
# Search space: three hidden-layer layouts crossed with two alpha values
# (six combinations in total).
nn_grid = dict(
    hidden_layers=[[10], [10, 5], [20, 10]],
    alpha=[0.0001, 0.001],
)

# Grid search over the scaled training split with k=5, stratify_on set to
# the 'diagnosis' column, and 'accuracy' as the refit metric.
nn_results = ca.grid_search_cv(
    ca.fit_nn, train_scaled,
    formula='diagnosis ~ .', param_grid=nn_grid,
    k=5, stratify_on='diagnosis', refit='accuracy',
)

# Show the winning parameter combination alongside its score.
(nn_results['best_params'], nn_results['best_score'])


In [None]:
# Take the model stored under 'best_model' in the search results and score it
# on the scaled test split (the search itself was run on train_scaled); the
# report is displayed as the cell output.
nn_best = nn_results['best_model']
nn_test_report = nn_best.score(test_scaled)
nn_test_report
