# Cross-validation

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
# Print estimators showing only the parameters that differ from their defaults.
sklearn.set_config(print_changed_only=True)

In [None]:
from sklearn.model_selection import train_test_split

# Load the Wisconsin breast-cancer data set. The 'Class' column is the target;
# every other column is used as a feature.
breast_cancer = pd.read_csv("data/breast_cancer_wisconsin.csv")
print(breast_cancer.head())

X = breast_cancer.drop('Class', axis=1)
y = breast_cancer['Class']

# Fix the seed so the train/test split -- and therefore every score computed
# below -- is reproducible across kernel restarts. The default split size is kept.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=0)

In [None]:
# Number of rows and columns in the training portion of the split.
X_train.shape

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# 5-fold cross-validation of a default k-nearest-neighbors classifier on the
# training split; the cell evaluates to one score per fold.
cross_val_score(estimator=KNeighborsClassifier(), X=X_train, y=y_train, cv=5)

Grid Searches
=================

Grid-Search with built-in cross-validation

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

In [None]:
from sklearn.preprocessing import StandardScaler
# Note: there are some issues with scaling the data up front like this;
# they are discussed in the next section.
# The scaler's statistics are learned from the training split only and then
# applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

Define parameter grid:

In [None]:
import numpy as np

# Candidate hyper-parameter values on a log scale:
# C takes the powers of ten from 1e-2 to 1e3, gamma those from 1e-5 to 1e-1.
param_grid = dict(
    C=np.power(10., np.arange(-2, 4)),
    gamma=np.power(10., np.arange(-5, 0)),
)

# Suppress scientific notation so the small values print as plain decimals.
np.set_printoptions(suppress=True)
print(param_grid)

In [None]:
# Set up a search over every (C, gamma) combination in param_grid for an SVC;
# verbose=3 requests detailed progress output while fitting.
grid_search = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    verbose=3,
)

A GridSearchCV object behaves just like a normal classifier.

In [None]:
# Run the cross-validated search over every combination in param_grid on the
# scaled training data (progress is printed because verbose=3 was requested).
grid_search.fit(X_train_scaled, y_train)

In [None]:
# Predict labels for the scaled test set; GridSearchCV delegates to the best
# estimator it found (this relies on the default refit=True).
grid_search.predict(X_test_scaled)

In [None]:
# Score the best estimator on the held-out test split. No `scoring` was passed
# to GridSearchCV, so the estimator's default score is used.
grid_search.score(X_test_scaled, y_test)

In [None]:
# Parameter combination with the best mean cross-validation score.
grid_search.best_params_

In [None]:
# Mean cross-validated score of the best parameter combination; it is computed
# on folds of the training data, not on the test split.
grid_search.best_score_

In [None]:
# The estimator refit with the best parameters (available because refit
# defaults to True).
grid_search.best_estimator_

In [None]:
# Show the mean cross-validated score of every (C, gamma) candidate as a heat map.
#
# The grid dimensions are derived from param_grid rather than hard-coded, so the
# plot stays correct if the candidate lists are edited.
n_C = len(param_grid['C'])
n_gamma = len(param_grid['gamma'])

# We extract just the scores. GridSearchCV enumerates candidates with the
# parameter names in sorted order and the last name varying fastest, so the flat
# array is laid out C-major / gamma-minor: rows are C, columns are gamma.
scores = grid_search.cv_results_['mean_test_score']
scores = np.array(scores).reshape(n_C, n_gamma)

plt.matshow(scores)
plt.xlabel('gamma')
plt.ylabel('C')
plt.colorbar()
plt.xticks(np.arange(n_gamma), param_grid['gamma'])
plt.yticks(np.arange(n_C), param_grid['C']);

# Exercises
Use GridSearchCV to adjust n_neighbors of KNeighborsClassifier.

In [None]:
# %load solutions/grid_search_k_neighbors.py