In [25]:
from __future__ import division
from sklearn.datasets import load_digits

In [26]:
x, y = load_digits(n_class=10, return_X_y=True)

In [27]:
x.shape, y.shape

((1797L, 64L), (1797L,))

<h2>Normalização</h2>

In [4]:
x = x / 255

<h2>Validação Cruzada</h2>

In [32]:
from sklearn.model_selection import *
from sklearn.metrics import accuracy_score, recall_score, precision_score
from sklearn.svm import SVC
import numpy as np

In [14]:
kf = KFold(n_splits=10, random_state=0)

In [33]:
acc_list = []
recall_list = []
precision_list = []

C_range = np.logspace(-2, 10, 13)
gamma_range = np.logspace(-9, 3, 13)
param_grid = dict(gamma=gamma_range, C=C_range)

for train_index, test_index in kf.split(x):
    # Separando os dados por fold
    train_data, test_data = x[train_index], x[test_index]
    train_labels, test_labels = y[train_index], y[test_index]
    
    # Grid Search
    cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=42)
    grid = GridSearchCV(SVC(), param_grid=param_grid, cv=cv)
    grid.fit(train_data, train_labels)
    
    best_C = grid.best_params_['C']
    best_gamma = grid.best_params_['gamma']
    
    # Treinamento com os "melhores" parâmetros encontrados
    svm = SVC(C=best_C, gamma=best_gamma)
    svm.fit(train_data, train_labels)
    
    y_pred = svm.predict(test_data)
    acc_list.append(accuracy_score(test_labels, y_pred))
    recall_list.append(recall_score(test_labels, y_pred, average='micro'))
    precision_list.append(precision_score(test_labels, y_pred, average='micro'))

In [34]:
np.mean(acc_list)

0.9832960893854749