In [3]:
import pickle

import numpy as np
from sklearn.cross_validation import train_test_split
from sklearn.datasets import load_digits
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import make_scorer, \
    hamming_loss
from sklearn.metrics import confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, Imputer
from sklearn.svm import SVC

In [2]:
# Train an SVM digit classifier with a grid search and persist the fitted
# search object to 'digit_model.pkl'.
#
# The split is seeded so that X_test is the same on every kernel restart.
# A later cell reloads the pickled model instead of refitting; with an
# unseeded split, that model could have been trained on rows that land in
# this session's X_test, which would inflate every test-set metric.
RANDOM_STATE = 42

print('Begin extract')
X, y = load_digits(return_X_y=True)
print(f'X shape: {X.shape}, y shape: {y.shape}')

# Hold out 30% of the samples for final evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.3, random_state=RANDOM_STATE)

# Model
print('Begin modelling')
pipe = Pipeline([
    ('imputer', Imputer()),
    ('scale', StandardScaler()),
    ('svm', SVC())
])

# Two sub-grids so that `gamma` is only searched for the RBF kernel.
# Keys use the `<step name>__<parameter>` form to address pipeline steps.
param_grid = [
    {
        'scale__with_mean': [True, False],
        'scale__with_std': [True, False],
        'svm__kernel': ['linear'],
        'svm__C': np.logspace(-6, 9, 10)
    },
    {
        'scale__with_mean': [True, False],
        'scale__with_std': [True, False],
        'svm__kernel': ['rbf'],
        'svm__C': np.logspace(-6, 9, 10),
        'svm__gamma': np.logspace(-6, 9, 10)
    }
]

# 3-fold cross-validation over every combination, using all available cores.
grid = GridSearchCV(pipe, cv=3, n_jobs=-1, param_grid=param_grid)

grid.fit(X_train, y_train)

y_preds = grid.predict(X_test)

print("Done modelling")
# We can also save the model for later.
# The context manager guarantees the file is flushed and closed even if
# pickling raises.
with open('digit_model.pkl', 'wb') as model_file:
    pickle.dump(grid, model_file)

Begin extract
X shape: (1797, 64), y shape: (1797,)
Begin modelling


NameError: name 'np' is not defined

In [4]:
# Reload the fitted grid search saved by the training cell.
# NOTE: pickle.load can execute arbitrary code; only load files this
# notebook (or another trusted source) produced.
with open('digit_model.pkl', 'rb') as model_file:
    grid = pickle.load(model_file)

In [5]:
# Display the pipeline selected by the grid search.
grid.best_estimator_

Pipeline(memory=None,
     steps=[('imputer', Imputer(axis=0, copy=True, missing_values='NaN', strategy='mean', verbose=0)), ('scale', StandardScaler(copy=True, with_mean=True, with_std=False)), ('svm', SVC(C=4.641588833612782, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.0021544346900318843,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False))])

In [6]:
# Display the parameter combination that scored best in the search.
grid.best_params_

{'scale__with_mean': True,
 'scale__with_std': False,
 'svm__C': 4.641588833612782,
 'svm__gamma': 0.0021544346900318843,
 'svm__kernel': 'rbf'}

In [7]:
# Display the best cross-validation score found by the search
# (no `scoring` was passed to GridSearchCV, so the estimator's default is used).
grid.best_score_

0.9809069212410502

In [8]:
# Predict labels for the held-out test features with the loaded grid search.
y_predict = grid.predict(X_test)

In [9]:
# Display the predicted labels for the test set.
y_predict

array([1, 3, 0, 4, 0, 9, 6, 5, 9, 8, 8, 2, 8, 4, 1, 1, 5, 3, 3, 7, 6, 4,
       2, 5, 0, 3, 9, 7, 9, 9, 4, 6, 9, 6, 3, 4, 4, 9, 2, 4, 7, 1, 6, 4,
       1, 8, 0, 0, 3, 4, 4, 7, 3, 0, 6, 8, 9, 5, 9, 3, 4, 3, 6, 7, 7, 7,
       2, 3, 5, 4, 0, 4, 4, 8, 6, 9, 9, 8, 0, 5, 4, 4, 9, 2, 7, 1, 2, 0,
       1, 8, 0, 4, 4, 5, 8, 1, 0, 9, 2, 8, 3, 1, 8, 6, 3, 7, 1, 5, 7, 6,
       7, 1, 6, 0, 8, 6, 9, 9, 0, 5, 6, 8, 0, 5, 1, 3, 8, 2, 9, 9, 1, 4,
       2, 2, 0, 2, 5, 9, 2, 6, 2, 9, 0, 9, 4, 8, 4, 1, 7, 7, 1, 5, 4, 6,
       0, 3, 7, 4, 4, 6, 9, 2, 7, 5, 1, 7, 4, 2, 4, 5, 6, 2, 9, 6, 2, 9,
       1, 6, 6, 1, 0, 9, 1, 8, 2, 1, 7, 9, 3, 4, 6, 7, 2, 2, 4, 7, 0, 7,
       9, 1, 7, 5, 2, 1, 2, 6, 9, 6, 5, 6, 3, 5, 5, 4, 4, 5, 3, 0, 1, 4,
       4, 8, 0, 5, 7, 2, 5, 9, 4, 0, 2, 5, 8, 3, 6, 6, 6, 4, 7, 4, 6, 8,
       3, 5, 1, 2, 5, 4, 9, 4, 9, 7, 1, 6, 1, 0, 3, 3, 8, 7, 8, 1, 5, 9,
       6, 7, 5, 2, 4, 5, 3, 3, 5, 0, 2, 3, 3, 1, 4, 5, 5, 3, 4, 7, 7, 3,
       6, 1, 9, 2, 3, 5, 6, 5, 8, 8, 6, 3, 0, 0, 4,

In [10]:
# Confusion matrix on the held-out set: rows are the true labels (y_test),
# columns are the predicted labels (y_predict), so off-diagonal entries are
# misclassifications. `confusion_matrix` is imported in the top import cell.
confusion_matrix(y_test, y_predict)

array([[49,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0, 43,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0, 57,  0,  0,  0,  0,  1,  0,  0],
       [ 0,  0,  0, 53,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0, 61,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0, 55,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0, 57,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0, 54,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0, 52,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0, 58]])