H2.4 Support Vector Machine (SVM)
=================================


In [29]:
from sklearn.datasets import fetch_mldata
from sklearn import svm
from sklearn.model_selection import GridSearchCV
mnist = fetch_mldata('MNIST original')

In [11]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

In [68]:
X = mnist.data
y = mnist.target
X4 = X[y == 4, :]
X9 = X[y == 9, :]
training_X = np.concatenate((X4[:4000], X9[:4000]))
testing_X = np.concatenate((X4[4000:], X9[4000:]))
training_y = np.concatenate((np.ones(4000) * 4, np.ones(4000) * 9))
testing_y = np.concatenate((np.ones(len(X4) - 4000) * 4, np.ones(len(X9) - 4000) * 9))

In [13]:
clf = svm.SVC(C=1.0, kernel='linear')
clf.fit(training_X, training_y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [17]:
Pe = 1 - clf.score(testing_X, testing_y)

0.08978675645342316


In [69]:
def svc_param_selection(X, y, kernel, degree=3, nfolds=2):
    Cs = [10.**(-c) for c in range(9,-1,-1)]
    gammas = [0.001, 0.01, 0.1, 1]
    param_grid = {'C': Cs, 'gamma': gammas}
    grid_search = GridSearchCV(svm.SVC(kernel=kernel, degree=degree), param_grid, cv=nfolds)
    grid_search.fit(X, y)
    grid_search.best_params_
    return grid_search.best_params_

### (a) Inhomogeneous linear and quadratic kernel

In [70]:
p1_params = svc_param_selection(training_X, training_y, 'poly', degree=1)
p2_params = svc_param_selection(training_X, training_y, 'poly', degree=2)
print("best C for p1: {0}".format(p1_params['C']))
clf_p1 = svm.SVC(C=p1_params['C'], kernel='poly', degree=1)
clf_p1.fit(training_X, training_y)
Pe_1 = 1 - clf_p1.score(testing_X, testing_y)
print("p1 error rate: {0}".format(Pe_1))

print("best C for p2: {0}".format(p2_params['C']))
clf_p2 = svm.SVC(C=p2_params['C'], kernel='poly', degree=2)
clf_p2.fit(training_X, training_y)
Pe_2 = 1 - clf_p2.score(testing_X, testing_y)
print("p2 error rate: {0}".format(Pe_2))

best C for p1: 1e-06
p1 error rate: 0.07903839501902454
best C for p2: 1e-09
p2 error rate: 0.03510895883777243


### (b) Radial basis function kernel

In [None]:
rbf_params = svc_param_selection(training_X, training_y, 'rbf')
print("best C for p1: {0}, gamma: {1}".format(rbf_params['C'], rbf_params['gamma']))
clf_rbf = svm.SVC(C=rbf_params['C'], kernel='rbf', gamma=rbf_params['gamma'])
clf_rbf.fit(training_X, training_y)
Pe_rbf = 1 - clf_rbf.score(testing_X, testing_y)
print("rbf error rate: {0}".format(Pe_rbf))

### (c) The hardest example to classify