In [1]:
import gzip
import pickle
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
import seaborn as sns; sns.set(font_scale=1.2)
                               
# Allows charts to appear in the notebook
%matplotlib inline

In [2]:
def read_mnist(mnist_file):
    """
    Reads MNIST data from a gzip-compressed pickle.

    The file is expected to unpickle to a 3-tuple
    ``(train_data, val_data, test_data)`` where each element is itself an
    ``(X, Y)`` pair.

    Parameters
    ----------
    mnist_file : string
        The name of the MNIST file (e.g., 'mnist.pkl.gz').

    Returns
    -------
    (train_X, train_Y, val_X, val_Y, test_X, test_Y) : tuple
        For the standard 'mnist.pkl.gz' file:

        train_X : numpy array, shape (N=50000, d=784)
            Input vectors of the training set.
        train_Y: numpy array, shape (N=50000)
            Outputs of the training set.
        val_X : numpy array, shape (N=10000, d=784)
            Input vectors of the validation set.
        val_Y: numpy array, shape (N=10000)
            Outputs of the validation set.
        test_X : numpy array, shape (N=10000, d=784)
            Input vectors of the test set.
        test_Y: numpy array, shape (N=10000)
            Outputs of the test set.

    Raises
    ------
    OSError
        If the file cannot be opened or is not valid gzip data.
    ValueError
        If the unpickled object does not have the expected
        3-tuple-of-pairs structure.
    """
    # SECURITY NOTE: unpickling can execute arbitrary code. Only call this on
    # a file obtained from a trusted source.
    #
    # The `with` block guarantees the gzip handle is closed even when
    # unpickling or the 3-way unpack below raises.
    with gzip.open(mnist_file, 'rb') as f:
        # encoding='latin1' lets Python 3 read a pickle written by Python 2.
        train_data, val_data, test_data = pickle.load(f, encoding='latin1')

    train_X, train_Y = train_data
    val_X, val_Y = val_data
    test_X, test_Y = test_data

    return train_X, train_Y, val_X, val_Y, test_X, test_Y

In [3]:
# Smoke test: load every split, then print array shapes and value ranges.
train_X, train_Y, val_X, val_Y, test_X, test_Y = read_mnist('mnist.pkl.gz')

# The labels keep their padding so the printed '=' signs line up.
for label, split in (
    ('train_X.shape =', train_X),
    ('train_Y.shape =', train_Y),
    ('val_X.shape   =', val_X),
    ('val_Y.shape   =', val_Y),
    ('test_X.shape  =', test_X),
    ('test_Y.shape  =', test_Y),
):
    print(label, split.shape)

# Value ranges of the training inputs and training labels.
x_range = (train_X.min(), train_X.max())
y_range = (train_Y.min(), train_Y.max())
print('\ntrain_X: min = %.3f, max = %.3f' % x_range)
print('train_Y: min = %d, max = %d' % y_range)

train_X.shape = (50000, 784)
train_Y.shape = (50000,)
val_X.shape   = (10000, 784)
val_Y.shape   = (10000,)
test_X.shape  = (10000, 784)
test_Y.shape  = (10000,)

train_X: min = 0.000, max = 0.996
train_Y: min = 0, max = 9


In [4]:
# Baseline model: an RBF-kernel SVC with every other hyperparameter left at
# its scikit-learn default, fitted on the full training split.
# (SVC is already imported in the notebook's first cell.)
svclassifier = SVC(kernel='rbf')
# Kept as the last expression so the cell displays the fitted estimator.
svclassifier.fit(train_X, train_Y)

SVC()

In [5]:
# Predict a label for every test-split input with the SVC fitted in the
# previous cell; `y_pred` is compared against `test_Y` in the next cell.
y_pred = svclassifier.predict(test_X)

In [6]:
# Test-split performance of the current `y_pred`.
# `confusion_matrix` and `classification_report` come from the notebook's
# top-level import cell.
# Confusion matrix: rows are true labels, columns are predicted labels
# (scikit-learn convention).
print(confusion_matrix(test_Y, y_pred))
# Per-class precision / recall / F1 and support.
print(classification_report(test_Y, y_pred))

[[ 972    0    1    0    0    2    2    1    2    0]
 [   0 1127    3    1    0    1    1    0    2    0]
 [   5    1 1010    1    1    0    1    7    5    1]
 [   0    0    2  992    0    3    0    5    7    1]
 [   0    0    5    0  960    0    2    0    2   13]
 [   2    0    0   10    1  867    4    1    5    2]
 [   6    2    0    0    2    5  942    0    1    0]
 [   0    8   12    2    1    0    0  994    1   10]
 [   3    0    3    5    5    2    1    2  952    1]
 [   4    4    1    7   10    1    0    8    5  969]]
              precision    recall  f1-score   support

           0       0.98      0.99      0.99       980
           1       0.99      0.99      0.99      1135
           2       0.97      0.98      0.98      1032
           3       0.97      0.98      0.98      1010
           4       0.98      0.98      0.98       982
           5       0.98      0.97      0.98       892
           6       0.99      0.98      0.99       958
           7       0.98      0.97   

In [None]:
# Grid search over the RBF-SVC hyperparameters C and gamma.
#
# Candidates are scored on the VALIDATION split, not the test split, so the
# test split stays untouched by model selection and remains an honest
# estimate of generalisation error when it is used afterwards.
list_error = []  # one (validation error, C, gamma, fitted model) per grid point
C_2d_range = [1e-2, 1, 1e2]
gamma_2d_range = [1e-1, 1, 1e1]
for C in C_2d_range:
    for gamma in gamma_2d_range:
        start_time = time.perf_counter()
        candidate = SVC(C=C, gamma=gamma, kernel='rbf')
        candidate.fit(train_X, train_Y)
        # Covers fitting only; prediction time is not included.
        train_time = time.perf_counter() - start_time
        val_pred = candidate.predict(val_X)
        # accuracy_score returns the fraction of correct predictions by default.
        my_accuracy = accuracy_score(val_Y, val_pred)
        error = 1 - my_accuracy
        print("C: {}, gamma: {}, error: {}, time: {}".format(C, gamma, error, train_time))
        list_error.append((error, C, gamma, candidate))

# Leave `svclassifier` bound to the candidate with the lowest validation error
# (the first one wins on ties) rather than to whichever grid point ran last.
best_error, best_C, best_gamma, svclassifier = min(
    list_error, key=lambda entry: entry[0]
)

In [8]:
# Score whichever model `svclassifier` currently refers to on the test split,
# then print its confusion matrix followed by the per-class report.
y_pred = svclassifier.predict(test_X)
print(
    confusion_matrix(test_Y, y_pred),
    classification_report(test_Y, y_pred),
    sep='\n',
)

[[ 974    0    1    0    0    1    1    1    2    0]
 [   0 1130    2    0    0    1    0    1    1    0]
 [   4    1 1015    0    1    0    1    7    3    0]
 [   0    0    1  995    0    3    0    3    5    3]
 [   0    0    3    0  968    0    2    0    0    9]
 [   2    0    0    9    1  871    4    0    3    2]
 [   4    2    1    0    2    4  944    0    1    0]
 [   0    6    9    2    0    0    0 1004    0    7]
 [   3    0    2    4    4    2    1    2  953    3]
 [   3    4    0    6    7    2    0    4    1  982]]
              precision    recall  f1-score   support

           0       0.98      0.99      0.99       980
           1       0.99      1.00      0.99      1135
           2       0.98      0.98      0.98      1032
           3       0.98      0.99      0.98      1010
           4       0.98      0.99      0.99       982
           5       0.99      0.98      0.98       892
           6       0.99      0.99      0.99       958
           7       0.98      0.98   