In [10]:
from sklearn.svm import SVC
import numpy as np
import pickle
import gzip
import pandas as pd
import time
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns; sns.set(font_scale=1.2)
from datetime import datetime
# Allows charts to appear in the notebook
%matplotlib inline

In [11]:
def read_mnist(mnist_file):
    """
    Reads MNIST data from a gzip-compressed pickle file.
    
    The file is expected to contain a pickled 3-tuple
    (train_data, val_data, test_data), where each element is itself an
    (X, Y) pair.
    
    Parameters
    ----------
    mnist_file : string
        The name of the MNIST file (e.g., 'mnist.pkl.gz').
    
    Returns
    -------
    (train_X, train_Y, val_X, val_Y, test_X, test_Y) : tuple
        train_X : numpy array, shape (N=50000, d=784)
            Input vectors of the training set.
        train_Y: numpy array, shape (N=50000)
            Outputs of the training set.
        val_X : numpy array, shape (N=10000, d=784)
            Input vectors of the validation set.
        val_Y: numpy array, shape (N=10000)
            Outputs of the validation set.
        test_X : numpy array, shape (N=10000, d=784)
            Input vectors of the test set.
        test_Y: numpy array, shape (N=10000)
            Outputs of the test set.
    
    Notes
    -----
    Unpickling can execute arbitrary code, so only pass files that come
    from a trusted source.
    """
    # The context manager closes the gzip handle even when unpickling or the
    # tuple unpacking below raises (the manual close() used to be skipped then).
    # encoding='latin1' is presumably needed because the archive was pickled
    # under Python 2 -- TODO confirm.
    with gzip.open(mnist_file, 'rb') as f:
        train_data, val_data, test_data = pickle.load(f, encoding='latin1')
    
    train_X, train_Y = train_data
    val_X, val_Y = val_data
    test_X, test_Y = test_data
    
    return train_X, train_Y, val_X, val_Y, test_X, test_Y

In [12]:
# Smoke test: load the data set, then report array shapes and value ranges
train_X, train_Y, val_X, val_Y, test_X, test_Y = read_mnist('mnist.pkl.gz')

# (caption, array) pairs, printed in the same order as the returned tuple
shape_report = (
    ('train_X.shape =', train_X),
    ('train_Y.shape =', train_Y),
    ('val_X.shape   =', val_X),
    ('val_Y.shape   =', val_Y),
    ('test_X.shape  =', test_X),
    ('test_Y.shape  =', test_Y),
)
for caption, array in shape_report:
    print(caption, array.shape)

# Value ranges of the training inputs and labels
x_low, x_high = train_X.min(), train_X.max()
y_low, y_high = train_Y.min(), train_Y.max()
print('\ntrain_X: min = %.3f, max = %.3f' %(x_low, x_high))
print('train_Y: min = %d, max = %d' %(y_low, y_high))

train_X.shape = (50000, 784)
train_Y.shape = (50000,)
val_X.shape   = (10000, 784)
val_Y.shape   = (10000,)
test_X.shape  = (10000, 784)
test_Y.shape  = (10000,)

train_X: min = 0.000, max = 0.996
train_Y: min = 0, max = 9


In [13]:
# save the model to disk
def save_model(model, mode,  C_k, gamma_k):
    """
    Pickles a model into the current working directory.
    
    The file name encodes the hyper-parameters:
    '<mode>_C<C_k>.sav' when mode is 'linear', otherwise
    '<mode>_C<C_k>_gamma<gamma_k>.sav'.
    
    Parameters
    ----------
    model : object
        Any picklable object (e.g., a fitted classifier).
    mode : string
        Kernel label used as the file-name prefix. The exact value 'linear'
        omits the gamma part of the name.
    C_k : number
        Value of C to embed in the file name (formatted with str()).
    gamma_k : number
        Value of gamma to embed in the file name; ignored when mode is
        'linear'.
    """
    file_name = mode + '_C' + str(C_k)
    if mode != 'linear':
        file_name += '_gamma' + str(gamma_k)
    file_name += '.sav'
    
    # Close the handle deterministically so the pickle is fully flushed to disk
    # before anyone tries to read it back.
    with open(file_name, 'wb') as f:
        pickle.dump(model, f)
        
# load the model from disk
def load_model(file_name):
    """
    Loads a pickled model from disk.
    
    Parameters
    ----------
    file_name : string
        Path of the pickle file to read.
    
    Returns
    -------
    loaded_model : object
        The unpickled object.
    
    Notes
    -----
    Unpickling can execute arbitrary code, so only load files that come
    from a trusted source.
    """
    # The context manager releases the file handle even if unpickling fails.
    with open(file_name, 'rb') as f:
        loaded_model = pickle.load(f)
    return loaded_model

In [14]:
# Grid search over (C, gamma) for an RBF-kernel SVM.
# For every selected pair: fit on the training set, pickle the model, then
# report the training and validation error together with the fit time.
list_error = []
C_range = [0.01, 0.1, 1, 10, 100]
gamma_range = [0.0001, 0.001, 0.01, 0.1, 1]
for C in C_range:
    for gamma in gamma_range:
        # Only part of the grid is trained in this cell:
        #   * C == 0.01 is skipped entirely,
        #   * C == 0.1 is trained only for gamma values other than
        #     0.0001, 0.001 and 0.01,
        #   * C in {1, 10, 100} is trained for every gamma.
        # NOTE(review): the skipped pairs were presumably handled in an
        # earlier run -- confirm before relying on this cell alone.
        partial_row = C == 0.1 and gamma not in (0.0001, 0.001, 0.01)
        full_row = C in (1, 10, 100)
        if not (partial_row or full_row):
            continue

        #Training (train_time covers fit() only, not prediction):
        start_time = datetime.now()
        svclassifier = SVC(C = C,gamma = gamma,kernel='rbf')
        svclassifier.fit(train_X, train_Y)
        train_time = datetime.now() - start_time

        #Save model:
        save_model(svclassifier, 'Gaussian', C, gamma)

        #Get Y predict from input X
        y_train_pred = svclassifier.predict(train_X)
        y_val_pred = svclassifier.predict(val_X)

        #Get error (1 - fraction of correctly classified samples):
        train_error = 1 -  (accuracy_score(train_Y, y_train_pred, normalize=False) / float(train_Y.size))
        val_error = 1 -  (accuracy_score(val_Y, y_val_pred, normalize=False) / float(val_Y.size))

        #Print results:
        print("C: {}  |  gamma: {}  |  train error: {}  |  validation error: {}  |  time: {}".format(C, gamma, train_error, val_error, train_time))
                


C: 0.1  |  gamma: 0.1  |  train error: 0.28952  |  validation error: 0.3125  |  time: 0:50:24.369580
C: 0.1  |  gamma: 1  |  train error: 0.88644  |  validation error: 0.8936  |  time: 1:30:14.722008
C: 1  |  gamma: 0.0001  |  train error: 0.09970000000000001  |  validation error: 0.08789999999999998  |  time: 0:19:19.807349
C: 1  |  gamma: 0.001  |  train error: 0.06422000000000005  |  validation error: 0.05889999999999995  |  time: 0:07:41.350515
C: 1  |  gamma: 0.01  |  train error: 0.015260000000000051  |  validation error: 0.022299999999999986  |  time: 0:04:10.119237
C: 1  |  gamma: 0.1  |  train error: 4.0000000000040004e-05  |  validation error: 0.04479999999999995  |  time: 1:16:10.113186
C: 1  |  gamma: 1  |  train error: 0.0  |  validation error: 0.8176  |  time: 2:39:45.561629
C: 10  |  gamma: 0.0001  |  train error: 0.06864000000000003  |  validation error: 0.06310000000000004  |  time: 0:09:08.717316
C: 10  |  gamma: 0.001  |  train error: 0.037939999999999974  |  validat