In [1]:
import numpy as np
import pickle
import gzip

In [2]:
def read_mnist(mnist_file):
    """
    Reads MNIST data.
    
    Parameters
    ----------
    mnist_file : string
        The name of the MNIST file (e.g., 'mnist.pkl.gz').
    
    Returns
    -------
    (train_X, train_Y, val_X, val_Y, test_X, test_Y) : tuple
        train_X : numpy array, shape (N=50000, d=784)
            Input vectors of the training set.
        train_Y: numpy array, shape (N=50000)
            Outputs of the training set.
        val_X : numpy array, shape (N=10000, d=784)
            Input vectors of the validation set.
        val_Y: numpy array, shape (N=10000)
            Outputs of the validation set.
        test_X : numpy array, shape (N=10000, d=784)
            Input vectors of the test set.
        test_Y: numpy array, shape (N=10000)
            Outputs of the test set.
    """
    f = gzip.open(mnist_file, 'rb')
    train_data, val_data, test_data = pickle.load(f, encoding='latin1')
    f.close()
    
    train_X, train_Y = train_data
    val_X, val_Y = val_data
    test_X, test_Y = test_data    
    
    return train_X, train_Y, val_X, val_Y, test_X, test_Y

In [3]:
# Test
train_X, train_Y, val_X, val_Y, test_X, test_Y = read_mnist('mnist.pkl.gz')

print('train_X.shape =', train_X.shape)
print('train_Y.shape =', train_Y.shape)
print('val_X.shape   =', val_X.shape)
print('val_Y.shape   =', val_Y.shape)
print('test_X.shape  =', test_X.shape)
print('test_Y.shape  =', test_Y.shape)

print('\ntrain_X: min = %.3f, max = %.3f' %(train_X.min(), train_X.max()))
print('train_Y: min = %d, max = %d' %(train_Y.min(), train_Y.max()))

train_X.shape = (50000, 784)
train_Y.shape = (50000,)
val_X.shape   = (10000, 784)
val_Y.shape   = (10000,)
test_X.shape  = (10000, 784)
test_Y.shape  = (10000,)

train_X: min = 0.000, max = 0.996
train_Y: min = 0, max = 9


In [10]:
from sklearn.svm import SVC
def run_RBF_kernel(train_X, train_Y, val_X, val_Y, c = 1.0, gamma = 0.001, max_iter = -1):
    """
    """
    clf = SVC(C = c, kernel = 'rbf', gamma = gamma, max_iter = max_iter)
    print("Training...")
    %time clf.fit(train_X, train_Y)
    print("Training done!")
    %time training_score = clf.score(train_X, train_Y)
    %time validation_score = clf.score(val_X, val_Y)
    err_train = 1 - training_score
    err_val = 1 - validation_score
    print("train score: ", training_score)
    print("validation score: ", validation_score)
    return err_train, err_val

In [None]:
run_RBF_kernel(train_X, train_Y, val_X, val_Y, c = 1.0, gamma = 0.1, max_iter = 10000)

In [None]:
run_RBF_kernel(train_X, train_Y, val_X, val_Y, c = 5.0, gamma = 0.1, max_iter = 10000)

Training...


In [None]:
run_RBF_kernel(train_X, train_Y, val_X, val_Y, c = 1.0, gamma = 1, max_iter = 10000)

In [None]:
run_RBF_kernel(train_X, train_Y, val_X, val_Y, c = 5.0, gamma = 1.0, max_iter = 10000)

In [5]:
def run_SVM_RBF_kernel():
    """
    """
    C, gamma = 0.1, 0.001
    k = 0
    for i in range(4):
        gamma = 0.001
        for j in range(2):
            k += 1
            print("\nIter %d:" %k)
            print("C = %s, gamma = %s" %(C, gamma))
            err_train, err_val = (0, 0)
            %time (err_train, err_val) = run_RBF_kernel(train_X, train_Y, val_X, val_Y, C, gamma)
            print("Error train: ", err_train)
            print("Error validation: ", err_val)
            gamma *= 10
        C *= 10

In [6]:
%time run_SVM_RBF_kernel()


Iter 1:
C = 0.1, gamma = 0.001
train score:  0.90176
validation score:  0.9139
Wall time: 1h 29min 39s
Error train:  0
Error validation:  0

Iter 2:
C = 0.1, gamma = 0.01
train score:  0.95298
validation score:  0.9578
Wall time: 50min 55s
Error train:  0
Error validation:  0

Iter 3:
C = 1.0, gamma = 0.001
train score:  0.93578
validation score:  0.9411
Wall time: 44min 44s
Error train:  0
Error validation:  0

Iter 4:
C = 1.0, gamma = 0.01
train score:  0.98474
validation score:  0.9777
Wall time: 26min 37s
Error train:  0
Error validation:  0

Iter 5:
C = 10.0, gamma = 0.001
train score:  0.96206
validation score:  0.9592
Wall time: 28min 12s
Error train:  0
Error validation:  0

Iter 6:
C = 10.0, gamma = 0.01
train score:  0.99942
validation score:  0.9835
Wall time: 24min 58s
Error train:  0
Error validation:  0

Iter 7:
C = 100.0, gamma = 0.001
train score:  0.99058
validation score:  0.9718
Wall time: 21min 14s
Error train:  0
Error validation:  0

Iter 8:
C = 100.0, gamma = 0.

### RBF kernel

In [5]:
C = 0.1
print("C = ", C)
err_train, err_val = (0, 0)
%time (err_train, err_val) = run_RBF_kernel(train_X, train_Y, val_X, val_Y, C, 'auto')
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.1
train score:  0.90718
validation score:  0.9178
Wall time: 1h 48min 42s
Error train:  0.09282000000000001
Error validation:  0.08220000000000005


In [15]:
C = 1.0
print("C = ", C)
err_train, err_val = (0, 0)
%time (err_train, err_val) = run_RBF_kernel(train_X, train_Y, val_X, val_Y, C, 'auto')
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  1.0
train score:  0.93998
validation score:  0.9448
Wall time: 49min 56s
Error train:  0.06001999999999996
Error validation:  0.05520000000000003


In [6]:
C = 10
print("C = ", C)
err_train, err_val = (0, 0)
%time (err_train, err_val) = run_RBF_kernel(train_X, train_Y, val_X, val_Y, C, 'auto')
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  10
train score:  0.9678
validation score:  0.9643
Wall time: 29min 15s
Error train:  0.032200000000000006
Error validation:  0.035699999999999954


In [7]:
C = 100
print("C = ", C)
err_train, err_val = (0, 0)
%time (err_train, err_val) = run_RBF_kernel(train_X, train_Y, val_X, val_Y, C, 'auto')
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  100
train score:  0.99436
validation score:  0.9753
Wall time: 22min 48s
Error train:  0.005639999999999978
Error validation:  0.024700000000000055


In [8]:
C = 1000
print("C = ", C)
err_train, err_val = (0, 0)
%time (err_train, err_val) = run_RBF_kernel(train_X, train_Y, val_X, val_Y, C, 'auto')
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  1000
train score:  1.0
validation score:  0.9734
Wall time: 22min 58s
Error train:  0.0
Error validation:  0.026599999999999957


---

### Linear kernel

In [3]:
from sklearn.svm import LinearSVC
def run_linear_kernel(train_X, train_Y, val_X, val_Y, c):
    """
    """
    clf = LinearSVC(C = c, max_iter = 50000)
    clf.fit(train_X, train_Y)
    err_train = 1 - clf.score(train_X, train_Y)
    err_val = 1 - clf.score(val_X, val_Y)
    print("train score: ", clf.score(train_X, train_Y))
    print("validation score: ", clf.score(val_X, val_Y))
    return err_train, err_val

In [8]:
C = 0.1
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.1
train score:  0.92396
validation score:  0.9214
Wall time: 37.7 s
Error train:  0.07604
Error validation:  0.0786


In [5]:
C = 0.1
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.1
train score:  0.92396
validation score:  0.9214
Wall time: 39.8 s
Error train:  0.07604
Error validation:  0.0786


In [6]:
C = 0.3
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.3
train score:  0.92496
validation score:  0.9218
Wall time: 1min 41s
Error train:  0.07504
Error validation:  0.07820000000000005


In [8]:
C = 0.29
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.29
train score:  0.92494
validation score:  0.9217
Wall time: 1min 55s
Error train:  0.07506000000000002
Error validation:  0.07830000000000004


In [9]:
C = 0.32
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.32
train score:  0.9251
validation score:  0.9216
Wall time: 2min 19s
Error train:  0.07489999999999997
Error validation:  0.07840000000000003


In [10]:
C = 0.28
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.28
train score:  0.92492
validation score:  0.9216
Wall time: 1min 32s
Error train:  0.07508000000000004
Error validation:  0.07840000000000003


In [11]:
C = 0.315
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.315
train score:  0.9251
validation score:  0.9218
Wall time: 1min 42s
Error train:  0.07489999999999997
Error validation:  0.07820000000000005


In [13]:
C = 0.31525
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.31525
train score:  0.9251
validation score:  0.9218
Wall time: 1min 45s
Error train:  0.07489999999999997
Error validation:  0.07820000000000005


In [14]:
C = 0.3154
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.3154
train score:  0.9251
validation score:  0.9218
Wall time: 1min 37s
Error train:  0.07489999999999997
Error validation:  0.07820000000000005


In [15]:
C = 0.3153
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.3153
train score:  0.9251
validation score:  0.9218
Wall time: 1min 39s
Error train:  0.07489999999999997
Error validation:  0.07820000000000005


In [16]:
C = 0.3152
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.3152
train score:  0.9251
validation score:  0.9218
Wall time: 1min 31s
Error train:  0.07489999999999997
Error validation:  0.07820000000000005


In [17]:
C = 0.3151
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.3151
train score:  0.9251
validation score:  0.9218
Wall time: 1min 50s
Error train:  0.07489999999999997
Error validation:  0.07820000000000005


In [18]:
C = 0.3125
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.3125
train score:  0.9251
validation score:  0.9219
Wall time: 1min 30s
Error train:  0.07489999999999997
Error validation:  0.07809999999999995


In [22]:
C = 0.31275
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.31275
train score:  0.9251
validation score:  0.9219
Wall time: 1min 27s
Error train:  0.07489999999999997
Error validation:  0.07809999999999995


In [23]:
C = 0.311
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.311
train score:  0.92506
validation score:  0.9218
Wall time: 1min 49s
Error train:  0.07494
Error validation:  0.07820000000000005


In [24]:
C = 0.312
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.312
train score:  0.92508
validation score:  0.9218
Wall time: 1min 40s
Error train:  0.07491999999999999
Error validation:  0.07820000000000005


In [25]:
C = 0.3123
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.3123
train score:  0.92508
validation score:  0.9218
Wall time: 1min 39s
Error train:  0.07491999999999999
Error validation:  0.07820000000000005


In [26]:
C = 0.314
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.314
train score:  0.9251
validation score:  0.9218
Wall time: 1min 34s
Error train:  0.07489999999999997
Error validation:  0.07820000000000005


In [28]:
C = 0.316
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.316
train score:  0.9251
validation score:  0.9217
Wall time: 1min 47s
Error train:  0.07489999999999997
Error validation:  0.07830000000000004


In [29]:
C = 0.316
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.316
train score:  0.9251
validation score:  0.9217
Wall time: 1min 39s
Error train:  0.07489999999999997
Error validation:  0.07830000000000004


In [30]:
C = 0.317
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.317
train score:  0.9251
validation score:  0.9217
Wall time: 1min 38s
Error train:  0.07489999999999997
Error validation:  0.07830000000000004


In [31]:
C = 0.318
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.318
train score:  0.9251
validation score:  0.9216
Wall time: 1min 43s
Error train:  0.07489999999999997
Error validation:  0.07840000000000003


In [20]:
C = 0.325
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.325
train score:  0.92512
validation score:  0.9216
Wall time: 1min 43s
Error train:  0.07487999999999995
Error validation:  0.07840000000000003


In [19]:
C = 0.3275
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.3275
train score:  0.92516
validation score:  0.9216
Wall time: 1min 41s
Error train:  0.07484000000000002
Error validation:  0.07840000000000003


In [21]:
C = 0.3175
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.3175
train score:  0.9251
validation score:  0.9217
Wall time: 1min 32s
Error train:  0.07489999999999997
Error validation:  0.07830000000000004


In [7]:
C = 0.31
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  0.31
train score:  0.92506
validation score:  0.9218
Wall time: 1min 56s
Error train:  0.07494
Error validation:  0.07820000000000005


In [9]:
C = 1
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  1
train score:  0.92636
validation score:  0.9205
Wall time: 6min 1s
Error train:  0.07364000000000004
Error validation:  0.07950000000000002


In [10]:
C = 3
print("C = ", C)
%time (err_train, err_val) = run_linear_kernel(train_X, train_Y, val_X, val_Y, C)
print("Error train: ", err_train)
print("Error validation: ", err_val)

C =  3
train score:  0.92716
validation score:  0.9205
Wall time: 18min 23s
Error train:  0.07284000000000002
Error validation:  0.07950000000000002
