In [11]:
import gzip
import numpy as np
from sklearn.svm import SVC, LinearSVC

In [12]:
def load_mnist_images(filename):
    """Load a gzip-compressed IDX3 image file as an array of flattened images.

    The 16-byte header (four big-endian 32-bit integers: magic number, image
    count, rows, columns) is parsed and validated rather than skipped blindly,
    so image sizes other than 28x28 are handled and a wrong or truncated file
    is rejected instead of being silently mis-shaped.

    Parameters
    ----------
    filename : str or path-like
        Path of the ``.gz`` file to read.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(n_images, rows * cols)``; one flattened
        image per row (``(n, 784)`` for standard MNIST files).

    Raises
    ------
    ValueError
        If the header is truncated, the magic number is not 2051, or the
        number of pixel bytes disagrees with the header.
    """
    header_size = 16
    idx3_magic = 2051  # 0x00000803: unsigned-byte data, 3 dimensions

    with gzip.open(filename, 'rb') as f:
        raw = f.read()

    if len(raw) < header_size:
        raise ValueError(
            '{0}: file too short for an IDX3 header ({1} bytes)'.format(filename, len(raw)))

    # Convert to Python ints so the size arithmetic below cannot overflow int32.
    magic, n_images, n_rows, n_cols = (
        int(v) for v in np.frombuffer(raw, dtype='>i4', count=4))
    if magic != idx3_magic:
        raise ValueError(
            '{0}: bad magic number {1}, expected {2}'.format(filename, magic, idx3_magic))

    pixels_per_image = n_rows * n_cols
    data = np.frombuffer(raw, np.uint8, offset=header_size)
    if data.size != n_images * pixels_per_image:
        raise ValueError(
            '{0}: header declares {1} images of {2}x{3} but payload has {4} bytes'.format(
                filename, n_images, n_rows, n_cols, data.size))

    return data.reshape(n_images, pixels_per_image)

def load_mnist_labels(filename):
    """Load a gzip-compressed IDX1 label file as a 1-D array of labels.

    The 8-byte header (two big-endian 32-bit integers: magic number and label
    count) is parsed and validated rather than skipped blindly, so passing an
    image file or a truncated file raises instead of returning garbage labels.

    Parameters
    ----------
    filename : str or path-like
        Path of the ``.gz`` file to read.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(n_labels,)``.

    Raises
    ------
    ValueError
        If the header is truncated, the magic number is not 2049, or the
        number of label bytes disagrees with the header.
    """
    header_size = 8
    idx1_magic = 2049  # 0x00000801: unsigned-byte data, 1 dimension

    with gzip.open(filename, 'rb') as f:
        raw = f.read()

    if len(raw) < header_size:
        raise ValueError(
            '{0}: file too short for an IDX1 header ({1} bytes)'.format(filename, len(raw)))

    magic, n_labels = (int(v) for v in np.frombuffer(raw, dtype='>i4', count=2))
    if magic != idx1_magic:
        raise ValueError(
            '{0}: bad magic number {1}, expected {2}'.format(filename, magic, idx1_magic))

    data = np.frombuffer(raw, np.uint8, offset=header_size)
    if data.size != n_labels:
        raise ValueError(
            '{0}: header declares {1} labels but payload has {2} bytes'.format(
                filename, n_labels, data.size))

    return data

In [13]:
## Training split: read the images, then the matching labels
train_data, train_labels = (
    load_mnist_images('train-images-idx3-ubyte.gz'),
    load_mnist_labels('train-labels-idx1-ubyte.gz'),
)

## Testing split, read the same way
test_data, test_labels = (
    load_mnist_images('t10k-images-idx3-ubyte.gz'),
    load_mnist_labels('t10k-labels-idx1-ubyte.gz'),
)

In [14]:
# Sanity check: display the shape of each loaded array, in load order
tuple(arr.shape for arr in (train_data, train_labels, test_data, test_labels))

((60000, 784), (60000,), (10000, 784), (10000,))

In [15]:
# Sweep the regularisation strength C for a linear SVM with hinge loss and
# report mean accuracy (clf.score) on the training and test sets for each C.
# NOTE(review): the features are passed to the solver exactly as loaded, with
# no scaling; if the fit emits ConvergenceWarning, consider scaling the inputs
# or raising max_iter -- confirm before trusting the comparison across C.
c_vals = [0.01, 0.1, 1.0, 10.0, 100.0]
for c_val in c_vals:
    # random_state pins the solver's data shuffling so that a fresh
    # Restart & Run All reproduces the same accuracies.
    clf = LinearSVC(loss='hinge', C=c_val, random_state=0)
    clf.fit(train_data, train_labels)
    train_acc = clf.score(train_data, train_labels)
    test_acc = clf.score(test_data, test_labels)
    print('C = {0}\nTraining accuracy = {1}'.format(c_val, train_acc))
    print('Test accuracy = {0}'.format(test_acc))



C = 0.01
Training accuracy = 0.8650666666666667
Test accuracy = 0.8583




C = 0.1
Training accuracy = 0.88445
Test accuracy = 0.8759




C = 1.0
Training accuracy = 0.8886833333333334
Test accuracy = 0.8776




C = 10.0
Training accuracy = 0.889
Test accuracy = 0.879




C = 100.0
Training accuracy = 0.8769333333333333
Test accuracy = 0.8708


In [19]:
# Kernel SVM with a degree-2 polynomial kernel, fitted on the full training set
clf = SVC(C=1.0, kernel='poly', degree=2)
clf.fit(train_data, train_labels)

# score() gives mean accuracy as a fraction; turn it into an error percentage
poly_train_score = clf.score(train_data, train_labels)
train_error = (1 - poly_train_score) * 100
poly_test_score = clf.score(test_data, test_labels)
test_error = (1 - poly_test_score) * 100

print('Training error = {0}%'.format(train_error))
print('Test error = {0}%'.format(test_error))

Training error = 1.248333333333329%
Test error = 2.2599999999999953%
