In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook as tqdm
import pandas as pd
from sklearn.svm import LinearSVC, SVC

In [2]:
def read_data(data_dir='MNIST'):
  """Load the MNIST train/test splits from gzipped IDX files.

  The number of items (and, for images, the row/column size) is taken from
  each file's IDX header instead of being hard-coded, and every file is
  closed as soon as it has been read.

  Args:
    data_dir: Directory containing the four ``*-ubyte.gz`` files.
      Defaults to ``'MNIST'`` relative to the working directory.

  Returns:
    Tuple ``(train_data, train_labels, test_data, test_labels)``.
    Image arrays are float32 with shape ``(num_images, rows * cols)`` and
    values scaled to ``[0, 1]``; label arrays are int64 with shape
    ``(num_labels,)``.

  Raises:
    ValueError: If a file's magic number is not the expected one or the
      file holds fewer bytes than its header announces.
  """
  import gzip
  import os

  IMAGES_MAGIC = 2051  # 0x00000803: unsigned-byte data, 3 dimensions
  LABELS_MAGIC = 2049  # 0x00000801: unsigned-byte data, 1 dimension

  def read_header(f, path, magic, num_dims):
    # IDX header = 4-byte magic followed by one big-endian int32 per dimension.
    size = 4 * (1 + num_dims)
    header = f.read(size)
    if len(header) != size:
      raise ValueError('%s: truncated IDX header' % path)
    fields = [int.from_bytes(header[i:i + 4], 'big') for i in range(0, size, 4)]
    if fields[0] != magic:
      raise ValueError('%s: bad magic number %d (expected %d)'
                       % (path, fields[0], magic))
    return fields[1:]

  def read_payload(f, path, num_bytes):
    buf = f.read(num_bytes)
    if len(buf) != num_bytes:
      raise ValueError('%s: expected %d data bytes, found %d'
                       % (path, num_bytes, len(buf)))
    return buf

  def read_images(name):
    path = os.path.join(data_dir, name)
    with gzip.open(path, 'rb') as f:
      num_images, rows, cols = read_header(f, path, IMAGES_MAGIC, 3)
      buf = read_payload(f, path, num_images * rows * cols)
    data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
    return data.reshape(num_images, rows * cols) / 255.0  # Normalized

  def read_labels(name):
    path = os.path.join(data_dir, name)
    with gzip.open(path, 'rb') as f:
      (num_labels,) = read_header(f, path, LABELS_MAGIC, 1)
      # Each label is a single unsigned byte.
      buf = read_payload(f, path, num_labels)
    return np.frombuffer(buf, dtype=np.uint8).astype(np.int64)

  train_data = read_images('train-images-idx3-ubyte.gz')
  train_labels = read_labels('train-labels-idx1-ubyte.gz')

  test_data = read_images('t10k-images-idx3-ubyte.gz')
  test_labels = read_labels('t10k-labels-idx1-ubyte.gz')

  return train_data, train_labels, test_data, test_labels

# Load the four MNIST arrays once; the SVM helper functions in this notebook
# read them as module-level globals.
train_data, train_labels, test_data, test_labels = read_data()

## Linear SVM

In [4]:
def test_linear_svm(C):
  """Train a hinge-loss linear SVM on the full training set and score it.

  Uses the module-level ``train_data``/``train_labels`` for fitting and
  ``test_data``/``test_labels`` for evaluation.

  Args:
    C: Regularization parameter forwarded to ``LinearSVC``.

  Returns:
    Dict with keys ``'train_score'`` and ``'test_score'`` holding the value
    of ``score`` on the training and test sets respectively.
  """
  model = LinearSVC(C=C, loss='hinge').fit(train_data, train_labels)
  train_acc = model.score(train_data, train_labels)
  test_acc = model.score(test_data, test_labels)
  return {'train_score': train_acc, 'test_score': test_acc}

In [31]:
# Sweep C from 0.01 to 100 in factors of ten and print the train/test scores
# returned by test_linear_svm for each value.
for C in [0.01, 0.1, 1, 10, 100]:
  score = test_linear_svm(C)
  print(C, ':', score)

0.01 : {'train_score': 0.9165666666666666, 'test_score': 0.9178}




0.1 : {'train_score': 0.9251666666666667, 'test_score': 0.9228}




1 : {'train_score': 0.9295833333333333, 'test_score': 0.9219}




10 : {'train_score': 0.9280833333333334, 'test_score': 0.9175}




100 : {'train_score': 0.8571166666666666, 'test_score': 0.8529}


## RBF-kernel SVM

In [4]:
def test_rbf_svm(C, n_samples=10_000, seed=None):
  """Fit an RBF-kernel SVC on a random training subsample and score it.

  The classifier is trained on ``n_samples`` randomly chosen rows of the
  module-level ``train_data``/``train_labels`` but scored on the *entire*
  training set and on ``test_data``/``test_labels``.

  Args:
    C: Regularization parameter forwarded to ``SVC``.
    n_samples: Number of training rows used for fitting. Defaults to
      10_000 because fitting on more was found to take impractically long.
    seed: Optional integer seed for the subsample. ``None`` (default) draws
      from NumPy's global random state, so results then depend on whatever
      seeding the caller did beforehand.

  Returns:
    Dict with the fitted classifier under ``'clf'`` and its scores under
    ``'train_score'`` and ``'test_score'``.
  """
  rng = np.random if seed is None else np.random.RandomState(seed)
  inds = np.arange(len(train_data))
  rng.shuffle(inds)
  inds = inds[:n_samples]
  X, y = train_data[inds], train_labels[inds]
  # `degree` is deliberately not passed: SVC only uses it for kernel='poly',
  # so supplying it alongside kernel='rbf' has no effect on the model.
  clf = SVC(kernel='rbf', C=C)
  clf.fit(X, y)
  print("Done fitting.")
  return {
    'clf': clf,
    'train_score': clf.score(train_data, train_labels),
    'test_score': clf.score(test_data, test_labels)
  }

In [5]:
# Fit the kernel SVM with C=1.0; the returned dict keeps the fitted classifier
# under 'clf' alongside its train/test scores.
result = test_rbf_svm(1.0)



Done fitting.


In [17]:
# Display the fitted classifier's parameters together with its scores.
result

{'clf': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
   decision_function_shape='ovr', degree=2, gamma='auto_deprecated',
   kernel='rbf', max_iter=-1, probability=False, random_state=None,
   shrinking=True, tol=0.001, verbose=False),
 'train_score': 0.9196666666666666,
 'test_score': 0.9259}

In [10]:
# Re-score the stored classifier on the test set; this recomputes the value
# already held in result['test_score'].
result['clf'].score(test_data, test_labels)

0.9259