In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import os
import struct
import numpy as np

In [2]:
"""
Loosely inspired by http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py
which is GPL licensed.
"""

def read(dataset = "training", path = "./data/raw"):
    """
    Load one split of the MNIST data set from its raw IDX files.

    Parameters
    ----------
    dataset : str
        Either "training" or "testing"; selects which pair of files is read.
    path : str
        Directory that contains the uncompressed MNIST IDX files.

    Returns
    -------
    (images, labels) : tuple of numpy.ndarray
        ``images`` is a float64 array of shape (n, rows, cols) and ``labels``
        is a float64 array of shape (n,), where n is the number of labels
        found in the label file.

    Raises
    ------
    ValueError
        If ``dataset`` is neither "training" nor "testing".
    """

    # Compare by value: `is` checks object identity and only matched string
    # literals by accident of interning (and warns on Python >= 3.8).
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        # Call syntax works on both Python 2 and Python 3.
        raise ValueError("dataset must be 'testing' or 'training'")

    # Load everything in some numpy arrays
    with open(fname_lbl, 'rb') as flbl:
        # Skip the 8-byte big-endian header (magic number, item count).
        _magic, _num = struct.unpack(">II", flbl.read(8))
        # Read labels as unsigned bytes, matching how the pixels are read
        # below; identical to the old int8 read for digit labels 0-9.
        lbl = np.fromfile(flbl, dtype=np.uint8)

    with open(fname_img, 'rb') as fimg:
        # 16-byte big-endian header: magic number, image count, rows, cols.
        _magic, _num, rows, cols = struct.unpack(">IIII", fimg.read(16))
        # The image count is taken from the label array, so a mismatch
        # between the two files surfaces as a reshape error.
        img = np.fromfile(fimg, dtype=np.uint8).reshape(len(lbl), rows, cols)

    return img.astype(np.float64), lbl.astype(np.float64)

def squash_data(data):
    """
    Flatten each sample of ``data`` into a single row.

    The first axis is kept and all remaining axes are merged, so an array of
    shape (n, a, b, ...) comes back with shape (n, a*b*...).
    """
    sample_count = data.shape[0]
    return data.reshape(sample_count, -1)

In [3]:
# Read both MNIST splits from read()'s default data directory.
train_images, train_labels = read(dataset='training')
test_images, test_labels = read(dataset='testing')

# Flatten every image to one row so each split becomes a 2-D feature matrix.
train_x, test_x = squash_data(train_images), squash_data(test_images)

## KNN

In [4]:
from sklearn.neighbors import KNeighborsClassifier

In [5]:
# 1-nearest-neighbour baseline fitted on the flattened training pixels.
model = KNeighborsClassifier(n_neighbors=1, n_jobs=-1)
model.fit(train_x, train_labels)

# Score on the held-out test split.
knn_accuracy = model.score(test_x, test_labels)
print('Accuracy = ', knn_accuracy)

('Accuracy = ', 0.9691)


## Linear SVM

In [6]:
from sklearn import linear_model

In [7]:
# Linear classifier trained with SGD on the flattened training pixels.
# NOTE(review): no random_state is passed, so the score presumably varies
# between runs -- confirm against the SGDClassifier defaults.
model = linear_model.SGDClassifier(max_iter=1000, tol=1e-3, n_jobs=-1)
model.fit(train_x, train_labels)

# Score on the held-out test split.
linear_accuracy = model.score(test_x, test_labels)
print('Accuracy = ', linear_accuracy)

('Accuracy = ', 0.8683)


## Polynomial SVM

In [5]:
from sklearn.svm import SVC

In [9]:
# SVM with a degree-2 polynomial kernel on the flattened training pixels.
model = SVC(kernel='poly', degree=2)
model.fit(train_x, train_labels)

# Score on the held-out test split.
poly_accuracy = model.score(test_x, test_labels)
print('Accuracy = ', poly_accuracy)

('Accuracy = ', 0.9806)


## RBF SVM

In [None]:
# SVM with an RBF kernel on the flattened training pixels.
model = SVC(kernel='rbf', gamma='auto')
model.fit(train_x, train_labels)

# The original print referenced `degree`, which is never defined in this
# notebook and is not a setting used for this model, so the cell would raise
# NameError on a fresh kernel. Report the accuracy like the other cells do.
print('Accuracy = ',model.score(test_x, test_labels))