### MNIST Data classification using SVM

We will use linear and quadratic-kernel SVMs to predict the digit from the input pixel vector. First, we load the data.

In [1]:
import numpy as np
from os.path import join, exists 

import os
from urllib.request import urlretrieve

# Local directory in which the four gzip-compressed MNIST archives are cached.
mnist_dataroot = "./mnist"
# Remote location the archives are fetched from when they are not cached yet.
mnist_base_url = 'http://yann.lecun.com/exdb/mnist/'

if not exists(mnist_dataroot):
    print('Creating mnist dataroot directory')
    # os.makedirs works on every platform (the posix module is Unix-only) and
    # exist_ok tolerates the directory appearing between the check and the call.
    os.makedirs(mnist_dataroot, exist_ok=True)
else:
    print('mnist dataroot directory exists')


# Archive file names: images and labels for the training and the test split.
train_file, train_labels = "train-images-idx3-ubyte.gz", "train-labels-idx1-ubyte.gz"
test_file, test_labels = "t10k-images-idx3-ubyte.gz", "t10k-labels-idx1-ubyte.gz"

for f in [train_file, train_labels, test_file, test_labels]:
    full_file = join(mnist_dataroot, f)
    if exists(full_file):
        print('File', full_file, ' exists, not downloading')
    else:
        # Download under a temporary name and rename only on success, so an
        # interrupted transfer is never mistaken for a cached archive next run.
        partial_file = full_file + '.part'
        urlretrieve(mnist_base_url + f, partial_file)
        os.replace(partial_file, full_file)
        print('File', full_file, ' downloaded and saved')


def load_(file_name, is_data_file):
    """Read a gzip-compressed MNIST file into a uint8 NumPy array.

    Parameters
    ----------
    file_name : str
        Path of the ``.gz`` archive to read.
    is_data_file : bool
        True for an image archive: the first 16 bytes are skipped and the
        remainder is reshaped to rows of 784 values. False for a label
        archive: the first 8 bytes are skipped and the result stays flat.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(n, 784)`` for image archives, ``(n,)``
        otherwise.
    """
    import gzip

    # Number of leading bytes to skip before the payload starts.
    header_bytes = 16 if is_data_file else 8

    with gzip.open(file_name, 'rb') as archive:
        raw = archive.read()

    values = np.frombuffer(raw, np.uint8, offset=header_bytes)
    if not is_data_file:
        return values
    # One row per image, 784 pixel values each.
    return values.reshape((-1, 784))
        
    
# Decode all four archives in order. The list on the right is built completely
# before any name is rebound, so train_labels / test_labels still hold the
# archive file names there and only become label arrays on assignment.
train_data, train_labels, test_data, test_labels = [
    load_(join(mnist_dataroot, archive_name), holds_images)
    for archive_name, holds_images in (
        (train_file, True),
        (train_labels, False),
        (test_file, True),
        (test_labels, False),
    )
]
# Last expression of the cell: shows the shapes of the four arrays.
train_data.shape, train_labels.shape, test_data.shape, test_labels.shape

mnist dataroot directory exists
File ./mnist/train-images-idx3-ubyte.gz  exists, not downloading
File ./mnist/train-labels-idx1-ubyte.gz  exists, not downloading
File ./mnist/t10k-images-idx3-ubyte.gz  exists, not downloading
File ./mnist/t10k-labels-idx1-ubyte.gz  exists, not downloading


((60000, 784), (60000,), (10000, 784), (10000,))

Let's train a linear SVM first, with different values of C.

In [2]:
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

# Regularisation strengths to sweep over, one linear SVM is fitted per value.
C = [0.01,0.1,1.0,10.0,100.0]
for c in C:
    # random_state pins the solver's pseudo-random data shuffling so that
    # re-running the cell prints the same error rates.
    # NOTE(review): the inputs are raw, unscaled pixel values; the solver may
    # stop at max_iter before converging -- confirm and consider scaling.
    model = LinearSVC(loss='hinge', C=c, random_state=0)
    model.fit(train_data, train_labels)
    # Error rate = 1 - accuracy, on the training and on the held-out test set.
    train_error = 1 - accuracy_score(train_labels, model.predict(train_data))
    test_error = 1 - accuracy_score(test_labels, model.predict(test_data))
    print('Using C =', c, 'train error', train_error,
          ', test error is ', test_error)
    
    

Using C = 0.01 train error 0.126416666667 , test error is  0.1332
Using C = 0.1 train error 0.123183333333 , test error is  0.1311
Using C = 1.0 train error 0.121116666667 , test error is  0.1298
Using C = 10.0 train error 0.145233333333 , test error is  0.1532
Using C = 100.0 train error 0.127883333333 , test error is  0.1346


Now let's train an SVM with a polynomial kernel of degree 2.

In [3]:
from sklearn.svm import SVC

# Kernel SVM with a degree-2 polynomial kernel (coef0 = 1) and C = 1;
# fit() returns the estimator itself, so construction and training are chained.
model = SVC(C=1, kernel='poly', degree=2, coef0=1).fit(train_data, train_labels)

# Error rate = 1 - accuracy, on the training set first, then on the test set.
train_error = 1 - accuracy_score(train_labels, model.predict(train_data))
test_error = 1 - accuracy_score(test_labels, model.predict(test_data))
print('Using SVC, Using C = 1.0, train error', train_error,
      ', test error is ', test_error)

Using SVC, Using C = 1.0, train error 0.0 , test error is  0.0194
