In [1]:
%matplotlib inline

import sys
import os,gzip
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.svm import LinearSVC,SVC
matplotlib.rc('xtick', labelsize=14) 
matplotlib.rc('ytick', labelsize=14)

if sys.version_info[0] == 2:
    from urllib import urlretrieve
else:
    from urllib.request import urlretrieve

In [2]:
# Function that downloads a specified MNIST data file from Yann Le Cun's website
def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
    print("Downloading %s" % filename)
    urlretrieve(source + filename, filename)

# Invokes download() if necessary, then reads in images
def load_mnist_images(filename):
    if not os.path.exists(filename):
        download(filename)
    with gzip.open(filename, 'rb') as f:
        data = np.frombuffer(f.read(), np.uint8, offset=16)
    data = data.reshape(-1,784)
    return data

def load_mnist_labels(filename):
    if not os.path.exists(filename):
        download(filename)
    with gzip.open(filename, 'rb') as f:
        data = np.frombuffer(f.read(), np.uint8, offset=8)
    return data

In [3]:
## Load the training set
train_data = load_mnist_images('train-images-idx3-ubyte.gz')
train_labels = load_mnist_labels('train-labels-idx1-ubyte.gz')

## Load the testing set
test_data = load_mnist_images('t10k-images-idx3-ubyte.gz')
test_labels = load_mnist_labels('t10k-labels-idx1-ubyte.gz')

In [4]:
def fit_linear_classifier(C_value=0.1):
    clf = LinearSVC(loss='hinge', C=C_value)
    clf.fit(train_data,train_labels)
    ## Get predictions on training data
    train_predictions = clf.predict(train_data)
    train_error = float(np.sum((train_predictions) != (train_labels)))/len(train_labels)
    ## Get predictions on test data
    test_predictions = clf.predict(test_data)
    test_error = float(np.sum((test_predictions) != (test_labels)))/len(test_labels)
    ##
    return test_error

In [5]:
clf = LinearSVC(loss='hinge', C=0.01)
clf.fit(train_data,train_labels)
## Get predictions on training data
train_predictions = clf.predict(train_data)
## Get predictions on test data
test_predictions = clf.predict(test_data)

#train_error



In [20]:
train_error1 = float(np.sum((train_predictions) != (train_labels)))/len(train_labels)
train_error1

0.12735

In [23]:
train_error2 = 1 - clf.fit(train_data,train_labels).score(train_data,train_labels)
train_error2  #0.1164



0.12916666666666665

In [22]:
from sklearn.metrics import accuracy_score
train_error3 = 1 - accuracy_score(train_labels,train_predictions)
train_error3

0.12734999999999996

In [25]:
test_error1 = float(np.sum((test_predictions) != (test_labels)))/len(test_labels)
test_error1

0.1317

In [26]:
test_error2 = 1 - clf.fit(test_data,test_labels).score(test_data,test_labels)
test_error2



0.04379999999999995

In [27]:
test_error3 = 1 - accuracy_score(test_labels,test_predictions)
test_error3

0.13170000000000004

In [6]:
cvals = [0.01,0.1,1.0,10.0,100.0]
for C in cvals:
    test_error = fit_linear_classifier(C)
    print ("Error rate for C = %0.2f: test %0.3f" % (C, test_error))



Error rate for C = 0.01: test 0.137




Error rate for C = 0.10: test 0.134




Error rate for C = 1.00: test 0.114




Error rate for C = 10.00: test 0.117




Error rate for C = 100.00: test 0.123


In [4]:
def fit_quadratic_classifier(C_value=1.0):
    clf = SVC(C=C_value, kernel='poly',degree=2)
    clf.fit(train_data,train_labels)
    ## Get predictions on training data
    train_predictions = clf.predict(train_data)
    train_error = float(np.sum((train_predictions) != (train_labels)))/len(train_labels)
    ## Get predictions on test data
    test_predictions = clf.predict(test_data)
    test_error = float(np.sum((test_predictions) != (test_labels)))/len(test_labels)
    ##
    return test_error

In [6]:
clf = SVC(C=1.0, kernel='poly',degree=2)
clf.fit(train_data,train_labels)
## Get predictions on training data
train_predictions = clf.predict(train_data)

## Get predictions on test data
test_predictions = clf.predict(test_data)

#train_error

In [25]:
train_error = 1 -  float(np.sum((train_predictions) != (train_labels)))/len(test_labels)
train_error

0.01248333333333329

In [7]:
test_error1 = float(np.sum((test_predictions) != (test_labels)))/len(test_labels)
test_error1

0.0226

In [8]:
test_error2 = 1 - clf.fit(test_data,test_labels).score(test_data,test_labels)
test_error2   # it should be 0.0194 when C = 1

0.014000000000000012

In [9]:
test_error3 = 1 - accuracy_score(test_labels,test_predictions)
test_error3

0.022599999999999953

In [5]:
cvals = [0.01,0.1,1.0]
for C in cvals:
    test_error = fit_quadratic_classifier(C)
    print ("Error rate for C = %0.2f: test %0.3f" % (C, test_error))

Error rate for C = 0.01: test 0.083
Error rate for C = 0.10: test 0.040
Error rate for C = 1.00: test 0.023
