In [17]:
import csv
import numpy as np
from matplotlib import pyplot as plt
from sklearn.svm import SVC
from numpy.linalg import eig
from numpy.linalg import inv 

In [18]:
def load_mnist(path, kind='train'):
    import os
    import gzip
    import numpy as np

    """Load MNIST data from `path`"""
    labels_path = os.path.join(path,
                               '%s-labels-idx1-ubyte.gz'
                               % kind)
    images_path = os.path.join(path,
                               '%s-images-idx3-ubyte.gz'
                               % kind)

    with gzip.open(labels_path, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8,
                               offset=8)

    with gzip.open(images_path, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=16).reshape(len(labels), 784)

    return images, labels

In [23]:
images, Y = load_mnist('../fMNIST', kind='train')
images_test, Y_test = load_mnist('../fMNIST', kind='t10k') 

In [24]:
print (images.shape)
print (Y)
classes = [0,1,2,3,4,5,6,7,8,9]

(60000, 784)
[9 0 0 ... 3 0 5]


In [25]:
def PCA(images,dimensionToProject):
    # calculate the mean of each column
    M = np.mean(np.array(images).T.tolist(), axis=1)
    # center columns by subtracting column means

    C = images - M
    print (C.shape)
    # calculate covariance matrix of centered matrix
    V = np.cov(C.T)
    # eigendecomposition of covariance matrix
    values, vectors = eig(V)
    vectors = vectors[:np.shape(vectors)[0],0:dimensionToProject]
    # project data
    P = (vectors.T.dot(C.T)).T
    
    return (np.real(P))

In [26]:
X = PCA(images,50)
print (X.shape)
print (Y)

(60000, 784)
(60000, 50)
[9 0 0 ... 3 0 5]


In [27]:
X_test = PCA(images_test,50)
print (X_test.shape) 

(10000, 784)
(10000, 50)


In [28]:
X = (X-np.mean(X,axis = 0))/np.std(X,axis = 0)

In [41]:
def multiclassSVM(X,Y,classes):
    f = X.shape[1]
    n = X.shape[0]
    c = len(classes)
    W = np.zeros((c,f))
    b = np.zeros((c))
    for i in range(c):
        print (i)
        counter = 0
        X_temp = np.zeros((12000,X.shape[1]))
        Ty = np.zeros((12000))
        k = 0
        for j in range(n):
            if Y[j] == classes[i]:
                Ty[k] = 1
                X_temp[k] = X[j]
                k+=1
            elif (counter<6000):
                Ty[k] = -1
                X_temp[k] = X[j]
                counter = counter+1
                k +=1
        print (Ty)
        print (Ty.shape)
        clf = SVC(C = 10, kernel = 'linear')
        clf.fit(X_temp, Ty.ravel())
        W[i],b[i] = clf.coef_[0],clf.intercept_
    return W,b

In [42]:
W,b = multiclassSVM(X,Y,classes)

0
[-1.  1.  1. ...  1.  1.  1.]
(12000,)
1
[-1. -1. -1. ...  1.  1.  1.]
(12000,)
2
[-1. -1. -1. ...  1.  1.  1.]
(12000,)
3
[-1. -1. -1. ...  1.  1.  1.]
(12000,)
4
[-1. -1. -1. ...  1.  1.  1.]
(12000,)
5
[-1. -1. -1. ...  1.  1.  1.]
(12000,)
6
[-1. -1. -1. ...  1.  1.  1.]
(12000,)
7
[-1. -1. -1. ...  1.  1.  1.]
(12000,)
8
[-1. -1. -1. ...  1.  1.  1.]
(12000,)
9
[ 1. -1. -1. ...  1.  1.  1.]
(12000,)


In [43]:
def accuracy(X,Y,W,b,classes):
    c = len(classes)         #number of classes
    n = X.shape[0]           #number of data samples
    f = W.shape[1]           #input dimension
    acc = 0                  #number of correctly classified inputs
    
    # implementing soft-max to calculate predicted class
    g = np.zeros(c)     # probability for each class
    for i in range(n):
        g = np.exp((W@X[i].reshape(f,1)).reshape(c)+b)
        y_predicted = np.where(g == max(g))
        if (y_predicted[0] == classes.index(Y[i])):
            acc += 1
    
    return acc/X.shape[0]

In [44]:
acc = accuracy(X,Y,W,b,classes)
print (acc*100)

82.63666666666667


In [45]:
print (W)

[[ 6.08070720e-01 -1.24100132e+00 -3.43423293e-01  6.34928705e-01
  -2.06558373e-01  1.43265201e+00  1.79453323e-01 -4.47998816e-01
   1.41737650e-01 -8.15469307e-02  1.91110756e-01 -1.33573562e-01
   3.24453515e-01 -4.09476790e-02  2.23453114e-01  1.92438144e-01
  -3.11196545e-01 -4.38794391e-03 -3.21724723e-02  3.21703178e-02
  -3.86321338e-02 -7.13850425e-02 -2.71574018e-02 -8.17004068e-03
  -6.75671895e-02  1.43351660e-01  8.63560932e-02  2.21306064e-02
   3.39063977e-02  2.34116693e-01 -3.08898155e-01 -6.10235444e-02
   1.44117031e-01  1.19779757e-01 -1.08555252e-01  5.15515979e-02
   8.27313472e-02 -1.52023706e-01  1.57687849e-01  1.13498523e-01
  -4.43148428e-03 -1.09438558e-01 -3.67802322e-02 -8.38482326e-02
  -5.34981681e-02 -1.77602688e-02 -5.89832705e-02 -2.15210828e-03
   7.29276210e-03 -4.17221821e-02]
 [ 4.92603430e-01 -2.61231611e+00 -5.34385580e-01 -5.36153017e-01
   7.37449694e-01 -8.42446410e-01 -7.24773809e-01 -6.45575092e-01
  -8.03933112e-01 -4.96597438e-01  4.1056

In [46]:
print (b)

[ -2.15376793  -3.14697298  -1.55186299  -2.879385    -1.94467452
  -8.8773299   -1.76469957 -16.06734067  -2.11097158  -7.3548144 ]
