In [11]:
import os
import struct
import random
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [12]:
def read_idx(filename):
    with open(filename, 'rb') as f:
        zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        shape = tuple(struct.unpack('>I', f.read(4))[0] for d in range(dims))
        return np.fromstring(f.read(), dtype=np.uint8).reshape(shape)

In [98]:
## loading mnist dataset

raw_train = read_idx("../data/train-patterns-idx3-ubyte")
train_data = np.reshape(raw_train, (60000, 28*28))
train_label = read_idx("../data/train-labels-idx1-ubyte")

raw_test = read_idx("../data/mnist_new_test-patterns-idx3-ubyte")
test_data = np.reshape(raw_test, (10000, 28*28))
test_label = read_idx("../data/mnist_new_test-labels-idx1-ubyte")

  """


In [99]:
## Standardizing the features
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
sc.fit(train_data)
train_std = sc.transform(train_data)
test_std = sc.transform(test_data)

In [100]:
x, y = np.shape(train_std)
print(x, y)
print(train_data.shape, train_label.shape)

60000 784
(60000, 784) (60000,)


In [101]:
print(test_data.shape, test_label.shape)

(10000, 784) (10000,)


In [133]:
class SVM(object):
    def __init__(self, C=0.1, eta=0.001, batch_size=128, num_iter=25, epsilon=0.001, class_num=0, shuffle=True):
        self.C = C
        self.eta = eta
        self.batch_size = batch_size
        self.num_iter = num_iter
        self.epsilon = epsilon
        self.class_num = class_num
        self.shuffle = shuffle
        
    def fit(self, X, y):
        X_n, X_f = np.shape(X) # X_n = 60000(data pts), X_f = 784(features)
        self.class_num = len(np.unique(y)) # class_num = 10
        
        w = np.random.rand(self.class_num, X_f) # w = [class_num][28*28]
        avg_w = w
        b = np.random.rand(self.class_num, 1) # b = [class_num][1]
        avg_b = b
        
        if self.shuffle:
            s_data, s_labels = self.shuffling(X, y)
            # s_ data[60000][28*28], s_labels[60000][1]
        
        # OvRs
        encoded_y = self.encoding(s_labels) # encodede_y[60000][class_num]
        
        cnt = 0
        batch_count = X_n / self.batch_size
        for Xi in range(self.num_iter):
            # minibatch training
            
            delta_w = np.zeros((self.class_num, X_f)) # delta_w[class_num][28*28]
            delta_b = np.zeros((self.class_num, 1)) # delta_b[10][1]
            
            for t in range(int(batch_count)):
                ### SGD
                batch_X, batch_y, bs = self.batching(s_data, encoded_y, t)
                # batch_X[batch_size][28*28], batch_y[batch_size][class_num]
                
                batch_X = np.reshape(batch_X, (bs, X_f))
                batch_y = np.reshape(batch_y, (bs, self.class_num))
                
                loss = self.hinge_loss(batch_X, batch_y, w, b) # loss[batch_size][class_num]
                loss = 1 - loss - self.epsilon
                
                loss[loss<0] = 1
                loss[loss>=0] = 0
                
                l_M_y = loss * batch_y # l_M_y = loss[bs][class_num], batch_y[bs][class_num]
                
                temp_w = np.dot(np.transpose(l_M_y), batch_X) # temp_w[10][28*28] = batch_y[batch_size][28*28]
                delta_w = -(1/bs)*np.array(temp_w)+(1/self.C)*np.array(w) # delta_w[10][28*28] = c*temp_w[10][28*28]+c*w[10][28*28]
                
                temp_b = np.sum(np.transpose(l_M_y), axis=1)
                temp_b = np.reshape(temp_b, (self.class_num, 1)) # temp_b[class_num][1]
                delta_b = -(1/bs) * temp_b # delta_b[10][1] = c*[class_num][1]
                cnt += 1
            
            w = np.array(w) - (self.eta*delta_w) # w[class_num][28*28]
            b = np.subtract(b, (self.eta*delta_b)) # b[class_num][1]
            
            temp2_w = (cnt/cnt+1) * avg_w + (1/cnt+1)*w
            temp2_b = (cnt/cnt+1) * avg_b + (1/cnt+1)*b
            
            avg_w = np.where(avg_w > temp2_w, avg_w, temp2_w)
            avg_b = np.where(avg_b > temp2_b, avg_b, temp2_b)
        
        return avg_w, avg_b
        
    
    def encoding(self, y):
        encoded_y = np.ones((np.shape(y)[0], self.class_num))
        for i in range(self.class_num):
            encoded_y[:, i][y != i] = -1
#         encoded_y = -1*np.ones(np.shape(y)[0], self.class_num)
#         # encoded_y[60000][class_num]
#         for i in range(np.shape(y)[0]):
#             encoded_y[i,y[i]] = 1
        return encoded_y
    
    def shuffling(self, X, y):
        ran = np.arange(0, np.shape(X)[0])
        np.random.shuffle(ran)
        return X[ran], y[ran]
    
    def batching(self, X, y, t):
        batch_X = X[t*self.batch_size : min(len(X),(t+1)*self.batch_size)]
        # batch_X[batch_size][28*28]
        
        batch_y = y[t*self.batch_size : min(len(X),(t+1)*self.batch_size)]
        # batch_y[batch_size][class_num]
        
        last_size = min(len(X), (t+1)*self.batch_size)-t*self.batch_size
        # last_size[size][28*28]
        
        return batch_X, batch_y, last_size
    
    def hinge_loss(self, X, y, w, b):
        net_v = self.net_input(X, w) # net_v[batch_size][class_num]
        temp_l = np.array(net_v) + np.transpose(b) # temp_l[batch_size][class_num]
        loss = y*temp_l # loss[batch_size][class_num]
        return loss
    
    def net_input(self, X, w):
        # X[batch_size][28*28], w[class_num][28*28]
        net = np.dot(X, np.transpose(w)) # [batch_size][class_num]
        return net
    
    def predict(self, X, w, b):
        pred = []
        net_v = self.net_input(X, w) # net_v[size of X][class_num]
        temp_t = np.array(net_v)+np.transpose(b) # temp_t[size of X][class_num]
        print(np.shape(temp_t))
        for i in range(0, X.shape[0]):
            p = np.argmax(temp_t, axis=1)[i]
            pred.append(p)
            print(p)
        pred = np.array(pred)
        return pred

In [134]:
svm = SVM()
w, b = svm.fit(train_std, train_label)

In [135]:
result = svm.predict(test_std, w, b)
print(result)

(10000, 10)
6
3
3
2
4
6
7
6
7
5
5
1
6
2
4
2
2
8
2
9
6
3
2
7
9
1
0
6
6
9
9
9
4
0
4
5
4
7
7
4
4
5
7
5
0
6
7
5
4
6
7
6
3
9
6
5
3
0
0
7
3
9
8
6
4
3
9
7
3
7
8
2
7
9
0
7
7
7
2
2
4
5
1
9
7
7
6
2
1
4
6
2
7
9
5
5
7
5
6
7
8
7
7
2
4
4
8
1
2
7
9
6
4
5
3
4
0
6
7
3
6
8
1
0
7
8
4
9
0
8
8
0
2
5
9
1
2
5
7
1
6
0
0
7
7
5
4
8
2
6
9
8
4
6
0
8
7
0
4
7
0
5
5
5
9
8
1
0
5
8
0
6
3
5
2
3
7
2
1
8
1
6
6
5
4
2
9
3
7
7
1
3
7
4
2
0
8
4
4
7
8
2
8
4
4
5
6
9
2
7
7
4
7
1
7
5
1
0
4
3
8
1
1
4
5
5
0
5
8
3
0
4
4
6
1
5
3
7
4
5
2
0
1
2
8
7
8
0
4
6
9
5
8
9
3
8
7
4
2
5
0
2
1
4
1
9
7
5
2
2
7
0
4
3
7
7
7
1
1
6
2
1
0
8
6
2
6
3
2
3
4
6
4
6
4
4
1
3
9
1
7
0
9
7
0
2
3
2
1
5
4
2
2
5
0
2
6
1
7
2
2
5
7
9
6
1
3
0
2
4
7
6
1
5
0
3
0
2
2
9
3
9
0
7
0
2
6
9
5
0
0
2
6
2
0
7
2
5
7
3
8
6
0
0
4
5
7
6
2
2
0
6
0
2
4
9
4
2
1
0
7
7
4
2
3
8
1
6
2
0
1
9
7
4
7
9
8
1
0
3
5
5
3
5
3
5
7
8
0
2
1
2
0
9
2
1
1
7
7
0
5
2
3
5
6
8
7
1
2
6
8
9
1
5
4
7
2
5
0
4
2
2
2
2
4
7
7
5
4
4
8
0
6
7
7
2
7
4
1
6
0
4
3
7
3
2
4
8
9
5
8
9
2
9
3
5
6
1
2
0
7
4
2
4
2
5
1
3
0
3
8
4
5
7


7
4
7
4
2
3
9
2
0
4
7
5
4
2
4
9
1
7
8
0
4
2
9
9
9
8
0
3
0
7
5
9
7
5
7
5
7
3
5
7
4
6
2
6
6
6
7
2
3
5
3
0
0
6
7
1
4
7
7
5
6
6
2
7
2
7
3
4
5
5
9
6
4
8
4
6
7
5
0
2
1
7
6
4
6
9
1
8
1
6
4
6
0
7
7
8
0
4
5
4
4
3
7
7
1
0
1
9
9
1
9
7
5
5
2
4
7
5
4
4
4
9
8
4
5
8
7
5
4
2
9
0
9
5
7
5
9
1
6
4
6
5
0
1
3
5
6
7
7
3
0
9
5
9
5
0
6
1
2
6
0
8
7
9
7
5
9
5
8
6
2
3
1
0
1
7
4
6
0
3
7
6
2
1
7
7
8
1
5
0
2
2
4
2
5
5
5
6
2
0
4
9
6
2
9
3
3
5
1
7
7
3
2
2
1
8
7
1
6
3
7
7
2
3
5
3
4
7
7
4
9
0
4
1
1
6
8
1
6
2
8
8
4
0
0
3
0
8
1
9
0
2
9
0
4
4
5
2
7
1
7
0
7
4
5
2
7
1
1
1
3
3
2
0
4
1
9
2
6
9
8
3
9
7
4
1
5
8
6
9
9
7
1
2
3
5
7
2
7
4
0
8
2
5
5
1
7
9
7
5
7
2
6
8
2
5
2
3
3
1
3
6
4
1
4
0
0
2
5
2
8
8
6
0
9
3
4
2
6
4
4
4
5
4
2
7
7
0
2
5
4
9
3
7
1
7
9
8
2
8
5
1
3
7
8
2
4
2
1
7
8
6
2
2
4
5
8
5
2
1
7
5
7
6
6
4
2
9
2
3
3
1
6
3
3
2
2
4
4
4
3
3
7
3
1
1
8
8
2
4
7
5
1
2
2
9
2
5
4
6
4
8
2
2
7
6
0
0
2
1
3
6
7
6
3
6
1
4
1
4
2
0
7
1
9
6
5
8
4
2
1
6
4
2
1
9
2
1
1
7
0
9
2
1
4
1
1
1
7
2
2
1
4
6
4
2
4
8
7
8
2
4
7
2
6
2
3
6
0
7
2
4
3
6
3
7
2
1
0
6


5
1
1
0
5
1
2
2
8
6
7
4
7
8
2
9
8
6
0
2
3
0
4
8
4
6
7
0
2
7
9
0
4
0
7
0
3
0
7
8
5
1
1
5
7
5
7
0
6
2
3
4
1
2
3
5
1
3
1
1
4
3
6
8
5
8
4
0
5
4
0
9
3
2
8
3
2
1
7
2
3
1
7
4
1
4
1
7
3
7
6
2
2
8
0
2
5
3
7
7
2
5
8
4
4
9
7
1
5
5
7
4
4
2
2
2
1
1
6
7
5
0
8
2
0
0
9
4
7
6
5
5
2
4
2
7
3
1
5
8
8
3
0
4
4
7
3
5
4
6
2
6
7
3
7
5
8
2
9
5
0
9
1
9
2
0
6
8
5
0
0
1
5
7
1
5
6
2
3
5
7
1
7
3
6
7
3
8
4
7
9
9
6
3
1
5
9
2
8
4
4
2
2
2
6
8
9
8
7
7
2
2
7
7
0
8
0
5
6
2
4
5
9
2
4
2
4
4
9
1
3
5
1
8
7
5
1
8
1
0
2
7
9
4
1
7
2
7
0
4
0
7
2
4
7
8
4
6
4
1
6
6
7
9
9
7
4
6
3
9
5
8
7
0
8
8
0
0
9
1
0
7
8
3
2
4
1
7
4
4
7
8
2
7
0
8
6
4
7
5
6
5
5
1
0
7
8
4
7
8
6
7
8
2
3
3
1
6
7
5
8
4
9
7
4
4
1
4
3
7
0
2
6
2
5
3
7
4
4
2
3
7
3
8
9
5
5
0
0
5
6
6
9
3
6
3
1
2
2
2
1
4
6
2
2
6
4
3
6
0
0
6
8
5
4
2
2
7
8
8
6
2
4
8
2
2
7
7
7
4
0
2
8
7
6
7
6
3
4
5
0
1
1
4
4
3
7
7
3
4
4
6
3
0
4
0
9
1
5
4
4
8
7
1
0
6
2
5
1
8
5
7
1
6
3
5
9
9
7
9
4
1
7
4
0
1
7
9
9
8
5
2
4
5
8
2
6
7
6
4
7
5
4
6
4
0
7
7
9
1
2
5
1
5
3
3
7
5
7
2
9
3
9
5
0
3
5
8
0
0
6
4
4
0
4
7
7
1
8
0


In [136]:
def score(pred, actual):
    score = np.mean(pred == actual)
    return score

def accuracy(pred, actual):
    count = 0
    error = 0
    for t in range(0,actual.shape[0]):
        if pred[t] == actual[t]:
            count += 1;
        else:
            error += 1
    return count, error

In [137]:
result = np.array(result)
print(result)

[6 3 3 ... 3 5 5]


In [138]:
sc = score(result, test_label)
print(sc)

0.0994


In [139]:
cor, err = accuracy(result, test_label)
print(cor, err)

994 9006
