In [1]:
import struct
import numpy as np
from sklearn.utils import shuffle
from sklearn import svm, metrics
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def read_idx(filename):
    """Read an IDX-format file into a NumPy ``uint8`` array.

    The file starts with a 4-byte big-endian header (a 16-bit field, a
    type-code byte and a dimension-count byte), followed by one 32-bit
    big-endian size per dimension, followed by the raw payload.

    Parameters
    ----------
    filename : str
        Path of the IDX file to read.

    Returns
    -------
    numpy.ndarray
        Writable ``uint8`` array shaped according to the sizes in the header.

    Notes
    -----
    The type-code byte is read but not checked: the payload is always
    decoded as unsigned bytes.
    """
    with open(filename, 'rb') as f:
        zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        shape = tuple(struct.unpack('>I', f.read(4))[0] for _ in range(dims))
        # np.fromstring's binary mode is deprecated; frombuffer is the
        # supported replacement. It returns a read-only view of the bytes,
        # so copy to hand back a writable, independent array as before.
        return np.frombuffer(f.read(), dtype=np.uint8).reshape(shape).copy()

In [3]:
## Load the MNIST IDX files and flatten every 28x28 image into one row

# Training split: 60000 images and their labels
raw_train = read_idx("./data/train-patterns-idx3-ubyte")
train_data = raw_train.reshape((60000, 28*28))
train_label = read_idx("./data/train-labels-idx1-ubyte")

# Test split: 10000 images and their labels
raw_test = read_idx("./data/test-patterns-idx3-ubyte")
test_data = raw_test.reshape((10000, 28*28))
test_label = read_idx("./data/test-labels-idx1-ubyte")

  """


In [4]:
# Preprocessing: rescale the uint8 pixel values by 1/255 so features lie in [0, 1]
X_train_p = train_data / 255.0
X_test_p = test_data / 255.0

In [5]:
class SVM(object):
    """Linear one-vs-rest SVM trained by mini-batch sub-gradient descent.

    One weight column and one bias are learned per class. Targets are
    encoded as +1 for the sample's own class and -1 for every other class,
    and each column is trained on the hinge loss with an L2 penalty scaled
    by ``1 / C``.

    Labels are assumed to be the integers ``0 .. class_num - 1``:
    ``encode_ovr`` compares ``y`` against ``range(class_num)`` and
    ``predict`` returns the arg-max column index.

    Parameters
    ----------
    C : float
        Inverse regularisation strength; the weight-decay term is ``w / C``.
    eta : float
        Learning rate of each gradient step.
    batch_size : int
        Number of samples per mini-batch.
    epochs : int
        Number of passes over the training data.
    shuffle : bool
        Whether to reshuffle the training data at the start of every epoch.
    """

    def __init__(self, C=0.1, eta=0.001, batch_size=60, epochs=100, shuffle=True):
        self.C = C
        self.eta = eta
        self.batch_size = batch_size
        self.epochs = epochs
        self.shuffle = shuffle
        self.class_num = 0

    def fit(self, X, y, params=None, eval_score=None):
        """Train the classifier and return the parameter dictionary.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training samples.
        y : array-like, shape (n_samples,)
            Integer class labels.
        params : dict, optional
            Initial parameters with keys ``'w_'`` and ``'b_'`` (and
            optionally ``'aver_w'`` / ``'aver_b'``). When omitted, all four
            entries are drawn from a standard normal distribution.
        eval_score : float, optional
            Initial value of the best score seen so far; defaults to 0.

        Returns
        -------
        dict
            ``self.params`` with keys ``'w_'``, ``'b_'``, ``'aver_w'`` and
            ``'aver_b'``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        xn, xf = np.shape(X)  # xn: number of samples, xf: number of features
        self.class_num = len(np.unique(y))  # number of distinct classes

        y_ovr = self.encode_ovr(X, y)

        # Initialise the parameters: weights, biases and their averages.
        if params is None:
            self.params = {
                'w_': np.random.randn(xf, self.class_num),  # standard-normal values
                'b_': np.random.randn(1, self.class_num),
                'aver_w': np.random.randn(xf, self.class_num),
                'aver_b': np.random.randn(1, self.class_num)
            }
        else:
            # Work on a shallow copy so the caller's dict is not modified.
            self.params = dict(params)
            self.params.setdefault('aver_w', np.array(self.params['w_'], dtype=float))
            self.params.setdefault('aver_b', np.array(self.params['b_'], dtype=float))

        self.score_val = 0 if eval_score is None else eval_score

        # Round up so a trailing partial batch is trained on as well;
        # mini_batch already clips the final slice to the data length.
        batch_count = int(np.ceil(xn / self.batch_size))

        cnt = 0  # total number of gradient steps taken so far (the "k" of SVM-SGD)

        for epoch in range(0, self.epochs):
            if self.shuffle:
                X_epoch, y_epoch = self.shuffles(X, y_ovr)
            else:
                X_epoch, y_epoch = X, y_ovr

            for t in range(0, batch_count):
                X_batch, y_batch, bs = self.mini_batch(X_epoch, y_epoch, t)

                # Hinge sub-gradient: only entries with margin y*f(x) < 1
                # contribute, each with weight y.
                margin = self.hinge_loss(X_batch, y_batch)
                active = (1 - margin > 0).astype(float)
                y_tmp = active * y_batch

                dw = -(1/bs) * np.dot(X_batch.T, y_tmp) + (1/self.C) * self.params['w_']
                db = -(1/bs) * np.sum(y_tmp, axis=0)

                self.params['w_'] = self.params['w_'] - self.eta * dw
                self.params['b_'] = self.params['b_'] - self.eta * db

                cnt += 1

            # Candidate running average: weight cnt/(cnt+1) on the previous
            # average and 1/(cnt+1) on the current parameters.
            temp_w = (cnt/(cnt+1))*self.params['aver_w'] + (1/(cnt+1))*self.params['w_']
            temp_b = (cnt/(cnt+1))*self.params['aver_b'] + (1/(cnt+1))*self.params['b_']

            prev_score = self.score_val
            pres_score = self.score(X, y)
            if epoch % 10 == 0:
                print("epochs: ", epoch)
                print("prev_score: %.4f" % prev_score)
                print("pres_score: %.4f" % pres_score)
                print('\n')
            if prev_score < pres_score:
                self.score_val = pres_score

            # Keep the candidate average only if it scores strictly better
            # on the training data than the current average.
            if self.det_weight(X, y, self.params['aver_w'], self.params['aver_b']) < self.det_weight(X, y, temp_w, temp_b):
                self.params['aver_w'] = temp_w
                self.params['aver_b'] = temp_b

        return self.params

    def shuffles(self, X, y):
        """Return ``X`` and ``y`` reordered by one shared random permutation."""
        ran = np.arange(0, np.shape(X)[0])
        np.random.shuffle(ran)
        return X[ran], y[ran]

    def encode_ovr(self, X, y):
        """Encode labels as a (n_samples, class_num) matrix of +1 / -1.

        Column ``i`` is +1 where ``y == i`` and -1 elsewhere. ``X`` is
        accepted but not used.
        """
        # Start from an array of ones of shape (len(y), class_num) ...
        encode = np.ones((np.shape(y)[0], self.class_num))
        # ... then mark every "not this class" entry with -1.
        for i in range(self.class_num):
            encode[:, i][y != i] = -1
        return encode

    def mini_batch(self, X, y, t):
        """Return the ``t``-th mini-batch of ``X`` and ``y`` and its size.

        The slice is clipped to the data length, so the final batch may be
        smaller than ``batch_size``.
        """
        xn = np.shape(X)[0]
        start = t*self.batch_size
        stop = min(xn, (t+1)*self.batch_size)
        return X[start:stop], y[start:stop], stop - start

    def hinge_loss(self, X, y):
        """Return the margins ``y * (X @ w_ + b_)``.

        Despite the name this is the margin, not the loss; ``fit`` derives
        the hinge sub-gradient from it.
        """
        fx = np.dot(X, self.params['w_']) + self.params['b_']  # X*W + b
        return y * fx

    def predict(self, X):
        """Return, for each row of ``X``, the index of the highest-scoring class."""
        cla_score = np.dot(X, self.params['w_']) + self.params['b_']
        return np.argmax(cla_score, axis=1)

    def score(self, X, y):
        """Return the fraction of rows of ``X`` whose prediction equals ``y``."""
        return np.mean(self.predict(X) == y)

    def det_weight(self, X, y, w1, b1):
        """Return the accuracy on ``(X, y)`` of the given weights and biases.

        Scores have shape (n_samples, class_num), so the arg-max is taken
        along axis 1 to obtain one prediction per sample.
        """
        scores = np.dot(X, w1) + b1
        pred = np.argmax(scores, axis=1)
        return np.mean(pred == y)

    def get_parameters(self):
        """Return the learned parameter dictionary (available after ``fit``)."""
        return self.params

    def get_params(self, deep=True):
        """Return the constructor hyper-parameters as a dict.

        ``deep`` is accepted but not used.
        """
        return {
            'C': self.C,
            'eta': self.eta,
            'batch_size': self.batch_size,
            'epochs': self.epochs,
            'shuffle': self.shuffle,
        }

    def set_params(self, **parameters):
        """Set attributes from keyword arguments and return ``self``."""
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

In [6]:
# Short aliases for the scaled training features and their labels
X, y = X_train_p, train_label

In [7]:
# Build and train the classifier.
# NOTE: binding the name `svm` here shadows the `sklearn.svm` module
# imported in the first cell.
svm = SVM(
    C=1000,
    eta=0.01,
    batch_size=20,
    epochs=200,
)
svm.fit(X, y)

epochs:  0
prev_score: %d 0
pres_score: %d 0.7999333333333334






epochs:  1
prev_score: %d 0.7999333333333334
pres_score: %d 0.8303666666666667


epochs:  2
prev_score: %d 0.8303666666666667
pres_score: %d 0.8393333333333334


epochs:  3
prev_score: %d 0.8393333333333334
pres_score: %d 0.8412833333333334


epochs:  4
prev_score: %d 0.8412833333333334
pres_score: %d 0.83845


epochs:  5
prev_score: %d 0.8412833333333334
pres_score: %d 0.8437


epochs:  6
prev_score: %d 0.8437
pres_score: %d 0.8465666666666667


epochs:  7
prev_score: %d 0.8465666666666667
pres_score: %d 0.8263833333333334


epochs:  8
prev_score: %d 0.8465666666666667
pres_score: %d 0.8355


epochs:  9
prev_score: %d 0.8465666666666667
pres_score: %d 0.8436333333333333


epochs:  10
prev_score: %d 0.8465666666666667
pres_score: %d 0.84415


epochs:  11
prev_score: %d 0.8465666666666667
pres_score: %d 0.8507333333333333


epochs:  12
prev_score: %d 0.8507333333333333
pres_score: %d 0.8396833333333333


epochs:  13
prev_score: %d 0.8507333333333333
pres_score: %d 0.8443666666666667


e

{'w_': array([[-3.30266539e-14, -1.85420400e-13,  9.74531338e-14, ...,
         -1.79271612e-13, -5.92273719e-14,  4.06681496e-14],
        [-3.18109807e-14,  1.52173679e-13, -4.39750299e-14, ...,
          1.32016269e-13,  4.49134548e-14,  4.88019159e-14],
        [-5.94488962e-14,  7.74002823e-14, -3.45469468e-14, ...,
         -6.25343844e-14, -2.69868342e-13, -1.56160405e-16],
        ...,
        [-8.49559620e-08,  7.63548824e-14, -1.18271532e-04, ...,
         -2.64811659e-13,  1.19103449e-13,  1.36389548e-03],
        [-7.98588340e-14,  1.06450386e-13, -2.98943295e-14, ...,
         -5.17377507e-14, -2.98420804e-14,  7.88108935e-14],
        [ 4.67203983e-14, -2.03942738e-14, -9.03796477e-15, ...,
          2.97547251e-14,  6.48543544e-14,  2.83753821e-14]]),
 'b_': array([[-2.88213985,  1.08406477, -1.73911725, -2.36571185, -0.88579247,
         -0.19329255, -1.91554347, -0.37744413, -4.43350191, -2.24854788]]),
 'aver_w': array([[-0.05768219,  1.13581065, -0.63024389, ...,  1.

In [8]:
# Predict a class index for every scaled test image and show the result
pred = svm.predict(X_test_p)
print(pred)

[5 0 4 ... 6 9 7]


In [10]:
# Fraction of test images whose predicted class matches the true label
from sklearn.metrics import accuracy_score
acc_sco = accuracy_score(y_true=test_label, y_pred=pred)

In [11]:
# Show the test-set accuracy computed in the previous cell
print(acc_sco)

0.842
