In [1]:
import numpy as np
import itertools
import matplotlib.pyplot as plt
from scipy import stats

In [2]:
x_train = np.loadtxt('./data/mnist/train.csv',delimiter=',',dtype=np.float64)
y_train = x_train[:,-1].reshape(-1,1)
x_train = np.delete(x_train,-1,1)
train_mean, train_std = np.mean(x_train), np.std(x_train)
x_train = (x_train - train_mean)/train_std
x_train = np.concatenate((np.ones((x_train.shape[0],1)),x_train),axis=1)

x_test = np.loadtxt('./data/mnist/test.csv',delimiter=',',dtype=np.float64)
y_test = x_test[:,-1].reshape(-1,1)
x_test = np.delete(x_test,-1,1)
x_test = (x_test - train_mean)/train_std
x_test = np.concatenate((np.ones((x_test.shape[0],1)),x_test),axis=1)

In [77]:
class SVM():
    def __init__(self,maxit,batchsize,lambda_,c=1):
        self.maxit = maxit
        self.batchsize = batchsize
        self.lambda_ = lambda_
        self.c = c
    
    def fit(self,x_train,y_train,printAfter=1,seed=0):
        indices = np.arange(self.batchsize)
        self.w = np.zeros((x_train.shape[1],1))
        counter = 0
        prev_cost = 0
        np.random.seed(seed)
        for it in range(self.maxit):
            rand_idx = np.random.randint(0,x_train.shape[0],self.batchsize)
            sub_x, sub_y = x_train[rand_idx], y_train[rand_idx]
            loss = sub_y*np.dot(sub_x,self.w)
            cost = self.lambda_*(self.w**2).sum()/2 + self.c*loss.sum()/float(self.batchsize)
            if(it > 0 and it%printAfter == 0): print("{0} - {1}".format(it,cost))
            counter = counter + 1 if cost > prev_cost else 0
            prev_cost = cost
            idx = indices[loss.ravel() < 1]
            eta = 1/float(self.lambda_*(it+1))
            if(counter > 5):
                print("{0} - {1}".format(it,cost))
                break
            self.w = self.w*(1 - self.lambda_*eta) + (self.c*eta/float(self.batchsize))*(sub_x[idx]*sub_y[idx]).sum(axis=0).reshape(-1,1)
            
    def predict(self,x_test,conf=False):
        return (np.dot(x_test,self.w) > 0).astype(np.int64) if not conf else np.dot(x_test,self.w)

In [117]:
def onevsone(x_train,y_train,maxit,batchsize,lambda_,c=1,printAfter=1,seed=0):
    classifiers = []
    labels = np.unique(y_train)
    class_split = list(itertools.combinations(np.arange(10),2))
    indices = np.arange(x_train.shape[0])
    for split in class_split:
        idx_c1 = indices[(y_train == split[0]).ravel()]
        idx_c2 = indices[(y_train == split[1]).ravel()]
        idx = np.concatenate((idx_c1,idx_c2))
        sub_x, sub_y = x_train[idx], y_train[idx]
        sub_y = (sub_y == split[1]).astype(np.int64)
        classifier = SVM(maxit,batchsize,lambda_,c)
        classifier.fit(sub_x,sub_y,printAfter,seed)
        classifiers.append((classifier,split[0],split[1]))
    return classifiers

def pred_onevsone(classifiers,x_test):
    preds = np.zeros((x_test.shape[0],len(classifiers)))
    ones = np.ones((x_test.shape[0],1))
    for idx, c in enumerate(classifiers):
        pred = c[0].predict(x_test,conf=False).reshape(-1,1)
        preds[:,idx] = np.where(pred == 0,c[1]*ones,c[2]*ones).ravel()
    final_pred = -stats.mode(-preds,axis=1)[0]
    return final_pred

def indv_acc_onevsone(classifiers,x_test,y_test):
    for c in classifiers:
        indices = np.arange(x_test.shape[0])
        idx_c1 = indices[(y_test == c[1]).ravel()]
        idx_c2 = indices[(y_test == c[2]).ravel()]
        idx = np.concatenate((idx_c1,idx_c2))
        sub_x, sub_y = x_test[idx], y_test[idx]
        ones = np.ones((sub_x.shape[0],1))
        pred = np.where(c[0].predict(sub_x).reshape(-1,1) == 0,c[1]*ones,c[2]*ones)
        print("Accuracy for classifier b/w {0}/{1} = {2}".format(c[1],c[2],(pred == sub_y).sum()/sub_y.shape[0]))

In [118]:
svm_one = onevsone(x_train,y_train,1000,100,1,1,1000,1)
pred_one = pred_onevsone(svm_one,x_test)

In [119]:
accuracy_one = (pred_one == y_test).sum()/y_test.shape[0]
print("One-vs-One Accuracy = {0}".format(accuracy_one))
indv_acc_onevsone(svm_one,x_test,y_test)

One-vs-One Accuracy = 0.1009
Accuracy for classifier b/w 0/1 = 0.5385342789598109
Accuracy for classifier b/w 0/2 = 0.5129224652087475
Accuracy for classifier b/w 0/3 = 0.507537688442211
Accuracy for classifier b/w 0/4 = 0.5005096839959225
Accuracy for classifier b/w 0/5 = 0.47649572649572647
Accuracy for classifier b/w 0/6 = 0.49432404540763675
Accuracy for classifier b/w 0/7 = 0.5119521912350598
Accuracy for classifier b/w 0/8 = 0.4984646878198567
Accuracy for classifier b/w 0/9 = 0.5072900955253896
Accuracy for classifier b/w 1/2 = 0.47623442547300415
Accuracy for classifier b/w 1/3 = 0.47086247086247085
Accuracy for classifier b/w 1/4 = 0.46386395843174305
Accuracy for classifier b/w 1/5 = 0.44005920078934385
Accuracy for classifier b/w 1/6 = 0.4577161968466316
Accuracy for classifier b/w 1/7 = 0.47526583448913545
Accuracy for classifier b/w 1/8 = 0.4618302513039355
Accuracy for classifier b/w 1/9 = 0.470615671641791
Accuracy for classifier b/w 2/3 = 0.49461312438785504
Accuracy fo

In [90]:
def onevsall(x_train,y_train,maxit,batchsize,lambda_,c=1,printAfter=1,seed=0):
    classifiers = []
    labels = np.sort(np.unique(y_train))
    for l in labels:
        sub_x, sub_y = x_train, (y_train == l).astype(np.int64)
        classifier = SVM(maxit,batchsize,lambda_,c)
        classifier.fit(sub_x,sub_y,printAfter,seed)
        classifiers.append((classifier,l))
    return classifiers
        
def pred_onevsall(classifiers,x_test):
    preds = np.zeros((x_test.shape[0],len(classifiers)))
    for idx, c in enumerate(classifiers):
        preds[:,idx] = c[0].predict(x_test,conf=True).ravel()
    final_pred = preds.argmax(axis=1).reshape(-1,1)
    return final_pred

def indv_acc_onevsall(classifiers,x_test,y_test):
    for c in classifiers:
        ones = np.ones((x_test.shape[0],1))
        pred = (c[0].predict(x_test,conf=True) > 0.5).astype(np.int64)
        pred = np.where(pred == 1,c[1]*ones,-1*ones)
        sub_y = np.where(y_test == c[1],c[1]*ones,-1*ones)
        print("Accuracy for classifier b/w {0}/Rest = {1}".format(c[1],(pred == sub_y).sum()/sub_y.shape[0]))

In [92]:
svm_all = onevsall(x_train,y_train,1000,100,1,1,1000,1)
pred_all = pred_onevsall(svm_all,x_test)

345 - 0.17890920224273874
515 - 0.169568276320533
943 - 0.19313234866362924


In [93]:
accuracy_all = (pred_all == y_test).sum()/y_test.shape[0]
print("One-vs-All Accuracy = {0}".format(accuracy_all))
indv_acc_onevsall(svm_all,x_test,y_test)

One-vs-All Accuracy = 0.7774
Accuracy for classifier b/w 0.0/Rest = 0.1978
Accuracy for classifier b/w 1.0/Rest = 0.2986
Accuracy for classifier b/w 2.0/Rest = 0.1069
Accuracy for classifier b/w 3.0/Rest = 0.1096
Accuracy for classifier b/w 4.0/Rest = 0.1251
Accuracy for classifier b/w 5.0/Rest = 0.0895
Accuracy for classifier b/w 6.0/Rest = 0.1118
Accuracy for classifier b/w 7.0/Rest = 0.1204
Accuracy for classifier b/w 8.0/Rest = 0.1066
Accuracy for classifier b/w 9.0/Rest = 0.1166
