# Import Data

In [None]:
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

In [None]:
# Load the scikit-learn digits dataset as a feature matrix and label vector,
# then hold out 30% of the samples for testing.
X, y = datasets.load_digits(return_X_y=True)
X_train, X_test, y_train_orig, y_test_orig = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,  # fixed seed so the split is reproducible
)
print(X_train.shape)

(1257, 64)


In [None]:
# Learn the per-feature scaling statistics from the training split only, then
# apply that same transformation to both splits so the test data never
# influences preprocessing.
scaler = StandardScaler().fit(X_train)
X_train_norm = scaler.transform(X_train)
X_test_norm = scaler.transform(X_test)

# Convert to Binary Classification Problem

In [None]:
def convert_to_binary(desired_class, incorrect_class, vector):
    """Relabel a multiclass label vector for a one-vs-rest binary problem.

    Args:
        desired_class: Label value treated as the positive class.
        incorrect_class: Integer written for every entry that is not
            ``desired_class`` (e.g. ``-1`` or ``0``).
        vector: One-dimensional sequence or array of class labels.

    Returns:
        Integer NumPy array of the same length as ``vector`` holding ``1``
        where the entry equals ``desired_class`` and ``incorrect_class``
        everywhere else.
    """
    labels = np.asarray(vector)
    # One vectorised comparison replaces the element-by-element Python loop.
    return np.where(labels == desired_class, 1, incorrect_class).astype(int)

In [None]:
# Digit 1 is the positive class. Training labels use -1 for "not 1" because
# the margin term 1 - y * f(x) needs signed labels; test labels use 0 so they
# can be compared directly with predictions thresholded to 0/1.
y_train = convert_to_binary(desired_class=1, incorrect_class=-1, vector=y_train_orig)
y_test = convert_to_binary(desired_class=1, incorrect_class=0, vector=y_test_orig)
print(y_train)
print(y_test)

[ 1  1 -1 ... -1 -1  1]
[0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 1 0 0 0 0 0 0 0
 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1
 

Our loss function is the average hinge loss
$$L = \frac{1}{N} \sum_{i = 1}^N \max\{0,\, 1 - y_i f(x_i)\}$$
where $f(x_i) = x_i^{T}W + b$ and $y_i \in \{-1, +1\}$ is the label of sample $x_i$.

In [None]:
def accuracy(ypred, yexact):
    """Return the fraction of predictions that equal the reference labels.

    Args:
        ypred: Predicted labels (array, list or tuple).
        yexact: Reference labels with the same shape as ``ypred``.

    Returns:
        Proportion of matching entries as a float in ``[0, 1]``.

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    # Convert first: comparing two plain lists with == yields a single bool
    # instead of an element-wise result, which would give a wrong score.
    predicted = np.asarray(ypred)
    expected = np.asarray(yexact)
    if predicted.shape != expected.shape:
        raise ValueError(
            "ypred and yexact must have the same shape, got "
            f"{predicted.shape} and {expected.shape}"
        )
    return np.mean(predicted == expected)

# Forward Pass

In [None]:
def forward(X, W, b, y):
    """Compute the per-sample margin term ``1 - y * f(x)``.

    ``f(x)`` is the linear score ``X . W + b``. A positive entry in the
    result means that sample lies inside the margin or is misclassified.

    Args:
        X: Feature matrix, one row per sample.
        W: Weight vector.
        b: Bias term.
        y: Labels, one per sample.

    Returns:
        Array with one margin term per sample.
    """
    decision = np.dot(X, W) + b
    return 1 - y * decision

In [None]:
def print_loss(condition, y, W):
    """Print the mean hinge loss for the current margin terms.

    The hinge loss of a sample is ``max(0, 1 - y * f(x))``; ``condition``
    already holds ``1 - y * f(x)``, so the loss is its positive part.
    Previously this printed the labels of the margin-violating samples,
    which is the subgradient selector rather than the loss.

    Args:
        condition: Array of per-sample margin terms ``1 - y * f(x)``.
        y: Labels; unused, kept so existing call sites keep working.
        W: Weights; unused, kept so existing call sites keep working.

    Returns:
        None. The loss is written to standard output.
    """
    hinge_loss = np.maximum(0, condition)
    print(np.mean(hinge_loss))
    return

# Subgradient Descent

In [None]:
def sub_gradient_descent(condition, yval, W, b, X, lr):
    """Take one subgradient step on the hinge loss.

    Args:
        condition: Per-sample margin terms ``1 - y * f(x)``.
        yval: Labels, one per sample.
        W: Current weight vector.
        b: Current bias.
        X: Feature matrix, one row per sample.
        lr: Step size.

    Returns:
        Tuple ``(W, b)`` holding the updated weights and bias.
    """
    # Only samples whose margin term is positive contribute to the
    # subgradient; the others are zeroed out.
    active_labels = np.where(condition > 0, yval, 0)
    grad_W = -np.dot(X.T, active_labels)
    grad_b = -np.sum(active_labels, axis=0)
    return W - lr * grad_W, b - lr * grad_b

In [None]:
def predict(X_test, W, b):
    """Return the raw linear scores ``X_test . W + b``.

    The values are unthresholded; callers decide how to turn them into
    class labels.
    """
    return np.dot(X_test, W) + b

# Train/Test for Binary Classification

In [None]:
# Hyper-parameters and initial model (zero weights, zero bias).
lr = 0.01
b = 0.0
# Size the weight vector from the matrix we actually train on.
W = np.zeros(X_train_norm.shape[1], dtype='float')
iterations = 100

# Full-batch subgradient descent: recompute the margin terms, then step.
for i in range(iterations):
    condition = forward(X_train_norm, W, b, y_train)
    W, b = sub_gradient_descent(condition, y_train, W, b, X_train_norm, lr)

# Threshold the raw scores at zero to get 0/1 predictions, matching the
# 0/1 encoding used for y_test.
y_prob = predict(X_test_norm, W, b)
y_pred = np.where(y_prob > 0, 1, 0)
print('Accuracy of our model ', accuracy(y_pred, y_test))
print(y_pred)
print(y_test)

Accuracy of our model  0.9797979797979798
[0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0
 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 1 0 0 0 0 0 0 0
 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0

Our binary classifier works, so let's build a multiclass classifier. First, we bundle the SVM training and prediction steps into a single function.

In [None]:
def binary_SVM(lr, X_train, X_test, y_train, iterations=100):
    """Train a linear SVM by subgradient descent and score the test set.

    Args:
        lr: Step size for each subgradient update.
        X_train: Training feature matrix, one row per sample.
        X_test: Feature matrix to score after training.
        y_train: Training labels, one per training sample. The callers in
            this notebook pass +1 for the positive class and -1 otherwise.
        iterations: Number of full-batch update steps. Defaults to 100, the
            value that used to be hard-coded.

    Returns:
        Raw (unthresholded) scores for every row of ``X_test``.
    """
    b = 0.0
    # Size the weights from the data passed in rather than a notebook-level
    # global, so the function works for any feature matrix.
    W = np.zeros(X_train.shape[1], dtype='float')
    for _ in range(iterations):
        condition = forward(X_train, W, b, y_train)
        W, b = sub_gradient_descent(condition, y_train, W, b, X_train, lr)
    # Score once, after training has finished; scoring inside the loop only
    # repeated work whose result was overwritten on the next iteration.
    return predict(X_test, W, b)

# One vs. All Classification

In [None]:
# One-vs-all: train one binary SVM per digit and keep its test scores in the
# matching column.
n_classes = 10  # digits 0-9
y_prob_list = np.zeros((X_test.shape[0], n_classes), dtype='float')
for k in range(n_classes):
    # NOTE: this overwrites the notebook-level y_train on every pass.
    y_train = convert_to_binary(k, -1, y_train_orig)
    y_prob_list[:, k] = binary_SVM(0.01, X_train_norm, X_test_norm, y_train)

# The predicted digit is the class whose classifier gave the highest score.
y_pred = np.argmax(y_prob_list, axis=1)

print(y_pred)
print(y_test_orig)
print(y_pred - y_test_orig)
print('Accuracy of our model ', accuracy(y_pred, y_test_orig))

    

[6 9 3 7 2 1 5 2 5 2 1 9 4 0 4 2 3 7 8 8 4 3 9 7 5 6 3 5 6 3 4 9 1 4 4 6 9
 4 7 6 6 9 1 3 6 1 3 0 6 5 5 1 9 5 6 0 9 0 0 1 0 4 5 2 4 5 7 0 7 5 9 5 5 4
 7 0 4 5 5 9 9 0 2 3 8 0 6 4 4 9 1 2 8 3 5 2 9 0 4 4 4 3 5 3 1 3 5 9 4 2 7
 7 4 4 1 9 2 7 8 7 2 6 9 4 0 7 2 7 5 8 7 5 7 9 0 6 6 4 2 8 0 9 4 6 9 9 6 9
 0 5 5 6 6 0 6 4 9 9 3 9 7 2 9 0 6 5 3 6 5 9 9 8 4 2 1 3 7 7 2 2 3 9 8 0 3
 2 2 5 6 9 9 4 1 2 4 2 3 6 4 8 5 9 5 7 8 9 4 8 1 5 4 4 9 6 1 8 6 0 4 5 2 7
 1 6 4 5 6 0 3 2 3 6 7 1 9 1 4 7 6 5 1 5 5 1 4 2 8 8 9 9 7 6 2 2 2 3 4 8 8
 3 6 0 9 7 7 0 1 0 4 5 1 5 3 6 0 4 1 0 0 3 6 5 9 7 3 5 5 9 9 8 5 3 3 2 0 5
 8 3 4 0 2 4 6 4 3 4 5 0 5 2 1 3 1 4 1 1 7 0 1 5 2 1 2 8 7 0 6 4 8 8 5 1 8
 4 5 8 7 9 8 6 0 6 6 0 7 9 1 9 5 2 7 7 1 8 7 4 3 8 3 5 6 0 0 3 0 5 0 0 4 1
 2 8 8 5 9 6 3 1 8 8 4 2 3 8 9 8 8 5 0 6 3 3 7 1 6 4 1 2 1 1 6 4 7 4 8 3 4
 0 5 1 9 4 5 7 6 3 7 0 5 9 7 5 9 7 4 2 1 9 0 7 5 8 3 6 3 9 6 9 5 0 1 5 5 8
 3 3 6 2 6 5 7 2 0 8 7 3 7 0 2 2 3 5 8 7 3 6 5 9 9 2 1 6 3 0 7 1 1 9 6 1 1
 0 0 2 9 3 9 9 3 7 7 1 3 

This is pretty darn good, actually. A better test would be MNIST, but MNIST should be done with a one-vs-one classifier for better accuracy.
