# SVM Soft Margin Extension with NumPy

In [1]:
import numpy as np
import csv
import math
from numpy import genfromtxt
from sklearn.datasets import load_digits
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from cvxopt import matrix, solvers
import matplotlib.pyplot as plt
%matplotlib inline

## 1. MNIST

### Homework implementation

In [2]:
digits=load_digits()
X = digits.data
y = digits.target

# Scale training features
X_scale = StandardScaler()
X = X_scale.fit_transform(digits.data)

In [3]:
# Assign X and y the subset of data that describe the numbers 8 and 9

new_X = []
new_y = []
for i in range(len(X)):
    if y[i] == 8:
        new_X.append(X[i])
        new_y.append(y[i])
    elif y[i] == 9:
        new_X.append(X[i])
        new_y.append(y[i])
new_X = np.array(new_X)
new_y = np.array(new_y)

X = new_X
y = new_y

In [4]:
# Train-test split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.6,random_state=42)


In [5]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(141, 64)
(141,)
(213, 64)
(213,)


In [6]:
y_train.shape

(141,)

In [7]:
def kernel_svm(X, y): 

    m,n = X.shape
    y = y.reshape(-1,1)
    X_y = X*y
    H = np.dot(X_y, X_y.T)
    
    P = matrix(H)
    q = matrix(-np.ones((m, 1)))
    G = matrix(-np.eye(m))
    h = matrix(np.zeros(m))
    A = matrix(y.reshape(1,-1))
    A = matrix(A, (1, m), 'd')
    b = matrix(np.zeros(1))
    
    sol = solvers.qp(P,q,G,h,A,b) 
    
    alphas = np.array(sol['x'])[:,0]
    
    return alphas

# fit svm dual classifier
alphas = kernel_svm(X_train, y_train)

     pcost       dcost       gap    pres   dres
 0: -3.8262e-02 -4.0188e-02  2e+02  1e+01  1e+00
 1: -3.0905e-04 -1.1130e-04  2e+00  1e-01  1e-02
 2: -1.8827e-05 -6.7504e-05  3e-02  2e-03  2e-04
 3: -6.8873e-06 -1.1057e-05  1e-03  8e-05  7e-06
 4: -1.2441e-06 -1.0286e-07  3e-05  3e-06  2e-07
 5: -1.3608e-08 -1.0581e-11  4e-07  3e-08  2e-09
 6: -1.3618e-10 -1.0581e-15  4e-09  3e-10  2e-11
Optimal solution found.


In [8]:
def kernel_svm(X, y): 

    m,n = X.shape
    y = y.reshape(-1,1)*1.
    X_y = X*y
    H = np.dot(X_y, X_y.T)
    
    P = matrix(H)
    q = matrix(-np.ones((m, 1)))
    G = matrix(-np.eye(m))
    h = matrix(np.zeros(m))
    A = matrix(y.reshape(1,-1))
    A = matrix(A, (1, m), 'd')
    b = matrix(np.zeros(1))
    
    sol = solvers.qp(P,q,G,h,A,b) 
    
    alphas = np.array(sol['x'])[:,0]
    
    return alphas

# fit svm dual classifier
alphas = kernel_svm(X_train, y_train)
print(alphas)

     pcost       dcost       gap    pres   dres
 0: -3.8262e-02 -4.0188e-02  2e+02  1e+01  1e+00
 1: -3.0905e-04 -1.1130e-04  2e+00  1e-01  1e-02
 2: -1.8827e-05 -6.7504e-05  3e-02  2e-03  2e-04
 3: -6.8873e-06 -1.1057e-05  1e-03  8e-05  7e-06
 4: -1.2441e-06 -1.0286e-07  3e-05  3e-06  2e-07
 5: -1.3608e-08 -1.0581e-11  4e-07  3e-08  2e-09
 6: -1.3618e-10 -1.0581e-15  4e-09  3e-10  2e-11
Optimal solution found.
[ 1.10008300e-11  2.09343171e-11 -1.52888491e-11 -1.67481751e-11
 -1.66363190e-11 -1.37071990e-11 -1.49454228e-11 -4.22863306e-12
 -1.65478139e-11  3.43433702e-11  2.60853523e-11  2.11618856e-11
 -1.54137825e-11 -1.85374511e-11 -1.44943188e-11  2.11612318e-11
  2.58213534e-11 -1.36190907e-11 -1.72111703e-11 -1.74237986e-11
  1.34051841e-11  8.87109839e-12 -1.49446968e-11 -1.58828641e-11
 -1.29457904e-11  1.39840867e-11  2.43037776e-11  2.68312148e-11
  1.28495891e-11 -9.40838869e-12 -1.56115124e-11  2.20192537e-11
  2.57802252e-11 -1.64524268e-11  1.20248647e-11  8.71851732e-12


In [9]:
def compute_classification_boundary (X, y, alpha):
    #cond = (alphas > 1e-3).reshape(-1)
    cond = [i for i in range(len(alphas)) if alphas[i] > 1e-12]
    w = np.dot(X.T, alpha*y).reshape(-1,1)
    w0 = y[cond] - np.dot(X[cond], w)
    w0 = np.mean(w0)
    return w, w0



w, w0 = compute_classification_boundary(X_train, y_train, alphas)

In [10]:
# Determine which training examples are support vectors
support_vectors = []

for i in range(len(alphas)):
    if alphas[i] > 1e-12:
        support_vectors.append([X_train[i], y_train[i], i])

In [11]:
def K(xi, xj):
    return np.dot(xi,xj)

alpha_indices = [support_vectors[i][2] for i in range(len(support_vectors))]
print(alpha_indices)

[0, 1, 9, 10, 11, 15, 16, 20, 21, 25, 26, 27, 28, 31, 32, 34, 35, 40, 45, 46, 50, 53, 54, 55, 57, 58, 60, 61, 63, 64, 65, 67, 69, 73, 75, 76, 79, 83, 86, 89, 91, 95, 96, 103, 104, 107, 108, 110, 114, 117, 118, 120, 121, 125, 126, 127, 128, 129, 130, 131, 133, 134, 135, 136, 137, 138, 139, 140]


In [12]:
def f_dual(x):
    summation = 0
    for i in range(len(support_vectors)):
        summation += alphas[alpha_indices[i]]*y_train[alpha_indices[i]]*K(X_train[alpha_indices[i]],x)
    if (summation >= 0):
        return 8
    else:
        return 9

In [13]:
# Test SVM dual classifier on X_test

def predict(X):
    predictions = []
    for i in range(len(X_test)):
        predictions.append(f_dual(X_test[i]))
    return predictions

y_pred = predict(X_test)

In [14]:
# Print accuracy

print('Prediction accuracy is {}%'.format(accuracy_score(y_test, y_pred) * 100))

Prediction accuracy is 79.81220657276995%


### Extended implementation using Numpy

In [54]:
def kernel_soft_margin_svm(X, y, C): 

    m,n = X.shape
    y = y.reshape(-1,1)
    X_y = X*y
    H = np.dot(X_y, X_y.T)
    
    P = matrix(H)
    q = matrix(-np.ones((m, 1)))
    
    # Changed G and h
    G = matrix(np.vstack((np.diag(np.ones(m))*-1, np.identity(m))))
    h = matrix(np.hstack((np.zeros(m), np.ones(m)*C)))
    
    A = matrix(y.reshape(1,-1))
    A = matrix(A, (1, m), 'd')
    b = matrix(np.zeros(1))
    
    sol = solvers.qp(P,q,G,h,A,b) 
    
    alphas = np.array(sol['x'])[:,0]
    
    return alphas

# fit svm dual classifier
alphas = kernel_soft_margin_svm(X_train, y_train, 0.001)

print(alphas)

     pcost       dcost       gap    pres   dres
 0: -1.9163e-02 -1.4487e-01  3e+02  2e+01  2e-14
 1: -2.5634e-04 -1.4459e-01  3e+00  2e-01  3e-14
 2: -6.1243e-05 -1.2071e-01  2e-01  3e-03  6e-16
 3: -6.9198e-06 -7.9138e-03  9e-03  1e-04  5e-16
 4: -1.2140e-06 -2.9272e-04  3e-04  4e-06  5e-16
 5: -1.3375e-08 -2.9688e-06  3e-06  4e-08  8e-16
 6: -1.3386e-10 -2.9688e-08  3e-08  4e-10  6e-16
Optimal solution found.
[ 1.24120257e-11  1.41890516e-11 -1.60729212e-11 -1.64779020e-11
 -1.68332696e-11 -1.38694933e-11 -1.63472005e-11  1.16480750e-12
 -1.73180058e-11  2.89400710e-11  1.99031890e-11  1.40888734e-11
 -1.68585770e-11 -1.64294551e-11 -1.37141364e-11  2.23473821e-11
  1.71598390e-11 -1.41454501e-11 -1.79452898e-11 -1.69506435e-11
  1.31399772e-11  2.09989902e-11 -1.51620537e-11 -1.34305108e-11
 -1.57734870e-11  1.53280444e-11  2.22223175e-11  2.63242526e-11
  1.81660315e-11 -1.14676542e-11 -1.68604848e-11  1.69591847e-11
  2.17654599e-11 -1.43833269e-11  1.00316635e-11  1.28877874e-11


In [55]:
def compute_classification_boundary (X, y, alpha):
    #cond = (alphas > 1e-3).reshape(-1)
    cond = [i for i in range(len(alphas)) if alphas[i] > 1e-12]
    w = np.dot(X.T, alpha*y).reshape(-1,1)
    w0 = y[cond] - np.dot(X[cond], w)
    w0 = np.mean(w0)
    return w, w0



w, w0 = compute_classification_boundary(X_train, y_train, alphas)

In [56]:
# Determine which training examples are support vectors
support_vectors = []

for i in range(len(alphas)):
    if alphas[i] > 1e-12:
        support_vectors.append([X_train[i], y_train[i], i])

# print("The following are support vectors: ")
# for i in range(len(support_vectors)):
#     print(support_vectors[i][0])

In [57]:
def K(xi, xj):
    return np.dot(xi,xj)

alpha_indices = [support_vectors[i][2] for i in range(len(support_vectors))]
print(alpha_indices)

[0, 1, 7, 9, 10, 11, 15, 16, 20, 21, 25, 26, 27, 28, 31, 32, 34, 35, 40, 45, 46, 50, 53, 54, 55, 57, 58, 60, 61, 63, 64, 65, 67, 69, 73, 75, 76, 79, 83, 86, 89, 91, 95, 96, 103, 104, 107, 108, 110, 114, 117, 118, 120, 121, 125, 126, 127, 128, 129, 130, 131, 133, 134, 135, 136, 137, 138, 139, 140]


In [58]:
def f_dual(x):
    summation = 0
    for i in range(len(support_vectors)):
        summation += alphas[alpha_indices[i]]*y_train[alpha_indices[i]]*K(X_train[alpha_indices[i]],x)
    if (summation >= 0):
        return 8
    else:
        return 9

In [59]:
# Test SVM dual classifier on X_test

def predict(X):
    predictions = []
    for i in range(len(X_test)):
        predictions.append(f_dual(X_test[i]))
    return predictions

y_pred = predict(X_test)

In [60]:
# Print accuracy

print('Prediction accuracy is {}%'.format(accuracy_score(y_test, y_pred) * 100))

Prediction accuracy is 84.03755868544602%


## 2. Fashion-MNIST

### Homework implementation

In [61]:
from keras.datasets import fashion_mnist
((trainX, trainY), (testX, testY)) = fashion_mnist.load_data()

Using TensorFlow backend.


In [62]:
X_train = trainX
y_train = trainY
X_test = testX
y_test = testY

In [63]:
# Assign X_train and y_train the subset of data that describe the labels 0 and 2 (T-shirts and pullovers, respectively)

new_X_train = []
new_y_train = []
for i in range(len(X_train)):
    if y_train[i] == 0:
        new_X_train.append(X_train[i])
        new_y_train.append(y_train[i])
    elif y_train[i] == 2:
        new_X_train.append(X_train[i])
        new_y_train.append(y_train[i])
new_X_train = np.array(new_X_train)
new_y_train = np.array(new_y_train)

X_train = new_X_train
y_train = new_y_train

In [64]:
# Assign X_test and y_test the subset of data that describe the labels 0 and 2 (T-shirts and pullovers, respectively)

new_X_test = []
new_y_test = []
for i in range(len(X_test)):
    if y_test[i] == 0:
        new_X_test.append(X_test[i])
        new_y_test.append(y_test[i])
    elif y_test[i] == 2:
        new_X_test.append(X_test[i])
        new_y_test.append(y_test[i])
new_X_test = np.array(new_X_test)
new_y_test = np.array(new_y_test)

X_test = new_X_test
y_test = new_y_test

In [65]:
X_train = np.array([X_train[i].flatten() for i in range(len(X_train))])
X_test = np.array([X_test[i].flatten() for i in range(len(X_test))])

print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(12000, 784)
(12000,)
(2000, 784)
(2000,)


In [66]:
# Downsample the data

# Add y_train back as an additional column to X_train
y_train = y_train.reshape((-1,1))
X_train = np.append(X_train, y_train, axis=1)

# Add y_test back as an additional column to X_test
y_test = y_test.reshape((-1,1))
X_test = np.append(X_test, y_test, axis=1)

# Shuffle the data
np.random.shuffle(X_train)
np.random.shuffle(X_test)

# Slice out only the first 141 from X_train and 213 from X_test
X_train = X_train[0:141]
X_test = X_test[0:213]

# Remove the last columns of X_train and X_test and place them back into y_train and y_test
y_train = X_train[:,-1]
y_test = X_test[:,-1]
X_train = X_train[:,0:X_train.shape[1]-1]
X_test = X_test[:,0:X_test.shape[1]-1]

print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(141, 784)
(141,)
(213, 784)
(213,)


In [67]:
# Scale the dataset

X_scale = StandardScaler()
X_train = X_scale.fit_transform(X_train) 
X_test = X_scale.fit_transform(X_test) 

In [68]:
def kernel_svm(X, y): 

    m,n = X.shape
    y = y.reshape(-1,1)*1.
    X_y = y*X
    H = np.dot(X_y, X_y.T)*1.

    P = matrix(H)
    q = matrix(-np.ones((m, 1)))
    G = matrix(-np.eye(m))
    h = matrix(np.zeros(m))
    A = matrix(y.reshape(1,-1))
    A = matrix(A, (1, m), 'd')
    b = matrix(np.zeros(1))

    
    sol = solvers.qp(P,q,G,h,A,b) 
    
    alphas = np.array(sol['x'])[:,0]
    
    return alphas

# fit svm dual classifier
alphas = kernel_svm(X_train, y_train)
print(alphas)

     pcost       dcost       gap    pres   dres
 0: -6.8000e+01 -1.3600e+02  3e+02  1e+01  2e+00
 1: -3.0479e+02 -3.0784e+02  8e+01  6e+00  1e+00
 2: -3.4242e+04 -3.4246e+04  8e+01  6e+00  1e+00
 3: -3.3940e+08 -3.3940e+08  4e+02  6e+00  1e+00
 4: -3.3601e+14 -3.3601e+14  3e+06  6e+00  1e+00
 5: -3.3265e+22 -3.3265e+22  3e+12  1e+06  1e+00
 6: -3.2932e+32 -3.2932e+32  3e+20  9e+15  1e+00
 7: -3.2603e+44 -3.2603e+44  3e+30  4e+00  1e+00
 8: -3.2126e+58 -3.2126e+58  3e+42  4e+00  1e+00
 9: -5.4955e+74 -5.4955e+74  5e+56  4e+00  1e+00
10: -1.0166e+91 -1.0166e+91  1e+71  4e+00  1e+00
11: -4.4480e+106 -4.4480e+106  4e+84  4e+00  1e+00
12: -4.4035e+150 -2.7145e+156  3e+156 9e+133  4e+05
13: -4.4035e+150 -2.7150e+154  3e+154 9e+133  4e+03
14: -4.4049e+150 -2.7595e+152  3e+152 9e+133  4e+01
15: -4.4049e+150 -2.7595e+152  3e+152 2e+134  4e+01
16: -4.4049e+150 -2.7595e+152  3e+152 2e+134  4e+01
17: -4.4049e+150 -2.7595e+152  3e+152 2e+134  4e+01
18: -4.4049e+150 -2.7595e+152  3e+152 2e+134  4e+0

In [69]:
def compute_classification_boundary (X, y, alpha):
    #cond = (alphas > 1e-3).reshape(-1)
    cond = [i for i in range(len(alphas)) if alphas[i] < 1e-7]
    w = np.dot(X.T, alpha*y).reshape(-1,1)
    w0 = y[cond] - np.dot(X[cond], w)
    w0 = np.mean(w0)
    return w, w0



w, w0 = compute_classification_boundary(X_train, y_train, alphas)

In [70]:
# Determine which training examples are support vectors
support_vectors = []

for i in range(len(alphas)):
    if alphas[i] < 1e-7:
        support_vectors.append([X_train[i], y_train[i], i])

In [71]:
def K(xi, xj):
    return np.dot(xi,xj)

alpha_indices = [support_vectors[i][2] for i in range(len(support_vectors))]
print(alpha_indices)

[0, 3, 5, 13, 16, 17, 18, 20, 21, 24, 27, 28, 29, 33, 36, 41, 42, 45, 55, 58, 61, 62, 63, 64, 68, 70, 71, 73, 74, 75, 78, 80, 81, 85, 89, 92, 93, 94, 97, 98, 100, 101, 102, 103, 105, 109, 111, 113, 117, 124, 125, 127, 130, 131, 138, 139]


In [72]:
def f_dual(x):
    summation = 0
    for i in range(len(support_vectors)):
        summation += alphas[alpha_indices[i]]*y_train[alpha_indices[i]]*K(X_train[alpha_indices[i]],x)
    if (summation >= 0):
        return 0
    else:
        return 2

In [73]:
# Test SVM dual classifier on X_test

def predict(X):
    predictions = []
    for i in range(len(X_test)):
        predictions.append(f_dual(X_test[i]))
    return predictions

y_pred = predict(X_test)

In [74]:
# Print accuracy

print('Prediction accuracy is {}%'.format(accuracy_score(y_test, y_pred) * 100))

Prediction accuracy is 87.32394366197182%


### Extended implementation using Numpy

In [75]:
def kernel_soft_margin_svm(X, y, C): 

    m,n = X.shape
    y = y.reshape(-1,1)*1.
    X_y = X*y
    H = np.dot(X_y, X_y.T)*1.
    
    P = matrix(H)
    q = matrix(-np.ones((m, 1)))
    
    # Changed G and h
    G = matrix(np.vstack((np.diag(np.ones(m))*-1, np.identity(m))))
    h = matrix(np.hstack((np.zeros(m), np.ones(m)*C)))
    
    A = matrix(y.reshape(1,-1))
    A = matrix(A, (1, m), 'd')
    b = matrix(np.zeros(1))
    
    sol = solvers.qp(P,q,G,h,A,b) 
    
    alphas = np.array(sol['x'])[:,0]
    
    return alphas

# fit svm dual classifier
alphas = kernel_soft_margin_svm(X_train, y_train, 0.01)
print(alphas)

     pcost       dcost       gap    pres   dres
 0: -3.4340e+01 -2.4513e+00  6e+02  3e+01  3e-16
 1: -8.8217e-01 -2.4317e+00  8e+00  3e-01  5e-16
 2: -5.7624e-01 -1.4136e+00  9e-01  3e-03  4e-16
 3: -6.6265e-01 -7.0241e-01  4e-02  1e-04  3e-16
 4: -6.7983e-01 -6.8023e-01  4e-04  1e-06  2e-16
 5: -6.8000e-01 -6.8000e-01  4e-06  1e-08  3e-16
 6: -6.8000e-01 -6.8000e-01  4e-08  1e-10  3e-16
Optimal solution found.
[-8.56293230e-28  9.99999974e-03  9.99999974e-03  6.50534043e-27
  9.99999974e-03  8.48243603e-26  9.99999974e-03  9.99999974e-03
  9.99999974e-03  9.99999974e-03  9.99999974e-03  9.99999974e-03
  5.27602331e-26 -5.19956758e-27  9.99999974e-03  9.99999974e-03
  2.69652899e-26  3.73005360e-26 -8.61825824e-26  1.90149523e-26
  3.89815599e-26  9.86331063e-26  9.99999974e-03  9.99999974e-03
 -1.46910059e-25  9.99999974e-03  9.99999974e-03  8.50029001e-26
 -1.67609057e-25  2.63621268e-26  9.99999974e-03  9.99999974e-03
  9.99999974e-03 -1.13665882e-25  9.99999974e-03  9.99999974e-03


In [76]:
def compute_classification_boundary (X, y, alpha):
    #cond = (alphas > 1e-3).reshape(-1)
    cond = [i for i in range(len(alphas)) if alphas[i] < 0]
    w = np.dot(X.T, alpha*y).reshape(-1,1)
    w0 = y[cond] - np.dot(X[cond], w)
    w0 = np.mean(w0)
    return w, w0



w, w0 = compute_classification_boundary(X_train, y_train, alphas)

In [77]:
# Determine which training examples are support vectors
support_vectors = []

for i in range(len(alphas)):
    if alphas[i] < 0:
        support_vectors.append([X_train[i], y_train[i], i])

# print("The following are support vectors: ")
# for i in range(len(support_vectors)):
#     print(support_vectors[i][0])

In [78]:
def K(xi, xj):
    return np.dot(xi,xj)

alpha_indices = [support_vectors[i][2] for i in range(len(support_vectors))]
print(alpha_indices)

[0, 13, 18, 24, 28, 33, 36, 38, 39, 40, 42, 46, 55, 58, 62, 71, 74, 75, 78, 85, 89, 94, 101, 102, 103, 111, 113, 125, 127, 130, 132, 133, 135, 140]


In [79]:
def f_dual(x):
    summation = 0
    for i in range(len(support_vectors)):
        summation += alphas[alpha_indices[i]]*y_train[alpha_indices[i]]*K(X_train[alpha_indices[i]],x)
    if (summation >= 0):
        return 0
    else:
        return 2

In [80]:
# Test SVM dual classifier on X_test

def predict(X):
    predictions = []
    for i in range(len(X_test)):
        predictions.append(f_dual(X_test[i]))
    return predictions

y_pred = predict(X_test)

In [81]:
# Print accuracy

print('Prediction accuracy is {}%'.format(accuracy_score(y_test, y_pred) * 100))

Prediction accuracy is 89.67136150234741%
