In [10]:
import numpy as np
import pandas as pd

def holdout(data, trainp, validp):
  m = np.shape(data)[0]

  train = data[0 : int(np.floor(m*trainp/100))]
  valid = data[int(np.floor(m*trainp/100)) : int(np.floor(m*trainp/100))+int(np.floor(m*validp/100))]
  test = data[int(np.floor(m*trainp/100))+int(np.floor(m*validp/100)) : None]

  y_train = train[:,-1] #shape = (rows,)
  x_train = np.delete(train, -1, axis=1)

  y_valid = valid[:,-1] #shape = (rows,)
  x_valid = np.delete(valid, -1, axis=1)

  y_test = test[:,-1] #shape = (rows,)
  x_test = np.delete(test, -1, axis=1)

  return x_train,x_valid,x_test,y_train,y_valid,y_test

def onevall(k,y):
  return np.where(y==k, 1, -1)

def rbf_kernel(x1, x2):
  return np.exp(-np.linalg.norm(x1-x2)**2 / (2 * 2.25**2)) # fixed sigma=2.25

def polynomial_kernel(x1, x2):
  return (np.dot(x1, x2))**2 # fixed degree=2

def gram_matrix(x1, x2, kernel):
  if kernel=='rbf':
    K = np.zeros((x1.shape[0],x2.shape[0]))
    for i in range(x1.shape[0]):
      for j in range(x2.shape[0]):
        K[i,j] = rbf_kernel(x1[i], x2[j])
    return K
 
  if kernel=='polynomial':
    K = np.zeros((x1.shape[0],x2.shape[0]))
    for i in range(x1.shape[0]):
      for j in range(x2.shape[0]):
        K[i,j] = polynomial_kernel(x1[i], x2[j])
    return K

def calculate_eta(x1,x2, kernel):
  if kernel=='rbf':
    return (rbf_kernel(x1,x1) + rbf_kernel(x2,x2) -2*rbf_kernel(x1,x2))
  if kernel=='polynomial':
    return (polynomial_kernel(x1,x1) + polynomial_kernel(x2,x2) -2*polynomial_kernel(x1,x2))

def calculate_L_H(C, alpha_j, alpha_i, y_j, y_i):
  if(y_i != y_j):
    return (max(0, alpha_j - alpha_i), min(C, C - alpha_i + alpha_j))
  else:
    return (max(0, alpha_i + alpha_j - C), min(C, alpha_i + alpha_j))

def SVM(x_train, y_train, C, epsilon, max_iters, kernel):
  K = gram_matrix(x_train,x_train,kernel)
  alpha = np.zeros(x_train.shape[0]) # lagrange multipliers
  iters = 0
  b = 0

  while True:
    alpha_old = alpha
    iters += 1
    for j in range(x_train.shape[0]):
      i = np.random.choice(np.delete(np.arange(x_train.shape[0]),np.where(np.arange(x_train.shape[0])==j)))
      xi, xj, yi, yj = x_train[i,:], x_train[j,:], y_train[i], y_train[j]
      L, H = calculate_L_H(C, alpha[j], alpha[i], yj, yi) # higher and lower bounds
      if L==H:
        continue

      eta = calculate_eta(xi, xj, kernel)
      if eta <= 0:
        continue

      E_i = np.sum(alpha*y_train*K[:,i]) - b - yi
      E_j = np.sum(alpha*y_train*K[:,j]) - b - yj
    
      alpha_j_old = alpha[j]
      alpha_i_old = alpha[i]

      alpha[j] = alpha[j] + (yj * (E_i - E_j))/eta
      alpha[j] = max(alpha[j], L)
      alpha[j] = min(alpha[j], H)

      alpha[i] = alpha_i_old + yi*yj * (alpha_j_old - alpha[j])

      b1 = E_i + yi*(alpha[i] - alpha_i_old)*K[i,i] + yj*(alpha[j] - alpha_j_old)*K[i,j] + b 
      b2 = E_j + yi*(alpha[i] - alpha_i_old)*K[i,j] + yj*(alpha[j] - alpha_j_old)*K[j,j] + b
      if alpha[i]>L and alpha[i]<H:
        b = b1
      elif alpha[j]>L and alpha[j]<H:
        b = b2
      else :
        b = (b1+b2)/2

    diff = np.linalg.norm(alpha_old - alpha)
    if diff < epsilon:
      break
    if iters >= max_iters:
      break
  
  n_sv = np.sum(alpha>0) # number of support vectors
  alpha_sv = alpha[np.where(alpha>0)[0]]
  x_sv = x_train[np.where(alpha>0)[0],:]
  y_sv = y_train[np.where(alpha>0)[0]]
  w = (alpha_sv*y_sv) @ x_sv

  return w, b, diff

def performance(y, pred):
  m = np.zeros((3,3)) # confusion matrix
  for p in range(len(pred)):
    if pred[p]==1 and y[p]==1:
      m[0,0]+=1
    if pred[p]==2 and y[p]==2:
      m[1,1]+=1
    if pred[p]==3 and y[p]==3:
      m[2,2]+=1
    if pred[p]==1 and y[p]==2:
      m[1,0]+=1
    if pred[p]==1 and y[p]==3:
      m[2,0]+=1
    if pred[p]==2 and y[p]==1:
      m[0,1]+=1
    if pred[p]==2 and y[p]==3:
      m[2,1]+=1
    if pred[p]==3 and y[p]==1:
      m[0,2]+=1
    if pred[p]==3 and y[p]==2:
      m[1,2]+=1
  ind_accuracy = [m[0,0]/np.sum(m[0,:]), m[1,1]/np.sum(m[1,:]), m[2,2]/np.sum(m[2,:])]
  accuracy = (m[0,0]+m[1,1]+m[2,2])/np.sum(m)
  return ind_accuracy, accuracy

In [11]:
data = pd.read_excel('/content/drive/MyDrive/NNFL Assignments (Aug 2021)/Assignment 2/data5.xlsx')
cols = np.array(data.columns, ndmin=2)
data = data.to_numpy()
data = np.concatenate((cols,data), axis=0)

# shuffle & train-test-valid-split
np.random.seed(0)
np.random.shuffle(data)
x_train, x_valid, x_test, y_train, y_valid, y_test = holdout(data, 70, 10)

# normalizing input data
mu = np.mean(x_train, axis=0)
std = np.std(x_train, axis=0)

x_train = (x_train-mu)/std
x_valid = (x_valid-mu)/std
x_test = (x_test-mu)/std

In [12]:
# one vs. all encoding output data
y_train1 = onevall(1,y_train)
y_train2 = onevall(2,y_train)
y_train3 = onevall(3,y_train)
y_test1 = onevall(1,y_test)
y_test2 = onevall(2,y_test)
y_test3 = onevall(3,y_test)

RBF KERNEL

In [13]:
# SVM hyperparameters
C = 13
epsilon = 1e-3
max_iters = 1000
kernel = 'rbf'

In [14]:
# training 3 models
w1, b1, diff1 = SVM(x_train, y_train1, C, epsilon, max_iters, kernel)
w2, b2, diff2 = SVM(x_train, y_train2, C, epsilon, max_iters, kernel)
w3, b3, diff3 = SVM(x_train, y_train3, C, epsilon, max_iters, kernel)

In [15]:
# testing
pred_1 = ((w1.T @ x_test.T) + b1)
pred_2 = ((w2.T @ x_test.T) + b2)
pred_3 = ((w3.T @ x_test.T) + b3)

# pred vector
pred = np.zeros((x_test.shape[0],3))
for i in range(x_test.shape[0]):
  pred[i] = [pred_1[i], pred_2[i], pred_3[i]]

# final prediction
y_pred = np.argmax(pred, axis=1) +1

# performance
ind_accuracy, accuracy = performance(y_test, y_pred)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 0.5
accuracy of class 2 = 1.0
accuracy of class 3 = 1.0
overall accuracy of classifier = 0.8809523809523809


POLYNOMIAL KERNEL


In [16]:
# SVM hyperparameters
C = 13
epsilon = 1e-3
max_iters = 1000
kernel = 'polynomial'

In [17]:
# training 3 models
w1, b1, diff1 = SVM(x_train, y_train1, C, epsilon, max_iters, kernel)
w2, b2, diff2 = SVM(x_train, y_train2, C, epsilon, max_iters, kernel)
w3, b3, diff3 = SVM(x_train, y_train3, C, epsilon, max_iters, kernel)

In [18]:
# testing
pred_1 = ((w1.T @ x_test.T) + b1)
pred_2 = ((w2.T @ x_test.T) + b2)
pred_3 = ((w3.T @ x_test.T) + b3)

# pred vector
pred = np.zeros((x_test.shape[0],3))
for i in range(x_test.shape[0]):
  pred[i] = [pred_1[i], pred_2[i], pred_3[i]]

# final prediction
y_pred = np.argmax(pred, axis=1) +1

# performance
ind_accuracy, accuracy = performance(y_test, y_pred)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 0.0
accuracy of class 2 = 1.0
accuracy of class 3 = 1.0
overall accuracy of classifier = 0.7619047619047619
