#ANN A3: Multi-class SVM Loss & Affine Layer backward pass
# Submitted by: Sarim Aeyzaz (21i-0328)

In [None]:
import numpy as np
import pandas as pd
from sklearn import datasets
# Load scikit-learn's bundled Iris dataset once; its 'data', 'target' and
# 'target_names' entries are read by the training cell further down.
iris = datasets.load_iris()

In [None]:
def train_test_split(X, Y, split = 0.25):
  """Randomly partition paired arrays into a training part and a test part.

  X and Y are indexed with the same shuffled row indices, so sample/label
  pairs stay aligned. The first int(n_samples * split) shuffled indices form
  the test set; the remaining ones form the training set.

  Returns:
    (X_train, X_test, Y_train, Y_test)
  """
  n_samples = X.shape[0]
  n_test = int(n_samples * split)

  # One random ordering of 0 .. n_samples - 1, shared by X and Y
  shuffled = np.random.permutation(n_samples)
  held_out = shuffled[:n_test]
  kept = shuffled[n_test:]

  return X[kept], X[held_out], Y[kept], Y[held_out]

In [None]:
def calculate_accuracy(X, Y):
  """Return the percentage (0-100) of positions where X equals Y.

  X and Y are compared element-wise; the number of matches is divided by
  len(Y) and scaled to a percentage.
  """
  n_matches = (X == Y).sum()
  n_total = len(Y)
  return n_matches / n_total * 100

In [None]:
class NeuralNetwork:
  """Single affine layer (scores = [X, 1] . theta) trained by gradient descent.

  The training loss is a hinge-style multi-class loss: for every sample, the
  positive parts of (score_j - score_target) are summed over all classes j.
  A column of ones is appended to the features so the last row of theta acts
  as the bias.
  """

  def __init__(self):
    self.thetas = None  # Learned weights, (features + 1, labels); None until Train() finishes
    self.lr = None      # Learning rate passed to the most recent Train() call

  def __svm_loss_derrivative(self, X, y):
    """Return (loss, d loss / d scores) for a score matrix and integer targets.

    X is the (samples, labels) score matrix and y the (samples,) vector of
    target column indices. The returned derivative has the same shape as X.
    """
    # NOTE(review): no margin term is added to the score differences, unlike
    # the usual multi-class SVM hinge loss -- confirm this is intended.
    rows = np.arange(y.shape[0])
    derr = X - X[rows, y].reshape(-1, 1)  # (samples, labels) - (samples, 1): score minus target score
    derr[derr < 0] = 0                    # Hinge: classes scoring below the target contribute nothing
    loss = np.sum(derr)                   # Total (un-normalized) loss over every sample and class
    derr[derr > 0] = 1                    # Each violating class gets gradient +1
    derr[rows, y] = np.sum(derr, axis=1) * -1  # Target class gets -(number of violating classes)
    return loss, derr

  # Predict values given a feature matrix
  def __predict(self, X, theta):
    answer = np.dot(X, theta)  # (samples, features + 1) * (features + 1, labels)
    return answer  # (samples, labels)

  # Update theta values based on the partial derivative of the error
  def __gradient_descent(self, lr, theta, del_theta):
    # del_theta arrives as (labels, features + 1), hence the transpose
    return theta - lr * del_theta.T

  # Return weights (thetas) of the model
  def Get_Weights(self):
    return self.thetas

  # Returns logits (raw class scores) of test data
  def Predict_Confidence(self, X):
    """Return the (samples, labels) score matrix for feature matrix X.

    Raises:
      RuntimeError: if the model has no weights yet (Train not called).
    """
    if self.thetas is None:
      raise RuntimeError("Train must be called before predicting")
    X = np.array(X)
    X = np.hstack((X, np.ones((X.shape[0], 1))))  # Append the bias column, as in Train
    return self.__predict(X, self.Get_Weights())

  # Predicts class of test data (argmax)
  def Predict_Class(self, X):
    scores = self.Predict_Confidence(X)
    return np.argmax(scores, axis=1)

  def Train(self, X, Y, alpha = 0.0001, loss_at_iter = 50, max_iter = None):
    """Fit the weights on (X, Y) with full-batch gradient descent.

    Args:
      X: feature matrix, (samples, features).
      Y: target labels, one per sample. They are used directly as column
        indices into the score matrix, so they must be integer class indices
        0 .. labels - 1.
      alpha: learning rate.
      loss_at_iter: print the loss every this many iterations; 0 or None
        disables the progress print.
      max_iter: maximum number of iterations. When None, training stops once
        the relative change in loss between iterations drops below 0.01%.

    Raises:
      ValueError: if X and Y do not contain the same number of samples.
    """
    X = np.array(X)  # Dimensions are: (samples, features)
    X = np.hstack((X, np.ones((X.shape[0], 1))))  # Dimensions are (samples, features + 1)
    # Use the labels that were passed in, not a global from the calling notebook
    Y = np.array(Y).reshape(-1,)  # Dimensions are: (samples, )
    if Y.shape[0] != X.shape[0]:
      raise ValueError(f"X has {X.shape[0]} samples but Y has {Y.shape[0]} labels")
    Y_labels = np.unique(Y)  # Distinct labels; their count sets the number of outputs

    self.lr = alpha

    # Terminate on the change between old and new loss values when max_iter is not defined
    convergence_check = max_iter is None
    if convergence_check:
      print("Convergence Criteria will be used")

    old_loss = 0
    counter = 0

    # Generating random weights
    theta = np.random.random(size=(X.shape[1], len(Y_labels)))  # Dimensions of theta = (features + 1, labels)

    while True:

      predictions = self.__predict(X, theta)  # Returns (samples, labels)

      loss, derrivative = self.__svm_loss_derrivative(predictions, Y)

      if loss_at_iter and counter % loss_at_iter == 0:
        print(f"Loss at iteration {counter} = {loss}")

      # Nothing left to learn; this also keeps the relative-change division below safe
      if loss == 0:
        print(f"Final Loss at iteration {counter} = {loss}\n")
        self.thetas = theta
        return

      # (labels, samples) * (samples, features + 1), averaged over the samples
      del_theta = np.dot(derrivative.T, X) / X.shape[0]

      theta = self.__gradient_descent(alpha, theta, del_theta)

      # Either do convergence check or max iteration check
      if convergence_check:
        if abs(old_loss - loss) / loss < 0.0001:  # Loss changed by less than 0.01%
          print("Convergence Reached")
          break
      else:
        if counter >= max_iter - 1:  # Maximum iterations are reached
          print("Maximum Iterations Reached")
          break

      old_loss = loss
      counter += 1

    print(f"Final Loss at iteration {counter} = {loss}\n")

    self.thetas = theta


In [None]:
X, Y, Y_names = iris['data'], iris['target'], iris['target_names']
# Hold out 10% of the samples for evaluation
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, 0.1)

nn = NeuralNetwork()
# Train on the labels that line up row-for-row with X_train, not the full Y
nn.Train(X_train, Y_train, 0.01, 50)

Convergence Criteria will be used
Loss at iteration 0 = 804.7972130570148
Loss at iteration 50 = 7.43677740604278
Loss at iteration 100 = 7.858957662349513
Loss at iteration 150 = 6.579129008541231
Loss at iteration 200 = 4.94690451220563
Loss at iteration 250 = 2.4297536810977354
Loss at iteration 300 = 1.708940905574095
Loss at iteration 350 = 1.1011040437454787
Loss at iteration 400 = 0.8343384871768942
Loss at iteration 450 = 0.7008093446888859
Loss at iteration 500 = 0.6159477496882833
Loss at iteration 550 = 0.6064573342267492
Loss at iteration 600 = 0.5173410379304864
Loss at iteration 650 = 0.5297228859581518
Loss at iteration 700 = 0.5031735341555494
Loss at iteration 750 = 0.49007862193787766
Loss at iteration 800 = 0.3513881803754524
Loss at iteration 850 = 0.47103723785911455
Loss at iteration 900 = 0.30994077296805234
Loss at iteration 950 = 0.29567364442994304
Loss at iteration 1000 = 0.2785632730173093
Loss at iteration 1050 = 0.26673364338767325
Loss at iteration 1100 =

In [None]:
# Class decisions for the held-out samples, then the raw per-class scores
predicted = nn.Predict_Class(X_test)
print(nn.Predict_Confidence(X_test))

[[ 5.09500027  4.71171634  3.59603323]
 [ 6.6633369   7.30124981  7.24047851]
 [ 6.87292302  6.99321968  6.7918506 ]
 [ 7.51539531  7.87618328  8.03282431]
 [ 9.38317389  9.61787044  9.87037219]
 [ 6.59126334  6.67747402  6.30621918]
 [ 7.72432538  8.22133873  8.37395865]
 [ 4.97117224  4.68173748  3.68139394]
 [ 8.14546209  8.59323821  8.84274104]
 [ 5.16328105  4.64636372  3.57144081]
 [ 8.11887641  8.74292315  8.94742301]
 [ 4.83152049  4.57437336  3.49900456]
 [ 9.67296412 10.04712752 10.43440203]
 [ 6.24252821  6.60275754  6.376815  ]
 [ 6.77847424  7.0642109   6.78242875]]


In [None]:
# Report predictions, ground truth and accuracy, one item per line
print(
  f"Predicted: {predicted}",
  f"Actual: {Y_test}",
  f"Accuracy: {calculate_accuracy(predicted, Y_test)}",
  sep="\n",
)

Predicted: [0 1 1 2 2 1 2 0 2 0 2 0 2 1 1]
Actual: [0 1 1 2 2 1 2 0 2 0 2 0 2 1 1]
Accuracy: 100.0
