**Support Vector Machine in a Class Form**



In [None]:
import time
import numpy as np
import numpy as np
from keras.datasets import cifar100
import matplotlib.pyplot as plt
import cv2

class SVM:
    """Linear multiclass classifier trained on the multiclass hinge loss.

    The model is a weight matrix of shape (num_features, num_classes); the
    score of class ``j`` for sample ``x`` is ``x @ weights[:, j]``. Training
    uses mini-batch gradient descent with an L2 penalty on the weights.

    The data passed to the constructor is only stored on the instance; every
    training/evaluation method receives its data explicitly as arguments.
    """

    # Class-level fallbacks; __init__ always shadows them per instance.
    training_data = []
    testing_data = []

    def __init__(self, training_data, testing_data):
        """Store references to the training and testing data."""
        self.training_data = training_data
        self.testing_data = testing_data

    def __len__(self, data=None):
        """Return the number of samples in ``data``.

        When ``data`` is omitted the stored training data is measured, which
        is what makes the built-in ``len(instance)`` work (the protocol calls
        ``__len__`` without extra arguments).
        """
        if data is None:
            data = self.training_data
        return len(data)

    def calculate_linear_output(self, data, weights):
        """Return the class scores ``data @ weights``."""
        return np.dot(data, weights)

    def calculate_distance(self, X, w, y=None):
        """Return ``y * (X @ w) - 1`` for the given labels.

        Args:
            X: sample(s) to evaluate.
            w: weight vector/matrix.
            y: label(s) multiplied with the linear output. Required; it is
                a trailing keyword only so that the original positional
                parameters ``(X, w)`` keep their meaning.

        Raises:
            ValueError: if ``y`` is not supplied.
        """
        if y is None:
            raise ValueError("calculate_distance requires the labels 'y'")
        return y * (np.dot(X, w)) - 1

    def calculate_gradient(self, weights, regularisation_param, training_data, training_labels, num_classes):
        """Return the gradient of the L2-regularised multiclass hinge loss.

        Args:
            weights: array of shape (num_features, num_classes).
            regularisation_param: multiplier of the squared-weights penalty.
            training_data: 2-D array, one sample per row.
            training_labels: 1-D integer array of class indices (it is used
                directly as a column index into the score matrix).
            num_classes: unused; kept so existing callers keep working. The
                number of classes is implied by ``weights``.

        Returns:
            Array with the same shape as ``weights``.
        """
        num_training_samples = training_data.shape[0]
        sample_index = np.arange(num_training_samples)

        # w^T x for every sample and class
        scores = self.calculate_linear_output(training_data, weights)

        # margin of every class relative to the score of the true class
        true_class_scores = scores[sample_index, training_labels]
        margins = scores - true_class_scores[:, np.newaxis] + 1

        # 1 where a class violates the margin, 0 elsewhere
        violations = np.where(margins > 0, 1, 0)

        # The true class always has margin exactly 1, so it would be counted
        # as a violation of itself; clear it before counting.
        violations[sample_index, training_labels] = 0

        # d(loss)/d(score of true class) = -(number of violating classes)
        violations[sample_index, training_labels] = -np.sum(violations, axis=1)

        gradient = (1 / num_training_samples) * np.dot(training_data.T, violations)

        # derivative of the penalty regularisation_param * ||weights||^2
        return gradient + (2 * regularisation_param * weights)

    def train_model(self, training_data, training_labels, weights, learning_rate, regularisation_param, iterations, batch_size, num_classes):
        """Run mini-batch gradient descent and return the updated weights.

        Each iteration draws ``batch_size`` sample indices with
        ``np.random.choice`` (i.e. with replacement), computes the gradient
        on that batch and takes one step of size ``learning_rate``. The
        ``weights`` argument itself is not modified in place.
        """
        num_training_samples = len(training_data)

        for _ in range(iterations):
            batch = np.random.choice(num_training_samples, batch_size)
            gradient = self.calculate_gradient(
                weights,
                regularisation_param,
                training_data[batch],
                training_labels[batch],
                num_classes,
            )
            weights = weights - learning_rate * gradient

        return weights

    def calculate_accuracy(self, data, labels, weights):
        """Return the percentage of samples whose predicted class equals its label.

        The prediction for a sample is the column index of its largest score.
        ``labels`` may be 1-D or a single column; it is flattened first so
        that the comparison is element-wise rather than a broadcast
        ``(n, n)`` comparison.
        """
        labels = np.ravel(labels)

        # w^T x
        scores = self.calculate_linear_output(data, weights)

        # index of the best-scoring class in every row
        prediction = np.argmax(scores, axis=1)

        total_correct_predictions = (prediction == labels).sum()
        num_data_points = len(data)
        return (total_correct_predictions / num_data_points) * 100


# centre the data
def centre_data(train, validation, test):
    """Centre three data sets in place using the training set's column means.

    The per-column means are computed from ``train`` only and subtracted from
    every row of all three inputs, so validation and test data are shifted by
    the same offsets as the training data.

    Args:
        train: data whose column means define the offsets.
        validation: validation data, centred with the training means.
        test: test data, centred with the training means.

    Returns:
        ``(train, test, validation)`` - the same objects that were passed in,
        now centred. NOTE: the return order differs from the parameter order
        (test comes before validation); it is kept for existing callers.
    """
    column_means = np.mean(train, axis=0)

    # Slice assignment keeps the original in-place semantics (and dtype)
    # while letting NumPy broadcast the subtraction over all rows at once.
    for dataset in (train, test, validation):
        dataset[:] = dataset - column_means

    return train, test, validation

# apply PCA on the data 
def PCA(variance_target, training_data, validation_data, testing_data):
    """Project the three data sets onto the leading right-singular vectors.

    The directions come from an SVD of ``training_data`` alone. Components
    are added one at a time, in the order the SVD returns them, until the
    accumulated share of squared singular values reaches ``variance_target``.
    This presumably expects data that has already been centred - the
    function itself does no centring.

    Returns:
        ``(training, testing, validation)`` projections. NOTE: testing comes
        before validation, unlike the parameter order.
    """
    _, singular_values, Vt = np.linalg.svd(training_data, full_matrices=False)

    squared = singular_values**2
    total = np.sum(squared)
    variance_ratios = squared / total

    # determine how many principal components must be retained to reach the
    # target level of explained variance
    n_components = 0
    explained_variance = 0
    for ratio in variance_ratios:
        if explained_variance >= variance_target:
            break
        n_components += 1
        explained_variance += ratio

    projection = Vt.T[:, :n_components]

    projected_training = training_data.dot(projection)
    projected_testing = testing_data.dot(projection)
    projected_validation = validation_data.dot(projection)
    return projected_training, projected_testing, projected_validation



# helper function for concatenating labels onto their corresponding data points
def concatenate_data(training_data, training_labels):
    """Return the data with the labels appended as trailing column(s)."""
    columns = (training_data, training_labels)
    return np.column_stack(columns)

# data set is randomised and then split in a 70:30 ratio for training:validation sets
def split_into_validation_training(training_matrix):
    """Shuffle the rows in place and split them 70:30 into training/validation.

    Args:
        training_matrix: array (or mutable sequence) with one sample per row.
            It is shuffled in place along its first axis.

    Returns:
        ``(training_set, validation_set)``: the first 70% of the shuffled
        rows and the remaining rows.
    """
    # The stdlib random.shuffle swaps items with `x[i], x[j] = x[j], x[i]`.
    # On a 2-D ndarray both sides are views of the same buffer, so the swap
    # overwrites one row with the other and rows end up duplicated/lost.
    # numpy's shuffle permutes whole rows correctly. Note that it draws from
    # numpy's global RNG, so seed with np.random.seed for reproducibility.
    np.random.shuffle(training_matrix)

    split_index = int(len(training_matrix) * 0.7)
    training_set = training_matrix[:split_index]
    validation_set = training_matrix[split_index:]

    return training_set, validation_set

def load_in_dataset_and_preprocess(explained_variance):
    """Load CIFAR-100 (coarse labels) and return centred, PCA-reduced splits.

    Steps: flatten every image into one row, attach the labels, shuffle and
    split the training portion into training/validation sets, convert to
    float32, centre all sets with the training means and project them with
    PCA.

    Args:
        explained_variance: target passed to ``PCA`` as ``variance_target``.

    Returns:
        ``(training_data, training_labels, testing_data, testing_labels,
        validation_data, validation_labels)``. Training and validation labels
        are squeezed; ``testing_labels`` is returned exactly as the loader
        provided it (no squeeze is applied here).
    """
    (training_data, training_labels), (testing_data, testing_labels) = cifar100.load_data(label_mode="coarse")

    # flatten each image into a single row; sizes are taken from the loaded
    # arrays instead of being hard-coded
    training_data = training_data.reshape(len(training_data), -1)
    testing_data = testing_data.reshape(len(testing_data), -1)

    # keep labels attached to their rows while shuffling/splitting
    concatenated_training = concatenate_data(training_data, training_labels)

    training_set, validation_set = split_into_validation_training(concatenated_training)

    # last column holds the label, everything before it is pixel data
    training_data = training_set[:, :-1]
    training_labels = np.squeeze(training_set[:, -1])

    validation_data = validation_set[:, :-1]
    validation_labels = np.squeeze(validation_set[:, -1])

    training_data = training_data.astype('float32')
    testing_data = testing_data.astype('float32')
    validation_data = validation_data.astype('float32')

    # Centre data. centre_data takes (train, validation, test) but returns
    # (train, test, validation), hence the different order on each side.
    training_data, testing_data, validation_data = centre_data(training_data, validation_data, testing_data)

    # Apply PCA. Same convention: (train, validation, test) in,
    # (train, test, validation) out.
    training_data, testing_data, validation_data = PCA(explained_variance, training_data, validation_data, testing_data)

    number_training_samples = len(training_data)
    number_validation_samples = len(validation_data)
    number_testing_samples = len(testing_data)

    # make sure every set is laid out as one row per sample
    training_data = np.reshape(training_data, (number_training_samples, -1))
    validation_data = np.reshape(validation_data, (number_validation_samples, -1))
    testing_data = np.reshape(testing_data, (number_testing_samples, -1))

    return training_data, training_labels, testing_data, testing_labels, validation_data, validation_labels


# Load the data, keeping enough principal components for 80% of the variance.
training_data, training_labels, testing_data, testing_labels, validation_data, validation_labels = load_in_dataset_and_preprocess(0.8)
svm = SVM(training_data, testing_data)

# Labels are class indices starting at 0, so the class count is max + 1.
num_classes = int(np.max(training_labels)) + 1

# One weight column per class, one row per (PCA-reduced) feature.
weights = np.ones((len(training_data[0]), num_classes))
weights = svm.train_model(
    training_data,
    training_labels,
    weights,
    learning_rate=0.0000001,
    regularisation_param=5000,
    iterations=15000,
    batch_size=200,
    # use the value derived from the labels instead of a duplicated literal
    num_classes=num_classes,
)

# Report accuracy (in percent) on the held-out validation split.
total_accuracy = svm.calculate_accuracy(validation_data, validation_labels, weights)
print(total_accuracy)