**Loading and Preprocessing - setting up for cross validation**

In [3]:
import time
import numpy as np
from keras.datasets import cifar100
import matplotlib.pyplot as plt
import cv2
from sklearn.model_selection import KFold


def flatten_data(x_train, y_train, x_test, y_test):
    new_image_shape = 1
    for dim in range(1, len(x_train.shape)):
        new_image_shape *= x_train.shape[dim]
        
    flat_x_train = x_train.reshape((x_train.shape[0], new_image_shape))
    flat_y_train = np.ravel(y_train)
    
    flat_x_test = x_test.reshape((x_test.shape[0], new_image_shape))
    flat_y_test = np.ravel(y_test)
    return flat_x_train, flat_y_train, flat_x_test, flat_y_test

# centre the data
def centre_data(train, test):
    
    # calculate the means for each attribute of the training data
    column_means = np.mean(train, axis=0) 
    
    # centre training data by subtracting training data attribute means
    for i in range(len(train)):
        train[i] = train[i] - column_means
    
    # centre testing data by subtracting training data attribute means
    for x in range(len(test)):
        test[x] = test[x] - column_means
        
    return train, test

# apply PCA on the data 
def PCA(variance_target, training_data, testing_data):

    U, sigma, Vt = np.linalg.svd(training_data, full_matrices=False)
    
    sum_square_singular = np.sum(sigma**2)
    
    ratios = sigma**2/sum_square_singular
    n_components = 0
    explained_variance = 0
    
    # determine how many principle components must be retained to maintain the target level of explained variance
    for i in range(len(ratios)):
        if explained_variance >= variance_target:
            break
        else: 
            n_components += 1
            explained_variance += ratios[i]
    
    return training_data.dot(Vt.T[:, :n_components]), testing_data.dot(Vt.T[:, :n_components])



def load_in_dataset_and_preprocess(explained_variance, training_data, testing_data, training_labels,testing_labels):

    concatenated_training = concatenate_data(training_data, training_labels)

    training_set, validation_set = split_into_validation_training(concatenated_training)

    training_data = training_set[:, :-1]
    training_labels = np.squeeze(training_set[:, -1])

    validation_data = validation_set[:, :-1]
    validation_labels = np.squeeze(validation_set[:, -1])

    training_data = training_data.astype('float32')
    testing_data = testing_data.astype('float32')
    validation_data = validation_data.astype('float32')

    # Centre data
    #training_data, testing_data, validation_data = centre_data(training_data, testing_data, validation_data)

    # Apply PCA
    #training_data, testing_data, validation_data = PCA(explained_variance, training_data, testing_data, validation_data)

    number_training_samples = len(training_data)
    number_validation_samples = len(validation_data)
    number_testing_samples = len(testing_data)

    # Reshape data from channel to rows
    training_data = np.reshape(training_data, (number_training_samples, -1))
    validation_data = np.reshape(validation_data, (number_validation_samples, -1))
    testing_data = np.reshape(testing_data, (number_testing_samples, -1))

    return training_data, training_labels, testing_data, testing_labels

**Support Vector Machine**

In [5]:
import time
import numpy as np
import numpy as np
from keras.datasets import cifar100
import matplotlib.pyplot as plt
import cv2
from sklearn.model_selection import KFold

class SVM:

    training_data=[]
    testing_data=[]

    def __init__(self, training_data, testing_data):
      self.training_data= training_data
      self.testing_data= testing_data

    def __len__(self, data):
      return len(data)

    def calculate_linear_output(self, data, weights):
      return np.dot(data, weights)

    # distance of point from separating hyperplane?
    def calculate_distance(self, X, w):
      return  y * (np.dot(X, w)) - 1

    # calculate gradient, use L2 regularisation 
    def calculate_gradient(self, weights, regularisation_param, training_data, training_labels, num_classes):
        
        num_training_samples, num_training_features = training_data.shape
        
        gradient = np.zeros((num_training_features, num_classes))
        
        # w^Tx
        linear_output = self.calculate_linear_output(training_data, weights)

        #linear output with labels 
        linear_output_y_i = linear_output[np.arange(num_training_samples),training_labels]
        delta = linear_output - linear_output_y_i[:,np.newaxis] + 1
        
        ones_and_zeros = np.zeros(delta.shape)
        
        # makes all the places where delta > 0, 1 else 0
        # With lagrange multiplier considered, if the sample is on the support vector: 𝛼 = 1
        # else: 𝛼 = 0
        ones_and_zeros = np.where(delta > 0, 1, 0)
        
        # calculate the sum of each row 
        sum_of_each_row = np.sum(ones_and_zeros, axis=1)
        
        ones_and_zeros[np.arange(num_training_samples), training_labels] = - sum_of_each_row

        gradient = (1/num_training_samples) * np.dot((training_data.T), ones_and_zeros)
        
        # controls the influence of each individual support vector on the objective function. 
        # Greater C decreases the effect of |w|²/2, and results in the narrower margin
        gradient = gradient + (2* regularisation_param * weights)
        
        return gradient 

    # train model using stochastic gradient descent 
    def train_model(self, training_data, training_labels, weights, learning_rate, regularisation_param, iterations, batch_size, num_classes):
      
      num_training_samples = len(training_data)
      weights = weights

      for i in range(iterations):
      # create batch
          batch = np.random.choice(5000, batch_size) #change this to num_training_samples 
          gradient = self.calculate_gradient(weights, regularisation_param, training_data[batch], training_labels[batch], num_classes)
          weights = weights - learning_rate * gradient

      return weights

    # calculate accuracy of model 
    def calculate_accuracy (self, data, labels, weights):
        
        accuracy = 0
        prediction = np.zeros(len(data))

      #w^Tx
        linear_output= self.calculate_linear_output(data, weights)

      # returns the indices of the maximum values along an axis, ie. in this case will return the 
      # column index corresponding to the greatest index of each row
        prediction = np.argmax(linear_output, axis=1)

      # count the number of predictions that are correct 
        total_correct_predictions = (prediction == labels).sum()
        num_data_points = len(data)
        accuracy = (total_correct_predictions/num_data_points)*100

        return accuracy

# helper function for concatenating labels onto their corresponding data points
def concatenate_data(training_data, training_labels):
    return np.column_stack((training_data, training_labels))

# data set is randomised and then split in a 70:30 ratio for training:validation sets
def split_into_validation_training(training_matrix):
    
    import random
    random.shuffle(training_matrix)

    training_set = training_matrix[:int(len(training_matrix)*0.7)]
    validation_set = training_matrix[int(len(training_matrix)*0.7):]
    
    return training_set, validation_set

#using 10 fold cross validation here to evaluate the performance of SVM
def cross_validation():

  (training_data, training_labels), (testing_data, testing_labels) = (cifar100.load_data("coarse"))

  # reshape the data 
  training_data = training_data.reshape(50000, 3072)
  testing_data = testing_data.reshape(10000, 3072)
  
  concatenated_training = concatenate_data(training_data, training_labels)

  training_set, validation_set = split_into_validation_training(concatenated_training)

  training_data = training_set[:, :-1]
  training_labels = np.squeeze(training_set[:, -1])

  validation_data = validation_set[:, :-1]
  validation_labels = np.squeeze(validation_set[:, -1])

  training_data = training_data.astype('float32')
  testing_data = testing_data.astype('float32')
  validation_data = validation_data.astype('float32')

  # Centre data
  #training_data, testing_data, validation_data = centre_data(training_data, testing_data, validation_data)

  # Apply PCA
  #training_data, testing_data, validation_data = PCA(explained_variance, training_data, testing_data, validation_data)

  number_training_samples = len(training_data)
  number_validation_samples = len(validation_data)
  number_testing_samples = len(testing_data)

  # Reshape data from channel to rows
  training_data = np.reshape(training_data, (number_training_samples, -1))
  validation_data = np.reshape(validation_data, (number_validation_samples, -1))
  testing_data = np.reshape(testing_data, (number_testing_samples, -1))

  #training_data, training_labels, testing_data, testing_labels = load_in_dataset_and_preprocess(0.9, training_data, training_labels, testing_data, testing_labels)
  
  cv = KFold(n_splits=10)

  for train_index, test_index in cv.split(training_data):

    training_set, training_set_labels = training_data[train_index], training_labels[train_index]
    testing_set, testing_set_labels = training_data[test_index], training_labels[test_index]

    svm = SVM(training_set, testing_set)
    num_classes = np.max(training_set_labels) + 1
    weights = np.ones((len(training_set[1]), num_classes))
    weights= svm.train_model(training_set, training_set_labels, weights, 0.00000001, 1000, 20000, 200, 20)

    total_accuracy = svm.calculate_accuracy(testing_set, testing_set_labels, weights)
    print('accuracy: ', total_accuracy)

cross_validation()

  #run the classifiers here 

accuracy:  34.91428571428571
accuracy:  32.74285714285714
accuracy:  28.142857142857142
accuracy:  24.942857142857143
accuracy:  24.257142857142856
accuracy:  22.8
accuracy:  20.82857142857143
accuracy:  21.02857142857143
accuracy:  22.142857142857142
accuracy:  21.428571428571427


**Multi-Layer Perceptron**

In [None]:
from keras.optimizers import Adam
from keras.layers import Dense, Activation, Dropout

from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import Adam, SGD
from keras.utils.np_utils import to_categorical  
from keras.datasets import cifar100
from keras.optimizers import Adam
from keras.layers import Dense, Activation, Dropout
import numpy as np

def run_MLP_model(training_data, training_labels, testing_data, testing_labels, first_activation_function, second_activation_function, num_hidden_units, learning_rate, optimiser, decay_level, momentum, epochs, loss_function):

    model = Sequential()

    model.add(Dense(num_hidden_units, activation=first_activation_function, input_dim=training_data.shape[1]))
    model.add(Dropout(0.5))
    model.add(Dense(num_hidden_units, activation=first_activation_function))
    model.add(Dropout(0.5))
    model.add(Dense(20, activation=second_activation_function))

    if (optimiser == 'SGD'):
        op = SGD(lr=learning_rate, decay=decay_level, momentum=momentum, nesterov=True)

    else:
        op = Adam(lr=learning_rate, decay=decay_level)

    # can also use loss function categorical_crossentropy
    # or optimiser SGD
    # try with different optimisers and loss functions
    model.compile(optimizer=op,
                  loss=loss_function,
                  metrics=['accuracy'])

    history = model.fit(training_data, training_labels, epochs=epochs, batch_size=32, verbose=2, validation_split=0.2)

    score = model.evaluate(testing_data, testing_labels, batch_size=128, verbose=0)
    return score[1]


# helper function for concatenating labels onto their corresponding data points
def concatenate_data(training_data, training_labels):
    return np.column_stack((training_data, training_labels))

# data set is randomised and then split in a 70:30 ratio for training:validation sets
def split_into_validation_training(training_matrix):
    
    import random
    random.shuffle(training_matrix)

    training_set = training_matrix[:int(len(training_matrix)*0.7)]
    validation_set = training_matrix[int(len(training_matrix)*0.7):]
    
    return training_set, validation_set

#using 10 fold cross validation here to evaluate the performance of SVM
def cross_validation():

    (training_data, training_labels), (testing_data, testing_labels) = (cifar100.load_data("coarse"))

    momentum = 0.9
    decay=1e-06
    learning_rate = 0.001
    first_activation_function = 'relu'
    second_activation_function = 'softmax'
    loss = 'sparse_categorical_crossentropy'
    optimiser = 'Adam'
    epochs = 20
    num_hidden_units = 256

    # reshape the data 
    training_data = training_data.reshape(50000, 3072)
    testing_data = testing_data.reshape(10000, 3072)

    concatenated_training = concatenate_data(training_data, training_labels)

    training_set, validation_set = split_into_validation_training(concatenated_training)

    training_data = training_set[:, :-1]
    training_labels = np.squeeze(training_set[:, -1])

    validation_data = validation_set[:, :-1]
    validation_labels = np.squeeze(validation_set[:, -1])

    training_data = training_data.astype('float32')
    testing_data = testing_data.astype('float32')
    validation_data = validation_data.astype('float32')

    # Centre data
    training_data, testing_data = centre_data(training_data, testing_data)

    # Apply PCA
    training_data, testing_data = PCA(0.9, training_data, testing_data)

    number_training_samples = len(training_data)
    number_validation_samples = len(validation_data)
    number_testing_samples = len(testing_data)

    # Reshape data from channel to rows
    training_data = np.reshape(training_data, (number_training_samples, -1))
    validation_data = np.reshape(validation_data, (number_validation_samples, -1))
    testing_data = np.reshape(testing_data, (number_testing_samples, -1))

    # Normalization of pixel values (to [0-1] range)
    training_data = training_data / 255
    testing_data = testing_data / 255
    validation_data = validation_data / 255

    cv = KFold(n_splits=10)

    for train_index, test_index in cv.split(training_data):

      training_set, training_set_labels = training_data[train_index], training_labels[train_index]
      testing_set, testing_set_labels = training_data[test_index], training_labels[test_index]

      accuracy= run_MLP_model(training_set, training_set_labels, testing_set, testing_set_labels, first_activation_function, second_activation_function, num_hidden_units, learning_rate, optimiser, decay, momentum, epochs, loss)
      print('accuracy: ', accuracy)


cross_validation()

**Random Forests**

In [4]:
import tensorflow as tf
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from skimage import exposure
from skimage import feature
import matplotlib.pyplot as plt

def flatten_data(x_train, y_train, x_test, y_test):
    new_image_shape = 1
    for dim in range(1, len(x_train.shape)):
        new_image_shape *= x_train.shape[dim]
        
    flat_x_train = x_train.reshape((x_train.shape[0], new_image_shape))
    flat_y_train = np.ravel(y_train)
    
    flat_x_test = x_test.reshape((x_test.shape[0], new_image_shape))
    flat_y_test = np.ravel(y_test)
    return flat_x_train, flat_y_train, flat_x_test, flat_y_test

def cross_validation():

    (training_data, training_labels), (testing_data, testing_labels) = (cifar100.load_data("coarse"))
    cv = KFold(n_splits=10)

    momentum = 0.9
    decay=1e-6
    learning_rate = 0.001
    first_activation_function = 'relu'
    second_activation_function = 'softmax'
    loss = 'sparse_categorical_crossentropy'
    optimiser = 'Adam'
    epochs = 20
    num_hidden_units = 256

    training_data, training_labels, testing_data, testing_labels= flatten_data(training_data, training_labels, testing_data, testing_labels)

    cv = KFold(n_splits=10)

    for train_index, test_index in cv.split(training_data):

      training_set, training_set_labels = training_data[train_index], training_labels[train_index]
      testing_set, testing_set_labels = training_data[test_index], training_labels[test_index]
      
      model = RandomForestClassifier(
      n_jobs=-1, 
      verbose=1,
      n_estimators=400,
      bootstrap=False, 
      max_features='sqrt', 
      criterion='gini')

      model.fit(training_set, training_set_labels)
      accuracy= model.score(testing_set, testing_set_labels)
      print('accuracy: ',accuracy)


cross_validation()

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed: 11.0min
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed: 22.1min finished
[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.3s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    1.1s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    1.9s finished


accuracy:  0.363


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed: 10.8min
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed: 21.6min finished
[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.2s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.8s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    1.6s finished


accuracy:  0.3602


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed: 10.6min
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed: 22.1min finished
[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.3s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    1.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    2.2s finished


accuracy:  0.3622


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed: 10.5min
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed: 21.5min finished
[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.2s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.9s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    1.8s finished


accuracy:  0.3634


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed: 10.6min
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed: 21.3min finished
[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.2s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.8s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    1.9s finished


accuracy:  0.3538


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed: 10.6min
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed: 21.9min finished
[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.3s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.9s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    1.8s finished


accuracy:  0.3798


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed: 10.3min
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed: 21.1min finished
[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.3s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.9s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    1.8s finished


accuracy:  0.3504


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed: 10.3min
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed: 21.1min finished
[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.4s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    1.2s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    2.0s finished


accuracy:  0.3558


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed: 10.4min
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed: 21.2min finished
[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.3s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    1.0s
[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    2.1s finished


accuracy:  0.3674


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed: 10.2min
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed: 20.8min finished
[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.3s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    1.0s


accuracy:  0.3614


[Parallel(n_jobs=2)]: Done 400 out of 400 | elapsed:    1.9s finished
