### ----------------------------------------------------------------------------------------------------------------------
### Darie-Dragos Mitoiu - 1905367
### CM4107 Advanced Artificial Intelligence
### Artificial Neural Network and K-Nearest Neighbour Hybrid v1.0.0 26/10/2020
### A jupyter notebook for an artificial neural network and a k-nearest neighbour hybrid
### -----------------------------------------------------------------------------------------------------------------------

### Importing Libraries

In [3]:
# Importing Libraries
from __future__ import division # backward compatibility for python2
import pandas as pd
import numpy as np
import scipy.special
import matplotlib.pyplot as plt
import seaborn as sns
import operator
import random

### Setting Preferences

In [4]:
# Setting matplotlib inline
%matplotlib inline

In [5]:
# Setting seaborn style
sns.set_style("darkgrid")

In [6]:
# Setting numpy seed
np.random.seed(42)

### Set Global Variables

In [7]:
# Path of the CSV file holding the MNIST training records
train_file = "data/mnist_train.csv"
# Path of the CSV file holding the MNIST testing records
test_file = "data/mnist_test.csv"

# Network topology. The loaded frames have 785 columns: one label column
# plus 784 input features, and the labels one-hot encode to 10 classes.
# NOTE(review): the layer-construction cells visible further down pass the
# literals 784 / 200 / 10 directly instead of reading these three names.
# Number of input nodes (one per feature column)
input_nodes = 784
# Number of hidden nodes
hidden_nodes = 200
# Number of output nodes (one per class)
output_nodes = 10

# Training hyperparameters.
# NOTE(review): `epochs` and `batch_size` are reassigned in the hybrid
# training cell later in the notebook, and `learning_rate` is not read by
# any code visible in this notebook (the later cell uses its own `lr`).
# Number of passes over the training data
epochs = 10
# Number of records per weight update
batch_size = 1
# Step size applied to each weight update
learning_rate = 0.3

### Load MNIST Training and Testing Datasets

In [8]:
# Load both CSV files the same way: no header row, so pandas numbers the
# columns 0..N-1. The training frame is read first, then the testing frame.
df_orig_train, df_orig_test = (
    pd.read_csv(csv_path, header=None) for csv_path in (train_file, test_file)
)

### MNIST Training Data Insight

In [9]:
# Report how many rows (records) and columns the training frame holds
n_train_records, n_train_columns = df_orig_train.shape
print("The number of records in the mnist training dataset is: ", n_train_records)
print("The number of columns in the mnist training dataset is: ", n_train_columns)

The number of records in the mnist training dataset is:  60000
The number of columns in the mnist training dataset is:  785


In [10]:
# Show training dataset relevant information
df_orig_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60000 entries, 0 to 59999
Columns: 785 entries, 0 to 784
dtypes: int64(785)
memory usage: 359.3 MB


In [11]:
# Show the head of the training dataset
df_orig_train.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### MNIST Testing Data Insight 

In [12]:
# Show testing dataset relevant information
df_orig_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Columns: 785 entries, 0 to 784
dtypes: int64(785)
memory usage: 59.9 MB


In [13]:
# Show the head of the testing dataset
df_orig_test.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
0,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### MNIST Training Data Frame Dimensionality

In [14]:
# Column 0 holds the class label: one-hot encode it (one column per class)
y_train_all = pd.get_dummies(df_orig_train[0]).to_numpy()
# Every other column is an input feature
X_train_all = df_orig_train.drop(columns=[0]).to_numpy()
print(y_train_all.shape)
print(X_train_all.shape)

(60000, 10)
(60000, 784)


### MNIST Testing Data Frame Dimensionality

In [15]:
# Column 0 holds the class label: one-hot encode it (one column per class)
y_test_all = pd.get_dummies(df_orig_test[0]).to_numpy()
# Every other column is an input feature
X_test_all = df_orig_test.drop(columns=[0]).to_numpy()
print(y_test_all.shape)
print(X_test_all.shape)

(10000, 10)
(10000, 784)


### Data Pre-Processing

In [16]:
# ---- Training subset ----------------------------------------------------
# Work on a random subset rather than the whole training set.
# The training indices are drawn BEFORE the testing indices so the global
# NumPy random state is consumed in a fixed order.
train_sample_size = 1500
random_indices = np.random.choice(range(len(y_train_all)), train_sample_size, replace = False)

X_train, y_train = X_train_all[random_indices], y_train_all[random_indices]
print(y_train.shape)
print(X_train.shape)

# Rescale the inputs: value / 255 * 0.99 + 0.01
# (maps 0..255 onto 0.01..1.00, assuming the raw values are 8-bit pixels)
X_train = X_train / 255.0 * 0.99 + 0.01
# Soften the one-hot targets: 0 becomes 0.01, 1 becomes 1.01 and is then set to 0.99
y_train = y_train + 0.01
y_train = np.where(y_train == 1.01, 0.99, y_train)
print(y_train.shape)

# ---- Testing subset -----------------------------------------------------
test_sample_size = 100
random_test_indices = np.random.choice(range(len(y_test_all)), test_sample_size, replace = False)
X_test, y_test = X_test_all[random_test_indices], y_test_all[random_test_indices]
print(y_test.shape)
print(X_test.shape)

# Same input rescaling and target softening as for the training subset
X_test = X_test / 255.0 * 0.99 + 0.01
y_test = y_test + 0.01
y_test = np.where(y_test == 1.01, 0.99, y_test)

(1500, 10)
(1500, 784)
(1500, 10)
(100, 10)
(100, 784)


### Utility Functions

In [17]:
def mean_squared_error(predictions, targets):
    """
    Half of the mean squared error between predictions and targets.

    The squared differences are summed over every element and divided by
    2 * N, where N is the total number of elements in `targets`
    (`targets.size`), so the value is 0.5 * MSE rather than the plain MSE.

    predictions, targets : arrays of matching (broadcastable) shape;
    `targets` must expose `.size`.
    """
    residuals = targets - predictions
    squared_total = (residuals ** 2).sum()
    return squared_total / (2 * targets.size)


def accuracy(predictions, targets):
    """
    Fraction of rows whose highest-scoring column matches the target's.

    Both arguments are 2-D (rows = records, columns = classes). Each row is
    reduced to the index of its largest value, and the two index vectors are
    compared element-wise.
    """
    predicted_classes = np.argmax(predictions, axis=1)
    # argmax already yields integer indices, so no rounding is required
    expected_classes = np.argmax(targets, axis=1)
    return (predicted_classes == expected_classes).mean()

### Activation Classes

In [18]:
class Activation_ReLU:
    """
    Rectified linear unit activation.

    Both methods store their result in `self.output` (as before) and now
    also return it, so the class can be used the same way as
    Activation_Sigmoid, whose methods return their result.
    """
    def forward(self, inputs):
        """Return max(0, inputs) element-wise; also stored in self.output."""
        self.output = np.maximum(0, inputs)
        return self.output
    def backward(self, inputs):
        """
        Return the ReLU derivative for `inputs`: 1 where inputs > 0, else 0.

        Note: the result overwrites `self.output`, replacing whatever
        forward() stored there.
        """
        self.output = np.greater(inputs, 0).astype(int) # inputs > 0 then convert bools to int
        return self.output
        
class Activation_Sigmoid:
    """Logistic sigmoid activation and its derivative."""
    def forward(self, x):
        """Return 1 / (1 + e^-x) element-wise."""
        denominator = 1 + np.exp(-x)
        return 1 / denominator
    def backward(self, x):
        """
        Return x * (1 - x).

        This equals the sigmoid derivative only when `x` is already a
        sigmoid OUTPUT (not the pre-activation input).
        """
        complement = 1 - x
        return x * complement

### Layer Class

In [19]:
class Layer_Dense:
    """
    Fully connected layer with its own weights, biases and learning rate.

    Parameters
    ----------
    n_inputs : int
        number of values fed into each neuron (rows of the weight matrix)
    n_neurons : int
        number of neurons in the layer (columns of the weight matrix)
    learningrate : float
        step size used by weight_update()
    activation : str
        name of the activation function; only 'sigmoid' is implemented

    Raises
    ------
    ValueError
        if `activation` is not a supported name. Previously an unknown name
        was accepted silently and the layer later failed with an unrelated
        AttributeError because `layer_output` was never set.
    """

    # Activation names this layer knows how to apply
    SUPPORTED_ACTIVATIONS = ('sigmoid',)

    def __init__(self, n_inputs, n_neurons, learningrate=0.01, activation='sigmoid'):
        if activation not in self.SUPPORTED_ACTIVATIONS:
            raise ValueError(self._unsupported_message(activation))

        # Weights ~ Normal(0, n_inputs ** -0.5); biases start at zero
        self.weights = np.random.normal(0.0, pow(n_inputs, -0.5), (n_inputs, n_neurons))
        print(self.weights.shape)
        self.biases = np.zeros((1, n_neurons))

        self.lr = learningrate
        self.activate = activation

    def _unsupported_message(self, name):
        """Build the error text used whenever an unknown activation is met."""
        return "Unsupported activation {!r}; expected one of {}".format(
            name, self.SUPPORTED_ACTIVATIONS)

    def forward(self, inputs):
        """Compute inputs . weights + biases, then apply the activation."""
        self.in_values = inputs
        self.layer_input = np.dot(inputs, self.weights) + self.biases
        self.activation()

    def activation(self):
        """Store the activated `layer_input` in `layer_output`."""
        if self.activate == 'sigmoid':
            self.layer_output = Activation_Sigmoid().forward(self.layer_input)
        else:
            # `activate` may have been reassigned after construction
            raise ValueError(self._unsupported_message(self.activate))

    def del_activation(self):
        """Store the activation derivative in `del_layer_output`."""
        if self.activate == 'sigmoid':
            # layer_output already holds sigmoid(layer_input) from the last
            # forward pass, so it is reused instead of recomputing it.
            self.del_layer_output = Activation_Sigmoid().backward(self.layer_output)
        else:
            raise ValueError(self._unsupported_message(self.activate))

    def backward(self, delta_in, weights_in, targets=None, output_layer=False):
        """
        Compute this layer's error and delta for the most recent forward pass.

        For the output layer (`output_layer=True`) the error is
        `layer_output - targets`; `delta_in` and `weights_in` are ignored.
        Otherwise the error is `delta_in . weights_in.T`, i.e. the next
        layer's delta propagated back through that layer's weights.
        """
        self.del_activation()
        if output_layer:
            self.layer_error = self.layer_output - targets
        else:
            self.layer_error = np.dot(delta_in, weights_in.T)
        self.layer_delta = self.layer_error * self.del_layer_output

    def weight_update(self, prev_layer_output):
        """
        Apply one gradient step using the delta stored by backward().

        `prev_layer_output` is whatever was fed into this layer. The weight
        gradient is averaged over the N rows of the batch, and the bias
        gradient is the per-neuron mean of the delta.
        """
        N = self.layer_delta.shape[0]
        weights_update = np.dot(prev_layer_output.T, self.layer_delta) / N
        self.weights -= self.lr * weights_update
        biases_update = np.mean(self.layer_delta, axis=0, keepdims=True)
        self.biases -= self.lr * biases_update

In [20]:
# Build the two layers. No learning rate is passed, so both layers use
# Layer_Dense's default (learningrate=0.01).
layer1 = Layer_Dense(784, 200) # weight matrix: input nodes x hidden nodes
output = Layer_Dense(200, 10) # weight matrix: hidden nodes x output nodes

# Per-epoch history of the training metrics
monitoring = {'mean_squared_error': [], 'accuracy': []}

# Full-batch training: every epoch pushes the whole training sample through
# the network once and performs a single weight update per layer.
for epoch in range(100):
    # forward pass
    layer1.forward(X_train)
    output.forward(layer1.layer_output)

    # training metrics, measured before this epoch's weight update
    pred = output.layer_output
    mse = mean_squared_error(pred, y_train)
    acc = accuracy(pred, y_train)
    monitoring['mean_squared_error'].append(mse)
    monitoring['accuracy'].append(acc)

    # backward pass: output layer first, then the hidden layer
    output.backward(None, None, y_train, True)
    layer1.backward(output.layer_delta, output.weights)

    # weight updates, in the same order as the backward pass
    output.weight_update(layer1.layer_output)
    layer1.weight_update(X_train)

monitoring_df = pd.DataFrame(monitoring)
monitoring_df

(784, 200)
(200, 10)


Unnamed: 0,mean_squared_error,accuracy
0,0.115913,0.104000
1,0.111464,0.104000
2,0.107237,0.104000
3,0.103236,0.104000
4,0.099461,0.104000
...,...,...
95,0.044720,0.164667
96,0.044681,0.164667
97,0.044642,0.164000
98,0.044605,0.165333


### Creating Artificial Neural Network

In [21]:
class ANN():
    """
    Two-layer network (one hidden layer, one output layer) trained with
    mini-batch gradient descent.

    Parameters
    ----------
    ouput_layer : layer object used as the output layer
        (the parameter name is spelled this way in the public signature)
    hidden_layer : layer object used as the hidden layer
    batch_size : int
        number of records per weight update
    """
    def __init__(self, ouput_layer, hidden_layer, batch_size = 10):
        self.batch_size = batch_size
        self.layer1 = hidden_layer
        self.output = ouput_layer

    def batch_input(self, x, y):
        """Yield consecutive (x_slice, y_slice) pairs of at most batch_size records."""
        for start in range(0, len(x), self.batch_size):
            stop = start + self.batch_size
            yield (x[start:stop], y[start:stop])

    def _fit_batch(self, batch_x, batch_y):
        """Run one forward pass, one backward pass and one weight update."""
        # forward
        self.layer1.forward(batch_x)
        self.output.forward(self.layer1.layer_output)

        # backprop through the layers, output layer first
        self.output.backward(None, None, batch_y, True)
        self.layer1.backward(self.output.layer_delta, self.output.weights)

        # update all the layer weights
        self.output.weight_update(self.layer1.layer_output)
        self.layer1.weight_update(batch_x)

    def train(self, x, y, epochs, lr):
        """
        Train for `epochs` passes over (x, y) with learning rate `lr`.

        After each epoch the whole of `x` is predicted again and scored.
        Returns a DataFrame with one row per epoch and the columns
        'mean_squared_error' and 'accuracy'.
        """
        for layer in (self.layer1, self.output):
            layer.lr = lr

        mse_history = []
        acc_history = []
        for _ in range(epochs):
            for batch_x, batch_y in self.batch_input(x, y):
                self._fit_batch(batch_x, batch_y)

            mse, acc = self.evaluate(self.predict(x), y)
            mse_history.append(mse)
            acc_history.append(acc)

        return pd.DataFrame({'mean_squared_error': mse_history,
                             'accuracy': acc_history})

    def predict(self, x):
        """Forward `x` through both layers and return the output layer's activations."""
        self.layer1.forward(x)
        hidden_activations = self.layer1.layer_output
        self.output.forward(hidden_activations)
        return self.output.layer_output

    def evaluate(self, predicts, y):
        """Return (mean_squared_error, accuracy) of `predicts` against `y`."""
        return mean_squared_error(predicts, y), accuracy(predicts, y)

    def test(self, x, y):
        """Predict `x`, score against `y`, and return the metrics as a one-row DataFrame."""
        mse, acc = self.evaluate(self.predict(x), y)
        return pd.DataFrame({'mean_squared_error': [mse], 'accuracy': [acc]})

### Creating K-Nearest Neighbour

In [22]:
# k-NN needs a way to score how alike two feature vectors are.
# The module-level functions below each turn a pair of vectors into a similarity score (larger means more alike).
def euclidean(instance1, instance2):
    '''
    Euclidean-based SIMILARITY between two instances of data.

    instance1 and instance2 are equal-length sequences (lists or NumPy
    arrays) of numeric values.

    The euclidean distance d is converted to a similarity 1 / (1 + d), so
    identical instances score 1.0 and the score falls towards 0 as they
    move apart. Note that this is not the raw distance.

    Raises ValueError when the two instances differ in shape (the previous
    element-wise loop silently ignored the surplus values).
    '''
    first = np.asarray(instance1, dtype=float)
    second = np.asarray(instance2, dtype=float)
    if first.shape != second.shape:
        raise ValueError(
            "instances must have the same shape, got {} and {}".format(first.shape, second.shape))

    # vectorised: avoids a Python-level loop over every feature
    distance = np.sqrt(np.sum((first - second) ** 2))
    return float(1 / (1 + distance))
    

def manhattan(instance1, instance2):
    '''
    Manhattan-based SIMILARITY between two instances of data.

    instance1 and instance2 are equal-length sequences (lists or NumPy
    arrays) of numeric values.

    The manhattan distance d (sum of absolute differences) is converted to
    a similarity 1 / (1 + d), so identical instances score 1.0 and the
    score falls towards 0 as they move apart. Note that this is not the raw
    distance.

    Raises ValueError when the two instances differ in shape (the previous
    element-wise loop silently ignored the surplus values).
    '''
    first = np.asarray(instance1, dtype=float)
    second = np.asarray(instance2, dtype=float)
    if first.shape != second.shape:
        raise ValueError(
            "instances must have the same shape, got {} and {}".format(first.shape, second.shape))

    # vectorised: avoids a Python-level loop over every feature
    distance = np.sum(np.abs(first - second))
    return float(1 / (1 + distance))
    
def dot_product(instance1, instance2):
    '''
    Dot product of two instances, usable as a similarity score.

    instance1 and instance2 are sequences of numeric values; the result is
    whatever np.dot returns for them.
    '''
    similarity = np.dot(instance1, instance2)
    return similarity

In [23]:
class kNN:
    """
    k-nearest-neighbour classifier driven by a pluggable similarity function.

    Parameters
    ----------
    X_train, Y_train : sequence
        the training set feature values and the associated class labels.
        Labels are used as dictionary keys when voting, so they must be
        hashable (e.g. integers or strings, not arrays).
    k : int
        number of neighbours; must be at least 1. Values larger than the
        training set are clamped to the training set size.
    sim : callable
        similarity function taking two instances and returning a score
        where LARGER means more similar (e.g. manhattan, euclidean)
    weighted : Boolean
        if True each neighbour's vote counts its similarity value,
        otherwise every neighbour's vote counts 1

    Attributes
    ----------
    results : list
        [predicted label, target label] pairs from the last call to test().

    Raises
    ------
    ValueError
        if k is smaller than 1.
    """

    def __init__(self, X_train, Y_train, k=3, sim=manhattan, weighted=False):
        if k < 1:
            # k <= 0 used to surface later as an IndexError inside predict()
            raise ValueError("k must be at least 1, got {}".format(k))

        self.X_train = X_train
        self.Y_train = Y_train

        # never ask for more neighbours than there are training instances
        self.k = min(k, len(self.X_train))

        self.similarity = sim
        self.weighted = weighted

        # filled in by test()
        self.results = []

    def get_neighbours(self, test_instance):
        '''
        Return the k training examples most similar to `test_instance`.

        The result is a list of (label, similarity) tuples ordered by
        decreasing similarity.
        '''
        scored = [
            (label, self.similarity(test_instance, train_instance))
            for train_instance, label in zip(self.X_train, self.Y_train)
        ]
        # sort() is stable, so equally similar examples keep training order
        scored.sort(key=lambda pair: pair[1], reverse=True)
        return scored[:self.k]

    def predict(self, neighbours):
        '''
        Return the winning class label among `neighbours`.

        `neighbours` is a list of (label, similarity) tuples. Each neighbour
        votes for its label with weight 1, or with its similarity when
        `self.weighted` is set. Ties in the vote total are broken in favour
        of the larger label.

        Raises ValueError if `neighbours` is empty.
        '''
        if len(neighbours) == 0:
            raise ValueError("cannot predict from an empty neighbour list")

        class_votes = {}
        for neighbour in neighbours:
            label, similarity = neighbour[0], neighbour[1]
            vote = similarity if self.weighted else 1
            class_votes[label] = class_votes.get(label, 0) + vote

        return max(class_votes, key=lambda label: (class_votes[label], label))

    def test(self, X_test, Y_test):
        '''
        Predict every instance of `X_test` and record the outcome.

        `self.results` is reset and then filled with one
        [predicted label, target label] pair per test instance; the same
        list is also returned for convenience.
        '''
        self.results = []

        for test_instance, target_label in zip(X_test, Y_test):
            neighbours = self.get_neighbours(test_instance)
            predict_label = self.predict(neighbours)
            self.results.append([predict_label, target_label])

        return self.results
    

In [24]:
class Hybrid():
    """
    Neural network / k-nearest-neighbour hybrid.

    The two-layer network is trained exactly as before. Once training has
    finished, the training data is pushed through the hidden layer and the
    resulting activations become the feature vectors of a kNN classifier,
    which test() then uses to classify unseen data.

    NOTE(review): the original class called `self.knn.test(...)` without
    ever creating `self.knn`, so test() always raised AttributeError.
    Using the hidden-layer activations as kNN features is the assumed
    intent of the (previously unused) `features` / `class_label`
    attributes - confirm against the design.

    Parameters
    ----------
    ouput_layer : layer object used as the output layer
        (the parameter name is spelled this way in the public signature)
    hidden_layer : layer object used as the hidden layer
    batch_size : int
        number of records per weight update
    k : int
        number of neighbours handed to the kNN classifier
    """
    def __init__(self, ouput_layer, hidden_layer, batch_size = 10, k = 3):
        self.output = ouput_layer
        self.layer1 = hidden_layer
        self.batch_size = batch_size
        self.k = k
        # kNN training data, populated at the end of train()
        self.features = []
        self.class_label = []
        # kNN classifier, created at the end of train()
        self.knn = None

    def batch_input(self, x, y):
        """Yield consecutive (x_slice, y_slice) pairs of at most batch_size records."""
        for i in range(0, len(x), self.batch_size):
            yield (x[i:i + self.batch_size], y[i:i + self.batch_size])

    @staticmethod
    def _to_labels(y):
        """Reduce one-hot style rows to class indices; 1-D labels pass through."""
        y = np.asarray(y)
        return np.argmax(y, axis=1) if y.ndim > 1 else y

    def extract_features(self, x):
        """Return the hidden layer's activations for `x`."""
        self.layer1.forward(x)
        return self.layer1.layer_output

    def train(self, x, y, epochs, lr):
        """
        Train the network, then build the kNN stage on top of it.

        Returns a DataFrame with one row per epoch and the columns
        'mean_squared_error' and 'accuracy' (network metrics on `x`, `y`).
        """
        self.layer1.lr = lr
        self.output.lr = lr

        monitoring = {}
        monitoring['mean_squared_error'] = []
        monitoring['accuracy'] = []

        for epoch in range(epochs):

            for (batch_x, batch_y) in self.batch_input(x, y):
                # forward pass
                self.layer1.forward(batch_x)
                self.output.forward(self.layer1.layer_output)

                # backprop through the layers
                self.output.backward(None, None, batch_y, True)
                self.layer1.backward(self.output.layer_delta, self.output.weights)

                # update all the layer weights
                self.output.weight_update(self.layer1.layer_output)
                self.layer1.weight_update(batch_x)

            pred = self.predict(x)
            mse, acc = self.evaluate(pred, y)
            monitoring['mean_squared_error'].append(mse)
            monitoring['accuracy'].append(acc)

        # Build the kNN stage from the trained hidden representation.
        # kNN votes with labels as dictionary keys, so the target rows are
        # reduced to class indices first.
        self.features = self.extract_features(x)
        self.class_label = self._to_labels(y)
        self.knn = kNN(self.features, self.class_label, k=self.k)

        monitoring_df = pd.DataFrame(monitoring)
        return monitoring_df

    def predict(self, x):
        """Forward `x` through both layers and return the output layer's activations."""
        self.layer1.forward(x)
        self.output.forward(self.layer1.layer_output)
        return self.output.layer_output

    def evaluate(self, predicts, y):
        """Return (mean_squared_error, accuracy) of `predicts` against `y`."""
        mse = mean_squared_error(predicts, y)
        acc = accuracy(predicts, y)
        return mse, acc

    def test(self, x, y):
        """
        Classify `x` with the kNN stage and score the result against `y`.

        The per-instance [predicted, target] pairs remain available in
        `self.knn.results`. Returns a one-row DataFrame with the column
        'accuracy' (NaN when there were no test instances).

        Raises RuntimeError if train() has not been called yet.
        """
        if self.knn is None:
            raise RuntimeError(
                "Hybrid.test() called before train(): the kNN stage has not been built yet")

        self.knn.test(self.extract_features(x), self._to_labels(y))

        results = self.knn.results
        if results:
            hits = sum(1 for predicted, target in results if predicted == target)
            acc = hits / len(results)
        else:
            acc = float('nan')
        return pd.DataFrame({'accuracy': [acc]})

In [25]:
# Reseed so the weight initialisation below is repeatable
np.random.seed(42)

# hyperparameters for this run (these replace the earlier batch_size / epochs values)
batch_size, epochs, lr = 10, 200, 1.0

# configure the layers: 784 inputs -> 200 hidden neurons -> 10 outputs
hidden = Layer_Dense(n_inputs=784, n_neurons=200)
output = Layer_Dense(n_inputs=200, n_neurons=10)

# create the hybrid model (output layer first, then the hidden layer)
hybrid = Hybrid(output, hidden, batch_size)

# train the hybrid model on the training sample; the per-epoch metrics
# frame returned by train() is the cell's displayed result
hybrid.train(X_train, y_train, epochs=epochs, lr=lr)

(784, 200)
(200, 10)
Pred:  [[0.06226847 0.03628825 0.04210856 ... 0.35427848 0.03645686 0.17584971]
 [0.10551803 0.05812782 0.1425378  ... 0.04721494 0.08193733 0.05621014]
 [0.08140993 0.06007074 0.09973767 ... 0.10393678 0.13783874 0.13151303]
 ...
 [0.08781152 0.06726742 0.20756234 ... 0.033799   0.06155507 0.09399921]
 [0.08404087 0.05768056 0.05993967 ... 0.12033355 0.11300735 0.12330531]
 [0.05872167 0.04367029 0.11248258 ... 0.12084682 0.05331243 0.11324302]]
Pred:  [[0.03506434 0.03559416 0.00757645 ... 0.63247896 0.00663559 0.10872184]
 [0.07561803 0.0322211  0.05361606 ... 0.00716107 0.04795054 0.03015437]
 [0.03195713 0.06827786 0.0458393  ... 0.03054439 0.44000265 0.14430015]
 ...
 [0.09319474 0.00695234 0.05451601 ... 0.00120314 0.03660121 0.07564133]
 [0.08610593 0.01439844 0.01060102 ... 0.03411033 0.17679303 0.11533628]
 [0.00571379 0.24038793 0.08567982 ... 0.06673352 0.06079918 0.08848752]]
Pred:  [[1.15876166e-02 1.15609801e-02 4.21821479e-03 ... 8.24579577e-01
  2.

Pred:  [[4.35640352e-04 1.24642190e-04 4.81329187e-05 ... 9.91601411e-01
  5.85797013e-04 1.59257548e-03]
 [1.52768518e-03 9.67441739e-05 6.35215910e-04 ... 2.21817015e-05
  1.79209233e-03 2.13499504e-03]
 [8.23775222e-04 1.09993580e-03 6.31769076e-03 ... 2.57543460e-04
  9.59429953e-01 3.20137865e-03]
 ...
 [9.01528084e-03 1.41909618e-05 4.53178489e-03 ... 1.97139429e-07
  2.82878610e-02 2.59868659e-04]
 [5.14061329e-04 3.46113265e-06 3.84857906e-04 ... 8.46929854e-04
  1.76328274e-01 1.40633360e-03]
 [3.48893377e-06 1.87684254e-01 5.64693644e-03 ... 4.45314030e-03
  4.86004958e-01 1.00889975e-03]]
Pred:  [[3.84166365e-04 1.02735472e-04 3.87885102e-05 ... 9.92773205e-01
  5.96132754e-04 1.45468938e-03]
 [1.26533513e-03 8.54743954e-05 5.38486757e-04 ... 1.74864514e-05
  1.72413120e-03 1.84627544e-03]
 [6.94131204e-04 9.98126192e-04 5.88527139e-03 ... 2.18160989e-04
  9.69805010e-01 2.51145583e-03]
 ...
 [7.50388509e-03 1.22295817e-05 4.07394198e-03 ... 1.42902053e-07
  3.25158516e-02 1

Pred:  [[2.16212886e-04 1.52383391e-05 3.51768826e-06 ... 9.97842110e-01
  2.96365996e-04 3.57358881e-04]
 [3.65812967e-04 2.31744364e-05 1.16530514e-04 ... 2.27839428e-06
  3.34351185e-04 5.62210760e-04]
 [1.43662273e-04 3.11180902e-04 2.26341471e-03 ... 2.67804561e-05
  9.92112091e-01 2.70769281e-04]
 ...
 [1.25925326e-03 2.04040503e-06 1.83888808e-03 ... 7.68913137e-09
  2.71326606e-02 7.65811923e-06]
 [1.85969074e-04 1.06372088e-07 3.07242792e-04 ... 1.24976454e-04
  8.66714703e-02 1.12956128e-04]
 [3.93733383e-07 1.35943525e-01 4.74536830e-03 ... 4.26198064e-04
  8.36613924e-01 4.21808799e-05]]
Pred:  [[2.18875462e-04 1.32595495e-05 3.17089584e-06 ... 9.98012850e-01
  2.85759586e-04 3.08695370e-04]
 [3.48672654e-04 2.16229186e-05 1.12071032e-04 ... 2.03600108e-06
  3.21715334e-04 5.22363421e-04]
 [1.31725868e-04 2.83146732e-04 2.09351426e-03 ... 2.33591803e-05
  9.92556575e-01 2.40610117e-04]
 ...
 [1.15856783e-03 1.80038387e-06 1.67231408e-03 ... 6.50226291e-09
  2.66253882e-02 6

KeyboardInterrupt: 