# In [None]:
import numpy as np
import pandas as pd
import itertools
import csv
import matplotlib.pyplot as plt


# In [None]:
class NeuralNetwork:
    """Fully connected classifier trained with mini-batch gradient descent.

    Every layer, including the output layer, applies a leaky ReLU; a softmax
    over the output activations yields the class probabilities.  Weight
    updates use momentum, and ``neuralNetwork`` grid-searches the
    hyper-parameters with repeated random train/test splits.

    All mutable state (weights, cached activations, accuracy histories) is
    created per instance in ``__init__`` so that two networks never share
    the same lists.
    """

    # Number of samples per mini-batch used by backPropagation.
    batch_size = 200
    # Best averaged accuracy found so far; set by storeBestCombination.
    best_accuracy = 0
    # Slope applied to non-positive inputs by the leaky ReLU.
    _LEAK = 0.01

    def __init__(self):
        """Create the empty per-instance state of the network."""
        # Activations cached by the most recent feedForward call.
        self.input_layer = []
        self.hidden_layers = []
        self.output_layer = []
        self.output_probs = []

        # Trainable parameters, one entry per layer.
        self.weights = []
        self.biases = []

        # Histories of the current training run (reset by initializeParams).
        self.losses = []
        self.train_accuracies = []
        self.test_accuracies = []

        self.best_losses = []
        self.best_accuracies = []
        self.best_combination = {}

        # Previous weight/bias updates, used as the momentum term.
        self.previousBackpropagatedErrors = []
        self.previousBiasUpdates = []

        # One averaged accuracy per hyper-parameter combination.
        self.best_combination_accuracies = []
        # Test accuracies gathered while evaluating one combination.
        self.one_combination_accuracies = []

        # Accuracy curves of the best combination, kept for reportValues.
        self._best_test_curve = []
        self._best_train_curve = []

    def loadData(self, images_path="processed_train_images.npy", labels_path="train_labels.csv"):
        """Load the images and labels from disk.

        The images are flattened to one row per label, and the labels are
        taken from the second CSV column and returned as an (n, 1) array.

        Returns:
            (imgs, labels)
        """
        imgs = np.load(images_path, encoding="bytes")
        labels = np.reshape(pd.read_csv(labels_path).values[:, 1], (-1, 1))
        # One row per label instead of a hard-coded 10000 rows.
        imgs = imgs.reshape(len(labels), -1)
        return imgs, labels

    def oneHotEncoded(self, target_data):
        """Build the label -> one-hot row mapping for an (n, 1) label array.

        Labels are sorted so the mapping does not depend on set iteration
        order.

        Returns:
            (number_of_outputs, target_dict)
        """
        unique_targets = set(np.asarray(target_data)[:, 0].tolist())
        try:
            ordered_targets = sorted(unique_targets)
        except TypeError:
            # Labels of mixed, non-comparable types: order by their repr.
            ordered_targets = sorted(unique_targets, key=repr)

        number_of_outputs = len(ordered_targets)
        target_dict = dict(zip(ordered_targets, np.eye(number_of_outputs)))
        return number_of_outputs, target_dict

    def encodeTargets(self, dict, targets):
        """Translate an (n, 1) label array into an (n, classes) one-hot array.

        ``dict`` is the mapping returned by ``oneHotEncoded`` (the parameter
        name is kept for backward compatibility).

        Raises:
            KeyError: if a label has no entry in the mapping.
        """
        labels = np.asarray(targets)[:, 0].tolist()
        try:
            encoded_targets = [dict[label] for label in labels]
        except KeyError as err:
            # Fail loudly instead of silently inserting None rows.
            raise KeyError("no one-hot encoding for label {!r}".format(err.args[0]))
        return np.array(encoded_targets)

    def train_test_split(self, X, y, test_size):
        """Randomly split the samples and one-hot encode the labels.

        Args:
            X: 2-D array with one sample per row.
            y: labels, one per row of X.
            test_size: fraction of the rows placed in the test set (0 < f < 1).

        Returns:
            (X_train, X_test, y_train, y_test, number_of_outputs)

        Raises:
            ValueError: on mismatched lengths, an invalid ``test_size`` or a
                split that would leave one of the two sets empty.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        nbOfRows = len(X)
        if len(y) != nbOfRows:
            raise ValueError("X and y must contain the same number of rows")
        if not 0 < test_size < 1:
            raise ValueError("test_size must be strictly between 0 and 1")
        y = y.reshape(nbOfRows, -1)

        # A single split index guarantees disjoint, gap-free partitions.
        n_test = int(nbOfRows * test_size)
        n_train = nbOfRows - n_test
        if n_test == 0 or n_train == 0:
            raise ValueError("test_size leaves an empty train or test set")

        # Shuffle row indices rather than a merged X|y array, so X keeps its
        # numeric dtype even when the labels are strings.
        order = np.random.permutation(nbOfRows)
        train_rows, test_rows = order[:n_train], order[n_train:]

        # Fit the encoding on every label so that a class that only shows up
        # in the test rows still has a one-hot vector.
        number_of_outputs, target_dict = self.oneHotEncoded(y)
        y_train = self.encodeTargets(target_dict, y[train_rows])
        y_test = self.encodeTargets(target_dict, y[test_rows])

        return X[train_rows], X[test_rows], y_train, y_test, number_of_outputs

    def _forward(self, inputs):
        """Return ``[pre_activation, activation]`` for every layer, in order."""
        layers = []
        signal = inputs
        for weight, bias in zip(self.weights, self.biases):
            pre_activation = np.dot(signal, weight) + bias
            activation = self.leaky_relu(pre_activation)
            layers.append([pre_activation, activation])
            signal = activation
        return layers

    def testNN(self, X_test, y_test, mode):
        """Run a forward pass with the current weights and record the accuracy.

        With ``mode == 'test'`` the accuracy is appended to
        ``test_accuracies`` and ``one_combination_accuracies``; any other
        mode appends it to ``train_accuracies``.

        Returns:
            The softmax probabilities, one row per sample.
        """
        probs = self.softmax(self._forward(X_test)[-1][1])

        total = len(probs)
        if total:
            predicted = np.argmax(probs, axis=1)
            actual = np.argmax(np.asarray(y_test), axis=1)
            this_accuracy = int(np.count_nonzero(predicted == actual)) / total
        else:
            # Nothing to evaluate: avoid dividing by zero.
            this_accuracy = 0.0

        if mode == 'test':
            self.test_accuracies.append(this_accuracy)
            self.one_combination_accuracies.append(this_accuracy)
            print('test accuracy: ', this_accuracy)
        else:
            self.train_accuracies.append(this_accuracy)
            print('train accuracy: ', this_accuracy)

        return probs

    def reportValues(self, epoch):
        """Print the best combination and plot accuracy against the epoch.

        Only the last ``epoch`` entries of each accuracy history are drawn,
        so the x and y series always have the same length.
        """
        print('best combination: ', self.best_combination)
        print('best accuracy: ', self.best_accuracy)
        print('epoch ', epoch)

        y_test_values = list(self.test_accuracies)[-epoch:] if epoch > 0 else []
        y_train_values = list(self.train_accuracies)[-epoch:] if epoch > 0 else []
        shown_x_values = np.arange(0, epoch, 5)

        plt.plot(np.arange(len(y_test_values)), y_test_values, color='b', label='test')
        plt.plot(np.arange(len(y_train_values)), y_train_values, color='r', label='train')
        plt.xticks(shown_x_values)
        # Keep the 0.5 ceiling unless a curve rises above it.
        plt.ylim(top=max([0.5] + y_test_values + y_train_values))
        plt.xlabel("Epoch")
        plt.ylabel("Accuracy")
        plt.legend()
        plt.show()

    def initializeParams(self, n_inputs, nodes, m_outputs):
        """Reset the network for a fresh training run.

        Weights are drawn from N(0, 1); biases and momentum buffers start at
        zero.  The loss and accuracy histories of the previous run are
        cleared.

        Args:
            n_inputs: number of input features.
            nodes: widths of the hidden layers (may be empty).
            m_outputs: number of output classes.

        Returns:
            (biases, weights)
        """
        self.biases = []
        self.weights = []
        self.previousBackpropagatedErrors = []
        self.previousBiasUpdates = []

        fan_in = n_inputs
        for width in list(nodes) + [m_outputs]:
            self.biases.append(np.zeros((1, width)))
            self.weights.append(np.random.normal(0, 1, [fan_in, width]))
            self.previousBackpropagatedErrors.append(np.zeros((fan_in, width)))
            self.previousBiasUpdates.append(np.zeros((1, width)))
            fan_in = width

        # Placeholders; feedForward replaces them with real activations.
        self.hidden_layers = [np.zeros((1, width)) for width in nodes]

        self.losses = []
        self.accuracies = []
        # Each run starts with empty curves so they hold one point per epoch.
        self.train_accuracies = []
        self.test_accuracies = []

        return self.biases, self.weights

    def reluDerivative(self, x):
        """Return the leaky-ReLU derivative (1 where x > 0, else the leak).

        A new array is returned; ``x`` is left untouched so cached
        activations stay valid for the weight-gradient computation.
        """
        return np.where(np.asarray(x) > 0, 1.0, self._LEAK)

    def leaky_relu(self, x):
        """Return the leaky ReLU of ``x`` without modifying ``x``."""
        values = np.asarray(x)
        return np.where(values > 0, values, values * self._LEAK)

    def tanhActivation(self, data):
        """Return the element-wise hyperbolic tangent of ``data``."""
        return np.tanh(data)

    def tanhDeriv(self, data):
        """Return the derivative of tanh evaluated at ``data``."""
        return 1 - np.tanh(data) ** 2

    def softmax(self, x):
        """Return the row-wise softmax of a 2-D array.

        The row maximum is subtracted before exponentiating to avoid
        overflow.
        """
        scores = np.asarray(x, dtype=np.float64)
        exponentials = np.exp(scores - np.max(scores, axis=1, keepdims=True))
        return exponentials / np.sum(exponentials, axis=1, keepdims=True)

    def lossFunction(self, y_train):
        """Return the mean cross-entropy of ``output_probs`` against ``y_train``.

        ``y_train`` is one-hot encoded; probabilities are clipped away from
        zero so the logarithm stays finite.
        """
        probabilities = np.asarray(self.output_probs)
        indices = np.argmax(y_train, axis=1).astype(int)
        predicted_probability = probabilities[np.arange(len(probabilities)), indices]
        log_preds = np.log(np.clip(predicted_probability, 1e-12, None))
        loss = -1.0 * np.sum(log_preds) / len(log_preds)
        return loss

    def storeAverageAccuracies(self):
        """Average the test accuracies of the current combination and store it.

        The average covers every value in ``one_combination_accuracies``,
        which is emptied afterwards.  With no recorded value, 0.0 is stored
        so the averages stay aligned with the combinations.
        """
        scores = self.one_combination_accuracies
        avg = sum(scores) / len(scores) if scores else 0.0
        self.best_combination_accuracies.append(avg)
        self.one_combination_accuracies = []

    def storeBestCombination(self, combinations):
        """Select the combination with the highest averaged accuracy.

        Sets ``best_combination`` and ``best_accuracy`` and prints a summary.

        Raises:
            ValueError: if no averaged accuracy has been stored yet.
        """
        if not self.best_combination_accuracies:
            raise ValueError("no averaged accuracies have been stored")

        best_acc = max(self.best_combination_accuracies)
        print('bestacc ', best_acc)
        best_index = self.best_combination_accuracies.index(best_acc)
        print('best index ', best_index)
        self.best_combination = combinations[best_index]
        self.best_accuracy = best_acc
        print('best combiantion: ', self.best_combination)
        print('=========================================')

        print('best avg accuracies: ')
        print(self.best_combination_accuracies)

        print('combinations: ')
        print(combinations)

    def feedForward(self, X_train):
        """Propagate a batch through the network and cache every layer.

        Stores the batch in ``input_layer``, a ``[pre_activation,
        activation]`` pair per hidden layer in ``hidden_layers``, the same
        pair for the output layer in ``output_layer`` and the softmax
        probabilities in ``output_probs``.
        """
        self.input_layer = X_train

        layers = self._forward(X_train)
        self.hidden_layers = layers[:-1]
        self.output_layer = layers[-1]

        # softmax activation function to get the probabilities
        self.output_probs = self.softmax(self.output_layer[1])

    def backProp(self, alpha, momentum, y_train):
        """Update all weights and biases from the last ``feedForward`` call.

        Args:
            alpha: learning rate.
            momentum: factor applied to the previous update of each layer.
            y_train: one-hot targets of the batch passed to ``feedForward``.
        """
        output_error = self.output_probs - y_train

        # deltas[i] is the error signal at the pre-activation of layer i.
        deltas = [None] * len(self.weights)
        deltas[-1] = np.multiply(output_error, self.reluDerivative(self.output_layer[1]))

        # Walk backwards through the hidden layers; all deltas are computed
        # with the weights as they were before this update.
        for layer in range(len(self.hidden_layers) - 1, -1, -1):
            upstream = np.dot(deltas[layer + 1], self.weights[layer + 1].T)
            deltas[layer] = np.multiply(upstream, self.reluDerivative(self.hidden_layers[layer][1]))

        # The input of layer i is the batch itself for i == 0, otherwise the
        # activation of the previous hidden layer.
        layer_inputs = [self.input_layer] + [hidden[1] for hidden in self.hidden_layers]

        for index, (inputs, delta) in enumerate(zip(layer_inputs, deltas)):
            dW = np.dot(inputs.T, delta) + momentum * self.previousBackpropagatedErrors[index]
            db = np.sum(delta, axis=0, keepdims=True) + momentum * self.previousBiasUpdates[index]

            self.weights[index] = self.weights[index] - alpha * dW
            self.biases[index] = self.biases[index] - alpha * db
            # Remember the updates: they are the next call's momentum terms.
            self.previousBackpropagatedErrors[index] = dW
            self.previousBiasUpdates[index] = db

    def backPropagation(self, X_train, X_test, y_train, y_test, number_of_outputs, biases, weights, combination, epoch, nodes, alpha, momentum):
        """Train for ``epoch`` epochs and evaluate after each one.

        Every epoch walks through ``X_train`` in mini-batches of
        ``batch_size`` samples (the final, shorter batch included), then
        records the test and the train accuracy through ``testNN``.
        ``number_of_outputs``, ``biases``, ``weights``, ``combination`` and
        ``nodes`` are accepted for compatibility and not used here.
        """
        n_samples = X_train.shape[0]
        for _ in range(epoch):
            for start in range(0, n_samples, self.batch_size):
                end = start + self.batch_size
                self.feedForward(X_train[start:end])
                self.backProp(alpha, momentum, y_train[start:end])

            self.testNN(X_test, y_test, 'test')
            self.testNN(X_train, y_train, 'train')

    def neuralNetwork(self, imgs, labels, hyperparameters=None, cross_validation=5):
        """Grid-search the hyper-parameters and report the best combination.

        For every combination the data is split 80/20 ``cross_validation``
        times; each time a freshly initialised network is trained and its
        test accuracies are collected, then averaged per combination.

        Args:
            imgs: 2-D array of samples.
            labels: (n, 1) array of labels.
            hyperparameters: optional dict overriding the default value lists
                of 'epoch', 'nodes', 'alpha' and/or 'momentum'.
            cross_validation: number of random splits per combination.
        """
        grid = {
            'epoch': [50],
            'nodes': [[2048], [2048, 1024], [2048, 1024, 512], [2048, 1024, 512, 512]],
            'alpha': [0.00001, 0.0001, 0.001],
            'momentum': [0.00001, 0.0005, 0.0001, 0.001],
        }
        if hyperparameters:
            grid.update(hyperparameters)

        hyperparameters_list = ['epoch', 'nodes', 'alpha', 'momentum']
        combinations = list(itertools.product(*[grid[name] for name in hyperparameters_list]))

        # Start from a clean slate so averages line up with `combinations`.
        self.best_combination_accuracies = []
        self.one_combination_accuracies = []
        best_average = None

        for combination in combinations:
            print('combination: ', combination)
            args = dict(zip(hyperparameters_list, combination))

            for _ in range(cross_validation):
                X_train, X_test, y_train, y_test, number_of_outputs = self.train_test_split(imgs, labels, 0.2)
                X_train = np.array(X_train, dtype=np.float64)
                X_test = np.array(X_test, dtype=np.float64)

                biases, weights = self.initializeParams(X_train.shape[1], args['nodes'], number_of_outputs)
                self.backPropagation(X_train, X_test, y_train, y_test, number_of_outputs, biases, weights, combination, **args)

            self.storeAverageAccuracies()

            # Keep the curves of the last run of the best combination so far
            # (strict '>' keeps the first winner, like list.index does).
            average = self.best_combination_accuracies[-1]
            if best_average is None or average > best_average:
                best_average = average
                self._best_test_curve = list(self.test_accuracies)
                self._best_train_curve = list(self.train_accuracies)

        self.storeBestCombination(combinations)

        # Plot the best combination, not whichever happened to run last.
        self.test_accuracies = list(self._best_test_curve)
        self.train_accuracies = list(self._best_train_curve)
        self.reportValues(self.best_combination[0])

# Driver: create the network, load the images and labels through loadData,
# then run the hyper-parameter search implemented by neuralNetwork.
NN = NeuralNetwork()
imgs, labels = NN.loadData()
NN.neuralNetwork(imgs, labels)