# Final Project
## ITU Machine Learning Fall 2021
### FF Neural Network From Scratch

#### This notebook contains the code for the Neural Network part of our final project.

#### Group AC
Chrisanna Cornish <ccor@itu.dk> <br>
Carl August Wismer <cwis@itu.dk><br>
Danielle Marie Dequin <ddeq@itu.dk>

Last Edited: 01/01/2021

## Imports

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import preprocessing
from sklearn.metrics import *

from sklearn.decomposition import PCA

import warnings
#warnings.filterwarnings("ignore")

## Paths

In [None]:
# Relative paths to the training and test CSV files read with pd.read_csv below.
TRAIN = '../Data/df_train.csv'
TEST = '../Data/df_test.csv'

## Functions

In [None]:
# code from exercise 5 to calculate the z-score
def z_score(x):
    """Standardise ``x`` along axis 0 to zero mean and unit standard deviation.

    Parameters
    ----------
    x : array-like or pandas object
        Values to standardise; the mean and standard deviation are taken
        with ``np.mean`` / ``np.std`` along axis 0.

    Returns
    -------
    Same kind of object as ``x - mean``: the centred values divided by the
    standard deviation.

    A constant column has a standard deviation of 0; dividing by it would
    yield NaN/inf. Such columns are divided by 1 instead, so they come back
    as all zeros.
    """
    mean = np.mean(x, axis=0)
    std = np.std(x, axis=0)
    # Replace zero deviations by 1 so constant columns do not produce NaN/inf.
    safe_std = np.where(std == 0, 1.0, std)
    return (x - mean) / safe_std

def conf_mat(y_hat, y_true):
    '''Returns a confusion matrix.

    Rows are the true classes and columns the predicted classes, so
    ``result[t][p]`` counts the samples of true class ``t`` predicted as ``p``.

    y_hat  --> predicted integer class labels (0-based)
    y_true --> true integer class labels (0-based), same length as y_hat

    The matrix is sized from the largest label found in EITHER input, so a
    class that is present in y_true but never predicted still gets a row
    and a column. Inputs are read positionally, so pandas Series with any
    index are accepted. Empty input gives a (0, 0) matrix.

    Raises ValueError when the two inputs differ in length.
    '''
    predicted = np.asarray(y_hat, dtype=int).ravel()
    actual = np.asarray(y_true, dtype=int).ravel()
    if predicted.shape != actual.shape:
        raise ValueError('y_hat and y_true must have the same length')
    if predicted.size == 0:
        return np.zeros([0, 0])

    n = int(max(predicted.max(), actual.max())) + 1
    bingo = np.zeros([n, n])
    # np.add.at accumulates correctly when the same (true, predicted) pair repeats.
    np.add.at(bingo, (actual, predicted), 1)

    return bingo

def scores(y_hat, y_true, average = True):
    '''For each class, returns recall, precision and f1.

    y_hat   --> predicted integer class labels
    y_true  --> true integer class labels
    average --> when True, return the unweighted mean over the classes found
                in y_true as (recall, precision, f1); otherwise return the
                three per-class lists (r, p, f)

    A metric whose denominator is zero (e.g. the precision of a class that is
    never predicted) is reported as 0.0 rather than NaN, so one missing class
    does not turn the averages into NaN.
    '''
    classes = list(np.unique(y_true))
    conf = conf_mat(y_hat, y_true)
    r = []
    p = []
    f = []
    for c in classes:
        hits = conf[c][c]
        actual_total = conf[c].sum()        # samples whose true class is c
        predicted_total = conf[:, c].sum()  # samples predicted as c
        recall = hits / actual_total if actual_total else 0.0
        precision = hits / predicted_total if predicted_total else 0.0
        denominator = precision + recall
        f1 = 2*(precision*recall)/denominator if denominator else 0.0
        r.append(recall)
        p.append(precision)
        f.append(f1)
    if average:
        return sum(r)/len(r), sum(p)/len(p), sum(f)/len(f)
    else:
        return (r, p, f)
    
def plot_confusion_matrix(cm, target_names, title='Confusion matrix', cmap=None, normalize=True):
    """
    Draw a confusion matrix as an annotated heat map and show it.

    Function copied from exercise 7.
    ---------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    cm           --> square confusion matrix (rows: true label, columns: predicted label)
    target_names --> tick labels for the classes, or None for no tick labels
    title        --> figure title
    cmap         --> matplotlib colour map; 'Blues' when None
    normalize    --> when True the cell annotations show row-normalised values,
                     otherwise the values of cm as given

    Accuracy and misclassification rate are computed from the matrix as passed
    in and written under the x axis. The image itself is always drawn from the
    matrix as passed in; normalisation only affects the cell annotations.
    """
    from itertools import product

    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    colour_map = plt.get_cmap('Blues') if cmap is None else cmap

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=colour_map)
    plt.title(title)
    plt.colorbar()
    plt.ylim([-0.5, cm.shape[0]-0.5])

    if target_names is not None:
        positions = np.arange(len(target_names))
        plt.xticks(positions, target_names, rotation=45)
        plt.yticks(positions, target_names)

    # Pick the annotation values, number format and light/dark text threshold.
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        thresh = cm.max() / 1.5
        cell_format = "{:0.4f}"
    else:
        thresh = cm.max() / 2
        cell_format = "{:,}"

    for row, column in product(range(cm.shape[0]), range(cm.shape[1])):
        value = cm[row, column]
        text_colour = "white" if value > thresh else "black"
        plt.text(column, row, cell_format.format(value),
                 horizontalalignment="center",
                 color=text_colour)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    plt.show()

## Variables and df loading

In [None]:
col = 'rainbow' # Colour theme

df = pd.read_csv(TRAIN) # Training dataframe
a = len(df)

# Shuffle so the rows carry no ordering; random_state is fixed for reproducibility,
# frac=1 gives the whole df back but shuffled.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

if a != len(df):
    print('WARNING, DATA IS BEING LOST') # confirm still have the whole df

attributes = list(df.columns)[:-1] # Every column except the last one, which is taken to be the class

df[attributes] = z_score(df[attributes])

X = df[attributes].copy() # Attributes
y = df['type'].copy() # True values

# Re-map the raw class ids in one vectorised step (instead of a per-row loop):
# ids above 4 are lowered by 2, all other ids by 1.
y = y - 1 - (y > 4).astype(int)

lb = preprocessing.LabelBinarizer()
new_y = pd.DataFrame(lb.fit_transform(y)) # one indicator column per class

y_list = y.unique() # 'y' values


df['type'] = y
# Name the indicator columns after new_y's own columns rather than a hard-coded 0..5 list.
df[list(new_y.columns)] = new_y

df.head(10) # not rendered: a notebook cell only displays its last expression
round(df[attributes].describe(),2)

Using ideas from:

https://towardsdatascience.com/how-to-build-your-own-neural-network-from-scratch-in-python-68998a08e4f6

https://hackernoon.com/building-a-feedforward-neural-network-from-scratch-in-python-d3526457156b

https://machinelearningmastery.com/implement-backpropagation-algorithm-scratch-python/

In [None]:
class NeuralNetwork:
    '''Feed-forward classifier with one tanh hidden layer and a softmax output.

    Training is full-batch gradient descent on the cross-entropy loss: every
    call to train() runs one forward pass and one weight update. Gradients are
    summed (not averaged) over the rows, so the effective step size grows with
    the number of rows passed in.
    '''

    def __init__(self, x, y, neurons=10, lr=0.01):
        '''
        x       --> attributes, one row per sample (anything with .shape, e.g. a DataFrame)
        y       --> integer class label per row; used directly as one-hot column
                    positions, so labels must lie in 0..(number of distinct labels - 1)
        neurons --> neurons in the hidden layer
        lr      --> learning rate used for every weight/bias update

        Note: seeds numpy's GLOBAL random generator with 23 so that the initial
        weights are reproducible.
        '''

        self.input = x # All rows with the attributes (all X's)
        self.labels = y # true labels
        self.rows = x.shape[0] # number of rows

        # Read the labels positionally so a Series with any index works.
        label_idx = np.asarray(y, dtype=int).ravel()
        self.class_count = len(np.unique(label_idx)) # number of classes

        self.y = np.zeros((self.rows, self.class_count)) # one hot labels
        self.y[np.arange(self.rows), label_idx] = 1

        self.output = np.zeros(self.y.shape) # latest forward-pass probabilities

        self.neurons = neurons # number of neurons in the hidden layer
        np.random.seed(23)

        self.weight1 = np.random.rand(x.shape[1], neurons) # (attributes in X, number of neurons)
        self.weight_final = np.random.rand(neurons, self.y.shape[1]) # (number of neurons, classes)

        self.bias1 = np.random.randn(neurons)
        self.bias_final = np.random.randn(self.class_count)

        self.lr = lr
        self.error_cost = [] # loss recorded by each train() call
        self.accuracy = [] # accuracy recorded by each train() call

    def sigmoid(self, x):
        '''Logistic function 1 / (1 + e^-x), element-wise.'''
        return 1 / (1 + np.exp(-x))

    def tanh(self, x):
        '''Hyperbolic tangent, element-wise.'''
        return np.tanh(x)

    def relu(self, x=None): # Rectified Linear Unit
        '''Element-wise max(0, x); falls back to the stored input when x is omitted.'''
        if x is None:
            x = self.input
        return np.maximum(0, x)

    def softmax(self, x):
        '''Row-wise softmax of a 2-D array/DataFrame; always returns a numpy array.'''
        logits = np.asarray(x, dtype=float)
        # Subtracting the row maximum leaves the result unchanged mathematically
        # but prevents np.exp from overflowing on large logits.
        exp = np.exp(logits - logits.max(axis=1, keepdims=True))
        probabilities = exp / np.sum(exp, axis=1, keepdims=True)
        return probabilities

    def sigmoid_der(self, x):
        """Derivative of the sigmoid at x.

        Code borrowed from:
        https://towardsdatascience.com/how-to-build-your-own-neural-network-from-scratch-in-python-68998a08e4f6
        """
        return self.sigmoid(x) * (1 - self.sigmoid(x))

    def tanh_der(self, x):
        '''Derivative of tanh at x: 1 - tanh(x)^2.'''
        return 1 - (self.tanh(x)**2)

    def forwardpass(self, x):
        '''Compute class probabilities for x.

        Stores the hidden activations in self.layer1 and the probabilities in
        self.output (both numpy arrays) and returns the probabilities.
        '''
        x = np.asarray(x, dtype=float)

        # Hidden layer with tanh activation
        self.layer1 = self.tanh(x.dot(self.weight1) + self.bias1)

        # Final Output layer with softmax
        self.output = self.softmax(self.layer1.dot(self.weight_final) + self.bias_final)

        return self.output

    def backprop(self, x, y):
        """One gradient-descent update from the most recent forward pass.

        x --> the attributes that were fed to the preceding forwardpass()
        y --> one-hot true labels for those rows

        Returns (loss, accuracy), both measured on the probabilities produced
        BEFORE this update is applied.
        """
        x = np.asarray(x, dtype=float)
        y = np.asarray(y, dtype=float)

        # Softmax + cross-entropy: gradient w.r.t. the output pre-activations
        difference = self.output - y
        weight_cost = np.dot(self.layer1.T, difference)
        bias_cost = difference.sum(axis=0)

        # Push the error back through the OUTPUT WEIGHTS (not through their
        # gradient) and through the tanh derivative of the hidden pre-activations.
        backwards_tanh = self.tanh_der(np.dot(x, self.weight1) + self.bias1)
        hidden_error = np.dot(difference, self.weight_final.T) * backwards_tanh

        back_cost_w = np.dot(x.T, hidden_error)
        back_cost_b = hidden_error.sum(axis=0)

        # Update weights and biases using learning rate
        self.weight1 -= self.lr * back_cost_w
        self.bias1 -= self.lr * back_cost_b

        self.weight_final -= self.lr * weight_cost
        self.bias_final -= self.lr * bias_cost

        # Mean cross-entropy against the labels passed in; the tiny constant guards log(0)
        loss = -(1.0 / len(y) * np.sum(y * np.log(1e-15 + self.output)))

        y_pred = self.output.argmax(axis=1)
        y_true = y.argmax(axis=1)
        accuracy = (y_pred==y_true).mean()

        return loss, accuracy

    def train(self, X, y):
        """Run one epoch: forward pass on X, then one backprop update against
        the one-hot labels y. Appends the resulting loss and accuracy to
        self.error_cost and self.accuracy.
        """
        self.output = self.forwardpass(X)
        (cost, accuracy) = self.backprop(X, y)
        self.error_cost.append(cost)
        self.accuracy.append(accuracy)

    def predict(self, X, proba=False):
        '''Return class probabilities per row when proba is True, otherwise the
        index of the most probable class per row.'''
        if proba:
            return self.forwardpass(X)
        else:
            return self.forwardpass(X).argmax(axis=1)

In [None]:
# Build the Neural Network on the full (standardised) training attributes
kitty = NeuralNetwork(X, y, neurons = 100)

# Train for 1500 epochs (one full-batch forward pass + update per call to train)
for i in range(1500):
    kitty.train(X, new_y)

# Sanity check on the first record: its class probabilities should sum to 1
y_pred = kitty.predict(X, proba=True)
print(f'Sum of all values for first record: {sum(y_pred[0])} \
    \nFirst row label guesses: {y_pred[0]} \
    \nPredicted Class: {np.argmax(y_pred[0])} \
    \nTrue Class: {y[0]}')

In [None]:
# Training curves: one loss value and one accuracy value were recorded per train() call
plt.plot(kitty.error_cost)
plt.plot(kitty.accuracy)
plt.legend(['cost', 'accuracy'])
plt.ylim([0,1.1]) # any cost above 1.1 falls outside the visible range
plt.show() 
#This isn't pretty

In [None]:
# X and y come from the TRAIN csv, so these are training-set results
# (the banner previously said 'Test').
print('\n','#'*10,'Result for {} Data'.format('Training'), '#'*10, '\n')

y_proba = kitty.predict(X, proba= True) # class probabilities per row
print('log_loss:   ', log_loss(y, y_proba, eps=1e-15))

# Most probable class per row; y_pred is reused by the confusion-matrix cell
y_pred = y_proba.argmax(axis=1)
print('accuracy:   ',(y_pred==y).mean(), '\n')

target_names = ['class {}'.format(i+1) for i in range(6)]
print(classification_report(y, y_pred, target_names=target_names,zero_division=0)) # Set 0 division to 0 as default

In [None]:
# Print Confusion matrix (y and y_pred both refer to the training set)
confu = confusion_matrix(y, y_pred)

plot_confusion_matrix(cm           = confu, 
                      normalize    = False,
                      target_names = ['1', '2', '3', '4', '5', '6'],
                      title        = "Confusion Matrix: Training data")

## K-Fold Cross Validation

In [None]:
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

#hyperparameters for tuning... this would probably be better automated somehow.
neu = 100 # neurons in the hidden layer
lr = 0.01 # learning rate
epochs = 1500 # train() calls per fold

num_folds = 5
# Define the K-fold Cross Validator (stratified on y; the rows were already shuffled when df was loaded)
kfold = StratifiedKFold(n_splits=num_folds, shuffle=False)

# K-fold Cross Validation model evaluation
fold_no = 1
acc_per_fold = [] # accuracy (in %) of each fold
loss_per_fold = [] # log loss of each fold
for train, test in kfold.split(X, y):
    # Training part of this fold; index reset so its rows are labelled 0..n-1
    fold_x = X.iloc[train].copy().reset_index(drop=True)
    fold_y = y.iloc[train].copy().reset_index(drop=True)
    
    # A fresh network per fold, so nothing learned carries over between folds
    luci = NeuralNetwork(fold_x, fold_y, neurons=neu, lr=lr)
    # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')
    # One-hot labels for this fold (re-fits the shared LabelBinarizer lb)
    new_f_y = pd.DataFrame(lb.fit_transform(fold_y))
    # Fit data to model
    for i in range(epochs): # number of epochs
        luci.train(fold_x, new_f_y)
    
    # Evaluate on the held-out part of the fold
    y_pred = luci.predict(X.iloc[test])
    # Classes seen in training vs. classes actually predicted
    print(sorted(pd.Series(fold_y).unique()), sorted(pd.Series(y_pred).unique()))
    loss = log_loss(y.iloc[test], luci.predict(X.iloc[test], proba=True), eps=1e-15, labels=[0,1,2,3,4,5])

    #y_true = new_y.argmax(axis=1)
    accuracy = (y_pred==y.iloc[test]).mean()
    
    print(f'Score for fold {fold_no}: Loss of {loss}; Accuracy of {accuracy*100}%')
    acc_per_fold.append(accuracy * 100)
    loss_per_fold.append(loss)
    
    target_names = ['class {}'.format(i+1) for i in range(6)]
    print(classification_report(y.iloc[test], y_pred, target_names=target_names,zero_division=0, labels=[0,1,2,3,4,5]))

    # Increase fold number
    fold_no = fold_no + 1

# Averages over all folds, together with the settings that produced them
print(f'\n\nAv_Accuracy: {round(np.array(acc_per_fold).mean(), 2)}%, Av_log_loss: {round(np.array(loss_per_fold).mean(), 4)}')
print(f'Hyperparameters: Neurons: {neu}, learning rate: {lr}, Epochs: {epochs}')

In [None]:
# Fit a 9-component PCA on the training attributes to inspect the explained variance
pca = PCA(n_components=9)
pca.fit(X)

X_pca = pd.DataFrame(pca.transform(X))
X_pca = z_score(X_pca) # re-standardise the projected components

print(pca.explained_variance_ratio_, '\n')
pca_exp = pca.explained_variance_ratio_
# Running total of the explained-variance ratio: s is the cumulative sum, c the component count
s = 0
c = 1
for i in pca_exp:
    s += i
    print(f'{c} components explain {round(100*s,2)}% of the data')
    c+=1

In [None]:
# Re-fit with 7 components; this overwrites the pca and X_pca from the previous cell
pca = PCA(n_components=7) #99% of the data explained.
pca.fit(X)

X_pca = pd.DataFrame(pca.transform(X))
X_pca = z_score(X_pca) # re-standardise the projected components

In [None]:
# Build the Neural Network on the PCA-reduced training attributes
terri = NeuralNetwork(X_pca, y, neurons = 100, lr=0.005)

# Train for 1000 epochs
for i in range(1000):
    terri.train(X_pca, new_y)

# Class probabilities for every training row
y_pred = terri.predict(X_pca, proba = True)
    
# Sanity check on the first record: its class probabilities should sum to 1
print(f'Sum of all values for first record: {sum(y_pred[0])} \
    \nFirst row label guesses: {y_pred[0]} \
    \nPredicted Class: {np.argmax(y_pred[0])} \
    \nTrue Class: {y[0]}')

In [None]:
# Training curves of the PCA model: one loss and one accuracy value per train() call
plt.plot(terri.error_cost)
plt.plot(terri.accuracy)
plt.legend(['cost', 'accuracy'])
plt.ylim([0,1.1]) # any cost above 1.1 falls outside the visible range
plt.show()

In [None]:
# X_pca and y come from the TRAIN csv, so these are training-set results
# (the banner previously said 'Test').
print('\n','#'*10,'Result for {} Data'.format('Training'), '#'*10, '\n')

y_proba = terri.predict(X_pca, proba=True) # class probabilities per row
y_pred = y_proba.argmax(axis=1) # most probable class per row; reused by the confusion-matrix cell
print('log_loss:   ', log_loss(y, y_proba, eps=1e-15))
print('accuracy:   ',(y_pred==y).mean(), '\n')

target_names = ['class {}'.format(i+1) for i in range(6)]
print(classification_report(y, y_pred, target_names=target_names,zero_division=0)) # Set 0 division to 0 as default

In [None]:
# Print Confusion matrix (y and y_pred both refer to the training set)
confu = confusion_matrix(y, y_pred)

plot_confusion_matrix(cm           = confu, 
                      normalize    = False,
                      target_names = ['1', '2', '3', '4', '5', '6'],
                      title        = "Confusion Matrix: Training data")

In [None]:
#hyperparameters for tuning... this would probably be better automated somehow.
neu = 100 # neurons in the hidden layer
lr = 0.01 # learning rate
epochs = 1000 # train() calls per fold

num_folds = 5
# Define the K-fold Cross Validator (StratifiedKFold is imported in the earlier cross-validation cell)
kfold = StratifiedKFold(n_splits=num_folds, shuffle=False)

# K-fold Cross Validation model evaluation on the PCA-reduced attributes
fold_no = 1
acc_per_fold = [] # accuracy (in %) of each fold
loss_per_fold = [] # log loss of each fold
for train, test in kfold.split(X_pca, y):
    # Training part of this fold; index reset so its rows are labelled 0..n-1
    fold_x = X_pca.iloc[train].copy().reset_index(drop=True)
    fold_y = y.iloc[train].copy().reset_index(drop=True)
    # A fresh network per fold, so nothing learned carries over between folds
    pia = NeuralNetwork(fold_x, fold_y, neurons=neu, lr=lr)
    # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')
    # One-hot labels for this fold (re-fits the shared LabelBinarizer lb)
    new_f_y = pd.DataFrame(lb.fit_transform(fold_y))
    # Fit data to model
    for i in range(epochs): # number of epochs
        pia.train(fold_x, new_f_y)
    
    # Evaluate on the held-out part of the fold
    y_pred = pia.predict(X_pca.iloc[test])
    # Classes seen in training vs. classes actually predicted
    print(sorted(pd.Series(fold_y).unique()), sorted(pd.Series(y_pred).unique()))
    loss = log_loss(y.iloc[test], pia.predict(X_pca.iloc[test], proba=True), eps=1e-15, labels=[0,1,2,3,4,5])

    #y_true = new_y.argmax(axis=1)
    accuracy = (y_pred==y.iloc[test]).mean()
    
    print(f'Score for fold {fold_no}: Loss of {loss}; Accuracy of {accuracy*100}%')
    acc_per_fold.append(accuracy * 100)
    loss_per_fold.append(loss)
    
    target_names = ['class {}'.format(i+1) for i in range(6)]
    print(classification_report(y.iloc[test], y_pred, target_names=target_names,zero_division=0, labels=[0,1,2,3,4,5]))

    # Increase fold number
    fold_no = fold_no + 1

# Averages over all folds, together with the settings that produced them
print(f'\n\nAv_Accuracy: {round(np.array(acc_per_fold).mean(), 2)}%, Av_log_loss: {round(np.array(loss_per_fold).mean(), 4)}')
print(f'Hyperparameters: Neurons: {neu}, learning rate: {lr}, Epochs: {epochs}')

In [None]:
#Set parameters for final test data here based on previous stuff.
# These names are read by the (currently disabled) final evaluation cell below.
neu = 100 # neurons in the hidden layer
lr = 0.01 # learning rate
epochs = 1000 # number of train() calls
use_PCA = False # when True the final cell projects the data with PCA before training

In [None]:
'''loads and predicts on the TEST data, 
uncomment this **last** once everything else is in place then don't change anything!'''

# The disabled code below is kept as a string literal. Compared with the earlier
# draft it (1) standardises the test attributes with the TRAINING mean/std, because
# the model is trained on z-scored attributes, and (2) projects the test data with
# the PCA fitted on the training data instead of fitting a second PCA on the test set.
'''df_test = pd.read_csv(TEST) #test dataframe

attributes = list(df_test.columns)[:-1]

# Training statistics of the raw attributes (ddof=0 mirrors np.std's default used by z_score)
train_raw = pd.read_csv(TRAIN)[attributes]
train_mean = train_raw.mean(axis=0)
train_std = train_raw.std(axis=0, ddof=0)

X_test = (df_test[attributes] - train_mean) / train_std #attributes, scaled like the training data
y_test = df_test['type'].copy() #true values

# Same class-id remapping as for the training labels: ids above 4 drop by 2, the rest by 1
y_test = y_test - 1 - (y_test > 4).astype(int)

X_final = X #training attributes fed to the final model

if use_PCA:

    pca = PCA(n_components=7) #99% of the data explained.
    pca.fit(X) #fitted on the training data only

    X_final = pd.DataFrame(pca.transform(X))
    X_test = pd.DataFrame(pca.transform(X_test)) #same projection as the training data

    # Standardise both sets with the training components' statistics
    pc_mean = X_final.mean(axis=0)
    pc_std = X_final.std(axis=0, ddof=0)
    X_final = (X_final - pc_mean) / pc_std
    X_test = (X_test - pc_mean) / pc_std



#Train the final model with the above parameters
final = NeuralNetwork(X_final, y, neurons=neu, lr=lr) #set hyperparameters here
for i in range(epochs): # number of epochs
    final.train(X_final, new_y)

#Predicts on the test data
test_pred = final.predict(X_test)

print('log_loss:   ', log_loss(y_test, final.predict(X_test, proba=True), eps=1e-15, labels=[0,1,2,3,4,5]))
print('accuracy:   ',(test_pred==y_test).mean(), '\n')

target_names = ['class {}'.format(i+1) for i in range(6)]
print(classification_report(y_test, test_pred, target_names=target_names,zero_division=0, labels=[0,1,2,3,4,5]))

# Print Confusion matrix
confu = confusion_matrix(y_test, test_pred, labels=[0,1,2,3,4,5])

plot_confusion_matrix(cm           = confu, 
                      normalize    = False,
                      target_names = ['1', '2', '3', '4', '5', '6'],
                      title        = "Confusion Matrix: Test data")''';