## Imports

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import preprocessing

import warnings
#warnings.filterwarnings("ignore")

## Paths

In [None]:
# Locations of the CSV datasets, given relative to the working directory.
# TRAIN is read with pd.read_csv in the data-loading cell; TEST is not
# referenced in the cells visible here (presumably the held-out set - TODO confirm).
TRAIN = '../Data/df_train.csv'
TEST = '../Data/df_test.csv'

## Functions

In [None]:
# code from exercise 5 to calculate the z-score
def z_score(x):
    '''Standardise x along axis 0: subtract np.mean and divide by np.std.'''
    centred = x - np.mean(x, axis=0)
    spread = np.std(x, axis=0)
    return centred / spread

def conf_mat(y_hat, y_true):
    '''Returns a confusion matrix.

    y_hat  --> predicted class labels (non-negative integers)
    y_true --> true class labels (non-negative integers), same length as y_hat

    The result is a square float array in which entry [t][p] counts the
    samples whose true class is t and whose predicted class is p. Its size is
    taken from the largest label seen in EITHER input, so a class that is
    never predicted still gets its own row and column.

    Raises ValueError if the two inputs differ in length.
    '''
    # Work on plain arrays so indexing is positional even when a pandas
    # Series with a shuffled / non-default index is passed in.
    y_hat = np.asarray(y_hat, dtype=int).ravel()
    y_true = np.asarray(y_true, dtype=int).ravel()

    if len(y_hat) != len(y_true):
        raise ValueError('y_hat and y_true must have the same length')
    if len(y_hat) == 0:
        return np.zeros([0, 0])

    n = int(max(y_hat.max(), y_true.max())) + 1
    bingo = np.zeros([n, n])
    # np.add.at accumulates repeated (true, predicted) pairs correctly,
    # which a plain fancy-indexed "+= 1" would not.
    np.add.at(bingo, (y_true, y_hat), 1)

    return bingo

def scores(y_hat, y_true, average = True):
    '''Recall, precision and F1 for every class that occurs in y_true.

    y_hat   --> predicted class labels
    y_true  --> true class labels
    average --> if True return the unweighted (macro) mean of each metric as
                a tuple (recall, precision, f1); if False return the three
                per-class lists (recalls, precisions, f1s), ordered by class.

    A metric whose denominator is zero (e.g. precision of a class that was
    never predicted) is reported as 0.0 instead of NaN, so one empty class
    does not turn the macro average into NaN.

    Raises ValueError if y_true is empty.
    '''
    classes = list(np.unique(y_true))
    if not classes:
        raise ValueError('y_true is empty')
    conf = conf_mat(y_hat, y_true)

    def _ratio(numerator, denominator):
        # Guarded division: 0.0 when there is nothing to divide by.
        return numerator / denominator if denominator else 0.0

    r = []
    p = []
    f = []
    for c in classes:
        hits = conf[c][c]
        recall = _ratio(hits, conf[c].sum())        # row = all samples truly of class c
        precision = _ratio(hits, conf[:, c].sum())  # column = all samples predicted as c
        f1 = _ratio(2 * (precision * recall), precision + recall)
        r.append(recall)
        p.append(precision)
        f.append(f1)

    if average:
        return sum(r)/len(r), sum(p)/len(p), sum(f)/len(f)
    return (r, p, f)

## Variables and df loading

In [None]:
col = 'rainbow' # Colour theme

df = pd.read_csv(TRAIN) # Training dataframe
a = len(df)

# Shuffle the rows so the data carries no ordering. frac=1 returns the whole
# frame (shuffled); random_state is fixed for reproducibility.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

if a != len(df):
    print('WARNING, DATA IS BEING LOST')#confirm still have the whole df

# Every column except the last one is treated as an attribute
# (assumes the class column 'type' is the last column - TODO confirm).
attributes = list(df.columns)[:-1]

df[attributes] = z_score(df[attributes])

X = df[attributes].copy() # Attributes
y = df['type'].copy() # True values

# Re-index the class labels: labels above 4 move down by 2, all others by 1
# (presumably label 4 never occurs, which makes the result contiguous and
# 0-based - TODO confirm). Vectorised instead of a per-row Python loop.
y = y - np.where(y > 4, 2, 1)

lb = preprocessing.LabelBinarizer()
new_y = pd.DataFrame(lb.fit_transform(y)) # One indicator column per class

y_list = y.unique() # 'y' values

df['type'] = y
# Append the indicator columns under their integer names (0, 1, ...); the
# column list is derived from new_y rather than hard-coded to six classes.
df[list(new_y.columns)] = new_y

# NOTE: only the last expression of a cell is rendered, so this preview is
# evaluated but not shown.
df.head(10)
round(df[attributes].describe(),2)

Using ideas from:

https://towardsdatascience.com/how-to-build-your-own-neural-network-from-scratch-in-python-68998a08e4f6

https://hackernoon.com/building-a-feedforward-neural-network-from-scratch-in-python-d3526457156b

https://machinelearningmastery.com/implement-backpropagation-algorithm-scratch-python/

In [None]:
class NeuralNetwork:
    '''Feed-forward classifier: one sigmoid hidden layer followed by a softmax
    output layer, trained with full-batch gradient descent on the categorical
    cross-entropy.
    '''

    def __init__(self, x, y, bias=None, neurons=12, learning_rate=0.1):
        '''
        x --> training inputs, one row per sample (DataFrame or 2-D array)
        y --> integer class labels 0..k-1, one per row of x
        bias --> optional pair (hidden-layer bias, output-layer bias); each
                 entry may be a scalar or a vector matching the layer width.
                 None means both biases start at zero.
        neurons --> neurons in the hidden layer
        learning_rate --> step size applied by backprop()
        '''

        self.input = x # All rows with the attributes (all X's)
        self.labels = y
        features = np.asarray(x, dtype=float)
        self.rows = features.shape[0]

        # One-hot encode the labels positionally, so a Series whose index is
        # not 0..n-1 is handled as well.
        label_idx = np.asarray(y).astype(int).ravel()
        self.class_count = len(np.unique(label_idx))
        self.y = np.zeros((self.rows, self.class_count))
        self.y[np.arange(self.rows), label_idx] = 1

        self.output = np.zeros(self.y.shape)
        self.layer1 = None          # hidden activations of the last forward pass
        self._forward_input = None  # inputs of the last forward pass

        self.neurons = neurons
        self.learning_rate = learning_rate

        # Seeds the GLOBAL numpy RNG (kept as-is: code that runs afterwards
        # may rely on this for its own reproducibility).
        np.random.seed(23)
        self.weight1 = np.random.rand(features.shape[1], neurons) # (attributes in X, number of neurons)
        self.weight_final = np.random.rand(neurons, self.y.shape[1])

        if bias is None:
            bias = (0.0, 0.0)
        # Stored as float vectors so they can be updated during training;
        # scalar entries are broadcast to the layer width.
        self.bias1 = np.zeros(neurons) + bias[0]
        self.bias_final = np.zeros(self.class_count) + bias[1]

    def sigmoid(self, x):
        '''Logistic function, element-wise.'''
        return 1 / (1 + np.exp(-x))

    def tanh(self, x=None):
        '''Hyperbolic tangent of x (falls back to self.x when x is omitted).'''
        if x is None:
            x = self.x
        return np.tanh(x)

    def relu(self, x=None): #Rectified Linear Unit
        '''max(0, x) element-wise (falls back to self.x when x is omitted).'''
        if x is None:
            x = self.x
        return np.maximum(0, x)

    def softmax(self, vec):
        '''Row-wise softmax of a 2-D array or DataFrame; returns an ndarray.'''
        logits = np.asarray(vec, dtype=float)
        # Subtracting the row maximum leaves the result unchanged but keeps
        # np.exp from overflowing on large logits.
        shifted = logits - logits.max(axis=1, keepdims=True)
        exp = np.exp(shifted)
        probabilities = exp / np.sum(exp, axis=1, keepdims=True)
        return probabilities

    # https://towardsdatascience.com/how-to-build-your-own-neural-network-from-scratch-in-python-68998a08e4f6
    # code borrowed from here
    def sigmoid_derivative(self, p):
        '''Derivative of the sigmoid expressed through its output p.'''
        return p * (1 - p)

    def forwardpass(self, x):
        '''Run x through the network; stores and returns the class probabilities.'''
        x = np.asarray(x, dtype=float)
        self._forward_input = x

        # Hidden layer: sigmoid(x * w_1 + bias_1)
        self.layer1 = self.sigmoid(x.dot(self.weight1) + self.bias1)

        # Output layer with softmax
        self.output = self.softmax(self.layer1.dot(self.weight_final) + self.bias_final)

        return self.output

    def categorical_cross_entropy(self, X=None):
        '''Mean categorical cross-entropy between the stored one-hot targets
        and self.output (the result of the last forward pass).

        X is accepted for backward compatibility and is not used.
        '''
        p = self.output # predictions
        return -np.sum(self.y * np.log(1e-15 + p)) / len(self.y)

    def backprop(self):
        '''One gradient-descent step on the weights and biases, using the
        activations of the last forward pass.'''
        if self.layer1 is None:
            self.forwardpass(self.input)
        x = self._forward_input

        # For softmax + cross-entropy the gradient w.r.t. the output logits
        # is simply (prediction - target); divide by the sample count so the
        # step size does not depend on the batch size.
        delta_out = (self.output - self.y) / self.rows
        d_weight_final = self.layer1.T.dot(delta_out)
        d_bias_final = delta_out.sum(axis=0)

        # Propagate through the output weights and the hidden sigmoid.
        delta_hidden = delta_out.dot(self.weight_final.T) * self.sigmoid_derivative(self.layer1)
        d_weight1 = x.T.dot(delta_hidden)
        d_bias1 = delta_hidden.sum(axis=0)

        # Descend: move AGAINST the gradient.
        self.weight1 -= self.learning_rate * d_weight1
        self.bias1 -= self.learning_rate * d_bias1
        self.weight_final -= self.learning_rate * d_weight_final
        self.bias_final -= self.learning_rate * d_bias_final

    def train(self, X, y=None):
        '''One training iteration: forward pass on X, then one backprop step.

        The targets are the labels given to the constructor; y is accepted
        for backward compatibility and is not used.
        '''
        self.output = self.forwardpass(X)
        self.backprop()
 

In [None]:
# Integer zero biases; NeuralNetwork reads entries 0 and 1 of this array.
bias = np.zeros(3, dtype=int)

kitty = NeuralNetwork(X, y, bias=bias)

# Ten training iterations. After each one, report for the first sample: the
# sum of its outputs, the outputs themselves, the arg-max class and the true label.
for i in range(10):
    kitty.train(X, new_y)
    first_row = kitty.output[0]
    print(sum(first_row), first_row, np.argmax(first_row), y[0])

In [None]:
def sigmoid(x):
    '''Logistic function 1 / (1 + e^(-x)), applied element-wise.'''
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_der(x):
    '''Derivative of the logistic function evaluated at x: s(x) * (1 - s(x)).'''
    s = sigmoid(x)
    return s * (1 - s)

def softmax(A):
    '''Row-wise softmax of a 2-D array.

    A --> array of shape (rows, classes) holding the raw scores (logits)

    Returns an array of the same shape whose rows are non-negative and sum
    to 1. The row maximum is subtracted before exponentiating: this does not
    change the result mathematically, but it stops np.exp from overflowing
    to inf (and the division from producing NaN) on large scores.
    '''
    A = np.asarray(A, dtype=float)
    shifted = A - A.max(axis=1, keepdims=True)
    expA = np.exp(shifted)
    return expA / expA.sum(axis=1, keepdims=True)

# Seed explicitly so the initial weights do not depend on which cells ran
# (and consumed random numbers) before this one.
np.random.seed(23)

# Plain arrays: positional indexing and no pandas overhead inside the loop.
X_values = np.asarray(X, dtype=float)
labels = np.asarray(y, dtype=int)

instances = X.shape[0]
# NOTE: this rebinds `attributes` (a list of column names in the data-loading
# cell) to the number of attributes.
attributes = X.shape[1]
hidden_nodes = 4
output_labels = 6

weight1 = np.random.rand(attributes, hidden_nodes)
bias1 = np.random.randn(hidden_nodes)

weight_final = np.random.rand(hidden_nodes, output_labels)
bias_final = np.random.randn(output_labels)
learning_rate = 0.01

# One-hot targets, sized from the data instead of a hard-coded row count.
one_hot_labels = np.zeros((instances, output_labels))
one_hot_labels[np.arange(instances), labels] = 1

error_cost = []

for epoch in range(1000):
    ############# feedforward

    # Phase 1: hidden layer
    hidden_input = np.dot(X_values, weight1) + bias1
    layer1 = sigmoid(hidden_input)

    # Phase 2: output layer
    output = softmax(np.dot(layer1, weight_final) + bias_final)

    ########## Back Propagation
    ########## Phase 1: output layer

    # For softmax + cross-entropy, dcost/dz_out = prediction - target.
    difference = output - one_hot_labels

    weight_cost = np.dot(layer1.T, difference)

    bias_cost = difference

    ########## Phase 2: hidden layer (chain rule)

    dzo_dah = weight_final                      # d z_out / d a_hidden
    # The error travels back through the output WEIGHTS, not through their
    # gradient (weight_cost).
    dcost_dah = np.dot(difference, dzo_dah.T)
    dah_dzh = sigmoid_der(hidden_input)         # d a_hidden / d z_hidden
    dzh_dwh = X_values                          # d z_hidden / d w_hidden
    dcost_wh = np.dot(dzh_dwh.T, dah_dzh * dcost_dah)

    dcost_bh = dcost_dah * dah_dzh

    # Update Weights ================
    # All gradients above were computed from the pre-update weights.

    weight1 -= learning_rate * dcost_wh
    bias1 -= learning_rate * dcost_bh.sum(axis=0)

    weight_final -= learning_rate * weight_cost
    bias_final -= learning_rate * bias_cost.sum(axis=0)

    # Cross-entropy summed over all samples; the small constant guards log(0).
    loss = np.sum(-one_hot_labels * np.log(output + 1e-15))
    error_cost.append(loss)

In [None]:
# Training curve: one summed cross-entropy value per epoch. The y-axis is
# left to autoscale; a fixed 150-250 window hides the curve whenever the loss
# leaves that range.
fig, ax = plt.subplots()
ax.plot(error_cost)
ax.set(xlabel='Epoch', ylabel='Cross-entropy loss (summed over samples)', title='Training loss per epoch');