## Imports

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import preprocessing

import warnings
warnings.filterwarnings("ignore")

## Paths

In [2]:
TRAIN = '../Data/df_train.csv'
TEST = '../Data/df_test.csv'

## Functions

In [3]:
# code from exercise 5 to calculate the z-score
def z_score(x):
    '''Standardise x along axis 0: subtract the mean and divide by the standard deviation.

    x --> array-like or dataframe; np.mean and np.std are both taken over axis 0

    Returns an object of the same shape as x. np.std is called with its default
    settings, so no degrees-of-freedom correction is requested here.
    '''
    centred = x - np.mean(x, axis=0)
    spread = np.std(x, axis=0)
    return centred / spread

def conf_mat(y_hat, y_true):
    '''Returns a confusion matrix

    Rows are indexed by the true class and columns by the predicted class, so
    entry [t][p] counts the samples whose true class is t and predicted class is p.

    y_hat --> predicted class labels (non-negative integers)
    y_true --> true class labels (non-negative integers), same length as y_hat

    Returns a square float array with one row/column for every label from 0 up
    to the largest label found in either input (a 0x0 array for empty input).
    Raises ValueError when the two inputs differ in length.
    '''
    # Plain arrays give positional access whatever index a pandas Series carries.
    y_hat = np.asarray(y_hat)
    y_true = np.asarray(y_true)

    if len(y_hat) != len(y_true):
        raise ValueError('y_hat and y_true must have the same length')
    if len(y_hat) == 0:
        return np.zeros([0, 0])

    # Size from BOTH inputs: a class present in y_true may never be predicted.
    n = max(y_hat.max(), y_true.max()) + 1
    bingo = np.zeros([n,n])
    # Unbuffered add, so repeated (true, predicted) pairs are each counted.
    np.add.at(bingo, (y_true, y_hat), 1)

    return bingo

def scores(y_hat, y_true, average = False):
    '''For each class, returns recall, precision and f1

    y_hat --> predicted class labels
    y_true --> true class labels
    average --> if True, return the unweighted mean of each metric over the classes

    The classes evaluated are the unique values of y_true. A metric whose
    denominator is zero (e.g. precision of a class that is never predicted, or
    f1 when precision + recall is 0) is reported as 0.0 instead of NaN, so that
    one undefined value does not turn the averages into NaN.

    Returns (recall, precision, f1): three lists with one entry per class, or
    three scalars when average is True.
    '''
    classes = list(np.unique(y_true))
    conf = conf_mat(y_hat, y_true)
    r = []
    p = []
    f = []
    for c in classes:
        true_pos = conf[c][c]
        n_actual = sum(conf[c])        # row total: samples whose true class is c
        n_predicted = sum(conf[:, c])  # column total: samples predicted as c

        recall = true_pos / n_actual if n_actual else 0.0
        precision = true_pos / n_predicted if n_predicted else 0.0

        pr_sum = precision + recall
        f1 = 2*(precision*recall)/pr_sum if pr_sum else 0.0

        r.append(recall)
        p.append(precision)
        f.append(f1)
    if average:
        return sum(r)/len(r), sum(p)/len(p), sum(f)/len(f)
    else:
        return (r, p, f)

## Variables and df loading

In [4]:
col = 'rainbow' # Colour theme

df = pd.read_csv(TRAIN) # Training dataframe
a = len(df) # Row count before shuffling, used by the sanity check below

# Shuffle the rows so the data has no order. frac=1 gives the whole df back but
# shuffled; random_state is fixed for reproducibility.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

if a != len(df):
    print('WARNING, DATA IS BEING LOST')#confirm still have the whole df

attributes = list(df.columns)[:-1] # Creates list of column names for the dataframe without the class

df[attributes] = z_score(df[attributes])

X = df[attributes].copy() # Attributes
y = df['type'].copy() # True values

# Shift the labels down: labels above 4 by 2, all others by 1 (vectorised).
# NOTE(review): presumably label 4 never occurs, which would make the result
# consecutive and zero-based -- confirm against the data.
y = y - np.where(y > 4, 2, 1)

lb = preprocessing.LabelBinarizer()
new_y = pd.DataFrame(lb.fit_transform(y)) # One indicator column per class

y_list = y.unique() # 'y' values

#round(df.describe(),2)

df['type'] = y

df.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,type
0,2.565103,-2.54593,-1.922352,-1.31684,0.751099,-0.854045,3.821565,-0.362309,-0.633117,1
1,-0.673512,-0.488702,0.603378,0.208615,0.264239,0.219743,-0.587138,-0.362309,-0.633117,1
2,-0.145715,0.092436,0.448165,0.386915,-0.876038,0.149331,-0.075888,-0.362309,0.29173,2
3,-0.576697,1.603395,-1.922352,1.100115,0.622978,-0.854045,-0.427787,2.738558,-0.633117,5
4,-0.617297,-0.023792,0.539882,0.168993,0.033621,0.325362,-0.560579,-0.362309,0.394491,1


In [5]:
#Activation functions:

def sigmoid(x):
    '''Logistic function 1 / (1 + e^(-x)), applied element-wise through numpy.'''
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def tanh(x):
    '''Hyperbolic tangent of x, applied element-wise through numpy.'''
    squashed = np.tanh(x)
    return squashed

def relu(x): #Rectified Linear Unit
    '''Element-wise maximum of 0 and x: negative entries become 0, the rest pass through.'''
    rectified = np.maximum(0, x)
    return rectified


The neural network implementation below uses ideas from:

https://towardsdatascience.com/how-to-build-your-own-neural-network-from-scratch-in-python-68998a08e4f6

https://hackernoon.com/building-a-feedforward-neural-network-from-scratch-in-python-d3526457156b

https://machinelearningmastery.com/implement-backpropagation-algorithm-scratch-python/

In [15]:
class NeuralNetwork:
    '''Feed-forward network with two hidden layers of equal width and one output layer.

    Only weight initialisation and the forward pass are implemented in this
    class; no training/backpropagation step is defined here.
    '''

    def __init__(self, X, y, bias=None, neurons=12, activation=sigmoid, seed=None):
        '''
        X --> input samples; X.shape[1] is the number of attributes
        y --> true values; y.shape[1] is the number of outputs
        bias --> indexable holding the layer biases: bias[0] (first hidden layer),
                 bias[1] (second hidden layer) and bias[-1] (output layer).
                 None means a bias of 0 for every layer
        neurons --> neurons per hidden layer
        activation --> choose activation function
        seed --> optional seed for the weight initialisation. None draws from
                 numpy's global random state
        '''

        self.input = X #inputs
        self.y = y #True values
        self.output = np.zeros(self.y.shape)

        self.activation = activation
        self.neurons = neurons

        # np.random (global state) and RandomState both expose .rand, so the
        # unseeded path draws exactly as a plain np.random.rand call would.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Weights are drawn uniformly from [0, 1)
        self.weight1 = rng.rand(X.shape[1], neurons) #(attributes in X,number of neurons)
        self.weight2 = rng.rand(neurons, neurons)
        self.weight_final = rng.rand(neurons, y.shape[1])

        # The signature defaults bias to None; indexing None would raise a
        # TypeError, so fall back to zero biases.
        if bias is None:
            bias = (0, 0, 0)

        self.bias1 = bias[0]
        self.bias2 = bias[1]
        self.bias_final = bias[-1]

    # The three methods below originally read self.x, which this class never
    # assigns. They now take the value as an optional argument and only fall
    # back to self.x when it is omitted.

    def sigmoid(self, x=None):
        '''Logistic function of x (or of self.x when x is not given).'''
        if x is None:
            x = self.x
        return 1 / (1 + np.exp(-x))

    def tanh(self, x=None):
        '''Hyperbolic tangent of x (or of self.x when x is not given).'''
        if x is None:
            x = self.x
        return np.tanh(x)

    def relu(self, x=None): #Rectified Linear Unit
        '''Element-wise max(0, x) (x defaults to self.x when not given).'''
        if x is None:
            x = self.x
        return np.maximum(0, x)

    def forwardpass(self, x, verbose=True):
        '''Propagate x through both hidden layers and the output layer.

        x --> input samples; must provide .dot and have X.shape[1] columns
        verbose --> print the shape of each layer's activations (default True)

        Each layer computes activation(previous.dot(weight) + bias). The results
        are stored on the instance as layer1, layer2 and outputlayer; nothing is
        returned.
        '''
        self.layer1 = self.activation(x.dot(self.weight1) + self.bias1)
        if verbose:
            print(self.layer1.shape)
        self.layer2 = self.activation(self.layer1.dot(self.weight2) + self.bias2)
        if verbose:
            print(self.layer2.shape)
        self.outputlayer = self.activation(self.layer2.dot(self.weight_final) + self.bias_final)
        if verbose:
            print(self.outputlayer.shape)
    
    

In [16]:
# One bias per layer (hidden 1, hidden 2, output), all zero.
bias=np.array([0]*3)

# Build the network on the full training set and run one forward pass;
# forwardpass prints the shape of each layer's activations.
kitty = NeuralNetwork(X,new_y, bias = bias)
kitty.forwardpass(kitty.input)


(149, 12)
(149, 12)
(149, 6)


In [17]:
# Show the network output for the first sample next to its binarized target row.
kitty.outputlayer.iloc[0], new_y.iloc[0]

(0    0.998836
 1    0.996049
 2    0.994277
 3    0.994530
 4    0.964192
 5    0.995454
 Name: 0, dtype: float64,
 0    0
 1    1
 2    0
 3    0
 4    0
 5    0
 Name: 0, dtype: int32)