## Imports

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import preprocessing

import warnings
warnings.filterwarnings("ignore")

## Paths

In [2]:
TRAIN = '../Data/df_train.csv'
TEST = '../Data/df_test.csv'

## Functions

In [3]:
# code from exercise 5 to calculate the z-score
z_score = lambda x : (x - np.mean(x, axis=0)) / np.std(x, axis=0)

def conf_mat(y_hat, y_true):
    '''Returns a confusion matrix'''
    n = max(y_hat)+1
    bingo = np.zeros([n,n])
    for i in range(len(y_hat)):
        bingo[y_true[i]][y_hat[i]] +=1
    
    return(bingo)

def scores(y_hat, y_true, average = True):
    '''For each class, returns recall, precision and f1'''
    classes = list(np.unique(y_true))
    conf = conf_mat(y_hat, y_true)
    r = []
    p = []
    f = []
    for c in classes:
        recall = conf[c][c] / sum(conf[c])
        precision = conf[c][c] / sum(conf[:, c])
        f1 = 2*(precision*recall)/(precision + recall)
        r.append(recall)
        p.append(precision)
        f.append(f1)
    if average:
        return sum(r)/len(r), sum(p)/len(p), sum(f)/len(f)
    else:
        return (r, p, f)

## Variables and df loading

In [4]:
col = 'rainbow' # Colour theme

df = pd.read_csv(TRAIN) # Training dataframe
a = len(df)

#ensures data is without order, random state fixed for reproducability, frac=1 gives the whole df back but shuffled
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

if a != len(df):
    print('WARNING, DATA IS BEING LOST')#confirm still have the whole df

attributes = list(df.columns)[:-1] # Creates list of column names for the dataframe without the class

df[attributes] = z_score(df[attributes])

X = df[attributes].copy() # Attributes
y = df['type'].copy() # True values

for i in range(len(y)):
    if y[i] > 4:
        y[i] = y[i] - 2
    else:
        y[i] = y[i] - 1
        
lb = preprocessing.LabelBinarizer()
new_y = pd.DataFrame(lb.fit_transform(y))

y_list = y.unique() # 'y' values

#round(df.describe(),2)
df['type'] = y
df[[0,1,2,3,4,5]] = new_y

df.head(10)

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,type,0,1,2,3,4,5
0,2.565103,-2.54593,-1.922352,-1.31684,0.751099,-0.854045,3.821565,-0.362309,-0.633117,1,0,1,0,0,0,0
1,-0.673512,-0.488702,0.603378,0.208615,0.264239,0.219743,-0.587138,-0.362309,-0.633117,1,0,1,0,0,0,0
2,-0.145715,0.092436,0.448165,0.386915,-0.876038,0.149331,-0.075888,-0.362309,0.29173,2,0,0,1,0,0,0
3,-0.576697,1.603395,-1.922352,1.100115,0.622978,-0.854045,-0.427787,2.738558,-0.633117,5,0,0,0,0,0,1
4,-0.617297,-0.023792,0.539882,0.168993,0.033621,0.325362,-0.560579,-0.362309,0.394491,1,0,1,0,0,0,0
5,-0.595436,0.162173,0.596323,0.069937,-0.222621,0.272552,-0.640255,-0.362309,-0.633117,1,0,1,0,0,0,0
6,-0.464267,-0.65142,0.603378,-0.247041,-0.017627,0.219743,-0.089167,-0.362309,-0.633117,2,0,0,1,0,0,0
7,1.643799,0.022699,-1.922352,0.287859,-0.517299,-0.290746,2.201499,-0.362309,-0.633117,3,0,0,0,1,0,0
8,4.841814,-1.302295,-1.922352,-0.861185,-3.156592,-0.642808,4.824146,-0.362309,1.833143,1,0,1,0,0,0,0
9,-0.554836,-0.325983,0.511661,0.644459,-0.184185,0.20214,-0.361391,-0.362309,1.113817,2,0,0,1,0,0,0


Using ideas from:

https://towardsdatascience.com/how-to-build-your-own-neural-network-from-scratch-in-python-68998a08e4f6

https://hackernoon.com/building-a-feedforward-neural-network-from-scratch-in-python-d3526457156b

https://machinelearningmastery.com/implement-backpropagation-algorithm-scratch-python/

In [55]:
class NeuralNetwork:
    
    def __init__(self, x, y, bias=None, neurons=12):
        '''
        neurons --> neurons per hidden layer
        activation --> choose activation function
        '''
        
        self.input = x # All rows with the attributes (all X's)
        self.y = np.array(y) # True values
        self.output = np.zeros(self.y.shape)
        
        self.neurons = neurons
        
        self.weight1 = np.random.rand(x.shape[1], neurons) # (attributes in X, number of neurons)
        self.weight2 = np.random.rand(neurons, neurons)
        self.weight_final = np.random.rand(neurons, y.shape[1])

        self.bias1 = bias[0]
        self.bias2 = bias[1]
        self.bias_final = bias[-1]
    
    def sigmoid(self, l):
        return 1 / (1 + np.exp(-l))

    def tanh(self):
        return np.tanh(self.x)

    def relu(self): #Rectified Linear Unit
        return np.maximum(0, self.x)
    
    def softmax(self, vec):
        exponential = np.exp(vec)
        probabilities = exponential / np.sum(exponential)
        return probabilities 
       
    def forwardpass(self, x):
        # activation_function(x * w_0 + bias_0) etc
        
        # Going through the 1st layer
        self.layer1 = self.sigmoid((x.dot(self.weight1) + self.bias1))
        
        #self.layer2 = self.sigmoid(self.layer1.dot(self.weight2) + self.bias2)
        #print(self.layer2.shape)
        
        # Final Output layer with softmax
        self.output = self.softmax(self.layer1.dot(self.weight_final) + self.bias_final)
        
        return self.output

    #https://towardsdatascience.com/how-to-build-your-own-neural-network-from-scratch-in-python-68998a08e4f6
    #code borrowed from here
    def sigmoid_derivative(self, p):
        return p * (1 - p)

    # calculate categorical cross entropy
    def categorical_cross_entropy(self, X):
        p = self.output.to_numpy() # predictions
        sum_score = 0.0
        for i in range(len(self.y)):
            for j in range(len(self.y[i])):
                sum_score += self.y[i][j] * np.log(1e-15 + p[i][j])
        mean_sum_score = 1.0 / len(self.y) * sum_score
        return -mean_sum_score
    
    def backprop(self):
        # Update weights of last layer 
        d_weights2 = np.dot(self.layer1.T, (2*(self.y - self.output) * self.categorical_cross_entropy(self.output)))
        
        # Update weights of 1st layer
        d_weights1 = np.dot(self.input.T, np.dot(2*(self.y - self.output) * self.sigmoid_derivative(self.output), self.weight_final.T)*self.sigmoid_derivative(self.layer1))
        
        print(self.input.shape, self.weight1.shape, self.y.shape, self.output.shape, self.weight_final.shape, self.layer1.shape)

        # Update weights
        self.weight1 += d_weights1
        self.weight_final += d_weights2

    def train(self, X, y):
        self.output = self.forwardpass(X)
        self.backprop()
    

In [65]:
bias=np.array([0]*3)

kitty = NeuralNetwork(X,new_y, bias = bias)
print(kitty.weight1.shape, kitty.weight2.shape)
#kitty.forwardpass(X)
#kitty.backprop()
kitty.train(X, new_y)
sum(kitty.output[0])

(9, 12) (12, 12)
(149, 9) (9, 12) (149, 6) (149, 6) (12, 6) (149, 12)


1.0000000000000002

In [None]:
kitty.outputlayer.iloc[0], new_y.iloc[0]

In [None]:
def backprop(self):
    # application of the chain rule to find derivative of the loss function with respect to weights2 and weights1
    d_weights2 = np.dot(self.layer1.T, (2*(self.y - self.output) * sigmoid_derivative(self.output)))
    d_weights1 = np.dot(self.input.T,  (np.dot(2*(self.y - self.output) * sigmoid_derivative(self.output), self.weights2.T) * sigmoid_derivative(self.layer1)))

In [None]:
from tqdm import tqdm_notebook

class FFSN_MultiClass:
  
    def __init__(self, n_inputs, n_outputs, hidden_sizes=[3]):
        self.nx = n_inputs
        self.ny = n_outputs
        self.nh = len(hidden_sizes)
        self.sizes = [self.nx] + hidden_sizes + [self.ny] 

        self.W = {}
        self.B = {}
        for i in range(self.nh+1):
            self.W[i+1] = np.random.randn(self.sizes[i], self.sizes[i+1])
            self.B[i+1] = np.zeros((1, self.sizes[i+1]))
      
    def sigmoid(self, x):
        return 1.0/(1.0 + np.exp(-x))
  
    def softmax(self, x):
        exps = np.exp(x)
        return exps / np.sum(exps)

    def forward_pass(self, x):
        self.A = {}
        self.H = {}
        self.H[0] = x.reshape(1, -1)
        for i in range(self.nh):
            self.A[i+1] = np.matmul(self.H[i], self.W[i+1]) + self.B[i+1]
            self.H[i+1] = self.sigmoid(self.A[i+1])
        self.A[self.nh+1] = np.matmul(self.H[self.nh], self.W[self.nh+1]) + self.B[self.nh+1]
        self.H[self.nh+1] = self.softmax(self.A[self.nh+1])
        return self.H[self.nh+1]
  
    def predict(self, X):
        Y_pred = []
        for x in X:
            y_pred = self.forward_pass(x)
            Y_pred.append(y_pred)
        return np.array(Y_pred).squeeze()
 
    def grad_sigmoid(self, x):
        return x*(1-x) 
  
    def cross_entropy(self,label,pred):
        yl=np.multiply(pred,label)
        yl=yl[yl!=0]
        yl=-np.log(yl)
        yl=np.mean(yl)
        return yl
 
    def grad(self, x, y):
        self.forward_pass(x)
        self.dW = {}
        self.dB = {}
        self.dH = {}
        self.dA = {}
        L = self.nh + 1
        self.dA[L] = (self.H[L] - y)
        for k in range(L, 0, -1):
            self.dW[k] = np.matmul(self.H[k-1].T, self.dA[k])
            self.dB[k] = self.dA[k]
            self.dH[k-1] = np.matmul(self.dA[k], self.W[k].T)
            self.dA[k-1] = np.multiply(self.dH[k-1], self.grad_sigmoid(self.H[k-1])) 
    
    def fit(self, X, Y, epochs=100, initialize='True', learning_rate=0.01, display_loss=False):
      
        if display_loss:
            loss = {}
      
        if initialize:
            for i in range(self.nh+1):
            self.W[i+1] = np.random.randn(self.sizes[i], self.sizes[i+1])
            self.B[i+1] = np.zeros((1, self.sizes[i+1]))
        
        for epoch in tqdm_notebook(range(epochs), total=epochs, unit="epoch"):
            dW = {}
            dB = {}
            for i in range(self.nh+1):
                dW[i+1] = np.zeros((self.sizes[i], self.sizes[i+1]))
                dB[i+1] = np.zeros((1, self.sizes[i+1]))
        for x, y in zip(X, Y):
            self.grad(x, y)
            for i in range(self.nh+1):
                dW[i+1] += self.dW[i+1]
                dB[i+1] += self.dB[i+1]
                  
        m = X.shape[1]
        for i in range(self.nh+1):
            self.W[i+1] -= learning_rate * (dW[i+1]/m)
            self.B[i+1] -= learning_rate * (dB[i+1]/m)
        
        if display_loss:
            Y_pred = self.predict(X) 
            loss[epoch] = self.cross_entropy(Y, Y_pred)
    
      if display_loss:
          plt.plot(loss.values())
          plt.xlabel('Epochs')
          plt.ylabel('CE')
          plt.show()

In [None]:
X = np.array(X)

milo = FFSN_MultiClass(9,6, [30])
milo.fit(X, new_y,epochs=6000, learning_rate = 0.5, display_loss=True)

In [None]:
y_pred = milo.predict(X)

In [None]:
df['pred'] = np.argmax(y_pred, axis=1)

df[df['type'] == df['pred']].shape[0]
df['pred']

In [None]:
conf_mat(df['pred'], df['type'])