In [2]:
# Multinomial Logistic Regression
# Additional Implementation of L1 regularisation
# Using the Iris dataset 

"""
MODEL SUMMARY: 
- Loss function: Cross Entropy
- Hidden function: ReLU
- Output function: Softmax

USER INFORMATION:
- User can specify the neural network using Structure
- Structure -> [input_dim, hidden layer_dims, ..., ouput_dim]

NOTES:
- Standardisation is not needed for logistic regression -> insensitive to scale
- The derivative of the loss function with the softmax input is the same as the
  derivative with the sigmoid input
"""

import numpy as np
import pandas as pd
import math
import random

"""
A neural network using the relu activation function, softmax output and 
logisitic regression loss function.
"""
class Neural_Network:
    def __init__(self, structure=[4, 10, 3], lr=0.1, epochs=200, up_freq=20, lamba=0.0, batch_size=10, epsilon=1e-7):
        
        np.random.seed(0)
        self.units = structure
        
        self.lr = lr
        self.num_iterations = epochs
        self.update_freq = up_freq
        self.lamba = lamba
        self.batch_size = batch_size
        self.epsilon = epsilon
        
        self.weights = []  
        self.biases = []
        weight = None 
        bias = None
        
        self.Z = []
        self.A = []        
        
        # initialise the weights
        for i in range(len(self.units) - 1):            
            weight = np.random.randn(self.units[i + 1], self.units[i]) * (-1) 
            self.weights.append(weight)
            
            bias = np.zeros((self.units[i + 1], 1))
            self.biases.append(bias) 
    
    """
    Simple ReLU function
    """
    def reLU(self, X):
        return np.maximum(0, X) 
    
    """
    Derivative of RelU function
    """    
    def dx_reLU(self, X):
        return np.where(X > 0.5, 1, 0)
    
    """
    Softmax function
    """
    def softmax(self, X): 
        return np.exp(X)/np.sum(np.exp(X), axis=0, keepdims=True)
    
    """
    Compute the cost using the logistic regression cost function.
    """
    def compute_cost(self, Y):        
        m = Y.shape[0]        
        return - 1/m * np.sum(Y * np.log(self.A[-1].T + self.epsilon))
    
    """
    Perform a forward prediction with the model.
    """     
    def feed_forward(self, X, update): 
        
        a = X
        
        if update:
            self.A = []
            self.Z = [] 
        
        # perform >= 1 hidden layer transformations
        for i in range(len(self.units) - 2):
            z = np.dot(self.weights[i], a) + self.biases[i]
            a = self.reLU(z) 
            
            if update:
                self.Z.append(z)
                self.A.append(a)
         
        # perform softmax output transformation
        z = np.dot(self.weights[-1], a) + self.biases[-1]
        a = self.softmax(z)
        
        if update:
            self.Z.append(z)
            self.A.append(a)
        
        return a
    
    """
    Back propagate through the learned network.
    """  
    def back_prop(self, X, Y):
        
        m = Y.shape[1]
        
        # compute derivative of cost and softmax
        dz = self.A[-1] - Y
        dw = (1/m) * np.dot(dz, self.A[-2].T)
        db = (1/m) * np.sum(dz, axis=1, keepdims=True)
        self.weights[-1] = self.weights[-1] - self.lr * (dw + self.lamba * self.weights[-1]) 
        self.biases[-1] = self.biases[-1] - self.lr * (db + self.lamba * self.biases[-1])
          
        # cycle through hidden layers of NN 
        for i in range(len(self.units) - 3, -1, -1):
            
            if i == 0: 
                a = X.T
            else: 
                a = self.A[i - 1].T
                
            dz = np.dot(self.weights[i+1].T, dz) * self.dx_reLU(self.A[i])
            dw = (1/m) * np.dot(dz, a)
            db = (1/m) * np.sum(dz, axis=1, keepdims=True)
            self.weights[i] = self.weights[i] - self.lr * (dw + self.lamba * self.weights[i]) 
            self.biases[i] = self.biases[i] - self.lr * (db + self.lamba * self.biases[i])        
    
    """
    Train the model on a sample of date.
    """   
    def train(self, X_init, Y_init, X_test, Y_test):
        
        # determine the number of batchs
        batch_int = math.floor(X_init.shape[1] / self.batch_size)
        
        for i in range(1, self.num_iterations + 1):
            
            # save the original
            X_whole = X_init
            Y_whole = Y_init
            
            cost = 0
            accuracy = 0
            
            for j in range(1, batch_int + 1):   
                
                # create batchs
                idx = list(range((j-1) * self.batch_size, j * self.batch_size))                  
                X = X_whole[:, idx] 
                Y = Y_whole[:, idx]
                
                a = self.feed_forward(X, True)  
                
                # get prediction
                Y_pred = np.zeros_like(Y.T)
                Y_pred[np.arange(Y.shape[1]), a.T.argmax(1)] = 1
                diff = Y.T.astype('int32') - Y_pred.astype('int32') 
                
                # compute cost and accuracy 
                cost += self.compute_cost(Y.T)
                accuracy += (1 - np.sum(np.sum(np.abs(diff), axis=1)/2)/Y.shape[1]) * 100    
                
                self.back_prop(X, Y)
                    
            if i % self.update_freq == 0: 
                
                print('\n\nEpoch: {} Cost: {}'.format(i, round(cost/batch_int, 2)))        
                print('---------------------')
                print('Accuracy: {}'.format(round(accuracy/batch_int, 2)))   
                
                self.test(X_test, Y_test)       
    
    """
    Test the learned model against the true values.
    """   
    def test(self, X, Y_true):
        a = self.feed_forward(X, False) 
        
        Y_pred = np.zeros_like(Y_true.T)
        Y_pred[np.arange(Y_true.shape[1]), a.T.argmax(1)] = 1
        diff = Y_true.T.astype('int32') - Y_pred.astype('int32')   
       
        print('Test Accuracy: {}'.format(round((1 - np.sum(np.sum(np.abs(diff), axis=1)/2)/Y_true.shape[1]) * 100, 2)))  
        print('---------------------')
        
"""
Convert the data into a usable form and then split the 
sample into training and testing.
"""
def pre_process(df):
    
    # remove NaN and shuffle
    df = df.dropna()  
    df = df.sample(frac=1) 
    
    # convert the species names to numbers and save the mapping
    df["species"] = df["species"].astype('category') 
    
    # convert to array and one hot encode the 
    y = np.squeeze(np.array([pd.get_dummies(df['species']).to_numpy()])).T
    
    # standardise the dataset
    x = df.loc[:, df.columns != 'species'].to_numpy().T
    x_mean = np.mean(x, axis=1, keepdims=True)
    x_std = np.std(x, axis=1, keepdims=True)
    x = (x - x_mean)/x_std
        
    # split into test and training
    y_train = y[:, 25:]
    x_train = x[:, 25:]
    y_test = y[:, :25]
    x_test = x[:, :25]
    
    return y_train, x_train, y_test, x_test   
    
if __name__ == "__main__":
    
    # load and process the data
    orig_train = pd.read_csv('./Data/iris_train.csv')
    train = orig_train.copy()  

    y_train, x_train, y_test, x_test = pre_process(train)
        
    # initialse the network
    network = Neural_Network([4, 10, 3],
                             lr=0.1, 
                             epochs=200,
                             up_freq=20,
                             lamba=0.0, 
                             batch_size=10,
                             epsilon=1e-7
                            )

    # train and test
    network.train(x_train, y_train, x_test, y_test)   



Epoch: 20 Cost: 0.25
---------------------
Accuracy: 90.0
Test Accuracy: 84.0
---------------------


Epoch: 40 Cost: 0.14
---------------------
Accuracy: 97.5
Test Accuracy: 84.0
---------------------


Epoch: 60 Cost: 0.09
---------------------
Accuracy: 98.33
Test Accuracy: 88.0
---------------------


Epoch: 80 Cost: 0.07
---------------------
Accuracy: 96.67
Test Accuracy: 88.0
---------------------


Epoch: 100 Cost: 0.06
---------------------
Accuracy: 97.5
Test Accuracy: 92.0
---------------------


Epoch: 120 Cost: 0.05
---------------------
Accuracy: 97.5
Test Accuracy: 92.0
---------------------


Epoch: 140 Cost: 0.05
---------------------
Accuracy: 98.33
Test Accuracy: 92.0
---------------------


Epoch: 160 Cost: 0.04
---------------------
Accuracy: 98.33
Test Accuracy: 92.0
---------------------


Epoch: 180 Cost: 0.04
---------------------
Accuracy: 98.33
Test Accuracy: 92.0
---------------------


Epoch: 200 Cost: 0.04
---------------------
Accuracy: 98.33
Test Accur