In [19]:
##### Comparing Two Forms of Regularisation: Bagging and Dropout 
# Using Binary Logistic Regression as a Basis 
# Using the Titanic dataset 

# MODEL SUMMARY: 
# Loss function: Cross Entropy
# Hidden function: Sigmoid
# Output function: Sigmoid

# USER INFORMATION:
# User can specify the neural network using Structure
# Structure -> [input_dim, hidden layer_dims, ..., ouput_dim]

import numpy as np
import pandas as pd

class Neural_Network:
    def __init__(self, structure=[7,12,1], lr=0.1, epochs=100, up_freq=10, dropout_prob=1.0):
        
        np.random.seed(0)
        self.units = structure
        
        self.lr = lr
        self.num_iterations = epochs
        self.update_freq = up_freq
        self.dropout_prob = dropout_prob
        
        self.weights = []  
        self.biases = []
        
        self.Z = []
        self.A = []
        
        for i in range(len(self.units) - 1):
            
            # specifiying type removes overflow error
            weight = np.random.rand(self.units[i + 1], self.units[i]).astype(np.float128)
            self.weights.append(weight)
            
            bias = np.zeros((self.units[i + 1], 1))
            self.biases.append(bias)       
        
    def sigmoid(self, X):
        return 1/(1 + np.exp(-X))    
    
    # use logistic regression cost function
    def compute_cost(self, Y):
        m = Y.shape[1]        
        cost_sum = np.multiply(np.log(self.A[-1]), Y) +  np.multiply((1 - Y), np.log(1 - self.A[-1]))
        cost = np.squeeze(-np.sum(cost_sum) / m)
        
        return cost
    
    def feed_forward(self, X, train=True):
        
        self.A = []
        self.Z = [] 
        
        z1 = np.dot(self.weights[0], X) + self.biases[0]
        
        # setting the dropout in the first layer to 0.9
        # significantly improves performance
        if train: 
            d = np.random.binomial(1, 0.9, size=z1.shape[0]) / 0.9           
            z1 = (z1.T * d).T
        
        a = self.sigmoid(z1)         
        self.Z.append(z1)
        self.A.append(a)
        
        for i in range(len(self.units) - 2):            
            
            z = np.dot(self.weights[i+1], a) + self.biases[i+1]
            
            # ensure dropout is not applied to last layer
            if train and i != len(self.units) - 3 and False:
                d = np.random.binomial(1, self.dropout_prob, size=z.shape[0]) / self.dropout_prob 
                z = (z.T * d).T
                
            a = self.sigmoid(z)          
            self.Z.append(z)
            self.A.append(a) 
    
    def back_prop(self, X, Y):
        
        m = Y.shape[1]
        
        # compute derivative of cost and sigmoid
        dz = self.A[-1] - Y
        dw = (1/m) * np.dot(dz, self.A[-2].T)
        db = (1/m) * np.sum(dz, axis=1, keepdims=True)
        self.weights[-1] = self.weights[-1] - self.lr * dw
        self.biases[-1] = self.biases[-1] - self.lr * db
          
        # cycle through hidden layers of NN 
        for i in range(len(self.units) - 3, 0, -1):
            dz = np.dot(self.weights[i+1].T, dz) * self.sigmoid(self.A[i]) * (1 - self.sigmoid(self.A[i]))
            dw = (1/m) * np.dot(dz, self.A[i - 1].T)
            db = (1/m) * np.sum(dz, axis=1, keepdims=True)
            self.weights[i] = self.weights[i] - self.lr * dw
            self.biases[i] = self.biases[i] - self.lr * db
        
        # compute output
        dz = np.dot(self.weights[1].T, dz) * self.sigmoid(self.A[0]) * (1 - self.sigmoid(self.A[0]))         
        dw = (1/m) * np.dot(dz, X.T)
        db = (1/m) * np.sum(dz, axis=1, keepdims=True)
        
        self.weights[0] = self.weights[0] - self.lr * dw
        self.biases[0] = self.biases[0] - self.lr * db
                    
    def train(self, X, Y):
        
        cost, acc = 0, 0        
        for i in range(1, self.num_iterations + 1):
            self.feed_forward(X, train=True)  
            
            if i % self.update_freq == 0: 
                
                Y_pred = np.around(self.A[-1], 0).astype(int)    
                
                cost = round(self.compute_cost(Y), 2)
                acc = round((1 - np.abs(np.sum(Y - Y_pred)/Y.shape[1])) * 100, 2)                
                print('Epoch: {} Cost: {}'.format(i, cost))
                print('Accuracy: {}'.format(acc))                
                
            self.back_prop(X, Y)        
                      
        return cost, acc
            
    def test(self, X, Y_true):
        self.feed_forward(X, train=False)          
        Y_pred = np.around(self.A[-1], 0).astype(int)         
        
        print('\nTest Summary:')
        print('---------------------')
        print('Test Accuracy: {}'.format(round((1 - np.abs(np.sum(Y_pred - Y_true)/Y_true.shape[1])) * 100, 2)))   
        print('---------------------\n')
        
    def test_return(self, X):
        self.feed_forward(X, train=False)          
        Y_pred = np.around(self.A[-1], 0).astype(int) 
        return Y_pred

def pre_process(df):
    
    np.random.seed(0)
    
    # remove any unhelpful columns
    del df['Name']
    del df['Ticket']
    del df['Cabin']
    
    # convert object columns to int or float columns
    df['Sex'] = df['Sex'].eq('male').mul(1)
    df['Embarked'] = pd.factorize(df['Embarked'])[0] + 1    
    df = df.dropna()   
    df = df.sample(frac=1)
    
    # convert to array    
    y = np.array([df['Survived'].to_numpy()])
    x = df.loc[:, df.columns != 'Survived'].to_numpy().T
    
    # split into test and training
    y_train = y[:, 50:]
    x_train = x[:, 50:]
    y_test = y[:, :50]
    x_test = x[:, :50]
        
    return y_train, x_train, y_test, x_test  

class Bagging:
    def __init__(self, structure=[7,12,1], lr=0.1, epochs=100, up_freq=100, dataset=None, num_networks=3):
        
        np.random.seed(0)        
        self.num_networks = num_networks
        [self.y_train, self.x_train, self.y_test, self.x_test] = dataset 
        
        # divide up the dataset evenly        
        whole_sample_split = int((self.y_train.shape[1] - self.y_train.shape[1] % num_networks) / num_networks)        
        net_data_size = np.zeros(num_networks, dtype=np.int32) + whole_sample_split
        
        # add the leftover samples
        for i in range(len(y_train) % num_networks):            
            index = i % len(net_data_size)
            net_data_size[index] += 1
        
        net_data_size = np.insert(net_data_size, 0, 0)        
        self.net_data_size = np.cumsum(net_data_size)
        
        # initialise the neural networks
        self.neural_networks = {}
        for n in range(num_networks):
            
            network = Neural_Network(structure,
                                     lr=lr,
                                     epochs=epochs,
                                     up_freq=up_freq,
                                     dropout_prob=1.0
                                    )
            
            self.neural_networks["network " + str(n + 1)] = network
            
    def train(self):  
        
        running_cost = np.zeros(self.num_networks)
        running_acc = np.zeros(self.num_networks)
        
        for n in range(self.num_networks):  
            
            print('\nTraining Network {}'.format(n + 1))
            print('---------------------')
            
            # get training examples
            y_train = self.y_train[:, self.net_data_size[n]: self.net_data_size[n + 1]]
            x_train = self.x_train[:, self.net_data_size[n]: self.net_data_size[n + 1]]
            
            # unpack the network and train the model
            network = self.neural_networks["network " + str(n + 1)]
            cost, acc = network.train(x_train, y_train)  
            
            # save cost + accuracy
            running_cost[n] = cost 
            running_acc[n] = acc
            
            # repack the network
            self.neural_networks["network " + str(n + 1)] = network     
            
        mean_cost = np.mean(running_cost)
        mean_acc = np.mean(running_acc)
        
        print('\nTraining Summary')
        print('---------------------')
        print('Mean Cost: {}'.format(round(mean_cost, 2)))
        print('Mean Accuracy: {}'.format(round(mean_acc, 2)))
        print('---------------------')
    
    def test(self):
        
        total_y_pred = np.zeros((self.num_networks, self.x_test.shape[1])) 
        x_test, y_test = self.x_test, self.y_test
        
        for n in range(self.num_networks):
            
            # unpack the network and test the model
            network = self.neural_networks["network " + str(n + 1)]
            y_pred = network.test_return(x_test)     
            
            total_y_pred[n, :] = y_pred
            
        Y_pred = np.around(np.mean(total_y_pred, axis=0), decimals=1)
        Y_true = y_test
        
        print('\nTest Summary')
        print('---------------------')
        print('Test Accuracy: {}'.format(round((1 - np.abs(np.sum(Y_pred - Y_true)/Y_true.shape[1])) * 100, 2)))     
        print('---------------------\n')    
    
if __name__ == "__main__":
    
    # load and process the data
    orig_train = pd.read_csv('./titanic_train.csv', index_col=0)
    train = orig_train.copy()       
    y_train, x_train, y_test, x_test = pre_process(train)
    
    # initialse the bagging network
    bagger = Bagging([7, 8, 12, 1],
                    lr=0.08,
                    epochs=500,
                    up_freq=100,
                    dataset=[y_train, x_train, y_test, x_test],
                    num_networks=4
                    )
    
    # train and test the bagging network
    bagger.train()   
    bagger.test()      
    
    # intialise the dropout network
    network = Neural_Network([7, 8, 12, 1],
                             lr=0.05,
                             epochs=600,
                             up_freq=100,
                             dropout_prob=0.5                          
                            )
    
    # train and test the dropout network
    network.train(x_train, y_train)   
    network.test(x_test, y_test) 


Training Network 1
---------------------
Epoch: 100 Cost: 0.68
Accuracy: 61.68
Epoch: 200 Cost: 0.66
Accuracy: 80.24
Epoch: 300 Cost: 0.65
Accuracy: 85.03
Epoch: 400 Cost: 0.65
Accuracy: 94.61
Epoch: 500 Cost: 0.64
Accuracy: 99.4

Training Network 2
---------------------
Epoch: 100 Cost: 0.67
Accuracy: 61.45
Epoch: 200 Cost: 0.67
Accuracy: 57.23
Epoch: 300 Cost: 0.67
Accuracy: 57.23
Epoch: 400 Cost: 0.68
Accuracy: 57.23
Epoch: 500 Cost: 0.68
Accuracy: 57.23

Training Network 3
---------------------
Epoch: 100 Cost: 0.64
Accuracy: 60.84
Epoch: 200 Cost: 0.62
Accuracy: 92.17
Epoch: 300 Cost: 0.62
Accuracy: 97.59
Epoch: 400 Cost: 0.62
Accuracy: 98.8
Epoch: 500 Cost: 0.62
Accuracy: 96.99

Training Network 4
---------------------
Epoch: 100 Cost: 0.64
Accuracy: 61.21
Epoch: 200 Cost: 0.63
Accuracy: 89.7
Epoch: 300 Cost: 0.63
Accuracy: 92.73
Epoch: 400 Cost: 0.63
Accuracy: 64.85
Epoch: 500 Cost: 0.61
Accuracy: 96.36

Training Summary
---------------------
Mean Cost: 0.64
Mean Accuracy: 87.5