%load_ext autotime

### Instructions
1. Seek to improve the one-layer model by changing the internal activation function. This will involve changing A1 to something else, and also changing the derivative involving one or both of dA1 and dZ1.
2. Add another layer with your choice of activation function (other than the logistic function) and your choice of number of internal nodes.
3. Add a third layer
4. Add a fourth layer
5. Report on your choices and assessment of the resulting models - you can extend the basic model assessment tools supplied
6. Based on your answers to tasks 1-4, write code that - for this dataset - allows a user to build and run a neural net with a high degree of flexibility in terms of architecture and hyperparameter choices.
7. Explain how advanced optimisers (e.g. Adam) would extend your code

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings(action='ignore')

In [4]:
# Import the Dataset

# Read the CSV from the current working directory into a DataFrame.
df = pd.read_csv ('flower2D.csv')
# Shape is printed BEFORE the column drop below, so it still counts the
# "Unnamed: 0" column.
print(df.shape)
# NOTE: df.info() prints its summary itself and returns None, so wrapping it
# in print() also emits a trailing "None" line.
print(df.info())
# Keep only the columns whose name does not start with "Unnamed"
# (presumably the index column written when the CSV was saved - confirm).
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
# Last expression of the cell: the notebook displays the first rows.
df.head()

(400, 4)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  400 non-null    int64  
 1   X1          400 non-null    float64
 2   X2          400 non-null    float64
 3   Y           400 non-null    int64  
dtypes: float64(2), int64(2)
memory usage: 12.6 KB
None


Unnamed: 0,X1,X2,Y
0,0.005257,-0.303586,0
1,0.996098,3.461645,0
2,0.384404,2.392678,0
3,0.951043,3.282709,0
4,0.091932,1.066689,0


In [13]:
# Features stay a two-column DataFrame; labels become a 1-D NumPy array.
X = df[["X1", "X2"]]
y = np.array(df["Y"])

# Positional split: the first n rows are used for training, the remainder
# for testing.
# NOTE(review): rows are not shuffled before the split - confirm the CSV is
# not ordered by class.
n = 300
n_test = X.shape[0] - n

# Transpose so that each column is one sample (features x samples).
X_train = X[:n].T
X_test = X[n:].T

# Labels are reshaped into row vectors of shape (1, samples).
y_train = y[:n].reshape(1, n)
y_test = y[n:].reshape(1, n_test)

#### Loss & functions

In [17]:
## Loss functions
def binary_cross_entropy(Y, Y_hat):
    """Return the binary cross-entropy between labels and predictions.

    The summed loss is divided by ``Y.shape[1]``, i.e. the size of the
    second axis of ``Y``. A small constant (1e-15) is added inside each
    logarithm so that predictions of exactly 0 or 1 do not produce
    ``log(0)``.

    Args:
        Y: 2-D array of targets.
        Y_hat: Array of predicted probabilities, broadcastable against ``Y``.

    Returns:
        The averaged loss as a scalar.
    """
    eps = 1e-15
    n_samples = Y.shape[1]
    positive_term = Y * np.log(Y_hat + eps)
    negative_term = (1 - Y) * np.log(1 - Y_hat + eps)
    return -(1 / n_samples) * np.sum(positive_term + negative_term)

def categorical_cross_entropy(Y, Y_hat):
    """Return the categorical cross-entropy between targets and predictions.

    The summed loss is divided by ``Y.shape[1]``, i.e. the size of the
    second axis of ``Y``. A small constant (1e-15) is added inside the
    logarithm so that a predicted probability of exactly 0 does not produce
    ``log(0)``.

    Args:
        Y: 2-D array of targets.
        Y_hat: Array of predicted probabilities, broadcastable against ``Y``.

    Returns:
        The averaged loss as a scalar.
    """
    eps = 1e-15
    n_samples = Y.shape[1]
    weighted_log_probs = Y * np.log(Y_hat + eps)
    return -(1 / n_samples) * np.sum(weighted_log_probs)

#### Neural network design

In [18]:
class Relu:
    """Element-wise rectified linear activation, ``max(0, x)``.

    The layer has no trainable parameters. It caches its most recent forward
    output so that ``backward`` can tell which entries were active.
    """

    def __init__(self):
        # Output of the latest forward() call; None until forward() has run.
        self.out = None

    def forward(self, x):
        """Clamp negative entries of ``x`` to zero, cache and return the result."""
        activated = np.maximum(0, x)
        self.out = activated
        return activated

    def backward(self, grad_output):
        """Pass ``grad_output`` through only where the cached output is positive.

        Entries whose cached forward output is not strictly positive receive
        a gradient of zero.
        """
        active_mask = self.out > 0
        return active_mask * grad_output

    def params_and_grads(self):
        """Return an empty list: this layer has nothing to train."""
        return []

class Sigmoid:
    """Element-wise logistic activation, ``1 / (1 + exp(-x))``.

    The layer has no trainable parameters. It caches its most recent forward
    output, which ``backward`` reuses to form the derivative
    ``out * (1 - out)``.
    """

    def __init__(self):
        # Output of the latest forward() call; None until forward() has run.
        self.out = None

    def forward(self, x):
        """Apply the logistic function to ``x``, cache and return the result.

        The naive form ``1 / (1 + exp(-x))`` overflows inside ``exp`` for
        large negative ``x``. Here only ``exp(-|x|)`` is evaluated, which
        never exceeds 1, and the algebraically equivalent branch is chosen
        per element:

            x >= 0:  1 / (1 + exp(-x))
            x <  0:  exp(x) / (1 + exp(x))

        Args:
            x: NumPy array (or scalar) of pre-activations.

        Returns:
            NumPy array of the same shape as ``x`` with values in [0, 1].
        """
        decay = np.exp(-np.abs(x))  # always in (0, 1], cannot overflow
        self.out = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
        return self.out

    def backward(self, grad_output):
        """Scale ``grad_output`` by the sigmoid derivative at the cached output."""
        return grad_output * self.out * (1 - self.out)

    def params_and_grads(self):
        """Return an empty list: this layer has nothing to train."""
        return []
        

In [2]:
class Linear:
    """Fully connected layer computing ``W . x + b``.

    Attributes:
        W: Weight matrix of shape (output_size, input_size), drawn from a
            standard normal distribution and scaled by 0.01.
        b: Bias column vector of shape (output_size, 1), initialised to zero.
        grad_W, grad_b: Gradients for ``W`` and ``b``; None until
            ``backward`` has been called.
        x: Input seen by the latest ``forward`` call; None before that.
    """

    def __init__(self, input_size, output_size):
        # Nothing is cached until forward()/backward() have run.
        self.x = None
        self.grad_W = None
        self.grad_b = None
        weight_shape = (output_size, input_size)
        self.W = np.random.randn(*weight_shape) * 0.01
        self.b = np.zeros((output_size, 1))

    def forward(self, x):
        """Cache ``x`` for the backward pass and return ``W . x + b``."""
        self.x = x
        pre_activation = np.dot(self.W, x)
        return pre_activation + self.b

    def backward(self, grad_output):
        """Store parameter gradients and return the gradient w.r.t. the input.

        Both parameter gradients are divided by the size of the cached
        input's second axis. The gradient handed to the previous layer is
        not divided.
        """
        scale = 1 / self.x.shape[1]
        self.grad_W = scale * np.dot(grad_output, self.x.T)
        self.grad_b = scale * np.sum(grad_output, axis=1, keepdims=True)
        grad_input = np.dot(self.W.T, grad_output)
        return grad_input

    def params_and_grads(self):
        """Return ``[(W, grad_W), (b, grad_b)]``."""
        weight_pair = (self.W, self.grad_W)
        bias_pair = (self.b, self.grad_b)
        return [weight_pair, bias_pair]

In [1]:
class NeuralNetwork:
    """Sequential container that chains layers and applies gradient descent.

    Every layer is expected to provide ``forward(x)``, ``backward(grad)`` and
    ``params_and_grads()``.

    Attributes:
        layers: Layers in the order they are applied during the forward pass.
        learning_rate: Step size used by ``step``.
    """

    def __init__(self, layers, learning_rate=0.01):
        self.learning_rate = learning_rate
        self.layers = layers

    def forward(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        activation = x
        for layer in self.layers:
            activation = layer.forward(activation)
        return activation

    def backward(self, grad_loss):
        """Propagate ``grad_loss`` through the layers from last to first.

        Each layer's ``backward`` result is fed to the layer before it.
        Nothing is returned.
        """
        upstream = grad_loss
        for layer in reversed(self.layers):
            upstream = layer.backward(upstream)

    def params_and_grads(self):
        """Return every layer's ``(param, grad)`` pairs as one flat list."""
        return [
            pair
            for layer in self.layers
            for pair in layer.params_and_grads()
        ]

    def step(self):
        """Apply one update ``param -= learning_rate * grad`` to every parameter.

        The augmented assignment updates NumPy arrays in place, which is how
        the change reaches the arrays held by the layers.
        """
        lr = self.learning_rate
        for param, grad in self.params_and_grads():
            param -= lr * grad

### No hidden layers