In [3]:
import numpy as np

def sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``.

    Evaluated in a numerically stable form: only ``exp(-|z|)`` is ever
    computed, so large-magnitude inputs cannot overflow ``np.exp``.

    Parameters
    ----------
    z : scalar or array-like
        Input of any shape.

    Returns
    -------
    NumPy scalar for scalar input, otherwise an ndarray shaped like ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], so it cannot overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- the same function.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # n-d arrays as arrays.
    return result[()]

class Sigmoid:
    """Sigmoid activation layer that remembers its latest output.

    ``forward`` stores the activation in ``self.out`` so that ``backward``
    can build the local derivative from it.
    """

    def __init__(self):
        # Output of the most recent forward call; None until forward runs.
        self.out = None

    def forward(self, x):
        """Apply the sigmoid to ``x`` (any shape), cache and return the result."""
        activated = sigmoid(x)
        self.out = activated
        return activated

    def backward(self, dout):
        """Scale the upstream gradient ``dout`` by ``s * (1 - s)``.

        ``s`` is the output cached by the last ``forward`` call.
        """
        s = self.out
        scaled = dout * s
        return scaled * (1.0 - s)


class ReLU:
    """Rectified linear activation layer that remembers its latest output."""

    def __init__(self):
        # Output of the most recent forward call; None until forward runs.
        self.out = None

    def forward(self, x):
        """Clamp negative entries of ``x`` to zero, cache and return the result."""
        rectified = np.maximum(0.0, x)
        self.out = rectified
        return rectified

    def backward(self, dout):
        """Return a copy of ``dout`` zeroed wherever the cached output is <= 0.

        ``dout`` itself is left unmodified.
        """
        inactive = self.out <= 0.0
        grad = dout.copy()
        grad[inactive] = 0.0
        return grad
    
class LeakyReLU:
    """Leaky ReLU activation: ``max(x, a * x)``.

    Parameters
    ----------
    a : float
        Slope hyperparameter, exposed so it can be tuned when testing.
    """

    def __init__(self, a):
        self.out = None
        # hyperparam a value, so we can tune it for testing
        self.a = a
        # Boolean mask of entries where forward picked x (slope 1) rather
        # than a * x (slope a); filled in by forward.
        self._identity_mask = None

    def forward(self, x):
        """Apply the activation to ``x``, cache and return the result."""
        scaled = self.a * x
        # Record which branch of the max was taken so backward can use the
        # matching slope for every element.
        self._identity_mask = x >= scaled
        self.out = np.maximum(x, scaled)
        return self.out

    def backward(self, dout):
        """Multiply the upstream gradient ``dout`` by the local slope.

        The slope is 1 where the forward pass passed ``x`` through unchanged
        and ``a`` where it was scaled. The decision is based on the forward
        input, never on the sign of ``dout``.

        Raises
        ------
        RuntimeError
            If called before ``forward``.
        """
        if self._identity_mask is None:
            raise RuntimeError("forward must be called before backward")
        return dout * np.where(self._identity_mask, 1.0, self.a)

In [None]:
import numpy as np
class Softmax:
    """Softmax output layer intended to be paired with cross-entropy loss."""

    def __init__(self):
        # Probabilities produced by the most recent forward call.
        self.y_pred = None

    def forward(self, x, y=None):
        """Turn logits into probabilities along the last axis.

        Parameters
        ----------
        x : array-like
            Logits. Normalisation runs over the last axis, so each row of a
            2-D input is handled independently.
        y : optional
            Unused. Accepted so callers may pass either one or two arguments.

        Returns
        -------
        ndarray shaped like ``x`` whose last-axis slices sum to 1. The same
        array is stored in ``self.y_pred``.
        """
        logits = np.asarray(x)
        # Subtracting the per-sample maximum leaves the result unchanged but
        # keeps np.exp from overflowing on large logits.
        shifted = logits - np.max(logits, axis=-1, keepdims=True)
        exps = np.exp(shifted)
        self.y_pred = exps / np.sum(exps, axis=-1, keepdims=True)
        return self.y_pred

    # Using Cross-Entropy-Loss
    def backward(self, y_actual):
        """Gradient of softmax + cross-entropy w.r.t. the logits.

        Returns ``y_pred - y_actual`` using the probabilities cached by the
        last ``forward`` call. No averaging over samples is applied here.
        """
        return self.y_pred - y_actual

Input layer: 3072 nodes<br>
Output classification layer: 10 nodes<br><br>**TODO:**<br>Add bias terms<br>Simplify anything if necessary<br>Implement dropout<br>Implement optimisers<br>Consider making the activation function configurable per layer rather than one for the entire network, if necessary

In [None]:
import numpy as np
class NeuralNetwork:
    """Fully connected classifier trained with full-batch gradient descent.

    The network has no bias terms. Every hidden layer shares one activation
    object; the output layer is a softmax trained with cross-entropy loss.

    Parameters
    ----------
    x_train : array-like, shape (n_samples, n_features)
        Training inputs; the feature count sets the input layer width.
    y_train : array-like, shape (n_samples, n_classes)
        Target rows (e.g. one-hot); the column count sets the output width.
    hidden_layers : int
        Number of hidden layers.
    hidden_layer_sizes : sequence of int
        Width of each hidden layer; its length must equal ``hidden_layers``.
    activation_func : Sigmoid, ReLU or LeakyReLU instance
        Activation applied after every hidden layer.
    learning_rate : float, default 0.03
        Step size used by ``update_gradients``.

    Raises
    ------
    ValueError
        If ``hidden_layer_sizes`` does not have ``hidden_layers`` entries.
    TypeError
        If ``activation_func`` is not one of the supported activations.
    """

    def __init__(self, x_train, y_train, hidden_layers, hidden_layer_sizes, activation_func, learning_rate=0.03):
        # Stored as arrays so the matrix code can rely on .T and np.dot even
        # when plain nested lists are passed in.
        self.inputs = np.asarray(x_train)
        self.outputs = np.asarray(y_train)

        self.hidden_layers = hidden_layers

        if (len(hidden_layer_sizes) != hidden_layers):
            raise ValueError("Neurons array length mismatch with hidden layers amount")
        self.hidden_layers_sizes = hidden_layer_sizes

        if not isinstance(activation_func, (Sigmoid, ReLU, LeakyReLU)):
            raise TypeError("Activation function must be of type Sigmoid, ReLU or LeakyReLU")
        self.activation_func = activation_func

        self.learning_rate = learning_rate

        self.weights = self.create_weight_matrices()

        # activations[0] is the input batch, activations[k] the output of
        # layer k, activations[-1] the softmax probabilities.
        self.activations = [None] * (self.hidden_layers + 2)

    def create_weight_matrices(self):
        """Create one randomly initialised ``(fan_out, fan_in)`` matrix per layer.

        Entries are drawn from U(-1, 1) and divided by ``sqrt(fan_in)`` for
        Sigmoid, or by ``sqrt(fan_in) / 2`` for ReLU / LeakyReLU.

        Returns
        -------
        list of ndarray, ordered from the first layer to the output layer.
        """
        layer_sizes = [len(self.inputs[0]), *self.hidden_layers_sizes, len(self.outputs[0])]

        # ReLU-family activations get a twice-as-wide initial range.
        gain = 1.0 if isinstance(self.activation_func, Sigmoid) else 2.0

        weights = []
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            draw = np.random.uniform(low=-1, high=1, size=(fan_out, fan_in))
            weights.append(draw / (np.sqrt(fan_in) / gain))
        return weights

    def train(self, epochs=100):
        """Run ``epochs`` full-batch gradient-descent steps.

        Returns
        -------
        list of float
            Cross-entropy loss measured before each weight update.
        """
        loss_history = []
        for _ in range(epochs):
            loss, cache, softmax = self.forwardpass()
            loss_history.append(loss)

            gradients = self.backprop(cache, softmax)
            self.weights = self.update_gradients(gradients)
        return loss_history

    def forwardpass(self):
        """Propagate the training inputs through every layer.

        Fills ``self.activations`` as a side effect.

        Returns
        -------
        ce_loss : float
            Mean cross-entropy loss over the training set.
        cache : list
            ``[z_1, a_1, z_2, a_2, ..., z_L, y_pred]`` -- the pre-activation
            and output of each layer, in layer order.
        softmax : Softmax
            Output layer object holding the predicted probabilities.
        """
        cache = []
        self.activations[0] = self.inputs
        softmax = Softmax()
        last_layer = len(self.weights) - 1
        for j, weight in enumerate(self.weights):
            z = np.dot(self.activations[j], weight.T)

            if j == last_layer:
                # Output layer. Targets are passed along so the call is valid
                # whether or not Softmax.forward requires a second argument.
                softmax.forward(z, self.outputs)
                a = softmax.y_pred
            else:
                a = self.activation_func.forward(z)

            self.activations[j + 1] = a
            cache.append(z)
            cache.append(a)

        ce_loss = self.cross_entropy_loss(softmax.y_pred, self.outputs)
        return ce_loss, cache, softmax

    def backprop(self, cache, softmax):
        """Compute the gradient of the mean cross-entropy loss for every weight matrix.

        Parameters
        ----------
        cache : list
            The ``[z_1, a_1, ..., z_L, y_pred]`` list returned by ``forwardpass``.
        softmax : Softmax
            The output layer object returned by the same ``forwardpass`` call.

        Returns
        -------
        list of ndarray
            One gradient per entry of ``self.weights``, in the same order and
            with the same ``(fan_out, fan_in)`` shapes.
        """
        batch_size = len(self.outputs)
        # The loss is a mean over samples, so its gradient w.r.t. the logits
        # carries a 1 / batch_size factor.
        grad = softmax.backward(self.outputs) / batch_size

        gradients = [None] * len(self.weights)
        for layer in range(len(self.weights) - 1, -1, -1):
            # activations[layer] is what was fed into weights[layer].
            layer_input = self.activations[layer]
            gradients[layer] = np.dot(grad.T, layer_input)

            if layer > 0:
                # Push the gradient back through this layer's weights ...
                da = np.dot(grad, self.weights[layer])
                # ... then through the previous layer's activation. The
                # activation object is shared by all hidden layers and only
                # remembers its last forward call, so its state is rebuilt
                # from the cached pre-activation first.
                z_prev = cache[2 * (layer - 1)]
                self.activation_func.forward(z_prev)
                grad = self.activation_func.backward(da)
        return gradients

    def update_gradients(self, gradients):
        """Apply one gradient-descent step and return the new weight list.

        Parameters
        ----------
        gradients : list of ndarray
            One gradient per weight matrix, as returned by ``backprop``.

        Raises
        ------
        ValueError
            If the number of gradients differs from the number of weights.
        """
        if len(gradients) != len(self.weights):
            raise ValueError("Gradients length mismatch with weights amount")
        return [
            weight - self.learning_rate * gradient
            for weight, gradient in zip(self.weights, gradients)
        ]

    def run(self):
        """Placeholder; not implemented yet."""
        pass

    def cross_entropy_loss(self, y_pred, y_actual):
        """Mean cross-entropy between predicted probabilities and targets.

        Predictions are clipped to ``[1e-15, 1 - 1e-15]`` before the log so a
        probability of exactly 0 cannot produce ``-inf``.
        """
        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        loss = -np.sum(y_actual * np.log(y_pred), axis=1)
        return np.mean(loss)