In [25]:
import numpy as np
from numpy import random

# number of one-hot classes: used as the network's input and output width
# and as the exclusive upper bound when sampling a random class index
span = 10


In [26]:
# fully connected (affine) layer: owns the weights, bias and their gradients
class Linear_layer:
    """Affine layer computing ``Z = W @ A0 + B``.

    Samples are laid out one per column: ``A0`` is ``(input_size, n_samples)``
    and the result is ``(output_size, n_samples)``. A single sample is the
    ``n_samples == 1`` case.
    """

    def __init__(self, input_size, output_size):
        """Create a layer mapping ``input_size`` features to ``output_size``.

        Weights and bias are drawn with ``random.rand`` (uniform on [0, 1)).
        """
        # forward pass variables
        self.W = random.rand(output_size, input_size)
        self.B = random.rand(output_size, 1)
        self.A0 = np.zeros((input_size, 1))

        # gradients
        self.dA = np.zeros((input_size, 1))
        self.dW = np.zeros((output_size, input_size))
        self.dB = np.zeros((output_size, 1))
        # placeholder only: no method of this class writes to it
        self.dZ = np.zeros((output_size, 1))

    def forward(self, A0):
        """Return ``W @ A0 + B`` and cache the input for ``backward``.

        Args:
            A0: input activations, one sample per column.

        Returns:
            The pre-activation output ``Z``.
        """
        # Store a private copy: if the caller later reuses or edits its
        # buffer in place, the gradient computed in backward() must still
        # refer to the values that were actually fed forward.
        self.A0 = np.array(A0, copy=True)

        # forward pass
        U = np.matmul(self.W, self.A0)
        Z = U + self.B

        return Z

    def backward(self, A0, dZ):
        """Compute and store the gradients for the last ``forward`` call.

        Args:
            A0: accepted for interface compatibility but ignored; the input
                cached by ``forward`` is used instead.
            dZ: gradient of the loss with respect to this layer's output,
                one sample per column.

        Returns:
            ``dA``, the gradient with respect to the layer input.
        """
        dZ = np.asarray(dZ)

        # The bias is shared by every sample, so its gradient is the sum of
        # the per-sample gradients. For a single column this equals dZ; for
        # a batch it keeps dB the same shape as B so update_weights works.
        if dZ.ndim == 2:
            self.dB = np.sum(dZ, axis=1, keepdims=True)
        else:
            self.dB = dZ
        self.dA = np.matmul(np.transpose(self.W), dZ)
        self.dW = np.matmul(dZ, np.transpose(self.A0))

        # return out dA (ideally to be passed to activation function's backward)
        return self.dA

    def update_weights(self, lr=0.01):
        """Apply one in-place gradient-descent step with learning rate ``lr``."""
        self.W -= lr * self.dW
        self.B -= lr * self.dB

    

In [27]:
# rectified linear unit activation
class ReLU_layer:
    """Stateless ReLU activation: entries below zero are replaced by zero."""

    def __init__(self):
        """Nothing to set up: the layer keeps no parameters or cached values."""

    def forward(self, input):
        """Return the element-wise maximum of ``input`` and zero."""
        rectified = np.maximum(input, 0)
        return rectified

    def backward(self, input, dP):
        """Return ``dP`` where ``input`` is strictly positive and zero elsewhere.

        Args:
            input: array whose sign decides which units were active.
            dP: gradient arriving from the following layer.
        """
        # 1.0 for active units, 0.0 for clamped ones
        gate = 1. * (input > 0)
        return gate * dP

# class for Softmax Activation
class Softmax_layer:
    """Softmax over all entries of the input, with the result cached for backward."""

    def __init__(self):
        # last softmax output; replaced by an array on the first forward() call
        self.output = 0

    def forward(self, input):
        """Return the softmax of ``input`` and remember it for ``backward``.

        The global maximum is subtracted before exponentiating so that the
        largest exponent is zero; the normalisation runs over every entry.
        """
        shifted = input - np.max(input)
        exponentials = np.exp(shifted)
        self.output = exponentials / np.sum(exponentials)
        return self.output

    def backward(self, dL):
        """Multiply the softmax Jacobian of the cached output by ``dL``.

        Derivation: https://e2eml.school/softmax.html
        """
        probs = self.output
        # diag(s) - s s^T; assumes the cached output is an (n, 1) column,
        # as produced by the forward passes in this notebook
        diagonal = probs * np.identity(probs.size)
        outer = np.matmul(probs, np.transpose(probs))
        jacobian = diagonal - outer
        return np.matmul(jacobian, dL)

# class for calculating loss function
class Log_loss:
    """Binary cross-entropy (log loss) summed over every output element.

    Each element of ``input`` is treated as an independent probability and
    each element of ``labels`` as its target in the range 0 to 1.
    """

    def __init__(self, eps=1e-12):
        """
        Args:
            eps: predictions are clipped to ``[eps, 1 - eps]`` before taking
                logs or dividing. The default is sized for float64 inputs.
        """
        self.loss = 0
        self.eps = eps

    def _clip(self, input):
        """Keep probabilities away from exactly 0 and 1."""
        return np.clip(input, self.eps, 1.0 - self.eps)

    def forward(self, input, labels):
        """Return (and store in ``self.loss``) the summed log loss.

        Args:
            input: predicted probabilities.
            labels: targets, must be 0 to 1 (e.g. a one-hot vector).
        """
        # Without clipping, a saturated prediction gives log(0) = -inf and
        # 0 * -inf = nan, which would poison the loss.
        p = self._clip(input)
        self.loss = - (np.sum(labels * np.log(p)) + np.sum((1 - labels) * np.log(1 - p)))
        return self.loss

    def backward(self, input, labels):
        """Return the element-wise gradient of the loss with respect to ``input``."""
        # same clipping as forward() so the divisions below cannot hit zero
        p = self._clip(input)
        term1 = - labels / p
        term2 = (1 - labels) / (1 - p)

        return term1 + term2


In [28]:
# generate the neural network: span -> hidden_size -> span,
# with a ReLU after each linear layer and a softmax on top
hidden_size = 10  # hidden-layer width; independent of span although both are 10 here
layer1 = Linear_layer(span, hidden_size)
activation1 = ReLU_layer()
layer2 = Linear_layer(hidden_size, span)
activation2 = ReLU_layer()
output = Softmax_layer()  # NOTE: `output` is the softmax layer object, not a result array
loss = Log_loss()

In [29]:
# loop training: one randomly drawn one-hot sample per gradient step
for i in range(30000):
    # generate random input
    # NOTE: the name `input` shadows the Python builtin for the rest of the session
    idx = random.randint(span)
    input = np.zeros((span, 1))
    input[idx] = 1.

    # generate label (the target is the same one-hot vector as the input)
    label = np.zeros((span, 1))
    label[idx] = 1.

    # feedforward
    inter1 = layer1.forward(input)
    output1 = activation1.forward(inter1)
    inter2 = layer2.forward(output1)
    output2 = activation2.forward(inter2)
    prediction = output.forward(output2)

    # generate loss (also stored on the loss object; handy for monitoring)
    prediction_loss = loss.forward(prediction, label)
    # backward through loss
    dL = loss.backward(prediction, label)

    # backward through network
    # Each backward() is handed the array that was the INPUT of the matching
    # forward() call. For ReLU the mask (x > 0) is the same before and after
    # the activation, and Linear_layer uses its own cached input, so the
    # gradients are numerically unchanged, but the calls now match the
    # parameter names and stay correct if a layer starts using the argument.
    dP = output.backward(dL)
    dZ2 = activation2.backward(inter2, dP)
    dA2 = layer2.backward(output1, dZ2)
    dZ1 = activation1.backward(inter1, dA2)
    layer1.backward(input, dZ1)

    # update weights
    layer1.update_weights()
    layer2.update_weights()



In [30]:
# loop inference: feed random one-hot vectors through the network
for i in range(15):
    # pick a random class and encode it as a one-hot column vector
    idx = random.randint(span)
    input = np.zeros((span, 1))
    input[idx, 0] = 1.

    # feedforward, same layer order as during training
    inter1 = layer1.forward(input)
    output1 = activation1.forward(inter1)
    inter2 = layer2.forward(output1)
    output2 = activation2.forward(inter2)
    prediction = output.forward(output2)

    # columns: input class, predicted class, probability of the predicted class
    print(
        np.argmax(input),
        np.argmax(prediction),
        np.max(prediction),
    )

6 6 0.9989838752145072
4 4 0.9986066402365262
9 9 0.9987067794427505
4 4 0.9986066402365262
2 2 0.998855706379075
7 7 0.9987471852866097
5 5 0.998630050300949
4 4 0.9986066402365262
0 0 0.9988664976536179
3 3 0.9986807969343643
4 4 0.9986066402365262
0 0 0.9988664976536179
1 1 0.9986159757346539
5 5 0.998630050300949
0 0 0.9988664976536179
