In [84]:
import numpy as np
import tensorflow as tf
import math
import random
from matplotlib import pyplot as plt

def sigmoid(x):
    """Logistic function ``1 / (1 + e**-x)`` of a scalar ``x``.

    The value is computed with two algebraically equivalent formulas chosen
    by the sign of ``x`` so that ``math.exp`` is only ever called with a
    non-positive argument and therefore cannot raise ``OverflowError``.

    Args:
        x: A real scalar (Python or NumPy number).

    Returns:
        The logistic function value in the closed interval [0, 1].
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # For negative x, exp(-x) overflows once x < ~-709; exp(x) merely
    # underflows to 0.0, which gives the correct limit.
    ex = math.exp(x)
    return ex / (1 + ex)

def sigmoidderivative(x):
    """Derivative of the logistic function at scalar ``x``: ``s * (1 - s)``
    where ``s = sigmoid(x)``."""
    s = sigmoid(x)
    return s * (1 - s)

def tanh(x):
    """Hyperbolic tangent of ``x``; a thin wrapper around ``np.tanh``."""
    value = np.tanh(x)
    return value

def tanhderivative(x):
    """Derivative of tanh at ``x``, computed as ``1 - tanh(x)**2``."""
    t = np.tanh(x)
    return 1 - t ** 2

def relu(x):
    """Rectified linear unit of a scalar: ``x`` if positive, otherwise 0.

    Always returns a Python ``float``. This matters when the function is
    wrapped with ``np.vectorize`` without ``otypes``: NumPy infers the output
    dtype from the first element's result, so returning the int ``0`` for a
    leading negative value would make the whole output array integer and
    truncate every later activation.

    Args:
        x: A real scalar.

    Returns:
        ``float(x)`` when ``x > 0``, else ``0.0``.
    """
    return float(x) if x > 0 else 0.0

def reluderivative(x):
    """Slope of ReLU at scalar ``x``: 1 for strictly positive ``x``, else 0
    (the value 0 is used at ``x == 0``)."""
    return 1 if x > 0 else 0

# Element-wise versions of the scalar activation functions and derivatives.
# otypes is pinned to float because np.vectorize otherwise infers the output
# dtype from the result for the FIRST element only (an int result there would
# truncate all later values) and refuses to run on empty input.
vec_sigmoid = np.vectorize(sigmoid, otypes=[float])
vec_sigmoidderivative = np.vectorize(sigmoidderivative, otypes=[float])
vec_tanh = np.vectorize(tanh, otypes=[float])
vec_tanhderivative = np.vectorize(tanhderivative, otypes=[float])
vec_relu = np.vectorize(relu, otypes=[float])
vec_reluderivative = np.vectorize(reluderivative, otypes=[float])


In [85]:
def OneHot(n: np.uint8, num_classes: int = 10):
    """Return a one-hot float vector with a 1 at position ``n``.

    Args:
        n: Integer class label, ``0 <= n < num_classes``.
        num_classes: Length of the returned vector (defaults to 10).

    Returns:
        A 1-D ``np.ndarray`` of zeros of length ``num_classes`` with entry
        ``n`` set to 1.

    Raises:
        IndexError: If ``n`` is outside ``[0, num_classes)``. Negative labels
            are rejected explicitly because NumPy indexing would otherwise
            wrap them around to the end of the vector.
    """
    if not 0 <= n < num_classes:
        raise IndexError(f"label {n} is out of range for {num_classes} classes")
    res = np.zeros(num_classes)
    res[n] = 1
    return res

In [86]:
class NeuralLayer:

    def __init__(self, numinputs:int, numoutputs:int, activation=None):

        self.numinputs = numinputs
        self.numoutputs = numoutputs
        self.activation = activation
        self.weights = np.random.randn(self.numoutputs, self.numinputs + 1)

    
    def Evaluate(self, inputs):

        inputs = np.append(inputs, np.array([1]))
    
        outputs = self.weights @ inputs # this is \vec{h}

        match self.activation:
            case "Sigmoid":
                outputs = vec_sigmoid(outputs)

            case "Softmax":
                denom = 0
                for i in range(len(outputs)):
                    denom += math.exp[outputs[i]]
                    outputs[i] = math.exp(outputs[i])
                outputs = outputs/denom

            case "ReLU":
                # Put code here, delete the pass
                outputs = vec_relu(outputs)
                
            
            case "Tanh":
                # Put code here, delete the pass
                outputs = vec_tanh(outputs)



        return outputs

    
        
    def ComputeLocalGradient(self, inputs):
        # z is output after activation
        # h is output after linear layer
        # w are weights
        # Need to compute three things:
        # dz/dh
        # dh/dw
        # dh/dx

        inputs = np.append(inputs, np.array([1]))
        outputs = self.weights @ inputs


        # This part computes dzdh, and has cases for various activation functions
        match self.activation:
            case "Sigmoid":
                dzdh = np.diag(vec_sigmoidderivative(outputs))
            case "Softmax":
                n = len(outputs)
                dzdh = np.zeros((n, n))
                denom = 0
                for i in range(n):
                    denom += math.exp(outputs[i])
                
                for i in range(n):
                    for j in range(n):
                        if i == j:
                            dzdh[i][j] = (denom * math.exp(outputs[i]) - (math.exp(outputs[i])**2))/(denom**2)
                        else:
                            dzdh[i][j] = -(math.exp(outputs[j]))*(math.exp(outputs[j]))/(denom**2)

            case "ReLU":
                # Put code here and remove pass
                dzdh = np.diag(vec_reluderivative(outputs))

            case "Tanh":
                # Put code here and remove pass
                dzdh = np.diag(vec_tanhderivative(outputs))



            

        
        
        
        
        # This part computes dhdw        
        dhdw = np.zeros((self.numoutputs, self.numoutputs, self.numinputs+1)) #because of bias
        for i in range(self.numoutputs):
            for j in range(self.numinputs):
                dhdw[i,i,j] = inputs[i]
            dhdw[i,i,self.numinputs] = 1
            
        # This part computes dhdx
        dhdx = self.weights[:, :-1]


        return (dzdh, dhdw, dhdx)

    
    

        

In [87]:
# Shape check for the three local Jacobians of a 5-input, 3-output Tanh layer.
Layer1 = NeuralLayer(5, 3, "Tanh")
#Layer1.weights = np.array([[1, 2, -1], [3, -2, 1]])
test1 = np.array([1, 2, 3, 4, 5])
dzdh, dhdw, dhdx = Layer1.ComputeLocalGradient(test1)
for local_jacobian in (dzdh, dhdw, dhdx):
    print(local_jacobian.shape)

(3, 3)
(3, 3, 6)
(3, 5)


In [88]:
class NeuralNetwork:

    def __init__(self, errorfunc=None):
        
        self.errorfunc = errorfunc
        self.layers = []
        self.numlayers = 0

    def AppendLayer(self, layer: NeuralLayer):
        # need to check that the new layer to be appended has same 
        # number of inputs as the last layer already in the network
        if len(self.layers) > 0:
            if layer.numinputs == self.layers[-1].numoutputs:
                self.layers.append(layer)
                self.numlayers += 1
            else:
                print("Error: number of inputs does not match previous layer")
        else:
            self.layers.append(layer)
            self.numlayers += 1

        
    def Evaluate(self, inputs):

        outputs = []
        outputs.append(self.layers[0].Evaluate(inputs))

        for i in range(1,self.numlayers):
            outputs.append(self.layers[i].Evaluate(outputs[i-1]))
        
        return outputs

    def ComputeError(self, inputs, trueoutputs):

        outputs = self.Evaluate(inputs)
        
        if self.errorfunc == "MSE":
            n = len(outputs[-1])
            diffs = outputs[-1] - trueoutputs
            err = np.dot(diffs, diffs)
            err = err/(2*n)
            return err

    def BackPropagate(self, inputs, trueoutputs, learningrate):

        outputs = self.Evaluate(inputs)
        gradients = []

        # Compute all the necessary gradients
        for i in range(self.numlayers):
            if i == 0:
                tempinput = inputs
            else:
                tempinput = outputs[i-1]
            
            gradients.append(self.layers[i].ComputeLocalGradient(tempinput))

        match self.errorfunc:
            case "MSE":
                dldz = (0.5) * (outputs[-1] - trueoutputs)

            case "CrossEntropy":
                dldz = np.zeros(len(trueoutputs))
                spot = np.where(1 == trueoutputs)
                dldz[spot] = 1/outputs[-1][spot]
                
            

        # Update weights, working backwards

        currgrad = dldz @ gradients[-1][0]
    
        for i in range(self.numlayers-1, -1, -1):
            self.layers[i].weights -= learningrate * (currgrad @ gradients[i][1])
            currgrad = currgrad @ gradients[i][0] @ gradients[i][2]

In [89]:
# Toy experiment: a 5 -> 3 -> 2 network (ReLU then Tanh) repeatedly fitted to
# one sample, printing the MSE before training and after each step.
MyNN = NeuralNetwork(errorfunc="MSE")
MyLayer1 = NeuralLayer(5, 3, "ReLU")
MyLayer2 = NeuralLayer(3, 2, "Tanh")

for toy_layer in (MyLayer1, MyLayer2):
    MyNN.AppendLayer(toy_layer)

myinput = np.array([1, 2, 3, 4, 5])
mytrue = np.array([1, 0])

print(MyNN.ComputeError(myinput, mytrue))

for step in range(10):
    MyNN.BackPropagate(myinput, mytrue, 1)
    print(MyNN.ComputeError(myinput, mytrue))




0.27676883637742145
0.07873602016068584
0.039442661678792484
0.02592354320770875
0.01923428987744372
0.015261743548131899
0.012631760064650619
0.01076272082619471
0.009366926881301814
0.008285582908824512
0.0074237564102602694


In [90]:
# Load data from MNIST database
(x_train0, y_train0), (x_test0, y_test0) = tf.keras.datasets.mnist.load_data()
assert x_train0.shape == (60000, 28, 28)
assert x_test0.shape == (10000, 28, 28)
assert y_train0.shape == (60000,)
assert y_test0.shape == (10000,)

# Prepare data for processing:
#   * images are flattened to length 28*28, converted to np.float64 and
#     scaled from [0, 255] to [0, 1]
#   * labels are one-hot encoded
# Only the first 6000 training and 1000 test samples are kept.

#TRAIN
x_train = [
    x_train0[k].reshape(28*28).astype(np.float64) / 255.0
    for k in range(6000)
]
y_train = [OneHot(y_train0[k]) for k in range(6000)]

#TEST
x_test = [
    x_test0[k].reshape(28*28).astype(np.float64) / 255.0
    for k in range(1000)
]
y_test = [OneHot(y_test0[k]) for k in range(1000)]


In [91]:
# Summarise the prepared splits: number of samples and per-sample shape,
# followed by one example of the label -> one-hot mapping.
summary_lines = [
    f"y_train len: {len(y_train)}, shape of first elmt: {y_train[0].shape}",
    f"x_train len: {len(x_train)}, shape of first elmt: {x_train[0].shape}",
    f"x_test len: {len(x_test)}, shape of first elmt: {x_test[0].shape}",
    f"y_test len: {len(y_test)}, shape of first elmt: {y_test[0].shape}",
    f"\ne.g. label: {y_train0[0]} -> Onehot: {y_train[0]}",
]
print("\n".join(summary_lines))

y_train len: 6000, shape of first elmt: (10,)
x_train len: 6000, shape of first elmt: (784,)
x_test len: 1000, shape of first elmt: (784,)
y_test len: 1000, shape of first elmt: (10,)

e.g. label: 5 -> Onehot: [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


In [92]:
# Display the first raw training image flattened to one dimension
# (a 28x28 image gives 784 values, still in the original integer scale).
x_train0[0].reshape(-1)


array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   3,  18,  18,  18,
       126, 136, 175,  26, 166, 255, 247, 127,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,  30,  36,  94, 154, 17

In [96]:
# Sanity check: fit a single 784 -> 10 ReLU layer to the first MNIST training
# image only and watch the MSE on that one sample.
MyMNISTNetwork = NeuralNetwork("MSE")
MyMNISTNetwork.AppendLayer(NeuralLayer(28*28,10,"ReLU"))

# Use the ndarray.astype method (as the data-preparation cell does); the
# module-level np.astype function is only available in recent NumPy releases.
testinput = x_train0[0].reshape(28*28).astype(np.float64)
testinput /= 255.0

# Build the target from the sample's actual label rather than hard-coding it.
onehot = OneHot(y_train0[0])

print(MyMNISTNetwork.ComputeError(testinput, onehot))
for i in range(100):
    MyMNISTNetwork.BackPropagate(testinput, onehot, 1)
    print(MyMNISTNetwork.ComputeError(testinput, onehot))

print(MyMNISTNetwork.ComputeError(testinput, onehot))

print("Final check evaluation: " + str(MyMNISTNetwork.Evaluate(testinput)))

33.58405315097387
8.396013287743472
2.099003321935868
0.5247508304839668
0.13118770762099172
0.03279692690524796
0.00819923172631194
0.002049807931578014
0.0005124519828945218
0.0001281129957236248
3.2028248930903315e-05
8.007062232727702e-06
2.0017655581808007e-06
5.004413895448487e-07
1.2511034738596698e-07
3.1277586846629254e-08
7.819396711630743e-09
1.954849177929678e-09
4.887122944710355e-10
1.2217807362325053e-10
3.054451840324052e-11
7.636129603364385e-12
1.9090324001980687e-12
4.772581006998066e-13
1.1931452544792715e-13
2.982863144778867e-14
7.457157817844904e-15
1.8642894951291668e-15
4.660723570086008e-16
1.1651809672828495e-16
2.912952159842614e-17
7.282379143693934e-18
1.8205941147809088e-18
4.551481723077002e-19
1.137868752917251e-19
2.8446559964252107e-20
7.111705513179282e-21
1.7779041042845414e-21
4.4448611841982565e-22
1.1112798784552192e-22
2.7777997315796687e-23
6.942015976600238e-24
1.7344806443407614e-24
4.340237271864921e-25
1.0875774359314055e-25
2.7317917881918