In [47]:
import numpy as np
import tensorflow as tf
import math
import random
from matplotlib import pyplot as plt

def sigmoid(x):
    """Logistic function 1 / (1 + e^(-x)) for a scalar ``x``.

    The computation is split by sign so that ``math.exp`` is only ever called
    with a non-positive argument; the naive form raises ``OverflowError`` for
    strongly negative ``x`` (roughly x < -709).
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # For x < 0 use the algebraically equal form e^x / (1 + e^x).
    e = math.exp(x)
    return e / (1 + e)

def sigmoidderivative(x):
    """Derivative of the logistic function at scalar ``x``: s(x) * (1 - s(x))."""
    s = sigmoid(x)
    return s * (1 - s)

def OneHot(int, num_classes=10):
    """Return a one-hot integer vector with a 1 at position ``int``.

    Parameters
    ----------
    int : integer class label in ``[0, num_classes)``. The parameter name
        shadows the builtin; it is kept unchanged so existing callers that
        pass it by keyword keep working.
    num_classes : length of the returned vector (defaults to 10).

    Raises
    ------
    IndexError
        If the label is outside ``[0, num_classes)``. Negative labels are
        rejected explicitly because plain indexing would silently wrap around
        and mark the wrong class.
    """
    if not 0 <= int < num_classes:
        raise IndexError(f"label {int} is out of range for {num_classes} classes")
    output = np.zeros(num_classes, dtype=np.array([0]).dtype)
    output[int] = 1
    return output

# Element-wise wrappers around the scalar helpers. `otypes` is pinned to float so
# the wrappers also accept empty arrays (np.vectorize cannot infer an output type
# from a size-0 input) and never derive the output dtype from the first element.
vec_sigmoid = np.vectorize(sigmoid, otypes=[float])
vec_sigmoidderivative = np.vectorize(sigmoidderivative, otypes=[float])

In [29]:
class NeuralLayer:

    def __init__(self, numinputs:int, numoutputs:int, activation=None):

        self.numinputs = numinputs
        self.numoutputs = numoutputs
        self.activation = activation
        self.weights = np.random.randn(self.numoutputs, self.numinputs + 1)

    
    def Evaluate(self, inputs):

        inputs = np.append(inputs, np.array([1]))
    
        outputs = self.weights @ inputs # this is \vec{h}

        match self.activation:
            case "Sigmoid":
                outputs = vec_sigmoid(outputs)

            case "Softmax":
                denom = 0
                for i in range(len(outputs)):
                    denom += math.exp[outputs[i]]
                    outputs[i] = math.exp(outputs[i])
                outputs = outputs/denom

            case "ReLU":
                # Put code here, delete the pass
                for i in range(len(outputs)):
                    if outputs[i] > 0:
                        continue
                    else:
                        outputs[i] = 0
                pass #Alejandro shall not pass
            
            case "Tanh":
                # Put code here, delete the pass
                outputs = (np.exp(outputs) - np.exp((-1*outputs))) / (np.exp(outputs) + np.exp((-1*outputs)))


        return outputs

    
        
    def ComputeLocalGradient(self, inputs):
        # z is output after activation
        # h is output after linear layer
        # w are weights
        # Need to compute three things:
        # dz/dh
        # dh/dw
        # dh/dx

        inputs = np.append(inputs, np.array([1]))
        outputs = self.weights @ inputs


        # This part computes dzdh, and has cases for various activation functions
        match self.activation:
            case "Sigmoid":
                dzdh = np.diag(vec_sigmoidderivative(outputs))
            case "Softmax":
                n = len(outputs)
                dzdh = np.zeros((n, n))
                denom = 0
                for i in range(n):
                    denom += math.exp(outputs[i])
                
                for i in range(n):
                    for j in range(n):
                        if i == j:
                            dzdh[i][j] = (denom * math.exp(outputs[i]) - (math.exp(outputs[i])**2))/(denom**2)
                        else:
                            dzdh[i][j] = -(math.exp(outputs[j]))*(math.exp(outputs[j]))/(denom**2)

            case "ReLU":
                # Put code here and remove pass
                n = len(outputs)
                dzdh = np.zeros((n,n))
                for i in range(n):
                    if outputs[i] > 0:
                        dzdh[i,i] = 1
                    else:
                        dzdh[i,i] = 0
                    

            case "Tanh":
                # Put code here and remove pass
                dzdh = np.diag(1 - ((np.exp(outputs) - np.exp((-1*outputs))) / (np.exp(outputs) + np.exp((-1*outputs))))**2)
            

        
        # This part computes dhdw        
        dhdw = np.zeros((self.numoutputs, self.numoutputs, self.numinputs+1)) #because of bias
        for i in range(self.numoutputs):
            for j in range(self.numinputs):
                dhdw[i,i,j] = inputs[i]
            dhdw[i,i,self.numinputs] = 1
            
        # This part computes dhdx
        dhdx = self.weights[:, :-1]


        return (dzdh, dhdw, dhdx)

    
    

        

In [22]:
# Shape check for the local gradients of a 5-input, 3-output Tanh layer.
Layer1 = NeuralLayer(5, 3, "Tanh")
test1 = np.array([1, 2, 3, 4, 5])
dzdh, dhdw, dhdx = Layer1.ComputeLocalGradient(test1)
print(dzdh.shape, dhdw.shape, dhdx.shape, sep="\n")

(3, 3)
(3, 3, 6)
(3, 5)


In [23]:
class NeuralNetwork:

    def __init__(self, errorfunc=None):
        
        self.errorfunc = errorfunc
        self.layers = []
        self.numlayers = 0

    def AppendLayer(self, layer: NeuralLayer):
        # need to check that the new layer to be appended has same 
        # number of inputs as the last layer already in the network
        if len(self.layers) > 0:
            if layer.numinputs == self.layers[-1].numoutputs:
                self.layers.append(layer)
                self.numlayers += 1
            else:
                print("Error: number of inputs does not match previous layer")
        else:
            self.layers.append(layer)
            self.numlayers += 1

        
    def Evaluate(self, inputs):

        outputs = []
        outputs.append(self.layers[0].Evaluate(inputs))

        for i in range(1,self.numlayers):
            outputs.append(self.layers[i].Evaluate(outputs[i-1]))
        
        return outputs

    def ComputeError(self, inputs, trueoutputs):

        outputs = self.Evaluate(inputs)
        
        if self.errorfunc == "MSE":
            n = len(outputs[-1])
            diffs = outputs[-1] - trueoutputs
            err = np.dot(diffs, diffs)
            err = err/(2*n)
            return err

    def BackPropagate(self, inputs, trueoutputs, learningrate):

        outputs = self.Evaluate(inputs)
        gradients = []

        # Compute all the necessary gradients
        for i in range(self.numlayers):
            if i == 0:
                tempinput = inputs
            else:
                tempinput = outputs[i-1]
            
            gradients.append(self.layers[i].ComputeLocalGradient(tempinput))

        match self.errorfunc:
            case "MSE":
                dldz = (0.5) * (outputs[-1] - trueoutputs)

            case "CrossEntropy":
                dldz = np.zeros(len(trueoutputs))
                spot = np.where(1 == trueoutputs)
                dldz[spot] = 1/outputs[-1][spot]
                
            

        # Update weights, working backwards

        currgrad = dldz @ gradients[-1][0]
    
        for i in range(self.numlayers-1, -1, -1):
            self.layers[i].weights -= learningrate * (currgrad @ gradients[i][1])
            currgrad = currgrad @ gradients[i][0] @ gradients[i][2]

In [None]:
# Small end-to-end check: two sigmoid layers trained on a single sample.
# The layers draw random initial weights, so the generator is seeded to make
# the printed errors reproducible after Restart & Run All.
np.random.seed(0)

MyNN = NeuralNetwork(errorfunc="MSE")
MyLayer1 = NeuralLayer(5, 3, "Sigmoid")
MyLayer2 = NeuralLayer(3, 2, "Sigmoid")

MyNN.AppendLayer(MyLayer1)
MyNN.AppendLayer(MyLayer2)

myinput = np.array([1,2,3,4,5])
mytrue = np.array([1,0])

# Error before training, then after each of 10 gradient steps.
print(MyNN.ComputeError(myinput, mytrue))

for _ in range(10):
    MyNN.BackPropagate(myinput, mytrue, 1)
    print(MyNN.ComputeError(myinput, mytrue))




0.11836143679049575
244.78737838563907
0.25
0.25
0.25
0.25
0.25
0.25
0.25
0.25
0.25


In [None]:
# Load data from MNIST database
(x_train0, y_train0), (x_test0, y_test0) = tf.keras.datasets.mnist.load_data()
assert x_train0.shape == (60000, 28, 28)
assert x_test0.shape == (10000, 28, 28)
assert y_train0.shape == (60000,)
assert y_test0.shape == (10000,)

# Only a subset is used; images and labels must be cut with the same size so
# that x_*[i] and y_*[i] always describe the same sample.
N_TRAIN = 6000
N_TEST = 1000

# Prepare data for processing
# x_train and x_test are flattened, converted to np.float64 and scaled to [0, 1]
# y_train and y_test are one-hot encoded
x_train = [x.flatten().astype(np.float64) / 255.0 for x in x_train0[:N_TRAIN]]
x_test = [x.flatten().astype(np.float64) / 255.0 for x in x_test0[:N_TEST]]
y_train = [OneHot(y) for y in y_train0[:N_TRAIN]]
y_test = [OneHot(y) for y in y_test0[:N_TEST]]

assert len(x_train) == len(y_train) == N_TRAIN
assert len(x_test) == len(y_test) == N_TEST

In [78]:
# Overfit a single-layer Tanh network on the first MNIST training image.
# Seeded because the layer draws random initial weights.
np.random.seed(0)

MyMNISTNetwork = NeuralNetwork("MSE")
MyMNISTNetwork.AppendLayer(NeuralLayer(28*28,10,"Tanh"))

# ndarray.astype is used rather than the module-level np.astype, which only
# exists in recent NumPy releases.
testinput = x_train0[0].reshape(28*28).astype(np.float64) / 255.0

# Target derived from the actual label instead of a hand-written vector.
onehot = OneHot(y_train0[0])

# Error before training, then after each of 100 gradient steps.
print(MyMNISTNetwork.ComputeError(testinput, onehot))
for _ in range(100):
    MyMNISTNetwork.BackPropagate(testinput, onehot, 1)
    print(MyMNISTNetwork.ComputeError(testinput, onehot))

print(f"Final check evaluation: {MyMNISTNetwork.Evaluate(testinput)}")

0.4488643126069637
0.44883925223147336
0.44881307138077925
0.44878569414590974
0.44875703767224684
0.44872701135339155
0.44869551591095025
0.44866244234108865
0.4486276707049529
0.4485910687354836
0.4485524902275152
0.44851177317110524
0.44846873757940575
0.44842318295163786
0.4483748852982366
0.4483235936382311
0.4482690258573646
0.4482108637879577
0.4481487473362085
0.44808226743699997
0.4480109575569039
0.4479342833882253
0.4478516302740749
0.44776228776745197
0.4476654305432266
0.44756009463229934
0.44744514760549314
0.44731925086210794
0.44718081151735756
0.4470279204486675
0.4468582717238636
0.446669056696608
0.44645682320840585
0.4462172860973891
0.4459450688073036
0.44563334606079996
0.4452733422501338
0.4448536159914019
0.4443590224903528
0.44376918250888897
0.4430561843996087
0.4421810804639267
0.44108848184719873
0.43969819976417507
0.4378925672990562
0.43549860828750564
0.4322688818371926
0.4278848017110756
0.42206880850213047
0.4149972034715681
0.40798841199345404
0.403119