In [1]:
import numpy as np
import mnist_loader as ml

In [170]:
class Layer:
    
    def __init__(self, inputSize:int, activ:str):
        
        self.input = None
        self.FPoutput = None
        self.BPoutput = None
        self.weights = np.random.uniform(low = -1/np.sqrt(inputSize), high = 1/np.sqrt(inputSize), size=(10, inputSize)) # xavier weight initialization
        self.activ = activ

    #######################################################################
    # for first layer, inputData is our data
    # for each next layer, inputData is self.FPoutput from PREVIOUS layer   
    #######################################################################
     
    def forwardPass(self, inputData):
        
        self.input = np.vstack(([1], inputData))
        net = self.weights @ self.input
        self.FPoutput = self.activation(net) 
        # print(f'FPoutput: {self.FPoutput.shape}')
        
    def activation(self, x):

        if self.activ == 'sigmoid':
            return 1/(1+np.exp(-x))
        
        elif self.activ == 'tanh':
            return np.tanh(x)
        
        elif self.activ == 'softmax':
            exp_x = np.exp(x-np.max(x))  # Subtracting max(x) for numerical stability
            return exp_x / np.sum(exp_x)
        
    def activation_prime(self):

        if self.activ == 'sigmoid' or self.activ == 'softmax':
            return self.FPoutput * (1 - self.FPoutput)
        
        elif self.activ == 'tanh':
            return 1-self.FPoutput**2

        # elif self.activ == 'softmax':
        #     return np.diag(self.FPoutput) - np.outer(self.FPoutput, self.FPoutput)
        
    #######################################################################
    # for last layer, the inputData = self.FPoutput - trueLabel
    # for each next layer, the inputData = self.BPoutput from PREVIOUS LAYER
    #######################################################################
    
    def backwardPropagation(self, inputData, lr):
        
        # print(f'input: {inputData.shape}')
        # print(f'actprime: {self.activation_prime().shape}')
        delta = inputData * self.activation_prime()
        dLdW = delta @ self.input.T
        new_weights = self.weights - lr * dLdW
        dLdX = self.weights.T @ delta
        self.weights = new_weights
        self.BPoutput = dLdX[1:]


In [178]:
class Network:

    def __init__(self, layers):
        
        self.layers = layers
        self.result = None

    def fit(self, xTrain, yTrain, epochs, lr):

        for _ in range(epochs):

            for i in range(len(xTrain)):

                output = xTrain[i]
                for layer in self.layers:
                    layer.forwardPass(output)
                    output = layer.FPoutput

                error = output - yTrain[i]
                for layer in np.flip(self.layers):
                    layer.backwardPropagation(error, lr)
                    error = layer.BPoutput
            
            np.random.shuffle(xTrain)
    
    def predict(self, xTest):

        result = [0]*len(xTest)

        for i in range(len(xTest)):

            output = xTest[i]
            for layer in self.layers:
                layer.forwardPass(output)
                output = layer.FPoutput
            
            digit = np.argmax(output)
            result[i] = digit
        
        self.result = result

In [179]:
training_data, validation_data, test_data = ml.load_data_wrapper()

In [180]:
a, b = zip(*training_data)
c, d = zip(*validation_data)
a = list(a)
b = list(b)
c = list(c)
d = list(d)
trainX = a + c
trainY = b + d

In [181]:
testX, testY = zip(*test_data)
testX = list(testX)
testY = list(testY)

In [184]:
L1 = Layer(785, 'sigmoid')
L2 = Layer(11, 'tanh')
L3 = Layer(11, 'sigmoid')

In [185]:
NNet = Network([L1, L2, L3])
NNet.fit(trainX, trainY, 3, 0.1)
NNet.predict(testX)

In [186]:
prediction = NNet.result
accuracy = np.mean(np.array(prediction) == np.array(testY))
accuracy

0.1137