In [None]:
import numpy as np
import random as rd
from keras.datasets import mnist



In [None]:
class Network:
    """
    The main object we're going to use accross this notebook
    It's a neural network that takes as input a list of 
    layers nodes
    
    Ex: [2, 3, 1] is a 3 layer network, with 2 neurons of input, 3 neurons 
    in the hidden layer and 1 for the output layer
    
    Supposedly it can take more than just 3 layers but I didnt test it
    
    It initializes an object with the proper weights, biases, activations and z
    based on the layers list. It also has the layers list and the number of layers
    
    The weights and biases initialized following a Gaussian of mean 1
    """
    def __init__(self, layers: list):        
        np.random.seed(42)        
        b = []
        w = []
        a = []
        z = []
        for l in range(0, len(layers)):
            # skipping one layer for the weights and biases
            if (l+1) < len(layers):
                b.append(np.random.normal(loc=0, scale=1,size=layers[l+1]))
                w.append(np.random.normal(loc=0,scale=1,size=[layers[l],layers[l+1]]))
            a.append(np.zeros(layers[l]))
            z.append(np.zeros(layers[l]))
    
        # b[i][j] -> "i" is which layer, "j" which neuron
        # w[i][j][k] -> "i" is which layer, "j" which neuron of the first layer, "k" which neuron of the second layer
        self.b = b
        self.w = w
        self.a = a
        self.z = z
        self.nLayers = len(layers)
        self.layers = layers

In [None]:
def sigmoid(n: float):
    return 1.0/(1.0+np.exp(-n))

def sigmoid_derivative(n: float):
    """Derivative of the sigmoid function."""
    return sigmoid(n)*(1-sigmoid(n))


In [1]:
def feedForward(net: Network) -> Network:
    """
    Feedforwading the activations to the next layer
    
    It will take as input the network already with the input image as the activation 
    on the first layer and then feedforward to the next layrse
    
    It returns the network with all the activations set
    """
    
    # resetting the activations as to not take any info from the activation of
    # the previous number while maintanin the first activation
    for i in range(1, net.nLayers):
        net.z[i] = np.zeros(net.layers[i])
        net.a[i] = np.zeros(net.layers[i])
    for l in range(0, net.nLayers-1):
        for receivingNeuron in range(net.layers[l+1]):
            for givingNeuron in range(net.layers[l]):
                net.z[l+1][receivingNeuron] += net.a[l][givingNeuron] * net.w[l][givingNeuron][receivingNeuron]
            net.z[l+1][receivingNeuron] += net.b[l][receivingNeuron]
            net.a[l+1][receivingNeuron] = sigmoid(net.z[l+1][receivingNeuron])

            
    return net
    
    

In [3]:
def findCostVector(network: Network, rightNumber):
    lastLayer = -1
    cost = np.zeros(network.nOutputs)
    for i in range(network.nOutputs):
        if i+1 == rightNumber:
            cost[i] = (network.activations[lastLayer][i] - 1)**2 
        else:
            cost[i] = (network.activations[lastLayer][i] - 0)**2 
    return cost

In [4]:
def findCostDerivative(network: Network, rightNumber):
    lastLayer = -1
    cost = np.zeros(network.nOutputs)
    for i in range(network.nOutputs):
        if i == rightNumber:
            cost[i] = 2*(network.activations[lastLayer][i] - 1)
        else:
            cost[i] = 2*(network.activations[lastLayer][i] - 0)
    return cost

In [5]:
def setActivations(network: Network, img):
        #passing the inputs to our network
        for i in range(28):
            for j in range(28):
                network.activations[0][28*i + j] = img[i][j]
        return network

In [6]:
def backPropagation(network: Network, xBatch, yBatch, batchSize, learningRate):
    eta = learningRate
    
    size = []
    size.append(network.nInputs)
    for i in range(network.nHiddenLayers):
        size.append(network.nNeuronsPerHL)
    size.append(network.nOutputs)
    delta_l = [np.zeros(s) for s in size[:None]]
    # recreating the shape of the biases and weights
    nablaB = np.empty_like(network.biases)
    nablaW = np.empty_like(network.weights)
    #-1 because array and -1 because the last layer will not be iterated and -1 because the first layer has no bias
    for img in range(batchSize):
        network = setActivations(network, xBatch[img])
        network = feedforward(network)
        delta_l[-1] = 2*findCostDerivative(network, yBatch[img])*sigmoidDerivative(network.zActivations[-1])
        
        # finding dC/dW and dC/dB
        for l in range(network.totalLayers-2, -1, -1): 
            if l == 0:
                delta_l[l] += np.dot(network.weights[l].T, delta_l[l+1])*sigmoidDerivative(network.zActivations[l])
                nablaB[l] += delta_l[l]
                nablaW[l] += np.dot(network.activations[l-1], delta_l[l])
    print(f"delta 0: {delta_l[0,0]}," 
              f"delta 1: {delta_l[1,0]}," 
              f"delta 2: {delta_l[2,0]}")
    
    # taking the averages
    nablaB = nablaB/batchSize
    nablaW = nablaW/batchSize
    # adjusting the network
    for i in range(network.totalLayers-2):
        network.biases[i] = network.biases[i] - eta*nablaB[i]
        network.weights[i] = network.weights[i] - eta*nablaW[i]
    return network

In [7]:
def SGD(network: Network, trainX, trainY, batchSize, epochs, learningRate):
    
    # shuffling
    trainingImages = list(zip(trainX, trainY))
    random.shuffle(trainingImages)
    trainX, trainY = zip(*trainingImages)

    for currentEpoch in range(epochs):
        yBatch = trainY[currentEpoch*batchSize: (currentEpoch + 1)*batchSize]
        xBatch = trainX[currentEpoch*batchSize: (currentEpoch + 1)*batchSize]
        # print("before: ", network.weights[0,0][0], network.biases[0,0])
        network = backPropagation(network, xBatch, yBatch, batchSize, learningRate)
        # print("after: ", network.weights[0,0][0], network.biases[0,0])
        hits = 0
        misses = 0
        numbersThinked = np.zeros(10)
        trainXX = trainX[currentEpoch*batchSize: (currentEpoch + 1)*batchSize]
        trainYY = trainY[currentEpoch*batchSize: (currentEpoch + 1)*batchSize]
        for imgX, imgY in zip(trainXX, trainYY):
            network = setActivations(network, imgX)
            network = feedforward(network)
            numberNetworkThinks, certainty = classify(network)
            numbersThinked[numberNetworkThinks] += 1 
            if numberNetworkThinks == imgY:
                hits += 1
            else:
                misses += 1
        acc = hits/misses
        print(f'epoch {currentEpoch+1}, accuracy = {acc}, numbers guessed = {numbersThinked}')
    return network 

In [8]:
network = Network(28*28, 1, 30, 10)
hits = 0
misses = 0
nTrainingImages = trainX.shape[0]
network = SGD(network, trainX, trainY, batchSize=100, epochs=100, learningRate = 3)

  sigNumber = 1/(1 + np.exp(-number))


ValueError: shapes (30,784) and (30,) not aligned: 784 (dim 1) != 30 (dim 0)