In [1]:
import math
import numpy as np
import random

#we will be using a fixed seed as to make our network behave equally between runs
np.random.seed(9)
class Network:
    def __init__(self, nInputs, nHiddenLayers, nNeuronsPerHL, nOutputs):
        totalLayers = nHiddenLayers + 2 # total layers = (one input+ nHiddenLayers+ one output)
        self.totalLayers = totalLayers
        self.nInputs = nInputs
        self.nHiddenLayers = nHiddenLayers
        self.nNeuronsPerHL = nNeuronsPerHL
        self.nOutputs = nOutputs
        
        #initializing the weights and biases randomly using a gaussian distribution with mean 0 and standard deviation 1
        sizes = [nInputs]
        for i in range(nHiddenLayers):
            sizes.append(nNeuronsPerHL)
        sizes.append(nOutputs)
        self.biases = [np.random.normal(loc=0, scale=1, size=s) for s in sizes[1:None]]                    #which layer, which neuron

        self.weights =[np.random.normal(loc=0, scale=1, size=(x,y)) for x,y in zip(sizes[:None], sizes[1:])]
        #which layer, which neuron, which weight
        #to access the 1st weight layer 5th exiting neuron 10th arriving neuron: network.weights[0][4][9]
        #+1 because the number of weight layers is the total number of layers - 1
        self.zActivations = [np.zeros(s) for s in sizes[1:None]]
        self.activations = [np.zeros(s) for s in sizes]
        
def sigmoid(number):
    sigNumber = 1/(1 + np.exp(-number))
    return sigNumber  

def feedforward(network: Network):
    givingLayer = 0
    # looping until the last layer
    while givingLayer < network.totalLayers-1:                 
        if givingLayer == 0:                           #if its the input layer
            nGivingNeurons = network.nInputs
            nReceivingNeurons = network.nNeuronsPerHL
        elif givingLayer == network.totalLayers-2: #if its the layer before the output layer
            nGivingNeurons = network.nNeuronsPerHL
            nReceivingNeurons = network.nOutputs
        else:                                          #if its any layer inbetween
            nGivingNeurons = network.nNeuronsPerHL
            nReceivingNeurons = network.nNeuronsPerHL
        currentLayer = 0
        #for each neuron in the layer being fed    
        for receivingNeuron in range(nReceivingNeurons):
            #the activation of the current neuron is its own biases + all the weights*activations of the previous layer
            activation = network.biases[currentLayer][receivingNeuron]
            for givingNeuron in range(nGivingNeurons):
                activation += network.weights[givingLayer][givingNeuron][receivingNeuron]*network.activations[givingLayer][givingNeuron]
            network.activations[givingLayer+1][receivingNeuron] = sigmoid(activation)
            
        givingLayer += 1
        currentLayer += 1
    
    return network

def classify(network: Network):
    maxIndex = np.argmax(network.activations[-1])
    return maxIndex, network.activations[-1][maxIndex]

from keras.datasets import mnist
(trainX, trainY), (testX, testY) = mnist.load_data()

In [2]:
def sigmoidDerivative(number):
    return sigmoid(number)*(1-sigmoid(number))

In [3]:
def findCostVector(network: Network, rightNumber):
    lastLayer = -1
    cost = np.zeros(network.nOutputs)
    for i in range(network.nOutputs):
        if i+1 == rightNumber:
            cost[i] = (network.activations[lastLayer][i] - 1)**2 
        else:
            cost[i] = (network.activations[lastLayer][i] - 0)**2 
    return cost

In [4]:
def findCostDerivative(network: Network, rightNumber):
    lastLayer = -1
    cost = np.zeros(network.nOutputs)
    for i in range(network.nOutputs):
        if i == rightNumber:
            cost[i] = 2*(network.activations[lastLayer][i] - 1)
        else:
            cost[i] = 2*(network.activations[lastLayer][i] - 0)
    return cost

In [5]:
def setActivations(network: Network, img):
        #passing the inputs to our network
        for i in range(28):
            for j in range(28):
                network.activations[0][28*i + j] = img[i][j]
        return network

In [6]:
def backPropagation(network: Network, xBatch, yBatch, batchSize, learningRate):
    eta = learningRate
    
    size = []
    size.append(network.nInputs)
    for i in range(network.nHiddenLayers):
        size.append(network.nNeuronsPerHL)
    size.append(network.nOutputs)
    delta_l = [np.zeros(s) for s in size[:None]]
    # recreating the shape of the biases and weights
    nablaB = np.empty_like(network.biases)
    nablaW = np.empty_like(network.weights)
    #-1 because array and -1 because the last layer will not be iterated and -1 because the first layer has no bias
    for img in range(batchSize):
        network = setActivations(network, xBatch[img])
        network = feedforward(network)
        delta_l[-1] = 2*findCostDerivative(network, yBatch[img])*sigmoidDerivative(network.zActivations[-1])
        
        # finding dC/dW and dC/dB
        for l in range(network.totalLayers-2, -1, -1): 
            if l == 0:
                delta_l[l] += np.dot(network.weights[l].T, delta_l[l+1])*sigmoidDerivative(network.zActivations[l])
                nablaB[l] += delta_l[l]
                nablaW[l] += np.dot(network.activations[l-1], delta_l[l])
    print(f"delta 0: {delta_l[0,0]}," 
              f"delta 1: {delta_l[1,0]}," 
              f"delta 2: {delta_l[2,0]}")
    
    # taking the averages
    nablaB = nablaB/batchSize
    nablaW = nablaW/batchSize
    # adjusting the network
    for i in range(network.totalLayers-2):
        network.biases[i] = network.biases[i] - eta*nablaB[i]
        network.weights[i] = network.weights[i] - eta*nablaW[i]
    return network

In [7]:
def SGD(network: Network, trainX, trainY, batchSize, epochs, learningRate):
    
    # shuffling
    trainingImages = list(zip(trainX, trainY))
    random.shuffle(trainingImages)
    trainX, trainY = zip(*trainingImages)

    for currentEpoch in range(epochs):
        yBatch = trainY[currentEpoch*batchSize: (currentEpoch + 1)*batchSize]
        xBatch = trainX[currentEpoch*batchSize: (currentEpoch + 1)*batchSize]
        # print("before: ", network.weights[0,0][0], network.biases[0,0])
        network = backPropagation(network, xBatch, yBatch, batchSize, learningRate)
        # print("after: ", network.weights[0,0][0], network.biases[0,0])
        hits = 0
        misses = 0
        numbersThinked = np.zeros(10)
        trainXX = trainX[currentEpoch*batchSize: (currentEpoch + 1)*batchSize]
        trainYY = trainY[currentEpoch*batchSize: (currentEpoch + 1)*batchSize]
        for imgX, imgY in zip(trainXX, trainYY):
            network = setActivations(network, imgX)
            network = feedforward(network)
            numberNetworkThinks, certainty = classify(network)
            numbersThinked[numberNetworkThinks] += 1 
            if numberNetworkThinks == imgY:
                hits += 1
            else:
                misses += 1
        acc = hits/misses
        print(f'epoch {currentEpoch+1}, accuracy = {acc}, numbers guessed = {numbersThinked}')
    return network 

In [8]:
network = Network(28*28, 1, 30, 10)
hits = 0
misses = 0
nTrainingImages = trainX.shape[0]
network = SGD(network, trainX, trainY, batchSize=100, epochs=100, learningRate = 3)

  sigNumber = 1/(1 + np.exp(-number))


ValueError: shapes (30,784) and (30,) not aligned: 784 (dim 1) != 30 (dim 0)