In [6]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [7]:
# Downloaded from https://www.kaggle.com/datasets/oddrationale/mnist-in-csv?resource=download
# The directory holding the CSV files can be overridden through the
# MNIST_DATA_DIR environment variable so the notebook is runnable on machines
# other than the author's; when the variable is unset the original location
# is used and the resulting paths are unchanged.
mnistDataDir = os.environ.get(
    "MNIST_DATA_DIR",
    "/Users/christianjohnson/Downloads/net/SimpleNeuralNet",
)
mnistTraining = os.path.join(mnistDataDir, "mnist_train.csv")
mnistTest = os.path.join(mnistDataDir, "mnist_test.csv")
# Only the training CSV is loaded here.
data = pd.read_csv(mnistTraining)

In [8]:
# Materialise the loaded DataFrame as an ndarray.
arrayData = np.array(data)
# m - number of rows (images) in the loaded CSV
# n - columns per row: the label followed by the intensity of each pixel (28x28)
m, n = arrayData.shape

# Randomise the row order in place before splitting.
# NOTE(review): no random seed is set in the visible cells, so the split
# differs from run to run.
np.random.shuffle(arrayData)

# The first 1000 shuffled rows form the dev set and the remaining rows the
# training set; both are transposed so that each column is one image.
devSet = np.transpose(arrayData[:1000])
tSet = np.transpose(arrayData[1000:])

# Row 0 carries the labels; every following row carries pixel values, which
# are divided by 255 to normalise them between 0 and 1.
devLabels, devVals = devSet[0], devSet[1:] / 255.0
tLabels, tVals = tSet[0], tSet[1:] / 255.0

tRows, tCols = tVals.shape

In [9]:
# initialization of weights and biases
def initalization(nInputs=784, nHidden=10, nOutputs=10):
    """Create randomly initialised parameters for a two-layer network.

    Every entry is drawn with ``np.random.rand`` and shifted by 0.5, i.e.
    uniformly from [-0.5, 0.5). The arrays are drawn in the order
    w1, b1, w2, b2.

    Parameters
    ----------
    nInputs : int, optional
        Number of input features (columns of ``w1``). Defaults to 784.
    nHidden : int, optional
        Number of hidden units. Defaults to 10.
    nOutputs : int, optional
        Number of output units. Defaults to 10.

    Returns
    -------
    tuple
        ``(w1, b1, w2, b2)`` with shapes ``(nHidden, nInputs)``,
        ``(nHidden, 1)``, ``(nOutputs, nHidden)`` and ``(nOutputs, 1)``.
    """
    w1 = np.random.rand(nHidden, nInputs) - 0.5
    b1 = np.random.rand(nHidden, 1) - 0.5
    w2 = np.random.rand(nOutputs, nHidden) - 0.5
    b2 = np.random.rand(nOutputs, 1) - 0.5
    return w1, b1, w2, b2

# reLu activation function
def reLu(x):
    """Return the element-wise maximum of ``x`` and 0.

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    Same shape as ``x`` with every negative entry replaced by 0.
    """
    # The result must be returned; without the `return` the function
    # always yielded None.
    return np.maximum(x, 0)

def sigmoind(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated without overflow.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).

    Returns
    -------
    Value(s) in [0, 1] with the same shape as ``x``.
    """
    # np.logaddexp(0, -x) is log(exp(0) + exp(-x)) = log(1 + exp(-x)),
    # computed in a way that never exponentiates a large positive number,
    # so very negative inputs no longer trigger overflow in exp.
    return np.exp(-np.logaddexp(0, -x))

def sigmoidDerivative(x):
    """Derivative of the sigmoid at ``x``: ``s * (1 - s)`` with ``s = sigmoind(x)``."""
    activation = sigmoind(x)
    return activation * (1 - activation)
    

# softMax function
def softMax(x):
    """Softmax along axis 0 (each column is normalised independently).

    Parameters
    ----------
    x : array-like
        Logits; for a 2-D input every column is treated as one sample.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``x`` whose entries along axis 0 sum to 1.
    """
    x = np.asarray(x)
    # Subtracting the per-column maximum leaves the result mathematically
    # unchanged but keeps every exponent <= 0, so large logits no longer
    # overflow to inf and produce NaN.
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    # np.sum with an explicit axis replaces the Python builtin sum.
    return exps / np.sum(exps, axis=0, keepdims=True)

# Forward Propagation
def forwardPropagation(w1, b1, w2, b2, x):
    """Push the input ``x`` through both layers of the network.

    The hidden layer applies ``sigmoind`` to ``w1 . x + b1``; the output
    layer applies ``softMax`` to ``w2 . hidden + b2``.

    Returns
    -------
    tuple
        ``(Z1, A1, Z2, A2)``: hidden pre-activation, hidden activation,
        output pre-activation and output activation, in that order.
    """
    hiddenPre = np.dot(w1, x) + b1
    hiddenAct = sigmoind(hiddenPre)
    outputPre = np.dot(w2, hiddenAct) + b2
    outputAct = softMax(outputPre)
    return hiddenPre, hiddenAct, outputPre, outputAct

# derivative of the ReLu function (see reLu graph in neuralNetNotes.pdf)
def reLuDerivative(x):
    """Element-wise derivative of ReLU: 0 where ``x < 0``, otherwise 1.

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    int or numpy.ndarray
        A plain ``int`` for scalar input, otherwise an integer array with
        the same shape as ``x``.
    """
    # A bare `if x < 0` raises on arrays with more than one element, so the
    # comparison is vectorised with np.where.
    grad = np.where(np.asarray(x) < 0, 0, 1)
    # Keep returning a plain Python int for scalar input.
    return grad.item() if grad.ndim == 0 else grad

# One hot Y encoding
def oneHotY(Y, numClasses=None):
    """One-hot encode integer labels, one column per label.

    Parameters
    ----------
    Y : numpy.ndarray
        1-D array of non-negative integer class labels.
    numClasses : int, optional
        Number of rows (classes) in the result. When omitted it is inferred
        as ``Y.max() + 1``; pass it explicitly when ``Y`` may not contain
        the highest class, otherwise the result would have too few rows.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(numClasses, Y.size)`` with a 1 at
        ``[Y[i], i]`` and 0 elsewhere.

    Raises
    ------
    IndexError
        If a label in ``Y`` is not smaller than ``numClasses``.
    """
    if numClasses is None:
        numClasses = Y.max() + 1
    oneHot = np.zeros((Y.size, numClasses))
    # Row i gets a 1 in the column named by label Y[i].
    oneHot[np.arange(Y.size), Y] = 1
    # Transpose so that samples run along the columns.
    return oneHot.T

# Backwards Propagation
def backPropagation(z1, a1, w2, a2, x, y):
    """Compute the parameter gradients for one batch.

    Parameters
    ----------
    z1 : numpy.ndarray
        Hidden-layer pre-activations.
    a1 : numpy.ndarray
        Hidden-layer activations.
    w2 : numpy.ndarray
        Output-layer weights.
    a2 : numpy.ndarray
        Output-layer activations.
    x : numpy.ndarray
        Input matrix with one column per sample.
    y : numpy.ndarray
        Integer labels, one per sample.

    Returns
    -------
    tuple
        ``(dw1, db1, dw2, db2)``. The weight gradients have the shapes of
        ``w1`` and ``w2``; the bias gradients are column vectors with one
        row per unit. All four are averaged over the samples in the batch.
    """
    # Average over the samples actually present in this batch rather than
    # over a module-level row count, which need not match the batch size.
    nSamples = x.shape[1]

    # Output-layer error: network output minus one-hot target.
    dz2 = a2 - oneHotY(y)
    dw2 = np.dot(dz2, a1.T) / nSamples
    # Sum across samples (axis 1) and keep the column shape so the result
    # matches the bias vector; the builtin sum would collapse the unit axis
    # instead and later broadcast the bias to one column per sample.
    db2 = np.sum(dz2, axis=1, keepdims=True) / nSamples

    # Propagate the error back through the hidden-layer sigmoid.
    dz1 = np.dot(w2.T, dz2) * sigmoidDerivative(z1)
    dw1 = np.dot(dz1, x.T) / nSamples
    db1 = np.sum(dz1, axis=1, keepdims=True) / nSamples
    return dw1, db1, dw2, db2

def updatedWeights(w1, b1, w2, b2, dw1, db1, dw2, db2, alpha):
    """Take one gradient-descent step on every parameter.

    Each parameter is replaced by ``parameter - alpha * gradient``.

    Returns
    -------
    tuple
        The updated ``(w1, b1, w2, b2)``.
    """
    pairs = ((w1, dw1), (b1, db1), (w2, dw2), (b2, db2))
    return tuple(param - alpha * grad for param, grad in pairs)

def gradientDescent(X, Y, alpha, iter):
    """Train the network with plain gradient descent.

    Starting from freshly initialised parameters, runs ``iter`` rounds of
    forward pass, backward pass and parameter update with step size
    ``alpha``. On every 100th iteration (starting with iteration 0) the
    accuracy of that iteration's forward pass against ``Y`` is printed.

    Returns
    -------
    tuple
        The final ``(w1, b1, w2, b2)``.
    """
    params = initalization()
    for step in range(iter):
        Z1, A1, Z2, A2 = forwardPropagation(*params, X)
        # params[2] is w2, the only parameter the backward pass needs.
        grads = backPropagation(Z1, A1, params[2], A2, X, Y)
        params = updatedWeights(*params, *grads, alpha)
        if step % 100 != 0:
            continue
        # A2 was computed before this step's update was applied.
        print(accuracy(predictions(A2), Y))
    return params

def predictions(A2):
    """For every column of ``A2`` return the index of its largest entry."""
    winners = np.argmax(A2, axis=0)
    return winners

def accuracy(predictions, Y):
    """Fraction of entries of ``predictions`` that equal the matching entry of ``Y``."""
    hits = predictions == Y
    return np.sum(hits) / Y.size