https://github.com/LorenzoVaralo/MNIST-from-Scratch/blob/main/MNISTalgorithm.ipynb

In [3]:
import numpy as np
import mnist_loader as ml
import time

In [4]:
# Load the three dataset splits through the project-local `mnist_loader` module (imported as `ml`).
# Each split is later unpacked with zip(*...), i.e. it is consumed as an iterable of (input, label) pairs.
training_data, validation_data, test_data = ml.load_data_wrapper()

In [5]:
# Split every dataset of (input, label) pairs into parallel tuples of inputs and labels.
a, b = zip(*training_data)
c, d = zip(*validation_data)
e, f = zip(*test_data)

# Flatten each input to a 784-vector and stack them: one row per sample.
a = np.array([x.reshape(784,) for x in a])
c = np.array([x.reshape(784,) for x in c])
e = np.array([x.reshape(784,) for x in e])

# Training inputs = training + validation samples, with a leading column of
# ones that acts as the bias input for the first weight matrix.
trainX = np.concatenate((a, c))
trainX = np.concatenate((np.ones([trainX.shape[0], 1]), trainX), axis=1)

# Training labels are reshaped to 10-vectors; validation labels are used as
# class indices and therefore have to be one-hot encoded first.
b = np.array([y.reshape(10, ) for y in b])
cat = np.zeros([len(d), 10])
for i, y in enumerate(d):
    cat[i][y] = 1
d = cat

trainY = np.concatenate((b, d))

testX = np.concatenate((np.ones([e.shape[0], 1]), e), axis=1)

# One-hot encode the test labels. The matrix must be sized by the number of
# *test* labels (`f`); sizing it by the validation labels only worked while
# both splits happened to contain the same number of samples.
cat = np.zeros([len(f), 10])
for i, y in enumerate(f):
    cat[i][y] = 1
testY = cat

In [6]:
# Report the shape of every prepared array, one per line.
shape_report = [
    f'trainX: {trainX.shape}',
    f'trainY: {trainY.shape}',
    f'testX: {testX.shape}',
    f'testY: {testY.shape}',
]
print('\n'.join(shape_report))

trainX: (60000, 785)
trainY: (60000, 10)
testX: (10000, 785)
testY: (10000, 10)


In [7]:
class Network:
    """Fully connected feed-forward classifier trained with mini-batch gradient descent.

    Conventions used throughout the class:

    * Layers are numbered from 1 (input) to ``len(sizes)`` (output).
    * ``weights['W{l}']`` maps layer ``l`` to layer ``l+1`` and has shape
      ``(sizes[l-1] + 1, sizes[l])``; row 0 holds the bias weights.
    * Every input matrix passed to the public methods must already carry a
      leading column of ones (the bias input), one sample per row.

    Parameters
    ----------
    sizes : sequence of int
        Number of units per layer, input layer first (bias units excluded).
    activations : sequence of str
        Activation name for each non-input layer: ``'sigmoid'``, ``'relu'`` or
        ``'softmax'``. Only ``'sigmoid'`` and ``'relu'`` have a derivative
        implemented, so ``'softmax'`` is usable for the output layer only.
    epochs : int
        Number of passes over the training data performed by ``train``.
    lr : float
        Learning rate of the gradient-descent update.
    batch : int
        Mini-batch size. A non-positive value (the default ``0``) means
        full-batch training: one update per epoch on the whole training set.
    """

    def __init__(self, sizes, activations, epochs=10, lr=0.1, batch=0):
        self.sizes = sizes
        self.activations = activations
        self.epochs = epochs
        self.lr = lr
        self.batch = batch
        self.weights = self.initWeights()

    def initWeights(self):
        """Return a dict of weight matrices drawn from N(0, 1) and scaled by 1/10.

        The extra first row of each matrix multiplies the bias column.
        """
        return {
            f'W{layer+1}': np.random.randn(self.sizes[layer]+1, self.sizes[layer+1]) / 10
            for layer in range(len(self.sizes)-1)
        }

    def activ(self, x, activation):
        """Apply the named activation element-wise (row-wise for softmax).

        Raises
        ------
        ValueError
            If ``activation`` is not one of the supported names.
        """
        if activation == 'sigmoid':
            return 1/(1+np.exp(-x))

        if activation == 'relu':
            return np.maximum(0, x)

        if activation == 'softmax':
            # Subtract the row maximum for numerical stability; every row is one
            # observation, so normalise along axis 1 and keep dims to broadcast.
            e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
            return e_x / e_x.sum(axis=1, keepdims=True)

        raise ValueError(f'Unsupported activation: {activation!r}')

    def dactiv(self, x, activation):
        """Return the derivative of a hidden-layer activation evaluated at ``x``.

        Raises
        ------
        ValueError
            If no derivative is implemented for ``activation``.
        """
        if activation == 'sigmoid':
            s = self.activ(x, 'sigmoid')  # evaluate the sigmoid only once
            return s * (1 - s)

        if activation == 'relu':
            return np.where(x >= 0, 1, 0)

        raise ValueError(f'No derivative implemented for activation: {activation!r}')

    def forwardPass(self, X):
        """Propagate ``X`` through the network and return all intermediate values.

        Returns a dict with ``'a1'`` (the input), ``'z{l}'`` (pre-activations)
        and ``'a{l}'`` (activations with a prepended ones column) for the
        following layers, and ``'h'``: the output-layer activation without the
        bias column. The output layer has no ``'a'`` entry; it is stored as ``'h'``.
        """
        M = X.shape[0]
        last = len(self.sizes)
        FPsteps = {'a1': X}

        for layer in range(1, last):
            activation = self.activations[layer-1]
            z = FPsteps[f'a{layer}'] @ self.weights[f'W{layer}']
            FPsteps[f'z{layer+1}'] = z
            # Prepend the bias column so the result can feed the next weight matrix.
            FPsteps[f'a{layer+1}'] = np.concatenate((np.ones([M, 1]), self.activ(z, activation)), axis=1)

        # The output layer needs no bias unit: strip it and expose it as 'h'.
        h = FPsteps.pop(f'a{last}')
        FPsteps['h'] = h[:, 1:]

        return FPsteps

    def dLdW(self, X, Y):
        """Back-propagate and return the batch-averaged gradients.

        Returns a list whose ``j``-th entry is the gradient for
        ``weights['W{j+1}']``, stored transposed (shape
        ``(sizes[j+1], sizes[j] + 1)``).
        """
        M = X.shape[0]
        nLayers = len(self.sizes)
        FPsteps = self.forwardPass(X)

        # Output-layer error. `h - Y` presumably corresponds to a softmax output
        # combined with a cross-entropy loss; the loss itself is never computed.
        deltas = {f'delta{nLayers}': FPsteps['h'] - Y}
        for i in range(nLayers-1, 1, -1):
            # Drop the bias row of W{i}: the bias unit receives no error signal.
            deltas[f'delta{i}'] = (deltas[f'delta{i+1}'] @ self.weights[f'W{i}'][1:].T) * \
                self.dactiv(FPsteps[f'z{i}'], self.activations[i-2])

        return [(deltas[f'delta{i+2}'].T @ FPsteps[f'a{i+1}']) / M for i in range(len(deltas))]

    def accuracy(self, X, Y):
        """Return the fraction of rows whose arg-max prediction matches one-hot ``Y``."""
        h = self.forwardPass(X)['h']
        yPred = np.argmax(h, axis=1)
        yReal = np.argmax(Y, axis=1)
        return np.mean(yPred == yReal)

    def train(self, xTrain, yTrain, xTest, yTest):
        """Run ``self.epochs`` epochs of mini-batch gradient descent.

        After every epoch the accuracy on the training and test data is
        printed; the test data is used for reporting only. Samples are visited
        in their given order (no shuffling). The final batch of an epoch may be
        smaller than ``self.batch`` so that no sample is skipped.
        """
        M = xTrain.shape[0]
        # A non-positive batch size selects full-batch training instead of
        # dividing by zero; max(..., 1) keeps range() valid for empty input.
        batchSize = self.batch if self.batch and self.batch > 0 else max(M, 1)

        for epoch in range(self.epochs):

            for start in range(0, M, batchSize): # backPropagation
                stop = start + batchSize
                gradients = self.dLdW(xTrain[start:stop], yTrain[start:stop])
                # Gradients are stored transposed relative to the weights.
                newWeights = {f'W{j+1}': self.weights[f'W{j+1}'] - self.lr*gradients[j].T \
                              for j in range(len(gradients))}
                self.weights.update(newWeights)

            accTrain = self.accuracy(xTrain, yTrain)
            accTest = self.accuracy(xTest, yTest)

            print(f'Epoch: {epoch+1}; Accuracy Train: {accTrain}; Accuracy Test: {accTest}')



In [8]:
# Build the classifier: 784 inputs -> 200 sigmoid units -> 80 relu units -> 10 softmax outputs,
# trained with learning rate 0.1 on mini-batches of 100 samples for 10 epochs.
# NOTE(review): the initial weights come from np.random and no seed is set anywhere in the
# visible cells, so results differ between runs.
N = Network([784, 200, 80, 10], ['sigmoid', 'relu', 'softmax'], epochs=10, lr=0.1, batch=100)

In [9]:
# Train on the combined training + validation samples; the test arrays are only used to
# print the accuracy after each epoch. The recorded run below was interrupted after epoch 1.
N.train(trainX, trainY, testX, testY)

Epoch: 1; Accuracy Train: 0.8641333333333333; Accuracy Test: 0.8744


KeyboardInterrupt: 

In [19]:
# Scratch 3x3 array holding 1..9, used to check what `[1:]` row slicing returns.
# NOTE(review): `dupa` is rebound to a weight matrix in another cell and the execution
# counts (19, 27, 22) are out of order, so the recorded outputs depend on run order.
dupa = np.arange(1, 10).reshape((3, 3))

In [27]:
# Inspect the last weight matrix without its bias row (row 0), transposed as it is
# used when propagating deltas backwards in Network.dLdW.
dupa = N.weights['W3']
dupa[1:].T.shape

(10, 80)

In [22]:
# Show every row of `dupa` except the first.
# NOTE(review): the recorded output is from the 3x3 scratch array (this cell ran as In [22],
# before `dupa` was rebound to W3 in In [27]); on a top-to-bottom run it would print W3 rows.
dupa[1:]

array([[4, 5, 6],
       [7, 8, 9]])