In [None]:
import numpy as np
import random

In [None]:
from tensorflow.keras.datasets import mnist
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

In [None]:
print('MNIST Dataset Shape:')
print('X_train: ' + str(X_train.shape))
print('Y_train: ' + str(Y_train.shape))
print('X_test:  '  + str(X_test.shape))
print('Y_test:  '  + str(Y_test.shape))

MNIST Dataset Shape:
X_train: (60000, 28, 28)
Y_train: (60000,)
X_test:  (10000, 28, 28)
Y_test:  (10000,)


In [None]:
def data_preprocess():
    training_inputs = [np.reshape(x/256, (784, 1)) for x in X_train[:50000]]
    train_data = zip(training_inputs, Y_train[:50000])
    
    valid_inputs = [np.reshape(x/256, (784, 1)) for x in X_train[50000:]]
    valid_data = zip(valid_inputs, Y_train[50000:])
    
    test_inputs = [np.reshape(x/256, (784, 1)) for x in X_test]
    test_data = zip(test_inputs, Y_test)
    
    return (train_data, valid_data, test_data)

In [None]:
def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))

def d_sigmoid(z):
    return sigmoid(z)*(1-sigmoid(z))

def rmse(x, y):
    return ((x-y)**2)/2

def d_rmse(x, y):
    return (x-y)

In [None]:
class Network:
    def __init__ (self, hiddenLayerSize = 10):
        """Creating weights and biases for two layers, one hidden and one output"""
        self.weights = [np.random.randn(hiddenLayerSize, 784) , np.random.randn(10, hiddenLayerSize)]
        self.biases = [np.random.randn(hiddenLayerSize, 1) , np.random.randn(10, 1)]
        
    def predict(self, activation):
        """Predicting for single input"""
        activation = sigmoid(np.dot(self.weights[0], activation) + self.biases[0])
        activation = sigmoid(np.dot(self.weights[1], activation) + self.biases[1])
        return activation
    
    def fit(self, train_data, epochs, batch_size, learning_rate, valid_data=None):
        train_data = list(train_data)
        nTrain = len(train_data)
        
        if valid_data:
            valid_data = list(valid_data)
            nValid = len(valid_data)
        
        # Training
        for e in range(epochs):
            random.shuffle(train_data)
            batch_data = [train_data[k:k+batch_size] for k in range(0, nTrain, batch_size)]
            for batch in batch_data:
                self.gda(batch, learning_rate)
            if valid_data:
                print("Epoch {:3} - loss {:7.4f} - accuracy {:7.4f}".format(e + 1, self.loss(valid_data), self.evaluate(valid_data) / nValid));
            else:
                print("Epoch {} complete".format(e + 1))
            
    def gda(self, batch, learning_rate):
        sum_db = [np.zeros(b.shape) for b in self.biases]
        sum_dw = [np.zeros(w.shape) for w in self.weights]
        
        for X, y in batch:
            db, dw = self.backpropogation(X, y)
            sum_db = [b + sb for b, sb in zip(sum_db, db)]
            sum_dw = [w + sw for w, sw in zip(sum_dw, dw)]
        
        self.biases = [b-(learning_rate/len(batch))*sb for b, sb in zip(self.biases, sum_db)]
        self.weights = [w-(learning_rate/len(batch))*sw for w, sw in zip(self.weights, sum_dw)]
        
        
    def backpropogation(self, X, y):        
        db = [np.zeros(b.shape) for b in self.biases]
        dw = [np.zeros(w.shape) for w in self.weights]
        
        # Coverting output to one hot vector
        Y = np.zeros((10, 1))
        Y[y] = 1
                
        # Feedforward
        """Layer 1"""
        weighted_input_1 = np.dot(self.weights[0], X) + self.biases[0] 
        activation_1 = sigmoid(weighted_input_1)
        """Layer 2"""
        weighted_input_2 = np.dot(self.weights[1], activation_1) + self.biases[1]
        activation_2 = sigmoid(weighted_input_2)
        
        # Backwordpass
        """Layer 2"""
        error = d_rmse(activation_2, Y) * d_sigmoid(weighted_input_2)
        db[1] = error
        dw[1] = np.dot(error, activation_1.transpose())
        """Layer 1"""
        error = np.dot(self.weights[1].transpose(), error) * d_sigmoid(weighted_input_1)
        db[0] = error
        dw[0] = np.dot(error, X.transpose())
        
        return (db, dw)
    
    def evaluate(self, data):
        results = [(np.argmax(self.predict(x)), y) for (x, y) in data]
        return sum(int(x == y) for (x, y) in results)
    
    def loss(self, data):
        results = [rmse(np.argmax(self.predict(x)), y) for (x, y) in data]
        return sum(results)/len(data)
        

In [None]:
training_data, valid_data, test_data = data_preprocess()
nn = Network(20)
nn.fit(training_data, 20, 10, 3.0, valid_data=valid_data)

Epoch   1 - loss  0.8186 - accuracy  0.9032
Epoch   2 - loss  0.6313 - accuracy  0.9231
Epoch   3 - loss  0.5900 - accuracy  0.9302
Epoch   4 - loss  0.6171 - accuracy  0.9258
Epoch   5 - loss  0.5842 - accuracy  0.9339
Epoch   6 - loss  0.5570 - accuracy  0.9355
Epoch   7 - loss  0.5361 - accuracy  0.9391
Epoch   8 - loss  0.5765 - accuracy  0.9352
Epoch   9 - loss  0.5091 - accuracy  0.9392
Epoch  10 - loss  0.5150 - accuracy  0.9395
Epoch  11 - loss  0.5270 - accuracy  0.9393
Epoch  12 - loss  0.5315 - accuracy  0.9374
Epoch  13 - loss  0.4647 - accuracy  0.9439
Epoch  14 - loss  0.4912 - accuracy  0.9424
Epoch  15 - loss  0.5331 - accuracy  0.9414
Epoch  16 - loss  0.4786 - accuracy  0.9443
Epoch  17 - loss  0.5124 - accuracy  0.9415
Epoch  18 - loss  0.5125 - accuracy  0.9404
Epoch  19 - loss  0.4824 - accuracy  0.9456
Epoch  20 - loss  0.4994 - accuracy  0.9447
