In [None]:
import numpy as np
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
import random
import time
import pickle
import csv

In [None]:
def load_data(path):
    """Read the train/test arrays stored in a NumPy ``.npz`` archive.

    Args:
        path: Location of the archive, passed straight to ``np.load``.
            The archive must contain the keys ``x_train``, ``y_train``,
            ``x_test`` and ``y_test``.

    Returns:
        The 4-tuple ``(x_train, y_train, x_test, y_test)``.
    """
    wanted_keys = ('x_train', 'y_train', 'x_test', 'y_test')
    # The archive is closed when the ``with`` block exits; every array is
    # read out of it before that happens.
    with np.load(path) as archive:
        return tuple(archive[key] for key in wanted_keys)

In [None]:
"""
Loading the MNIST dataset
"""

NUM_CLASSES = 10  # digits 0-9

x_train, y_train, x_test, y_test = load_data('mnist.npz')

# Flatten every image into a column vector of shape (n_pixels, 1) and divide
# by 255 (assumes 8-bit pixel intensities, so values end up in [0, 1]).
# The sample count is taken from the data instead of being hard-coded, so the
# cell also works on a subset of the dataset.
x_train = x_train.reshape(len(x_train), -1, 1) / 255
x_test = x_test.reshape(len(x_test), -1, 1) / 255

# One-hot encode the integer labels as column vectors of shape
# (NUM_CLASSES, 1): row k of the identity matrix is the encoding of label k.
y_train = np.eye(NUM_CLASSES)[y_train].reshape(-1, NUM_CLASSES, 1)
y_test = np.eye(NUM_CLASSES)[y_test].reshape(-1, NUM_CLASSES, 1)

# Lists of (input, target) pairs, the format NeuralNetwork.train/accuracy
# iterate over.
train_data = list(zip(x_train, y_train))
test_data = list(zip(x_test, y_test))

In [None]:
class NeuralNetwork:
    """Minimal fully-connected feed-forward network implemented with numpy.

    Every layer applies a sigmoid activation and there are no bias terms:
    layer ``k`` computes ``sigmoid(weights[k] @ a)``. Inputs and targets are
    expected to be column vectors (2-D arrays with a single column) so that
    the matrix products in ``predict`` and ``backprop`` line up.

    Attributes:
        L: Number of layers, counting the input layer.
        shapes: ``(n_out, n_in)`` shape of each weight matrix.
        weights: List of weight matrices, one per pair of adjacent layers.
        times: Wall-clock duration in seconds of every evaluated epoch.
        accuracies: Test accuracy in percent after every evaluated epoch.
    """

    def __init__(self, sizes):
        """Create a network with standard-normal random weights.

        Args:
            sizes: Sequence of layer widths, e.g. ``[784, 16, 16, 10]``.
        """
        sizes = list(sizes)
        self.L = len(sizes)  # number of layers
        self.shapes = list(zip(sizes[1:], sizes[:-1]))  # shapes of the weight matrices
        self.weights = [np.random.randn(*shape) for shape in self.shapes]
        self.times = []
        self.accuracies = []

    @staticmethod
    def sigmoid(x):
        """Sigmoid activation function, applied element-wise."""
        return 1 / (1 + np.exp(-x))

    def sigmoid_p(self, x):
        """Derivative of the sigmoid evaluated at ``x``."""
        g = self.sigmoid(x)
        return g * (1 - g)

    def predict(self, x):
        """Run a forward pass and return the output-layer activations.

        Args:
            x: Input column vector compatible with the first weight matrix.
        """
        a = x
        for w in self.weights:
            a = self.sigmoid(w @ a)
        return a

    def accuracy(self, test_data):
        """Count how many samples are classified correctly.

        Despite the name this returns a *count*, not a ratio; ``train``
        converts it to a percentage.

        Args:
            test_data: Iterable of ``(input, one_hot_target)`` pairs.

        Returns:
            Number of samples whose arg-max prediction equals the arg-max of
            the target.
        """
        return sum(
            int(np.argmax(self.predict(x)) == np.argmax(y))
            for x, y in test_data
        )

    def train(self, train_data, learning_rate, batch_size, epochs, test_data=None):
        """Train the model with mini-batch stochastic gradient descent.

        Args:
            train_data: Iterable of ``(input, target)`` pairs. It is copied
                before shuffling, so the caller's ordering is left untouched.
            learning_rate: Step size of each weight update.
            batch_size: Number of samples per mini-batch (positive integer).
            epochs: Number of full passes over ``train_data``.
            test_data: Optional iterable of ``(input, target)`` pairs. When
                it is non-empty, the epoch duration and the test accuracy are
                appended to ``self.times`` / ``self.accuracies`` and printed
                after every epoch.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        all_time = time.time()
        # Private copies: shuffling must not reorder the caller's list, and
        # materialising test_data makes len()/truthiness safe for any iterable
        # (including numpy arrays, whose truth value is ambiguous).
        samples = list(train_data)
        test_samples = [] if test_data is None else list(test_data)
        m = len(samples)

        for epoch in range(1, epochs + 1):
            epoch_time = time.time()
            random.shuffle(samples)  # new random batch assignment every epoch

            # Stepping by batch_size never yields an empty trailing batch.
            for start in range(0, m, batch_size):
                self.update_mini_batch(samples[start:start + batch_size], learning_rate)

            if test_samples:
                # Epoch run time (training only) and accuracy on the test set
                epoch_time_ = round(time.time() - epoch_time, 2)
                self.times.append(epoch_time_)
                test_data_len = len(test_samples)
                acc = self.accuracy(test_samples)
                self.accuracies.append((acc / test_data_len) * 100)

                # Logging the accuracy and the epoch time
                print(f"epoch {epoch}: {acc} / {test_data_len}, {epoch_time_}s")
        print(str(round(time.time() - all_time, 2)) + "s")

    def update_mini_batch(self, batch, learning_rate):
        """Apply one gradient-descent step using the mean gradient of a batch.

        Args:
            batch: Sequence of ``(input, target)`` pairs. An empty batch is a
                no-op.
            learning_rate: Step size of the update.
        """
        m = len(batch)
        if m == 0:
            return  # nothing to average over; avoids dividing by zero

        grad_sums = [np.zeros(w.shape) for w in self.weights]
        for item in batch:
            for grad_sum, grad in zip(grad_sums, self.backprop(item)):
                grad_sum += grad  # in-place accumulation into grad_sums

        step = learning_rate / m
        for w, grad_sum in zip(self.weights, grad_sums):
            w -= step * grad_sum  # in-place update of the stored matrix

    def backprop(self, item):
        """Compute the per-weight gradients for a single sample.

        The output-layer error is ``a - y``, which is the gradient of the
        cross-entropy cost with respect to the pre-activation of a sigmoid
        output layer.

        Args:
            item: ``(input, target)`` pair of column vectors.

        Returns:
            List of arrays, one per weight matrix and in the same order as
            ``self.weights``, each with the shape of its weight matrix.
        """
        x, y = item[0], item[1]

        # Forward pass, remembering the activation of every layer.
        activations = [x]
        for w in self.weights:
            activations.append(self.sigmoid(w @ activations[-1]))

        n_weights = len(self.weights)
        grads = [None] * n_weights
        error = activations[-1] - y
        # Backward pass from the last weight matrix to the first.
        for k in range(n_weights - 1, -1, -1):
            grads[k] = error @ activations[k].T
            if k > 0:
                # sigmoid'(z) == a * (1 - a); reuse the stored activation
                # instead of recomputing the sigmoid of z.
                a_prev = activations[k]
                error = (self.weights[k].T @ error) * (a_prev * (1 - a_prev))
        return grads

In [None]:
# Seed both random number generators used by the notebook so that a fresh
# "Restart & Run All" reproduces the same run: numpy draws the initial
# weights, and the stdlib `random` module shuffles the training data.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

# 784 input units, two hidden layers of 16 units, 10 output units.
model = NeuralNetwork([784, 16, 16, 10])

In [None]:
# Training hyperparameters, named so they are easy to find and tune.
LEARNING_RATE = 0.5
BATCH_SIZE = 500
EPOCHS = 3

model.train(
    train_data,
    learning_rate=LEARNING_RATE,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    test_data=test_data,
)