In [22]:
import math
import random
import numpy as np
import matplotlib.pyplot as plt
import os
import struct as st

In [25]:
def process_image_file(image_file):
    image_file.seek(0)
    magic = st.unpack('>4B', image_file.read(4))

    n_images = st.unpack('>I', image_file.read(4))[0]
    n_rows = st.unpack('>I', image_file.read(4))[0]
    n_columns = st.unpack('>I', image_file.read(4))[0]
    n_bytes = n_images * n_rows * n_columns

    images = np.zeros((n_images, n_rows * n_columns))
    images = np.asarray(st.unpack('>' + 'B' * n_bytes, image_file.read(n_bytes))).reshape((n_images, n_rows * n_columns))

    return images

def process_label_file(label_file):
    label_file.seek(0)
    magic = st.unpack('>4B', label_file.read(4))

    n_labels = st.unpack('>I', label_file.read(4))[0]

    labels = np.zeros((n_labels))
    labels = np.asarray(st.unpack('>' + 'B' * n_labels, label_file.read(n_labels)))

    targets = np.array([labels]).reshape(-1)

    one_hot_labels = np.eye(10)[targets]

    return one_hot_labels

def dataset():
    home = os.path.expanduser('~') + '/Datasets/'

    test_images = open(home + 't10k-images-idx3-ubyte', 'rb')
    test_labels = open(home + 't10k-labels-idx1-ubyte', 'rb')
    train_images = open(home + 'train-images-idx3-ubyte', 'rb')
    train_labels = open(home + 'train-labels-idx1-ubyte', 'rb')
    
    train_images = process_image_file(train_images)
    test_images = process_image_file(test_images)
    train_labels = process_label_file(train_labels)
    test_labels = process_label_file(test_labels)
    
    return ((train_images, test_images), (train_labels, test_labels))

In [255]:
class Dense:
    def __init__(self, n_inputs, n_neurons):
        self.w = np.random.randn(n_inputs, n_neurons).astype(np.float32) * np.sqrt(1. / n_neurons)
        self.b = np.zeros((n_neurons))
        self.z = np.zeros((n_neurons))
        self.d_w = np.zeros((n_inputs, n_neurons))
        self.d_b = np.zeros((n_neurons))

    def forward(self, x):
        self.z = np.dot(x, self.w) + self.b

    def backward(self, error, a):
        self.d_w = (1. / n_samples) * np.matmul(error, a)
        self.d_b = (1. / n_samples) * np.sum(error.T, axis=1, keepdims=True)

class ReLU:
    def __init__(self):
        self.a = 0
        self.d_a = 0
        
    def forward(self, x):
        self.a = np.maximum(0, x)

    def backward(self, x):
        self.d_a = (x > 0) * 1

class Softmax:
    def __init__(self):
        self.a = 0
        self.d_a = 0
        
    def forward(self, x):
        exp = np.exp(x - x.max())
        self.a = exp / np.sum(exp, axis=0)
    
    def backward(self, x):
        exp = np.exp(x - x.max())
        self.d_a = exp / np.sum(exp, axis=0) * (1 - exp / np.sum(exp, axis=0))

class Sigmoid:
    def __init__(self):
        self.a = 0
        self.d_a = 0
        
    def forward(self, x):
        self.a = 1. / (1. + np.exp(-x))

    def backward(self, x):
        exp = 1. / (1. + np.exp(-x))
        self.d_a = exp * (1 - exp)

In [256]:
def batch_loss(y, a):
    l_sum = np.sum(np.multiply(y, np.log(a)))
    m = y.shape[1]
    l = -(1. / m) * l_sum

    return l

In [263]:
# Get MNIST training data
((train_images, test_images), (train_labels, test_labels)) = dataset()
train_images, test_images = train_images / 255.0, test_images / 255.0

# Hyperparameters
n_training_examples = 60000
n_inputs = 784
n_samples = 32
n_epochs = 1
n_batches = math.floor(n_training_examples / n_samples)
learning_rate = 1

# Layers
layer1 = Dense(n_inputs = n_inputs, n_neurons = 128)
activation1 = Sigmoid()
layer2 = Dense(n_inputs = 128, n_neurons = 10)
activation2 = Sigmoid()

In [264]:
def forward(inputs):
    layer1.forward(inputs)
    activation1.forward(layer1.z)

    layer2.forward(activation1.a)
    activation2.forward(layer2.z)

In [265]:
def backward(x, y):
    d_Z2 = activation2.a - y
    
    layer2.backward(d_Z2.T, activation1.a)
    
    d_A1 = np.matmul(layer2.w, d_Z2.T)
    
    activation1.backward(layer1.z)
    
    d_Z1 = d_A1.T * activation1.d_a
    
    layer1.backward(d_Z1.T, x)

In [266]:
for j in range(n_epochs):
    shuffle_index = np.random.permutation(n_training_examples)

    for batch_start in shuffle_index:
        batch_end = batch_start + n_samples

        batch_x = train_images[batch_start:batch_end]
        batch_y = train_labels[batch_start:batch_end]

        forward(batch_x)
        backward(batch_x, batch_y)
        
        layer1.w = layer1.w - learning_rate * layer1.d_w.T
        layer1.b = layer1.b - learning_rate * layer1.d_b
        layer2.w = layer2.w - learning_rate * layer2.d_w.T
        layer2.b = layer2.b - learning_rate * layer2.d_b

    forward(test_images)
    test_loss = batch_loss(test_labels, activation2.a)

    print("€poch {}, test loss: {}", format(j + 1), format(test_loss))

ValueError: operands could not be broadcast together with shapes (29,128) (32,128) 