In [1]:
%config IPCompleter.greedy=True

import numpy as np
from mnist import load_data
import utils

In [100]:
training, validation, testing = load_data()
train_x, train_y, test_x, test_y = training[0], training[1], testing[0], testing[1]
train_y = utils.one_hot_encoding(train_y)
test_y = utils.one_hot_encoding(test_y)

class Network():
    def __init__(self, layers, lr=0.0001, epochs=10):
        self.n_layers = len(layers)
        self.layers = layers
        w1 = np.random.rand(784, layers[0])
        w_last = np.random.rand(layers[len(layers)-1], 10)
        self.weights = [np.random.rand(x, y) for x, y in zip(layers[:-1], layers[1:])]
        self.weights.insert(0, w1)
        self.weights.append(w_last)
        self.weights = np.asarray(self.weights)
        
        self.biases = [np.random.rand(y) for x, y in zip(layers[:-1], layers[1:])]
        b1 = np.random.rand(1, layers[0])
        b_last = np.random.rand(1, 10)
        self.biases.insert(0, b1)
        self.biases.append(b_last)
        self.biases = np.array(self.biases)
        self.lr = lr
        self.epochs = epochs

    def feed_forward(self, inputs):
        activations = [inputs]
        z_vec = []
        for w, b in zip (self.weights, self.biases):
            z = np.dot(inputs, w) + b
            inputs = utils.sigmoid(z)
            z_vec.append(z)
            activations.append(inputs)
        return inputs, activations, z_vec
    
    def compute_loss(self, logits, labels, epsilon=np.finfo(float).eps):
        return -np.sum(np.multiply(labels, np.log10(logits+epsilon)))/logits.shape[0]
    
    def get_gradients(self, logits, labels, activations, z_vec):
        nabla_w = [np.empty(w.shape) for w in self.weights]
        nabla_b = [np.empty(b.shape) for b in self.biases]
        w_last = self.weights[-1]
        error = logits - labels
        w_last_g = np.dot(activations[-2].T, error)
        nabla_w[-1] = w_last_g
        error = error * utils.sigmoid_prime(z_vec[-1])
        errors = []
        errors.append(error)
        for i in range(len(activations) - 3, -1, -1):
            activation = activations[i]
            weight = self.weights[i+1].T
            new_error = np.dot(errors[-1], weight) * utils.sigmoid_prime(z_vec[i])
            dB = new_error
            dW = np.dot(activation.T, new_error)
            nabla_w[i] = dW
            nabla_b[i] = dB
            errors.append(new_error)
        return nabla_w, nabla_b
    
    def update_weights_and_biases(self, nabla_w, nabla_b):
        self.weights -= self.lr * nabla_w
        self.biases -= self.lr * nabla_b
            

In [104]:
model = Network([10, 20])
predict, activations, z_vec = model.feed_forward(train_x)
nabla_w, nabla_b = model.get_gradients(logits=predict, labels=train_y, activations=activations, z_vec=z_vec)
print(nabla_b[0].shape)

(50000, 10)
