In [1]:
'''Loads the MNIST data and transforms it into a format suitable for NN training.'''

import pickle
import gzip
import numpy as np
import random

def load_data():
    """Load the pickled MNIST dataset from ``./mnist.pkl.gz``.

    Returns:
        tuple: ``(training_data, validation_data, test_data)`` exactly as
        stored in the pickle (the archive must unpickle to a 3-element
        sequence).

    The archive is opened through a context manager so the file handle is
    released even when unpickling or unpacking raises.
    """
    # NOTE: unpickling can execute arbitrary code; only load an archive that
    # comes from a trusted source.
    with gzip.open('./mnist.pkl.gz') as f:
        # encoding="latin1" is required to read pickles written by Python 2.
        training_data, validation_data, test_data = pickle.load(f, encoding="latin1")
    return (training_data, validation_data, test_data)

def load_data_wrapper():
    """Return the dataset from ``load_data`` as lists of ``(input, label)`` pairs.

    Every input is reshaped to a ``(784, 1)`` column vector.  Training labels
    are passed through ``vectorized_result``; validation and test labels are
    kept exactly as ``load_data`` returned them.

    Returns:
        tuple: ``(training_data, validation_data, test_data)``, each a list
        of 2-tuples.
    """
    def _as_columns(images):
        # One (784, 1) column vector per image.
        return [np.reshape(image, (784, 1)) for image in images]

    raw_train, raw_validation, raw_test = load_data()

    encoded_labels = [vectorized_result(label) for label in raw_train[1]]
    training_data = list(zip(_as_columns(raw_train[0]), encoded_labels))
    validation_data = list(zip(_as_columns(raw_validation[0]), raw_validation[1]))
    test_data = list(zip(_as_columns(raw_test[0]), raw_test[1]))

    return (training_data, validation_data, test_data)

def vectorized_result(j):
    """Return a ``(10, 1)`` array of zeros with ``1.0`` written at row ``j``."""
    one_hot = np.zeros(shape=(10, 1))
    one_hot[j] = 1.0
    return one_hot

In [2]:
# Load the dataset once and keep the three splits returned by
# load_data_wrapper() as notebook-level variables for the cells below.
training_data, validation_data, test_data = load_data_wrapper()

In [48]:
def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, applied element-wise via NumPy."""
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator

def sigmoid_prime(z):
    """Derivative of the logistic function: ``sigmoid(z) * (1 - sigmoid(z))``.

    Args:
        z: scalar or NumPy array of weighted inputs.

    Returns:
        The derivative evaluated at ``z`` (same shape as ``sigmoid(z)``).
    """
    # Evaluate the sigmoid once and reuse it instead of computing it twice.
    s = sigmoid(z)
    return s * (1 - s)

def update_mini_batch(mini_batch, eta):
    """Apply one gradient-descent step to the notebook-level ``weights``/``biases``.

    Args:
        mini_batch: sequence of ``(x, y)`` pairs.  Each ``x`` and ``y`` is
            expected to be a column vector (shape ``(n, 1)``) so that the
            pairs can be stacked side by side into batch matrices.
        eta: learning rate; the summed gradients are scaled by
            ``eta / len(mini_batch)``.

    Returns:
        None.  The global ``biases`` and ``weights`` lists are rebound to the
        updated parameters.  An empty ``mini_batch`` is a no-op.
    """
    # The parameters live at notebook level and are rebound below.  Without
    # this declaration the assignments would make them function-local and the
    # first read would raise UnboundLocalError.
    global biases, weights

    if not mini_batch:
        # Nothing to learn from, and eta / 0 below would fail.
        return

    # Stack the examples as columns: X is (n_inputs, batch), Y is
    # (n_outputs, batch).  backprop sums its gradients over axis 1, so it
    # needs this layout rather than a 3-D (batch, n, 1) array.
    X = np.hstack([pair[0] for pair in mini_batch])
    Y = np.hstack([pair[1] for pair in mini_batch])

    # Gradients already summed over every example in the batch.
    nabla_b, nabla_w = backprop(X, Y)

    step = eta / len(mini_batch)
    biases = [b - step * nb for b, nb in zip(biases, nabla_b)]
    weights = [w - step * nw for w, nw in zip(weights, nabla_w)]


def backprop(x, y):
    """Compute cost gradients for the notebook-level ``weights`` and ``biases``.

    Args:
        x: network input.  The gradients are reduced with ``axis=1``, so a
            2-D array with one example per column is expected.
        y: target output, laid out like the network's final activation.

    Returns:
        tuple: ``(nabla_b, nabla_w)`` -- two lists with one array per layer,
        matching the shapes of ``biases`` and ``weights``; each entry is the
        gradient summed over the columns of the batch.
    """
    grad_b = [np.zeros(b.shape) for b in biases]
    grad_w = [np.zeros(w.shape) for w in weights]

    # Forward pass: remember every weighted input and every activation.
    layer_outputs = [x]
    weighted_inputs = []
    for w, b in zip(weights, biases):
        weighted_inputs.append(np.dot(w, layer_outputs[-1]) + b)
        layer_outputs.append(sigmoid(weighted_inputs[-1]))

    # Error at the output layer.
    delta = cost_derivative(layer_outputs[-1], y) * sigmoid_prime(weighted_inputs[-1])
    grad_b[-1] = np.sum(delta, axis=1, keepdims=True)
    grad_w[-1] = np.dot(delta, layer_outputs[-2].T)

    # Backward pass: depth counts layers from the end (2 = second to last).
    for depth in range(2, num_layers):
        slope = sigmoid_prime(weighted_inputs[-depth])
        delta = np.dot(weights[-depth + 1].T, delta) * slope
        grad_b[-depth] = np.sum(delta, axis=1, keepdims=True)
        grad_w[-depth] = np.dot(delta, layer_outputs[-depth - 1].T)

    return (grad_b, grad_w)


def cost_derivative(output_activations, y):
    """Return the difference ``output_activations - y``."""
    error = output_activations - y
    return error
  

In [3]:
# Layer widths, input layer first.
sizes = [784, 30, 10]
num_layers = len(sizes)

# One bias column per non-input layer and one weight matrix per pair of
# adjacent layers (rows = receiving layer, columns = sending layer), all
# drawn from a standard normal distribution.
biases = [np.random.randn(y, 1) for y in sizes[1:]]
weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

n = len(training_data)
eta = 3.0
mini_batch_size = 10

# Shuffles training_data in place, then cuts it into consecutive chunks;
# the final chunk is shorter when n is not a multiple of mini_batch_size.
random.shuffle(training_data)

mini_batches = [training_data[k:k + mini_batch_size]
                for k in range(0, n, mini_batch_size)]

# Keep the first chunk around for experimenting in the following cells.
mini_batch = mini_batches[0]


In [20]:
# Assemble the mini-batch into matrices with one example per column.
# A single np.hstack concatenates every column in one pass; growing the
# array with np.append inside a loop re-copies it on every iteration.
X = np.hstack([example[0] for example in mini_batch])
Y = np.hstack([example[1] for example in mini_batch])

# Next step (not run yet): nabla_b, nabla_w = backprop(X, Y)
X.shape

(784, 10)