### Custom neural network to classify the mnist digits

Reference: http://neuralnetworksanddeeplearning.com/chap1.html

(This is a simplified version with lots of comments; it works with Python 3.)

Learnings: Always normalize your data!!

In [None]:
import utility
import matplotlib.pyplot as plt
from matplotlib import cm
import numpy as np
from numpy import random
import mnist_loader
%load_ext autoreload
%autoreload 2

In [None]:
# Fetch the training and test data through the project-local `utility` helper.
# Each call is unpacked as (labels, images); the 'testing' argument presumably
# selects the test split -- confirm against utility.read_mnist.
master_train_labels, master_train_images = utility.read_mnist()
master_test_labels, master_test_images = utility.read_mnist('testing')

In [None]:
# One-hot encode the digit labels: label k becomes a length-10 float vector
# with a 1 at index k and 0 elsewhere. Indexing the 10x10 identity matrix with
# the whole label array builds every row at once, replacing two copies of the
# same element-by-element Python loop.
# NOTE(review): assumes utility.read_mnist yields integer labels in 0..9 -- confirm.
master_train_labels = np.eye(10)[np.asarray(master_train_labels).reshape(-1)]
master_test_labels = np.eye(10)[np.asarray(master_test_labels).reshape(-1)]

In [None]:
# Report the shape of each data set, one per line, in the order:
# train labels, train images, test labels, test images.
for dataset in (master_train_labels, master_train_images,
                master_test_labels, master_test_images):
    print(dataset.shape)

In [None]:
# Let's look at one image
#plt.imshow(master_train_images[2], cmap=cm.gray)
#plt.show()
#print(master_train_labels[2])

In [None]:
# What are the image dimensions? Print the shape of the first training image.
print(master_train_images[0].shape)

In [None]:
# Transform the master data: flatten every image (28x28 per the original note)
# into a single row of 784 values, keeping one row per image.
master_train_images = np.reshape(master_train_images, (master_train_images.shape[0], 784))
master_test_images = np.reshape(master_test_images, (master_test_images.shape[0], 784))

# Confirm the new two-dimensional shapes: training set first, then test set.
for flattened in (master_train_images, master_test_images):
    print(flattened.shape)


In [None]:
# Pre-processing: divide by 255 so the pixel values fall between 0 and 1
# (assumes the raw pixel values are in 0..255 -- confirm against the loader).
master_train_images = master_train_images / 255
master_test_images = master_test_images / 255

# Mean-centering (subtracting each set's mean to make the values 0-centered)
# is currently disabled. It is kept as comments rather than as a bare
# triple-quoted string, because a string literal is an expression that a
# notebook echoes as the cell's output.
#
#     mean_train = np.mean(master_train_images)
#     mean_test = np.mean(master_test_images)
#
#     master_train_images -= mean_train
#     master_test_images -= mean_test

In [None]:
# Randomly split the master training data: each sample lands in the training
# set with probability training_pct, otherwise in the validation set.
training_pct = 0.8  # expected training share; the remaining ~20% is validation
training_len = len(master_train_images)

# One uniform draw per sample; True marks a training sample.
train_mask = np.random.rand(training_len) < training_pct
valid_mask = np.logical_not(train_mask)

train_images, train_labels = master_train_images[train_mask], master_train_labels[train_mask]
valid_images, valid_labels = master_train_images[valid_mask], master_train_labels[valid_mask]

In [None]:
# Sizes of the two splits: training first, then validation (one per line).
print(train_images.shape, valid_images.shape, sep="\n")

In [None]:
# Display the first training sample (a flattened, rescaled pixel vector).
train_images[0]

In [None]:
# Display the target vector that belongs to the first training sample.
train_labels[0]

In [None]:
# Print the container types of the training data and of one sample from each,
# in the order: all images, all labels, first image, first label.
for obj in (train_images, train_labels, train_images[0], train_labels[0]):
    print(type(obj))

In [None]:
# Replace the training set with the data provided by the NNDL book's loader.
# NOTE(review): valid_images / valid_labels are not rebuilt in this cell; if
# both loaders read the same underlying MNIST training set, validation samples
# may overlap with these training samples -- confirm.
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

# Walk training_data exactly once (it may be a one-shot iterator), flattening
# each input to 784 values and each target to 10 values.
flattened_pairs = [(item[0].reshape(784), item[1].reshape(10)) for item in training_data]
train_images = np.array([pair[0] for pair in flattened_pairs])
train_labels = np.array([pair[1] for pair in flattened_pairs])




In [None]:
# Display the first training vector as it now stands in train_images.
train_images[0]

In [None]:
#### Miscellaneous functions
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``, element-wise.

    Only ``exp(-|z|)`` is ever evaluated, so inputs of large magnitude cannot
    overflow: for ``z >= 0`` the result is ``1 / (1 + exp(-z))`` and for
    ``z < 0`` the algebraically equal ``exp(z) / (1 + exp(z))``.

    Args:
        z: A real scalar or array-like of real numbers.

    Returns:
        The sigmoid value(s), between 0 and 1, with the same shape as ``z``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # at most 1, so it can underflow but never overflow
    numerator = np.where(z >= 0, 1.0, decay)
    return numerator / (1.0 + decay)

def sigmoid_prime(z):
    """Return the derivative of the sigmoid function at ``z``, element-wise.

    Uses the identity ``sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z))``. The
    sigmoid is evaluated once and reused instead of being computed twice.

    Args:
        z: A real scalar or array of real numbers.

    Returns:
        The derivative value(s), with the same shape as ``sigmoid(z)``.
    """
    s = sigmoid(z)
    return s * (1 - s)

In [None]:
class Network:
    """Feed-forward neural network with sigmoid activations on every layer.

    The methods rely on the module-level ``sigmoid`` and ``sigmoid_prime``
    functions. Inputs, biases and activations are handled as 1-D vectors.
    """

    def __init__(self, sizes):
        """Create a network with randomly initialised parameters.

        Args:
            sizes: Number of nodes in each layer, input layer first,
                e.g. ``[784, 30, 10]``.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # One bias vector per non-input layer, shape (nodes,), standard normal.
        self.biases = [np.random.randn(num_nodes) for num_nodes in sizes[1:]]
        # One weight matrix per pair of adjacent layers, shape (to, frm), so
        # np.dot(w, activation) maps one layer's output onto the next layer.
        self.weights = [np.random.randn(to, frm) for frm, to in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, activation):
        """Return the output-layer activations for a single input vector."""
        for b, w in zip(self.biases, self.weights):
            activation = sigmoid(np.dot(w, activation) + b)
        return activation

    def SGD(self, iterations, xs, ys, learn_rate, xs_test, ys_test,
            mini_batch_size=None):
        """Train the network by gradient descent, printing accuracy per pass.

        Args:
            iterations: Number of passes over the training data.
            xs: Training input vectors.
            ys: Training target vectors, aligned with ``xs``.
            learn_rate: Step size applied to the averaged gradient.
            xs_test: Input vectors used for the per-iteration evaluation.
            ys_test: Target vectors aligned with ``xs_test``.
            mini_batch_size: ``None`` (default) performs one update per
                iteration using all of ``xs`` as a single batch. A positive
                integer shuffles the samples every iteration and performs one
                update per mini-batch of that size.

        Raises:
            ValueError: If ``mini_batch_size`` is given but smaller than 1.
        """
        if mini_batch_size is not None and mini_batch_size < 1:
            raise ValueError("mini_batch_size must be a positive integer or None")

        len_test = len(xs_test)
        len_train = len(xs)
        if mini_batch_size is not None:
            # Selecting a shuffled batch uses array (fancy) indexing.
            xs = np.asarray(xs)
            ys = np.asarray(ys)

        for i in range(iterations):
            if mini_batch_size is None:
                self.update_mini_batch(xs, ys, learn_rate)
            else:
                order = np.random.permutation(len_train)
                for start in range(0, len_train, mini_batch_size):
                    batch = order[start:start + mini_batch_size]
                    self.update_mini_batch(xs[batch], ys[batch], learn_rate)
            print("Iteration {0}/{1}, {2}/{3}".format(i+1, iterations,
                                                      self.evaluate(xs_test, ys_test), len_test))

    def update_mini_batch(self, xs, ys, learn_rate):
        """Apply one gradient-descent step averaged over the given samples.

        Args:
            xs: Input vectors of the batch.
            ys: Target vectors aligned with ``xs``.
            learn_rate: Step size; the summed gradient is scaled by
                ``learn_rate / len(xs)``.

        An empty batch leaves the parameters untouched.
        """
        length = len(xs)
        if length == 0:
            # Nothing to average over; this also avoids dividing by zero below.
            return

        upd_biases = [np.zeros(b.shape) for b in self.biases]
        upd_weights = [np.zeros(w.shape) for w in self.weights]

        # Sum the per-sample gradients over the batch.
        for x, y in zip(xs, ys):
            delta_biases, delta_weights = self.backprop(x, y)
            upd_biases = [u + d for u, d in zip(upd_biases, delta_biases)]
            upd_weights = [u + d for u, d in zip(upd_weights, delta_weights)]

        step = learn_rate / length
        self.biases = [b - step * u for b, u in zip(self.biases, upd_biases)]
        self.weights = [w - step * u for w, u in zip(self.weights, upd_weights)]

    def backprop(self, x, y):
        """Return the gradient of the cost for a single training sample.

        Args:
            x: Input vector.
            y: Target vector for ``x``.

        Returns:
            A tuple ``(grad_biases, grad_weights)`` of two lists whose entries
            have the same shapes as ``self.biases`` and ``self.weights``.
        """
        grad_biases = [np.zeros(b.shape) for b in self.biases]
        grad_weights = [np.zeros(w.shape) for w in self.weights]

        # Feed forward, remembering every weighted input z and every activation.
        activation = np.array(x)
        activations = [activation]
        zs = []
        for w, b in zip(self.weights, self.biases):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # Back prop - output error.
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        grad_biases[-1] = delta
        # Outer product: entry (j, k) is delta[j] * previous activation[k].
        grad_weights[-1] = np.outer(delta, activations[-2])

        # Back prop - propagate the error towards the input. ``layer`` counts
        # from the end of the network (2 is the last hidden layer).
        for layer in range(2, self.num_layers):
            sp = sigmoid_prime(zs[-layer])
            delta = np.dot(self.weights[-layer + 1].transpose(), delta) * sp
            grad_biases[-layer] = delta
            grad_weights[-layer] = np.outer(delta, activations[-layer - 1])

        return grad_biases, grad_weights

    def cost_derivative(self, output_activations, y):
        """Return ``output_activations - y``.

        This is the derivative of the quadratic cost ``0.5 * ||a - y||^2``
        with respect to the output activations ``a``.
        """
        return (output_activations-y)

    def evaluate(self, xs, ys):
        """Return the number of samples that are classified correctly.

        A sample counts as correct when the index of the largest output
        activation equals the index of the largest entry of its target vector.
        """
        test_results = [(np.argmax(self.feedforward(x)), np.argmax(y)) for (x, y) in zip(xs, ys)]
        return sum(int(x == y) for (x, y) in test_results)

In [None]:
#net = Network([2, 3, 2])
#net.backprop([1, 1], [10, 9])

In [None]:
# Build a network with 784 input nodes, one hidden layer of 30 nodes and 10 output nodes.
net = Network([784, 30, 10])

In [None]:
# Train for 10 iterations with a learning rate of 3; after every iteration the
# number of correctly classified validation samples is printed.
net.SGD(10, train_images, train_labels, 3, valid_images, valid_labels)

In [None]:
# net.evaluate(valid_images, valid_labels)
# Let's look at one image: print its target vector, then display the network's
# output activations for the same sample so the two can be compared.
idx = 2240  # index of the training sample to inspect
#plt.imshow(train_images[idx].reshape((28, 28)), cmap=cm.gray)
#plt.show()
print(train_labels[idx])
net.feedforward(train_images[idx])

In [None]:
# Dump the parameter layout, one value per line: number of bias vectors and
# the shape of the first, number of weight matrices and the shape of the
# first, and finally the shape of the training images.
for value in (len(net.biases), net.biases[0].shape,
              len(net.weights), net.weights[0].shape,
              train_images.shape):
    print(value)

In [None]:
# Display the network's output activations for the first training sample.
net.feedforward(train_images[0])

In [None]:
!jupyter nbconvert --to script neuralnet_from_scratch_mnist.ipynb