# Backpropagation learning for multilayer perceptron
# with minibatches

    @author      Qipeng Liu
    @rev.date    2017/02/22

Python 2.7 port of Minibatch.ipynb Julia code by Prof. Sebastian Seung

- Note: In this code, digit label 0 is stored at output index 0, because Python indexing starts at 0 (Julia indexing starts at 1).


In [1]:
import time

from IPython import display

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np
import sklearn
from sklearn.datasets import fetch_mldata

In [2]:
def f(x):
    """Activation function: elementwise hyperbolic tangent of ``x``.

    Replace the body to experiment with a different activation function;
    ``df`` has to be changed to match.
    """
    activation = np.tanh(x)
    return activation
def df(y):
    """Derivative of ``f`` written as a function of the output ``y = f(x)``.

    For the tanh activation, f'(x) = 1 - tanh(x)**2 = 1 - y**2, so the
    derivative can be evaluated from the stored activations alone.
    """
    return 1 - np.square(y)

n0 = 784                 # widths of layers: input, two hidden layers, output
n1 = 200
n2 = 100
n3 = 10

eta = 0.01               # learning rate parameter
epsinit = 0.01           # magnitude of initial conditions for synaptic weights

# three fully connected synaptic layers; W_k maps layer k-1 (columns) to layer k (rows)
W1 = epsinit*np.random.randn(n1,n0)
W2 = epsinit*np.random.randn(n2,n1)
W3 = epsinit*np.random.randn(n3,n2)

# biases, kept as column vectors so they line up with one-example-per-column activations
b1 = epsinit*np.random.randn(n1, 1)
b2 = epsinit*np.random.randn(n2, 1)
b3 = epsinit*np.random.randn(n3, 1)

tmax = 600000             # maximum number of learning updates
tshow = 1000              # how often to pause for visualization
errsq = np.zeros(tmax)    # per-update squared error on the minibatch
errcl = np.zeros(tmax)    # per-update classification error on the minibatch
# One validation-error slot per visualization step.  Floor division keeps the
# array size an integer even when `/` means true division.
errclvalidate = np.zeros(tmax // tshow)

In [3]:
# Load the data set and build the training / validation split.
# NOTE(review): fetch_mldata is deprecated in later scikit-learn releases --
# confirm the installed version still provides it.
mnist = fetch_mldata('MNIST original', data_home="data")

# keep the first 60000 examples and rescale the inputs by 1/255
# (presumably mapping 8-bit pixel values into [0, 1])
train = mnist.data[:60000] / 255.0
trainlabel = mnist.target[:60000]

# hold out part of the data for validation
mtotal = train.shape[0]         # number of examples before the split
mvalidate = 10000               # desired size of validation set
mtrain = mtotal - mvalidate     # everything else becomes the new training set
np.random.seed(495)             # fixed seed so the validation set is reproducible
ind = np.random.permutation(range(mtotal))

# the first mvalidate shuffled indices form the validation set, the rest the
# training set; `train` is rebound last because the validation rows are
# taken from it
validate = list(train[ind[:mvalidate]])
validatelabels = list(trainlabel[ind[:mvalidate]])
train = list(train[ind[mvalidate:]])
trainlabels = list(trainlabel[ind[mvalidate:]])

batchsize = 32     # minibatch size

In [4]:
%matplotlib inline
plt.rcParams['image.cmap'] = 'gray'
fig_size = (12,12)                           # you may need to change the numbers to fit your screen


def _forward(inputs):
    """Run the network on ``inputs`` (one example per column).

    Reads the current module-level weights W1..W3 and biases b1..b3.  The
    biases are (n, 1) column vectors, so NumPy broadcasting adds them to
    every column without tiling them into full matrices first.

    Returns the activations of the three layers as a tuple (x1, x2, x3).
    """
    h1 = f(np.dot(W1, inputs) + b1)
    h2 = f(np.dot(W2, h1) + b2)
    h3 = f(np.dot(W3, h2) + b3)
    return h1, h2, h3


# The validation inputs and labels never change during training, so they are
# assembled once here instead of being rebuilt at every visualization step.
x0validate = np.array(validate).T                       # one example per column
validatetargets = np.array([int(label) for label in validatelabels])

batchcols = np.arange(batchsize)                        # column index of each minibatch example
rate = eta / batchsize                                  # step size for the batch-averaged gradient

for t in xrange(tmax):
    # generate random samples (with replacement) from train set
    batchindices = np.floor(mtrain * np.random.rand(batchsize)).astype(int)
    x0 = np.array([train[j] for j in batchindices]).T   # one example per column
    targets = np.array([int(trainlabels[j]) for j in batchindices])

    # desired output: +1 at the row of the correct class, -1 everywhere else
    y = -np.ones([n3, batchsize])
    y[targets, batchcols] = 1.0

    # forward pass
    x1, x2, x3 = _forward(x0)

    # error computation
    errsq[t] = np.sum(np.square(y - x3)) / batchsize
    errcl[t] = np.mean(np.argmax(x3, axis=0) != targets)
    delta3 = np.multiply((y - x3), df(x3))

    # backward pass
    delta2 = np.multiply(np.dot(W3.T, delta3), df(x2))
    delta1 = np.multiply(np.dot(W2.T, delta2), df(x1))

    # learning updates (in place, so _forward sees the new parameters)
    W3 += rate * np.dot(delta3, x2.T)
    W2 += rate * np.dot(delta2, x1.T)
    W1 += rate * np.dot(delta1, x0.T)
    b3 += rate * np.sum(delta3, axis=1, keepdims=True)
    b2 += rate * np.sum(delta2, axis=1, keepdims=True)
    b1 += rate * np.sum(delta1, axis=1, keepdims=True)

    if t % tshow == 0 and t > 0:    # visualization every tshow steps
        # Floor division: nshow is used as a reshape size, an index and a
        # range bound, all of which must be integers.
        nshow = t // tshow
        shown = range(1, nshow + 1)

        # average the per-update errors over each window of tshow updates
        avgerrsq = errsq[:t].reshape(nshow, tshow).mean(axis=1)
        avgerrcl = errcl[:t].reshape(nshow, tshow).mean(axis=1)

        # compute error on validation set; from here on x1..x3 hold the
        # validation-set activations, which the histograms below display
        x1, x2, x3 = _forward(x0validate)
        errclvalidate[nshow - 1] = np.mean(np.argmax(x3, axis=0) != validatetargets)

        # plot figures
        fig = plt.figure(figsize=fig_size)
        gs = gridspec.GridSpec(2, 3, wspace=0.3, hspace=0.3)

        ax = fig.add_subplot(gs[0,0])
        ax.plot(shown, avgerrsq, label="squared")
        ax.set_ylabel("sq err")
        ax.set_ylim([0.001,4])
        ax.set_yscale('log')
        ax.set_xlabel("x{} minibatches".format(tshow))
        ax.grid()

        # classification error, log scale
        ax = fig.add_subplot(gs[0,1])
        ax.plot(shown, avgerrcl, label="train")
        ax.plot(shown, errclvalidate[:nshow], label="validation")
        ax.set_ylabel("class err")
        ax.set_ylim([0.001,1])
        ax.set_yscale('log')
        ax.set_title("t={}".format(t))
        ax.set_xlabel("x{} minibatches".format(tshow))
        ax.grid()
        ax.legend()

        # classification error again, linear scale restricted to [0, 0.1]
        ax = fig.add_subplot(gs[0,2])
        ax.plot(shown, avgerrcl, label="train")
        ax.plot(shown, errclvalidate[:nshow], label="validation")
        ax.set_ylabel("class err")
        ax.set_ylim([0.0, 0.1])
        ax.set_title("t={}".format(t))
        ax.set_xlabel("x{} minibatches".format(tshow))
        ax.grid()
        ax.legend()

        # histograms of the activations in each layer
        for col, (name, activations) in enumerate([("x1", x1), ("x2", x2), ("x3", x3)]):
            ax = fig.add_subplot(gs[1, col])
            ax.hist(activations.flatten())
            ax.set_xlabel(name)
            ax.grid()

        # show the figure, then clear it so the next one replaces it in place
        display.display(plt.gcf())
        time.sleep(0.01)
        display.clear_output(wait=True)
        fig.clf()
        plt.close(fig)


KeyboardInterrupt: 