In [1]:
import numpy as np
import random as rd
from mnist import MNIST as mn
import time
from PIL import Image

In [56]:
class layer(object):
    """A fully connected neural-net layer with a bias input and a named activation.

    The layer keeps the (bias-padded) input and the activation output of the
    most recent forward pass, and the delta of the most recent backward pass,
    so that updateWeights() can be called afterwards.
    """

    def __init__(self, numInputs, layerSize, actFun = "sigmoid"):
        """Create a layer.

        numInputs -- number of inputs feeding the layer (bias excluded)
        layerSize -- number of neurons in the layer
        actFun    -- "sigmoid", "tanh" or "relu"; any other name raises KeyError
        """

        # Lookup table of activation functions
        actLookup = {"sigmoid": lambda x: 1/(1 + np.exp(-1 * x)),
                     "tanh": lambda x: np.tanh(x),
                     "relu": lambda x: np.maximum(0,x)}

        # Derivatives are written in terms of the layer OUTPUT a = f(z),
        # because backward() evaluates them on self.output.  For tanh that
        # means 1 - a**2; applying tanh again would be wrong.
        derivativeLookup = {"sigmoid": lambda a: a * (1. - a),
                            "tanh": lambda a: 1. - a ** 2,
                            "relu": lambda a: np.ceil(a.clip(0,1))}

        # Size of the layer and the activation / derivative pair
        self.layerSize = layerSize
        self.actFun = actLookup[actFun]
        self.deriv = derivativeLookup[actFun]

        # Incorporating the bias neuron
        self.numInputs = numInputs + 1

        # State filled in by forward() and backward()
        self.inputMatrix = None
        self.output = None
        self.delta = None

        # Weight matrix, one row per neuron; column 0 holds the bias weights
        self.weights = np.random.randn(self.layerSize, self.numInputs) / 100

    def forward(self, inputMatrix):
        """Forward-propagate a batch (one row per instance) and return the activations."""

        # Convert to numpy array if not passed as a numpy array
        if not isinstance(inputMatrix, np.ndarray):
            inputMatrix = np.array(inputMatrix)

        # The input must be 2-D; unpacking raises ValueError otherwise
        rows, _ = inputMatrix.shape

        # Prepend a column of ones so the bias is handled by the weight matrix
        inputPadded = np.hstack((np.ones((rows, 1)), inputMatrix))
        self.inputMatrix = inputPadded

        # Weighted sum followed by the activation function
        self.output = self.actFun(np.dot(inputPadded, self.weights.T))
        return self.output

    def backward(self, delta):
        """Store this layer's delta and return the delta for the preceding layer.

        delta -- error signal from the next layer (or target - output for the
                 last layer), one row per batch instance
        """

        # Scale the incoming signal by the activation derivative at the output
        self.delta = np.multiply(self.deriv(self.output), delta)

        # The bias column has no upstream neuron, so it is dropped before the
        # delta is pushed back through the weights
        weightMatrix = self.weights[:,1:]
        return np.dot(self.delta, weightMatrix)

    def updateWeights(self, learnRate):
        """Apply one weight update from the stored input and delta.

        Each weight moves by learnRate times the batch mean of
        (delta of the neuron * input to the neuron).  forward() and
        backward() must have been called first.
        """

        rows = self.inputMatrix.shape[0]
        meanGradient = np.dot(self.delta.T, self.inputMatrix) / float(rows)
        self.weights = self.weights + learnRate * meanGradient

In [95]:
class neuralNet(object):

    """A feed-forward network built as an ordered list of layer objects.

    predict() runs a forward pass; backprop() trains the layers on random
    mini-batches."""

    def __init__(self, layerList, actFun = "sigmoid"):

        """Build one layer per consecutive pair of sizes in layerList.

        layerList[0] is the input width; every later entry is the size of
        a layer.  All layers share the activation named by actFun."""

        self.actFun = actFun

        # Pair each size with the one before it: (inputs, neurons)
        self.layers = []
        for fanIn, fanOut in zip(layerList[:-1], layerList[1:]):
            self.layers.append(layer(numInputs=fanIn,
                                     layerSize=fanOut,
                                     actFun=self.actFun))

    def getWeights(self):
        """Return the weight matrices of the layers, first layer first."""
        return [stage.weights for stage in self.layers]

    def predict(self, inputMatrix):
        """Feed inputMatrix through every layer in order and return the
        output of the last one."""
        signal = inputMatrix

        # Each layer consumes the output of the layer before it
        for stage in self.layers:
            signal = stage.forward(inputMatrix=signal)

        return signal

    def backprop(self, trainInput, trainOutput, learnRate, batchSize = 1, nIter = 100):
        """Train the network in place.

        Runs nIter iterations; each one draws batchSize random rows from
        trainInput / trainOutput, does a forward pass, then walks the layers
        from last to first passing the delta back and updating weights with
        learnRate."""

        # Accept plain sequences as well as numpy arrays
        if not isinstance(trainOutput, np.ndarray):
            trainOutput = np.array(trainOutput)
        if not isinstance(trainInput, np.ndarray):
            trainInput = np.array(trainInput)

        rows, columns = trainInput.shape

        for _ in range(nIter):

            # Random row indices for this mini-batch (np.random.choice
            # samples with replacement by default)
            picks = np.random.choice(range(rows), size=batchSize)

            batchInputs = trainInput[picks, :].reshape(batchSize, columns)
            batchTargets = trainOutput[picks, :]

            # Forward pass, then the output error that seeds the backward pass
            prediction = self.predict(batchInputs)
            delta = np.subtract(batchTargets, prediction)

            for stage in reversed(self.layers):
                # backward() uses the layer's current weights, so it must
                # run before that layer's weights are updated
                delta = stage.backward(delta=delta)
                stage.updateWeights(learnRate=learnRate)

In [41]:
# Build the MNIST loader (imported as `mn`) on a dataset directory given
# relative to the notebook's working directory.
mdata = mn(path='../datasets/MNIST/')

In [5]:
# Load the training split and divide by 255 (presumably mapping 8-bit pixel
# values into [0, 1] -- confirm against the loader's output).
images,labels = mdata.load_training()
images = np.array(images)/255.

# One label per row as uint8; -1 lets numpy infer the row count instead of
# hard-coding the size of the split.
labels = np.array(labels).reshape(-1,1).astype(dtype = 'uint8')

# Expand every label into its 8 bits (most significant first) and keep the
# low 4 bits, which gives the 4-column binary target the network is trained on.
labels = np.unpackbits(labels,axis=1)[:,4:8]

In [6]:
# Draw 5000 row indices for the training subset.  np.random.choice samples
# with replacement by default, so an index can appear more than once.
sampleIndices = np.random.choice(len(images), size=5000)

In [7]:
# Rows of the scaled training images selected by sampleIndices.
imageTrain = images[sampleIndices]

In [8]:
# Matching 4-bit targets, picked with the same indices as imageTrain.
labelTrain = labels[sampleIndices]

In [9]:
# Load the test split and scale it the same way as the training images.
testImages,testLabels = mdata.load_testing()
testImages = np.array(testImages)/255.

# Test labels stay as plain digits (one uint8 column); -1 lets numpy infer
# the row count instead of hard-coding the size of the split.
testLabelsDigit = np.array(testLabels).reshape(-1,1).astype(dtype = 'uint8')

In [10]:
# Draw 1000 row indices for the evaluation subset.  np.random.choice samples
# with replacement by default, so an index can appear more than once.
sampleTestIndices = np.random.choice(len(testImages), size=1000)

In [11]:
# Rows of the scaled test images selected by sampleTestIndices.
imageTest = testImages[sampleTestIndices]

In [12]:
# Matching test labels as raw digits (one uint8 column), not the 4-bit
# encoding used for the training targets.
labelTest = testLabelsDigit[sampleTestIndices]

In [87]:
# Build a neural net with 784 inputs, one hidden layer of 300 units and
# 4 outputs (one per bit of the binary-encoded label), all sigmoid
nn1 = neuralNet(layerList=[784,300,4],actFun="sigmoid")

In [88]:
# learnRate=1.0: every iteration steps by the full batch-mean gradient.
# NOTE(review): confirm against layer.updateWeights, and re-check the accuracy
# recorded further down whenever the update rule or this value changes.
nn1.backprop(trainInput=imageTrain,
             trainOutput=labelTrain,
             learnRate=1.0,
             batchSize=20,
             nIter=10000)

0
500
1000
1500
2000
2500
3000
3500
4000
4500
5000
5500
6000
6500
7000
7500
8000
8500
9000
9500


In [89]:
# Forward pass over the evaluation subset; one row of 4 activations per image.
preds = nn1.predict(imageTest)

In [90]:
# Round every activation to the nearest integer so each output becomes a bit.
preds = np.round(preds)

In [91]:
# Prepend four zero columns so every row is a full 8-bit pattern whose high
# bits are 0.  The row count comes from preds itself rather than a
# hard-coded sample size.
preds = np.hstack((np.zeros((preds.shape[0], 4)), preds))

In [92]:
# Pack the bits back into uint8 values.  No axis is given, so packbits works
# on the flattened array, 8 bits at a time; with 8 columns per row this should
# yield one value per row -- confirm preds is 8 columns wide at this point.
predDigits = np.packbits(preds.astype("bool"))

In [93]:
# Per-sample correctness flags; the count follows labelTest instead of a
# hard-coded 1000 so the cell keeps working if the sample size changes.
acc = [predDigits[i] == labelTest[i] for i in range(len(labelTest))]

In [94]:
# Fraction of correct predictions.  print(...) with a single argument behaves
# the same under Python 2 and Python 3; the denominator follows len(acc).
print(sum(acc)/float(len(acc)))

[ 0.893]
