In [1]:
#coding = utf-8

"""
Coding the implementation of a neural network using numpy arrays
Forward propagate
Backward propagation
"""

import numpy as np
import pickle
import gzip

In [2]:
f = gzip.open("mnist.pkl.gz", mode='rb')
train, validate, test = pickle.load(f, encoding= 'latin1')
X_train, y_train = train[0], train[1]
length, width = X_train.shape

In [None]:
class Neural_Network(object):

    def convert_toOneHot(self,y):
        y_ = np.zeros((len(y),10))
        y_[np.arange(len(y)),y] = 1
        return y_
        
    def __init__(self, input_data, input_layers,hidden_layers_size, output_layers):
        # hidden_layers_size is a list of number of nodes in each hidden layer
        self.X_train = input_data[0]
        self.y_train = input_data[1]
        self.y_oneHot = self.convert_toOneHot(self.y_train) # The function is above
        self.input_layers = input_layers
        self.hidden_layers_size = hidden_layers_size
        self.output_layers = output_layers
        
        # Initializing the network
        self.initialize_network()

    def initialize_network(self):
        self.weights = {}
        self.delta = {}
        self.error= {}

        # We define the first and last set of weights manually and loop through to set the other weights

        self.weights[0] = np.random.randn(self.input_layers + 1,self.hidden_layers_size[0])
        # The seld.input_layers + 1 is for the bias term
        
        self.weights[len(self.hidden_layers_size)] = np.random.randn(self.hidden_layers_size[-1],self.output_layers)
        for i in range(len(self.hidden_layers_size) - 1):   # Since we already defined the last layer
            self.weights[i+1] = np.random.randn(self.hidden_layers_size[i],self.hidden_layers_size[i+1])
            
        
    def mini_batch(self,size_of_batch):
        """
        Chooses a number of random examples from the whole training set depending on the
        size_of_batch specified
        """
        idx = np.random.choice(len(X_train),size = size_of_batch, replace = False)
        """
        The below step adds the bias to the training set and bias in our
        example is only used in the input layer
        """
        self.X_train_batch = np.hstack((X_train[idx,:],np.ones((size_of_batch,1)))) 
        self.y_train_batch = self.y_oneHot[idx]
        
    def compute_product(self,x,w):
        return np.dot(x,w)
    
    def calc_activation(self,x,w):
        """
        The below function is sigmoid activation function
        """
        return 1/(1 + np.exp(-(self.compute_product(x,w)))) # np.dot(x,w) = (x1w1 + x2w2 + ......., it also includes bias
    
    
    def softmax_layer(self,x):
        """
        Insert this layer wherever desired to convert the scores into probabilities
        """
        sum_exp_x = np.sum(np.exp(x.T),axis = 0)
        return (np.divide(np.exp(x.T),sum_exp_x)).T
    
    def forward_propagate(self):
        
        self.activation = {}
        self.mini_batch(100)
        # Calculating the activation at the first hidden layer
        self.activation[0] = np.copy(self.X_train_batch) # Set the activation of 0 layer as the input itself
        
        for i in range(len(self.hidden_layers_size) + 1):
            self.activation[i + 1] = self.calc_activation(self.activation[i], self.weights[i])
        
        """
        For last layer we need to still compute the product to get the scores
        we could also insert a softmax layer if need be
        
        
        """
        # Before applying softmax
        
        self.activation[len(self.hidden_layers_size) + 1] = self.compute_product(self.activation[len(self.hidden_layers_size)],
                                          self.weights[len(self.hidden_layers_size)])
        
        # Applying softmax 
        
        self.activation[len(self.hidden_layers_size) + 1] = self.softmax_layer(self.activation[len(self.hidden_layers_size) + 1]) # After softmax layer
        
        
    def cross_entropy(self,y,y_):
        return np.mean(-np.sum(y*y_, axis =1), axis = 0)
        
    def accuracy(self):
        return np.mean(np.equal(np.argmax(self.y_train_batch,axis=1),
                                np.argmax(self.activation[len(self.hidden_layers_size) + 1], axis = 1)))
        
            
    
        
           

In [7]:
NN = Neural_Network(train,width,[200,100,50],10)

In [8]:
NN.forward_propagate()

In [10]:
np.argmax(NN.activation[len(NN.hidden_layers_size) + 1], axis = 1)

array([9, 8, 9, 8, 8, 3, 8, 8, 3, 8, 3, 9, 3, 3, 8, 8, 8, 3, 8, 8, 3, 8, 8,
       8, 8, 3, 8, 3, 3, 9, 8, 3, 8, 3, 3, 3, 0, 8, 8, 8, 8, 8, 8, 8, 9, 3,
       0, 8, 8, 8, 8, 8, 9, 9, 8, 8, 8, 8, 8, 9, 8, 8, 8, 8, 0, 5, 1, 9, 3,
       9, 8, 8, 8, 3, 3, 9, 8, 9, 9, 8, 8, 8, 8, 8, 8, 3, 8, 8, 9, 3, 8, 8,
       9, 8, 9, 8, 9, 3, 8, 8])

In [11]:
np.argmax(NN.y_train_batch, axis = 1)

array([5, 3, 0, 1, 1, 1, 7, 9, 6, 9, 2, 8, 7, 5, 3, 3, 0, 2, 3, 7, 1, 9, 5,
       5, 8, 8, 8, 7, 1, 8, 5, 1, 2, 1, 7, 7, 1, 8, 0, 9, 5, 7, 3, 8, 7, 1,
       8, 5, 5, 4, 6, 4, 2, 4, 8, 2, 8, 6, 6, 8, 4, 4, 2, 8, 7, 7, 5, 2, 3,
       6, 2, 1, 2, 8, 7, 5, 7, 2, 0, 1, 7, 0, 0, 1, 4, 7, 5, 0, 7, 7, 7, 0,
       3, 2, 9, 2, 2, 3, 2, 3])

In [14]:
NN.cross_entropy(NN.y_train_batch, NN.activation[len(NN.hidden_layers_size) + 1])

-0.10679623712136026

In [15]:
NN.accuracy()

0.10000000000000001

(50, 10)