In [188]:
#coding = utf-8
"""
Coding the implementation of a neural network using numpy arrays
Forward propagate
Backward propagation

"""
import numpy as np
import pickle
import gzip

In [189]:
# Load the pickled MNIST splits. The context manager closes the gzip handle
# even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code; only use a trusted
# copy of mnist.pkl.gz.
with gzip.open("mnist.pkl.gz", mode='rb') as f:
    train, validate, test = pickle.load(f, encoding= 'latin1')
# Each split is indexed as (features, labels).
X_train, y_train = train[0], train[1]
# Number of rows and columns of the training feature matrix.
length, width = X_train.shape

In [291]:
class Neural_Network(object):
    """Fully connected feed-forward classifier built on NumPy arrays.

    Hidden layers use a sigmoid activation; the output layer produces raw
    scores that are turned into probabilities by a softmax. A bias column is
    appended to the input only, so only the first weight matrix has a bias row.
    Only the forward pass, a loss helper and an accuracy helper are provided.
    """

    def convert_toOneHot(self, y, num_classes=10):
        """Return a one-hot encoding of integer labels.

        Parameters
        ----------
        y : sequence of int
            Class indices, each in ``[0, num_classes)``.
        num_classes : int, optional
            Width of the encoding (defaults to 10).

        Returns
        -------
        numpy.ndarray of shape ``(len(y), num_classes)``
        """
        y_ = np.zeros((len(y), num_classes))
        y_[np.arange(len(y)), y] = 1
        return y_

    def __init__(self, input_data, input_layers,hidden_layers_size, output_layers):
        """Store the training data and create randomly initialised weights.

        Parameters
        ----------
        input_data : indexable
            ``input_data[0]`` is the feature matrix and ``input_data[1]`` the
            integer labels.
        input_layers : int
            Number of input features (without the bias column).
        hidden_layers_size : list of int
            Number of nodes in each hidden layer; needs at least one entry.
        output_layers : int
            Number of output units, also used as the one-hot width.
        """
        self.X_train = input_data[0]
        self.y_train = input_data[1]
        self.input_layers = input_layers
        self.hidden_layers_size = hidden_layers_size
        self.output_layers = output_layers
        # One-hot width follows the output layer so labels and scores line up.
        self.y_oneHot = self.convert_toOneHot(self.y_train, output_layers)

        # Initializing the network
        self.initialize_network()

    def initialize_network(self):
        """Fill ``self.weights`` with standard-normal matrices, one per layer."""
        self.weights = {}
        n_hidden = len(self.hidden_layers_size)

        # The first and last weight matrices are set explicitly; the loop
        # below fills in the hidden-to-hidden ones.
        # The "+ 1" row in the first matrix is for the bias term.
        self.weights[0] = np.random.randn(self.input_layers + 1,self.hidden_layers_size[0])

        self.weights[n_hidden] = np.random.randn(self.hidden_layers_size[-1],self.output_layers)
        for i in range(n_hidden - 1):   # the last layer is already defined
            self.weights[i+1] = np.random.randn(self.hidden_layers_size[i],self.hidden_layers_size[i+1])

    def mini_batch(self,size_of_batch):
        """Sample ``size_of_batch`` distinct training examples.

        Sets ``self.X_train_batch`` (features with a trailing column of ones
        acting as the bias input) and ``self.y_train_batch`` (matching one-hot
        labels). Sampling is without replacement, so ``size_of_batch`` must not
        exceed the number of training examples.
        """
        # Sample from the instance's own data, not a notebook-level global.
        idx = np.random.choice(len(self.X_train),size = size_of_batch, replace = False)
        # The bias is only used at the input layer.
        self.X_train_batch = np.hstack((self.X_train[idx,:],np.ones((size_of_batch,1))))
        self.y_train_batch = self.y_oneHot[idx]

    def compute_product(self,x,w):
        """Return the matrix product ``x @ w``."""
        return np.dot(x,w)

    def calc_activation(self,x,w):
        """Return the sigmoid of ``x @ w`` (the bias is part of ``x``/``w``)."""
        return 1/(1 + np.exp(-(self.compute_product(x,w))))

    def softmax_layer(self,x):
        """Convert scores into probabilities along the last axis.

        The maximum along that axis is subtracted before exponentiating; this
        leaves the result unchanged mathematically but avoids overflow (and the
        resulting ``nan``) for large scores.
        """
        shifted = x - np.max(x, axis=-1, keepdims=True)
        exp_x = np.exp(shifted)
        return exp_x / np.sum(exp_x, axis=-1, keepdims=True)

    def forward_propagate(self, batch_size=100):
        """Draw a mini-batch and run it through the network.

        Parameters
        ----------
        batch_size : int, optional
            Number of examples to sample (defaults to 100).

        Sets ``self.activation`` (per-layer activations, index 0 being the
        input batch), ``self.score`` (output of the final layer before
        softmax) and ``self.final_score`` (after softmax).
        """
        self.activation = {}
        self.mini_batch(batch_size)
        n_hidden = len(self.hidden_layers_size)

        # Layer 0's "activation" is the input batch itself.
        self.activation[0] = np.copy(self.X_train_batch)

        for i in range(n_hidden):
            self.activation[i+1] = self.calc_activation(self.activation[i], self.weights[i])

        # The last layer only needs the linear product to get the scores.
        self.score = self.compute_product(self.activation[n_hidden],
                                          self.weights[n_hidden])

        self.final_score = self.softmax_layer(self.score) # After softmax layer

    def cross_entropy(self,y,y_):
        """Return the mean over rows of ``-sum(y * y_)`` taken along axis 1.

        NOTE(review): no logarithm is applied here, so this equals a
        cross-entropy only if one of the arguments already holds
        log-probabilities -- confirm against the intended caller.
        """
        return np.mean(-np.sum(y*y_, axis =1), axis = 0)

    def accuracy(self):
        """Return the fraction of the current batch whose arg-max prediction
        matches the arg-max of its one-hot label."""
        return np.mean(np.equal(np.argmax(self.y_train_batch,axis=1),np.argmax(self.final_score, axis = 1)))
        
            
        
        
           

In [292]:
# Build a network on the training split: `width` input features, a single
# hidden layer of 50 nodes, and 10 output units.
NN = Neural_Network(train,width,[50],10)

In [293]:
# Run one forward pass; this draws a fresh random mini-batch and stores the
# softmax output in NN.final_score.
NN.forward_propagate()

In [298]:
# Predicted class per example: index of the largest softmax output in each row.
# The weights are still random (nothing above trains them).
np.argmax(NN.final_score, axis = 1)

array([3, 8, 8, 8, 7, 5, 5, 8, 7, 8, 4, 0, 8, 8, 8, 4, 3, 8, 7, 7, 8, 8, 4,
       8, 8, 8, 0, 0, 4, 8, 7, 8, 7, 8, 8, 4, 0, 4, 8, 4, 8, 5, 0, 9, 8, 5,
       8, 0, 0, 7, 8, 5, 8, 5, 8, 8, 8, 8, 8, 8, 4, 8, 7, 4, 4, 0, 8, 7, 8,
       8, 9, 8, 8, 3, 8, 5, 3, 8, 8, 8, 9, 0, 8, 3, 8, 8, 0, 5, 7, 8, 8, 8,
       4, 8, 8, 7, 5, 8, 8, 0])

In [299]:
# True class per example, recovered from the one-hot labels of the same mini-batch.
np.argmax(NN.y_train_batch, axis = 1)

array([8, 3, 4, 3, 9, 9, 9, 9, 9, 9, 3, 1, 1, 4, 0, 7, 2, 7, 4, 6, 8, 2, 0,
       8, 1, 0, 8, 9, 9, 8, 4, 6, 6, 7, 4, 3, 3, 2, 1, 8, 1, 9, 2, 5, 0, 3,
       0, 1, 3, 1, 1, 7, 1, 7, 8, 6, 7, 2, 8, 4, 7, 0, 9, 7, 7, 1, 0, 0, 5,
       2, 4, 1, 5, 3, 4, 9, 7, 5, 5, 4, 6, 4, 5, 2, 2, 3, 3, 3, 7, 2, 5, 1,
       9, 4, 5, 9, 4, 8, 0, 1])