<a href="https://colab.research.google.com/github/gautam-bits/Deep-Learning-Specialization/blob/master/Untitled2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Classification of Handwritten Digits 0-9 from the MNIST Dataset

Name - Gautam Bhambhani <BR>
ID - 2019A7PS0101P
<BR>

Classification of 70000 handwritten digits 0-9 (MNIST dataset), in which the input is a 28*28 grayscale image (flattened to 784 values) and the network has to classify it.

A simple deep neural net was used with 4 layers(1 input, 2 hidden, 1 output)

After tuning some hyperparameters the best accuracy was found out to be 78%.

I have tried to keep the code simple and well commented.



In [None]:
#all the imports of libraries and datasets
from sklearn.datasets import fetch_openml                          #fetch open ml fetches the dataset from the server
from keras.utils.np_utils import to_categorical                    #Converts a class vector (integers) to binary class matrix.
import numpy as np                                                 #for linear algebra calculations
from sklearn.model_selection import train_test_split               #Split arrays or matrices into random train and test subsets               
import time



In [None]:
# Download MNIST from OpenML: x holds the flattened images, y the digit labels.
# as_frame=False forces NumPy arrays. Newer scikit-learn releases otherwise
# return a pandas DataFrame, and iterating a DataFrame with zip() (as the
# training loop does) yields column labels instead of image rows.
x, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
x = (x / 255).astype('float32')    #x = (70000, 784)               #divide by 255 to get a value between 0 and 1
#print(y)
# The labels arrive as strings ('5', '0', ...); cast to int explicitly so the
# one-hot encoding does not depend on to_categorical's implicit conversion.
y = to_categorical(y.astype('int'))                              #same as one hot encoder
#print(y)
# Hold out 15% of the samples for validation; fixed seed for a repeatable split.
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.15, random_state=42)

['5' '0' '4' ... '4' '5' '6']
[[0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [None]:
class DeepNeuralNetwork():
    """Fully connected network: two sigmoid hidden layers and a softmax output.

    The network is trained one sample at a time with plain gradient descent.
    Weights are stored in ``self.params`` under 'W1', 'W2' and 'W3'.
    ``forward_pass`` caches the per-layer values ('A0'..'A3', 'Z1'..'Z3') in
    the same dict so that ``backward_pass`` can reuse them.
    """

    def __init__(self, sizes, epochs=10, l_rate=0.01):
        """Store the hyperparameters and create the initial weights.

        Args:
            sizes: Four layer widths, ordered input, hidden 1, hidden 2, output.
            epochs: Number of full passes over the training data in ``train``.
            l_rate: Step size applied to every weight update.
        """
        self.sizes = sizes
        self.epochs = epochs
        self.l_rate = l_rate

        # All weight matrices (and later the cached activations) live here.
        self.params = self.initialization()

    def sigmoid(self, x, derivative=False):
        """Apply the logistic function elementwise, or its derivative.

        Args:
            x: NumPy array (or scalar) of pre-activations.
            derivative: When True, return sigmoid'(x) instead of sigmoid(x).

        Returns:
            Array with the same shape as ``x``.
        """
        x = np.asarray(x)
        # exp(-|x|) never exceeds 1, so neither branch below can overflow.
        # The naive exp(-x) / (1 + exp(-x))**2 form turns into inf / inf = NaN
        # for very negative inputs.
        decay = np.exp(-np.abs(x))
        s = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
        if derivative:
            return s * (1 - s)
        return s

    def softmax(self, x, derivative=False):
        """Apply softmax along axis 0, or its elementwise derivative.

        Args:
            x: NumPy array of output-layer pre-activations.
            derivative: When True, return ``s * (1 - s)`` where ``s`` is the
                softmax of ``x``.

        Returns:
            Array with the same shape as ``x``.
        """
        # Subtracting the maximum leaves the result unchanged but keeps the
        # exponentials from overflowing for large inputs.
        exps = np.exp(x - x.max())
        probs = exps / np.sum(exps, axis=0)
        if derivative:
            # NOTE: this is only the diagonal of the softmax Jacobian; the
            # cross terms between different outputs are ignored.
            return probs * (1 - probs)
        return probs

    def initialization(self):
        """Create the three random weight matrices.

        Returns:
            Dict with 'W1', 'W2' and 'W3'. Each matrix has shape
            (units in the next layer, units in the previous layer).
        """
        # number of nodes in each layer
        input_layer = self.sizes[0]
        hidden_1 = self.sizes[1]
        hidden_2 = self.sizes[2]
        output_layer = self.sizes[3]

        # Standard-normal draws, scaled by 1/sqrt(width of the layer the
        # weights feed into).
        params = {
            'W1': np.random.randn(hidden_1, input_layer) * np.sqrt(1. / hidden_1),
            'W2': np.random.randn(hidden_2, hidden_1) * np.sqrt(1. / hidden_2),
            'W3': np.random.randn(output_layer, hidden_2) * np.sqrt(1. / output_layer),
        }

        return params

    def forward_pass(self, x_train):
        """Propagate one sample through the network.

        Every intermediate value is cached in ``self.params`` for use by
        ``backward_pass``.

        Args:
            x_train: 1-D input vector with ``sizes[0]`` entries.

        Returns:
            The softmax output vector (also stored as ``params['A3']``).
        """
        params = self.params

        # input layer activations becomes sample
        params['A0'] = x_train

        # input layer to hidden layer 1
        params['Z1'] = np.dot(params["W1"], params['A0'])
        params['A1'] = self.sigmoid(params['Z1'])

        # hidden layer 1 to hidden layer 2
        params['Z2'] = np.dot(params["W2"], params['A1'])
        params['A2'] = self.sigmoid(params['Z2'])

        # hidden layer 2 to output layer
        params['Z3'] = np.dot(params["W3"], params['A2'])
        params['A3'] = self.softmax(params['Z3'])

        return params['A3']

    def backward_pass(self, y_train, output):
        """Compute the weight gradients for the most recent forward pass.

        This relies on the values that ``forward_pass`` cached in
        ``self.params``, so it must be called right after it for the same
        sample.

        Args:
            y_train: Target vector with the same length as ``output``.
            output: Network output returned by ``forward_pass``.

        Returns:
            Dict with the gradients for 'W1', 'W2' and 'W3', each shaped like
            the corresponding weight matrix.
        """
        params = self.params
        change_w = {}

        # Calculate W3 update: derivative of the mean squared error with
        # respect to the output, times the elementwise softmax derivative.
        error = 2 * (output - y_train) / output.shape[0] * self.softmax(params['Z3'], derivative=True)
        change_w['W3'] = np.outer(error, params['A2'])

        # Calculate W2 update: push the error back through W3.
        error = np.dot(params['W3'].T, error) * self.sigmoid(params['Z2'], derivative=True)
        change_w['W2'] = np.outer(error, params['A1'])

        # Calculate W1 update: push the error back through W2.
        error = np.dot(params['W2'].T, error) * self.sigmoid(params['Z1'], derivative=True)
        change_w['W1'] = np.outer(error, params['A0'])

        return change_w

    def update_parameters(self, changes_to_w):
        """Apply one gradient-descent step in place.

        Args:
            changes_to_w: Dict mapping weight names to gradients, as returned
                by ``backward_pass``.
        """
        for key, value in changes_to_w.items():
            self.params[key] -= self.l_rate * value

    def compute_accuracy(self, x_val, y_val):
        """Return the fraction of samples whose predicted class is correct.

        Args:
            x_val: Iterable of input vectors.
            y_val: Iterable of target vectors, aligned with ``x_val``.

        Returns:
            Mean of the per-sample hit/miss flags, a value between 0 and 1.
        """
        # A prediction is a hit when the most active output matches the
        # position of the largest entry in the target.
        predictions = [
            np.argmax(self.forward_pass(x)) == np.argmax(y)
            for x, y in zip(x_val, y_val)
        ]

        return np.mean(predictions)

    def train(self, x_train, y_train, x_val, y_val):
        """Run per-sample gradient descent for ``self.epochs`` epochs.

        After every epoch the accuracy on (x_val, y_val) is printed together
        with the elapsed wall-clock time.

        Args:
            x_train: Iterable of training input vectors.
            y_train: Iterable of training target vectors.
            x_val: Iterable of validation input vectors.
            y_val: Iterable of validation target vectors.
        """
        start_time = time.time()
        print('Training Started')
        for iteration in range(self.epochs):
            for x, y in zip(x_train, y_train):
                output = self.forward_pass(x)
                changes_to_w = self.backward_pass(y, output)
                self.update_parameters(changes_to_w)

            accuracy = self.compute_accuracy(x_val, y_val)
            print('Epoch: {0}, Time Spent: {1:.2f}s, Accuracy: {2:.2f}%'.format(
                iteration + 1, time.time() - start_time, accuracy * 100
            ))

    def test(self):
        """Placeholder for a held-out evaluation routine; not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            'test() is not implemented; use compute_accuracy(x, y) instead'
        )

In [None]:
# Build the 4-layer network and train it with the default epochs and
# learning rate; accuracy on the validation split is printed after each epoch.
dnn = DeepNeuralNetwork(
    sizes=[
        784,  # input layer: one unit per pixel value
        128,  # hidden layer 1
        64,   # hidden layer 2
        10,   # output layer: one unit per digit class
    ]
)
dnn.train(x_train, y_train, x_val, y_val)

Training Started
Epoch: 1, Time Spent: 57.00s, Accuracy: 54.10%
Epoch: 2, Time Spent: 114.15s, Accuracy: 65.81%
Epoch: 3, Time Spent: 171.31s, Accuracy: 70.79%
Epoch: 4, Time Spent: 233.13s, Accuracy: 74.91%
Epoch: 5, Time Spent: 290.37s, Accuracy: 76.84%
Epoch: 6, Time Spent: 347.39s, Accuracy: 77.69%
Epoch: 7, Time Spent: 404.70s, Accuracy: 77.95%
Epoch: 8, Time Spent: 461.86s, Accuracy: 78.61%
Epoch: 9, Time Spent: 518.63s, Accuracy: 79.33%
Epoch: 10, Time Spent: 575.74s, Accuracy: 79.94%


### The final accuracy with seed = 42, layer sizes = [784, 128, 64, 10] and alpha = 0.01 is approximately 78.37%, which is pretty good considering it is a simple neural network.

### The time taken was approximately 10 minutes for 10 epochs (episodes).


# Thank You!