**Q) Design and implement a feed-forward neural network, trained with the backpropagation algorithm, to solve the handwritten character recognition problem for the letters A to Z and the digits 0 to 9.**

**Digit Recognition**

In [None]:
import numpy as np
import time
from sklearn.datasets import fetch_openml
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Fetch the 'mnist_784' dataset from OpenML via scikit-learn's fetch_openml;
# later cells read its 'data' (features) and 'target' (labels) entries.
mnist = fetch_openml('mnist_784')

In [None]:
# Pull the features and labels out of the fetched dataset and one-hot encode
# the labels with keras' to_categorical.
x, y = mnist['data'], mnist['target']
y = to_categorical(y)

# First split: 16% of the samples held out, seeded for repeatability.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.16, random_state=37)

# float64 ndarray copies of every split; the feature arrays are also divided
# by 255.
# NOTE(review): a later cell re-splits with test_size=0.15 and rebinds
# x_train/x_test/y_train/y_test, and the training cell consumes those names
# rather than xtrain/xtest/ytrain/ytest built here -- confirm which is intended.
xtrain = np.array(x_train).astype('float64') / 255
xtest = np.array(x_test).astype('float64') / 255
ytrain = np.array(y_train).astype('float64')
ytest = np.array(y_test).astype('float64')

In [None]:
# Convert the feature table to a plain ndarray.
# np.asarray accepts both a DataFrame and an ndarray, so this cell can be
# re-run safely; x.to_numpy() would fail on the second run because an ndarray
# has no such method.
x = np.asarray(x)
print(type(x))
print(type(y))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [None]:
# Hold out 15% of the samples for testing (seeded split). The features are
# split as-is; no rescaling happens in this cell.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.15, random_state=37
)

# Print one "<label>  <shape>" line per split.
print(
    *(
        f"{label} {split.shape}"
        for label, split in (
            ("X_train size: ", x_train),
            ("y_train size: ", y_train),
            ("X_test size: ", x_test),
            ("y_test size: ", y_test),
        )
    ),
    sep="\n",
)

X_train size:  (59500, 784)
y_train size:  (59500, 10)
X_test size:  (10500, 784)
y_test size:  (10500, 10)


In [None]:
#Activation functions
def sigmoid(x, derivative=False):
  """Element-wise logistic sigmoid, or its derivative.

  Args:
    x: scalar or array-like of pre-activations.
    derivative: when True, return d(sigmoid)/dx evaluated at ``x`` instead of
      the sigmoid itself.

  Returns:
    Values with the same shape as ``x`` (a NumPy scalar for scalar input).

  The computation only ever exponentiates ``-|x|``, so it cannot overflow.
  The naive ``exp(-x) / (exp(-x) + 1)**2`` form evaluates to inf/inf = NaN for
  large negative ``x``.
  """
  x = np.asarray(x)
  decay = np.exp(-np.abs(x))  # always in (0, 1]
  # sigmoid(x) = 1/(1+e^-x) for x >= 0 and e^x/(1+e^x) for x < 0; both are
  # expressed through `decay` so neither branch can overflow.
  s = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
  if derivative:
    s = s * (1 - s)
  # Indexing with () turns a 0-d result back into a scalar and leaves
  # n-d arrays untouched.
  return s[()]

def softmax(x, derivative=False):
  """Softmax along axis 0 (one column per sample), or its element-wise derivative.

  Args:
    x: array of scores; for 2-D input each column is normalised independently.
    derivative: when True, return ``s * (1 - s)`` where ``s`` is the softmax
      output. This is only the diagonal of the softmax Jacobian.

  Returns:
    Array with the same shape as ``x``.

  Each column is shifted by its own maximum before exponentiation. Shifting
  by the global maximum lets a column whose scores are all far below that
  maximum underflow to zeros and produce 0/0 = NaN.
  """
  x = np.asarray(x)
  shifted = x - x.max(axis=0, keepdims=True)
  exps = np.exp(shifted)
  probs = exps / np.sum(exps, axis=0, keepdims=True)
  if derivative:
    return probs * (1 - probs)
  return probs

In [None]:
def init_parameters(layer_sizes):
  """Build randomly initialised weight matrices for a fully connected network.

  Args:
    layer_sizes: sequence of layer widths, input layer first.

  Returns:
    Dict mapping 'W1', 'W2', ... to arrays of shape
    (layer_sizes[i], layer_sizes[i-1]) drawn from np.random.randn and scaled
    by 0.01. No bias terms are created.
  """
  weights = {}
  fan_pairs = zip(layer_sizes[:-1], layer_sizes[1:])
  for idx, (n_in, n_out) in enumerate(fan_pairs, start=1):
    # Rows index the units of layer `idx`, columns the units feeding into it.
    weights['W' + str(idx)] = 0.01 * np.random.randn(n_out, n_in)
  return weights

In [None]:
def forward_propagation(X, parameters):
  """Run a forward pass and cache every layer's pre-activation and activation.

  Args:
    X: input matrix with one sample per column (it is left-multiplied by W1).
    parameters: dict with weight matrices under 'W1' .. 'WL'.

  Returns:
    Dict with 'A0' (the input) plus 'Z<i>' and 'A<i>' for every layer i.
    Hidden layers use sigmoid; the last layer uses softmax.
  """
  layers = len(parameters)
  values = {'A0': X}
  activation = X
  for i in range(1, layers + 1):
    z = np.dot(parameters['W' + str(i)], activation)
    # The output layer gets softmax even when it is also the first layer
    # (single-layer network); a first-layer special case would give it sigmoid.
    activation = softmax(z) if i == layers else sigmoid(z)
    values['Z' + str(i)] = z
    values['A' + str(i)] = activation
  return values


In [None]:
def backward_propagation(parameters, values, X, y):
  """Compute weight gradients for a sigmoid-hidden / softmax-output network.

  Args:
    parameters: dict with weight matrices under 'W1' .. 'WL'.
    values: cache from the forward pass; reads 'A0' .. 'AL'.
    X: unused; kept so existing callers keep working.
    y: target matrix with the same shape as values['AL'].

  Returns:
    Dict mapping 'W<i>' to the gradient for that weight matrix, averaged over
    the columns (samples) of the activations.

  The output-layer error is ``A_L - y``. Hidden layers multiply the
  back-propagated error by the sigmoid derivative written as ``A * (1 - A)``
  from the cached activation. This assumes values['A<i>'] holds
  sigmoid(values['Z<i>']) for hidden layers, and avoids the inf/inf = NaN that
  ``exp(-Z) / (1 + exp(-Z))**2`` gives for large negative Z.
  """
  layers = len(parameters)
  grads = {}
  dz = None
  for i in range(layers, 0, -1):
    activation = values['A' + str(i)]
    if i == layers:
      dz = activation - y
    else:
      upstream = np.dot(parameters['W' + str(i + 1)].T, dz)
      dz = upstream * activation * (1 - activation)
    n_samples = activation.shape[1]
    grads['W' + str(i)] = (1 / n_samples) * np.dot(dz, values['A' + str(i - 1)].T)
  return grads

In [None]:
def update_parameters(parameters, grads, learning_rate):
  """Apply one gradient-descent step and return the new weights.

  Args:
    parameters: dict with weight matrices under 'W1' .. 'WL'.
    grads: dict with the matching gradients under the same keys.
    learning_rate: step size multiplied into each gradient.

  Returns:
    A new dict with ``W - learning_rate * dW`` for every layer; the input
    dicts are not modified.
  """
  weight_keys = ['W' + str(k) for k in range(1, len(parameters) + 1)]
  return {
    key: parameters[key] - learning_rate * grads[key]
    for key in weight_keys
  }

In [None]:
def compute_cost(a2, y, eps=1e-12):
  """Mean categorical cross-entropy between predictions and one-hot targets.

  Args:
    a2: predicted probabilities, same shape as ``y``.
    y: target matrix; ``y.shape[1]`` is used as the number of samples.
    eps: lower bound applied to ``a2`` before taking the log.

  Returns:
    The scalar cost ``-(1/m) * sum(y * log(a2))``.

  Probabilities are floored at ``eps`` so that an exact 0 (e.g. from
  underflow) cannot produce ``0 * log(0) = NaN`` or an infinite cost.
  """
  m = y.shape[1]
  safe_probs = np.maximum(a2, eps)
  cost = -(1 / m) * np.sum(y * np.log(safe_probs))
  return cost

In [None]:
def train_model(X, y, layer_sizes, epochs, learning_rate, print_every=0):
  """Train the network with full-batch gradient descent.

  Args:
    X: samples in rows; transposed before being fed to the network.
    y: targets in rows; transposed likewise.
    layer_sizes: layer widths passed to init_parameters.
    epochs: number of full-batch update steps.
    learning_rate: step size passed to update_parameters.
    print_every: when > 0, print the cost every ``print_every`` epochs and on
      the final epoch. The default 0 keeps training silent and skips the cost
      computation, whose result would otherwise be discarded.

  Returns:
    The trained parameter dict.
  """
  parameters = init_parameters(layer_sizes)
  inputs, targets = X.T, y.T
  for epoch in range(epochs):
    values = forward_propagation(inputs, parameters)
    if print_every and (epoch % print_every == 0 or epoch == epochs - 1):
      cost = compute_cost(values['A' + str(len(parameters))], targets)
      print("epoch %d: cost = %.6f" % (epoch, cost))
    grads = backward_propagation(parameters, values, inputs, targets)
    parameters = update_parameters(parameters, grads, learning_rate)
  return parameters

In [None]:
def predict(X, params):
  """Return the network's output activations for the given samples.

  Args:
    X: samples in rows; transposed before the forward pass.
    params: dict with weight matrices under 'W1' .. 'WL'.

  Returns:
    The last layer's activations, transposed back to one row per sample.
  """
  values = forward_propagation(X.T, params)
  # Index the output layer by the number of weight matrices (as train_model
  # does) rather than inferring it from how many entries the cache holds.
  predictions = values['A' + str(len(params))].T
  return predictions

In [None]:
# Network shape: 784 inputs -> one hidden layer of 64 units -> 10 outputs.
layer_sizes = [784, 64, 10]
epochs = 100
learning_rate = 0.5

# Seed NumPy's global RNG so the random weight initialisation (np.random.randn
# in init_parameters) is repeatable from run to run.
np.random.seed(37)

# Scale the features by 1/255 before they reach the network, the same scaling
# the earlier preprocessing cell applies to its own copies. x_train / x_test
# come straight from the split and are otherwise fed in unscaled.
PIXEL_MAX = 255.0
parameters = train_model(x_train / PIXEL_MAX, y_train, layer_sizes, epochs, learning_rate)
pred_y = predict(x_test / PIXEL_MAX, parameters)


In [None]:
from sklearn.metrics import accuracy_score

# Collapse each row (predicted class scores / one-hot target) to a class index.
# NOTE: this rebinds pred_y and y_test to 1-D label arrays, so the cell cannot
# be re-run on its own -- re-run the split and prediction cells first.
pred_y, y_test = (np.argmax(rows, axis=1) for rows in (pred_y, y_test))

print(accuracy_score(y_test, pred_y, normalize=False))  # number of correctly classified test samples
print(accuracy_score(y_test, pred_y, normalize=True))  # fraction (0-1) of correctly classified test samples

7517
0.7159047619047619
