**Q) Design and implement a feed forward neural network using backpropagation algorithm to to solve hand written character recognition problem for A to Z and 0 to 9 letters and digits respectively.**

**Character Recognition**

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [None]:
df = pd.read_csv('A_Z Handwritten Data.csv')

In [None]:
df.head

<bound method NDFrame.head of          0  0.1  0.2  0.3  0.4  0.5  0.6  0.7  0.8  0.9  ...  0.639  0.640  \
0        0    0    0    0    0    0    0    0    0    0  ...      0      0   
1        0    0    0    0    0    0    0    0    0    0  ...      0      0   
2        0    0    0    0    0    0    0    0    0    0  ...      0      0   
3        0    0    0    0    0    0    0    0    0    0  ...      0      0   
4        0    0    0    0    0    0    0    0    0    0  ...      0      0   
...     ..  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...    ...    ...   
372445  25    0    0    0    0    0    0    0    0    0  ...      0      0   
372446  25    0    0    0    0    0    0    0    0    0  ...      0      0   
372447  25    0    0    0    0    0    0    0    0    0  ...      0      0   
372448  25    0    0    0    0    0    0    0    0    0  ...      0      0   
372449  25    0    0    0    0    0    0    0    0    0  ...      0      0   

        0.641  0.642  0.643  0.64

In [None]:
y = df.iloc[:,0]
y = y.to_numpy().reshape(-1,1)
print(type(y))
print(y.shape)
x = df.iloc[:,1:]
x = x.to_numpy()
print(type(x))
print(x.shape)

<class 'numpy.ndarray'>
(372450, 1)
<class 'numpy.ndarray'>
(372450, 784)


In [None]:
def oneHotEncoding(y):
    l = []
    for row in range(y.shape[0]):
        enc = []
        for i in range(26):
            enc.append(0)
        num = y[row][0]
        enc[num] = 1
        l.append(enc)
    
    return np.array(l)

In [None]:
y = oneHotEncoding(y)
print(type(y))
print(y.shape)

<class 'numpy.ndarray'>
(372450, 26)


In [None]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.16, random_state=37)
x_train = x_train.astype('float64')
x_test = x_test.astype('float64')
y_train = y_train.astype('float64')
y_test = y_test.astype('float64')

x_train = x_train/255
x_test = x_test/255

print("X_train size: ", x_train.shape)
print("y_train size: ", y_train.shape)
print("X_test size: ", x_test.shape)
print("y_test size: ", y_test.shape)

X_train size:  (312858, 784)
y_train size:  (312858, 26)
X_test size:  (59592, 784)
y_test size:  (59592, 26)


In [None]:
#Activation Functions
def sigmoid(x):
    return 1/(1 + np.exp(-x))

def softMax(x):
    exps = np.exp(x - x.max())
    return exps/np.sum(exps, axis = 0)

In [None]:
#Differentials
def dif_sigmoid(x):
    y = sigmoid(x)
    return y*(1-y)

def dif_softMax(x):
    y = softMax(x)
    return y*(1-y)

In [None]:
def init_parameters(layer_sizes):
    parameters = {}
    for i in range(1, len(layer_sizes)):
        parameters['W' + str(i)] = np.random.randn(layer_sizes[i], layer_sizes[i-1])*0.01
        parameters['B' + str(i)] = np.random.randn(layer_sizes[i], 1)*0.01
    
    return parameters

In [None]:
def forward_propagation(X, parameters):
    values = {}
    layers = len(parameters)//2 + 1
    values['A0'] = X
    for i in range(1, layers):
        values['Z' + str(i)] = np.dot(parameters['W' + str(i)], values['A' + str(i-1)]) + parameters['B' + str(i)]
        if(i == layers - 1):
            values['A' + str(i)] = softMax(values['Z' + str(i)])
        else:
            values['A' + str(i)] = sigmoid(values['Z' + str(i)])
            
    return values

In [None]:
def compute_cost(a2, y):
    m = y.shape[1]
    return -(1/m)*np.sum(y*np.log(a2))

In [None]:
def backward_propagation(y, parameters, values):
    layers = len(parameters)//2
    m = len(y)
    grads = {}
    for i in range(layers,0,-1):
        if i == layers:
            dZ = values['A' + str(i)] - y
        else:
            dZ = (np.dot(parameters['W' + str(i+1)].T, dZ))*dif_sigmoid(values['Z'+str(i)])
        
        grads['W' + str(i)] = (1/m)*np.dot(dZ, values['A' + str(i-1)].T)
        grads['B' + str(i)] = (1/m)*np.sum(dZ, axis = 1, keepdims = True)
        
    return grads

In [None]:
def update_parameters(parameters, grads, alpha):
    layers = len(parameters)//2
    for i in range(1, layers+1):
        parameters['W'+str(i)] = parameters['W'+str(i)] - alpha*grads['W'+str(i)]
        parameters['B'+str(i)] = parameters['B'+str(i)] - alpha*grads['B'+str(i)]
        
    return parameters

In [None]:
def train_model(X, Y, layer_sizes, epochs, alpha):
    parameters = init_parameters(layer_sizes)
    for i in range(epochs):
        values = forward_propagation(X.T, parameters)
        cost = compute_cost(values['A' + str(len(parameters)//2)], Y.T)
        print("After epoch " + str(i+1) + " cost =  " + str(cost))
        grads = backward_propagation(Y.T, parameters, values)
        parameters = update_parameters(parameters, grads, alpha)
    
    return parameters

In [None]:
def predict(parameters, X):
    values = forward_propagation(X.T, parameters)
    return values['A' + str(len(values)//2)].T

In [None]:
layer_sizes = [784, 256, 26] #input layer with 784 neurons, hidden layer with 256 neurons, output layer with 26 neurons
epochs = 15
alpha = 0.00001
parameters = train_model(x_train, y_train, layer_sizes, epochs, alpha)

After epoch 1 cost =  3.2547424341532976
After epoch 2 cost =  3.0497742764466325
After epoch 3 cost =  2.981012421083142
After epoch 4 cost =  2.9592787449475315
After epoch 5 cost =  2.94873512334498
After epoch 6 cost =  2.942451015260368
After epoch 7 cost =  2.9382984538718846
After epoch 8 cost =  2.935315172618956
After epoch 9 cost =  2.933024696080624
After epoch 10 cost =  2.931173894253088
After epoch 11 cost =  2.9296184109614356
After epoch 12 cost =  2.9282703452833476
After epoch 13 cost =  2.927073037612833
After epoch 14 cost =  2.925988202215939
After epoch 15 cost =  2.9249889626192442


In [None]:
pred_y = predict(parameters, x_test)

In [None]:
from sklearn.metrics import accuracy_score
pred_y = np.argmax(pred_y, axis = 1)
y_test = np.argmax(y_test, axis = 1)
print(accuracy_score(y_test, pred_y, normalize = False)) #prints no. of correctly classified test data
print(accuracy_score(y_test, pred_y)) #prints percentage of correctly classified test data

9293
0.1559437508390388
