### Importing necessary Libraries

In [20]:

import numpy as np 
import pandas as pd 
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Loading the Datasets

In [5]:

train = pd.read_csv("/content/drive/MyDrive/logistic_data/emnist-letters-train.csv")
test = pd.read_csv("/content/drive/MyDrive/logistic_data/emnist-letters-test.csv")

In [6]:
y1 = np.array(train.iloc[:,0].values)
x1 = np.array(train.iloc[:,1:].values)
y2 = np.array(test.iloc[:,0].values)
x2 = np.array(test.iloc[:,1:].values)

###Normalizing features

In [8]:
X_train = x1.transpose()/255.0
Y_train = y1.reshape(1,y1.shape[0])-1
X_test = x2.transpose()/255.0
Y_test = y2.reshape(1,y2.shape[0])-1

#### Creating neural networks class

In [16]:
class Neural:
  def init_params():
    W1 = np.random.rand(26, 784) - 0.5
    b1 = np.random.rand(26, 1) - 0.5
    W2 = np.random.rand(26, 26) - 0.5
    b2 = np.random.rand(26, 1) - 0.5
    return W1, b1, W2, b2

  def tanh(Z):
    return np.tanh(Z)

  def softmax(Z):
      A = np.exp(Z) / sum(np.exp(Z))
      return A
      
  def forward_prop(W1, b1, W2, b2, X):
      Z1 = W1.dot(X) + b1
      A1 = Neural.tanh(Z1)
      Z2 = W2.dot(A1) + b2
      A2 = Neural.softmax(Z2)
      return Z1, A1, Z2, A2

  def derivative_tanh(Z):
    return (1 - np.power(np.tanh(Z), 2))

  def label_binarizer(Y):
      label_binarizer_Y = np.zeros((Y.size, Y.max() + 1))
      label_binarizer_Y[np.arange(Y.size), Y] = 1
      label_binarizer_Y = label_binarizer_Y.T
      return label_binarizer_Y

  def backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y):
      m = X_train.shape[1]
      label_binarizer_Y = Neural.label_binarizer(Y)
      dZ2 = A2 - label_binarizer_Y
      dW2 = 1 / m * dZ2.dot(A1.T)
      db2 = 1 / m * np.sum(dZ2)
      dZ1 = W2.T.dot(dZ2) * Neural.derivative_tanh(Z1)
      dW1 = 1 / m * dZ1.dot(X.T)
      db1 = 1 / m * np.sum(dZ1)
      return dW1, db1, dW2, db2

  def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
      W1 = W1 - alpha * dW1
      b1 = b1 - alpha * db1    
      W2 = W2 - alpha * dW2  
      b2 = b2 - alpha * db2    
      return W1, b1, W2, b2
  def get_predictions(A2):
      return np.argmax(A2, 0)

  def get_accuracy(predictions, Y):
      return np.sum(predictions == Y) / Y.size

  def gradient_descent(X, Y, alpha, iterations):
      W1, b1, W2, b2 = Neural.init_params()
      for i in range(iterations+1):
          Z1, A1, Z2, A2 = Neural.forward_prop(W1, b1, W2, b2, X)
          dW1, db1, dW2, db2 = Neural.backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y)
          W1, b1, W2, b2 = Neural.update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
          if i % 100 == 0:
              print("Iteration: ", i)
              predictions = Neural.get_predictions(A2)
              print(Neural.get_accuracy(predictions, Y))
      return W1, b1, W2, b2
  
  def test_net(W1,b1,W2,b2,X,y):
    Z1, A1, Z2, A2 = Neural.forward_prop(W1, b1, W2, b2, X)
    predictions = Neural.get_predictions(A2)
    print(Neural.get_accuracy(predictions, y)) 


### Predicting accuracy on train set

In [17]:
W1, b1, W2, b2 = Neural.gradient_descent(X_train, Y_train, 1.0, 1000)

Iteration:  0
0.0448766314935979
Iteration:  100
0.5978558317098166
Iteration:  200
0.6572146082726157
Iteration:  300
0.6894897465061544
Iteration:  400
0.709929165868985
Iteration:  500
0.7248392436851766
Iteration:  600
0.7362695525850517
Iteration:  700
0.7466975979459228
Iteration:  800
0.7547607518102681
Iteration:  900
0.7613036182839897
Iteration:  1000
0.7676325183842161


###Predicting accuracy on test set

In [18]:
Neural.test_net(W1, b1, W2, b2, X_test, Y_test)

0.7307926211230489
