In [None]:
import csv
import os

import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn import metrics
from tensorflow import keras
from tqdm import tqdm

In [None]:
def create_op_vec(i, num_classes=10):
    """Build a one-hot column vector for a single class index.

    Args:
        i: Integer class index to mark; must be a valid index into a
            vector of length ``num_classes``.
        num_classes: Length of the vector. Defaults to 10, which reproduces
            the previously hard-coded size.

    Returns:
        A float NumPy array of shape ``(num_classes, 1)`` holding 1 at row
        ``i`` and 0 everywhere else.
    """
    op = np.zeros((num_classes, 1))
    op[i] = 1
    return op

def oneHot(inp, num_classes=10):
    """One-hot encode a 1-D sequence of integer class labels.

    The encoding is done with a single vectorised assignment instead of
    building one small array per label in a Python loop.

    Args:
        inp: 1-D sequence or array of integer class indices.
        num_classes: Number of classes, i.e. the length of each one-hot
            vector. Defaults to 10, the previously hard-coded size.

    Returns:
        A float array of shape ``(len(inp), num_classes, 1)``; entry
        ``[k, inp[k], 0]`` is 1 and every other entry is 0. An empty input
        yields an array of shape ``(0, num_classes, 1)``, so callers can
        reshape the result the same way regardless of length.
    """
    labels = np.asarray(inp)
    count = labels.shape[0]
    outp = np.zeros((count, num_classes, 1))
    # Skip the fancy-index assignment for empty input: an empty list becomes
    # a float array, which NumPy refuses to use as an index.
    if count:
        outp[np.arange(count), labels, 0] = 1
    return outp

In [None]:
# loading data
handwriting_mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = handwriting_mnist.load_data()

# pre-processing data
# Images: flatten every image to 784 values, transpose so each example is a
# column, then divide by 255.
train_images = train_images.reshape(-1, 784).T / 255
test_images = test_images.reshape(-1, 784).T / 255
# Labels: one-hot encode, flatten each encoding to 10 values, then transpose
# so each example is a column.
train_labels = oneHot(train_labels).reshape(-1, 10).T
test_labels = oneHot(test_labels).reshape(-1, 10).T

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# Print the shape of each prepared array, one per line.
for prepared in (train_images, test_images, train_labels, test_labels):
    print(prepared.shape)

(784, 60000)
(784, 10000)
(10, 60000)
(10, 10000)


In [None]:
# cost function

In [None]:
def cost_function(parameters, A, Y, lambd):
    """Cross-entropy cost with an L2 penalty on the weight matrices.

    Args:
        parameters: Dict holding ``"W1", "b1", "W2", "b2", ...``; only the
            ``W`` entries contribute to the penalty.
        A: Output activations, same shape as ``Y``.
        Y: Target values; ``Y.shape[1]`` is used as the number of examples.
        lambd: L2 regularisation strength (0 disables the penalty).

    Returns:
        ``-sum(Y*log(A) + (1-Y)*log(1-A)) / m`` plus
        ``lambd / (2*m)`` times the sum of squared weights.
    """
    m = Y.shape[1]

    # A saturated sigmoid can return exactly 0 or 1, and 0 * log(0) evaluates
    # to nan, which would poison the whole cost. Clipping keeps both logs
    # finite while leaving non-saturated activations untouched.
    eps = 1e-15
    A = np.clip(np.asarray(A, dtype=np.float64), eps, 1 - eps)

    F = Y * np.log(A) + (1 - Y) * np.log(1 - A)
    J = -np.sum(np.sum(F, axis=1)) / m

    L = len(parameters) // 2
    parasum = 0
    for l in range(1, L + 1):
        parasum += np.sum(parameters['W' + str(l)] ** 2)

    return J + parasum * lambd / (2 * m)

In [None]:
# FORWARD PROPAGATION

In [None]:
def initialise_parameters(layer_dims, seed=None):
    """Create small random weights and biases for every layer.

    Args:
        layer_dims: Sequence of layer sizes, input layer first. Layer ``l``
            gets a weight matrix of shape ``(layer_dims[l], layer_dims[l-1])``
            and a bias of shape ``(layer_dims[l], 1)``.
        seed: Optional integer seed. When given, values are drawn from a
            private ``RandomState(seed)`` so the result is reproducible.
            When ``None`` (the default), NumPy's global generator is used
            exactly as before, so an earlier ``np.random.seed`` call still
            governs the result.

    Returns:
        Dict with keys ``"W1", "b1", ..., "WL", "bL"``; every entry is a
        standard-normal draw scaled by 0.01. Fewer than two layer sizes
        yields an empty dict.
    """
    L = len(layer_dims) - 1
    rng = np.random if seed is None else np.random.RandomState(seed)

    parameters = {}

    for l in range(1, L + 1):
        parameters["W" + str(l)] = rng.randn(layer_dims[l], layer_dims[l-1]) * 0.01
        parameters["b" + str(l)] = rng.randn(layer_dims[l], 1) * 0.01

        # Guard against a mis-specified layer list.
        assert(parameters['W' + str(l)].shape == (layer_dims[l], layer_dims[l-1]))
        assert(parameters['b' + str(l)].shape == (layer_dims[l], 1))

    return parameters

In [None]:
def relu(Z):
    """Rectified linear unit: the element-wise maximum of 0 and ``Z``."""
    floor = 0
    activated = np.maximum(floor, Z)
    return activated

In [None]:
def sigmoid(Z):
    """Numerically stable logistic function ``1 / (1 + exp(-Z))``.

    The direct formula evaluates ``exp(-Z)``, which overflows (with a
    RuntimeWarning) for large negative ``Z``. Here only ``exp(-|Z|)`` is
    evaluated, which always lies in (0, 1], and the algebraically
    equivalent form is chosen per element:

        Z >= 0:  1 / (1 + exp(-Z))
        Z <  0:  exp(Z) / (1 + exp(Z))

    Args:
        Z: Scalar or array of pre-activations.

    Returns:
        The element-wise sigmoid of ``Z``: an array for array input, a
        NumPy scalar for scalar input.
    """
    e = np.exp(-np.abs(Z))
    out = np.where(np.asarray(Z) >= 0, 1 / (1 + e), e / (1 + e))
    # np.where always produces an array; unwrap 0-d results so scalar input
    # still yields a scalar, as the plain arithmetic expression did.
    return out if np.ndim(Z) else out[()]

In [None]:
def linear_jump(A_prev, W, b):
    """Affine step of one layer: ``np.dot(W, A_prev) + b``."""
    weighted = np.dot(W, A_prev)
    return weighted + b

In [None]:
def forward_prop(parameters, X, Y, i, lambd, print_every=1):
    """Run one forward pass and optionally report cost and accuracy.

    Hidden layers (1 .. L-1) use ReLU; the output layer L uses sigmoid.

    Args:
        parameters: Dict with keys ``"W1", "b1", ..., "WL", "bL"``.
        X: Input activations fed to the first layer.
        Y: Targets; the class of each example is taken as ``argmax`` over
            axis 0, i.e. one example per column.
        i: Iteration number, used only for progress reporting.
        lambd: Regularisation strength, forwarded to ``cost_function`` for
            the reported cost.
        print_every: Report when ``i % print_every == 0``. The default of 1
            reports on every call (the previous behaviour, where the check
            was the always-true ``i % 1 == 0``). Pass 0 or ``None`` to
            silence reporting.

    Returns:
        Tuple ``(cacheA, cacheZ, AL)`` where ``cacheA`` is
        ``[X, A1, ..., A(L-1)]``, ``cacheZ`` is ``[Z1, ..., ZL]`` and ``AL``
        is the sigmoid output of the last layer.
    """
    L = len(parameters) // 2
    cacheZ = []
    cacheA = [X]
    A_prev = X

    for l in range(1, L):
        Z = linear_jump(A_prev, parameters["W" + str(l)], parameters["b" + str(l)])
        A_prev = relu(Z)
        cacheZ.append(Z)
        cacheA.append(A_prev)

    Z_final = linear_jump(A_prev, parameters["W" + str(L)], parameters["b" + str(L)])
    AL = sigmoid(Z_final)
    cacheZ.append(Z_final)

    # Cost and accuracy are only needed for the report, so compute them
    # only when a report is actually due.
    if print_every and i % print_every == 0:
        predictions = np.argmax(AL, axis=0)
        labels = np.argmax(Y, axis=0)
        accuracy = np.mean(predictions == labels)
        print("At iteration " + str(i))
        print("Cost = " + str(cost_function(parameters, AL, Y, lambd)))
        print("Accuracy = " + str(accuracy))

    return cacheA, cacheZ, AL

In [None]:
# BACKWARD PROPAGATION

In [None]:
def relu_backward(Z):
    """Derivative of ReLU with respect to its input, element-wise.

    Implemented as a single vectorised comparison rather than a Python loop
    over every element, so it stays fast on large activation matrices and
    works for arrays of any rank.

    Args:
        Z: Array of pre-activations. It is not modified.

    Returns:
        An array with the same shape and dtype as ``Z`` holding 1 where
        ``Z > 0`` and 0 elsewhere (including at ``Z == 0`` and at NaN).
    """
    Z = np.asarray(Z)
    return (Z > 0).astype(Z.dtype)

In [None]:
def sigmoid_backward(Z):
    """Derivative of the sigmoid at ``Z``: ``sigmoid(Z) * (1 - sigmoid(Z))``."""
    activation = sigmoid(Z)
    complement = 1 - activation
    return activation * complement

In [None]:
def back_prop(parameters, X, Y, iteration, lambd):
    """Compute gradients of the regularised cost for every layer.

    Runs ``forward_prop`` first (which also prints progress), then walks
    the layers from the output back to the first one.

    Args:
        parameters: Dict with keys ``"W1", "b1", ..., "WL", "bL"``.
        X: Input activations; ``X.shape[1]`` is used as the number of
            examples ``m``.
        Y: Targets, same shape as the network output.
        iteration: Iteration number, forwarded to ``forward_prop``.
        lambd: L2 regularisation strength; adds ``lambd * W / m`` to each
            weight gradient.

    Returns:
        Dict with keys ``"dW1", "db1", ..., "dWL", "dbL"``.
    """
    m = X.shape[1]
    L = len(parameters) // 2
    cacheA, cacheZ, AL = forward_prop(parameters, X, Y, iteration, lambd)
    gradients = {}

    # Output layer: with a sigmoid output and the cross-entropy cost used in
    # cost_function, the gradient w.r.t. the last pre-activation is AL - Y.
    dZ = AL - Y

    for l in range(L, 0, -1):
        W = parameters["W" + str(l)]
        # cacheA[l-1] is the activation feeding layer l (cacheA[0] is X).
        gradients['dW' + str(l)] = np.dot(dZ, cacheA[l-1].T)/m + lambd * W / m
        gradients['db' + str(l)] = np.sum(dZ, axis=1, keepdims=True)/m

        # Propagate to the previous layer only if there is one. The gradient
        # with respect to the input X is never used, and computing it is a
        # large matrix product.
        if l > 1:
            dA = np.dot(W.T, dZ)
            # cacheZ[l-2] is the pre-activation of layer l-1 (cacheZ[0] is Z1).
            dZ = dA * relu_backward(cacheZ[l-2])

    return gradients

In [None]:
# UPDATING PARAMETERS

def jump(parameters, gradients, alpha):
    """Take one gradient-descent step on every weight and bias.

    Each ``parameters[key]`` is decreased in place by
    ``alpha * gradients["d" + key]``.

    Args:
        parameters: Dict with keys ``"W1", "b1", ..., "WL", "bL"``.
        gradients: Dict with the matching ``"dW1", "db1", ...`` keys.
        alpha: Learning rate.

    Returns:
        The same ``parameters`` dict that was passed in, after the update.
    """
    layer_count = len(parameters) // 2
    for idx in range(1, layer_count + 1):
        for prefix in ("W", "b"):
            key = prefix + str(idx)
            parameters[key] -= alpha * gradients["d" + key]
    return parameters

In [None]:
# TRAINING OUR MODEL

def train(X, Y, alpha, iterations, layer_dims, parameters, lambd):
    """Run ``iterations`` rounds of gradient descent.

    Each round calls ``back_prop`` for the gradients and ``jump`` to apply
    them, with a tqdm progress bar over the rounds.

    Args:
        X: Training inputs, passed to ``back_prop``.
        Y: Training targets, passed to ``back_prop``.
        alpha: Learning rate, passed to ``jump``.
        iterations: Number of update rounds.
        layer_dims: Accepted but not used by this function.
        parameters: Starting weights and biases.
        lambd: Regularisation strength, passed to ``back_prop``.

    Returns:
        The parameters returned by the final ``jump`` call (or the input
        ``parameters`` unchanged when ``iterations`` is 0).
    """
    for step in tqdm(range(iterations)):
        step_gradients = back_prop(parameters, X, Y, step, lambd)
        parameters = jump(parameters, step_gradients, alpha)

    return parameters

In [None]:
# SPECIFYING NEURAL NETWORK

# Layer sizes: input, output, then the hidden layers.
n_x, n_y = 784, 10
# n_h3 is defined but not included in layer_dims below.
n_h1 = n_h2 = n_h3 = 500

layer_dims = np.array([n_x, n_h1, n_h2, n_y])
r_parameters = initialise_parameters(layer_dims)

In [None]:
# GETTING OPTIMAL PARAMETERS

# Start from the freshly initialised r_parameters. The previous call passed
# `params`, which no earlier cell defines, so it raised NameError on a
# fresh kernel (Restart & Run All).
# NOTE(review): the recorded output below starts at ~0.977 accuracy, so it
# appears to come from a run that resumed from already-trained weights —
# confirm whether resuming from saved weights is the intended workflow.
params = train(train_images, train_labels, 0.02, 2, layer_dims, r_parameters, 0)

  0%|          | 0/2 [00:00<?, ?it/s]

At iteration 0
Cost = 0.1658304503875758
Accuracy = 0.9774333333333334


 50%|█████     | 1/2 [01:00<01:00, 60.64s/it]

At iteration 1
Cost = 0.16574492786088899
Accuracy = 0.9775


100%|██████████| 2/2 [02:00<00:00, 60.49s/it]


In [None]:
# Colab-only step: mount Google Drive at /content/drive/ so the parameter
# files under "/content/drive/My Drive/..." can be read by the next cell.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
os.chdir('/content/drive/My Drive/Digit Recognizer/Parameters')

# Read W1..W3 and b1..b3 from their CSV files into params5.
# NOTE(review): read_csv is called with default arguments, so the first row
# of each file is taken as column names rather than data — this matches the
# filler first row written by the saving cell; confirm the files were
# produced that way.
params5 = {}
for l in range(1, 4):
    for kind in ('W', 'b'):
        key = kind + str(l)
        params5[key] = np.array(pd.read_csv('params ' + key + '.csv').values)


In [None]:
# Evaluate the loaded weights on both splits. forward_prop prints the cost
# and accuracy itself; element 2 of its return value is the output-layer
# activation, which is all that is kept here.
print("=== TRAIN SET ===")
A = forward_prop(params5, train_images, train_labels, 0, 0)[2]
print("\n=== TEST SET ===")
A_t = forward_prop(params5, test_images, test_labels, 0, 0)[2]

=== TRAIN SET ===
At iteration 0
Cost = 0.16765014047393623
Accuracy = 0.9769666666666666

=== TEST SET ===
At iteration 0
Cost = 0.1500298228749556
Accuracy = 0.9787


In [None]:
# Per-class precision / recall / F1 on the test set. Both the one-hot
# targets and the network outputs are turned into class indices by taking
# argmax over axis 0.
true_digits = np.argmax(test_labels, axis=0)
predicted_digits = np.argmax(A_t, axis=0)
print(metrics.classification_report(true_digits, predicted_digits, digits = 4))

              precision    recall  f1-score   support

           0     0.9798    0.9888    0.9843       980
           1     0.9817    0.9938    0.9877      1135
           2     0.9825    0.9767    0.9796      1032
           3     0.9763    0.9802    0.9783      1010
           4     0.9786    0.9786    0.9786       982
           5     0.9776    0.9765    0.9770       892
           6     0.9761    0.9812    0.9787       958
           7     0.9746    0.9718    0.9732      1028
           8     0.9813    0.9713    0.9763       974
           9     0.9779    0.9663    0.9721      1009

    accuracy                         0.9787     10000
   macro avg     0.9786    0.9785    0.9786     10000
weighted avg     0.9787    0.9787    0.9787     10000



In [None]:
# SAVING THE PARAMETERS IN A CSV FILE

os.chdir('c:/Users/hp-2111/Desktop/ProjectRelated/Parameters/')
L = len(params) // 2
for l in tqdm(range(1, L+1)):
    # One CSV file per array ("params W<l>.csv" / "params b<l>.csv"). The
    # first row written is a filler row of 2.0s, one per column, followed
    # by the array's rows.
    for prefix in ('W', 'b'):
        matrix = params[prefix + str(l)]
        with open('params ' + prefix + str(l) + '.csv', 'w', newline='') as file:
            out = csv.writer(file)
            out.writerow(np.full(matrix.shape[1], 2.0))
            out.writerows(matrix)