In [1]:
import numpy as np
import time
import pickle
import os
import matplotlib.pyplot as plt
import warnings
import pandas as pd

warnings.filterwarnings('ignore')

In [2]:
print("----------------Reading the Data-------------------------")
PATH = os.getcwd()
os.chdir('Alphabets/')

X_train = pd.read_csv('train.csv', sep=',', header=None, index_col=False)
X_test = pd.read_csv('test.csv', sep=',', header=None, index_col=False)

train_class = X_train[X_train.columns[-1]]
test_actual_class = X_test[X_test.columns[-1]]

X_train = X_train.drop(X_train.columns[-1], axis=1)
X_test = X_test.drop(X_test.columns[-1], axis=1)

print("----------------Data Reading completed-------------------")

os.chdir('../')

X_train = X_train/255
X_test = X_test/255
m = X_train.shape[0] # Number of Training Samples
n = X_train.shape[1] # Number of input features

print("The total number of training samples = {}".format(m))
print("The number of features = {}".format(n))

----------------Reading the Data-------------------------
----------------Data Reading completed-------------------
The total number of training samples = 13000
The number of features = 784


In [3]:
#To get the one hot encoding of each label
print("--------Perform 1-hot encoding of class labels------------")

train_class_enc = pd.get_dummies(train_class).to_numpy()
test_actual_class_enc = pd.get_dummies(test_actual_class).to_numpy()

--------Perform 1-hot encoding of class labels------------


In [4]:
#Add the intercept term to the data samples both in training and test dataset
X_train = np.hstack((np.ones((m,1)),X_train.to_numpy()))
X_test = np.hstack((np.ones((X_test.shape[0],1)),X_test.to_numpy()))

In [5]:
lr = 0.1
arch = [2,2,3] #means one hidden layer with 2 perceptrons 
# Run with 1000 units in one hidden layer arch shows no change in error with 0.1 lr and full dataset

M = 100 # Mini-Batch Size
r = np.max(train_class) + 1 # Default value of the number of classes = 26

In [62]:
#Theta Initialization 
theta = []

for i in range(len(arch)+1):
    if i == 0:
        dim0=n+1
        dim1=arch[i]
    elif (i == len(arch)):
        dim0=arch[i-1]
        dim1 = r
    else:
        dim0=arch[i-1]
        dim1= arch[i]
        
    #theta.append(2*np.random.random((dim0, dim1))-1)
    theta.append(np.zeros((dim0, dim1)))

In [63]:
def activation(x):
    return 1/(1+np.exp(-x))

In [64]:
def forward_prop(data, theta):
    fm = []
    fm.append(data)
    for l in range(len(theta)):
        fm.append(activation(np.dot(fm[l], theta[l])))
    #l1 = activation(np.dot(l0, theta[0]))
    #l2 = activation(np.dot(l1, theta[1]))
    return fm

In [65]:
fm = forward_prop(X_train, theta)

In [66]:
def cost_total(X, theta, Y, m):
    fm = forward_prop(X, theta)
    cost = (1/(2*m))*np.sum((Y-fm[-1])**2)
    return cost

In [67]:
cost_total(X_train, theta, train_class_enc, m)

3.25

In [68]:
print(theta)

[array([[0., 0.],
       [0., 0.],
       [0., 0.],
       ...,
       [0., 0.],
       [0., 0.],
       [0., 0.]]), array([[0., 0.],
       [0., 0.]]), array([[0., 0., 0.],
       [0., 0., 0.]]), array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])]


In [69]:
for it in range(2000):
    delta = [None]*len(fm)
    # Forward Propagation generic
    fm = forward_prop(X_train, theta)
    if (it % 10 == 0):
        print("Error = "+str(cost_total(X_train, theta, train_class_enc, m)))
        #print("Error = "+str(np.mean(np.abs(train_class_enc-l2))))

    #Backward Propagation generic
    
    #delta_output = (1/m)*(train_class_enc - fm[-1])*fm[-1]*(1-fm[-1])

    for l in range(len(fm)-1, 0, -1):
        if (l == len(fm)-1):
            delta[l] = ((1/m)*(train_class_enc - fm[l])*fm[l]*(1-fm[l]))
        else:
            delta[l]=(np.dot(delta[l+1], theta[l].T)*fm[l]*(1-fm[l]))
        
    #delta_l1 = np.dot(delta_l2, theta2.T)*l1*(1-l1)

    for t in range(len(theta)):
        theta[t] += lr*np.dot(fm[t].T, delta[t+1])
#     theta1 += lr*np.dot(l0.T, delta_l1)
#     theta2 += lr*np.dot(l1.T, delta_l2)

Error = 3.25
Error = 3.0004761152782558
Error = 2.766758010558254
Error = 2.5433632964699298
Error = 2.3280665121291917
Error = 2.121096100285749
Error = 1.9242912456150978
Error = 1.7402108005022272
Error = 1.57127234374041
Error = 1.4190818562162617
Error = 1.2841404424262488
Error = 1.1660103366874506
Error = 1.0637858797189979
Error = 0.9764754003172456
Error = 0.902998570668764
Error = 0.8419927560810644
Error = 0.7917948954895361
Error = 0.7506299580534488
Error = 0.7168224829740912
Error = 0.688920549219516
Error = 0.6657309383797322
Error = 0.6463022049903381
Error = 0.6298871438238185
Error = 0.6159022849197531
Error = 0.6038919200004105
Error = 0.5934987725450422
Error = 0.5844410781676542
Error = 0.5764950491038525
Error = 0.5694815837638428
Error = 0.5632562194692143
Error = 0.5577015250862665
Error = 0.5527213175803376
Error = 0.5482362406403849
Error = 0.5441803627628532
Error = 0.540498541620986
Error = 0.5371443675883979
Error = 0.5340785477103381
Error = 0.531267626853

In [None]:
for it in range(20000):
    # Forward Propagation
    l0 = X_train
    l1 = activation(np.dot(l0, theta1))
    l2 = activation(np.dot(l1, theta2))

    if (it % 1000 == 0):
        print("Error = "+str(cost_total(X_train, [theta1, theta2], train_class_enc, m)))
        #print("Error = "+str(np.mean(np.abs(train_class_enc-l2))))

    #Backward Propagation
    delta_l2 = (1/m)*(train_class_enc - l2)*l2*(1-l2)

    delta_l1 = np.dot(delta_l2, theta2.T)*l1*(1-l1)

    theta1 += lr*np.dot(l0.T, delta_l1)
    theta2 += lr*np.dot(l1.T, delta_l2)

In [70]:
pred_class = forward_prop(X_test, theta)
test_pred_class = pred_class[-1]
for i in range(len(test_pred_class)):
    test_pred_class[i][test_pred_class[i] == np.max(test_pred_class[i])] = 1
    test_pred_class[i][test_pred_class[i] != np.max(test_pred_class[i])] = 0
    
    
test_acc = 0
for i in range(len(test_actual_class_enc)):
    if (np.array_equal(test_pred_class[i], test_actual_class_enc[i])):
        test_acc+=1
test_acc /= X_test.shape[0]

print("The Test Accuracy of the model = {}%".format(test_acc*100))

The Test Accuracy of the model = 3.8461538461538463%
