In [1]:
import numpy as np
import time
import pickle
import os
import matplotlib.pyplot as plt
import warnings
import pandas as pd

warnings.filterwarnings('ignore')

In [2]:
print("----------------Reading the Data-------------------------")
PATH = os.getcwd()
os.chdir('Alphabets/')

X_train = pd.read_csv('train.csv', sep=',', header=None, index_col=False)
X_test = pd.read_csv('test.csv', sep=',', header=None, index_col=False)
np.random.shuffle(X_train.to_numpy())
train_class = X_train[X_train.columns[-1]]
test_actual_class = X_test[X_test.columns[-1]]

X_train = X_train.drop(X_train.columns[-1], axis=1)
X_test = X_test.drop(X_test.columns[-1], axis=1)

print("----------------Data Reading completed-------------------")

os.chdir('../')

X_train = X_train/255
X_test = X_test/255

m = X_train.shape[0] # Number of Training Samples

X_valid = X_train.iloc[(int(0.85*m)):]
valid_class = train_class[(int(0.85*m)):]
X_train = X_train.iloc[0:int(0.85*m)]
train_class = train_class[0:int(0.85*m)]


m = X_train.shape[0] # Number of Training Samples
n = X_train.shape[1] # Number of input features

print("The total number of training samples = {}".format(m))
print("The total number of validation samples = {}".format(X_valid.shape[0]))

print("The number of features = {}".format(n))

----------------Reading the Data-------------------------
----------------Data Reading completed-------------------
The total number of training samples = 11050
The total number of validation samples = 1950
The number of features = 784


In [3]:
#To get the one hot encoding of each label
print("--------Perform 1-hot encoding of class labels------------")

train_class_enc = pd.get_dummies(train_class).to_numpy()
valid_class_enc = pd.get_dummies(valid_class).to_numpy()
test_actual_class_enc = pd.get_dummies(test_actual_class).to_numpy()

--------Perform 1-hot encoding of class labels------------


In [4]:
#Add the intercept term to the data samples both in training and test dataset
X_train = np.hstack((np.ones((m,1)),X_train.to_numpy()))
X_valid = np.hstack((np.ones((X_valid.shape[0],1)), X_valid.to_numpy()))
X_test = np.hstack((np.ones((X_test.shape[0],1)),X_test.to_numpy()))

In [5]:
lr = 0.1
arch_test = [1,5,10,50,100]
arch = [arch_test[3]] #means one hidden layer with 2 perceptrons 
batch_size = 100 # Mini-Batch Size
r = np.max(train_class) + 1 # Default value of the number of classes = 26

In [6]:
#Mini-Batch formation
mini_batch = [(X_train[i:i+batch_size,:], train_class_enc[i:i+batch_size]) for i in range(0, m, batch_size)]
print("The number of mini-batches formed is = {}".format(len(mini_batch)))

The number of mini-batches formed is = 111


In [7]:
#Theta Initialization 
def theta_init(arch=[50]):
    theta = []
    for i in range(len(arch)+1):
        if i == 0:
            dim0=n+1
            dim1=arch[i]
        elif (i == len(arch)):
            dim0=arch[i-1]
            dim1 = r
        else:
            dim0=arch[i-1]
            dim1= arch[i]

        theta.append(2*np.random.random((dim0, dim1))-1)
        #theta.append(np.zeros((dim0, dim1)))
    return theta

In [8]:
def activation(x):
    return 1/(1+np.exp(-x))

In [9]:
def relu_act(x):
    return np.maximum(0.0, x)

In [10]:
def deriv_relu(x):
    x[x<=0] = 0
    x[x>0] = 1
    return x

In [11]:
def forward_prop(data, theta):
    fm = []
    fm.append(data)
    for l in range(len(theta)):
        if (l != len(theta)-1):
            #print("relu")
            fm.append(relu_act(np.dot(fm[l], theta[l])))
        else:
            fm.append(activation(np.dot(fm[l], theta[l])))
            #print("sigmoid output")
    return fm

In [14]:
theta = theta_init([100, 100])
print(theta[2].shape)
cost_total(X_train, theta, train_class_enc, m)

(100, 26)


7.565819744386329

In [15]:
def cost_total(X, theta, Y, m):
    fm = forward_prop(X, theta)
    cost = (1/(2*m))*np.sum((Y-fm[-1])**2)
    return cost

In [16]:
def calc_accuracy(data, theta, actual_class):
    pred_class = forward_prop(data, theta)
    test_pred_class = pred_class[-1]
    for i in range(len(test_pred_class)):
        test_pred_class[i][test_pred_class[i] == np.max(test_pred_class[i])] = 1
        test_pred_class[i][test_pred_class[i] != np.max(test_pred_class[i])] = 0


    test_acc = 0
    for i in range(len(actual_class)):
        if (np.array_equal(test_pred_class[i], actual_class[i])):
            test_acc+=1
    test_acc /= data.shape[0]

    #print("The Test Accuracy of the model = {}%".format(test_acc*100))
    return (test_acc*100)

In [17]:
epochs = []
train_accuracy = []
test_accuracy = []
train_time = []
valid_accuracy=[]

In [21]:
arch=[50]

theta = theta_init(arch)
fm = forward_prop(X_train, theta)
#ite=1
epoch = 0
start = time.time()
cost_init = cost_total(X_valid, theta, valid_class_enc, m)

while(True):
    count = 0
    #lr = lr0/ite
    #print("learning rate = ", lr)

    print("Initial Cost on Val dataset for this epoch {} = {}".format(epoch, cost_init))

    for b in mini_batch:
        #if (lr < 0.1): lr = 0.1
        X_b = b[0]
        Y_b = b[1]
        fm = forward_prop(X_b, theta)
        delta = [None]*len(fm)

        if (count % 60 == 0):
            print("Error on this batch = "+str(cost_total(X_b, theta, Y_b, batch_size)))
        #Backward Propagation

        for l in range(len(fm)-1, 0, -1):
            if (l == len(fm)-1):
                delta[l] = ((1/batch_size)*(Y_b - fm[l])*fm[l]*(1-fm[l]))
            else:
                delta[l]=(np.dot(delta[l+1], theta[l].T)*deriv_relu(fm[l]))

        #delta_l1 = np.dot(delta_l2, theta2.T)*l1*(1-l1)

        for t in range(len(theta)):
            theta[t] += lr*np.dot(fm[t].T, delta[t+1]) 

        count+=1
    epoch+=1 #Number of epochs
    #ite+=1

    cost_final = cost_total(X_valid, theta, valid_class_enc, m)
    print("Cost on val dataset after {} epochs is = {}".format(epoch, cost_final))
    if (abs(cost_final-cost_init) < 1e-08):
        print("cost initial= {} , cost final={} , change in cost= {}".format(cost_init,cost_final, cost_final-cost_init))
        break
    cost_init = cost_final
epochs.append(epoch)
train_time.append(time.time()-start)
train_accuracy.append(calc_accuracy(X_train, theta, train_class_enc))
valid_accuracy.append(calc_accuracy(X_valid, theta, valid_class_enc))
test_accuracy.append(calc_accuracy(X_test, theta, test_actual_class_enc))
print("\n------------------------------------------------------------------------------")
print("The stats for number of units in the hidden layer arch= {} are as below:".format(arch))
print("------------------------------------------------------------------------------")
print("The number of epochs = {:2.3f}".format(epochs[-1]))
print("The training time = {:2.3f}sec".format(train_time[-1]))
print("The training accuracy is = {:2.3f}%".format(train_accuracy[-1]))
print("The validation accuracy is = {:2.3f}%".format(valid_accuracy[-1]))
print("The test accuracy is = {:2.3f}%".format(test_accuracy[-1]))
print("------------------------------------------------------------------------------\n")

Initial Cost on Val dataset for this epoch 0 = 1.0297571711980604
Error on this batch = 5.665003326991755
Error on this batch = 1.71209591208564
Cost on val dataset after 1 epochs is = 0.21020660416253134
Initial Cost on Val dataset for this epoch 1 = 0.21020660416253134
Error on this batch = 1.1886145277925402
Error on this batch = 1.0792025004153691
Cost on val dataset after 2 epochs is = 0.18022829104018706
Initial Cost on Val dataset for this epoch 2 = 0.18022829104018706
Error on this batch = 0.99718612321832
Error on this batch = 1.0345714884940203
Cost on val dataset after 3 epochs is = 0.17480441594685542
Initial Cost on Val dataset for this epoch 3 = 0.17480441594685542
Error on this batch = 0.9626170818322564
Error on this batch = 1.0163373887990665
Cost on val dataset after 4 epochs is = 0.17278537986912662
Initial Cost on Val dataset for this epoch 4 = 0.17278537986912662
Error on this batch = 0.942756781689722
Error on this batch = 1.0088744592284788
Cost on val dataset af

Error on this batch = 0.5075815048308543
Cost on val dataset after 40 epochs is = 0.08776440040843864
Initial Cost on Val dataset for this epoch 40 = 0.08776440040843864
Error on this batch = 0.49788807881133834
Error on this batch = 0.5072608053854852
Cost on val dataset after 41 epochs is = 0.0877215252579681
Initial Cost on Val dataset for this epoch 41 = 0.0877215252579681
Error on this batch = 0.4974849414407564
Error on this batch = 0.5070270949267959
Cost on val dataset after 42 epochs is = 0.0876778408563418
Initial Cost on Val dataset for this epoch 42 = 0.0876778408563418
Error on this batch = 0.4971011025502975
Error on this batch = 0.5068015971622066
Cost on val dataset after 43 epochs is = 0.08763595776739012
Initial Cost on Val dataset for this epoch 43 = 0.08763595776739012
Error on this batch = 0.4967077515517776
Error on this batch = 0.506519743415519
Cost on val dataset after 44 epochs is = 0.08759777821572966
Initial Cost on Val dataset for this epoch 44 = 0.08759777

Error on this batch = 0.5025912399499924
Cost on val dataset after 79 epochs is = 0.08689637450752793
Initial Cost on Val dataset for this epoch 79 = 0.08689637450752793
Error on this batch = 0.4864047175744361
Error on this batch = 0.5025128879936088
Cost on val dataset after 80 epochs is = 0.08688863996632741
Initial Cost on Val dataset for this epoch 80 = 0.08688863996632741
Error on this batch = 0.486108338610106
Error on this batch = 0.5024393802279115
Cost on val dataset after 81 epochs is = 0.08688130205349848
Initial Cost on Val dataset for this epoch 81 = 0.08688130205349848
Error on this batch = 0.48586847817503226
Error on this batch = 0.5023708963198286
Cost on val dataset after 82 epochs is = 0.0868742306253891
Initial Cost on Val dataset for this epoch 82 = 0.0868742306253891
Error on this batch = 0.48568277696256296
Error on this batch = 0.5023063768532053
Cost on val dataset after 83 epochs is = 0.08686777242910196
Initial Cost on Val dataset for this epoch 83 = 0.08686

Error on this batch = 0.5013605795424353
Cost on val dataset after 118 epochs is = 0.0866785884682593
Initial Cost on Val dataset for this epoch 118 = 0.0866785884682593
Error on this batch = 0.48510948236555884
Error on this batch = 0.5013377564131367
Cost on val dataset after 119 epochs is = 0.08667434937918495
Initial Cost on Val dataset for this epoch 119 = 0.08667434937918495
Error on this batch = 0.48510795154414643
Error on this batch = 0.5013131791804246
Cost on val dataset after 120 epochs is = 0.08667023123863149
Initial Cost on Val dataset for this epoch 120 = 0.08667023123863149
Error on this batch = 0.48510606424551145
Error on this batch = 0.5012889775502065
Cost on val dataset after 121 epochs is = 0.08666622272283217
Initial Cost on Val dataset for this epoch 121 = 0.08666622272283217
Error on this batch = 0.4851035314230146
Error on this batch = 0.5012654875000524
Cost on val dataset after 122 epochs is = 0.08666200602921306
Initial Cost on Val dataset for this epoch 1

Error on this batch = 0.4889550886806328
Cost on val dataset after 157 epochs is = 0.085401117709765
Initial Cost on Val dataset for this epoch 157 = 0.085401117709765
Error on this batch = 0.4802009966786105
Error on this batch = 0.4884812912857744
Cost on val dataset after 158 epochs is = 0.08536053318325297
Initial Cost on Val dataset for this epoch 158 = 0.08536053318325297
Error on this batch = 0.4801705217323921
Error on this batch = 0.4880589034761533
Cost on val dataset after 159 epochs is = 0.08532647169306075
Initial Cost on Val dataset for this epoch 159 = 0.08532647169306075
Error on this batch = 0.48014170259740824
Error on this batch = 0.4877446230789512
Cost on val dataset after 160 epochs is = 0.0852982547553957
Initial Cost on Val dataset for this epoch 160 = 0.0852982547553957
Error on this batch = 0.48011795441303196
Error on this batch = 0.48755669245992245
Cost on val dataset after 161 epochs is = 0.08527456887965539
Initial Cost on Val dataset for this epoch 161 =

Error on this batch = 0.4896763527028749
Cost on val dataset after 196 epochs is = 0.08459749553869321
Initial Cost on Val dataset for this epoch 196 = 0.08459749553869321
Error on this batch = 0.4701776253954422
Error on this batch = 0.49003628412468003
Cost on val dataset after 197 epochs is = 0.08457052687213856
Initial Cost on Val dataset for this epoch 197 = 0.08457052687213856
Error on this batch = 0.46943798219567184
Error on this batch = 0.49027998650759363
Cost on val dataset after 198 epochs is = 0.08454506845648685
Initial Cost on Val dataset for this epoch 198 = 0.08454506845648685
Error on this batch = 0.4687432332470452
Error on this batch = 0.4903752920762797
Cost on val dataset after 199 epochs is = 0.08452204499325114
Initial Cost on Val dataset for this epoch 199 = 0.08452204499325114
Error on this batch = 0.4681521113148547
Error on this batch = 0.4903324917352409
Cost on val dataset after 200 epochs is = 0.08450177401502265
Initial Cost on Val dataset for this epoch

Error on this batch = 0.48564060125040387
Cost on val dataset after 235 epochs is = 0.083976467998682
Initial Cost on Val dataset for this epoch 235 = 0.083976467998682
Error on this batch = 0.46048634666624716
Error on this batch = 0.48531017548818595
Cost on val dataset after 236 epochs is = 0.08395455197741994
Initial Cost on Val dataset for this epoch 236 = 0.08395455197741994
Error on this batch = 0.459940831117013
Error on this batch = 0.4850392587297412
Cost on val dataset after 237 epochs is = 0.08393386428215995
Initial Cost on Val dataset for this epoch 237 = 0.08393386428215995
Error on this batch = 0.45937674678482465
Error on this batch = 0.4847934873755088
Cost on val dataset after 238 epochs is = 0.08391526966032183
Initial Cost on Val dataset for this epoch 238 = 0.08391526966032183
Error on this batch = 0.4588093418760574
Error on this batch = 0.4845306730653868
Cost on val dataset after 239 epochs is = 0.08389882644241553
Initial Cost on Val dataset for this epoch 239

Error on this batch = 0.47536782697535857
Cost on val dataset after 274 epochs is = 0.08360564470561387
Initial Cost on Val dataset for this epoch 274 = 0.08360564470561387
Error on this batch = 0.45516981641751486
Error on this batch = 0.47521735349338473
Cost on val dataset after 275 epochs is = 0.08360069218434955
Initial Cost on Val dataset for this epoch 275 = 0.08360069218434955
Error on this batch = 0.4551744181190799
Error on this batch = 0.4750639381333591
Cost on val dataset after 276 epochs is = 0.08359587611600766
Initial Cost on Val dataset for this epoch 276 = 0.08359587611600766
Error on this batch = 0.4551795480891413
Error on this batch = 0.4749193654097983
Cost on val dataset after 277 epochs is = 0.08359115045643961
Initial Cost on Val dataset for this epoch 277 = 0.08359115045643961
Error on this batch = 0.4551841340139967
Error on this batch = 0.4747732385032269
Cost on val dataset after 278 epochs is = 0.08358658340594252
Initial Cost on Val dataset for this epoch

Error on this batch = 0.4705270645764034
Cost on val dataset after 313 epochs is = 0.08345552415457128
Initial Cost on Val dataset for this epoch 313 = 0.08345552415457128
Error on this batch = 0.4551909266763675
Error on this batch = 0.4704248572660707
Cost on val dataset after 314 epochs is = 0.08345104147349698
Initial Cost on Val dataset for this epoch 314 = 0.08345104147349698
Error on this batch = 0.45519125073638866
Error on this batch = 0.47032464867514523
Cost on val dataset after 315 epochs is = 0.08344662339351441
Initial Cost on Val dataset for this epoch 315 = 0.08344662339351441
Error on this batch = 0.4551918041226084
Error on this batch = 0.47022704484239747
Cost on val dataset after 316 epochs is = 0.08344281236178563
Initial Cost on Val dataset for this epoch 316 = 0.08344281236178563
Error on this batch = 0.45519174914143223
Error on this batch = 0.47014771164344943
Cost on val dataset after 317 epochs is = 0.08343842534400772
Initial Cost on Val dataset for this epo

Error on this batch = 0.4681413562990161
Cost on val dataset after 352 epochs is = 0.0833262573403632
Initial Cost on Val dataset for this epoch 352 = 0.0833262573403632
Error on this batch = 0.45522961760932534
Error on this batch = 0.46810691419340605
Cost on val dataset after 353 epochs is = 0.08332408303528321
Initial Cost on Val dataset for this epoch 353 = 0.08332408303528321
Error on this batch = 0.45523272011613986
Error on this batch = 0.4680642135318891
Cost on val dataset after 354 epochs is = 0.083321778092818
Initial Cost on Val dataset for this epoch 354 = 0.083321778092818
Error on this batch = 0.45523548920031487
Error on this batch = 0.4680288734435473
Cost on val dataset after 355 epochs is = 0.0833196317978899
Initial Cost on Val dataset for this epoch 355 = 0.0833196317978899
Error on this batch = 0.45523865291871474
Error on this batch = 0.46798876433600456
Cost on val dataset after 356 epochs is = 0.08331742954825701
Initial Cost on Val dataset for this epoch 356 

Error on this batch = 0.4667218201708236
Cost on val dataset after 391 epochs is = 0.0832300761257434
Initial Cost on Val dataset for this epoch 391 = 0.0832300761257434
Error on this batch = 0.4552916403155385
Error on this batch = 0.4666888896014483
Cost on val dataset after 392 epochs is = 0.08322695618051554
Initial Cost on Val dataset for this epoch 392 = 0.08322695618051554
Error on this batch = 0.45529114134405446
Error on this batch = 0.46665795739931
Cost on val dataset after 393 epochs is = 0.0832237156596453
Initial Cost on Val dataset for this epoch 393 = 0.0832237156596453
Error on this batch = 0.45529169919503143
Error on this batch = 0.46662772756783016
Cost on val dataset after 394 epochs is = 0.08322033877189204
Initial Cost on Val dataset for this epoch 394 = 0.08322033877189204
Error on this batch = 0.4552912063190451
Error on this batch = 0.46660157694495136
Cost on val dataset after 395 epochs is = 0.08321708424658843
Initial Cost on Val dataset for this epoch 395 

Error on this batch = 0.46623777217293877
Cost on val dataset after 430 epochs is = 0.08312171721560534
Initial Cost on Val dataset for this epoch 430 = 0.08312171721560534
Error on this batch = 0.45524952555409726
Error on this batch = 0.46623513222654794
Cost on val dataset after 431 epochs is = 0.08311989988436255
Initial Cost on Val dataset for this epoch 431 = 0.08311989988436255
Error on this batch = 0.4552483953897075
Error on this batch = 0.4662313531943434
Cost on val dataset after 432 epochs is = 0.08311794884711857
Initial Cost on Val dataset for this epoch 432 = 0.08311794884711857
Error on this batch = 0.4552475453579683
Error on this batch = 0.46622982004543323
Cost on val dataset after 433 epochs is = 0.0831162273756928
Initial Cost on Val dataset for this epoch 433 = 0.0831162273756928
Error on this batch = 0.45524649757098934
Error on this batch = 0.4662262874426532
Cost on val dataset after 434 epochs is = 0.08311447133006027
Initial Cost on Val dataset for this epoch

Error on this batch = 0.4660675690008361
Cost on val dataset after 469 epochs is = 0.08305689898908597
Initial Cost on Val dataset for this epoch 469 = 0.08305689898908597
Error on this batch = 0.4552933174048288
Error on this batch = 0.46605910284930374
Cost on val dataset after 470 epochs is = 0.08305539127412853
Initial Cost on Val dataset for this epoch 470 = 0.08305539127412853
Error on this batch = 0.45529604174553884
Error on this batch = 0.4660506462853228
Cost on val dataset after 471 epochs is = 0.08305382435818913
Initial Cost on Val dataset for this epoch 471 = 0.08305382435818913
Error on this batch = 0.4552985287091076
Error on this batch = 0.4660412374388776
Cost on val dataset after 472 epochs is = 0.08305254670030045
Initial Cost on Val dataset for this epoch 472 = 0.08305254670030045
Error on this batch = 0.45530086285552984
Error on this batch = 0.46603375336665864
Cost on val dataset after 473 epochs is = 0.08305106066854614
Initial Cost on Val dataset for this epoc

Error on this batch = 0.46590024843044847
Cost on val dataset after 508 epochs is = 0.08299666798717567
Initial Cost on Val dataset for this epoch 508 = 0.08299666798717567
Error on this batch = 0.455380782296111
Error on this batch = 0.4659064652435639
Cost on val dataset after 509 epochs is = 0.08299500509732527
Initial Cost on Val dataset for this epoch 509 = 0.08299500509732527
Error on this batch = 0.4553830716724835
Error on this batch = 0.4659125158704099
Cost on val dataset after 510 epochs is = 0.08299298622120069
Initial Cost on Val dataset for this epoch 510 = 0.08299298622120069
Error on this batch = 0.4553839049668406
Error on this batch = 0.46591685306366243
Cost on val dataset after 511 epochs is = 0.08299131751879936
Initial Cost on Val dataset for this epoch 511 = 0.08299131751879936
Error on this batch = 0.45538578709345395
Error on this batch = 0.4659221461743114
Cost on val dataset after 512 epochs is = 0.08298922187093821
Initial Cost on Val dataset for this epoch 

Error on this batch = 0.4658275583353184
Cost on val dataset after 547 epochs is = 0.0829229998723304
Initial Cost on Val dataset for this epoch 547 = 0.0829229998723304
Error on this batch = 0.45545739771152954
Error on this batch = 0.4658232080660939
Cost on val dataset after 548 epochs is = 0.08292126166657209
Initial Cost on Val dataset for this epoch 548 = 0.08292126166657209
Error on this batch = 0.4554585405846621
Error on this batch = 0.46582197722030017
Cost on val dataset after 549 epochs is = 0.08291992160432822
Initial Cost on Val dataset for this epoch 549 = 0.08291992160432822
Error on this batch = 0.4554605277208161
Error on this batch = 0.4658197459534249
Cost on val dataset after 550 epochs is = 0.08291826844903097
Initial Cost on Val dataset for this epoch 550 = 0.08291826844903097
Error on this batch = 0.4554635577136321
Error on this batch = 0.46581469492257543
Cost on val dataset after 551 epochs is = 0.08291687178359108
Initial Cost on Val dataset for this epoch 5

In [120]:
lr

0.1