In [1]:
import numpy as np
import time
import pickle
import os
import matplotlib.pyplot as plt
import warnings
import pandas as pd

warnings.filterwarnings('ignore')

In [2]:
print("----------------Reading the Data-------------------------")
PATH = os.getcwd()
os.chdir('Alphabets/')

X_train = pd.read_csv('train.csv', sep=',', header=None, index_col=False)
X_test = pd.read_csv('test.csv', sep=',', header=None, index_col=False)
np.random.shuffle(X_train.to_numpy())
train_class = X_train[X_train.columns[-1]]
test_actual_class = X_test[X_test.columns[-1]]

X_train = X_train.drop(X_train.columns[-1], axis=1)
X_test = X_test.drop(X_test.columns[-1], axis=1)

print("----------------Data Reading completed-------------------")

os.chdir('../')

X_train = X_train/255
X_test = X_test/255

m = X_train.shape[0] # Number of Training Samples

X_valid = X_train.iloc[(int(0.85*m)):]
valid_class = train_class[(int(0.85*m)):]
X_train = X_train.iloc[0:int(0.85*m)]
train_class = train_class[0:int(0.85*m)]


m = X_train.shape[0] # Number of Training Samples
n = X_train.shape[1] # Number of input features

print("The total number of training samples = {}".format(m))
print("The total number of validation samples = {}".format(X_valid.shape[0]))

print("The number of features = {}".format(n))

----------------Reading the Data-------------------------
----------------Data Reading completed-------------------
The total number of training samples = 11050
The total number of validation samples = 1950
The number of features = 784


In [3]:
#To get the one hot encoding of each label
print("--------Perform 1-hot encoding of class labels------------")

train_class_enc = pd.get_dummies(train_class).to_numpy()
valid_class_enc = pd.get_dummies(valid_class).to_numpy()
test_actual_class_enc = pd.get_dummies(test_actual_class).to_numpy()

--------Perform 1-hot encoding of class labels------------


In [4]:
#Add the intercept term to the data samples both in training and test dataset
X_train = np.hstack((np.ones((m,1)),X_train.to_numpy()))
X_valid = np.hstack((np.ones((X_valid.shape[0],1)), X_valid.to_numpy()))
X_test = np.hstack((np.ones((X_test.shape[0],1)),X_test.to_numpy()))

In [79]:
lr = 0.1
arch_test = [1,5,10,50,100]
arch = [arch_test[3]] #means one hidden layer with 2 perceptrons 
batch_size = 100 # Mini-Batch Size
r = np.max(train_class) + 1 # Default value of the number of classes = 26

In [80]:
#Mini-Batch formation
mini_batch = [(X_train[i:i+batch_size,:], train_class_enc[i:i+batch_size]) for i in range(0, m, batch_size)]
print("The number of mini-batches formed is = {}".format(len(mini_batch)))

The number of mini-batches formed is = 111


In [81]:
#Theta Initialization 
def theta_init(arch=[50]):
    theta = []
    for i in range(len(arch)+1):
        if i == 0:
            dim0=n+1
            dim1=arch[i]
        elif (i == len(arch)):
            dim0=arch[i-1]
            dim1 = r
        else:
            dim0=arch[i-1]
            dim1= arch[i]

        theta.append(2*np.random.random((dim0, dim1))-1)
        #theta.append(np.zeros((dim0, dim1)))
    return theta

In [82]:
def activation(x):
    return 1/(1+np.exp(-x))

In [83]:
def relu_act(x):
#     x[x<=0] = 0.01*x[x<=0]
#     return x
    return np.maximum(0.0, x)

In [84]:
def deriv_relu(x):
    #x[x<=0] = -0.01
    x[x<=0] = 0
    x[x>0] = 1
    return x

In [85]:
a = np.array([[1,-1,0], [-2.5, 0, 3]])
print(relu_act(a))
#print(deriv_relu(a))

[[1. 0. 0.]
 [0. 0. 3.]]


In [86]:
def forward_prop(data, theta):
    fm = []
    fm.append(data)
    for l in range(len(theta)):
        if (l != len(theta)-1):
            #print("relu")
            fm.append(relu_act(np.dot(fm[l], theta[l])))
        else:
            fm.append(activation(np.dot(fm[l], theta[l])))
            #print("sigmoid output")
    return fm

In [87]:
theta = theta_init([100, 100, 100])
print(theta[3].shape)
cost_total(X_train, theta, train_class_enc, m)
#fm = forward_prop(X_train, theta)

(100, 26)


6.045363256922522

In [88]:
def cost_total(X, theta, Y, m):
    fm = forward_prop(X, theta)
    cost = (1/(2*m))*np.sum((Y-fm[-1])**2)
    return cost

In [89]:
def calc_accuracy(data, theta, actual_class):
    pred_class = forward_prop(data, theta)
    test_pred_class = pred_class[-1]
    for i in range(len(test_pred_class)):
        test_pred_class[i][test_pred_class[i] == np.max(test_pred_class[i])] = 1
        test_pred_class[i][test_pred_class[i] != np.max(test_pred_class[i])] = 0


    test_acc = 0
    for i in range(len(actual_class)):
        if (np.array_equal(test_pred_class[i], actual_class[i])):
            test_acc+=1
    test_acc /= data.shape[0]

    #print("The Test Accuracy of the model = {}%".format(test_acc*100))
    return (test_acc*100)

In [90]:
epochs = []
train_accuracy = []
test_accuracy = []
train_time = []
valid_accuracy=[]

In [91]:
arch=[100, 100]
lr0=0.5

theta = theta_init(arch)
print(theta[0].shape, theta[1].shape, theta[2].shape)
fm = forward_prop(X_train, theta)
ite=1
epoch = 0
start = time.time()
cost_init = cost_total(X_valid, theta, valid_class_enc, m)

while(True):
    count = 0
#     lr = lr0/(np.sqrt(ite))
#     print("learning rate = ", lr)

    print("Initial Cost on Val dataset for this epoch {} = {}".format(epoch, cost_init))

    for b in mini_batch:
        X_b = b[0]
        Y_b = b[1]
        fm = forward_prop(X_b, theta)
        delta = [None]*len(fm)

        if (count % 60 == 0):
            print("Error on this batch = "+str(cost_total(X_b, theta, Y_b, batch_size)))
        #Backward Propagation

        for l in range(len(fm)-1, 0, -1):
            if (l == len(fm)-1):
                delta[l] = ((1/batch_size)*(Y_b - fm[l])*fm[l]*(1-fm[l]))
                #print("delta for last layer=",delta[l])
            else:
                delta[l]=(np.dot(delta[l+1], theta[l].T)*deriv_relu(fm[l]))
                #print("delta for hidden layer=",delta[l])

        for t in range(len(theta)):
            theta[t] += lr*np.dot(fm[t].T, delta[t+1]) 

        count+=1
    epoch+=1 #Number of epochs
    #ite+=1

    cost_final = cost_total(X_valid, theta, valid_class_enc, m)
    print("Cost on val dataset after {} epochs is = {}".format(epoch, cost_final))
    if (abs(cost_final-cost_init) < 1e-08):
        print("cost initial= {} , cost final={} , change in cost= {}".format(cost_init,cost_final, cost_final-cost_init))
        break
    cost_init = cost_final
epochs.append(epoch)
train_time.append(time.time()-start)
train_accuracy.append(calc_accuracy(X_train, theta, train_class_enc))
valid_accuracy.append(calc_accuracy(X_valid, theta, valid_class_enc))
test_accuracy.append(calc_accuracy(X_test, theta, test_actual_class_enc))
print("\n------------------------------------------------------------------------------")
print("The stats for number of units in the hidden layer arch= {} are as below:".format(arch))
print("------------------------------------------------------------------------------")
print("The number of epochs = {:2.3f}".format(epochs[-1]))
print("The training time = {:2.3f}sec".format(train_time[-1]))
print("The training accuracy is = {:2.3f}%".format(train_accuracy[-1]))
print("The validation accuracy is = {:2.3f}%".format(valid_accuracy[-1]))
print("The test accuracy is = {:2.3f}%".format(test_accuracy[-1]))
print("------------------------------------------------------------------------------\n")

(785, 100) (100, 100) (100, 26)
Initial Cost on Val dataset for this epoch 0 = 1.2988304465235856
Error on this batch = 7.4909528119755
Error on this batch = 3.2230831335138204
Cost on val dataset after 1 epochs is = 0.3600505331020244
Initial Cost on Val dataset for this epoch 1 = 0.3600505331020244
Error on this batch = 2.1819069906824
Error on this batch = 1.1373419009515668
Cost on val dataset after 2 epochs is = 0.1376144141812923
Initial Cost on Val dataset for this epoch 2 = 0.1376144141812923
Error on this batch = 0.8854449804409198
Error on this batch = 0.6281747761635217
Cost on val dataset after 3 epochs is = 0.10321419340742177
Initial Cost on Val dataset for this epoch 3 = 0.10321419340742177
Error on this batch = 0.6069794030120971
Error on this batch = 0.5393815409346756
Cost on val dataset after 4 epochs is = 0.09590984561686325
Initial Cost on Val dataset for this epoch 4 = 0.09590984561686325
Error on this batch = 0.5646173073237102
Error on this batch = 0.52745971979

Cost on val dataset after 40 epochs is = 0.08786377996907312
Initial Cost on Val dataset for this epoch 40 = 0.08786377996907312
Error on this batch = 0.4999999392913481
Error on this batch = 0.49500567251481625
Cost on val dataset after 41 epochs is = 0.08782938838567719
Initial Cost on Val dataset for this epoch 41 = 0.08782938838567719
Error on this batch = 0.4999999636119115
Error on this batch = 0.4950025496764643
Cost on val dataset after 42 epochs is = 0.08782202693500589
Initial Cost on Val dataset for this epoch 42 = 0.08782202693500589
Error on this batch = 0.49999996915076866
Error on this batch = 0.49500219209149143
Cost on val dataset after 43 epochs is = 0.08779786838094039
Initial Cost on Val dataset for this epoch 43 = 0.08779786838094039
Error on this batch = 0.49999998543568736
Error on this batch = 0.49499992810527843
Cost on val dataset after 44 epochs is = 0.08776715081083038
Initial Cost on Val dataset for this epoch 44 = 0.08776715081083038
Error on this batch = 

Cost on val dataset after 79 epochs is = 0.08725170018788175
Initial Cost on Val dataset for this epoch 79 = 0.08725170018788175
Error on this batch = 0.49999997810086827
Error on this batch = 0.48508848115863873
Cost on val dataset after 80 epochs is = 0.0872416305392283
Initial Cost on Val dataset for this epoch 80 = 0.0872416305392283
Error on this batch = 0.4999999834360263
Error on this batch = 0.48506432624756685
Cost on val dataset after 81 epochs is = 0.0872085783820185
Initial Cost on Val dataset for this epoch 81 = 0.0872085783820185
Error on this batch = 0.4999999845316904
Error on this batch = 0.48504735237457264
Cost on val dataset after 82 epochs is = 0.08721316507852142
Initial Cost on Val dataset for this epoch 82 = 0.08721316507852142
Error on this batch = 0.4999999874954479
Error on this batch = 0.4850397953498377
Cost on val dataset after 83 epochs is = 0.08721992735010932
Initial Cost on Val dataset for this epoch 83 = 0.08721992735010932
Error on this batch = 0.499

Cost on val dataset after 118 epochs is = 0.08717280373892167
Initial Cost on Val dataset for this epoch 118 = 0.08717280373892167
Error on this batch = 0.4999999906447888
Error on this batch = 0.4850067386923915
Cost on val dataset after 119 epochs is = 0.0871713343561645
Initial Cost on Val dataset for this epoch 119 = 0.0871713343561645
Error on this batch = 0.49999999003605655
Error on this batch = 0.48500622480802663
Cost on val dataset after 120 epochs is = 0.08716990772427204
Initial Cost on Val dataset for this epoch 120 = 0.08716990772427204
Error on this batch = 0.4999999895207036
Error on this batch = 0.4850058109828734
Cost on val dataset after 121 epochs is = 0.0871685288422119
Initial Cost on Val dataset for this epoch 121 = 0.0871685288422119
Error on this batch = 0.4999999890685802
Error on this batch = 0.4850054745687875
Cost on val dataset after 122 epochs is = 0.08716719417227126
Initial Cost on Val dataset for this epoch 122 = 0.08716719417227126
Error on this batch

Cost on val dataset after 157 epochs is = 0.08711559721467346
Initial Cost on Val dataset for this epoch 157 = 0.08711559721467346
Error on this batch = 0.49999998846581223
Error on this batch = 0.4850031338571047
Cost on val dataset after 158 epochs is = 0.08711468702589001
Initial Cost on Val dataset for this epoch 158 = 0.08711468702589001
Error on this batch = 0.4999999882724523
Error on this batch = 0.4850031919543449
Cost on val dataset after 159 epochs is = 0.08711385328595898
Initial Cost on Val dataset for this epoch 159 = 0.08711385328595898
Error on this batch = 0.4999999881076466
Error on this batch = 0.4850032617969099
Cost on val dataset after 160 epochs is = 0.08711309073417696
Initial Cost on Val dataset for this epoch 160 = 0.08711309073417696
Error on this batch = 0.4999999879696246
Error on this batch = 0.48500334494686753
Cost on val dataset after 161 epochs is = 0.08711240011294187
Initial Cost on Val dataset for this epoch 161 = 0.08711240011294187
Error on this b

Cost on val dataset after 196 epochs is = 0.08716762531949353
Initial Cost on Val dataset for this epoch 196 = 0.08716762531949353
Error on this batch = 0.49999999199239575
Error on this batch = 0.4851730438847136
Cost on val dataset after 197 epochs is = 0.08716780756476283
Initial Cost on Val dataset for this epoch 197 = 0.08716780756476283
Error on this batch = 0.49999999207908796
Error on this batch = 0.48512720604708703
Cost on val dataset after 198 epochs is = 0.08716634390370269
Initial Cost on Val dataset for this epoch 198 = 0.08716634390370269
Error on this batch = 0.49999999282478313
Error on this batch = 0.4851058167776121
Cost on val dataset after 199 epochs is = 0.08715810255303272
Initial Cost on Val dataset for this epoch 199 = 0.08715810255303272
Error on this batch = 0.49999999441143905
Error on this batch = 0.48511305365346874
Cost on val dataset after 200 epochs is = 0.08714667670591367
Initial Cost on Val dataset for this epoch 200 = 0.08714667670591367
Error on th

Cost on val dataset after 235 epochs is = 0.08699711438135177
Initial Cost on Val dataset for this epoch 235 = 0.08699711438135177
Error on this batch = 0.49999999737959416
Error on this batch = 0.48500782904138556
Cost on val dataset after 236 epochs is = 0.08699406986165356
Initial Cost on Val dataset for this epoch 236 = 0.08699406986165356
Error on this batch = 0.4999999976117871
Error on this batch = 0.4850072280465122
Cost on val dataset after 237 epochs is = 0.08698851069761288
Initial Cost on Val dataset for this epoch 237 = 0.08698851069761288
Error on this batch = 0.499999997358265
Error on this batch = 0.4850068355763857
Cost on val dataset after 238 epochs is = 0.08698557172270709
Initial Cost on Val dataset for this epoch 238 = 0.08698557172270709
Error on this batch = 0.49999999727238786
Error on this batch = 0.48500651759195507
Cost on val dataset after 239 epochs is = 0.08698355084182989
Initial Cost on Val dataset for this epoch 239 = 0.08698355084182989
Error on this 

Cost on val dataset after 274 epochs is = 0.08690411942072505
Initial Cost on Val dataset for this epoch 274 = 0.08690411942072505
Error on this batch = 0.4999999991291776
Error on this batch = 0.4850031339257609
Cost on val dataset after 275 epochs is = 0.08690334530641931
Initial Cost on Val dataset for this epoch 275 = 0.08690334530641931
Error on this batch = 0.499999999127561
Error on this batch = 0.4850030733634179
Cost on val dataset after 276 epochs is = 0.0869025970340799
Initial Cost on Val dataset for this epoch 276 = 0.0869025970340799
Error on this batch = 0.4999999991264843
Error on this batch = 0.4850030158758639
Cost on val dataset after 277 epochs is = 0.08690187477907446
Initial Cost on Val dataset for this epoch 277 = 0.08690187477907446
Error on this batch = 0.4999999991259793
Error on this batch = 0.4850029609818168
Cost on val dataset after 278 epochs is = 0.08690117701916927
Initial Cost on Val dataset for this epoch 278 = 0.08690117701916927
Error on this batch 

Cost on val dataset after 313 epochs is = 0.08687390690465764
Initial Cost on Val dataset for this epoch 313 = 0.08687390690465764
Error on this batch = 0.49999999957017977
Error on this batch = 0.4850017194279698
Cost on val dataset after 314 epochs is = 0.08687326194476196
Initial Cost on Val dataset for this epoch 314 = 0.08687326194476196
Error on this batch = 0.49999999956573826
Error on this batch = 0.48500170175036444
Cost on val dataset after 315 epochs is = 0.08687263564501242
Initial Cost on Val dataset for this epoch 315 = 0.08687263564501242
Error on this batch = 0.4999999995614367
Error on this batch = 0.4850016846022085
Cost on val dataset after 316 epochs is = 0.08687202693037048
Initial Cost on Val dataset for this epoch 316 = 0.08687202693037048
Error on this batch = 0.4999999995572598
Error on this batch = 0.4850016679356973
Cost on val dataset after 317 epochs is = 0.08687143495742371
Initial Cost on Val dataset for this epoch 317 = 0.08687143495742371
Error on this 

Cost on val dataset after 352 epochs is = 0.0868564540067071
Initial Cost on Val dataset for this epoch 352 = 0.0868564540067071
Error on this batch = 0.49999999942210355
Error on this batch = 0.4850012212817521
Cost on val dataset after 353 epochs is = 0.0868561172866375
Initial Cost on Val dataset for this epoch 353 = 0.0868561172866375
Error on this batch = 0.4999999994177125
Error on this batch = 0.4850012112687593
Cost on val dataset after 354 epochs is = 0.08685578305194946
Initial Cost on Val dataset for this epoch 354 = 0.08685578305194946
Error on this batch = 0.49999999941324336
Error on this batch = 0.48500120134557323
Cost on val dataset after 355 epochs is = 0.08685545145190791
Initial Cost on Val dataset for this epoch 355 = 0.08685545145190791
Error on this batch = 0.49999999940869244
Error on this batch = 0.48500119155404536
Cost on val dataset after 356 epochs is = 0.08685512262147964
Initial Cost on Val dataset for this epoch 356 = 0.08685512262147964
Error on this ba

Cost on val dataset after 391 epochs is = 0.08684435503917683
Initial Cost on Val dataset for this epoch 391 = 0.08684435503917683
Error on this batch = 0.49999999912013976
Error on this batch = 0.4850008831325296
Cost on val dataset after 392 epochs is = 0.08684404814912292
Initial Cost on Val dataset for this epoch 392 = 0.08684404814912292
Error on this batch = 0.49999999910420884
Error on this batch = 0.4850008754617692
Cost on val dataset after 393 epochs is = 0.08684374100273773
Initial Cost on Val dataset for this epoch 393 = 0.08684374100273773
Error on this batch = 0.49999999908727794
Error on this batch = 0.4850008678159075
Cost on val dataset after 394 epochs is = 0.08684343356165151
Initial Cost on Val dataset for this epoch 394 = 0.08684343356165151
Error on this batch = 0.4999999990692357
Error on this batch = 0.48500086019210903
Cost on val dataset after 395 epochs is = 0.08684312606466939
Initial Cost on Val dataset for this epoch 395 = 0.08684312606466939
Error on this

Cost on val dataset after 430 epochs is = 0.08682957293093879
Initial Cost on Val dataset for this epoch 430 = 0.08682957293093879
Error on this batch = 0.499999992265605
Error on this batch = 0.4850007432888784
Cost on val dataset after 431 epochs is = 0.08682856233885602
Initial Cost on Val dataset for this epoch 431 = 0.08682856233885602
Error on this batch = 0.4999999922787167
Error on this batch = 0.48500073019415924
Cost on val dataset after 432 epochs is = 0.08682762001396206
Initial Cost on Val dataset for this epoch 432 = 0.08682762001396206
Error on this batch = 0.4999999922901019
Error on this batch = 0.4850007166442428
Cost on val dataset after 433 epochs is = 0.08682673772545534
Initial Cost on Val dataset for this epoch 433 = 0.08682673772545534
Error on this batch = 0.4999999923000222
Error on this batch = 0.4850007027725831
Cost on val dataset after 434 epochs is = 0.08682590859855367
Initial Cost on Val dataset for this epoch 434 = 0.08682590859855367
Error on this bat

Cost on val dataset after 469 epochs is = 0.08681067707228458
Initial Cost on Val dataset for this epoch 469 = 0.08681067707228458
Error on this batch = 0.49999999231616754
Error on this batch = 0.48500024124327423
Cost on val dataset after 470 epochs is = 0.08681042169001617
Initial Cost on Val dataset for this epoch 470 = 0.08681042169001617
Error on this batch = 0.4999999923143064
Error on this batch = 0.4850002297473528
Cost on val dataset after 471 epochs is = 0.0868101712689378
Initial Cost on Val dataset for this epoch 471 = 0.0868101712689378
Error on this batch = 0.4999999923124473
Error on this batch = 0.48500021823618555
Cost on val dataset after 472 epochs is = 0.08680992570944869
Initial Cost on Val dataset for this epoch 472 = 0.08680992570944869
Error on this batch = 0.49999999231059494
Error on this batch = 0.48500020670015387
Cost on val dataset after 473 epochs is = 0.08680968492452992
Initial Cost on Val dataset for this epoch 473 = 0.08680968492452992
Error on this 

Cost on val dataset after 508 epochs is = 0.08680501567007289
Initial Cost on Val dataset for this epoch 508 = 0.08680501567007289
Error on this batch = 0.49999999233130044
Error on this batch = 0.4849995864693269
Cost on val dataset after 509 epochs is = 0.08680521613020398
Initial Cost on Val dataset for this epoch 509 = 0.08680521613020398
Error on this batch = 0.49999999234410525
Error on this batch = 0.4849995381927396
Cost on val dataset after 510 epochs is = 0.08680550601710896
Initial Cost on Val dataset for this epoch 510 = 0.08680550601710896
Error on this batch = 0.49999999236010667
Error on this batch = 0.48499947973297014
Cost on val dataset after 511 epochs is = 0.08680592198677634
Initial Cost on Val dataset for this epoch 511 = 0.08680592198677634
Error on this batch = 0.4999999923806499
Error on this batch = 0.48499940571575834
Cost on val dataset after 512 epochs is = 0.08680652619720093
Initial Cost on Val dataset for this epoch 512 = 0.08680652619720093
Error on thi

Cost on val dataset after 547 epochs is = 0.08680053429060161
Initial Cost on Val dataset for this epoch 547 = 0.08680053429060161
Error on this batch = 0.49999999481870633
Error on this batch = 0.48000590575069946
Cost on val dataset after 548 epochs is = 0.08679947571477657
Initial Cost on Val dataset for this epoch 548 = 0.08679947571477657
Error on this batch = 0.499999994830141
Error on this batch = 0.48000513978136655
Cost on val dataset after 549 epochs is = 0.08679817874137645
Initial Cost on Val dataset for this epoch 549 = 0.08679817874137645
Error on this batch = 0.4999999948594324
Error on this batch = 0.4800047973082168
Cost on val dataset after 550 epochs is = 0.08679665079453006
Initial Cost on Val dataset for this epoch 550 = 0.08679665079453006
Error on this batch = 0.49999999484753416
Error on this batch = 0.4800046218221851
Cost on val dataset after 551 epochs is = 0.08679526358316256
Initial Cost on Val dataset for this epoch 551 = 0.08679526358316256
Error on this 

Cost on val dataset after 586 epochs is = 0.0867710491195202
Initial Cost on Val dataset for this epoch 586 = 0.0867710491195202
Error on this batch = 0.49999999400696526
Error on this batch = 0.480003199660794
Cost on val dataset after 587 epochs is = 0.08677060267685173
Initial Cost on Val dataset for this epoch 587 = 0.08677060267685173
Error on this batch = 0.49999999398952666
Error on this batch = 0.48000317796135933
Cost on val dataset after 588 epochs is = 0.08677015684144897
Initial Cost on Val dataset for this epoch 588 = 0.08677015684144897
Error on this batch = 0.4999999939730115
Error on this batch = 0.48000315745261474
Cost on val dataset after 589 epochs is = 0.08676972087183096
Initial Cost on Val dataset for this epoch 589 = 0.08676972087183096
Error on this batch = 0.4999999939556128
Error on this batch = 0.48000313712423526
Cost on val dataset after 590 epochs is = 0.08676928972859206
Initial Cost on Val dataset for this epoch 590 = 0.08676928972859206
Error on this b

Cost on val dataset after 625 epochs is = 0.0867551428308601
Initial Cost on Val dataset for this epoch 625 = 0.0867551428308601
Error on this batch = 0.4999999931950349
Error on this batch = 0.4800025216888845
Cost on val dataset after 626 epochs is = 0.086754693568526
Initial Cost on Val dataset for this epoch 626 = 0.086754693568526
Error on this batch = 0.49999999316360205
Error on this batch = 0.480002507686062
Cost on val dataset after 627 epochs is = 0.08675422897068301
Initial Cost on Val dataset for this epoch 627 = 0.08675422897068301
Error on this batch = 0.49999999313153626
Error on this batch = 0.4800024936814563
Cost on val dataset after 628 epochs is = 0.08675376443763017
Initial Cost on Val dataset for this epoch 628 = 0.08675376443763017
Error on this batch = 0.49999999309758864
Error on this batch = 0.4800024800685394
Cost on val dataset after 629 epochs is = 0.08675328538421623
Initial Cost on Val dataset for this epoch 629 = 0.08675328538421623
Error on this batch =

Cost on val dataset after 664 epochs is = 0.0867037691592335
Initial Cost on Val dataset for this epoch 664 = 0.0867037691592335
Error on this batch = 0.499999988033509
Error on this batch = 0.4800020942037819
Cost on val dataset after 665 epochs is = 0.08670358426619629
Initial Cost on Val dataset for this epoch 665 = 0.08670358426619629
Error on this batch = 0.49999998820075536
Error on this batch = 0.4800020868193661
Cost on val dataset after 666 epochs is = 0.08670342355207175
Initial Cost on Val dataset for this epoch 666 = 0.08670342355207175
Error on this batch = 0.49999998834590453
Error on this batch = 0.48000207968917097
Cost on val dataset after 667 epochs is = 0.08670328225779116
Initial Cost on Val dataset for this epoch 667 = 0.08670328225779116
Error on this batch = 0.49999998847372673
Error on this batch = 0.4800020729633515
Cost on val dataset after 668 epochs is = 0.08670315119361542
Initial Cost on Val dataset for this epoch 668 = 0.08670315119361542
Error on this ba

Cost on val dataset after 703 epochs is = 0.08669996241015811
Initial Cost on Val dataset for this epoch 703 = 0.08669996241015811
Error on this batch = 0.4999999898335177
Error on this batch = 0.4800018614392073
Cost on val dataset after 704 epochs is = 0.08669987820941878
Initial Cost on Val dataset for this epoch 704 = 0.08669987820941878
Error on this batch = 0.49999998984087435
Error on this batch = 0.480001855668114
Cost on val dataset after 705 epochs is = 0.08669979420790505
Initial Cost on Val dataset for this epoch 705 = 0.08669979420790505
Error on this batch = 0.49999998984750527
Error on this batch = 0.4800018498987199
Cost on val dataset after 706 epochs is = 0.08669971042008554
Initial Cost on Val dataset for this epoch 706 = 0.08669971042008554
Error on this batch = 0.49999998985344035
Error on this batch = 0.48000184404760604
Cost on val dataset after 707 epochs is = 0.08669962867304018
Initial Cost on Val dataset for this epoch 707 = 0.08669962867304018
Error on this 

Cost on val dataset after 742 epochs is = 0.08669695649648368
Initial Cost on Val dataset for this epoch 742 = 0.08669695649648368
Error on this batch = 0.49999998973833043
Error on this batch = 0.48000163819066405
Cost on val dataset after 743 epochs is = 0.08669688538159705
Initial Cost on Val dataset for this epoch 743 = 0.08669688538159705
Error on this batch = 0.4999999897287832
Error on this batch = 0.48000163246093863
Cost on val dataset after 744 epochs is = 0.08669681770175773
Initial Cost on Val dataset for this epoch 744 = 0.08669681770175773
Error on this batch = 0.49999998971858073
Error on this batch = 0.48000162672400193
Cost on val dataset after 745 epochs is = 0.08669675052072152
Initial Cost on Val dataset for this epoch 745 = 0.08669675052072152
Error on this batch = 0.4999999897080899
Error on this batch = 0.4800016209790864
Cost on val dataset after 746 epochs is = 0.08669668385199134
Initial Cost on Val dataset for this epoch 746 = 0.08669668385199134
Error on thi

Cost on val dataset after 781 epochs is = 0.08669484690700785
Initial Cost on Val dataset for this epoch 781 = 0.08669484690700785
Error on this batch = 0.49999998914095417
Error on this batch = 0.48000139592004376
Cost on val dataset after 782 epochs is = 0.08669482097786609
Initial Cost on Val dataset for this epoch 782 = 0.08669482097786609
Error on this batch = 0.4999999891195098
Error on this batch = 0.4800013885001871
Cost on val dataset after 783 epochs is = 0.08669479803645234
Initial Cost on Val dataset for this epoch 783 = 0.08669479803645234
Error on this batch = 0.49999998909770305
Error on this batch = 0.4800013809811368
Cost on val dataset after 784 epochs is = 0.08669478077815378
Initial Cost on Val dataset for this epoch 784 = 0.08669478077815378
Error on this batch = 0.4999999890752107
Error on this batch = 0.48000137328842685
Cost on val dataset after 785 epochs is = 0.08669476455976248
Initial Cost on Val dataset for this epoch 785 = 0.08669476455976248
Error on this

In [120]:
lr

0.1