In [1]:
import numpy as np
import time
import pickle
import os
import matplotlib.pyplot as plt
import warnings
import pandas as pd

warnings.filterwarnings('ignore')

In [2]:
print("----------------Reading the Data-------------------------")
PATH = os.getcwd()
os.chdir('Alphabets/')

X_train = pd.read_csv('train.csv', sep=',', header=None, index_col=False)
X_test = pd.read_csv('test.csv', sep=',', header=None, index_col=False)
np.random.shuffle(X_train.to_numpy())
train_class = X_train[X_train.columns[-1]]
test_actual_class = X_test[X_test.columns[-1]]

X_train = X_train.drop(X_train.columns[-1], axis=1)
X_test = X_test.drop(X_test.columns[-1], axis=1)

print("----------------Data Reading completed-------------------")

os.chdir('../')

X_train = X_train/255
X_test = X_test/255

m = X_train.shape[0] # Number of Training Samples

X_valid = X_train.iloc[(int(0.85*m)):]
valid_class = train_class[(int(0.85*m)):]
X_train = X_train.iloc[0:int(0.85*m)]
train_class = train_class[0:int(0.85*m)]


m = X_train.shape[0] # Number of Training Samples
n = X_train.shape[1] # Number of input features

print("The total number of training samples = {}".format(m))
print("The total number of validation samples = {}".format(X_valid.shape[0]))

print("The number of features = {}".format(n))

----------------Reading the Data-------------------------
----------------Data Reading completed-------------------
The total number of training samples = 11050
The total number of validation samples = 1950
The number of features = 784


In [3]:
#To get the one hot encoding of each label
print("--------Perform 1-hot encoding of class labels------------")

train_class_enc = pd.get_dummies(train_class).to_numpy()
valid_class_enc = pd.get_dummies(valid_class).to_numpy()
test_actual_class_enc = pd.get_dummies(test_actual_class).to_numpy()

--------Perform 1-hot encoding of class labels------------


In [4]:
#Add the intercept term to the data samples both in training and test dataset
X_train = np.hstack((np.ones((m,1)),X_train.to_numpy()))
X_valid = np.hstack((np.ones((X_valid.shape[0],1)), X_valid.to_numpy()))
X_test = np.hstack((np.ones((X_test.shape[0],1)),X_test.to_numpy()))

In [5]:
lr = 0.1
arch_test = [1,5,10,50,100]
arch = [arch_test[3]] #means one hidden layer with 2 perceptrons 
batch_size = 100 # Mini-Batch Size
r = np.max(train_class) + 1 # Default value of the number of classes = 26

In [6]:
#Mini-Batch formation
mini_batch = [(X_train[i:i+batch_size,:], train_class_enc[i:i+batch_size]) for i in range(0, m, batch_size)]
print("The number of mini-batches formed is = {}".format(len(mini_batch)))

The number of mini-batches formed is = 111


In [7]:
#Theta Initialization 
def theta_init(arch=[50]):
    theta = []
    for i in range(len(arch)+1):
        if i == 0:
            dim0=n+1
            dim1=arch[i]
        elif (i == len(arch)):
            dim0=arch[i-1]
            dim1 = r
        else:
            dim0=arch[i-1]
            dim1= arch[i]

        theta.append(2*np.random.random((dim0, dim1))-1)
        #theta.append(np.zeros((dim0, dim1)))
    return theta

In [8]:
def activation(x):
    return 1/(1+np.exp(-x))

In [9]:
def forward_prop(data, theta):
    fm = []
    fm.append(data)
    for l in range(len(theta)):
        fm.append(activation(np.dot(fm[l], theta[l])))
    return fm

In [10]:
def cost_total(X, theta, Y, m):
    fm = forward_prop(X, theta)
    cost = (1/(2*m))*np.sum((Y-fm[-1])**2)
    return cost

In [11]:
def calc_accuracy(data, theta, actual_class):
    pred_class = forward_prop(data, theta)
    test_pred_class = pred_class[-1]
    for i in range(len(test_pred_class)):
        test_pred_class[i][test_pred_class[i] == np.max(test_pred_class[i])] = 1
        test_pred_class[i][test_pred_class[i] != np.max(test_pred_class[i])] = 0


    test_acc = 0
    for i in range(len(actual_class)):
        if (np.array_equal(test_pred_class[i], actual_class[i])):
            test_acc+=1
    test_acc /= data.shape[0]

    #print("The Test Accuracy of the model = {}%".format(test_acc*100))
    return (test_acc*100)

In [12]:
epochs = []
train_accuracy = []
test_accuracy = []
valid_accuracy=[]
train_time = []

In [28]:
lr0=1.5
arch_test=[100]
for i in range(len(arch_test)):
    theta = theta_init([arch_test[i]])
    fm = forward_prop(X_train, theta)
    ite=1
    epoch = 0
    start = time.time()
    cost_init = cost_total(X_valid, theta, valid_class_enc, m)

    while(True):
        count = 0
        lr = lr0/(np.sqrt(ite))
        print("learning rate = ", lr)

        print("Initial Cost on Val dataset for this epoch {} = {}".format(epoch, cost_init))

        for b in mini_batch:
            #if (lr < 0.1): lr = 0.1
            X_b = b[0]
            Y_b = b[1]
            fm = forward_prop(X_b, theta)
            delta = [None]*len(fm)

            if (count % 60 == 0):
                print("Error on this batch = "+str(cost_total(X_b, theta, Y_b, batch_size)))
            #Backward Propagation

            for l in range(len(fm)-1, 0, -1):
                if (l == len(fm)-1):
                    delta[l] = ((1/batch_size)*(Y_b - fm[l])*fm[l]*(1-fm[l]))
                else:
                    delta[l]=(np.dot(delta[l+1], theta[l].T)*fm[l]*(1-fm[l]))

            #delta_l1 = np.dot(delta_l2, theta2.T)*l1*(1-l1)

            for t in range(len(theta)):
                theta[t] += lr*np.dot(fm[t].T, delta[t+1]) 

            count+=1
        epoch+=1 #Number of epochs
        ite+=1
        
        cost_final = cost_total(X_valid, theta, valid_class_enc, m)
        print("Cost on val dataset after {} epochs is = {}".format(epoch, cost_final))
        if (abs(cost_final-cost_init) < 1e-06):
            print("cost initial= {} , cost final={} , change in cost= {}".format(cost_init,cost_final, cost_final-cost_init))
            break
        cost_init = cost_final
    epochs.append(epoch)
    train_time.append(time.time()-start)
    train_accuracy.append(calc_accuracy(X_train, theta, train_class_enc))
    valid_accuracy.append(calc_accuracy(X_valid, theta, valid_class_enc))
    test_accuracy.append(calc_accuracy(X_test, theta, test_actual_class_enc))
    print("\n------------------------------------------------------------------------------")
    print("The stats for number of units in the hidden layer = {} are as below:".format(arch_test[i]))
    print("------------------------------------------------------------------------------")
    print("The number of epochs = {:2.3f}".format(epochs[-1]))
    print("The training time = {:2.3f}sec".format(train_time[-1]))
    print("The training accuracy is = {:2.3f}%".format(train_accuracy[-1]))
    print("The validation accuracy is = {:2.3f}%".format(valid_accuracy[-1]))
    print("The test accuracy is = {:2.3f}%".format(test_accuracy[-1]))
    print("------------------------------------------------------------------------------\n")

learning rate =  1.5
Initial Cost on Val dataset for this epoch 0 = 1.0357980972016674
Error on this batch = 5.915096418485747
Error on this batch = 0.4912251055742077
Cost on val dataset after 1 epochs is = 0.08449712773923045
learning rate =  1.0606601717798212
Initial Cost on Val dataset for this epoch 1 = 0.08449712773923045
Error on this batch = 0.48298644685510733
Error on this batch = 0.47827137077464166
Cost on val dataset after 2 epochs is = 0.08109410749997038
learning rate =  0.8660254037844387
Initial Cost on Val dataset for this epoch 2 = 0.08109410749997038
Error on this batch = 0.4640634340097464
Error on this batch = 0.4596095132636519
Cost on val dataset after 3 epochs is = 0.07829379955419291
learning rate =  0.75
Initial Cost on Val dataset for this epoch 3 = 0.07829379955419291
Error on this batch = 0.45363469079923124
Error on this batch = 0.4542045800812817
Cost on val dataset after 4 epochs is = 0.07698173061020931
learning rate =  0.6708203932499369
Initial Cost

Error on this batch = 0.3737773541871235
Cost on val dataset after 35 epochs is = 0.06322419395126803
learning rate =  0.25
Initial Cost on Val dataset for this epoch 35 = 0.06322419395126803
Error on this batch = 0.37724772034688303
Error on this batch = 0.37169592231877757
Cost on val dataset after 36 epochs is = 0.0629438336129316
learning rate =  0.24659848095803594
Initial Cost on Val dataset for this epoch 36 = 0.0629438336129316
Error on this batch = 0.3745868247421224
Error on this batch = 0.36935403735247635
Cost on val dataset after 37 epochs is = 0.06264421875086282
learning rate =  0.24333213169614382
Initial Cost on Val dataset for this epoch 37 = 0.06264421875086282
Error on this batch = 0.3716847972261598
Error on this batch = 0.3667346420693373
Cost on val dataset after 38 epochs is = 0.062322675566374046
learning rate =  0.24019223070763068
Initial Cost on Val dataset for this epoch 38 = 0.062322675566374046
Error on this batch = 0.3685301000895905
Error on this batch 

Cost on val dataset after 69 epochs is = 0.053705852558991525
learning rate =  0.17928429140015903
Initial Cost on Val dataset for this epoch 69 = 0.053705852558991525
Error on this batch = 0.3065348085613996
Error on this batch = 0.3027857848229039
Cost on val dataset after 70 epochs is = 0.05352389973349451
learning rate =  0.17801724872907798
Initial Cost on Val dataset for this epoch 70 = 0.05352389973349451
Error on this batch = 0.305359131241745
Error on this batch = 0.3011994762433208
Cost on val dataset after 71 epochs is = 0.05334481907534534
learning rate =  0.1767766952966369
Initial Cost on Val dataset for this epoch 71 = 0.05334481907534534
Error on this batch = 0.3042096163898966
Error on this batch = 0.29957327997148925
Cost on val dataset after 72 epochs is = 0.05316924518958036
learning rate =  0.17556172079419585
Initial Cost on Val dataset for this epoch 72 = 0.05316924518958036
Error on this batch = 0.303085129669191
Error on this batch = 0.29793070373248626
Cost on

Cost on val dataset after 102 epochs is = 0.04982541541507088
learning rate =  0.14779939172464399
Initial Cost on Val dataset for this epoch 102 = 0.04982541541507088
Error on this batch = 0.2809045823893061
Error on this batch = 0.2668758282049775
Cost on val dataset after 103 epochs is = 0.04975082323407731
learning rate =  0.14708710135363803
Initial Cost on Val dataset for this epoch 103 = 0.04975082323407731
Error on this batch = 0.2804016669565274
Error on this batch = 0.26613281097237085
Cost on val dataset after 104 epochs is = 0.049677308575865234
learning rate =  0.14638501094227999
Initial Cost on Val dataset for this epoch 104 = 0.049677308575865234
Error on this batch = 0.27990355591511806
Error on this batch = 0.265398324946545
Cost on val dataset after 105 epochs is = 0.04960479634557179
learning rate =  0.14569287935358963
Initial Cost on Val dataset for this epoch 105 = 0.04960479634557179
Error on this batch = 0.2794093119820123
Error on this batch = 0.26467231728236

Cost on val dataset after 135 epochs is = 0.047365484440745044
learning rate =  0.12862393885688161
Initial Cost on Val dataset for this epoch 135 = 0.047365484440745044
Error on this batch = 0.25626241788588183
Error on this batch = 0.24539759024271626
Cost on val dataset after 136 epochs is = 0.04726689558729737
learning rate =  0.12815364865751414
Initial Cost on Val dataset for this epoch 136 = 0.04726689558729737
Error on this batch = 0.25520802077844307
Error on this batch = 0.24459588287237702
Cost on val dataset after 137 epochs is = 0.047166462475456444
learning rate =  0.1276884796138123
Initial Cost on Val dataset for this epoch 137 = 0.047166462475456444
Error on this batch = 0.25423168390450956
Error on this batch = 0.2437737697019469
Cost on val dataset after 138 epochs is = 0.04706449541151074
learning rate =  0.12722833945199566
Initial Cost on Val dataset for this epoch 138 = 0.04706449541151074
Error on this batch = 0.25334165130935826
Error on this batch = 0.24294137

Cost on val dataset after 168 epochs is = 0.044656892525230314
learning rate =  0.11538461538461539
Initial Cost on Val dataset for this epoch 168 = 0.044656892525230314
Error on this batch = 0.24036765220841944
Error on this batch = 0.2276394113164187
Cost on val dataset after 169 epochs is = 0.04460336125599356
learning rate =  0.11504474832710555
Initial Cost on Val dataset for this epoch 169 = 0.04460336125599356
Error on this batch = 0.2400727197369422
Error on this batch = 0.22729548924268134
Cost on val dataset after 170 epochs is = 0.04455083916600492
learning rate =  0.11470786693528089
Initial Cost on Val dataset for this epoch 170 = 0.04455083916600492
Error on this batch = 0.23978263418291665
Error on this batch = 0.22695488648805523
Cost on val dataset after 171 epochs is = 0.04449927817460563
learning rate =  0.1143739277494535
Initial Cost on Val dataset for this epoch 171 = 0.04449927817460563
Error on this batch = 0.23949722386330152
Error on this batch = 0.22661735238

Error on this batch = 0.21765022434612324
Cost on val dataset after 201 epochs is = 0.04326200276746379
learning rate =  0.10553963170954378
Initial Cost on Val dataset for this epoch 201 = 0.04326200276746379
Error on this batch = 0.23248616303495595
Error on this batch = 0.21736656731011803
Cost on val dataset after 202 epochs is = 0.043228268562072666
learning rate =  0.10527936095153946
Initial Cost on Val dataset for this epoch 202 = 0.043228268562072666
Error on this batch = 0.23228957787564178
Error on this batch = 0.21708502648847883
Cost on val dataset after 203 epochs is = 0.04319489445879349
learning rate =  0.10502100630210073
Initial Cost on Val dataset for this epoch 203 = 0.04319489445879349
Error on this batch = 0.2320947538280919
Error on this batch = 0.21680566067109702
Cost on val dataset after 204 epochs is = 0.04316187156355931
learning rate =  0.10476454436543672
Initial Cost on Val dataset for this epoch 204 = 0.04316187156355931
Error on this batch = 0.231901653

Error on this batch = 0.20956676605708674
Cost on val dataset after 234 epochs is = 0.042298366598940214
learning rate =  0.09784921095801632
Initial Cost on Val dataset for this epoch 234 = 0.042298366598940214
Error on this batch = 0.2267736226170674
Error on this batch = 0.20936252327925922
Cost on val dataset after 235 epochs is = 0.04227286080577459
learning rate =  0.09764168323561791
Initial Cost on Val dataset for this epoch 235 = 0.04227286080577459
Error on this batch = 0.22662129286975627
Error on this batch = 0.20916038581292953
Cost on val dataset after 236 epochs is = 0.042247518873551985
learning rate =  0.09743547036924463
Initial Cost on Val dataset for this epoch 236 = 0.042247518873551985
Error on this batch = 0.22646995598983694
Error on this batch = 0.20896032194914402
Cost on val dataset after 237 epochs is = 0.0422223375929999
learning rate =  0.09723055853282467
Initial Cost on Val dataset for this epoch 237 = 0.0422223375929999
Error on this batch = 0.226319591

Error on this batch = 0.20378575351326889
Cost on val dataset after 267 epochs is = 0.041529898833573525
learning rate =  0.09162708326722892
Initial Cost on Val dataset for this epoch 267 = 0.041529898833573525
Error on this batch = 0.22211212892181423
Error on this batch = 0.2036356720382133
Cost on val dataset after 268 epochs is = 0.04150858717282251
learning rate =  0.09145661412745384
Initial Cost on Val dataset for this epoch 268 = 0.04150858717282251
Error on this batch = 0.22197461852086414
Error on this batch = 0.20348656908580487
Cost on val dataset after 269 epochs is = 0.04148736594175686
learning rate =  0.09128709291752769
Initial Cost on Val dataset for this epoch 269 = 0.04148736594175686
Error on this batch = 0.22183657010290447
Error on this batch = 0.20333838212568872
Cost on val dataset after 270 epochs is = 0.04146623215417846
learning rate =  0.09111851088461805
Initial Cost on Val dataset for this epoch 270 = 0.04146623215417846
Error on this batch = 0.221697883

Error on this batch = 0.1988959395162893
Cost on val dataset after 300 epochs is = 0.04085508790749171
learning rate =  0.08645856265563524
Initial Cost on Val dataset for this epoch 300 = 0.04085508790749171
Error on this batch = 0.21650948014631305
Error on this batch = 0.19873158220236906
Cost on val dataset after 301 epochs is = 0.04083488540831388
learning rate =  0.0863153006472654
Initial Cost on Val dataset for this epoch 301 = 0.04083488540831388
Error on this batch = 0.21627735611813814
Error on this batch = 0.1985652944211189
Cost on val dataset after 302 epochs is = 0.04081465501490932
learning rate =  0.0861727484432139
Initial Cost on Val dataset for this epoch 302 = 0.04081465501490932
Error on this batch = 0.21604095131480464
Error on this batch = 0.19839707080143038
Cost on val dataset after 303 epochs is = 0.040794391489302126
learning rate =  0.08603090020146065
Initial Cost on Val dataset for this epoch 303 = 0.040794391489302126
Error on this batch = 0.215800486249

Error on this batch = 0.19315625624040586
Cost on val dataset after 333 epochs is = 0.04012745935506007
learning rate =  0.08207634827468742
Initial Cost on Val dataset for this epoch 333 = 0.04012745935506007
Error on this batch = 0.20843553493744207
Error on this batch = 0.1930005717529072
Cost on val dataset after 334 epochs is = 0.04010222717226553
learning rate =  0.08195375470622296
Initial Cost on Val dataset for this epoch 334 = 0.04010222717226553
Error on this batch = 0.20822273868127195
Error on this batch = 0.1928464444283533
Cost on val dataset after 335 epochs is = 0.04007683409734434
learning rate =  0.08183170883849715
Initial Cost on Val dataset for this epoch 335 = 0.04007683409734434
Error on this batch = 0.2080124222483669
Error on this batch = 0.1926937695179629
Cost on val dataset after 336 epochs is = 0.040051292347262
learning rate =  0.0817102066054265
Initial Cost on Val dataset for this epoch 336 = 0.040051292347262
Error on this batch = 0.20780448873712928
E

Error on this batch = 0.18830574378240147
Cost on val dataset after 366 epochs is = 0.039264522586033475
learning rate =  0.07829936264578208
Initial Cost on Val dataset for this epoch 366 = 0.039264522586033475
Error on this batch = 0.2021854197090294
Error on this batch = 0.18815512849285887
Cost on val dataset after 367 epochs is = 0.03923874486913384
learning rate =  0.07819290527140305
Initial Cost on Val dataset for this epoch 367 = 0.03923874486913384
Error on this batch = 0.20200704426477004
Error on this batch = 0.18800387005477087
Cost on val dataset after 368 epochs is = 0.03921303678398854
learning rate =  0.07808688094430304
Initial Cost on Val dataset for this epoch 368 = 0.03921303678398854
Error on this batch = 0.20182866258396095
Error on this batch = 0.18785194701547958
Cost on val dataset after 369 epochs is = 0.03918739785124337
learning rate =  0.07798128673650545
Initial Cost on Val dataset for this epoch 369 = 0.03918739785124337
Error on this batch = 0.201650212

Error on this batch = 0.1825206567838873
Cost on val dataset after 399 epochs is = 0.03840657593696226
learning rate =  0.075
Initial Cost on Val dataset for this epoch 399 = 0.03840657593696226
Error on this batch = 0.19589170287787405
Error on this batch = 0.18227123253938135
Cost on val dataset after 400 epochs is = 0.038375905521927586
learning rate =  0.07490642541583835
Initial Cost on Val dataset for this epoch 400 = 0.038375905521927586
Error on this batch = 0.19565809072847007
Error on this batch = 0.18201018934518456
Cost on val dataset after 401 epochs is = 0.03834426133020283
learning rate =  0.07481320020807246
Initial Cost on Val dataset for this epoch 401 = 0.03834426133020283
Error on this batch = 0.19541572160153245
Error on this batch = 0.18173628194667862
Cost on val dataset after 402 epochs is = 0.03831150892205133
learning rate =  0.07472032220800769
Initial Cost on Val dataset for this epoch 402 = 0.03831150892205133
Error on this batch = 0.19516285906995595
Error

Error on this batch = 0.1701451546775741
Cost on val dataset after 432 epochs is = 0.03684489031097381
learning rate =  0.07208539969983191
Initial Cost on Val dataset for this epoch 432 = 0.03684489031097381
Error on this batch = 0.18319408941736398
Error on this batch = 0.16983865166232265
Cost on val dataset after 433 epochs is = 0.03680189352355165
learning rate =  0.0720023041105979
Initial Cost on Val dataset for this epoch 433 = 0.03680189352355165
Error on this batch = 0.1829213732272501
Error on this batch = 0.16954080819567513
Cost on val dataset after 434 epochs is = 0.03676006776059153
learning rate =  0.07191949522280762
Initial Cost on Val dataset for this epoch 434 = 0.03676006776059153
Error on this batch = 0.18266075884914165
Error on this batch = 0.16925131454512915
Cost on val dataset after 435 epochs is = 0.03671937518796833
learning rate =  0.07183697139158635
Initial Cost on Val dataset for this epoch 435 = 0.03671937518796833
Error on this batch = 0.1824116477734

Error on this batch = 0.1631281922204534
Cost on val dataset after 465 epochs is = 0.03584682532203279
learning rate =  0.06948615819181192
Initial Cost on Val dataset for this epoch 465 = 0.03584682532203279
Error on this batch = 0.17805519884607582
Error on this batch = 0.1629788284023103
Cost on val dataset after 466 epochs is = 0.03582517942273657
learning rate =  0.06941172200741212
Initial Cost on Val dataset for this epoch 466 = 0.03582517942273657
Error on this batch = 0.17796522961640907
Error on this batch = 0.16283174124837232
Cost on val dataset after 467 epochs is = 0.03580384576181483
learning rate =  0.06933752452815363
Initial Cost on Val dataset for this epoch 467 = 0.03580384576181483
Error on this batch = 0.17787698080239864
Error on this batch = 0.16268685741446084
Cost on val dataset after 468 epochs is = 0.0357828148237003
learning rate =  0.06926356448094152
Initial Cost on Val dataset for this epoch 468 = 0.0357828148237003
Error on this batch = 0.17779036043559

Error on this batch = 0.15909789972652078
Cost on val dataset after 498 epochs is = 0.03525853155916489
learning rate =  0.06714922215537679
Initial Cost on Val dataset for this epoch 498 = 0.03525853155916489
Error on this batch = 0.17566055483741216
Error on this batch = 0.1589969509884868
Cost on val dataset after 499 epochs is = 0.035243777557059874
learning rate =  0.06708203932499368
Initial Cost on Val dataset for this epoch 499 = 0.035243777557059874
Error on this batch = 0.17559921117141386
Error on this batch = 0.15889689480671151
Cost on val dataset after 500 epochs is = 0.03522915953555022
learning rate =  0.06701505774131554
Initial Cost on Val dataset for this epoch 500 = 0.03522915953555022
Error on this batch = 0.17553825597009223
Error on this batch = 0.15879770963581277
Cost on val dataset after 501 epochs is = 0.03521467474618029
learning rate =  0.06694827640161777
Initial Cost on Val dataset for this epoch 501 = 0.03521467474618029
Error on this batch = 0.175477676

Error on this batch = 0.15614409065940899
Cost on val dataset after 531 epochs is = 0.03483136166546228
learning rate =  0.06503324771430899
Initial Cost on Val dataset for this epoch 531 = 0.03483136166546228
Error on this batch = 0.17379242479245022
Error on this batch = 0.1560642214528107
Cost on val dataset after 532 epochs is = 0.034820010429477445
learning rate =  0.0649722122736315
Initial Cost on Val dataset for this epoch 532 = 0.034820010429477445
Error on this batch = 0.17373972217121408
Error on this batch = 0.15598479902430168
Cost on val dataset after 533 epochs is = 0.034808736772776086
learning rate =  0.06491134836118731
Initial Cost on Val dataset for this epoch 533 = 0.034808736772776086
Error on this batch = 0.17368720443430527
Error on this batch = 0.1559058156193419
Cost on val dataset after 534 epochs is = 0.03479753961533867
learning rate =  0.06485065517506754
Initial Cost on Val dataset for this epoch 534 = 0.03479753961533867
Error on this batch = 0.173634869

Error on this batch = 0.1537092135993844
Cost on val dataset after 564 epochs is = 0.034492691733822536
learning rate =  0.06310547428675069
Initial Cost on Val dataset for this epoch 564 = 0.034492691733822536
Error on this batch = 0.17214071777816067
Error on this batch = 0.15364090681019218
Cost on val dataset after 565 epochs is = 0.03448343835470008
learning rate =  0.06304970275637979
Initial Cost on Val dataset for this epoch 565 = 0.03448343835470008
Error on this batch = 0.17209321087601706
Error on this batch = 0.153572871965886
Cost on val dataset after 566 epochs is = 0.034474236547010276
learning rate =  0.06299407883487121
Initial Cost on Val dataset for this epoch 566 = 0.034474236547010276
Error on this batch = 0.17204584019236735
Error on this batch = 0.1535051055714844
Cost on val dataset after 567 epochs is = 0.03446508574360858
learning rate =  0.06293860187225168
Initial Cost on Val dataset for this epoch 567 = 0.03446508574360858
Error on this batch = 0.1719986047

Error on this batch = 0.1515820345947582
Cost on val dataset after 597 epochs is = 0.03421179390188489
learning rate =  0.061339561508214804
Initial Cost on Val dataset for this epoch 597 = 0.03421179390188489
Error on this batch = 0.17064030741107555
Error on this batch = 0.1515211662340528
Cost on val dataset after 598 epochs is = 0.03420398520787019
learning rate =  0.06128833848340777
Initial Cost on Val dataset for this epoch 598 = 0.03420398520787019
Error on this batch = 0.1705968660028832
Error on this batch = 0.15146048236904383
Cost on val dataset after 599 epochs is = 0.034196213236924705
learning rate =  0.06123724356957946
Initial Cost on Val dataset for this epoch 599 = 0.034196213236924705
Error on this batch = 0.17055353578044335
Error on this batch = 0.15139998101835722
Cost on val dataset after 600 epochs is = 0.0341884776383971
learning rate =  0.06118627623360032
Initial Cost on Val dataset for this epoch 600 = 0.0341884776383971
Error on this batch = 0.170510316128

Error on this batch = 0.14966104768162855
Cost on val dataset after 630 epochs is = 0.03397175632191398
learning rate =  0.05971405714935247
Initial Cost on Val dataset for this epoch 630 = 0.03397175632191398
Error on this batch = 0.16926233450670908
Error on this batch = 0.1496053595870912
Cost on val dataset after 631 epochs is = 0.03396499677994192
learning rate =  0.059666796313179754
Initial Cost on Val dataset for this epoch 631 = 0.03396499677994192
Error on this batch = 0.1692222691418168
Error on this batch = 0.14954980336768706
Cost on val dataset after 632 epochs is = 0.03395826446975951
learning rate =  0.059619647513787316
Initial Cost on Val dataset for this epoch 632 = 0.03395826446975951
Error on this batch = 0.1691822976128455
Error on this batch = 0.1494943777737183
Cost on val dataset after 633 epochs is = 0.03395155916015081
learning rate =  0.05957261030921532
Initial Cost on Val dataset for this epoch 633 = 0.03395155916015081
Error on this batch = 0.169142419475

Error on this batch = 0.1478868239801357
Cost on val dataset after 663 epochs is = 0.03376191273591755
learning rate =  0.05821128942797496
Initial Cost on Val dataset for this epoch 663 = 0.03376191273591755
Error on this batch = 0.16798745109405203
Error on this batch = 0.14783491629693069
Cost on val dataset after 664 epochs is = 0.03375594337287091
learning rate =  0.0581675050747111
Initial Cost on Val dataset for this epoch 664 = 0.03375594337287091
Error on this batch = 0.16795026761542692
Error on this batch = 0.14778310762320393
Cost on val dataset after 665 epochs is = 0.03374999488032978
learning rate =  0.05812381937190964
Initial Cost on Val dataset for this epoch 665 = 0.03374999488032978
Error on this batch = 0.16791316516656438
Error on this batch = 0.14773139721272943
Cost on val dataset after 666 epochs is = 0.033744067100658186
learning rate =  0.05808023194967748
Initial Cost on Val dataset for this epoch 666 = 0.033744067100658186
Error on this batch = 0.1678761434

Error on this batch = 0.14622262357761096
Cost on val dataset after 696 epochs is = 0.03357515010061525
learning rate =  0.056816551430892766
Initial Cost on Val dataset for this epoch 696 = 0.03357515010061525
Error on this batch = 0.16680143649355728
Error on this batch = 0.1461736616418436
Cost on val dataset after 697 epochs is = 0.03356979495193113
learning rate =  0.056775837307834844
Initial Cost on Val dataset for this epoch 697 = 0.03356979495193113
Error on this batch = 0.1667667614101209
Error on this batch = 0.14612478100851267
Cost on val dataset after 698 epochs is = 0.03356445628679605
learning rate =  0.056735210585417535
Initial Cost on Val dataset for this epoch 698 = 0.03356445628679605
Error on this batch = 0.16673215737959843
Error on this batch = 0.14607598135192698
Cost on val dataset after 699 epochs is = 0.03355913399508988
learning rate =  0.056694670951384085
Initial Cost on Val dataset for this epoch 699 = 0.03355913399508988
Error on this batch = 0.16669762

Error on this batch = 0.14464848213091858
Cost on val dataset after 729 epochs is = 0.03340657673114112
learning rate =  0.0555174907648204
Initial Cost on Val dataset for this epoch 729 = 0.03340657673114112
Error on this batch = 0.16569325080914132
Error on this batch = 0.14460209047019237
Cost on val dataset after 730 epochs is = 0.03340171315216755
learning rate =  0.05547950410914818
Initial Cost on Val dataset for this epoch 730 = 0.03340171315216755
Error on this batch = 0.1656607845812046
Error on this batch = 0.14455577493427868
Cost on val dataset after 731 epochs is = 0.033396862961685654
learning rate =  0.055441595321592964
Initial Cost on Val dataset for this epoch 731 = 0.033396862961685654
Error on this batch = 0.1656283811400622
Error on this batch = 0.14450953552348175
Cost on val dataset after 732 epochs is = 0.03339202608108995
learning rate =  0.05540376413648383
Initial Cost on Val dataset for this epoch 732 = 0.03339202608108995
Error on this batch = 0.1655960402

Error on this batch = 0.1431580250061115
Cost on val dataset after 762 epochs is = 0.033252748397328794
learning rate =  0.05430364606919974
Initial Cost on Val dataset for this epoch 762 = 0.033252748397328794
Error on this batch = 0.16465379202445454
Error on this batch = 0.1431141802704581
Cost on val dataset after 763 epochs is = 0.03324828899449303
learning rate =  0.05426809539693162
Initial Cost on Val dataset for this epoch 763 = 0.03324828899449303
Error on this batch = 0.1646232805472032
Error on this batch = 0.14307041475423543
Cost on val dataset after 764 epochs is = 0.0332438407476799
learning rate =  0.05423261445466404
Initial Cost on Val dataset for this epoch 764 = 0.0332438407476799
Error on this batch = 0.1645928246899667
Error on this batch = 0.14302672861636864
Cost on val dataset after 765 epochs is = 0.03323940359943797
learning rate =  0.054197203014744316
Initial Cost on Val dataset for this epoch 765 = 0.03323940359943797
Error on this batch = 0.1645624242494

Error on this batch = 0.141753867785067
Cost on val dataset after 795 epochs is = 0.03311118484345912
learning rate =  0.05316609037562519
Initial Cost on Val dataset for this epoch 795 = 0.03311118484345912
Error on this batch = 0.1636752028168732
Error on this batch = 0.14171272281871525
Cost on val dataset after 796 epochs is = 0.03310706584498122
learning rate =  0.05313272602318609
Initial Cost on Val dataset for this epoch 796 = 0.03310706584498122
Error on this batch = 0.16364642361886797
Error on this batch = 0.14167166222257613
Cost on val dataset after 797 epochs is = 0.033102956349102895
learning rate =  0.05309942440535915
Initial Cost on Val dataset for this epoch 797 = 0.033102956349102895
Error on this batch = 0.16361769373211985
Error on this batch = 0.1416306861200287
Cost on val dataset after 798 epochs is = 0.03309885631254655
learning rate =  0.053066185325791874
Initial Cost on Val dataset for this epoch 798 = 0.03309885631254655
Error on this batch = 0.16358901297

Error on this batch = 0.14044113961660504
Cost on val dataset after 828 epochs is = 0.03298005235765344
learning rate =  0.05209715337353909
Initial Cost on Val dataset for this epoch 828 = 0.03298005235765344
Error on this batch = 0.16275058228568917
Error on this batch = 0.14040282136004903
Cost on val dataset after 829 epochs is = 0.03297622582122007
learning rate =  0.05206576008754675
Initial Cost on Val dataset for this epoch 829 = 0.03297622582122007
Error on this batch = 0.16272334018328413
Error on this batch = 0.1403645892588373
Cost on val dataset after 830 epochs is = 0.03297240752084652
learning rate =  0.052034423485197025
Initial Cost on Val dataset for this epoch 830 = 0.03297240752084652
Error on this batch = 0.16269614189543327
Error on this batch = 0.14032644328401747
Cost on val dataset after 831 epochs is = 0.03296859742269909
learning rate =  0.05200314339611523
Initial Cost on Val dataset for this epoch 831 = 0.03296859742269909
Error on this batch = 0.1626689872

Cost on val dataset after 861 epochs is = 0.03285794951369462
learning rate =  0.051090202746762875
Initial Cost on Val dataset for this epoch 861 = 0.03285794951369462
Error on this batch = 0.16187391678447172
Error on this batch = 0.13918624685108796
Cost on val dataset after 862 epochs is = 0.032854378055665705
learning rate =  0.05106059381773481
Initial Cost on Val dataset for this epoch 862 = 0.032854378055665705
Error on this batch = 0.16184804322052732
Error on this batch = 0.13915081204779411
Cost on val dataset after 863 epochs is = 0.03285081382517134
learning rate =  0.05103103630798288
Initial Cost on Val dataset for this epoch 863 = 0.03285081382517134
Error on this batch = 0.16182220876873257
Error on this batch = 0.13911545970273756
Cost on val dataset after 864 epochs is = 0.03284725679470331
learning rate =  0.05100153006885345
Initial Cost on Val dataset for this epoch 864 = 0.03284725679470331
Error on this batch = 0.1617964132982546
Error on this batch = 0.13908018

Error on this batch = 0.13809219782808455
Cost on val dataset after 894 epochs is = 0.032743763462436244
learning rate =  0.05013947028486567
Initial Cost on Val dataset for this epoch 894 = 0.032743763462436244
Error on this batch = 0.16104005206847277
Error on this batch = 0.1380592942846858
Cost on val dataset after 895 epochs is = 0.03274041677632221
learning rate =  0.050111482858579574
Initial Cost on Val dataset for this epoch 895 = 0.03274041677632221
Error on this batch = 0.16101540348589666
Error on this batch = 0.13802646580097933
Cost on val dataset after 896 epochs is = 0.03273707648406159
learning rate =  0.05008354224706334
Initial Cost on Val dataset for this epoch 896 = 0.03273707648406159
Error on this batch = 0.16099079001660171
Error on this batch = 0.13799371210784206
Cost on val dataset after 897 epochs is = 0.03273374256252678
learning rate =  0.05005564831994999
Initial Cost on Val dataset for this epoch 897 = 0.03273374256252678
Error on this batch = 0.16096621

Error on this batch = 0.1370444920481493
Cost on val dataset after 927 epochs is = 0.03263657699667644
learning rate =  0.049239912322395954
Initial Cost on Val dataset for this epoch 927 = 0.03263657699667644
Error on this batch = 0.16024459858469528
Error on this batch = 0.13701391690950815
Cost on val dataset after 928 epochs is = 0.03263342967706085
learning rate =  0.049213403619359
Initial Cost on Val dataset for this epoch 928 = 0.03263342967706085
Error on this batch = 0.1602210523194335
Error on this batch = 0.13698340739655826
Cost on val dataset after 929 epochs is = 0.03263028804326973
learning rate =  0.04918693768379647
Initial Cost on Val dataset for this epoch 929 = 0.03263028804326973
Error on this batch = 0.16019753772424516
Error on this batch = 0.13695296321424644
Cost on val dataset after 930 epochs is = 0.03262715207550915
learning rate =  0.049160514400834666
Initial Cost on Val dataset for this epoch 930 = 0.03262715207550915
Error on this batch = 0.160174054702

Error on this batch = 0.15950635856812814
Error on this batch = 0.13606855724339834
Cost on val dataset after 960 epochs is = 0.03253561326965599
learning rate =  0.04838709677419355
Initial Cost on Val dataset for this epoch 960 = 0.03253561326965599
Error on this batch = 0.15948377946324413
Error on this batch = 0.13603999278624973
Cost on val dataset after 961 epochs is = 0.03253264362200875
learning rate =  0.04836194101643707
Initial Cost on Val dataset for this epoch 961 = 0.03253264362200875
Error on this batch = 0.15946122910691846
Error on this batch = 0.13601148433769328
Cost on val dataset after 962 epochs is = 0.03252967905411868
learning rate =  0.048336824452283184
Initial Cost on Val dataset for this epoch 962 = 0.03252967905411868
Error on this batch = 0.1594387074137559
Error on this batch = 0.13598303161485814
Cost on val dataset after 963 epochs is = 0.03252671954909286
learning rate =  0.04831174698006231
Initial Cost on Val dataset for this epoch 963 = 0.0325267195

Error on this batch = 0.15877597511148625
Error on this batch = 0.1351539940183073
Cost on val dataset after 993 epochs is = 0.0324402082245254
learning rate =  0.04757711097716634
Initial Cost on Val dataset for this epoch 993 = 0.0324402082245254
Error on this batch = 0.158754301431117
Error on this batch = 0.1351271332477609
Cost on val dataset after 994 epochs is = 0.03243739773800206
learning rate =  0.0475531968711181
Initial Cost on Val dataset for this epoch 994 = 0.03243739773800206
Error on this batch = 0.15873265385423527
Error on this batch = 0.13510031963253302
Cost on val dataset after 995 epochs is = 0.03243459181675844
learning rate =  0.04752931878933585
Initial Cost on Val dataset for this epoch 995 = 0.03243459181675844
Error on this batch = 0.15871103230595643
Error on this batch = 0.13507355292225715
Cost on val dataset after 996 epochs is = 0.03243179044653126
learning rate =  0.047505476641465226
Initial Cost on Val dataset for this epoch 996 = 0.0324317904465312

Error on this batch = 0.15807412901568008
Error on this batch = 0.13429117007698527
Cost on val dataset after 1026 epochs is = 0.0323497977298998
learning rate =  0.04680648595607224
Initial Cost on Val dataset for this epoch 1026 = 0.0323497977298998
Error on this batch = 0.15805327869510433
Error on this batch = 0.13426573987793486
Cost on val dataset after 1027 epochs is = 0.03234713075060401
learning rate =  0.0467837146163854
Initial Cost on Val dataset for this epoch 1027 = 0.03234713075060401
Error on this batch = 0.15803245214701137
Error on this batch = 0.1342403492204349
Cost on val dataset after 1028 epochs is = 0.03234446790232925
learning rate =  0.04676097647914122
Initial Cost on Val dataset for this epoch 1028 = 0.03234446790232925
Error on this batch = 0.1580116493048505
Error on this batch = 0.1342149978951702
Cost on val dataset after 1029 epochs is = 0.032341809172994965
learning rate =  0.04673827146373168
Initial Cost on Val dataset for this epoch 1029 = 0.0323418

Error on this batch = 0.1334717603912952
Cost on val dataset after 1059 epochs is = 0.032263904897867866
learning rate =  0.04607213376254634
Initial Cost on Val dataset for this epoch 1059 = 0.032263904897867866
Error on this batch = 0.1573781656081176
Error on this batch = 0.13344753130230544
Cost on val dataset after 1060 epochs is = 0.03226136815448367
learning rate =  0.04605041698841376
Initial Cost on Val dataset for this epoch 1060 = 0.03226136815448367
Error on this batch = 0.15735808740701765
Error on this batch = 0.13342333549456448
Cost on val dataset after 1061 epochs is = 0.03225883517082533
learning rate =  0.046028730894916166
Initial Cost on Val dataset for this epoch 1061 = 0.03225883517082533
Error on this batch = 0.15733803082445497
Error on this batch = 0.13339917279823657
Cost on val dataset after 1062 epochs is = 0.03225630593642814
learning rate =  0.046007075409880825
Initial Cost on Val dataset for this epoch 1062 = 0.03225630593642814
Error on this batch = 0.

Error on this batch = 0.1567460927825139
Error on this batch = 0.13268888930157455
Cost on val dataset after 1092 epochs is = 0.032182121982432144
learning rate =  0.04537129530141776
Initial Cost on Val dataset for this epoch 1092 = 0.032182121982432144
Error on this batch = 0.15672667727077233
Error on this batch = 0.1326656744787181
Cost on val dataset after 1093 epochs is = 0.032179703991505944
learning rate =  0.045350554136767536
Initial Cost on Val dataset for this epoch 1093 = 0.032179703991505944
Error on this batch = 0.15670728152751565
Error on this batch = 0.13264248786858718
Cost on val dataset after 1094 epochs is = 0.03217728943447732
learning rate =  0.04532984139116246
Initial Cost on Val dataset for this epoch 1094 = 0.03217728943447732
Error on this batch = 0.15668790549751666
Error on this batch = 0.13261932933341655
Cost on val dataset after 1095 epochs is = 0.03217487830204103
learning rate =  0.045309156999763275
Initial Cost on Val dataset for this epoch 1095 = 

Error on this batch = 0.1561155239152657
Error on this batch = 0.1319369777591795
Cost on val dataset after 1125 epochs is = 0.032104091811288794
learning rate =  0.044701496631161235
Initial Cost on Val dataset for this epoch 1125 = 0.032104091811288794
Error on this batch = 0.15609673248515338
Error on this batch = 0.13191462554980019
Cost on val dataset after 1126 epochs is = 0.03210178238455702
learning rate =  0.04468166015508745
Initial Cost on Val dataset for this epoch 1126 = 0.03210178238455702
Error on this batch = 0.15607795908704652
Error on this batch = 0.13189229741713823
Cost on val dataset after 1127 epochs is = 0.03209947609980776
learning rate =  0.044661850063156025
Initial Cost on Val dataset for this epoch 1127 = 0.03209947609980776
Error on this batch = 0.1560592036711828
Error on this batch = 0.13186999324798152
Cost on val dataset after 1128 epochs is = 0.03209717294866084
learning rate =  0.04464206629693032
Initial Cost on Val dataset for this epoch 1128 = 0.0

Cost on val dataset after 1158 epochs is = 0.03202949504789827
learning rate =  0.044060512158453496
Initial Cost on Val dataset for this epoch 1158 = 0.03202949504789827
Error on this batch = 0.15548644159608863
Error on this batch = 0.13118986140978225
Cost on val dataset after 1159 epochs is = 0.03202728502487393
learning rate =  0.044041516463602756
Initial Cost on Val dataset for this epoch 1159 = 0.03202728502487393
Error on this batch = 0.1554682363885034
Error on this batch = 0.1311682680253094
Cost on val dataset after 1160 epochs is = 0.03202507788109829
learning rate =  0.04402254531628119
Initial Cost on Val dataset for this epoch 1160 = 0.03202507788109829
Error on this batch = 0.1554500476198252
Error on this batch = 0.13114669521215633
Cost on val dataset after 1161 epochs is = 0.03202287360905188
learning rate =  0.04400359866366432
Initial Cost on Val dataset for this epoch 1161 = 0.03202287360905188
Error on this batch = 0.1554318752463024
Error on this batch = 0.1311

Cost on val dataset after 1191 epochs is = 0.0319580445681041
learning rate =  0.04344633347726192
Initial Cost on Val dataset for this epoch 1191 = 0.0319580445681041
Error on this batch = 0.1548941179322309
Error on this batch = 0.13048765542786114
Cost on val dataset after 1192 epochs is = 0.03195592573173045
learning rate =  0.043428120802608244
Initial Cost on Val dataset for this epoch 1192 = 0.03195592573173045
Error on this batch = 0.15487643305761678
Error on this batch = 0.1304666946392421
Cost on val dataset after 1193 epochs is = 0.03195380954017606
learning rate =  0.04340993101299245
Initial Cost on Val dataset for this epoch 1193 = 0.03195380954017606
Error on this batch = 0.15485876326937073
Error on this batch = 0.13044575162570102
Cost on val dataset after 1194 epochs is = 0.03195169598677777
learning rate =  0.04339176406052788
Initial Cost on Val dataset for this epoch 1194 = 0.03195169598677777
Error on this batch = 0.15484110852935293
Error on this batch = 0.13042

Error on this batch = 0.12982536473833312
Cost on val dataset after 1224 epochs is = 0.03188948412786231
learning rate =  0.04285714285714286
Initial Cost on Val dataset for this epoch 1224 = 0.03188948412786231
Error on this batch = 0.15431828096686137
Error on this batch = 0.12980493717800987
Cost on val dataset after 1225 epochs is = 0.03188744919203394
learning rate =  0.04283966084831548
Initial Cost on Val dataset for this epoch 1225 = 0.03188744919203394
Error on this batch = 0.15430107464886567
Error on this batch = 0.12978452517590924
Cost on val dataset after 1226 epochs is = 0.03188541669460254
learning rate =  0.04282220021549388
Initial Cost on Val dataset for this epoch 1226 = 0.03188541669460254
Error on this batch = 0.15428388223917266
Error on this batch = 0.12976412866991838
Cost on val dataset after 1227 epochs is = 0.03188338662973882
learning rate =  0.04280476091515122
Initial Cost on Val dataset for this epoch 1227 = 0.03188338662973882
Error on this batch = 0.15

Error on this batch = 0.12915914758296768
Cost on val dataset after 1257 epochs is = 0.03182358789970577
learning rate =  0.04229129058259507
Initial Cost on Val dataset for this epoch 1257 = 0.03182358789970577
Error on this batch = 0.15375763995321237
Error on this batch = 0.12913920282910926
Cost on val dataset after 1258 epochs is = 0.031821630483792565
learning rate =  0.04227449165816562
Initial Cost on Val dataset for this epoch 1258 = 0.031821630483792565
Error on this batch = 0.1537408757543206
Error on this batch = 0.12911927179208132
Cost on val dataset after 1259 epochs is = 0.031819675326443614
learning rate =  0.042257712736425826
Initial Cost on Val dataset for this epoch 1259 = 0.031819675326443614
Error on this batch = 0.15372412443323924
Error on this batch = 0.12909935442245585
Cost on val dataset after 1260 epochs is = 0.03181772242259833
learning rate =  0.04224095377771143
Initial Cost on Val dataset for this epoch 1260 = 0.03181772242259833
Error on this batch = 

Error on this batch = 0.12850795760485118
Cost on val dataset after 1290 epochs is = 0.03176015895782806
learning rate =  0.04174727549935325
Initial Cost on Val dataset for this epoch 1290 = 0.03176015895782806
Error on this batch = 0.153211065852672
Error on this batch = 0.12848844127193498
Cost on val dataset after 1291 epochs is = 0.03175827351788512
learning rate =  0.041731116305599295
Initial Cost on Val dataset for this epoch 1291 = 0.03175827351788512
Error on this batch = 0.15319471177387267
Error on this batch = 0.12846893720260721
Cost on val dataset after 1292 epochs is = 0.03175639018051983
learning rate =  0.0417149758616362
Initial Cost on Val dataset for this epoch 1292 = 0.03175639018051983
Error on this batch = 0.15317836966385
Error on this batch = 0.12844944535825495
Cost on val dataset after 1293 epochs is = 0.031754508941340936
learning rate =  0.0416988541312325
Initial Cost on Val dataset for this epoch 1293 = 0.031754508941340936
Error on this batch = 0.153162

Cost on val dataset after 1323 epochs is = 0.031699026374517894
learning rate =  0.04122372824469845
Initial Cost on Val dataset for this epoch 1323 = 0.031699026374517894
Error on this batch = 0.15267756329099333
Error on this batch = 0.12785106465987017
Cost on val dataset after 1324 epochs is = 0.03169720809771867
learning rate =  0.04120816918460671
Initial Cost on Val dataset for this epoch 1324 = 0.03169720809771867
Error on this batch = 0.15266159100967586
Error on this batch = 0.12783194536235057
Cost on val dataset after 1325 epochs is = 0.031695391787778776
learning rate =  0.04119262772858173
Initial Cost on Val dataset for this epoch 1325 = 0.031695391787778776
Error on this batch = 0.15264562989014632
Error on this batch = 0.12781283717602268
Cost on val dataset after 1326 epochs is = 0.03169357744086139
learning rate =  0.041177103843452106
Initial Cost on Val dataset for this epoch 1326 = 0.03169357744086139
Error on this batch = 0.15262967990939807
Error on this batch =

Error on this batch = 0.12724462265743194
Cost on val dataset after 1356 epochs is = 0.031640041396026906
learning rate =  0.04071939673484273
Initial Cost on Val dataset for this epoch 1356 = 0.031640041396026906
Error on this batch = 0.15215624919391296
Error on this batch = 0.1272258456859826
Cost on val dataset after 1357 epochs is = 0.03163828607716692
learning rate =  0.040704401559694285
Initial Cost on Val dataset for this epoch 1357 = 0.03163828607716692
Error on this batch = 0.15214063354013693
Error on this batch = 0.12720707901414874
Cost on val dataset after 1358 epochs is = 0.03163653260589012
learning rate =  0.04068942293855797
Initial Cost on Val dataset for this epoch 1358 = 0.03163653260589012
Error on this batch = 0.1521250283307845
Error on this batch = 0.12718832262041938
Cost on val dataset after 1359 epochs is = 0.031634780978799355
learning rate =  0.04067446084099803
Initial Cost on Val dataset for this epoch 1359 = 0.031634780978799355
Error on this batch = 0

Error on this batch = 0.126630312307642
Cost on val dataset after 1389 epochs is = 0.031583073316248156
learning rate =  0.04023313355893651
Initial Cost on Val dataset for this epoch 1389 = 0.031583073316248156
Error on this batch = 0.15164633877946987
Error on this batch = 0.12661186504091057
Cost on val dataset after 1390 epochs is = 0.031581377233559534
learning rate =  0.0402186690128789
Initial Cost on Val dataset for this epoch 1390 = 0.031581377233559534
Error on this batch = 0.15163105739904034
Error on this batch = 0.12659342747637167
Cost on val dataset after 1391 epochs is = 0.03157968289198254
learning rate =  0.0402042200564173
Initial Cost on Val dataset for this epoch 1391 = 0.03157968289198254
Error on this batch = 0.1516157858266942
Error on this batch = 0.12657499959938423
Cost on val dataset after 1392 epochs is = 0.03157799028845553
learning rate =  0.04018978666156824
Initial Cost on Val dataset for this epoch 1392 = 0.03157799028845553
Error on this batch = 0.151

Error on this batch = 0.12602660315939035
Cost on val dataset after 1422 epochs is = 0.031528005527440554
learning rate =  0.03976388499056925
Initial Cost on Val dataset for this epoch 1422 = 0.031528005527440554
Error on this batch = 0.1511471356801782
Error on this batch = 0.1260084693649287
Cost on val dataset after 1423 epochs is = 0.03152636532915119
learning rate =  0.039749920500238504
Initial Cost on Val dataset for this epoch 1423 = 0.03152636532915119
Error on this batch = 0.15113216874649923
Error on this batch = 0.12599034488815902
Cost on val dataset after 1424 epochs is = 0.031524726775109826
learning rate =  0.03973597071195132
Initial Cost on Val dataset for this epoch 1424 = 0.031524726775109826
Error on this batch = 0.15111721105571566
Error on this batch = 0.12597222972047553
Cost on val dataset after 1425 epochs is = 0.03152308986250164
learning rate =  0.03972203559992821
Initial Cost on Val dataset for this epoch 1425 = 0.03152308986250164
Error on this batch = 0

Error on this batch = 0.12543306759301598
Cost on val dataset after 1455 epochs is = 0.03147473203042905
learning rate =  0.039310681377071936
Initial Cost on Val dataset for this epoch 1455 = 0.03147473203042905
Error on this batch = 0.15065802074101917
Error on this batch = 0.1254152376478151
Cost on val dataset after 1456 epochs is = 0.03147314463767981
learning rate =  0.03929718877966846
Initial Cost on Val dataset for this epoch 1456 = 0.03147314463767981
Error on this batch = 0.15064335052404176
Error on this batch = 0.12539741682339506
Cost on val dataset after 1457 epochs is = 0.031471558799252626
learning rate =  0.039283710065919304
Initial Cost on Val dataset for this epoch 1457 = 0.031471558799252626
Error on this batch = 0.15062868903820545
Error on this batch = 0.12537960511648016
Cost on val dataset after 1458 epochs is = 0.0314699745125064
learning rate =  0.039270245212030735
Initial Cost on Val dataset for this epoch 1458 = 0.0314699745125064
Error on this batch = 0.

Error on this batch = 0.12484948337573455
Cost on val dataset after 1488 epochs is = 0.031423154503053524
learning rate =  0.03887262870700081
Initial Cost on Val dataset for this epoch 1488 = 0.031423154503053524
Error on this batch = 0.1501784323495181
Error on this batch = 0.12483195349182985
Cost on val dataset after 1489 epochs is = 0.03142161702939164
learning rate =  0.03885958201131299
Initial Cost on Val dataset for this epoch 1489 = 0.03142161702939164
Error on this batch = 0.15016404239665673
Error on this batch = 0.12481443269775744
Cost on val dataset after 1490 epochs is = 0.03142008102483983
learning rate =  0.0388465484432753
Initial Cost on Val dataset for this epoch 1490 = 0.03142008102483983
Error on this batch = 0.1501496606825749
Error on this batch = 0.1247969209949704
Cost on val dataset after 1491 epochs is = 0.03141854648686802
learning rate =  0.03883352798088733
Initial Cost on Val dataset for this epoch 1491 = 0.03141854648686802
Error on this batch = 0.1501

Error on this batch = 0.12427581001012658
Cost on val dataset after 1521 epochs is = 0.031373179842847515
learning rate =  0.0384489011888262
Initial Cost on Val dataset for this epoch 1521 = 0.031373179842847515
Error on this batch = 0.14970782902481816
Error on this batch = 0.1242585815804477
Cost on val dataset after 1522 epochs is = 0.03137168952464175
learning rate =  0.03843627636458541
Initial Cost on Val dataset for this epoch 1522 = 0.03137168952464175
Error on this batch = 0.14969370262685885
Error on this batch = 0.12424136235797084
Cost on val dataset after 1523 epochs is = 0.031370200592994654
learning rate =  0.038423663968391995
Initial Cost on Val dataset for this epoch 1523 = 0.031370200592994654
Error on this batch = 0.14967958393254285
Error on this batch = 0.12422415234843015
Cost on val dataset after 1524 epochs is = 0.03136871304542274
learning rate =  0.03841106397986879
Initial Cost on Val dataset for this epoch 1524 = 0.03136871304542274
Error on this batch = 0

Error on this batch = 0.12371216963166468
Cost on val dataset after 1554 epochs is = 0.031324717884893875
learning rate =  0.038038734700573035
Initial Cost on Val dataset for this epoch 1554 = 0.031324717884893875
Error on this batch = 0.14924562125308238
Error on this batch = 0.12369524874896923
Cost on val dataset after 1555 epochs is = 0.031323272009629435
learning rate =  0.038026509492254536
Initial Cost on Val dataset for this epoch 1555 = 0.031323272009629435
Error on this batch = 0.1492317387091062
Error on this batch = 0.12367833732872681
Cost on val dataset after 1556 epochs is = 0.03132182743861243
learning rate =  0.038014296063485276
Initial Cost on Val dataset for this epoch 1556 = 0.03132182743861243
Error on this batch = 0.1492178631817889
Error on this batch = 0.12366143538081616
Cost on val dataset after 1557 epochs is = 0.031320384169318344
learning rate =  0.038002094395360494
Initial Cost on Val dataset for this epoch 1557 = 0.031320384169318344
Error on this batc

Error on this batch = 0.12315883599818614
Cost on val dataset after 1587 epochs is = 0.03127767870696885
learning rate =  0.037641420992137166
Initial Cost on Val dataset for this epoch 1587 = 0.03127767870696885
Error on this batch = 0.1487910516770653
Error on this batch = 0.123142233322171
Cost on val dataset after 1588 epochs is = 0.03127627451566925
learning rate =  0.037629574753923374
Initial Cost on Val dataset for this epoch 1588 = 0.03127627451566925
Error on this batch = 0.14877738551860506
Error on this batch = 0.12312564050645142
Cost on val dataset after 1589 epochs is = 0.031274871542801425
learning rate =  0.03761773969316394
Initial Cost on Val dataset for this epoch 1589 = 0.031274871542801425
Error on this batch = 0.1487637253471036
Error on this batch = 0.12310905756557279
Cost on val dataset after 1590 epochs is = 0.031273469785649724
learning rate =  0.03760591579229251
Initial Cost on Val dataset for this epoch 1590 = 0.031273469785649724
Error on this batch = 0.

Error on this batch = 0.12261624035478154
Cost on val dataset after 1620 epochs is = 0.031231968458590668
learning rate =  0.03725630254015558
Initial Cost on Val dataset for this epoch 1620 = 0.031231968458590668
Error on this batch = 0.14834298451001982
Error on this batch = 0.12259997171230205
Cost on val dataset after 1621 epochs is = 0.031230602978513225
learning rate =  0.037244816089276704
Initial Cost on Val dataset for this epoch 1621 = 0.031230602978513225
Error on this batch = 0.14832949100584042
Error on this batch = 0.12258371350706748
Cost on val dataset after 1622 epochs is = 0.031229238620619285
learning rate =  0.03723334025597724
Initial Cost on Val dataset for this epoch 1622 = 0.031229238620619285
Error on this batch = 0.14831600176979884
Error on this batch = 0.12256746576019314
Cost on val dataset after 1623 epochs is = 0.031227875381708133
learning rate =  0.03722187502390988
Initial Cost on Val dataset for this epoch 1623 = 0.031227875381708133
Error on this bat

Error on this batch = 0.12208501279139114
Cost on val dataset after 1653 epochs is = 0.031187481611371196
learning rate =  0.036882767970367755
Initial Cost on Val dataset for this epoch 1653 = 0.031187481611371196
Error on this batch = 0.14789952087186184
Error on this batch = 0.1220691013221267
Cost on val dataset after 1654 epochs is = 0.031186151326020463
learning rate =  0.03687162345644953
Initial Cost on Val dataset for this epoch 1654 = 0.031186151326020463
Error on this batch = 0.14788612406801743
Error on this batch = 0.1220532011503974
Cost on val dataset after 1655 epochs is = 0.03118482204180735
learning rate =  0.036860489038724284
Initial Cost on Val dataset for this epoch 1655 = 0.03118482204180735
Error on this batch = 0.1478727284510759
Error on this batch = 0.12203731230879161
Cost on val dataset after 1656 epochs is = 0.03118349375440718
learning rate =  0.03684936470195707
Initial Cost on Val dataset for this epoch 1656 = 0.03118349375440718
Error on this batch = 0

Error on this batch = 0.1215660997631899
Cost on val dataset after 1686 epochs is = 0.031144084737961288
learning rate =  0.03652024797494876
Initial Cost on Val dataset for this epoch 1686 = 0.031144084737961288
Error on this batch = 0.1474572448095653
Error on this batch = 0.12155058145582351
Cost on val dataset after 1687 epochs is = 0.03114278486156902
learning rate =  0.036509428763342656
Initial Cost on Val dataset for this epoch 1687 = 0.03114278486156902
Error on this batch = 0.1474438039844774
Error on this batch = 0.12153507584724058
Cost on val dataset after 1688 epochs is = 0.031141485807264477
learning rate =  0.03649861916169541
Initial Cost on Val dataset for this epoch 1688 = 0.031141485807264477
Error on this batch = 0.1474303584692132
Error on this batch = 0.12151958299348288
Cost on val dataset after 1689 epochs is = 0.03114018756804181
learning rate =  0.03648781915578899
Initial Cost on Val dataset for this epoch 1689 = 0.03114018756804181
Error on this batch = 0.1

Error on this batch = 0.12106105638828789
Cost on val dataset after 1719 epochs is = 0.03110157845764642
learning rate =  0.03616821166278091
Initial Cost on Val dataset for this epoch 1719 = 0.03110157845764642
Error on this batch = 0.14700960041707226
Error on this batch = 0.12104599374168631
Cost on val dataset after 1720 epochs is = 0.031100301136074912
learning rate =  0.036157702230142134
Initial Cost on Val dataset for this epoch 1720 = 0.031100301136074912
Error on this batch = 0.14699583664768878
Error on this batch = 0.12103094638065415
Cost on val dataset after 1721 epochs is = 0.031099024312163038
learning rate =  0.03614720195339099
Initial Cost on Val dataset for this epoch 1721 = 0.031099024312163038
Error on this batch = 0.14698205611621826
Error on this batch = 0.12101591441560273
Cost on val dataset after 1722 epochs is = 0.03109774797185793
learning rate =  0.03613671081924073
Initial Cost on Val dataset for this epoch 1722 = 0.03109774797185793
Error on this batch =

Error on this batch = 0.12057278860903439
Cost on val dataset after 1752 epochs is = 0.031059595122158085
learning rate =  0.03582616328943238
Initial Cost on Val dataset for this epoch 1752 = 0.031059595122158085
Error on this batch = 0.14654298678755381
Error on this batch = 0.12055830655597845
Cost on val dataset after 1753 epochs is = 0.031058324261068054
learning rate =  0.035815949130054606
Initial Cost on Val dataset for this epoch 1753 = 0.031058324261068054
Error on this batch = 0.14652829646813548
Error on this batch = 0.12054384534275499
Cost on val dataset after 1754 epochs is = 0.03105705315797392
learning rate =  0.03580574370197164
Initial Cost on Val dataset for this epoch 1754 = 0.03105705315797392
Error on this batch = 0.14651356198683316
Error on this batch = 0.12052940522523019
Cost on val dataset after 1755 epochs is = 0.03105578177721432
learning rate =  0.03579554699275107
Initial Cost on Val dataset for this epoch 1755 = 0.03105578177721432
Error on this batch =

Cost on val dataset after 1784 epochs is = 0.03101857678439257
learning rate =  0.035503580124836315
Initial Cost on Val dataset for this epoch 1784 = 0.03101857678439257
Error on this batch = 0.14604303065577443
Error on this batch = 0.1201076608963474
Cost on val dataset after 1785 epochs is = 0.03101727243956778
learning rate =  0.03549363932102258
Initial Cost on Val dataset for this epoch 1785 = 0.03101727243956778
Error on this batch = 0.1460260537717555
Error on this batch = 0.12009405628103569
Cost on val dataset after 1786 epochs is = 0.03101596578945901
learning rate =  0.03548370686264396
Initial Cost on Val dataset for this epoch 1786 = 0.03101596578945901
Error on this batch = 0.14600896504741784
Error on this batch = 0.12008048693851926
Cost on val dataset after 1787 epochs is = 0.031014656720821158
learning rate =  0.03547378273803016
Initial Cost on Val dataset for this epoch 1787 = 0.031014656720821158
Error on this batch = 0.1459917610586591
Error on this batch = 0.12

Cost on val dataset after 1817 epochs is = 0.030973432623769812
learning rate =  0.0351798772365146
Initial Cost on Val dataset for this epoch 1817 = 0.030973432623769812
Error on this batch = 0.14539897654671852
Error on this batch = 0.11968283901967587
Cost on val dataset after 1818 epochs is = 0.03097195306007952
learning rate =  0.03517020579241801
Initial Cost on Val dataset for this epoch 1818 = 0.03097195306007952
Error on this batch = 0.14537562574070537
Error on this batch = 0.11967100633474761
Cost on val dataset after 1819 epochs is = 0.030970462907311043
learning rate =  0.03516054232038716
Initial Cost on Val dataset for this epoch 1819 = 0.030970462907311043
Error on this batch = 0.1453519539607812
Error on this batch = 0.11965925825580868
Cost on val dataset after 1820 epochs is = 0.03096896165182602
learning rate =  0.035150886809475895
Initial Cost on Val dataset for this epoch 1820 = 0.03096896165182602
Error on this batch = 0.14532795012505212
Error on this batch = 0

Error on this batch = 0.1193678821812234
Cost on val dataset after 1850 epochs is = 0.030914519233016597
learning rate =  0.03486486995834514
Initial Cost on Val dataset for this epoch 1850 = 0.030914519233016597
Error on this batch = 0.14437770739880626
Error on this batch = 0.11936133647104441
Cost on val dataset after 1851 epochs is = 0.03091216045214761
learning rate =  0.03485545592551298
Initial Cost on Val dataset for this epoch 1851 = 0.03091216045214761
Error on this batch = 0.14433503446406945
Error on this batch = 0.1193551071653135
Cost on val dataset after 1852 epochs is = 0.030909742772519413
learning rate =  0.034846049514351135
Initial Cost on Val dataset for this epoch 1852 = 0.030909742772519413
Error on this batch = 0.1442913850715081
Error on this batch = 0.11934921010231041
Cost on val dataset after 1853 epochs is = 0.03090726268593413
learning rate =  0.034836650714580884
Initial Cost on Val dataset for this epoch 1853 = 0.03090726268593413
Error on this batch = 0

Error on this batch = 0.11945235119107325
Cost on val dataset after 1883 epochs is = 0.03077192801113862
learning rate =  0.03455817581880685
Initial Cost on Val dataset for this epoch 1883 = 0.03077192801113862
Error on this batch = 0.14237626143833162
Error on this batch = 0.1194707862310098
Cost on val dataset after 1884 epochs is = 0.03076360426815918
learning rate =  0.034549007977865906
Initial Cost on Val dataset for this epoch 1884 = 0.03076360426815918
Error on this batch = 0.14230124598967173
Error on this batch = 0.11949053535346515
Cost on val dataset after 1885 epochs is = 0.030754859056153088
learning rate =  0.0345398474293878
Initial Cost on Val dataset for this epoch 1885 = 0.030754859056153088
Error on this batch = 0.1422265692947206
Error on this batch = 0.11951159083701551
Cost on val dataset after 1886 epochs is = 0.030745670003467236
learning rate =  0.03453069416370981
Initial Cost on Val dataset for this epoch 1886 = 0.030745670003467236
Error on this batch = 0.

Error on this batch = 0.11942419301076221
Cost on val dataset after 1916 epochs is = 0.030208970553711894
learning rate =  0.03425943549137658
Initial Cost on Val dataset for this epoch 1916 = 0.030208970553711894
Error on this batch = 0.14046431749260882
Error on this batch = 0.11933500708404214
Cost on val dataset after 1917 epochs is = 0.030184647667755873
learning rate =  0.03425050329581573
Initial Cost on Val dataset for this epoch 1917 = 0.030184647667755873
Error on this batch = 0.14039351105814696
Error on this batch = 0.11923936380730936
Cost on val dataset after 1918 epochs is = 0.0301602934344882
learning rate =  0.03424157808307896
Initial Cost on Val dataset for this epoch 1918 = 0.0301602934344882
Error on this batch = 0.14031904343350388
Error on this batch = 0.11913769467497602
Cost on val dataset after 1919 epochs is = 0.030135945780543575
learning rate =  0.03423265984407288
Initial Cost on Val dataset for this epoch 1919 = 0.030135945780543575
Error on this batch = 

Error on this batch = 0.11524154698962179
Cost on val dataset after 1949 epochs is = 0.02950416462457911
learning rate =  0.03396831102433787
Initial Cost on Val dataset for this epoch 1949 = 0.02950416462457911
Error on this batch = 0.13692679036883604
Error on this batch = 0.11512305868794026
Cost on val dataset after 1950 epochs is = 0.029487502903426253
learning rate =  0.033959604549499395
Initial Cost on Val dataset for this epoch 1950 = 0.029487502903426253
Error on this batch = 0.13681775518443715
Error on this batch = 0.11500635278646475
Cost on val dataset after 1951 epochs is = 0.029471123473807332
learning rate =  0.03395090476594445
Initial Cost on Val dataset for this epoch 1951 = 0.029471123473807332
Error on this batch = 0.1367105642137426
Error on this batch = 0.11489143880345981
Cost on val dataset after 1952 epochs is = 0.029455022878909602
learning rate =  0.03394221166510653
Initial Cost on Val dataset for this epoch 1952 = 0.029455022878909602
Error on this batch 

Error on this batch = 0.11220258476543495
Cost on val dataset after 1982 epochs is = 0.029080715537519907
learning rate =  0.03368448423330154
Initial Cost on Val dataset for this epoch 1982 = 0.029080715537519907
Error on this batch = 0.1343435806601011
Error on this batch = 0.1121340491125462
Cost on val dataset after 1983 epochs is = 0.029071145203213734
learning rate =  0.033675994130020294
Initial Cost on Val dataset for this epoch 1983 = 0.029071145203213734
Error on this batch = 0.13429354037368033
Error on this batch = 0.11206658318572416
Cost on val dataset after 1984 epochs is = 0.029061719443032624
learning rate =  0.03366751044324252
Initial Cost on Val dataset for this epoch 1984 = 0.029061719443032624
Error on this batch = 0.1342447691892185
Error on this batch = 0.11200016213387094
Cost on val dataset after 1985 epochs is = 0.029052434781581968
learning rate =  0.033659033164890016
Initial Cost on Val dataset for this epoch 1985 = 0.029052434781581968
Error on this batch

Error on this batch = 0.11039076820670048
Cost on val dataset after 2015 epochs is = 0.02882536231048095
learning rate =  0.03340765523905305
Initial Cost on Val dataset for this epoch 2015 = 0.02882536231048095
Error on this batch = 0.13321475461963186
Error on this batch = 0.1103471044894787
Cost on val dataset after 2016 epochs is = 0.028819130786309714
learning rate =  0.033399372691451094
Initial Cost on Val dataset for this epoch 2016 = 0.028819130786309714
Error on this batch = 0.13319307472134773
Error on this batch = 0.11030393998682052
Cost on val dataset after 2017 epochs is = 0.028812966388995533
learning rate =  0.033391096301114274
Initial Cost on Val dataset for this epoch 2017 = 0.028812966388995533
Error on this batch = 0.13317190907384496
Error on this batch = 0.11026126390077096
Cost on val dataset after 2018 epochs is = 0.028806867730370844
learning rate =  0.033382826060417495
Initial Cost on Val dataset for this epoch 2018 = 0.028806867730370844
Error on this batc

Cost on val dataset after 2047 epochs is = 0.028653579840263052
learning rate =  0.03314563036811941
Initial Cost on Val dataset for this epoch 2047 = 0.028653579840263052
Error on this batch = 0.1327123916844054
Error on this batch = 0.10916421639410853
Cost on val dataset after 2048 epochs is = 0.028648967682847616
learning rate =  0.03313754113545029
Initial Cost on Val dataset for this epoch 2048 = 0.028648967682847616
Error on this batch = 0.13270122004317686
Error on this batch = 0.10913256029307299
Cost on val dataset after 2049 epochs is = 0.028644392970527833
learning rate =  0.03312945782245396
Initial Cost on Val dataset for this epoch 2049 = 0.028644392970527833
Error on this batch = 0.13269022938790556
Error on this batch = 0.10910115816825659
Cost on val dataset after 2050 epochs is = 0.02863985512135498
learning rate =  0.033121380421913954
Initial Cost on Val dataset for this epoch 2050 = 0.02863985512135498
Error on this batch = 0.13267941346854245
Error on this batch 

Error on this batch = 0.10825678233019352
Cost on val dataset after 2080 epochs is = 0.028518453636823614
learning rate =  0.032881772401606085
Initial Cost on Val dataset for this epoch 2080 = 0.028518453636823614
Error on this batch = 0.13241175057227067
Error on this batch = 0.1082313240810063
Cost on val dataset after 2081 epochs is = 0.02851483082062008
learning rate =  0.03287387477391883
Initial Cost on Val dataset for this epoch 2081 = 0.02851483082062008
Error on this batch = 0.13240408856753408
Error on this batch = 0.10820600861709118
Cost on val dataset after 2082 epochs is = 0.02851123169983316
learning rate =  0.03286598283411626
Initial Cost on Val dataset for this epoch 2082 = 0.02851123169983316
Error on this batch = 0.13239647653119307
Error on this batch = 0.10818083354940397
Cost on val dataset after 2083 epochs is = 0.02850765598443859
learning rate =  0.032858096575374265
Initial Cost on Val dataset for this epoch 2083 = 0.02850765598443859
Error on this batch = 0

Error on this batch = 0.10748078652867606
Cost on val dataset after 2113 epochs is = 0.028410014835690254
learning rate =  0.03262411712177667
Initial Cost on Val dataset for this epoch 2113 = 0.028410014835690254
Error on this batch = 0.1321756911697318
Error on this batch = 0.10745900657186037
Cost on val dataset after 2114 epochs is = 0.028407044618054978
learning rate =  0.03261640365267211
Initial Cost on Val dataset for this epoch 2114 = 0.028407044618054978
Error on this batch = 0.13216882439093688
Error on this batch = 0.10743731134625499
Cost on val dataset after 2115 epochs is = 0.028404090687168232
learning rate =  0.03260869565217391
Initial Cost on Val dataset for this epoch 2115 = 0.028404090687168232
Error on this batch = 0.13216196269325703
Error on this batch = 0.10741569961696304
Cost on val dataset after 2116 epochs is = 0.02840115287245402
learning rate =  0.032600993113823355
Initial Cost on Val dataset for this epoch 2116 = 0.02840115287245402
Error on this batch 

Error on this batch = 0.10680090459141553
Cost on val dataset after 2146 epochs is = 0.02831975555934398
learning rate =  0.032372425258136266
Initial Cost on Val dataset for this epoch 2146 = 0.02831975555934398
Error on this batch = 0.1319487757156456
Error on this batch = 0.10678137915349187
Cost on val dataset after 2147 epochs is = 0.028317244282348268
learning rate =  0.032364888900157734
Initial Cost on Val dataset for this epoch 2147 = 0.028317244282348268
Error on this batch = 0.13194181401278685
Error on this batch = 0.10676190780665458
Cost on val dataset after 2148 epochs is = 0.02831474472724307
learning rate =  0.03235735780316219
Initial Cost on Val dataset for this epoch 2148 = 0.02831474472724307
Error on this batch = 0.13193484404622893
Error on this batch = 0.10674248988142679
Cost on val dataset after 2149 epochs is = 0.028312256783890776
learning rate =  0.03234983196103152
Initial Cost on Val dataset for this epoch 2149 = 0.028312256783890776
Error on this batch =

Error on this batch = 0.10618190404030092
Cost on val dataset after 2179 epochs is = 0.02824252239769771
learning rate =  0.03212647026585895
Initial Cost on Val dataset for this epoch 2179 = 0.02824252239769771
Error on this batch = 0.13171406018311926
Error on this batch = 0.10616386583796848
Cost on val dataset after 2180 epochs is = 0.028240346392879024
learning rate =  0.032119104343426734
Initial Cost on Val dataset for this epoch 2180 = 0.028240346392879024
Error on this batch = 0.13170677564016764
Error on this batch = 0.10614586474626612
Cost on val dataset after 2181 epochs is = 0.028238179089379107
learning rate =  0.03211174348522455
Initial Cost on Val dataset for this epoch 2181 = 0.028238179089379107
Error on this batch = 0.13169948076094304
Error on this batch = 0.10612790038272482
Cost on val dataset after 2182 epochs is = 0.028236020412507622
learning rate =  0.03210438768545213
Initial Cost on Val dataset for this epoch 2182 = 0.028236020412507622
Error on this batch

Cost on val dataset after 2211 epochs is = 0.028176863561212667
learning rate =  0.03189324416719217
Initial Cost on Val dataset for this epoch 2211 = 0.028176863561212667
Error on this batch = 0.13147599775379137
Error on this batch = 0.1056043741790116
Cost on val dataset after 2212 epochs is = 0.02817493272486136
learning rate =  0.03188603746861023
Initial Cost on Val dataset for this epoch 2212 = 0.02817493272486136
Error on this batch = 0.13146840204517912
Error on this batch = 0.10558738753310837
Cost on val dataset after 2213 epochs is = 0.028173008564222814
learning rate =  0.03187883565316691
Initial Cost on Val dataset for this epoch 2213 = 0.028173008564222814
Error on this batch = 0.1314607975596527
Error on this batch = 0.10557042802898284
Cost on val dataset after 2214 epochs is = 0.028171091026556024
learning rate =  0.03187163871535014
Initial Cost on Val dataset for this epoch 2214 = 0.028171091026556024
Error on this batch = 0.1314531843675465
Error on this batch = 0

Cost on val dataset after 2244 epochs is = 0.02811640402493914
learning rate =  0.031657971689484546
Initial Cost on Val dataset for this epoch 2244 = 0.02811640402493914
Error on this batch = 0.13122110840400905
Error on this batch = 0.10505699408330293
Cost on val dataset after 2245 epochs is = 0.028114668390129195
learning rate =  0.03165092327089791
Initial Cost on Val dataset for this epoch 2245 = 0.028114668390129195
Error on this batch = 0.13121326251059512
Error on this batch = 0.10504079556549453
Cost on val dataset after 2246 epochs is = 0.028112937948091617
learning rate =  0.03164387955805429
Initial Cost on Val dataset for this epoch 2246 = 0.028112937948091617
Error on this batch = 0.1312054103430099
Error on this batch = 0.1050246179260845
Cost on val dataset after 2247 epochs is = 0.02811121266114984
learning rate =  0.03163684054571984
Initial Cost on Val dataset for this epoch 2247 = 0.02811121266114984
Error on this batch = 0.13119755197818952
Error on this batch = 0

learning rate =  0.03143473067309657
Initial Cost on Val dataset for this epoch 2276 = 0.028063264937693264
Error on this batch = 0.1309673015551191
Error on this batch = 0.10454830115343526
Cost on val dataset after 2277 epochs is = 0.028061678475130705
learning rate =  0.031427830282129206
Initial Cost on Val dataset for this epoch 2277 = 0.028061678475130705
Error on this batch = 0.13095929188615735
Error on this batch = 0.10453270316138681
Cost on val dataset after 2278 epochs is = 0.028060096163735858
learning rate =  0.03142093443338309
Initial Cost on Val dataset for this epoch 2278 = 0.028060096163735858
Error on this batch = 0.13095127827710404
Error on this batch = 0.10451712193485356
Cost on val dataset after 2279 epochs is = 0.028058517975789826
learning rate =  0.031414043121877164
Initial Cost on Val dataset for this epoch 2279 = 0.028058517975789826
Error on this batch = 0.1309432607953388
Error on this batch = 0.10450155736651873
Cost on val dataset after 2280 epochs is

Cost on val dataset after 2309 epochs is = 0.028012962118509645
learning rate =  0.031209389196617962
Initial Cost on Val dataset for this epoch 2309 = 0.028012962118509645
Error on this batch = 0.13070125312016184
Error on this batch = 0.10404187442737296
Cost on val dataset after 2310 epochs is = 0.028011499300556783
learning rate =  0.03120263611005257
Initial Cost on Val dataset for this epoch 2310 = 0.028011499300556783
Error on this batch = 0.13069314677628838
Error on this batch = 0.10402677829018413
Cost on val dataset after 2311 epochs is = 0.028010039840373133
learning rate =  0.031195887405288864
Initial Cost on Val dataset for this epoch 2311 = 0.028010039840373133
Error on this batch = 0.13068503849853147
Error on this batch = 0.10401169586678478
Cost on val dataset after 2312 epochs is = 0.028008583717382563
learning rate =  0.031189143077590267
Initial Cost on Val dataset for this epoch 2312 = 0.028008583717382563
Error on this batch = 0.13067692834055503
Error on this b

Cost on val dataset after 2342 epochs is = 0.02796635667316542
learning rate =  0.03098882537844033
Initial Cost on Val dataset for this epoch 2342 = 0.02796635667316542
Error on this batch = 0.1304329986544164
Error on this batch = 0.1035505381224331
Cost on val dataset after 2343 epochs is = 0.027964994702877145
learning rate =  0.030982214428951035
Initial Cost on Val dataset for this epoch 2343 = 0.027964994702877145
Error on this batch = 0.13042485459704176
Error on this batch = 0.10353585626303896
Cost on val dataset after 2344 epochs is = 0.027963635496518723
learning rate =  0.030975607708665035
Initial Cost on Val dataset for this epoch 2344 = 0.027963635496518723
Error on this batch = 0.13041671016303702
Error on this batch = 0.10352118583127526
Cost on val dataset after 2345 epochs is = 0.02796227903854285
learning rate =  0.030969005213075024
Initial Cost on Val dataset for this epoch 2345 = 0.02796227903854285
Error on this batch = 0.1304085653930016
Error on this batch = 

Cost on val dataset after 2375 epochs is = 0.027922791108074704
learning rate =  0.03077287274483318
Initial Cost on Val dataset for this epoch 2375 = 0.027922791108074704
Error on this batch = 0.13016425287532857
Error on this batch = 0.10307176005036411
Cost on val dataset after 2376 epochs is = 0.02792151278462867
learning rate =  0.03076639901535273
Initial Cost on Val dataset for this epoch 2376 = 0.02792151278462867
Error on this batch = 0.13015611590644194
Error on this batch = 0.10305742587892015
Cost on val dataset after 2377 epochs is = 0.02792023677291387
learning rate =  0.030759929369814823
Initial Cost on Val dataset for this epoch 2377 = 0.02792023677291387
Error on this batch = 0.1301479797180308
Error on this batch = 0.10304310137177555
Cost on val dataset after 2378 epochs is = 0.027918963060986582
learning rate =  0.03075346380392736
Initial Cost on Val dataset for this epoch 2378 = 0.027918963060986582
Error on this batch = 0.130139844339661
Error on this batch = 0.

Cost on val dataset after 2408 epochs is = 0.02788176543657189
learning rate =  0.030561372831402304
Initial Cost on Val dataset for this epoch 2408 = 0.02788176543657189
Error on this batch = 0.12989629453340645
Error on this batch = 0.10260359281821818
Cost on val dataset after 2409 epochs is = 0.02788055756307021
learning rate =  0.030555031639751644
Initial Cost on Val dataset for this epoch 2409 = 0.02788055756307021
Error on this batch = 0.12988819737995194
Error on this batch = 0.10258955459170586
Cost on val dataset after 2410 epochs is = 0.02787935165140353
learning rate =  0.03054869439367296
Initial Cost on Val dataset for this epoch 2410 = 0.02787935165140353
Error on this batch = 0.1298801018354974
Error on this batch = 0.10257552465118963
Cost on val dataset after 2411 epochs is = 0.02787814769225509
learning rate =  0.030542361089076306
Initial Cost on Val dataset for this epoch 2411 = 0.02787814769225509
Error on this batch = 0.12987200792077633
Error on this batch = 0.

Cost on val dataset after 2441 epochs is = 0.027842893263302405
learning rate =  0.030354174694363235
Initial Cost on Val dataset for this epoch 2441 = 0.027842893263302405
Error on this batch = 0.12963004193429944
Error on this batch = 0.10214451674145356
Cost on val dataset after 2442 epochs is = 0.027841745557407892
learning rate =  0.03034796157901939
Initial Cost on Val dataset for this epoch 2442 = 0.027841745557407892
Error on this batch = 0.12962200761325737
Error on this batch = 0.10213073398332273
Cost on val dataset after 2443 epochs is = 0.02784059953860219
learning rate =  0.03034175227735256
Initial Cost on Val dataset for this epoch 2443 = 0.02784059953860219
Error on this batch = 0.12961397547068923
Error on this batch = 0.10211695843458582
Cost on val dataset after 2444 epochs is = 0.027839455199515518
learning rate =  0.030335546785462863
Initial Cost on Val dataset for this epoch 2444 = 0.027839455199515518
Error on this batch = 0.12960594552046995
Error on this batc

Cost on val dataset after 2474 epochs is = 0.027805871517801936
learning rate =  0.03015113445777636
Initial Cost on Val dataset for this epoch 2474 = 0.027805871517801936
Error on this batch = 0.1293661282413389
Error on this batch = 0.10169334550005399
Cost on val dataset after 2475 epochs is = 0.027804775854375918
learning rate =  0.03014504516468028
Initial Cost on Val dataset for this epoch 2475 = 0.027804775854375918
Error on this batch = 0.12935817221113677
Error on this batch = 0.10167978676303836
Cost on val dataset after 2476 epochs is = 0.027803681659402467
learning rate =  0.030138959559457335
Initial Cost on Val dataset for this epoch 2476 = 0.027803681659402467
Error on this batch = 0.1293502187298794
Error on this batch = 0.10166623440616157
Cost on val dataset after 2477 epochs is = 0.027802588926977893
learning rate =  0.030132877638386544
Initial Cost on Val dataset for this epoch 2477 = 0.027802588926977893
Error on this batch = 0.1293422678062347
Error on this batch

learning rate =  0.029958087994702943
Initial Cost on Val dataset for this epoch 2506 = 0.027771510682226778
Error on this batch = 0.1291128379120271
Error on this batch = 0.10126252947415601
Cost on val dataset after 2507 epochs is = 0.02777045926132952
learning rate =  0.029952114893657688
Initial Cost on Val dataset for this epoch 2507 = 0.02777045926132952
Error on this batch = 0.12910496711774205
Error on this batch = 0.10124916504807678
Cost on val dataset after 2508 epochs is = 0.027769409137614807
learning rate =  0.02994614536397336
Initial Cost on Val dataset for this epoch 2508 = 0.027769409137614807
Error on this batch = 0.1290970990883468
Error on this batch = 0.10123580639761008
Cost on val dataset after 2509 epochs is = 0.027768360306270102
learning rate =  0.029940179402092468
Initial Cost on Val dataset for this epoch 2509 = 0.027768360306270102
Error on this batch = 0.12908923382873902
Error on this batch = 0.10122245350671198
Cost on val dataset after 2510 epochs is 

Error on this batch = 0.12886236475076904
Error on this batch = 0.10083765870176922
Cost on val dataset after 2539 epochs is = 0.02773747361888094
learning rate =  0.029762842129970442
Initial Cost on Val dataset for this epoch 2539 = 0.02773747361888094
Error on this batch = 0.12885458439760025
Error on this batch = 0.10082447192279446
Cost on val dataset after 2540 epochs is = 0.027736462618190003
learning rate =  0.02975698503218078
Initial Cost on Val dataset for this epoch 2540 = 0.027736462618190003
Error on this batch = 0.12884680692079897
Error on this batch = 0.10081129048357976
Cost on val dataset after 2541 epochs is = 0.027735452770484066
learning rate =  0.029751131390925722
Initial Cost on Val dataset for this epoch 2541 = 0.027735452770484066
Error on this batch = 0.12883903232249636
Error on this batch = 0.10079811437296032
Cost on val dataset after 2542 epochs is = 0.027734444071826975
learning rate =  0.029745281202806846
Initial Cost on Val dataset for this epoch 254

In [17]:
calc_accuracy(X_test, theta, test_actual_class_enc)

84.3076923076923

In [27]:
test_accuracy

[84.07692307692308, 73.9076923076923, 76.58461538461539]

In [25]:
epochs

[2046, 1459]