In [1]:
import numpy as np
import time
import pickle
import os
import matplotlib.pyplot as plt
import warnings
import pandas as pd

warnings.filterwarnings('ignore')

In [2]:
print("----------------Reading the Data-------------------------")
PATH = os.getcwd()
os.chdir('Alphabets/')

X_train = pd.read_csv('train.csv', sep=',', header=None, index_col=False)
X_test = pd.read_csv('test.csv', sep=',', header=None, index_col=False)
np.random.shuffle(X_train.to_numpy())
train_class = X_train[X_train.columns[-1]]
test_actual_class = X_test[X_test.columns[-1]]

X_train = X_train.drop(X_train.columns[-1], axis=1)
X_test = X_test.drop(X_test.columns[-1], axis=1)

print("----------------Data Reading completed-------------------")

os.chdir('../')

X_train = X_train/255
X_test = X_test/255

m = X_train.shape[0] # Number of Training Samples

X_valid = X_train.iloc[(int(0.85*m)):]
valid_class = train_class[(int(0.85*m)):]
X_train = X_train.iloc[0:int(0.85*m)]
train_class = train_class[0:int(0.85*m)]


m = X_train.shape[0] # Number of Training Samples
n = X_train.shape[1] # Number of input features

print("The total number of training samples = {}".format(m))
print("The total number of validation samples = {}".format(X_valid.shape[0]))

print("The number of features = {}".format(n))

----------------Reading the Data-------------------------
----------------Data Reading completed-------------------
The total number of training samples = 11050
The total number of validation samples = 1950
The number of features = 784


In [3]:
#To get the one hot encoding of each label
print("--------Perform 1-hot encoding of class labels------------")

train_class_enc = pd.get_dummies(train_class).to_numpy()
valid_class_enc = pd.get_dummies(valid_class).to_numpy()
test_actual_class_enc = pd.get_dummies(test_actual_class).to_numpy()

--------Perform 1-hot encoding of class labels------------


In [4]:
#Add the intercept term to the data samples both in training and test dataset
X_train = np.hstack((np.ones((m,1)),X_train.to_numpy()))
X_valid = np.hstack((np.ones((X_valid.shape[0],1)), X_valid.to_numpy()))
X_test = np.hstack((np.ones((X_test.shape[0],1)),X_test.to_numpy()))

In [31]:
lr = 0.1
arch_test = [1,5,10,50,100]
arch = [arch_test[3]] #means one hidden layer with 2 perceptrons 
batch_size = 100 # Mini-Batch Size
r = np.max(train_class) + 1 # Default value of the number of classes = 26

In [6]:
#Mini-Batch formation
mini_batch = [(X_train[i:i+batch_size,:], train_class_enc[i:i+batch_size]) for i in range(0, m, batch_size)]
print("The number of mini-batches formed is = {}".format(len(mini_batch)))

The number of mini-batches formed is = 111


In [7]:
#Theta Initialization 
def theta_init(arch=[50]):
    theta = []
    for i in range(len(arch)+1):
        if i == 0:
            dim0=n+1
            dim1=arch[i]
        elif (i == len(arch)):
            dim0=arch[i-1]
            dim1 = r
        else:
            dim0=arch[i-1]
            dim1= arch[i]

        theta.append(2*np.random.random((dim0, dim1))-1)
        #theta.append(np.zeros((dim0, dim1)))
    return theta

In [8]:
def activation(x):
    return 1/(1+np.exp(-x))

In [9]:
def forward_prop(data, theta):
    fm = []
    fm.append(data)
    for l in range(len(theta)):
        fm.append(activation(np.dot(fm[l], theta[l])))
    return fm

In [10]:
def cost_total(X, theta, Y, m):
    fm = forward_prop(X, theta)
    cost = (1/(2*m))*np.sum((Y-fm[-1])**2)
    return cost

In [11]:
def calc_accuracy(data, theta, actual_class):
    pred_class = forward_prop(data, theta)
    test_pred_class = pred_class[-1]
    for i in range(len(test_pred_class)):
        test_pred_class[i][test_pred_class[i] == np.max(test_pred_class[i])] = 1
        test_pred_class[i][test_pred_class[i] != np.max(test_pred_class[i])] = 0


    test_acc = 0
    for i in range(len(actual_class)):
        if (np.array_equal(test_pred_class[i], actual_class[i])):
            test_acc+=1
    test_acc /= data.shape[0]

    #print("The Test Accuracy of the model = {}%".format(test_acc*100))
    return (test_acc*100)

In [12]:
epochs = []
train_accuracy = []
test_accuracy = []
valid_accuracy=[]
train_time = []

In [36]:
arch_test = [1, 5, 10, 50, 100]
#arch_test = [50]

In [None]:
lr0=1.1
for i in range(len(arch_test)):
    theta = theta_init([arch_test[i]])
    fm = forward_prop(X_train, theta)
    epoch = 1
    start = time.time()
    cost_init = cost_total(X_valid, theta, valid_class_enc, m)

    while(True):
        count = 0
        lr = lr0/(np.power(epoch, 1/3))
        print("learning rate = ", lr)

        print("Initial Cost on Val dataset for this epoch {} = {}".format(epoch, cost_init))

        for b in mini_batch:
            X_b = b[0]
            Y_b = b[1]
            fm = forward_prop(X_b, theta)
            delta = [None]*len(fm)

            if (count % 60 == 0):
                print("Error on this batch = "+str(cost_total(X_b, theta, Y_b, batch_size)))
            #Backward Propagation

            for l in range(len(fm)-1, 0, -1):
                if (l == len(fm)-1):
                    delta[l] = ((1/batch_size)*(Y_b - fm[l])*fm[l]*(1-fm[l]))
                else:
                    delta[l]=(np.dot(delta[l+1], theta[l].T)*fm[l]*(1-fm[l]))

            for t in range(len(theta)):
                theta[t] += lr*np.dot(fm[t].T, delta[t+1]) 

            count+=1
        epoch+=1 #Number of epochs
        
        cost_final = cost_total(X_valid, theta, valid_class_enc, m)
        print("Cost on val dataset after {} epochs is = {}".format(epoch, cost_final))
        if (abs(cost_final-cost_init) < 1e-07):
            print("cost initial= {} , cost final={} , change in cost= {}".format(cost_init,cost_final, cost_final-cost_init))
            break
        cost_init = cost_final
    epochs.append(epoch)
    train_time.append(time.time()-start)
    train_accuracy.append(calc_accuracy(X_train, theta, train_class_enc))
    valid_accuracy.append(calc_accuracy(X_valid, theta, valid_class_enc))
    test_accuracy.append(calc_accuracy(X_test, theta, test_actual_class_enc))
    print("\n------------------------------------------------------------------------------")
    print("The stats for number of units in the hidden layer = {} are as below:".format(arch_test[i]))
    print("------------------------------------------------------------------------------")
    print("The number of epochs = {:2.3f}".format(epochs[-1]))
    print("The training time = {:2.3f}sec".format(train_time[-1]))
    print("The training accuracy is = {:2.3f}%".format(train_accuracy[-1]))
    print("The validation accuracy is = {:2.3f}%".format(valid_accuracy[-1]))
    print("The test accuracy is = {:2.3f}%".format(test_accuracy[-1]))
    print("------------------------------------------------------------------------------\n")

learning rate =  1.1
Initial Cost on Val dataset for this epoch 1 = 0.5683113833589193
Error on this batch = 3.226044222472448
Error on this batch = 0.5732701020932426
Cost on val dataset after 2 epochs is = 0.0909571966947729
learning rate =  0.8730705785825098
Initial Cost on Val dataset for this epoch 2 = 0.0909571966947729
Error on this batch = 0.5155342648851285
Error on this batch = 0.49965753832222093
Cost on val dataset after 3 epochs is = 0.08706911446751914
learning rate =  0.7626974017856982
Initial Cost on Val dataset for this epoch 3 = 0.08706911446751914
Error on this batch = 0.4933953339190393
Error on this batch = 0.48956440414473323
Cost on val dataset after 4 epochs is = 0.08603630300694488
learning rate =  0.6929565774421803
Initial Cost on Val dataset for this epoch 4 = 0.08603630300694488
Error on this batch = 0.4875247082514087
Error on this batch = 0.4859285896317475
Cost on val dataset after 5 epochs is = 0.0855919654705488
learning rate =  0.6432839024068306
In

Cost on val dataset after 37 epochs is = 0.08485522490016903
learning rate =  0.330110073390418
Initial Cost on Val dataset for this epoch 37 = 0.08485522490016903
Error on this batch = 0.4807820925116769
Error on this batch = 0.48073511909171934
Cost on val dataset after 38 epochs is = 0.08485492416531655
learning rate =  0.32718859209245166
Initial Cost on Val dataset for this epoch 38 = 0.08485492416531655
Error on this batch = 0.4807799676902407
Error on this batch = 0.48073296566807044
Cost on val dataset after 39 epochs is = 0.08485466036444521
learning rate =  0.32436786041931415
Initial Cost on Val dataset for this epoch 39 = 0.08485466036444521
Error on this batch = 0.48077806790384764
Error on this batch = 0.48073102740940443
Cost on val dataset after 40 epochs is = 0.08485442884215952
learning rate =  0.3216419512034153
Initial Cost on Val dataset for this epoch 40 = 0.08485442884215952
Error on this batch = 0.48077636601219026
Error on this batch = 0.4807292790420203
Cost o

Cost on val dataset after 25 epochs is = 0.08485248585035796
learning rate =  0.37619470826887336
Initial Cost on Val dataset for this epoch 25 = 0.08485248585035796
Error on this batch = 0.4807175487952136
Error on this batch = 0.48099942188140743
Cost on val dataset after 26 epochs is = 0.08485040878509684
learning rate =  0.37130850964854006
Initial Cost on Val dataset for this epoch 26 = 0.08485040878509684
Error on this batch = 0.4807116235621941
Error on this batch = 0.48101390058014776
Cost on val dataset after 27 epochs is = 0.08484821846451437
learning rate =  0.3666666666666667
Initial Cost on Val dataset for this epoch 27 = 0.08484821846451437
Error on this batch = 0.4807048105715082
Error on this batch = 0.48103020073422464
Cost on val dataset after 28 epochs is = 0.08484588425501415
learning rate =  0.3622485658045923
Initial Cost on Val dataset for this epoch 28 = 0.08484588425501415
Error on this batch = 0.48069691757420585
Error on this batch = 0.48104855181638867
Cost 

Cost on val dataset after 60 epochs is = 0.0843447319967859
learning rate =  0.2809801252109695
Initial Cost on Val dataset for this epoch 60 = 0.0843447319967859
Error on this batch = 0.4762196341761865
Error on this batch = 0.4805806910503544
Cost on val dataset after 61 epochs is = 0.08432547093782633
learning rate =  0.27943624721030297
Initial Cost on Val dataset for this epoch 61 = 0.08432547093782633
Error on this batch = 0.47604725243162327
Error on this batch = 0.4804904186973829
Cost on val dataset after 62 epochs is = 0.08430647367146947
learning rate =  0.27792575150607113
Initial Cost on Val dataset for this epoch 62 = 0.08430647367146947
Error on this batch = 0.4758805305904645
Error on this batch = 0.48040016536168173
Cost on val dataset after 63 epochs is = 0.0842877138820704
learning rate =  0.2764473950806898
Initial Cost on Val dataset for this epoch 63 = 0.0842877138820704
Error on this batch = 0.4757186265743086
Error on this batch = 0.48031039527488617
Cost on val

Cost on val dataset after 95 epochs is = 0.0837779540108929
learning rate =  0.24107461585287857
Initial Cost on Val dataset for this epoch 95 = 0.0837779540108929
Error on this batch = 0.4717911488293585
Error on this batch = 0.47800556075702816
Cost on val dataset after 96 epochs is = 0.08376468443365594
learning rate =  0.2402346278024822
Initial Cost on Val dataset for this epoch 96 = 0.08376468443365594
Error on this batch = 0.47169428939514796
Error on this batch = 0.47794132187740834
Cost on val dataset after 97 epochs is = 0.08375156629988632
learning rate =  0.2394062259560102
Initial Cost on Val dataset for this epoch 97 = 0.08375156629988632
Error on this batch = 0.4715984982339472
Error on this batch = 0.4778771516906899
Cost on val dataset after 98 epochs is = 0.08373859798574436
learning rate =  0.2385891335045164
Initial Cost on Val dataset for this epoch 98 = 0.08373859798574436
Error on this batch = 0.47150374365302455
Error on this batch = 0.47781306304507115
Cost on 

Cost on val dataset after 130 epochs is = 0.08339348536938672
learning rate =  0.21714253371234285
Initial Cost on Val dataset for this epoch 130 = 0.08339348536938672
Error on this batch = 0.46889228858736104
Error on this batch = 0.4758599482761844
Cost on val dataset after 131 epochs is = 0.08338468058194137
learning rate =  0.21658859627104296
Initial Cost on Val dataset for this epoch 131 = 0.08338468058194137
Error on this batch = 0.4688215545959794
Error on this batch = 0.47580099735005704
Cost on val dataset after 132 epochs is = 0.08337599055862181
learning rate =  0.21604026836497867
Initial Cost on Val dataset for this epoch 132 = 0.08337599055862181
Error on this batch = 0.46875145855525874
Error on this batch = 0.4757420080095467
Cost on val dataset after 133 epochs is = 0.08336741489295449
learning rate =  0.21549745133293519
Initial Cost on Val dataset for this epoch 133 = 0.08336741489295449
Error on this batch = 0.4686820061122996
Error on this batch = 0.47568296969906

Cost on val dataset after 165 epochs is = 0.08314756651837174
learning rate =  0.2005540194507186
Initial Cost on Val dataset for this epoch 165 = 0.08314756651837174
Error on this batch = 0.46684801032733336
Error on this batch = 0.4737859883187528
Cost on val dataset after 166 epochs is = 0.0831421646686967
learning rate =  0.20015048914632175
Initial Cost on Val dataset for this epoch 166 = 0.0831421646686967
Error on this batch = 0.4668041940742526
Error on this batch = 0.47372855941114944
Cost on val dataset after 167 epochs is = 0.08313683664306394
learning rate =  0.19975018709948594
Initial Cost on Val dataset for this epoch 167 = 0.08313683664306394
Error on this batch = 0.4667612281031092
Error on this batch = 0.47367139108312095
Cost on val dataset after 168 epochs is = 0.08313158115819735
learning rate =  0.1993530683838636
Initial Cost on Val dataset for this epoch 168 = 0.08313158115819735
Error on this batch = 0.46671910886751306
Error on this batch = 0.473614493631389
C

Error on this batch = 0.4720682546211759
Cost on val dataset after 199 epochs is = 0.08299832490455226
learning rate =  0.18841189879079956
Initial Cost on Val dataset for this epoch 199 = 0.08299832490455226
Error on this batch = 0.4657887931177828
Error on this batch = 0.47202282257311207
Cost on val dataset after 200 epochs is = 0.08299480595873304
learning rate =  0.1880973541344367
Initial Cost on Val dataset for this epoch 200 = 0.08299480595873304
Error on this batch = 0.4657689583997214
Error on this batch = 0.4719778120221422
Cost on val dataset after 201 epochs is = 0.08299132641089682
learning rate =  0.18778489948254123
Initial Cost on Val dataset for this epoch 201 = 0.08299132641089682
Error on this batch = 0.4657496231072459
Error on this batch = 0.47193322361272266
Cost on val dataset after 202 epochs is = 0.08298788555871621
learning rate =  0.18747451065313256
Initial Cost on Val dataset for this epoch 202 = 0.08298788555871621
Error on this batch = 0.4657307743890401

Error on this batch = 0.4707197463865063
Cost on val dataset after 234 epochs is = 0.08289461914008045
learning rate =  0.17850651283069705
Initial Cost on Val dataset for this epoch 234 = 0.08289461914008045
Error on this batch = 0.4653145702688607
Error on this batch = 0.47068782836244294
Cost on val dataset after 235 epochs is = 0.08289213541010813
learning rate =  0.17825295210183914
Initial Cost on Val dataset for this epoch 235 = 0.08289213541010813
Error on this batch = 0.46530556668744977
Error on this batch = 0.4706562178735201
Cost on val dataset after 236 epochs is = 0.08288967294103106
learning rate =  0.1780008259490625
Initial Cost on Val dataset for this epoch 236 = 0.08288967294103106
Error on this batch = 0.4652967194386221
Error on this batch = 0.4706249092091899
Cost on val dataset after 237 epochs is = 0.08288723138063835
learning rate =  0.17775012022861225
Initial Cost on Val dataset for this epoch 237 = 0.08288723138063835
Error on this batch = 0.4652880231549831

Error on this batch = 0.46507818408351326
Error on this batch = 0.4697743936900474
Cost on val dataset after 268 epochs is = 0.0828203101482889
learning rate =  0.17061390393631956
Initial Cost on Val dataset for this epoch 268 = 0.0828203101482889
Error on this batch = 0.46507243934467746
Error on this batch = 0.4697499742518761
Cost on val dataset after 269 epochs is = 0.08281839117879768
learning rate =  0.17040222393739646
Initial Cost on Val dataset for this epoch 269 = 0.08281839117879768
Error on this batch = 0.4650667546878633
Error on this batch = 0.4697256980688907
Cost on val dataset after 270 epochs is = 0.08281648495738969
learning rate =  0.17019159056580194
Initial Cost on Val dataset for this epoch 270 = 0.08281648495738969
Error on this batch = 0.4650611287337252
Error on this batch = 0.4697015618544598
Cost on val dataset after 271 epochs is = 0.08281459131390366
learning rate =  0.16998199479886364
Initial Cost on Val dataset for this epoch 271 = 0.08281459131390366


Error on this batch = 0.4649012494389891
Error on this batch = 0.4689672745034891
Cost on val dataset after 304 epochs is = 0.08275818897031303
learning rate =  0.16359429604622583
Initial Cost on Val dataset for this epoch 304 = 0.08275818897031303
Error on this batch = 0.46489700879072304
Error on this batch = 0.46894654497613303
Cost on val dataset after 305 epochs is = 0.08275663816657448
learning rate =  0.16341530870851229
Initial Cost on Val dataset for this epoch 305 = 0.08275663816657448
Error on this batch = 0.46489279494052527
Error on this batch = 0.46892588942138225
Cost on val dataset after 306 epochs is = 0.08275509526743001
learning rate =  0.1632371021236301
Initial Cost on Val dataset for this epoch 306 = 0.08275509526743001
Error on this batch = 0.4648886072402402
Error on this batch = 0.4689053068369311
Cost on val dataset after 307 epochs is = 0.08275356015779274
learning rate =  0.16305967035105687
Initial Cost on Val dataset for this epoch 307 = 0.082753560157792

Error on this batch = 0.46831688221212747
Cost on val dataset after 337 epochs is = 0.08271054574483154
learning rate =  0.1580699750954998
Initial Cost on Val dataset for this epoch 337 = 0.08271054574483154
Error on this batch = 0.46476842450517625
Error on this batch = 0.46829805323638335
Cost on val dataset after 338 epochs is = 0.08270919259975885
learning rate =  0.15791393355765282
Initial Cost on Val dataset for this epoch 338 = 0.08270919259975885
Error on this batch = 0.46476475081614455
Error on this batch = 0.46827926014961435
Cost on val dataset after 339 epochs is = 0.08270784319158245
learning rate =  0.15775850635726477
Initial Cost on Val dataset for this epoch 339 = 0.08270784319158245
Error on this batch = 0.4647610825511966
Error on this batch = 0.46826050112115897
Cost on val dataset after 340 epochs is = 0.08270649736270874
learning rate =  0.15760368927414997
Initial Cost on Val dataset for this epoch 340 = 0.08270649736270874
Error on this batch = 0.464757418932

Error on this batch = 0.4676609182888823
Cost on val dataset after 372 epochs is = 0.08266407917384307
learning rate =  0.15294843534456884
Initial Cost on Val dataset for this epoch 372 = 0.08266407917384307
Error on this batch = 0.46463545798001304
Error on this batch = 0.46764134659403644
Cost on val dataset after 373 epochs is = 0.0826627195957961
learning rate =  0.1528116298732284
Initial Cost on Val dataset for this epoch 373 = 0.0826627195957961
Error on this batch = 0.4646311211194457
Error on this batch = 0.46762164163561426
Cost on val dataset after 374 epochs is = 0.08266135313510108
learning rate =  0.15267531255745828
Initial Cost on Val dataset for this epoch 374 = 0.08266135313510108
Error on this batch = 0.46462671207815204
Error on this batch = 0.4676017917807792
Cost on val dataset after 375 epochs is = 0.08265997914842352
learning rate =  0.15253948035713966
Initial Cost on Val dataset for this epoch 375 = 0.08265997914842352
Error on this batch = 0.4646222244838220

Error on this batch = 0.46674621684879547
Cost on val dataset after 407 epochs is = 0.08260474089381392
learning rate =  0.1484321171714776
Initial Cost on Val dataset for this epoch 407 = 0.08260474089381392
Error on this batch = 0.46432829931876
Error on this batch = 0.46670410507894405
Cost on val dataset after 408 epochs is = 0.08260226561383739
learning rate =  0.14831074989154563
Initial Cost on Val dataset for this epoch 408 = 0.08260226561383739
Error on this batch = 0.4643063574468492
Error on this batch = 0.4666601885235582
Cost on val dataset after 409 epochs is = 0.08259970659272102
learning rate =  0.14818977858999635
Initial Cost on Val dataset for this epoch 409 = 0.08259970659272102
Error on this batch = 0.46428269349558776
Error on this batch = 0.4666143518283505
Cost on val dataset after 410 epochs is = 0.08259705877101764
learning rate =  0.14806920101145807
Initial Cost on Val dataset for this epoch 410 = 0.08259705877101764
Error on this batch = 0.46425718543743344

Cost on val dataset after 441 epochs is = 0.08241854142631254
learning rate =  0.14451507590567778
Initial Cost on Val dataset for this epoch 441 = 0.08241854142631254
Error on this batch = 0.46296035390484314
Error on this batch = 0.4631944497431211
Cost on val dataset after 442 epochs is = 0.08240735734807232
learning rate =  0.14440600788242994
Initial Cost on Val dataset for this epoch 442 = 0.08240735734807232
Error on this batch = 0.462929544974932
Error on this batch = 0.46298170455111404
Cost on val dataset after 443 epochs is = 0.08239585938695956
learning rate =  0.14429726837754203
Initial Cost on Val dataset for this epoch 443 = 0.08239585938695956
Error on this batch = 0.4628991943875093
Error on this batch = 0.4627596540981939
Cost on val dataset after 444 epochs is = 0.08238409154656882
learning rate =  0.14418885566326753
Initial Cost on Val dataset for this epoch 444 = 0.08238409154656882
Error on this batch = 0.4628691295525655
Error on this batch = 0.4625284630764318

Cost on val dataset after 476 epochs is = 0.08191949067706329
learning rate =  0.14088249498642366
Initial Cost on Val dataset for this epoch 476 = 0.08191949067706329
Error on this batch = 0.46126588279590397
Error on this batch = 0.45437038335218477
Cost on val dataset after 477 epochs is = 0.08190138421254818
learning rate =  0.1407839757270305
Initial Cost on Val dataset for this epoch 477 = 0.08190138421254818
Error on this batch = 0.46117410966151723
Error on this batch = 0.4541847752229757
Cost on val dataset after 478 epochs is = 0.08188324327128806
learning rate =  0.14068573146921567
Initial Cost on Val dataset for this epoch 478 = 0.08188324327128806
Error on this batch = 0.46107967834952396
Error on this batch = 0.45400496276928237
Cost on val dataset after 479 epochs is = 0.0818650899942487
learning rate =  0.1405877608724412
Initial Cost on Val dataset for this epoch 479 = 0.0818650899942487
Error on this batch = 0.4609826568304338
Error on this batch = 0.4538309771600278

Cost on val dataset after 511 epochs is = 0.08130122615310736
learning rate =  0.13758963496671586
Initial Cost on Val dataset for this epoch 511 = 0.08130122615310736
Error on this batch = 0.4563094977407975
Error on this batch = 0.45083555881762166
Cost on val dataset after 512 epochs is = 0.08128401260456575
learning rate =  0.13750000000000004
Initial Cost on Val dataset for this epoch 512 = 0.08128401260456575
Error on this batch = 0.4561413732491113
Error on this batch = 0.45078695451171075
Cost on val dataset after 513 epochs is = 0.08126681731242852
learning rate =  0.13741059815424903
Initial Cost on Val dataset for this epoch 513 = 0.08126681731242852
Error on this batch = 0.455975327763846
Error on this batch = 0.4507385100719854
Cost on val dataset after 514 epochs is = 0.0812496424995829
learning rate =  0.137321428370509
Initial Cost on Val dataset for this epoch 514 = 0.0812496424995829
Error on this batch = 0.4558116290940367
Error on this batch = 0.4506900627867455
Cos

Error on this batch = 0.44914540243526546
Cost on val dataset after 546 epochs is = 0.08076708229423343
learning rate =  0.1345845285735925
Initial Cost on Val dataset for this epoch 546 = 0.08076708229423343
Error on this batch = 0.45191494444695324
Error on this batch = 0.4490994377768371
Cost on val dataset after 547 epochs is = 0.08075491780363091
learning rate =  0.13450246481631015
Initial Cost on Val dataset for this epoch 547 = 0.08075491780363091
Error on this batch = 0.4518275451387299
Error on this batch = 0.4490537002598036
Cost on val dataset after 548 epochs is = 0.0807429155553484
learning rate =  0.13442060084915028
Initial Cost on Val dataset for this epoch 548 = 0.0807429155553484
Error on this batch = 0.45174198491870926
Error on this batch = 0.4490081705929495
Cost on val dataset after 549 epochs is = 0.08073107166397082
learning rate =  0.13433893582245807
Initial Cost on Val dataset for this epoch 549 = 0.08073107166397082
Error on this batch = 0.45165823867176613

Error on this batch = 0.44764868907377137
Cost on val dataset after 581 epochs is = 0.08041742909523983
learning rate =  0.13182586843335029
Initial Cost on Val dataset for this epoch 581 = 0.08041742909523983
Error on this batch = 0.44973054941810403
Error on this batch = 0.4476100753234606
Cost on val dataset after 582 epochs is = 0.08040927420109546
learning rate =  0.13175032350652904
Initial Cost on Val dataset for this epoch 582 = 0.08040927420109546
Error on this batch = 0.4496872705162393
Error on this batch = 0.44757172945495344
Cost on val dataset after 583 epochs is = 0.08040120356853629
learning rate =  0.13167495145145994
Initial Cost on Val dataset for this epoch 583 = 0.08040120356853629
Error on this batch = 0.44964466804106673
Error on this batch = 0.44753364762942094
Cost on val dataset after 584 epochs is = 0.08039321645615775
learning rate =  0.13159975157705053
Initial Cost on Val dataset for this epoch 584 = 0.08039321645615775
Error on this batch = 0.449602717564

Error on this batch = 0.4463943977835647
Cost on val dataset after 616 epochs is = 0.08017673823230084
learning rate =  0.12928032873823497
Initial Cost on Val dataset for this epoch 616 = 0.08017673823230084
Error on this batch = 0.44849207908361555
Error on this batch = 0.4463594138373266
Cost on val dataset after 617 epochs is = 0.08017096964913309
learning rate =  0.12921044746584173
Initial Cost on Val dataset for this epoch 617 = 0.08017096964913309
Error on this batch = 0.44846229591139336
Error on this batch = 0.44632434840868257
Cost on val dataset after 618 epochs is = 0.08016524583078324
learning rate =  0.12914071704352115
Initial Cost on Val dataset for this epoch 618 = 0.08016524583078324
Error on this batch = 0.4484327351753347
Error on this batch = 0.4462891817291248
Cost on val dataset after 619 epochs is = 0.08015956557452786
learning rate =  0.12907113690233413
Initial Cost on Val dataset for this epoch 619 = 0.08015956557452786
Error on this batch = 0.44840339226199

Error on this batch = 0.4449084128991501
Cost on val dataset after 651 epochs is = 0.0799916820470721
learning rate =  0.12692066988577458
Initial Cost on Val dataset for this epoch 651 = 0.0799916820470721
Error on this batch = 0.44753891231913395
Error on this batch = 0.4448512336587333
Cost on val dataset after 652 epochs is = 0.07998648949161265
learning rate =  0.12685574881568257
Initial Cost on Val dataset for this epoch 652 = 0.07998648949161265
Error on this batch = 0.44751240426868605
Error on this batch = 0.4447934837911858
Cost on val dataset after 653 epochs is = 0.07998127521174316
learning rate =  0.1267909603729447
Initial Cost on Val dataset for this epoch 653 = 0.07998127521174316
Error on this batch = 0.4474857641788719
Error on this batch = 0.44473537119617207
Cost on val dataset after 654 epochs is = 0.07997603736030989
learning rate =  0.12672630408413324
Initial Cost on Val dataset for this epoch 654 = 0.07997603736030989
Error on this batch = 0.44745897212701424

Error on this batch = 0.44318721237552783
Cost on val dataset after 686 epochs is = 0.07978458035596042
learning rate =  0.12472436836892999
Initial Cost on Val dataset for this epoch 686 = 0.07978458035596042
Error on this batch = 0.4463230773330847
Error on this batch = 0.44310099960647703
Cost on val dataset after 687 epochs is = 0.07977791384344882
learning rate =  0.12466382254952814
Initial Cost on Val dataset for this epoch 687 = 0.07977791384344882
Error on this batch = 0.446271789891697
Error on this batch = 0.44300793387303117
Cost on val dataset after 688 epochs is = 0.0797712200654937
learning rate =  0.12460339412389462
Initial Cost on Val dataset for this epoch 688 = 0.0797712200654937
Error on this batch = 0.4462193269061187
Error on this batch = 0.4429077427237272
Cost on val dataset after 689 epochs is = 0.07976449179588352
learning rate =  0.12454308269427691
Initial Cost on Val dataset for this epoch 689 = 0.07976449179588352
Error on this batch = 0.44616570577765635

Error on this batch = 0.4375615876349207
Cost on val dataset after 721 epochs is = 0.07949966831647194
learning rate =  0.12267260758057934
Initial Cost on Val dataset for this epoch 721 = 0.07949966831647194
Error on this batch = 0.4443101169302305
Error on this batch = 0.43736957756883527
Cost on val dataset after 722 epochs is = 0.07948934103849371
learning rate =  0.1226159458600101
Initial Cost on Val dataset for this epoch 722 = 0.07948934103849371
Error on this batch = 0.44425331363927284
Error on this batch = 0.43717762656169257
Cost on val dataset after 723 epochs is = 0.07947884295504873
learning rate =  0.12255938868138669
Initial Cost on Val dataset for this epoch 723 = 0.07947884295504873
Error on this batch = 0.44419620153607414
Error on this batch = 0.43698611167917006
Cost on val dataset after 724 epochs is = 0.07946818391660815
learning rate =  0.12250293570763227
Initial Cost on Val dataset for this epoch 724 = 0.07946818391660815
Error on this batch = 0.4441386990045

Error on this batch = 0.4341480333814975
Cost on val dataset after 756 epochs is = 0.07900761378247574
learning rate =  0.1207495219348641
Initial Cost on Val dataset for this epoch 756 = 0.07900761378247574
Error on this batch = 0.4411769528480102
Error on this batch = 0.4341168885243469
Cost on val dataset after 757 epochs is = 0.07898650486132489
learning rate =  0.12069632830598281
Initial Cost on Val dataset for this epoch 757 = 0.07898650486132489
Error on this batch = 0.44101242724405565
Error on this batch = 0.43408502416912986
Cost on val dataset after 758 epochs is = 0.07896507465295338
learning rate =  0.12064322828667638
Initial Cost on Val dataset for this epoch 758 = 0.07896507465295338
Error on this batch = 0.440842431680182
Error on this batch = 0.4340522005674468
Cost on val dataset after 759 epochs is = 0.07894336052643491
learning rate =  0.12059022158904177
Initial Cost on Val dataset for this epoch 759 = 0.07894336052643491
Error on this batch = 0.44066727708160824

Error on this batch = 0.4328100250203016
Cost on val dataset after 791 epochs is = 0.07819160644891483
learning rate =  0.1189416221844171
Initial Cost on Val dataset for this epoch 791 = 0.07819160644891483
Error on this batch = 0.434630493485039
Error on this batch = 0.4327842556578901
Cost on val dataset after 792 epochs is = 0.07816800329397315
learning rate =  0.1188915414954718
Initial Cost on Val dataset for this epoch 792 = 0.07816800329397315
Error on this batch = 0.4344141632024474
Error on this batch = 0.43275956641849944
Cost on val dataset after 793 epochs is = 0.07814437330363283
learning rate =  0.11884154504655893
Initial Cost on Val dataset for this epoch 793 = 0.07814437330363283
Error on this batch = 0.43419422236754757
Error on this batch = 0.4327358815829853
Cost on val dataset after 794 epochs is = 0.07812070823653165
learning rate =  0.1187916325900177
Initial Cost on Val dataset for this epoch 794 = 0.07812070823653165
Error on this batch = 0.43397125836876244
E

Error on this batch = 0.43216409284921564
Cost on val dataset after 826 epochs is = 0.07743628368161383
learning rate =  0.11723735256741952
Initial Cost on Val dataset for this epoch 826 = 0.07743628368161383
Error on this batch = 0.42822192925436076
Error on this batch = 0.4321452342361816
Cost on val dataset after 827 epochs is = 0.07741960753269234
learning rate =  0.1171900794362591
Initial Cost on Val dataset for this epoch 827 = 0.07741960753269234
Error on this batch = 0.4280906947692746
Error on this batch = 0.43212600461589407
Cost on val dataset after 828 epochs is = 0.07740317888119704
learning rate =  0.11714288245997603
Initial Cost on Val dataset for this epoch 828 = 0.07740317888119704
Error on this batch = 0.4279607840465365
Error on this batch = 0.4321063806382095
Cost on val dataset after 829 epochs is = 0.07738698109260232
learning rate =  0.11709576142413577
Initial Cost on Val dataset for this epoch 829 = 0.07738698109260232
Error on this batch = 0.427832167772227

Error on this batch = 0.4311062637178072
Cost on val dataset after 861 epochs is = 0.0769275927287763
learning rate =  0.11562674535072894
Initial Cost on Val dataset for this epoch 861 = 0.0769275927287763
Error on this batch = 0.4244120429084661
Error on this batch = 0.4310640865355142
Cost on val dataset after 862 epochs is = 0.07691474035791152
learning rate =  0.11558201546411821
Initial Cost on Val dataset for this epoch 862 = 0.07691474035791152
Error on this batch = 0.4243256448751312
Error on this batch = 0.43102142115651393
Cost on val dataset after 863 epochs is = 0.07690200118870837
learning rate =  0.11553735471181105
Initial Cost on Val dataset for this epoch 863 = 0.07690200118870837
Error on this batch = 0.42424027843184947
Error on this batch = 0.43097827757715573
Cost on val dataset after 864 epochs is = 0.07688937711591963
learning rate =  0.11549276290703006
Initial Cost on Val dataset for this epoch 864 = 0.07688937711591963
Error on this batch = 0.4241559221002596

Error on this batch = 0.4294442169443803
Cost on val dataset after 896 epochs is = 0.07654789051793941
learning rate =  0.11410114833785849
Initial Cost on Val dataset for this epoch 896 = 0.07654789051793941
Error on this batch = 0.42188468207008983
Error on this batch = 0.4293952207921615
Cost on val dataset after 897 epochs is = 0.0765389743052331
learning rate =  0.11405873155018116
Initial Cost on Val dataset for this epoch 897 = 0.0765389743052331
Error on this batch = 0.4218244757961906
Error on this batch = 0.4293463838395062
Cost on val dataset after 898 epochs is = 0.07653014380530809
learning rate =  0.1140163777655406
Initial Cost on Val dataset for this epoch 898 = 0.07653014380530809
Error on this batch = 0.4217647991542131
Error on this batch = 0.42929772262831906
Cost on val dataset after 899 epochs is = 0.07652139698783844
learning rate =  0.11397408682035322
Initial Cost on Val dataset for this epoch 899 = 0.07652139698783844
Error on this batch = 0.42170564372802466


Error on this batch = 0.42789510057671
Cost on val dataset after 931 epochs is = 0.07627517965359432
learning rate =  0.11265300773685817
Initial Cost on Val dataset for this epoch 931 = 0.07627517965359432
Error on this batch = 0.42004604308935506
Error on this batch = 0.42785683724528284
Cost on val dataset after 932 epochs is = 0.07626826874994792
learning rate =  0.11261270254281444
Initial Cost on Val dataset for this epoch 932 = 0.07626826874994792
Error on this batch = 0.42000037525913647
Error on this batch = 0.4278188933059525
Cost on val dataset after 933 epochs is = 0.07626139253050683
learning rate =  0.11257245496878508
Initial Cost on Val dataset for this epoch 933 = 0.07626139253050683
Error on this batch = 0.41995502676281277
Error on this batch = 0.4277812631562881
Cost on val dataset after 934 epochs is = 0.0762545501507175
learning rate =  0.11253226487077146
Initial Cost on Val dataset for this epoch 934 = 0.0762545501507175
Error on this batch = 0.41990999346724606

Error on this batch = 0.42670582491306114
Cost on val dataset after 966 epochs is = 0.0760499273125106
learning rate =  0.1112756935214145
Initial Cost on Val dataset for this epoch 966 = 0.0760499273125106
Error on this batch = 0.4186150232980863
Error on this batch = 0.42667522552316023
Cost on val dataset after 967 epochs is = 0.076043917683848
learning rate =  0.11123732258957181
Initial Cost on Val dataset for this epoch 967 = 0.076043917683848
Error on this batch = 0.4185786135859732
Error on this batch = 0.42664476406951723
Cost on val dataset after 968 epochs is = 0.0760379302925673
learning rate =  0.1111990045284658
Initial Cost on Val dataset for this epoch 968 = 0.0760379302925673
Error on this batch = 0.4185424261949258
Error on this batch = 0.42661443826670986
Cost on val dataset after 969 epochs is = 0.07603196524380193
learning rate =  0.11116073921074095
Initial Cost on Val dataset for this epoch 969 = 0.07603196524380193
Error on this batch = 0.41850645963108046
Error

Error on this batch = 0.4257044470554737
Cost on val dataset after 1001 epochs is = 0.0758536795889735
learning rate =  0.10996335775878129
Initial Cost on Val dataset for this epoch 1001 = 0.0758536795889735
Error on this batch = 0.417466857991786
Error on this batch = 0.4256775385204346
Cost on val dataset after 1002 epochs is = 0.07584851789036552
learning rate =  0.10992676429259876
Initial Cost on Val dataset for this epoch 1002 = 0.07584851789036552
Error on this batch = 0.41743779284972704
Error on this batch = 0.4256506993122363
Cost on val dataset after 1003 epochs is = 0.07584338108160509
learning rate =  0.10989021948794668
Initial Cost on Val dataset for this epoch 1003 = 0.07584338108160509
Error on this batch = 0.41740893695703263
Error on this batch = 0.42562392740757193
Cost on val dataset after 1004 epochs is = 0.07583826908993062
learning rate =  0.1098537232316963
Initial Cost on Val dataset for this epoch 1004 = 0.07583826908993062
Error on this batch = 0.4173802907

Error on this batch = 0.4247922407851897
Cost on val dataset after 1036 epochs is = 0.075687180675721
learning rate =  0.10871081876666146
Initial Cost on Val dataset for this epoch 1036 = 0.075687180675721
Error on this batch = 0.41657648961596705
Error on this batch = 0.4247667459859841
Cost on val dataset after 1037 epochs is = 0.07568282886841428
learning rate =  0.10867586351705988
Initial Cost on Val dataset for this epoch 1037 = 0.07568282886841428
Error on this batch = 0.41655491083052043
Error on this batch = 0.42474126576178317
Cost on val dataset after 1038 epochs is = 0.07567849821407946
learning rate =  0.10864095318265753
Initial Cost on Val dataset for this epoch 1038 = 0.07567849821407946
Error on this batch = 0.4165335419806091
Error on this batch = 0.4247157988742757
Cost on val dataset after 1039 epochs is = 0.07567418860395493
learning rate =  0.10860608766255375
Initial Cost on Val dataset for this epoch 1039 = 0.07567418860395493
Error on this batch = 0.4165123820

Error on this batch = 0.4239012813249876
Cost on val dataset after 1071 epochs is = 0.07554670082032766
learning rate =  0.10751346569160503
Initial Cost on Val dataset for this epoch 1071 = 0.07554670082032766
Error on this batch = 0.41593626801505923
Error on this batch = 0.4238756581945937
Cost on val dataset after 1072 epochs is = 0.07554301948106398
learning rate =  0.10748002448705538
Initial Cost on Val dataset for this epoch 1072 = 0.07554301948106398
Error on this batch = 0.4159210658875178
Error on this batch = 0.4238500149283377
Cost on val dataset after 1073 epochs is = 0.07553935505188467
learning rate =  0.10744662485019787
Initial Cost on Val dataset for this epoch 1073 = 0.07553935505188467
Error on this batch = 0.41590601073567923
Error on this batch = 0.4238243507379153
Cost on val dataset after 1074 epochs is = 0.07553570740850858
learning rate =  0.10741326669069598
Initial Cost on Val dataset for this epoch 1074 = 0.07553570740850858
Error on this batch = 0.4158911

Error on this batch = 0.42298843347695586
Cost on val dataset after 1106 epochs is = 0.07542714298517747
learning rate =  0.10636717937044048
Initial Cost on Val dataset for this epoch 1106 = 0.07542714298517747
Error on this batch = 0.415481753224125
Error on this batch = 0.42296180105668435
Cost on val dataset after 1107 epochs is = 0.07542398413749944
learning rate =  0.10633514106146313
Initial Cost on Val dataset for this epoch 1107 = 0.07542398413749944
Error on this batch = 0.41547090420999794
Error on this batch = 0.42293513991621257
Cost on val dataset after 1108 epochs is = 0.07542083817346396
learning rate =  0.10630314131801376
Initial Cost on Val dataset for this epoch 1108 = 0.07542083817346396
Error on this batch = 0.4154601692113931
Error on this batch = 0.42290845082191125
Cost on val dataset after 1109 epochs is = 0.0754177049762087
learning rate =  0.10627118005892616
Initial Cost on Val dataset for this epoch 1109 = 0.0754177049762087
Error on this batch = 0.4154495

Error on this batch = 0.42204729660171963
Cost on val dataset after 1141 epochs is = 0.07532345716011114
learning rate =  0.10526826627729681
Initial Cost on Val dataset for this epoch 1141 = 0.07532345716011114
Error on this batch = 0.4151741481495169
Error on this batch = 0.42202046943783317
Cost on val dataset after 1142 epochs is = 0.07532067648172756
learning rate =  0.10523753101532286
Initial Cost on Val dataset for this epoch 1142 = 0.07532067648172756
Error on this batch = 0.41516772384107403
Error on this batch = 0.421993667698728
Cost on val dataset after 1143 epochs is = 0.07531790432030508
learning rate =  0.10520683161713544
Initial Cost on Val dataset for this epoch 1143 = 0.07531790432030508
Error on this batch = 0.4151614418965127
Error on this batch = 0.4219668931197124
Cost on val dataset after 1144 epochs is = 0.07531514054301826
learning rate =  0.10517616800956442
Initial Cost on Val dataset for this epoch 1144 = 0.07531514054301826
Error on this batch = 0.4151553

Error on this batch = 0.4211324911667023
Cost on val dataset after 1176 epochs is = 0.07523028051197801
learning rate =  0.10421340306395314
Initial Cost on Val dataset for this epoch 1176 = 0.07523028051197801
Error on this batch = 0.415035866145417
Error on this batch = 0.4211072813742136
Cost on val dataset after 1177 epochs is = 0.07522771200330527
learning rate =  0.10418388085187376
Initial Cost on Val dataset for this epoch 1177 = 0.07522771200330527
Error on this batch = 0.4150344870775328
Error on this batch = 0.42108212893807534
Cost on val dataset after 1178 epochs is = 0.07522514666291075
learning rate =  0.10415439206432292
Initial Cost on Val dataset for this epoch 1178 = 0.07522514666291075
Error on this batch = 0.41503324002365305
Error on this batch = 0.4210570338985779
Cost on val dataset after 1179 epochs is = 0.07522258430863002
learning rate =  0.10412493663513216
Initial Cost on Val dataset for this epoch 1179 = 0.07522258430863002
Error on this batch = 0.41503212

Error on this batch = 0.4150431970101537
Error on this batch = 0.4203062323215639
Cost on val dataset after 1210 epochs is = 0.07514353540755656
learning rate =  0.10322801154563674
Initial Cost on Val dataset for this epoch 1210 = 0.07514353540755656
Error on this batch = 0.41504422112354716
Error on this batch = 0.4202827894196658
Cost on val dataset after 1211 epochs is = 0.07514095884915198
learning rate =  0.1031995897348539
Initial Cost on Val dataset for this epoch 1211 = 0.07514095884915198
Error on this batch = 0.4150452118876421
Error on this batch = 0.42025938408581703
Cost on val dataset after 1212 epochs is = 0.07513837795289019
learning rate =  0.10317119919979406
Initial Cost on Val dataset for this epoch 1212 = 0.07513837795289019
Error on this batch = 0.4150461601078757
Error on this batch = 0.4202360146995349
Cost on val dataset after 1213 epochs is = 0.0751357924525319
learning rate =  0.10314283988027857
Initial Cost on Val dataset for this epoch 1213 = 0.0751357924

Cost on val dataset after 1244 epochs is = 0.07505175562269041
learning rate =  0.10227886305472406
Initial Cost on Val dataset for this epoch 1244 = 0.07505175562269041
Error on this batch = 0.41499227351663964
Error on this batch = 0.4194900646771981
Cost on val dataset after 1245 epochs is = 0.07504886184750999
learning rate =  0.10225147182067322
Initial Cost on Val dataset for this epoch 1245 = 0.07504886184750999
Error on this batch = 0.4149860159728694
Error on this batch = 0.4194659722185169
Cost on val dataset after 1246 epochs is = 0.07504595257254737
learning rate =  0.1022241099055802
Initial Cost on Val dataset for this epoch 1246 = 0.07504595257254737
Error on this batch = 0.41497937316315586
Error on this batch = 0.4194417583965757
Cost on val dataset after 1247 epochs is = 0.07504302736794072
learning rate =  0.10219677725456996
Initial Cost on Val dataset for this epoch 1247 = 0.07504302736794072
Error on this batch = 0.4149723367741864
Error on this batch = 0.41941741

Cost on val dataset after 1279 epochs is = 0.07493730902656776
learning rate =  0.10133726300020762
Initial Cost on Val dataset for this epoch 1279 = 0.07493730902656776
Error on this batch = 0.41447749389833305
Error on this batch = 0.41849114665807174
Cost on val dataset after 1280 epochs is = 0.07493344470848219
learning rate =  0.10131086621261065
Initial Cost on Val dataset for this epoch 1280 = 0.07493344470848219
Error on this batch = 0.41445038208248675
Error on this batch = 0.41845401677477684
Cost on val dataset after 1281 epochs is = 0.07492952866716333
learning rate =  0.10128449690735589
Initial Cost on Val dataset for this epoch 1281 = 0.07492952866716333
Error on this batch = 0.41442220929996854
Error on this batch = 0.4184160761170773
Cost on val dataset after 1282 epochs is = 0.07492555844204
learning rate =  0.10125815503441044
Initial Cost on Val dataset for this epoch 1282 = 0.07492555844204
Error on this batch = 0.414392919923277
Error on this batch = 0.41837728570

Cost on val dataset after 1314 epochs is = 0.07474645417826002
learning rate =  0.10042940663115336
Initial Cost on Val dataset for this epoch 1314 = 0.07474645417826002
Error on this batch = 0.4122925192535282
Error on this batch = 0.41644685352108485
Cost on val dataset after 1315 epochs is = 0.07473819482714127
learning rate =  0.10040394278456717
Initial Cost on Val dataset for this epoch 1315 = 0.07473819482714127
Error on this batch = 0.41216781196528546
Error on this batch = 0.41636013175086845
Cost on val dataset after 1316 epochs is = 0.07472968947214798
learning rate =  0.10037850474375598
Initial Cost on Val dataset for this epoch 1316 = 0.07472968947214798
Error on this batch = 0.41203804254258003
Error on this batch = 0.41627161284079706
Cost on val dataset after 1317 epochs is = 0.0747209297328353
learning rate =  0.10035309246298807
Initial Cost on Val dataset for this epoch 1317 = 0.0747209297328353
Error on this batch = 0.41190312333754775
Error on this batch = 0.41618

Error on this batch = 0.4124678768021454
Cost on val dataset after 1349 epochs is = 0.0743612931350429
learning rate =  0.0995532330589058
Initial Cost on Val dataset for this epoch 1349 = 0.0743612931350429
Error on this batch = 0.4074467283692927
Error on this batch = 0.4123302306806657
Cost on val dataset after 1350 epochs is = 0.07435037521295257
learning rate =  0.09952864594181327
Initial Cost on Val dataset for this epoch 1350 = 0.07435037521295257
Error on this batch = 0.4073566861416208
Error on this batch = 0.4121924619024327
Cost on val dataset after 1351 epochs is = 0.07433957211902546
learning rate =  0.09950408309630891
Initial Cost on Val dataset for this epoch 1351 = 0.07433957211902546
Error on this batch = 0.4072688251262318
Error on this batch = 0.4120546474058787
Cost on val dataset after 1352 epochs is = 0.07432888657097306
learning rate =  0.0994795444804936
Initial Cost on Val dataset for this epoch 1352 = 0.07432888657097306
Error on this batch = 0.4071830082894

Error on this batch = 0.40796017558773
Cost on val dataset after 1384 epochs is = 0.07405437218770357
learning rate =  0.09870685662655969
Initial Cost on Val dataset for this epoch 1384 = 0.07405437218770357
Error on this batch = 0.4050353072087208
Error on this batch = 0.4078498704511068
Cost on val dataset after 1385 epochs is = 0.07404775438812944
learning rate =  0.09868309474393076
Initial Cost on Val dataset for this epoch 1385 = 0.07404775438812944
Error on this batch = 0.40497727233081454
Error on this batch = 0.4077410852854645
Cost on val dataset after 1386 epochs is = 0.07404124185459528
learning rate =  0.09865935572575625
Initial Cost on Val dataset for this epoch 1386 = 0.07404124185459528
Error on this batch = 0.40491942674561815
Error on this batch = 0.4076338520253879
Cost on val dataset after 1387 epochs is = 0.07403483373432121
learning rate =  0.09863563953356229
Initial Cost on Val dataset for this epoch 1387 = 0.07403483373432121
Error on this batch = 0.404861762

Error on this batch = 0.40496786583805927
Cost on val dataset after 1419 epochs is = 0.07387481234220117
learning rate =  0.09788854802077597
Initial Cost on Val dataset for this epoch 1419 = 0.07387481234220117
Error on this batch = 0.4031432516917659
Error on this batch = 0.40490313772307995
Cost on val dataset after 1420 epochs is = 0.07387081685307795
learning rate =  0.09786556409224371
Initial Cost on Val dataset for this epoch 1420 = 0.07387081685307795
Error on this batch = 0.40309437597986825
Error on this batch = 0.4048391698715581
Cost on val dataset after 1421 epochs is = 0.07386686054585961
learning rate =  0.09784260173473915
Initial Cost on Val dataset for this epoch 1421 = 0.07386686054585961
Error on this batch = 0.4030457938128833
Error on this batch = 0.4047759338105913
Cost on val dataset after 1422 epochs is = 0.07386294214589391
learning rate =  0.09781966091285847
Initial Cost on Val dataset for this epoch 1422 = 0.07386294214589391
Error on this batch = 0.402997

Error on this batch = 0.4030080166159194
Cost on val dataset after 1454 epochs is = 0.07375277208087874
learning rate =  0.09709671782864769
Initial Cost on Val dataset for this epoch 1454 = 0.07375277208087874
Error on this batch = 0.40158335999339045
Error on this batch = 0.4029577587534702
Cost on val dataset after 1455 epochs is = 0.07374971240516304
learning rate =  0.09707446835082278
Initial Cost on Val dataset for this epoch 1455 = 0.07374971240516304
Error on this batch = 0.4015426611222415
Error on this batch = 0.40290767521422566
Cost on val dataset after 1456 epochs is = 0.07374667245411332
learning rate =  0.09705223925264446
Initial Cost on Val dataset for this epoch 1456 = 0.07374667245411332
Error on this batch = 0.40150214642902143
Error on this batch = 0.4028577584281316
Cost on val dataset after 1457 epochs is = 0.07374365202245699
learning rate =  0.09703003050146801
Initial Cost on Val dataset for this epoch 1457 = 0.07374365202245699
Error on this batch = 0.401461

Error on this batch = 0.4013197591741753
Cost on val dataset after 1489 epochs is = 0.07365623555820716
learning rate =  0.096329902172657
Initial Cost on Val dataset for this epoch 1489 = 0.07365623555820716
Error on this batch = 0.40026398327552826
Error on this batch = 0.4012731556224011
Cost on val dataset after 1490 epochs is = 0.07365376051277711
learning rate =  0.0963083470360742
Initial Cost on Val dataset for this epoch 1490 = 0.07365376051277711
Error on this batch = 0.40022943947803696
Error on this batch = 0.40122664679922937
Cost on val dataset after 1491 epochs is = 0.07365129905293265
learning rate =  0.09628681117957892
Initial Cost on Val dataset for this epoch 1491 = 0.07365129905293265
Error on this batch = 0.40019507188839804
Error on this batch = 0.4011802358757923
Cost on val dataset after 1492 epochs is = 0.07364885099780297
learning rate =  0.09626529457301235
Initial Cost on Val dataset for this epoch 1492 = 0.07364885099780297
Error on this batch = 0.40016088

Error on this batch = 0.3997758040370487
Cost on val dataset after 1524 epochs is = 0.07357653877349817
learning rate =  0.09558675011906012
Initial Cost on Val dataset for this epoch 1524 = 0.07357653877349817
Error on this batch = 0.39915491474632897
Error on this batch = 0.39973546782993014
Cost on val dataset after 1525 epochs is = 0.07357443702380954
learning rate =  0.09556585227184428
Initial Cost on Val dataset for this epoch 1525 = 0.07357443702380954
Error on this batch = 0.39912601344875565
Error on this batch = 0.3996954041041622
Cost on val dataset after 1526 epochs is = 0.07357234324268536
learning rate =  0.0955449726879873
Initial Cost on Val dataset for this epoch 1526 = 0.07357234324268536
Error on this batch = 0.39909725187320794
Error on this batch = 0.39965561641825675
Cost on val dataset after 1527 epochs is = 0.07357025731095844
learning rate =  0.09552411133957575
Initial Cost on Val dataset for this epoch 1527 = 0.07357025731095844
Error on this batch = 0.39906

Error on this batch = 0.3985358014493242
Cost on val dataset after 1559 epochs is = 0.07350702647106522
learning rate =  0.09486601260558686
Initial Cost on Val dataset for this epoch 1559 = 0.07350702647106522
Error on this batch = 0.3982217558658265
Error on this batch = 0.3985054631367659
Cost on val dataset after 1560 epochs is = 0.07350514245218756
learning rate =  0.09484573775722298
Initial Cost on Val dataset for this epoch 1560 = 0.07350514245218756
Error on this batch = 0.39819735567879777
Error on this batch = 0.39847538170178526
Cost on val dataset after 1561 epochs is = 0.07350326296040899
learning rate =  0.09482548023038785
Initial Cost on Val dataset for this epoch 1561 = 0.07350326296040899
Error on this batch = 0.3981730760731887
Error on this batch = 0.39844555404918314
Cost on val dataset after 1562 epochs is = 0.07350138790723497
learning rate =  0.09480523999920092
Initial Cost on Val dataset for this epoch 1562 = 0.07350138790723497
Error on this batch = 0.398148

Error on this batch = 0.39760716578875516
Cost on val dataset after 1594 epochs is = 0.07344327579228825
learning rate =  0.09416653267557025
Initial Cost on Val dataset for this epoch 1594 = 0.07344327579228825
Error on this batch = 0.3974366233806888
Error on this batch = 0.39758410276349815
Cost on val dataset after 1595 epochs is = 0.07344150813119495
learning rate =  0.0941468490352235
Initial Cost on Val dataset for this epoch 1595 = 0.07344150813119495
Error on this batch = 0.3974161669965854
Error on this batch = 0.39756120557204877
Cost on val dataset after 1596 epochs is = 0.07343974301326507
learning rate =  0.09412718184245675
Initial Cost on Val dataset for this epoch 1596 = 0.07343974301326507
Error on this batch = 0.39739581304968274
Error on this batch = 0.39753847257373004
Cost on val dataset after 1597 epochs is = 0.07343798042990955
learning rate =  0.09410753107323383
Initial Cost on Val dataset for this epoch 1597 = 0.07343798042990955
Error on this batch = 0.39737

Error on this batch = 0.3968897651297374
Cost on val dataset after 1629 epochs is = 0.0733829949216577
learning rate =  0.09348723683941453
Initial Cost on Val dataset for this epoch 1629 = 0.0733829949216577
Error on this batch = 0.3967770438879093
Error on this batch = 0.3968717382817487
Cost on val dataset after 1630 epochs is = 0.07338132638916312
learning rate =  0.09346811488411749
Initial Cost on Val dataset for this epoch 1630 = 0.07338132638916312
Error on this batch = 0.39675975832152144
Error on this batch = 0.3968538350063999
Cost on val dataset after 1631 epochs is = 0.07337966131070461
learning rate =  0.09344900856410766
Initial Cost on Val dataset for this epoch 1631 = 0.07337966131070461
Error on this batch = 0.39674255100882816
Error on this batch = 0.3968360541526093
Cost on val dataset after 1632 epochs is = 0.07337799972711219
learning rate =  0.09342991785702606
Initial Cost on Val dataset for this epoch 1632 = 0.07337799972711219
Error on this batch = 0.396725421

Cost on val dataset after 1663 epochs is = 0.07332840361908322
learning rate =  0.09284573004337694
Initial Cost on Val dataset for this epoch 1663 = 0.07332840361908322
Error on this batch = 0.3962294600043345
Error on this batch = 0.3963242243504119
Cost on val dataset after 1664 epochs is = 0.07332686942199257
learning rate =  0.09282712741213502
Initial Cost on Val dataset for this epoch 1664 = 0.07332686942199257
Error on this batch = 0.3962144842743311
Error on this batch = 0.3963097698032904
Cost on val dataset after 1665 epochs is = 0.07332533945553958
learning rate =  0.09280853968087878
Initial Cost on Val dataset for this epoch 1665 = 0.07332533945553958
Error on this batch = 0.39619956587100974
Error on this batch = 0.39629539350888954
Cost on val dataset after 1666 epochs is = 0.07332381372069134
learning rate =  0.09278996682873569
Initial Cost on Val dataset for this epoch 1666 = 0.07332381372069134
Error on this batch = 0.39618470420541424
Error on this batch = 0.396281

Cost on val dataset after 1698 epochs is = 0.07327718769213404
learning rate =  0.09220336898195017
Initial Cost on Val dataset for this epoch 1698 = 0.07327718769213404
Error on this batch = 0.3957356953778013
Error on this batch = 0.39585710230519866
Cost on val dataset after 1699 epochs is = 0.0732757972162763
learning rate =  0.09218527569871837
Initial Cost on Val dataset for this epoch 1699 = 0.0732757972162763
Error on this batch = 0.39572239327153114
Error on this batch = 0.3958447064332173
Cost on val dataset after 1700 epochs is = 0.07327441061373095
learning rate =  0.09216719660908067
Initial Cost on Val dataset for this epoch 1700 = 0.07327441061373095
Error on this batch = 0.39570912965035815
Error on this batch = 0.3958323520087163
Cost on val dataset after 1701 epochs is = 0.07327302786845882
learning rate =  0.09214913169356326
Initial Cost on Val dataset for this epoch 1701 = 0.07327302786845882
Error on this batch = 0.39569590404673466
Error on this batch = 0.3958200

Cost on val dataset after 1733 epochs is = 0.07323071999923024
learning rate =  0.09157842376781679
Initial Cost on Val dataset for this epoch 1733 = 0.07323071999923024
Error on this batch = 0.3952901898602604
Error on this batch = 0.39544354393197206
Cost on val dataset after 1734 epochs is = 0.07322945554119756
learning rate =  0.09156081591816768
Initial Cost on Val dataset for this epoch 1734 = 0.07322945554119756
Error on this batch = 0.3952779849425348
Error on this batch = 0.39543222342132717
Cost on val dataset after 1735 epochs is = 0.07322819440522403
learning rate =  0.09154322160260804
Initial Cost on Val dataset for this epoch 1735 = 0.07322819440522403
Error on this batch = 0.39526580467429095
Error on this batch = 0.39542092464850626
Cost on val dataset after 1736 epochs is = 0.07322693657685571
learning rate =  0.09152564080294341
Initial Cost on Val dataset for this epoch 1736 = 0.07322693657685571
Error on this batch = 0.3952536487295291
Error on this batch = 0.39540

Cost on val dataset after 1768 epochs is = 0.07318835142001372
learning rate =  0.09097008453117922
Initial Cost on Val dataset for this epoch 1768 = 0.07318835142001372
Error on this batch = 0.3948757135222239
Error on this batch = 0.39505812518281774
Cost on val dataset after 1769 epochs is = 0.07318719531428289
learning rate =  0.09095293977304023
Initial Cost on Val dataset for this epoch 1769 = 0.07318719531428289
Error on this batch = 0.3948641973430007
Error on this batch = 0.3950473822833375
Cost on val dataset after 1770 epochs is = 0.07318604208668288
learning rate =  0.09093580793240648
Initial Cost on Val dataset for this epoch 1770 = 0.07318604208668288
Error on this batch = 0.3948526961621562
Error on this batch = 0.395036651532989
Cost on val dataset after 1771 epochs is = 0.0731848917263926
learning rate =  0.09091868899225568
Initial Cost on Val dataset for this epoch 1771 = 0.0731848917263926
Error on this batch = 0.39484120975104225
Error on this batch = 0.3950259327

Error on this batch = 0.39469873898239904
Cost on val dataset after 1803 epochs is = 0.07314953447269733
learning rate =  0.09037759415277437
Initial Cost on Val dataset for this epoch 1803 = 0.07314953447269733
Error on this batch = 0.3944801941922962
Error on this batch = 0.3946883256153998
Cost on val dataset after 1804 epochs is = 0.0731484732934749
learning rate =  0.0903608915846334
Initial Cost on Val dataset for this epoch 1804 = 0.0731484732934749
Error on this batch = 0.39446908175567175
Error on this batch = 0.39467791988658363
Cost on val dataset after 1805 epochs is = 0.07314741467007006
learning rate =  0.09034420135677229
Initial Cost on Val dataset for this epoch 1805 = 0.07314741467007006
Error on this batch = 0.39445797765485824
Error on this batch = 0.394667521706282
Cost on val dataset after 1806 epochs is = 0.07314635859487116
learning rate =  0.09032752345324462
Initial Cost on Val dataset for this epoch 1806 = 0.07314635859487116
Error on this batch = 0.394446881

Error on this batch = 0.39433831411962117
Cost on val dataset after 1838 epochs is = 0.07311386814246358
learning rate =  0.08980024388274199
Initial Cost on Val dataset for this epoch 1838 = 0.07311386814246358
Error on this batch = 0.3940953543904763
Error on this batch = 0.39432812496199704
Cost on val dataset after 1839 epochs is = 0.07311289238548904
learning rate =  0.08978396392519114
Initial Cost on Val dataset for this epoch 1839 = 0.07311289238548904
Error on this batch = 0.39408445934717884
Error on this batch = 0.39431794117324603
Cost on val dataset after 1840 epochs is = 0.07311191896181954
learning rate =  0.08976769576684895
Initial Cost on Val dataset for this epoch 1840 = 0.07311191896181954
Error on this batch = 0.3940735687695699
Error on this batch = 0.3943077627081775
Cost on val dataset after 1841 epochs is = 0.07311094786626225
learning rate =  0.0897514393927581
Initial Cost on Val dataset for this epoch 1841 = 0.07311094786626225
Error on this batch = 0.394062

Error on this batch = 0.3939846272016508
Cost on val dataset after 1873 epochs is = 0.07308107353361397
learning rate =  0.08923736939891906
Initial Cost on Val dataset for this epoch 1873 = 0.07308107353361397
Error on this batch = 0.3937163897807945
Error on this batch = 0.3939746032752395
Cost on val dataset after 1874 epochs is = 0.07308017666162167
learning rate =  0.08922149368796337
Initial Cost on Val dataset for this epoch 1874 = 0.07308017666162167
Error on this batch = 0.3937056289672178
Error on this batch = 0.3939645834986817
Cost on val dataset after 1875 epochs is = 0.07307928196771481
learning rate =  0.08920562926840989
Initial Cost on Val dataset for this epoch 1875 = 0.07307928196771481
Error on this batch = 0.3936948718831733
Error on this batch = 0.3939545678463173
Cost on val dataset after 1876 epochs is = 0.07307838944808101
learning rate =  0.08918977612621214
Initial Cost on Val dataset for this epoch 1876 = 0.07307838944808101
Error on this batch = 0.393684118

Error on this batch = 0.3936361187335027
Cost on val dataset after 1908 epochs is = 0.07305095512275657
learning rate =  0.08868834725318729
Initial Cost on Val dataset for this epoch 1908 = 0.07305095512275657
Error on this batch = 0.3933422903803603
Error on this batch = 0.39362622838096173
Cost on val dataset after 1909 epochs is = 0.07305013234679684
learning rate =  0.0886728585439563
Initial Cost on Val dataset for this epoch 1909 = 0.07305013234679684
Error on this batch = 0.39333169258072687
Error on this batch = 0.393616341629307
Cost on val dataset after 1910 epochs is = 0.07304931162659346
learning rate =  0.08865738064897548
Initial Cost on Val dataset for this epoch 1910 = 0.07304931162659346
Error on this batch = 0.3933211009084897
Error on this batch = 0.3936064584764525
Cost on val dataset after 1911 epochs is = 0.07304849295890307
learning rate =  0.08864191355503827
Initial Cost on Val dataset for this epoch 1911 = 0.07304849295890307
Error on this batch = 0.393310515

Error on this batch = 0.3932921449746276
Cost on val dataset after 1943 epochs is = 0.07302336052877474
learning rate =  0.088152591661329
Initial Cost on Val dataset for this epoch 1943 = 0.07302336052877474
Error on this batch = 0.3929757935193887
Error on this batch = 0.3932823871642164
Cost on val dataset after 1944 epochs is = 0.07302260785394739
learning rate =  0.0881374737410083
Initial Cost on Val dataset for this epoch 1944 = 0.07302260785394739
Error on this batch = 0.39296548321726416
Error on this batch = 0.3932726336298913
Cost on val dataset after 1945 epochs is = 0.0730218571290175
learning rate =  0.0881223661860773
Initial Cost on Val dataset for this epoch 1945 = 0.0730218571290175
Error on this batch = 0.3929551835458092
Error on this batch = 0.39326288441854723
Cost on val dataset after 1946 epochs is = 0.07302110835125838
learning rate =  0.0881072689841053
Initial Cost on Val dataset for this epoch 1946 = 0.07302110835125838
Error on this batch = 0.39294489464543

Error on this batch = 0.39295356844136947
Cost on val dataset after 1978 epochs is = 0.07299816062125655
learning rate =  0.08762955159766174
Initial Cost on Val dataset for this epoch 1978 = 0.07299816062125655
Error on this batch = 0.3926221558323509
Error on this batch = 0.3929440001191292
Cost on val dataset after 1979 epochs is = 0.07299747474812844
learning rate =  0.08761478920661463
Initial Cost on Val dataset for this epoch 1979 = 0.07299747474812844
Error on this batch = 0.3926122978340081
Error on this batch = 0.39293443878473727
Cost on val dataset after 1980 epochs is = 0.07299679074652587
learning rate =  0.08760003675824575
Initial Cost on Val dataset for this epoch 1980 = 0.07299679074652587
Error on this batch = 0.3926024549861193
Error on this batch = 0.39292488454485286
Cost on val dataset after 1981 epochs is = 0.07299610861481175
learning rate =  0.0875852942408421
Initial Cost on Val dataset for this epoch 1981 = 0.07299610861481175
Error on this batch = 0.3925926

Error on this batch = 0.39262357389896985
Cost on val dataset after 2013 epochs is = 0.07297525938477227
learning rate =  0.0871187081606907
Initial Cost on Val dataset for this epoch 2013 = 0.07297525938477227
Error on this batch = 0.39228675636995763
Error on this batch = 0.39261431787217177
Cost on val dataset after 2014 epochs is = 0.07297463820878061
learning rate =  0.08710428692087076
Initial Cost on Val dataset for this epoch 2014 = 0.07297463820878061
Error on this batch = 0.3922774818753592
Error on this batch = 0.39260507282172963
Cost on val dataset after 2015 epochs is = 0.07297401885927532
learning rate =  0.08708987522522042
Initial Cost on Val dataset for this epoch 2015 = 0.07297401885927532
Error on this batch = 0.392268225358737
Error on this batch = 0.39259583885367166
Cost on val dataset after 2016 epochs is = 0.07297340133492394
learning rate =  0.08707547306269137
Initial Cost on Val dataset for this epoch 2016 = 0.07297340133492394
Error on this batch = 0.392258

Error on this batch = 0.3923067541708581
Cost on val dataset after 2048 epochs is = 0.07295459356591882
learning rate =  0.08661957218027254
Initial Cost on Val dataset for this epoch 2048 = 0.07295459356591882
Error on this batch = 0.3919732022291344
Error on this batch = 0.3922979345883968
Cost on val dataset after 2049 epochs is = 0.07295403515961271
learning rate =  0.08660547853014089
Initial Cost on Val dataset for this epoch 2049 = 0.07295403515961271
Error on this batch = 0.3919645888414201
Error on this batch = 0.39228912866811855
Cost on val dataset after 2050 epochs is = 0.07295347849631495
learning rate =  0.08659139404810198
Initial Cost on Val dataset for this epoch 2050 = 0.07295347849631495
Error on this batch = 0.391955995262466
Error on this batch = 0.3922803364528179
Cost on val dataset after 2051 epochs is = 0.07295292357217553
learning rate =  0.08657731872372397
Initial Cost on Val dataset for this epoch 2051 = 0.07295292357217553
Error on this batch = 0.391947421

Error on this batch = 0.39200639329034487
Cost on val dataset after 2083 epochs is = 0.07293605697866394
learning rate =  0.0861316820404424
Initial Cost on Val dataset for this epoch 2083 = 0.07293605697866394
Error on this batch = 0.3916838308805962
Error on this batch = 0.3919980671390751
Cost on val dataset after 2084 epochs is = 0.07293555677379913
learning rate =  0.08611790317569916
Initial Cost on Val dataset for this epoch 2084 = 0.07293555677379913
Error on this batch = 0.39167593822940133
Error on this batch = 0.39198975527548796
Cost on val dataset after 2085 epochs is = 0.07293505813372946
learning rate =  0.08610413312378938
Initial Cost on Val dataset for this epoch 2085 = 0.07293505813372946
Error on this batch = 0.39166806687883593
Error on this batch = 0.3919814576975702
Cost on val dataset after 2086 epochs is = 0.07293456105246168
learning rate =  0.08609037187485377
Initial Cost on Val dataset for this epoch 2086 = 0.07293456105246168
Error on this batch = 0.391660

Error on this batch = 0.39172343363724926
Cost on val dataset after 2118 epochs is = 0.07291944116945026
learning rate =  0.08565460169520496
Initial Cost on Val dataset for this epoch 2118 = 0.07291944116945026
Error on this batch = 0.39142039176091387
Error on this batch = 0.3917156025239977
Cost on val dataset after 2119 epochs is = 0.07291899213084689
learning rate =  0.08564112551472844
Initial Cost on Val dataset for this epoch 2119 = 0.07291899213084689
Error on this batch = 0.391413254264462
Error on this batch = 0.3917077853090866
Cost on val dataset after 2120 epochs is = 0.07291854444728883
learning rate =  0.0856276578111702
Initial Cost on Val dataset for this epoch 2120 = 0.07291854444728883
Error on this batch = 0.3914061383774609
Error on this batch = 0.3916999819748215
Cost on val dataset after 2121 epochs is = 0.07291809811313138
learning rate =  0.08561419857520328
Initial Cost on Val dataset for this epoch 2121 = 0.07291809811313138
Error on this batch = 0.391399044

Error on this batch = 0.3914722403116657
Cost on val dataset after 2151 epochs is = 0.07290530913182529
learning rate =  0.08521431325678815
Initial Cost on Val dataset for this epoch 2151 = 0.07290530913182529
Error on this batch = 0.39119614557548577
Error on this batch = 0.391464857427166
Cost on val dataset after 2152 epochs is = 0.0729049020427913
learning rate =  0.08520111196869823
Initial Cost on Val dataset for this epoch 2152 = 0.0729049020427913
Error on this batch = 0.39118970815619925
Error on this batch = 0.39145748775940337
Cost on val dataset after 2153 epochs is = 0.0729044961437415
learning rate =  0.08518791885731224
Initial Cost on Val dataset for this epoch 2153 = 0.0729044961437415
Error on this batch = 0.39118329134593394
Error on this batch = 0.39145013128525724
Cost on val dataset after 2154 epochs is = 0.07290409143040592
learning rate =  0.08517473391377132
Initial Cost on Val dataset for this epoch 2154 = 0.07290409143040592
Error on this batch = 0.391176895

Error on this batch = 0.3912215464525808
Cost on val dataset after 2186 epochs is = 0.07289174328215023
learning rate =  0.08475707604014839
Initial Cost on Val dataset for this epoch 2186 = 0.07289174328215023
Error on this batch = 0.3909827645000802
Error on this batch = 0.39121461172133465
Cost on val dataset after 2187 epochs is = 0.07289137556924022
learning rate =  0.08474415575396649
Initial Cost on Val dataset for this epoch 2187 = 0.07289137556924022
Error on this batch = 0.3909770170595032
Error on this batch = 0.3912076893385232
Cost on val dataset after 2188 epochs is = 0.07289100892045458
learning rate =  0.08473124334240688
Initial Cost on Val dataset for this epoch 2188 = 0.07289100892045458
Error on this batch = 0.3909712882780805
Error on this batch = 0.39120077927781294
Cost on val dataset after 2189 epochs is = 0.07289064333281722
learning rate =  0.08471833879707445
Initial Cost on Val dataset for this epoch 2189 = 0.07289064333281722
Error on this batch = 0.3909655

Error on this batch = 0.3909990739772476
Cost on val dataset after 2219 epochs is = 0.07288015533951984
learning rate =  0.0843348191099642
Initial Cost on Val dataset for this epoch 2219 = 0.07288015533951984
Error on this batch = 0.39080259637837217
Error on this batch = 0.3909925324980765
Cost on val dataset after 2220 epochs is = 0.07287982129282891
learning rate =  0.0843221543224018
Initial Cost on Val dataset for this epoch 2220 = 0.07287982129282891
Error on this batch = 0.3907974301405549
Error on this batch = 0.3909860024668595
Cost on val dataset after 2221 epochs is = 0.07287948822265178
learning rate =  0.08430949713903507
Initial Cost on Val dataset for this epoch 2221 = 0.07287948822265178
Error on this batch = 0.3907922803792435
Error on this batch = 0.39097948385550296
Cost on val dataset after 2222 epochs is = 0.07287915612652342
learning rate =  0.08429684755187764
Initial Cost on Val dataset for this epoch 2222 = 0.07287915612652342
Error on this batch = 0.390787147

Error on this batch = 0.3907767484484724
Cost on val dataset after 2254 epochs is = 0.07286902828089961
learning rate =  0.08389602439034242
Initial Cost on Val dataset for this epoch 2254 = 0.07286902828089961
Error on this batch = 0.3906311412792358
Error on this batch = 0.3907705907685083
Cost on val dataset after 2255 epochs is = 0.07286872689242052
learning rate =  0.08388362107490586
Initial Cost on Val dataset for this epoch 2255 = 0.07286872689242052
Error on this batch = 0.3906265117645777
Error on this batch = 0.39076444354043516
Cost on val dataset after 2256 epochs is = 0.07286842638724567
learning rate =  0.08387122509111744
Initial Cost on Val dataset for this epoch 2256 = 0.07286842638724567
Error on this batch = 0.39062189638530553
Error on this batch = 0.39075830673565043
Cost on val dataset after 2257 epochs is = 0.07286812676224355
learning rate =  0.08385883643139644
Initial Cost on Val dataset for this epoch 2257 = 0.07286812676224355
Error on this batch = 0.390617

Error on this batch = 0.3905730802906149
Cost on val dataset after 2288 epochs is = 0.07285925546889357
learning rate =  0.08347837987018333
Initial Cost on Val dataset for this epoch 2288 = 0.07285925546889357
Error on this batch = 0.3904812711797841
Error on this batch = 0.390567262019669
Cost on val dataset after 2289 epochs is = 0.07285898203055158
learning rate =  0.08346622164398892
Initial Cost on Val dataset for this epoch 2289 = 0.07285898203055158
Error on this batch = 0.3904770849861308
Error on this batch = 0.39056145323860075
Cost on val dataset after 2290 epochs is = 0.07285870933604127
learning rate =  0.08345407049785071
Initial Cost on Val dataset for this epoch 2290 = 0.07285870933604127
Error on this batch = 0.3904729106494032
Error on this batch = 0.39055565391968905
Cost on val dataset after 2291 epochs is = 0.07285843737993125
learning rate =  0.08344192642455675
Initial Cost on Val dataset for this epoch 2291 = 0.07285843737993125
Error on this batch = 0.39046874

Error on this batch = 0.3903749090328191
Cost on val dataset after 2323 epochs is = 0.07285008692594187
learning rate =  0.08305700792761796
Initial Cost on Val dataset for this epoch 2323 = 0.07285008692594187
Error on this batch = 0.3903413350705365
Error on this batch = 0.39036940683601956
Cost on val dataset after 2324 epochs is = 0.07284983562649114
learning rate =  0.08304509327992506
Initial Cost on Val dataset for this epoch 2324 = 0.07284983562649114
Error on this batch = 0.3903375200101997
Error on this batch = 0.39036391320124253
Cost on val dataset after 2325 epochs is = 0.072849584819283
learning rate =  0.08303318546598495
Initial Cost on Val dataset for this epoch 2325 = 0.072849584819283
Error on this batch = 0.39033371413085854
Error on this batch = 0.39035842810377214
Cost on val dataset after 2326 epochs is = 0.07284933449518408
learning rate =  0.08302128447894135
Initial Cost on Val dataset for this epoch 2326 = 0.07284933449518408
Error on this batch = 0.390329917

Cost on val dataset after 2357 epochs is = 0.07284175986252162
learning rate =  0.08265570306389441
Initial Cost on Val dataset for this epoch 2357 = 0.07284175986252162
Error on this batch = 0.3902162330878413
Error on this batch = 0.3901872731459679
Cost on val dataset after 2358 epochs is = 0.07284151963478037
learning rate =  0.08264401697536546
Initial Cost on Val dataset for this epoch 2358 = 0.07284151963478037
Error on this batch = 0.39021267857463515
Error on this batch = 0.3901820570055176
Cost on val dataset after 2359 epochs is = 0.07284127954223239
learning rate =  0.08263233749287875
Initial Cost on Val dataset for this epoch 2359 = 0.07284127954223239
Error on this batch = 0.3902091300069604
Error on this batch = 0.390176848668279
Cost on val dataset after 2360 epochs is = 0.072841039573509
learning rate =  0.08262066460990203
Initial Cost on Val dataset for this epoch 2360 = 0.072841039573509
Error on this batch = 0.39020558728132615
Error on this batch = 0.390171648116

Cost on val dataset after 2392 epochs is = 0.07283335929987943
learning rate =  0.08225057828402842
Initial Cost on Val dataset for this epoch 2392 = 0.07283335929987943
Error on this batch = 0.39009467913800044
Error on this batch = 0.39000925037508266
Cost on val dataset after 2393 epochs is = 0.07283311726871793
learning rate =  0.08223911957974472
Initial Cost on Val dataset for this epoch 2393 = 0.07283311726871793
Error on this batch = 0.39009127071606375
Error on this batch = 0.39000429870018255
Cost on val dataset after 2394 epochs is = 0.0728328750042526
learning rate =  0.08222766725825148
Initial Cost on Val dataset for this epoch 2394 = 0.0728328750042526
Error on this batch = 0.39008786463432
Error on this batch = 0.3899993543800512
Cost on val dataset after 2395 epochs is = 0.07283263249782507
learning rate =  0.08221622131332935
Initial Cost on Val dataset for this epoch 2395 = 0.07283263249782507
Error on this batch = 0.3900844607970648
Error on this batch = 0.389994417

Cost on val dataset after 2427 epochs is = 0.0728247016244318
learning rate =  0.0818532811370953
Initial Cost on Val dataset for this epoch 2427 = 0.0728247016244318
Error on this batch = 0.3899762036578676
Error on this batch = 0.3898402776231869
Cost on val dataset after 2428 epochs is = 0.07282444740711827
learning rate =  0.08184204218581616
Initial Cost on Val dataset for this epoch 2428 = 0.07282444740711827
Error on this batch = 0.38997282717170867
Error on this batch = 0.3898355799889495
Cost on val dataset after 2429 epochs is = 0.07282419275842897
learning rate =  0.0818308094046998
Initial Cost on Val dataset for this epoch 2429 = 0.07282419275842897
Error on this batch = 0.3899694503541888
Error on this batch = 0.3898308895373538
Cost on val dataset after 2430 epochs is = 0.07282393767600912
learning rate =  0.08181958278782074
Initial Cost on Val dataset for this epoch 2430 = 0.07282393767600912
Error on this batch = 0.3899660731539934
Error on this batch = 0.389826206264

Cost on val dataset after 2462 epochs is = 0.0728155402646631
learning rate =  0.08146355072746608
Initial Cost on Val dataset for this epoch 2462 = 0.0728155402646631
Error on this batch = 0.3898575739586434
Error on this batch = 0.3896801055745084
Cost on val dataset after 2463 epochs is = 0.07281527055579888
learning rate =  0.08145252425863522
Initial Cost on Val dataset for this epoch 2463 = 0.07281527055579888
Error on this batch = 0.38985416530132294
Error on this batch = 0.38967565648246855
Cost on val dataset after 2464 epochs is = 0.0728150004195344
learning rate =  0.08144150375731568
Initial Cost on Val dataset for this epoch 2464 = 0.0728150004195344
Error on this batch = 0.3898507554058583
Error on this batch = 0.3896712143698848
Cost on val dataset after 2465 epochs is = 0.07281472985795862
learning rate =  0.08143048921785793
Initial Cost on Val dataset for this epoch 2465 = 0.07281472985795862
Error on this batch = 0.38984734427076545
Error on this batch = 0.3896667792

Cost on val dataset after 2497 epochs is = 0.07280586259593923
learning rate =  0.08108113840708908
Initial Cost on Val dataset for this epoch 2497 = 0.07280586259593923
Error on this batch = 0.3897375767944847
Error on this batch = 0.3895284635456764
Cost on val dataset after 2498 epochs is = 0.07280557948729303
learning rate =  0.08107031748898286
Initial Cost on Val dataset for this epoch 2498 = 0.07280557948729303
Error on this batch = 0.3897341301791555
Error on this batch = 0.38952425127983076
Cost on val dataset after 2499 epochs is = 0.07280529604876924
learning rate =  0.08105950234511268
Initial Cost on Val dataset for this epoch 2499 = 0.07280529604876924
Error on this batch = 0.3897306828019096
Error on this batch = 0.3895200454975057
Cost on val dataset after 2500 epochs is = 0.0728050122834246
learning rate =  0.08104869297008852
Initial Cost on Val dataset for this epoch 2500 = 0.0728050122834246
Error on this batch = 0.3897272346875851
Error on this batch = 0.3895158461

Cost on val dataset after 2532 epochs is = 0.07279577688434778
learning rate =  0.08070580703842113
Initial Cost on Val dataset for this epoch 2532 = 0.07279577688434778
Error on this batch = 0.38961666774690595
Error on this batch = 0.3893847566536467
Cost on val dataset after 2533 epochs is = 0.07279548396034574
learning rate =  0.08069518505790382
Initial Cost on Val dataset for this epoch 2533 = 0.07279548396034574
Error on this batch = 0.38961321071710253
Error on this batch = 0.38938075867483307
Cost on val dataset after 2534 epochs is = 0.07279519080427613
learning rate =  0.0806845686671672
Initial Cost on Val dataset for this epoch 2534 = 0.07279519080427613
Error on this batch = 0.3896097539022958
Error on this batch = 0.3893767663997467
Cost on val dataset after 2535 epochs is = 0.07279489741854935
learning rate =  0.08067395786106547
Initial Cost on Val dataset for this epoch 2535 = 0.07279489741854935
Error on this batch = 0.3896062973308031
Error on this batch = 0.3893727

Cost on val dataset after 2567 epochs is = 0.07278540100506514
learning rate =  0.08033733031120545
Initial Cost on Val dataset for this epoch 2567 = 0.07278540100506514
Error on this batch = 0.38949597721428303
Error on this batch = 0.3892480543003214
Cost on val dataset after 2568 epochs is = 0.07278510124018174
learning rate =  0.08032690095495285
Initial Cost on Val dataset for this epoch 2568 = 0.07278510124018174
Error on this batch = 0.3894925435296284
Error on this batch = 0.38924424070996594
Cost on val dataset after 2569 epochs is = 0.07278480131400603
learning rate =  0.08031647701232933
Initial Cost on Val dataset for this epoch 2569 = 0.07278480131400603
Error on this batch = 0.38948911094946836
Error on this batch = 0.38924043192052554
Cost on val dataset after 2570 epochs is = 0.0727845012281894
learning rate =  0.08030605847841917
Initial Cost on Val dataset for this epoch 2570 = 0.0727845012281894
Error on this batch = 0.38948567949535046
Error on this batch = 0.389236

Cost on val dataset after 2602 epochs is = 0.07277482323749636
learning rate =  0.07997549210848762
Initial Cost on Val dataset for this epoch 2602 = 0.07277482323749636
Error on this batch = 0.3893765838525591
Error on this batch = 0.38911726793486645
Cost on val dataset after 2603 epochs is = 0.072774518708478
learning rate =  0.07996524934555434
Initial Cost on Val dataset for this epoch 2603 = 0.072774518708478
Error on this batch = 0.389373200076065
Error on this batch = 0.38911360728518757
Cost on val dataset after 2604 epochs is = 0.07277421406719024
learning rate =  0.07995501182792288
Initial Cost on Val dataset for this epoch 2604 = 0.07277421406719024
Error on this batch = 0.389369818016654
Error on this batch = 0.38910995056357595
Cost on val dataset after 2605 epochs is = 0.07277390931481943
learning rate =  0.07994477955089434
Initial Cost on Val dataset for this epoch 2605 = 0.07277390931481943
Error on this batch = 0.3893664376877351
Error on this batch = 0.389106297746

Cost on val dataset after 2637 epochs is = 0.07276410526152768
learning rate =  0.07962008591771197
Initial Cost on Val dataset for this epoch 2637 = 0.07276410526152768
Error on this batch = 0.3892592507671559
Error on this batch = 0.3889913345965945
Cost on val dataset after 2638 epochs is = 0.07276379745774238
learning rate =  0.07961002398332989
Initial Cost on Val dataset for this epoch 2638 = 0.07276379745774238
Error on this batch = 0.38925593379891427
Error on this batch = 0.38898779819827506
Cost on val dataset after 2639 epochs is = 0.07276348957882307
learning rate =  0.07959996713330078
Initial Cost on Val dataset for this epoch 2639 = 0.07276348957882307
Error on this batch = 0.3892526189054432
Error on this batch = 0.38898426497444605
Cost on val dataset after 2640 epochs is = 0.0727631816257273
learning rate =  0.07958991536313037
Initial Cost on Val dataset for this epoch 2640 = 0.0727631816257273
Error on this batch = 0.389249306094019
Error on this batch = 0.388980734

Cost on val dataset after 2672 epochs is = 0.07275329348787603
learning rate =  0.07927091428314216
Initial Cost on Val dataset for this epoch 2672 = 0.07275329348787603
Error on this batch = 0.38914443241693136
Error on this batch = 0.38886932959112175
Cost on val dataset after 2673 epochs is = 0.0727529835994488
learning rate =  0.07926102766364787
Initial Cost on Val dataset for this epoch 2673 = 0.0727529835994488
Error on this batch = 0.3891411915960124
Error on this batch = 0.3888658936311129
Cost on val dataset after 2674 epochs is = 0.07275267366726298
learning rate =  0.07925114597452121
Initial Cost on Val dataset for this epoch 2674 = 0.07275267366726298
Error on this batch = 0.389137953035267
Error on this batch = 0.388862460245054
Cost on val dataset after 2675 epochs is = 0.07275236369216141
learning rate =  0.07924126921146098
Initial Cost on Val dataset for this epoch 2675 = 0.07275236369216141
Error on this batch = 0.3891347167382828
Error on this batch = 0.38885902941

Cost on val dataset after 2707 epochs is = 0.07274242674677632
learning rate =  0.07892778829620678
Initial Cost on Val dataset for this epoch 2707 = 0.07274242674677632
Error on this batch = 0.38903236858288226
Error on this batch = 0.3887505118820978
Cost on val dataset after 2708 epochs is = 0.07274211581268099
learning rate =  0.07891807171500603
Initial Cost on Val dataset for this epoch 2708 = 0.07274211581268099
Error on this batch = 0.3890292085959898
Error on this batch = 0.3887471579936039
Cost on val dataset after 2709 epochs is = 0.07274180486292837
learning rate =  0.07890835991676437
Initial Cost on Val dataset for this epoch 2709 = 0.07274180486292837
Error on this batch = 0.38902605096031656
Error on this batch = 0.3887438062331788
Cost on val dataset after 2710 epochs is = 0.07274149389828095
learning rate =  0.0788986528973631
Initial Cost on Val dataset for this epoch 2710 = 0.07274149389828095
Error on this batch = 0.38902289567769677
Error on this batch = 0.3887404

Error on this batch = 0.3886441946165968
Cost on val dataset after 2740 epochs is = 0.07273216172275153
learning rate =  0.07860964425282937
Initial Cost on Val dataset for this epoch 2740 = 0.07273216172275153
Error on this batch = 0.3889293395686579
Error on this batch = 0.3886409041425786
Cost on val dataset after 2741 epochs is = 0.0727318506622672
learning rate =  0.0786000833619235
Initial Cost on Val dataset for this epoch 2741 = 0.0727318506622672
Error on this batch = 0.3889262580296727
Error on this batch = 0.38863761551379983
Cost on val dataset after 2742 epochs is = 0.07273153961009197
learning rate =  0.0785905271206911
Initial Cost on Val dataset for this epoch 2742 = 0.07273153961009197
Error on this batch = 0.38892317889312017
Error on this batch = 0.3886343287231965
Cost on val dataset after 2743 epochs is = 0.07273122856691268
learning rate =  0.07858097552517643
Initial Cost on Val dataset for this epoch 2743 = 0.07273122856691268
Error on this batch = 0.38892010216

Error on this batch = 0.3888289252681229
Error on this batch = 0.3885333160444626
Cost on val dataset after 2774 epochs is = 0.07272159432578595
learning rate =  0.07828715897856646
Initial Cost on Val dataset for this epoch 2774 = 0.07272159432578595
Error on this batch = 0.38882592379904724
Error on this batch = 0.38853008493907754
Cost on val dataset after 2775 epochs is = 0.07272128391864247
learning rate =  0.07827775398580675
Initial Cost on Val dataset for this epoch 2775 = 0.07272128391864247
Error on this batch = 0.38882292478408464
Error on this batch = 0.3885268554936272
Cost on val dataset after 2776 epochs is = 0.07272097354179712
learning rate =  0.07826835351087717
Initial Cost on Val dataset for this epoch 2776 = 0.07272097354179712
Error on this batch = 0.388819928225074
Error on this batch = 0.3885236277042801
Cost on val dataset after 2777 epochs is = 0.07272066319585364
learning rate =  0.07825895754998127
Initial Cost on Val dataset for this epoch 2777 = 0.07272066

Error on this batch = 0.38842119430444555
Cost on val dataset after 2809 epochs is = 0.07271075192666106
learning rate =  0.07796064772911661
Initial Cost on Val dataset for this epoch 2809 = 0.07271075192666106
Error on this batch = 0.3887224331293072
Error on this batch = 0.38841801955633115
Cost on val dataset after 2810 epochs is = 0.07271044292325328
learning rate =  0.0779513986309483
Initial Cost on Val dataset for this epoch 2810 = 0.07271044292325328
Error on this batch = 0.38871952135800314
Error on this batch = 0.3884148463821101
Cost on val dataset after 2811 epochs is = 0.07271013396983869
learning rate =  0.07794215392039779
Initial Cost on Val dataset for this epoch 2811 = 0.07271013396983869
Error on this batch = 0.3887166121247465
Error on this batch = 0.3884116747806614
Cost on val dataset after 2812 epochs is = 0.07270982506693811
learning rate =  0.07793291359382389
Initial Cost on Val dataset for this epoch 2812 = 0.07270982506693811
Error on this batch = 0.3887137

Cost on val dataset after 2843 epochs is = 0.07270027687541404
learning rate =  0.07764861825446782
Initial Cost on Val dataset for this epoch 2843 = 0.07270027687541404
Error on this batch = 0.3886248761825245
Error on this batch = 0.3883110105511445
Cost on val dataset after 2844 epochs is = 0.0726999698514298
learning rate =  0.07763951631812206
Initial Cost on Val dataset for this epoch 2844 = 0.0726999698514298
Error on this batch = 0.3886220525923071
Error on this batch = 0.3883078906269423
Cost on val dataset after 2845 epochs is = 0.07269966289399513
learning rate =  0.07763041864797572
Initial Cost on Val dataset for this epoch 2845 = 0.07269966289399513
Error on this batch = 0.3886192316625608
Error on this batch = 0.3883047722729488
Cost on val dataset after 2846 epochs is = 0.07269935600356389
learning rate =  0.0776213252405307
Initial Cost on Val dataset for this epoch 2846 = 0.07269935600356389
Error on this batch = 0.38861641339760084
Error on this batch = 0.38830165548

Cost on val dataset after 2878 epochs is = 0.07268957351755621
learning rate =  0.07733256618834435
Initial Cost on Val dataset for this epoch 2878 = 0.07268957351755621
Error on this batch = 0.38852766377378956
Error on this batch = 0.3882027547530772
Cost on val dataset after 2879 epochs is = 0.0726892690822256
learning rate =  0.07732361151408779
Initial Cost on Val dataset for this epoch 2879 = 0.0726892690822256
Error on this batch = 0.388524936115816
Error on this batch = 0.38819969050135555
Cost on val dataset after 2880 epochs is = 0.07268896472837803
learning rate =  0.0773146609859937
Initial Cost on Val dataset for this epoch 2880 = 0.07268896472837803
Error on this batch = 0.388522211291034
Error on this batch = 0.3881966278674571
Cost on val dataset after 2881 epochs is = 0.07268866045641265
learning rate =  0.07730571460070369
Initial Cost on Val dataset for this epoch 2881 = 0.07268866045641265
Error on this batch = 0.38851948930490865
Error on this batch = 0.38819356685

Error on this batch = 0.3880994879454558
Cost on val dataset after 2913 epochs is = 0.07267896928989866
learning rate =  0.0770215977769523
Initial Cost on Val dataset for this epoch 2913 = 0.07267896928989866
Error on this batch = 0.38843391775821257
Error on this batch = 0.3880964796310132
Cost on val dataset after 2914 epochs is = 0.07267866793295075
learning rate =  0.0770127862452684
Initial Cost on Val dataset for this epoch 2914 = 0.07267866793295075
Error on this batch = 0.38843129255506625
Error on this batch = 0.38809347298715907
Cost on val dataset after 2915 epochs is = 0.07267836667040506
learning rate =  0.07700397874447736
Initial Cost on Val dataset for this epoch 2915 = 0.07267836667040506
Error on this batch = 0.38842867037679923
Error on this batch = 0.38809046801493347
Cost on val dataset after 2916 epochs is = 0.07267806550259177
learning rate =  0.07699517527135338
Initial Cost on Val dataset for this epoch 2916 = 0.07267806550259177
Error on this batch = 0.388426

Error on this batch = 0.38799519417087397
Cost on val dataset after 2948 epochs is = 0.0726684799537587
learning rate =  0.07671557162755184
Initial Cost on Val dataset for this epoch 2948 = 0.0726684799537587
Error on this batch = 0.38834386436262175
Error on this batch = 0.387992244466203
Cost on val dataset after 2949 epochs is = 0.0726681820745627
learning rate =  0.076706899281601
Initial Cost on Val dataset for this epoch 2949 = 0.0726681820745627
Error on this batch = 0.3883413474690447
Error on this batch = 0.3879892964197522
Cost on val dataset after 2950 epochs is = 0.07266788429907321
learning rate =  0.07669823085579755
Initial Cost on Val dataset for this epoch 2950 = 0.07266788429907321
Error on this batch = 0.38833883371796235
Error on this batch = 0.38798635002904386
Cost on val dataset after 2951 epochs is = 0.07266758662746473
learning rate =  0.07668956634704152
Initial Cost on Val dataset for this epoch 2951 = 0.07266758662746473
Error on this batch = 0.388336323109

Error on this batch = 0.38789291496886086
Cost on val dataset after 2983 epochs is = 0.07265811662589476
learning rate =  0.076414351899248
Initial Cost on Val dataset for this epoch 2983 = 0.07265811662589476
Error on this batch = 0.3882576332772888
Error on this batch = 0.38789002052525334
Cost on val dataset after 2984 epochs is = 0.07265782242786592
learning rate =  0.0764058149366142
Initial Cost on Val dataset for this epoch 2984 = 0.07265782242786592
Error on this batch = 0.38825522495853537
Error on this batch = 0.38788712752582927
Cost on val dataset after 2985 epochs is = 0.07265752833459115
learning rate =  0.07639728178767831
Initial Cost on Val dataset for this epoch 2985 = 0.07265752833459115
Error on this batch = 0.388252819630313
Error on this batch = 0.3878842359595263
Cost on val dataset after 2986 epochs is = 0.07265723434591242
learning rate =  0.07638875244945988
Initial Cost on Val dataset for this epoch 2986 = 0.07265723434591242
Error on this batch = 0.388250417

Error on this batch = 0.38779523015048756
Cost on val dataset after 3017 epochs is = 0.07264817110978727
learning rate =  0.07612621697214732
Initial Cost on Val dataset for this epoch 3017 = 0.07264817110978727
Error on this batch = 0.38817733956843214
Error on this batch = 0.3877923762521621
Cost on val dataset after 3018 epochs is = 0.07264788030600156
learning rate =  0.07611780802289388
Initial Cost on Val dataset for this epoch 3018 = 0.07264788030600156
Error on this batch = 0.38817502377421986
Error on this batch = 0.3877895231916743
Cost on val dataset after 3019 epochs is = 0.0726475895941336
learning rate =  0.07610940278784069
Initial Cost on Val dataset for this epoch 3019 = 0.0726475895941336
Error on this batch = 0.3881727103100298
Error on this batch = 0.3877866709428568
Cost on val dataset after 3020 epochs is = 0.07264729897357003
learning rate =  0.07610100126411774
Initial Cost on Val dataset for this epoch 3020 = 0.07264729897357003
Error on this batch = 0.38817039

Error on this batch = 0.38770132844996363
Cost on val dataset after 3050 epochs is = 0.07263861932226837
learning rate =  0.07585066721428158
Initial Cost on Val dataset for this epoch 3050 = 0.07263861932226837
Error on this batch = 0.3881019684320851
Error on this batch = 0.387698485348247
Cost on val dataset after 3051 epochs is = 0.07263833117132747
learning rate =  0.07584237933536789
Initial Cost on Val dataset for this epoch 3051 = 0.07263833117132747
Error on this batch = 0.3880997113609886
Error on this batch = 0.3876956419259399
Cost on val dataset after 3052 epochs is = 0.0726380430871621
learning rate =  0.07583409507759196
Initial Cost on Val dataset for this epoch 3052 = 0.0726380430871621
Error on this batch = 0.38809745539939705
Error on this batch = 0.38769279813851676
Cost on val dataset after 3053 epochs is = 0.07263775506893405
learning rate =  0.07582581443818594
Initial Cost on Val dataset for this epoch 3053 = 0.07263775506893405
Error on this batch = 0.388095200

Error on this batch = 0.3875984300815415
Cost on val dataset after 3086 epochs is = 0.07262828234377544
learning rate =  0.07555456530409847
Initial Cost on Val dataset for this epoch 3086 = 0.07262828234377544
Error on this batch = 0.38802109566606374
Error on this batch = 0.38759554428782844
Cost on val dataset after 3087 epochs is = 0.07262799612268153
learning rate =  0.07554640606372141
Initial Cost on Val dataset for this epoch 3087 = 0.07262799612268153
Error on this batch = 0.388018850619681
Error on this batch = 0.3875926563212538
Cost on val dataset after 3088 epochs is = 0.07262770994424966
learning rate =  0.0755382503467128
Initial Cost on Val dataset for this epoch 3088 = 0.07262770994424966
Error on this batch = 0.3880166051130224
Error on this batch = 0.3875897661260999
Cost on val dataset after 3089 epochs is = 0.07262742380813085
learning rate =  0.07553009815041091
Initial Cost on Val dataset for this epoch 3089 = 0.07262742380813085
Error on this batch = 0.388014359

Error on this batch = 0.3874957747449264
Cost on val dataset after 3121 epochs is = 0.07261828907141413
learning rate =  0.07527107102317733
Initial Cost on Val dataset for this epoch 3121 = 0.07261828907141413
Error on this batch = 0.38794201995582966
Error on this batch = 0.38749278058263303
Cost on val dataset after 3122 epochs is = 0.07261800430847798
learning rate =  0.07526303353555369
Initial Cost on Val dataset for this epoch 3122 = 0.07261800430847798
Error on this batch = 0.3879397395015145
Error on this batch = 0.38748978240454635
Cost on val dataset after 3123 epochs is = 0.07261771959186288
learning rate =  0.07525499947982063
Initial Cost on Val dataset for this epoch 3123 = 0.07261771959186288
Error on this batch = 0.3879374575964573
Error on this batch = 0.38748678016477206
Cost on val dataset after 3124 epochs is = 0.07261743492215685
learning rate =  0.07524696885341457
Initial Cost on Val dataset for this epoch 3124 = 0.07261743492215685
Error on this batch = 0.38793

Error on this batch = 0.38738832079290514
Cost on val dataset after 3156 epochs is = 0.07260835492945976
learning rate =  0.07499178429463556
Initial Cost on Val dataset for this epoch 3156 = 0.07260835492945976
Error on this batch = 0.38786127878350257
Error on this batch = 0.38738516277731627
Cost on val dataset after 3157 epochs is = 0.07260807227598562
learning rate =  0.07498386541555929
Initial Cost on Val dataset for this epoch 3157 = 0.07260807227598562
Error on this batch = 0.38785894310659913
Error on this batch = 0.38738199951235175
Cost on val dataset after 3158 epochs is = 0.07260778970112525
learning rate =  0.0749759498802513
Initial Cost on Val dataset for this epoch 3158 = 0.07260778970112525
Error on this batch = 0.38785660585091736
Error on this batch = 0.38737883097471554
Cost on val dataset after 3159 epochs is = 0.07260750720605055
learning rate =  0.0749680376862415
Initial Cost on Val dataset for this epoch 3159 = 0.07260750720605055
Error on this batch = 0.3878

Cost on val dataset after 3190 epochs is = 0.07259879604277648
learning rate =  0.07472440359900252
Initial Cost on Val dataset for this epoch 3190 = 0.07259879604277648
Error on this batch = 0.3877810363256283
Error on this batch = 0.3872745476324671
Cost on val dataset after 3191 epochs is = 0.07259851674296489
learning rate =  0.07471659703776738
Initial Cost on Val dataset for this epoch 3191 = 0.07259851674296489
Error on this batch = 0.3877786526980354
Error on this batch = 0.3872711961263147
Cost on val dataset after 3192 epochs is = 0.07259823756311538
learning rate =  0.07470879373775892
Initial Cost on Val dataset for this epoch 3192 = 0.07259823756311538
Error on this batch = 0.3877762678854093
Error on this batch = 0.38726783891933536
Cost on val dataset after 3193 epochs is = 0.07259795850441302
learning rate =  0.07470099369659368
Initial Cost on Val dataset for this epoch 3193 = 0.07259795850441302
Error on this batch = 0.38777388190224144
Error on this batch = 0.3872644

Cost on val dataset after 3225 epochs is = 0.07258909930451558
learning rate =  0.07445309891519393
Initial Cost on Val dataset for this epoch 3225 = 0.07258909930451558
Error on this batch = 0.3876969981906481
Error on this batch = 0.3871538585746871
Cost on val dataset after 3226 epochs is = 0.07258882485871657
learning rate =  0.07444540509485237
Initial Cost on Val dataset for this epoch 3226 = 0.07258882485871657
Error on this batch = 0.3876945814998956
Error on this batch = 0.38715030887365876
Cost on val dataset after 3227 epochs is = 0.07258855056948076
learning rate =  0.07443771445377546
Initial Cost on Val dataset for this epoch 3227 = 0.07258855056948076
Error on this batch = 0.3876921640986316
Error on this batch = 0.3871467536458563
Cost on val dataset after 3228 epochs is = 0.0725882764376687
learning rate =  0.07443002698966485
Initial Cost on Val dataset for this epoch 3228 = 0.0725882764376687
Error on this batch = 0.3876897459979318
Error on this batch = 0.3871431929

Cost on val dataset after 3260 epochs is = 0.07257959182953944
learning rate =  0.07418569194626633
Initial Cost on Val dataset for this epoch 3260 = 0.07257959182953944
Error on this batch = 0.38761205145331884
Error on this batch = 0.38702644112715234
Cost on val dataset after 3261 epochs is = 0.0725793232922377
learning rate =  0.07417810804803864
Initial Cost on Val dataset for this epoch 3261 = 0.0725793232922377
Error on this batch = 0.387609614907434
Error on this batch = 0.38702270891271606
Cost on val dataset after 3262 epochs is = 0.07257905493394125
learning rate =  0.07417052725002489
Initial Cost on Val dataset for this epoch 3262 = 0.07257905493394125
Error on this batch = 0.3876071778896978
Error on this batch = 0.3870189719195093
Cost on val dataset after 3263 epochs is = 0.07257878675504871
learning rate =  0.07416294955000792
Initial Cost on Val dataset for this epoch 3263 = 0.07257878675504871
Error on this batch = 0.3876047404018453
Error on this batch = 0.387015230

Cost on val dataset after 3295 epochs is = 0.07257030143539697
learning rate =  0.07392208574191553
Initial Cost on Val dataset for this epoch 3295 = 0.07257030143539697
Error on this batch = 0.3875264868333606
Error on this batch = 0.38689320079366063
Cost on val dataset after 3296 epochs is = 0.07257003931292501
learning rate =  0.07391460904663424
Initial Cost on Val dataset for this epoch 3296 = 0.07257003931292501
Error on this batch = 0.3875240327899651
Error on this batch = 0.3868893232827945
Cost on val dataset after 3297 epochs is = 0.07256977737557242
learning rate =  0.07390713537529446
Initial Cost on Val dataset for this epoch 3297 = 0.07256977737557242
Error on this batch = 0.38752157815257265
Error on this batch = 0.3868854424019786
Cost on val dataset after 3298 epochs is = 0.07256951562329401
learning rate =  0.07389966472575653
Initial Cost on Val dataset for this epoch 3298 = 0.07256951562329401
Error on this batch = 0.38751912291225965
Error on this batch = 0.386881

Cost on val dataset after 3330 epochs is = 0.07256123648801696
learning rate =  0.07366218676052967
Initial Cost on Val dataset for this epoch 3330 = 0.07256123648801696
Error on this batch = 0.38744016911769236
Error on this batch = 0.3867558343006108
Cost on val dataset after 3331 epochs is = 0.07256098075605773
learning rate =  0.07365481464411129
Initial Cost on Val dataset for this epoch 3331 = 0.07256098075605773
Error on this batch = 0.3874376872081197
Error on this batch = 0.3867518715113024
Cost on val dataset after 3332 epochs is = 0.07256072520234816
learning rate =  0.07364744547801468
Initial Cost on Val dataset for this epoch 3332 = 0.07256072520234816
Error on this batch = 0.3874352042361956
Error on this batch = 0.38674790735026593
Cost on val dataset after 3333 epochs is = 0.0725604698265718
learning rate =  0.07364007926017418
Initial Cost on Val dataset for this epoch 3333 = 0.0725604698265718
Error on this batch = 0.3874327201843963
Error on this batch = 0.386743941

Cost on val dataset after 3365 epochs is = 0.07255238969932797
learning rate =  0.07340590471501358
Initial Cost on Val dataset for this epoch 3365 = 0.07255238969932797
Error on this batch = 0.3873525466779488
Error on this batch = 0.3866167333834322
Cost on val dataset after 3366 epochs is = 0.07255214000028723
learning rate =  0.07339863464418116
Initial Cost on Val dataset for this epoch 3366 = 0.07255214000028723
Error on this batch = 0.3873500161022851
Error on this batch = 0.3866127600983512
Cost on val dataset after 3367 epochs is = 0.07255189046714136
learning rate =  0.07339136745258473
Initial Cost on Val dataset for this epoch 3367 = 0.07255189046714136
Error on this batch = 0.38734748376163636
Error on this batch = 0.3866087876356895
Cost on val dataset after 3368 epochs is = 0.07255164109953338
learning rate =  0.0733841031382294
Initial Cost on Val dataset for this epoch 3368 = 0.07255164109953338
Error on this batch = 0.38734494963403676
Error on this batch = 0.38660481

Error on this batch = 0.38648244663834713
Cost on val dataset after 3400 epochs is = 0.07254374670131482
learning rate =  0.07315315242710727
Initial Cost on Val dataset for this epoch 3400 = 0.07254374670131482
Error on this batch = 0.3872627809168058
Error on this batch = 0.3864785328423947
Cost on val dataset after 3401 epochs is = 0.07254350260887171
learning rate =  0.07314598195534724
Initial Cost on Val dataset for this epoch 3401 = 0.07254350260887171
Error on this batch = 0.3872601753324654
Error on this batch = 0.38647462169081215
Cost on val dataset after 3402 epochs is = 0.0725432586710363
learning rate =  0.07313881429415924
Initial Cost on Val dataset for this epoch 3402 = 0.0725432586710363
Error on this batch = 0.3872575672062101
Error on this batch = 0.3864707132285552
Cost on val dataset after 3403 epochs is = 0.07254301488752508
learning rate =  0.07313164944161593
Initial Cost on Val dataset for this epoch 3403 = 0.07254301488752508
Error on this batch = 0.387254956

Error on this batch = 0.38635490876430295
Cost on val dataset after 3433 epochs is = 0.0725357717892752
learning rate =  0.07291800040230173
Initial Cost on Val dataset for this epoch 3433 = 0.0725357717892752
Error on this batch = 0.38717534020850186
Error on this batch = 0.3863511027328675
Cost on val dataset after 3434 epochs is = 0.07253553265523986
learning rate =  0.07291092167199784
Initial Cost on Val dataset for this epoch 3434 = 0.07253553265523986
Error on this batch = 0.3871726398501705
Error on this batch = 0.3863473005134195
Cost on val dataset after 3435 epochs is = 0.07253529366688018
learning rate =  0.07290384568964826
Initial Cost on Val dataset for this epoch 3435 = 0.07253529366688018
Error on this batch = 0.38716993629247676
Error on this batch = 0.3863435021309541
Cost on val dataset after 3436 epochs is = 0.07253505482391778
learning rate =  0.07289677245338679
Initial Cost on Val dataset for this epoch 3436 = 0.07253505482391778
Error on this batch = 0.38716722

Error on this batch = 0.38623143460397324
Cost on val dataset after 3466 epochs is = 0.07252795565764979
learning rate =  0.07268584311331097
Initial Cost on Val dataset for this epoch 3466 = 0.07252795565764979
Error on this batch = 0.38708445123323815
Error on this batch = 0.3862277643639389
Cost on val dataset after 3467 epochs is = 0.0725277211686828
learning rate =  0.07267885408989161
Initial Cost on Val dataset for this epoch 3467 = 0.0725277211686828
Error on this batch = 0.38708163709308396
Error on this batch = 0.3862240984558568
Cost on val dataset after 3468 epochs is = 0.07252748681456711
learning rate =  0.07267186775378302
Initial Cost on Val dataset for this epoch 3468 = 0.07252748681456711
Error on this batch = 0.387078819280042
Error on this batch = 0.38622043688635116
Cost on val dataset after 3469 epochs is = 0.07252725259490335
learning rate =  0.07266488410317748
Initial Cost on Val dataset for this epoch 3469 = 0.07252725259490335
Error on this batch = 0.38707599

Error on this batch = 0.3861126214596965
Cost on val dataset after 3499 epochs is = 0.07252028632960063
learning rate =  0.0724566144481731
Initial Cost on Val dataset for this epoch 3499 = 0.07252028632960063
Error on this batch = 0.38698959766174834
Error on this batch = 0.38610909523713455
Cost on val dataset after 3500 epochs is = 0.07252005605114954
learning rate =  0.07244971316091847
Initial Cost on Val dataset for this epoch 3500 = 0.07252005605114954
Error on this batch = 0.38698665808366634
Error on this batch = 0.3861055733377161
Cost on val dataset after 3501 epochs is = 0.07251982589159091
learning rate =  0.07244281450222499
Initial Cost on Val dataset for this epoch 3501 = 0.07251982589159091
Error on this batch = 0.38698371461606584
Error on this batch = 0.38610205575489104
Cost on val dataset after 3502 epochs is = 0.07251959585035382
learning rate =  0.07243591847034113
Initial Cost on Val dataset for this epoch 3502 = 0.07251959585035382
Error on this batch = 0.38698

Error on this batch = 0.3859951034836601
Cost on val dataset after 3533 epochs is = 0.07251251995226052
learning rate =  0.07222343489984395
Initial Cost on Val dataset for this epoch 3533 = 0.07251251995226052
Error on this batch = 0.38688746899465914
Error on this batch = 0.3859917190189776
Cost on val dataset after 3534 epochs is = 0.07251229337202607
learning rate =  0.07221662201093457
Initial Cost on Val dataset for this epoch 3534 = 0.07251229337202607
Error on this batch = 0.38688439749222225
Error on this batch = 0.3859883385179802
Cost on val dataset after 3535 epochs is = 0.07251206688919666
learning rate =  0.07220981169195691
Initial Cost on Val dataset for this epoch 3535 = 0.07251206688919666
Error on this batch = 0.38688132216875964
Error on this batch = 0.3859849619668958
Cost on val dataset after 3536 epochs is = 0.072511840503096
learning rate =  0.07220300394121497
Initial Cost on Val dataset for this epoch 3536 = 0.072511840503096
Error on this batch = 0.3868782430

Error on this batch = 0.3858821684889563
Cost on val dataset after 3567 epochs is = 0.0725048667997233
learning rate =  0.07199322833331663
Initial Cost on Val dataset for this epoch 3567 = 0.0725048667997233
Error on this batch = 0.38678094104872585
Error on this batch = 0.38587891053812995
Cost on val dataset after 3568 epochs is = 0.07250464315025797
learning rate =  0.07198650188010236
Initial Cost on Val dataset for this epoch 3568 = 0.07250464315025797
Error on this batch = 0.38677774432572287
Error on this batch = 0.3858756560401207
Cost on val dataset after 3569 epochs is = 0.0725044195752402
learning rate =  0.07197977794004082
Initial Cost on Val dataset for this epoch 3569 = 0.0725044195752402
Error on this batch = 0.3867745441023465
Error on this batch = 0.3858724049793372
Cost on val dataset after 3570 epochs is = 0.07250419607401788
learning rate =  0.07197305651148928
Initial Cost on Val dataset for this epoch 3570 = 0.07250419607401788
Error on this batch = 0.3867713403

Error on this batch = 0.3857732429562435
Cost on val dataset after 3601 epochs is = 0.07249730072049626
learning rate =  0.07176592904318149
Initial Cost on Val dataset for this epoch 3601 = 0.07249730072049626
Error on this batch = 0.3866703680735497
Error on this batch = 0.3857700938150867
Cost on val dataset after 3602 epochs is = 0.07249707925521479
learning rate =  0.0717592871246759
Initial Cost on Val dataset for this epoch 3602 = 0.07249707925521479
Error on this batch = 0.38666705993625416
Error on this batch = 0.38576694761513575
Cost on val dataset after 3603 epochs is = 0.07249685784408502
learning rate =  0.07175264766431928
Initial Cost on Val dataset for this epoch 3603 = 0.07249685784408502
Error on this batch = 0.3866637487768215
Error on this batch = 0.3857638043422975
Cost on val dataset after 3604 epochs is = 0.07249663648658107
learning rate =  0.07174601066052004
Initial Cost on Val dataset for this epoch 3604 = 0.07249663648658107
Error on this batch = 0.38666043

Error on this batch = 0.38567080049158475
Cost on val dataset after 3634 epochs is = 0.072490018285571
learning rate =  0.07154803501107487
Initial Cost on Val dataset for this epoch 3634 = 0.072490018285571
Error on this batch = 0.38655968870113705
Error on this batch = 0.3856677413917782
Cost on val dataset after 3635 epochs is = 0.07248979835602047
learning rate =  0.07154147337906715
Initial Cost on Val dataset for this epoch 3635 = 0.07248979835602047
Error on this batch = 0.3865562890643791
Error on this batch = 0.38566468480978133
Cost on val dataset after 3636 epochs is = 0.07248957846584796
learning rate =  0.0715349141534524
Initial Cost on Val dataset for this epoch 3636 = 0.07248957846584796
Error on this batch = 0.3865528869210327
Error on this batch = 0.38566163073417925
Cost on val dataset after 3637 epochs is = 0.07248935861469136
learning rate =  0.07152835733268667
Initial Cost on Val dataset for this epoch 3637 = 0.07248935861469136
Error on this batch = 0.3865494822

Error on this batch = 0.3855711207344639
Cost on val dataset after 3667 epochs is = 0.0724827796313049
learning rate =  0.07133276336389296
Initial Cost on Val dataset for this epoch 3667 = 0.0724827796313049
Error on this batch = 0.3864462603468936
Error on this batch = 0.3855681391474445
Cost on val dataset after 3668 epochs is = 0.07248256083723663
learning rate =  0.07132628033528651
Initial Cost on Val dataset for this epoch 3668 = 0.07248256083723663
Error on this batch = 0.3864427859687261
Error on this batch = 0.3855651597447796
Cost on val dataset after 3669 epochs is = 0.07248234207320578
learning rate =  0.07131979966285985
Initial Cost on Val dataset for this epoch 3669 = 0.07248234207320578
Error on this batch = 0.3864393095729195
Error on this batch = 0.385562182517683
Cost on val dataset after 3670 epochs is = 0.07248212333900991
learning rate =  0.07131332134511481
Initial Cost on Val dataset for this epoch 3670 = 0.07248212333900991
Error on this batch = 0.386435831173

Cost on val dataset after 3700 epochs is = 0.07247557435830956
learning rate =  0.07112005936412875
Initial Cost on Val dataset for this epoch 3700 = 0.07247557435830956
Error on this batch = 0.386330612903129
Error on this batch = 0.38547092278231637
Cost on val dataset after 3701 epochs is = 0.07247535647176312
learning rate =  0.07111365330578354
Initial Cost on Val dataset for this epoch 3701 = 0.07247535647176312
Error on this batch = 0.38632707882498635
Error on this batch = 0.3854680109357496
Cost on val dataset after 3702 epochs is = 0.07247513861076839
learning rate =  0.07110724955488841
Initial Cost on Val dataset for this epoch 3702 = 0.07247513861076839
Error on this batch = 0.3863235431440254
Error on this batch = 0.38546510101144904
Cost on val dataset after 3703 epochs is = 0.07247492077525393
learning rate =  0.07110084810998932
Initial Cost on Val dataset for this epoch 3703 = 0.07247492077525393
Error on this batch = 0.386320005871373
Error on this batch = 0.38546219

Error on this batch = 0.3853786656963552
Cost on val dataset after 3733 epochs is = 0.07246839733419139
learning rate =  0.07090986989201883
Initial Cost on Val dataset for this epoch 3733 = 0.07246839733419139
Error on this batch = 0.38621319901815454
Error on this batch = 0.38537581227932427
Cost on val dataset after 3734 epochs is = 0.0724681802701728
learning rate =  0.07090353921884066
Initial Cost on Val dataset for this epoch 3734 = 0.0724681802701728
Error on this batch = 0.38620961742389215
Error on this batch = 0.385372960593221
Cost on val dataset after 3735 epochs is = 0.07246796323094255
learning rate =  0.07089721080581
Initial Cost on Val dataset for this epoch 3735 = 0.07246796323094255
Error on this batch = 0.38620603454563807
Error on this batch = 0.3853701106327289
Cost on val dataset after 3736 epochs is = 0.07246774621652205
learning rate =  0.07089088465151516
Initial Cost on Val dataset for this epoch 3736 = 0.07246774621652205
Error on this batch = 0.38620245039

Cost on val dataset after 3767 epochs is = 0.07246103128367633
learning rate =  0.07069588655705321
Initial Cost on Val dataset for this epoch 3767 = 0.07246103128367633
Error on this batch = 0.38609074871648247
Error on this batch = 0.3852797929105601
Cost on val dataset after 3768 epochs is = 0.07246081508596117
learning rate =  0.07068963194439055
Initial Cost on Val dataset for this epoch 3768 = 0.07246081508596117
Error on this batch = 0.38608712746633517
Error on this batch = 0.38527699712169866
Cost on val dataset after 3769 epochs is = 0.07246059891478471
learning rate =  0.07068337954457514
Initial Cost on Val dataset for this epoch 3769 = 0.07246059891478471
Error on this batch = 0.3860835051570997
Error on this batch = 0.3852742028960976
Cost on val dataset after 3770 epochs is = 0.07246038277022386
learning rate =  0.07067712935623725
Initial Cost on Val dataset for this epoch 3770 = 0.07246038277022386
Error on this batch = 0.38607988179387676
Error on this batch = 0.38527

Error on this batch = 0.3851883347738961
Cost on val dataset after 3801 epochs is = 0.07245369595051143
learning rate =  0.07048446299331174
Initial Cost on Val dataset for this epoch 3801 = 0.07245369595051143
Error on this batch = 0.38596705886628485
Error on this batch = 0.38518558842782136
Cost on val dataset after 3802 epochs is = 0.07245348070323698
learning rate =  0.0704782828562355
Initial Cost on Val dataset for this epoch 3802 = 0.07245348070323698
Error on this batch = 0.3859634039931635
Error on this batch = 0.3851828435150156
Cost on val dataset after 3803 epochs is = 0.07245326548553965
learning rate =  0.07047210488610775
Initial Cost on Val dataset for this epoch 3803 = 0.07245326548553965
Error on this batch = 0.3859597481905068
Error on this batch = 0.3851801000318968
Cost on val dataset after 3804 epochs is = 0.0724530502975169
learning rate =  0.07046592908159921
Initial Cost on Val dataset for this epoch 3804 = 0.0724530502975169
Error on this batch = 0.3859560914

Error on this batch = 0.3850957425450844
Cost on val dataset after 3835 epochs is = 0.07244639472043186
learning rate =  0.07027554608994395
Initial Cost on Val dataset for this epoch 3835 = 0.07244639472043186
Error on this batch = 0.38584228383832286
Error on this batch = 0.3850930430374531
Cost on val dataset after 3836 epochs is = 0.07244618053342297
learning rate =  0.07026943889028776
Initial Cost on Val dataset for this epoch 3836 = 0.07244618053342297
Error on this batch = 0.385838598396898
Error on this batch = 0.38509034485219656
Cost on val dataset after 3837 epochs is = 0.07244596637927107
learning rate =  0.07026333381302938
Initial Cost on Val dataset for this epoch 3837 = 0.07244596637927107
Error on this batch = 0.3858349120768123
Error on this batch = 0.3850876479863877
Cost on val dataset after 3838 epochs is = 0.07244575225806796
learning rate =  0.07025723085687839
Initial Cost on Val dataset for this epoch 3838 = 0.07244575225806796
Error on this batch = 0.38583122

Cost on val dataset after 3868 epochs is = 0.07243934437591258
learning rate =  0.070075122132274
Initial Cost on Val dataset for this epoch 3868 = 0.07243934437591258
Error on this batch = 0.3857202010430735
Error on this batch = 0.3850046845080874
Cost on val dataset after 3869 epochs is = 0.07243913131853229
learning rate =  0.07006908429645813
Initial Cost on Val dataset for this epoch 3869 = 0.07243913131853229
Error on this batch = 0.3857164866050484
Error on this batch = 0.3850020284335497
Cost on val dataset after 3870 epochs is = 0.07243891829671842
learning rate =  0.0700630485410405
Initial Cost on Val dataset for this epoch 3870 = 0.07243891829671842
Error on this batch = 0.3857127712799732
Error on this batch = 0.38499937359262076
Cost on val dataset after 3871 epochs is = 0.07243870531054067
learning rate =  0.07005701486476704
Initial Cost on Val dataset for this epoch 3871 = 0.07243870531054067
Error on this batch = 0.3857090550668775
Error on this batch = 0.38499671998

Cost on val dataset after 3902 epochs is = 0.07243212075333373
learning rate =  0.0698709953630748
Initial Cost on Val dataset for this epoch 3902 = 0.07243212075333373
Error on this batch = 0.3855934050846108
Error on this batch = 0.38491505735445813
Cost on val dataset after 3903 epochs is = 0.07243190893889308
learning rate =  0.06986502756364682
Initial Cost on Val dataset for this epoch 3903 = 0.07243190893889308
Error on this batch = 0.3855896597367472
Error on this batch = 0.38491244206715436
Cost on val dataset after 3904 epochs is = 0.07243169716187053
learning rate =  0.06985906180257574
Initial Cost on Val dataset for this epoch 3904 = 0.07243169716187053
Error on this batch = 0.3855859134501307
Error on this batch = 0.3849098279474144
Cost on val dataset after 3905 epochs is = 0.07243148542230321
learning rate =  0.06985309807864354
Initial Cost on Val dataset for this epoch 3905 = 0.07243148542230321
Error on this batch = 0.38558216622272545
Error on this batch = 0.3849072

Cost on val dataset after 3935 epochs is = 0.07242515079303825
learning rate =  0.06967512763087125
Initial Cost on Val dataset for this epoch 3935 = 0.07242515079303825
Error on this batch = 0.38546930085570535
Error on this batch = 0.3848293615253819
Cost on val dataset after 3936 epochs is = 0.0724249402272277
learning rate =  0.06966922645970464
Initial Cost on Val dataset for this epoch 3936 = 0.0724249402272277
Error on this batch = 0.38546552332584866
Error on this batch = 0.384826784041776
Cost on val dataset after 3937 epochs is = 0.07242472969949865
learning rate =  0.06966332728724124
Initial Cost on Val dataset for this epoch 3937 = 0.07242472969949865
Error on this batch = 0.385461744778039
Error on this batch = 0.3848242076842546
Cost on val dataset after 3938 epochs is = 0.0724245192098528
learning rate =  0.06965743011229669
Initial Cost on Val dataset for this epoch 3938 = 0.0724245192098528
Error on this batch = 0.3854579652095367
Error on this batch = 0.3848216324519

Error on this batch = 0.384744895080174
Cost on val dataset after 3969 epochs is = 0.07241801287949708
learning rate =  0.06947560204263543
Initial Cost on Val dataset for this epoch 3969 = 0.07241801287949708
Error on this batch = 0.3853402766252536
Error on this batch = 0.3847423543961074
Cost on val dataset after 3970 epochs is = 0.07241780360299234
learning rate =  0.06946976816889547
Initial Cost on Val dataset for this epoch 3970 = 0.07241780360299234
Error on this batch = 0.38533646285326245
Error on this batch = 0.38473981481949027
Cost on val dataset after 3971 epochs is = 0.07241759436402853
learning rate =  0.06946393625414603
Initial Cost on Val dataset for this epoch 3971 = 0.07241759436402853
Error on this batch = 0.38533264796157524
Error on this batch = 0.38473727635011645
Cost on val dataset after 3972 epochs is = 0.07241738516257151
learning rate =  0.06945810629723621
Initial Cost on Val dataset for this epoch 3972 = 0.07241738516257151
Error on this batch = 0.385328

Error on this batch = 0.3846616369712186
Cost on val dataset after 4002 epochs is = 0.0724111263514947
learning rate =  0.0692841123161896
Initial Cost on Val dataset for this epoch 4002 = 0.0724111263514947
Error on this batch = 0.3852138128988979
Error on this batch = 0.38465913283824704
Cost on val dataset after 4003 epochs is = 0.07241091829070222
learning rate =  0.06927834248668956
Initial Cost on Val dataset for this epoch 4003 = 0.07241091829070222
Error on this batch = 0.3852099604148157
Error on this batch = 0.38465662981694126
Cost on val dataset after 4004 epochs is = 0.07241071026583844
learning rate =  0.06927257457870463
Initial Cost on Val dataset for this epoch 4004 = 0.07241071026583844
Error on this batch = 0.3852061066990691
Error on this batch = 0.3846541279077802
Cost on val dataset after 4005 epochs is = 0.07241050227684022
learning rate =  0.06926680859111527
Initial Cost on Val dataset for this epoch 4005 = 0.07241050227684022
Error on this batch = 0.3852022517

Cost on val dataset after 4035 epochs is = 0.07240427894436482
learning rate =  0.06909471643214522
Initial Cost on Val dataset for this epoch 4035 = 0.07240427894436482
Error on this batch = 0.38508601091782624
Error on this batch = 0.384577123890346
Cost on val dataset after 4036 epochs is = 0.0724040720324297
learning rate =  0.06908900942654958
Initial Cost on Val dataset for this epoch 4036 = 0.0724040720324297
Error on this batch = 0.3850821158725197
Error on this batch = 0.38457465794212986
Cost on val dataset after 4037 epochs is = 0.07240386515402716
learning rate =  0.06908330430600948
Initial Cost on Val dataset for this epoch 4037 = 0.07240386515402716
Error on this batch = 0.38507821947443566
Error on this batch = 0.3845721931329711
Cost on val dataset after 4038 epochs is = 0.07240365830907751
learning rate =  0.06907760106943556
Initial Cost on Val dataset for this epoch 4038 = 0.07240365830907751
Error on this batch = 0.38507432171978695
Error on this batch = 0.38456972

Cost on val dataset after 4070 epochs is = 0.07239705645322886
learning rate =  0.06889608576126341
Initial Cost on Val dataset for this epoch 4070 = 0.07239705645322886
Error on this batch = 0.38494885431947795
Error on this batch = 0.3844915018176613
Cost on val dataset after 4071 epochs is = 0.0723968506673725
learning rate =  0.06889044409027838
Initial Cost on Val dataset for this epoch 4071 = 0.0723968506673725
Error on this batch = 0.3849449096334979
Error on this batch = 0.3844890765275375
Cost on val dataset after 4072 epochs is = 0.07239664491229575
learning rate =  0.06888480426675007
Initial Cost on Val dataset for this epoch 4072 = 0.07239664491229575
Error on this batch = 0.3849409634585818
Error on this batch = 0.3844866524263775
Cost on val dataset after 4073 epochs is = 0.07239643918792711
learning rate =  0.06887916628962007
Initial Cost on Val dataset for this epoch 4073 = 0.07239643918792711
Error on this batch = 0.38493701579073375
Error on this batch = 0.384484229

Cost on val dataset after 4105 epochs is = 0.07238987185015164
learning rate =  0.06869971963101609
Initial Cost on Val dataset for this epoch 4105 = 0.07238987185015164
Error on this batch = 0.3848098779352921
Error on this batch = 0.3844073357820411
Cost on val dataset after 4106 epochs is = 0.07238966710655591
learning rate =  0.06869414199685737
Initial Cost on Val dataset for this epoch 4106 = 0.07238966710655591
Error on this batch = 0.3848058787207011
Error on this batch = 0.38440495318424367
Cost on val dataset after 4107 epochs is = 0.07238946239200224
learning rate =  0.06868856617361885
Initial Cost on Val dataset for this epoch 4107 = 0.07238946239200224
Error on this batch = 0.38480187787380316
Error on this batch = 0.38440257184051363
Cost on val dataset after 4108 epochs is = 0.0723892577064743
learning rate =  0.06868299216027184
Initial Cost on Val dataset for this epoch 4108 = 0.0723892577064743
Error on this batch = 0.3847978753903911
Error on this batch = 0.38440019

Error on this batch = 0.3843293828025145
Cost on val dataset after 4139 epochs is = 0.07238292689678931
learning rate =  0.06851108987867979
Initial Cost on Val dataset for this epoch 4139 = 0.07238292689678931
Error on this batch = 0.38467296341763635
Error on this batch = 0.3843270426055616
Cost on val dataset after 4140 epochs is = 0.07238272314945206
learning rate =  0.06850557324367096
Initial Cost on Val dataset for this epoch 4140 = 0.07238272314945206
Error on this batch = 0.3846689063022993
Error on this batch = 0.3843247037246419
Cost on val dataset after 4141 epochs is = 0.07238251943239847
learning rate =  0.06850005838507012
Initial Cost on Val dataset for this epoch 4141 = 0.07238251943239847
Error on this batch = 0.38466484740756585
Error on this batch = 0.3843223661614222
Cost on val dataset after 4142 epochs is = 0.07238231574573477
learning rate =  0.06849454530187649
Initial Cost on Val dataset for this epoch 4142 = 0.07238231574573477
Error on this batch = 0.3846607

Cost on val dataset after 4172 epochs is = 0.07237622001943546
learning rate =  0.06832997343923819
Initial Cost on Val dataset for this epoch 4172 = 0.07237622001943546
Error on this batch = 0.3845381143351555
Error on this batch = 0.3842505631462018
Cost on val dataset after 4173 epochs is = 0.07237601735693465
learning rate =  0.06832451490163394
Initial Cost on Val dataset for this epoch 4173 = 0.07237601735693465
Error on this batch = 0.3845339960888475
Error on this batch = 0.3842482684608892
Cost on val dataset after 4174 epochs is = 0.07237581473117184
learning rate =  0.06831905810783215
Initial Cost on Val dataset for this epoch 4174 = 0.07237581473117184
Error on this batch = 0.3845298759104856
Error on this batch = 0.38424597513087466
Cost on val dataset after 4175 epochs is = 0.07237561214245018
learning rate =  0.06831360305685817
Initial Cost on Val dataset for this epoch 4175 = 0.07237561214245018
Error on this batch = 0.384525753795253
Error on this batch = 0.384243683

Error on this batch = 0.38417555383180413
Cost on val dataset after 4206 epochs is = 0.07236935225423395
learning rate =  0.06814535563737857
Initial Cost on Val dataset for this epoch 4206 = 0.07236935225423395
Error on this batch = 0.3843969806399223
Error on this batch = 0.3841733037394806
Cost on val dataset after 4207 epochs is = 0.0723691510544555
learning rate =  0.0681399558468934
Initial Cost on Val dataset for this epoch 4207 = 0.0723691510544555
Error on this batch = 0.3843927939211338
Error on this batch = 0.3841710549806178
Cost on val dataset after 4208 epochs is = 0.07236894990583187
learning rate =  0.06813455776750404
Initial Cost on Val dataset for this epoch 4208 = 0.07236894990583187
Error on this batch = 0.38438860509881595
Error on this batch = 0.3841688075532156
Cost on val dataset after 4209 epochs is = 0.07236874880891614
learning rate =  0.0681291613982618
Initial Cost on Val dataset for this epoch 4209 = 0.07236874880891614
Error on this batch = 0.38438441416

Error on this batch = 0.3840997824716814
Cost on val dataset after 4240 epochs is = 0.07236254377117281
learning rate =  0.0679627170435123
Initial Cost on Val dataset for this epoch 4240 = 0.07236254377117281
Error on this batch = 0.3842534196678972
Error on this batch = 0.3840975760331336
Cost on val dataset after 4241 epochs is = 0.0723623446586885
learning rate =  0.06795737490254991
Initial Cost on Val dataset for this epoch 4241 = 0.0723623446586885
Error on this batch = 0.3842491583612778
Error on this batch = 0.38409537080695005
Cost on val dataset after 4242 epochs is = 0.07236214561949363
learning rate =  0.067952034440846
Initial Cost on Val dataset for this epoch 4242 = 0.07236214561949363
Error on this batch = 0.3842448947621728
Error on this batch = 0.38409316678789834
Cost on val dataset after 4243 epochs is = 0.07236194665431381
learning rate =  0.067946695657477
Initial Cost on Val dataset for this epoch 4243 = 0.07236194665431381
Error on this batch = 0.38424062886476

Cost on val dataset after 4273 epochs is = 0.07235601573338046
learning rate =  0.06778730803090337
Initial Cost on Val dataset for this epoch 4273 = 0.07235601573338046
Error on this batch = 0.3841115539075233
Error on this batch = 0.3840254085676245
Cost on val dataset after 4274 epochs is = 0.07235581942244704
learning rate =  0.06778202082176946
Initial Cost on Val dataset for this epoch 4274 = 0.07235581942244704
Error on this batch = 0.38410721381680063
Error on this batch = 0.384023239939539
Cost on val dataset after 4275 epochs is = 0.07235562320831151
learning rate =  0.06777673526179623
Initial Cost on Val dataset for this epoch 4275 = 0.07235562320831151
Error on this batch = 0.38410287123616443
Error on this batch = 0.3840210723045652
Cost on val dataset after 4276 epochs is = 0.07235542709161778
learning rate =  0.06777145135008371
Initial Cost on Val dataset for this epoch 4276 = 0.07235542709161778
Error on this batch = 0.38409852615951323
Error on this batch = 0.3840189

Error on this batch = 0.38395432853389844
Cost on val dataset after 4307 epochs is = 0.07234939888759072
learning rate =  0.06760846274631395
Initial Cost on Val dataset for this epoch 4307 = 0.07234939888759072
Error on this batch = 0.3839625574560263
Error on this batch = 0.38395218888063776
Cost on val dataset after 4308 epochs is = 0.07234920617065212
learning rate =  0.06760323110791604
Initial Cost on Val dataset for this epoch 4308 = 0.07234920617065212
Error on this batch = 0.38395812928771494
Error on this batch = 0.383950049990713
Cost on val dataset after 4309 epochs is = 0.07234901356651849
learning rate =  0.06759800108846858
Initial Cost on Val dataset for this epoch 4309 = 0.07234901356651849
Error on this batch = 0.38395369842291915
Error on this batch = 0.38394791185822136
Cost on val dataset after 4310 epochs is = 0.07234882107543093
learning rate =  0.06759277268709508
Initial Cost on Val dataset for this epoch 4310 = 0.07234882107543093
Error on this batch = 0.38394

Error on this batch = 0.3838840958438426
Cost on val dataset after 4340 epochs is = 0.07234309946670078
learning rate =  0.06743666873025786
Initial Cost on Val dataset for this epoch 4340 = 0.07234309946670078
Error on this batch = 0.3838149720727877
Error on this batch = 0.3838819789599608
Cost on val dataset after 4341 epochs is = 0.07234291051434498
learning rate =  0.06743149005808806
Initial Cost on Val dataset for this epoch 4341 = 0.07234291051434498
Error on this batch = 0.38381045183577156
Error on this batch = 0.3838798627157115
Cost on val dataset after 4342 epochs is = 0.07234272167450118
learning rate =  0.0674263129762974
Initial Cost on Val dataset for this epoch 4342 = 0.07234272167450118
Error on this batch = 0.3838059287121601
Error on this batch = 0.3838777471100418
Cost on val dataset after 4343 epochs is = 0.07234253294690096
learning rate =  0.06742113748403143
Initial Cost on Val dataset for this epoch 4343 = 0.07234253294690096
Error on this batch = 0.383801402

Error on this batch = 0.3838124823628903
Cost on val dataset after 4374 epochs is = 0.07233673600073012
learning rate =  0.06726148099600167
Initial Cost on Val dataset for this epoch 4374 = 0.07233673600073012
Error on this batch = 0.38365963260739383
Error on this batch = 0.383810387665818
Cost on val dataset after 4375 epochs is = 0.07233655064818416
learning rate =  0.06725635592123358
Initial Cost on Val dataset for this epoch 4375 = 0.07233655064818416
Error on this batch = 0.38365501125589985
Error on this batch = 0.38380829366763586
Cost on val dataset after 4376 epochs is = 0.07233636539235737
learning rate =  0.06725123240815509
Initial Cost on Val dataset for this epoch 4376 = 0.07233636539235737
Error on this batch = 0.38365038684184727
Error on this batch = 0.38380620037280283
Cost on val dataset after 4377 epochs is = 0.07233618023262361
learning rate =  0.06724611045593358
Initial Cost on Val dataset for this epoch 4377 = 0.07233618023262361
Error on this batch = 0.38364

Error on this batch = 0.3837437553668205
Cost on val dataset after 4407 epochs is = 0.07233066682086521
learning rate =  0.0670931735839726
Initial Cost on Val dataset for this epoch 4407 = 0.07233066682086521
Error on this batch = 0.38350548592266515
Error on this batch = 0.38374168668093644
Cost on val dataset after 4408 epochs is = 0.07233048430530585
learning rate =  0.06708809960878998
Initial Cost on Val dataset for this epoch 4408 = 0.07233048430530585
Error on this batch = 0.3835007611268757
Error on this batch = 0.38373961889382313
Cost on val dataset after 4409 epochs is = 0.07233030186380626
learning rate =  0.06708302716815302
Initial Cost on Val dataset for this epoch 4409 = 0.07233030186380626
Error on this batch = 0.38349603312572716
Error on this batch = 0.38373755201263465
Cost on val dataset after 4410 epochs is = 0.0723301194956559
learning rate =  0.06707795626124972
Initial Cost on Val dataset for this epoch 4410 = 0.0723301194956559
Error on this batch = 0.3834913

Cost on val dataset after 4440 epochs is = 0.0723246790983304
learning rate =  0.06692653823780273
Initial Cost on Val dataset for this epoch 4440 = 0.0723246790983304
Error on this batch = 0.38334785681596556
Error on this batch = 0.3836739673673354
Cost on val dataset after 4441 epochs is = 0.07232449866117037
learning rate =  0.06692151447721954
Initial Cost on Val dataset for this epoch 4441 = 0.07232449866117037
Error on this batch = 0.38334302453984487
Error on this batch = 0.38367193325941673
Cost on val dataset after 4442 epochs is = 0.07232431827570648
learning rate =  0.0669164922247071
Initial Cost on Val dataset for this epoch 4442 = 0.07232431827570648
Error on this batch = 0.38333818896232374
Error on this batch = 0.3836699002891443
Cost on val dataset after 4443 epochs is = 0.07232413794131298
learning rate =  0.06691147147947331
Initial Cost on Val dataset for this epoch 4443 = 0.07232413794131298
Error on this batch = 0.3833333500815309
Error on this batch = 0.38366786

Cost on val dataset after 4473 epochs is = 0.07231874871078432
learning rate =  0.06676154610263178
Initial Cost on Val dataset for this epoch 4473 = 0.07231874871078432
Error on this batch = 0.3831866415312296
Error on this batch = 0.383607473438121
Cost on val dataset after 4474 epochs is = 0.07231856967020932
learning rate =  0.06675657169339627
Initial Cost on Val dataset for this epoch 4474 = 0.07231856967020932
Error on this batch = 0.38318169975298616
Error on this batch = 0.38360547972333975
Cost on val dataset after 4475 epochs is = 0.07231839066271228
learning rate =  0.06675159876640428
Initial Cost on Val dataset for this epoch 4475 = 0.07231839066271228
Error on this batch = 0.38317675465476947
Error on this batch = 0.38360348730515953
Cost on val dataset after 4476 epochs is = 0.07231821168779294
learning rate =  0.06674662732088309
Initial Cost on Val dataset for this epoch 4476 = 0.07231821168779294
Error on this batch = 0.38317180623757086
Error on this batch = 0.38360

Error on this batch = 0.3835443273888865
Cost on val dataset after 4506 epochs is = 0.07231285524007515
learning rate =  0.06659816902973432
Initial Cost on Val dataset for this epoch 4506 = 0.07231285524007515
Error on this batch = 0.3830218195603719
Error on this batch = 0.3835423759514752
Cost on val dataset after 4507 epochs is = 0.07231267704421566
learning rate =  0.06659324312964018
Initial Cost on Val dataset for this epoch 4507 = 0.07231267704421566
Error on this batch = 0.3830167693169764
Error on this batch = 0.38354042583731374
Cost on val dataset after 4508 epochs is = 0.07231249886653282
learning rate =  0.06658831868658965
Initial Cost on Val dataset for this epoch 4508 = 0.07231249886653282
Error on this batch = 0.3830117158421139
Error on this batch = 0.3835384770449578
Cost on val dataset after 4509 epochs is = 0.07231232070661844
learning rate =  0.06658339569982866
Initial Cost on Val dataset for this epoch 4509 = 0.07231232070661844
Error on this batch = 0.38300665

Error on this batch = 0.38348061524358235
Cost on val dataset after 4539 epochs is = 0.07230698221834425
learning rate =  0.06643637955421881
Initial Cost on Val dataset for this epoch 4539 = 0.07230698221834425
Error on this batch = 0.38285348447212597
Error on this batch = 0.38347870598408207
Cost on val dataset after 4540 epochs is = 0.07230680441592277
learning rate =  0.06643150134144304
Initial Cost on Val dataset for this epoch 4540 = 0.07230680441592277
Error on this batch = 0.382848330569238
Error on this batch = 0.38347679793119255
Cost on val dataset after 4541 epochs is = 0.07230662661902962
learning rate =  0.06642662456111857
Initial Cost on Val dataset for this epoch 4541 = 0.07230662661902962
Error on this batch = 0.38284317364029136
Error on this batch = 0.38347489107935545
Cost on val dataset after 4542 epochs is = 0.07230644882730052
learning rate =  0.06642174921250943
Initial Cost on Val dataset for this epoch 4542 = 0.07230644882730052
Error on this batch = 0.3828

Cost on val dataset after 4573 epochs is = 0.07230093788288326
learning rate =  0.06627131954605343
Initial Cost on Val dataset for this epoch 4573 = 0.07230093788288326
Error on this batch = 0.38267660515102236
Error on this batch = 0.3834144679700178
Cost on val dataset after 4574 epochs is = 0.0723007600670654
learning rate =  0.0662664896269673
Initial Cost on Val dataset for this epoch 4574 = 0.0723007600670654
Error on this batch = 0.38267135326040264
Error on this batch = 0.38341259706635783
Cost on val dataset after 4575 epochs is = 0.07230058224463252
learning rate =  0.06626166111561045
Initial Cost on Val dataset for this epoch 4575 = 0.07230058224463252
Error on this batch = 0.38266609865377765
Error on this batch = 0.3834107271269798
Cost on val dataset after 4576 epochs is = 0.07230040441523065
learning rate =  0.06625683401126499
Initial Cost on Val dataset for this epoch 4576 = 0.07230040441523065
Error on this batch = 0.38266084134127243
Error on this batch = 0.3834088

Error on this batch = 0.3833550378570735
Cost on val dataset after 4606 epochs is = 0.07229506453268672
learning rate =  0.06611267160188194
Initial Cost on Val dataset for this epoch 4606 = 0.07229506453268672
Error on this batch = 0.3825019151411783
Error on this batch = 0.38335319387114797
Cost on val dataset after 4607 epochs is = 0.07229488631101198
learning rate =  0.0661078877616625
Initial Cost on Val dataset for this epoch 4607 = 0.07229488631101198
Error on this batch = 0.3824965791120606
Error on this batch = 0.38335135059798947
Cost on val dataset after 4608 epochs is = 0.07229470807096114
learning rate =  0.06610310530575624
Initial Cost on Val dataset for this epoch 4608 = 0.07229470807096114
Error on this batch = 0.3824912407125838
Error on this batch = 0.383349508030217
Cost on val dataset after 4609 epochs is = 0.0722945298121734
learning rate =  0.06609832423346228
Initial Cost on Val dataset for this epoch 4609 = 0.0722945298121734
Error on this batch = 0.38248589995

Cost on val dataset after 4640 epochs is = 0.072288992509996
learning rate =  0.06595079346349285
Initial Cost on Val dataset for this epoch 4640 = 0.072288992509996
Error on this batch = 0.38231922261497003
Error on this batch = 0.3832908777248052
Cost on val dataset after 4641 epochs is = 0.07228881345878287
learning rate =  0.06594605629974554
Initial Cost on Val dataset for this epoch 4641 = 0.07228881345878287
Error on this batch = 0.3823138117845029
Error on this batch = 0.38328905476395975
Cost on val dataset after 4642 epochs is = 0.07228863437669902
learning rate =  0.06594132049676336
Initial Cost on Val dataset for this epoch 4642 = 0.07228863437669902
Error on this batch = 0.3823083989286585
Error on this batch = 0.38328723230335016
Cost on val dataset after 4643 epochs is = 0.07228845526336902
learning rate =  0.0659365860538625
Initial Cost on Val dataset for this epoch 4643 = 0.07228845526336902
Error on this batch = 0.3823029840569647
Error on this batch = 0.38328541033

Cost on val dataset after 4673 epochs is = 0.07228306545152817
learning rate =  0.06579518183246792
Initial Cost on Val dataset for this epoch 4673 = 0.07228306545152817
Error on this batch = 0.382139645912968
Error on this batch = 0.38323096502165216
Cost on val dataset after 4674 epochs is = 0.07228288518063719
learning rate =  0.06579048921553221
Initial Cost on Val dataset for this epoch 4674 = 0.07228288518063719
Error on this batch = 0.3821341730226573
Error on this batch = 0.3832291568914533
Cost on val dataset after 4675 epochs is = 0.07228270486622328
learning rate =  0.06578579793704972
Initial Cost on Val dataset for this epoch 4675 = 0.07228270486622328
Error on this batch = 0.3821286983966202
Error on this batch = 0.3832273491784089
Cost on val dataset after 4676 epochs is = 0.07228252450789467
learning rate =  0.06578110799635249
Initial Cost on Val dataset for this epoch 4676 = 0.07228252450789467
Error on this batch = 0.3821232220428177
Error on this batch = 0.383225541

Error on this batch = 0.38317331304782853
Cost on val dataset after 4706 epochs is = 0.07227709137631073
learning rate =  0.06564102855199062
Initial Cost on Val dataset for this epoch 4706 = 0.07227709137631073
Error on this batch = 0.3819581656636795
Error on this batch = 0.38317151853921944
Cost on val dataset after 4707 epochs is = 0.07227690945992796
learning rate =  0.06563637975391899
Initial Cost on Val dataset for this epoch 4707 = 0.07227690945992796
Error on this batch = 0.38195263945618896
Error on this batch = 0.3831697244834422
Cost on val dataset after 4708 epochs is = 0.07227672748687175
learning rate =  0.06563173227250758
Initial Cost on Val dataset for this epoch 4708 = 0.07227672748687175
Error on this batch = 0.3819471117529621
Error on this batch = 0.38316793088321904
Cost on val dataset after 4709 epochs is = 0.07227654545673758
learning rate =  0.06562708610710394
Initial Cost on Val dataset for this epoch 4709 = 0.07227654545673758
Error on this batch = 0.38194

Cost on val dataset after 4740 epochs is = 0.07227087199516871
learning rate =  0.06548370418093234
Initial Cost on Val dataset for this epoch 4740 = 0.07227087199516871
Error on this batch = 0.3817694739198525
Error on this batch = 0.3831107965951064
Cost on val dataset after 4741 epochs is = 0.07227068792380666
learning rate =  0.06547909978602327
Initial Cost on Val dataset for this epoch 4741 = 0.07227068792380666
Error on this batch = 0.38176390042165986
Error on this batch = 0.3831090200530984
Cost on val dataset after 4742 epochs is = 0.07227050378192654
learning rate =  0.0654744966858474
Initial Cost on Val dataset for this epoch 4742 = 0.07227050378192654
Error on this batch = 0.3817583256396958
Error on this batch = 0.38310724410226854
Cost on val dataset after 4743 epochs is = 0.0722703195691204
learning rate =  0.06546989487976773
Initial Cost on Val dataset for this epoch 4743 = 0.0722703195691204
Error on this batch = 0.3817527495799083
Error on this batch = 0.3831054687

Error on this batch = 0.3830542665748021
Cost on val dataset after 4773 epochs is = 0.07226475818760239
learning rate =  0.06533243931084902
Initial Cost on Val dataset for this epoch 4773 = 0.07226475818760239
Error on this batch = 0.3815849029110829
Error on this batch = 0.3830525116009437
Cost on val dataset after 4774 epochs is = 0.07226457157475193
learning rate =  0.06532787730822885
Initial Cost on Val dataset for this epoch 4774 = 0.07226457157475193
Error on this batch = 0.38157929016952735
Error on this batch = 0.3830507573938366
Cost on val dataset after 4775 epochs is = 0.07226438487803664
learning rate =  0.06532331657955527
Initial Cost on Val dataset for this epoch 4775 = 0.07226438487803664
Error on this batch = 0.3815736763395171
Error on this batch = 0.3830490039592979
Cost on val dataset after 4776 epochs is = 0.07226419809705811
learning rate =  0.0653187571242058
Initial Cost on Val dataset for this epoch 4776 = 0.07226419809705811
Error on this batch = 0.381568061

Cost on val dataset after 4807 epochs is = 0.07225836394482443
learning rate =  0.06517804220117004
Initial Cost on Val dataset for this epoch 4807 = 0.07225836394482443
Error on this batch = 0.3813934956078163
Error on this batch = 0.3829933369556375
Cost on val dataset after 4808 epochs is = 0.07225817426059654
learning rate =  0.0651735231661281
Initial Cost on Val dataset for this epoch 4808 = 0.07225817426059654
Error on this batch = 0.3813878493107955
Error on this batch = 0.38299161229167694
Cost on val dataset after 4809 epochs is = 0.07225798447930909
learning rate =  0.06516900538411126
Initial Cost on Val dataset for this epoch 4809 = 0.07225798447930909
Error on this batch = 0.3813822021380907
Error on this batch = 0.38298988859779615
Cost on val dataset after 4810 epochs is = 0.0722577946005873
learning rate =  0.06516448885451165
Initial Cost on Val dataset for this epoch 4810 = 0.0722577946005873
Error on this batch = 0.38137655409631277
Error on this batch = 0.382988165

Error on this batch = 0.38293696522050014
Cost on val dataset after 4841 epochs is = 0.07225185803273804
learning rate =  0.06502509433867541
Initial Cost on Val dataset for this epoch 4841 = 0.07225185803273804
Error on this batch = 0.3812010709969259
Error on this batch = 0.3829352754353228
Cost on val dataset after 4842 epochs is = 0.07225166484411782
learning rate =  0.06502061756796668
Initial Cost on Val dataset for this epoch 4842 = 0.07225166484411782
Error on this batch = 0.38119539879079595
Error on this batch = 0.38293358679476913
Cost on val dataset after 4843 epochs is = 0.07225147154626427
learning rate =  0.06501614202984903
Initial Cost on Val dataset for this epoch 4843 = 0.07225147154626427
Error on this batch = 0.3811897259486557
Error on this batch = 0.3829318993036715
Cost on val dataset after 4844 epochs is = 0.07225127813883905
learning rate =  0.06501166772372866
Initial Cost on Val dataset for this epoch 4844 = 0.07225127813883905
Error on this batch = 0.381184

Cost on val dataset after 4875 epochs is = 0.0722452263763909
learning rate =  0.06487357208386284
Initial Cost on Val dataset for this epoch 4875 = 0.0722452263763909
Error on this batch = 0.3810079057829324
Error on this batch = 0.382878533717682
Cost on val dataset after 4876 epochs is = 0.07224502929201884
learning rate =  0.06486913689057057
Initial Cost on Val dataset for this epoch 4876 = 0.07224502929201884
Error on this batch = 0.38100221637121745
Error on this batch = 0.3828768866543695
Cost on val dataset after 4877 epochs is = 0.07224483208767321
learning rate =  0.06486470290990805
Initial Cost on Val dataset for this epoch 4877 = 0.07224483208767321
Error on this batch = 0.38099652659741023
Error on this batch = 0.3828752408848758
Cost on val dataset after 4878 epochs is = 0.07224463476306378
learning rate =  0.06486027014129521
Initial Cost on Val dataset for this epoch 4878 = 0.07224463476306378
Error on this batch = 0.38099083647010645
Error on this batch = 0.382873596

Cost on val dataset after 4910 epochs is = 0.07223825522515369
learning rate =  0.06471905805730718
Initial Cost on Val dataset for this epoch 4910 = 0.07223825522515369
Error on this batch = 0.38080861875688665
Error on this batch = 0.38282167804151557
Cost on val dataset after 4911 epochs is = 0.07223805377984333
learning rate =  0.0647146649634804
Initial Cost on Val dataset for this epoch 4911 = 0.07223805377984333
Error on this batch = 0.38080292198153837
Error on this batch = 0.382820078168122
Cost on val dataset after 4912 epochs is = 0.0722378522054035
learning rate =  0.06471027306221387
Initial Cost on Val dataset for this epoch 4912 = 0.0722378522054035
Error on this batch = 0.3807972251625062
Error on this batch = 0.38281847969264826
Cost on val dataset after 4913 epochs is = 0.07223765050160506
learning rate =  0.0647058823529412
Initial Cost on Val dataset for this epoch 4913 = 0.07223765050160506
Error on this batch = 0.38079152830939494
Error on this batch = 0.382816882

Cost on val dataset after 4945 epochs is = 0.07223112641218568
learning rate =  0.06456600565950378
Initial Cost on Val dataset for this epoch 4945 = 0.07223112641218568
Error on this batch = 0.38060926981672644
Error on this batch = 0.38276652718172754
Cost on val dataset after 4946 epochs is = 0.07223092032422271
learning rate =  0.06456165397076245
Initial Cost on Val dataset for this epoch 4946 = 0.07223092032422271
Error on this batch = 0.38060357739078654
Error on this batch = 0.3827649773448353
Cost on val dataset after 4947 epochs is = 0.07223071410033276
learning rate =  0.06455730345498306
Initial Cost on Val dataset for this epoch 4947 = 0.07223071410033276
Error on this batch = 0.3805978852718483
Error on this batch = 0.382763428963197
Cost on val dataset after 4948 epochs is = 0.07223050774036102
learning rate =  0.06455295411161242
Initial Cost on Val dataset for this epoch 4948 = 0.07223050774036102
Error on this batch = 0.38059219347032824
Error on this batch = 0.382761

Error on this batch = 0.3827146532337721
Cost on val dataset after 4980 epochs is = 0.07222383157146721
learning rate =  0.06441439086479932
Initial Cost on Val dataset for this epoch 4980 = 0.07222383157146721
Error on this batch = 0.38041028668290694
Error on this batch = 0.38271315320266225
Cost on val dataset after 4981 epochs is = 0.07222362065007067
learning rate =  0.06441007990300993
Initial Cost on Val dataset for this epoch 4981 = 0.07222362065007067
Error on this batch = 0.380404611313397
Error on this batch = 0.38271165463965817
Cost on val dataset after 4982 epochs is = 0.07222340958881868
learning rate =  0.06440577009504103
Initial Cost on Val dataset for this epoch 4982 = 0.07222340958881868
Error on this batch = 0.380398936625334
Error on this batch = 0.3827101575445318
Cost on val dataset after 4983 epochs is = 0.07222319838764713
learning rate =  0.06440146144035229
Initial Cost on Val dataset for this epoch 4983 = 0.07222319838764713
Error on this batch = 0.38039326

Cost on val dataset after 5014 epochs is = 0.07221658153822282
learning rate =  0.06426846224063786
Initial Cost on Val dataset for this epoch 5014 = 0.07221658153822282
Error on this batch = 0.3802177722824904
Error on this batch = 0.38266302244623723
Cost on val dataset after 5015 epochs is = 0.07221636584361232
learning rate =  0.06426419020777227
Initial Cost on Val dataset for this epoch 5015 = 0.07221636584361232
Error on this batch = 0.38021212626840434
Error on this batch = 0.3826615734480386
Cost on val dataset after 5016 epochs is = 0.07221615000866768
learning rate =  0.06425991931055708
Initial Cost on Val dataset for this epoch 5016 = 0.07221615000866768
Error on this batch = 0.38020648131178675
Error on this batch = 0.38266012589056103
Cost on val dataset after 5017 epochs is = 0.07221593403343136
learning rate =  0.06425564954846408
Initial Cost on Val dataset for this epoch 5017 = 0.07221593403343136
Error on this batch = 0.3802008374237627
Error on this batch = 0.38265

Error on this batch = 0.38261595758394534
Cost on val dataset after 5048 epochs is = 0.07220916961262198
learning rate =  0.06412384707846532
Initial Cost on Val dataset for this epoch 5048 = 0.07220916961262198
Error on this batch = 0.38002646753658054
Error on this batch = 0.3826145552500509
Cost on val dataset after 5049 epochs is = 0.07220894919189354
learning rate =  0.06411961336347358
Initial Cost on Val dataset for this epoch 5049 = 0.07220894919189354
Error on this batch = 0.3800208637041244
Error on this batch = 0.3826131542963029
Cost on val dataset after 5050 epochs is = 0.07220872863431292
learning rate =  0.06411538076636818
Initial Cost on Val dataset for this epoch 5050 = 0.07220872863431292
Error on this batch = 0.3800152613056204
Error on this batch = 0.3826117547203592
Cost on val dataset after 5051 epochs is = 0.0722085079400498
learning rate =  0.06411114928663265
Initial Cost on Val dataset for this epoch 5051 = 0.0722085079400498
Error on this batch = 0.380009660

Error on this batch = 0.38256903710810386
Cost on val dataset after 5082 epochs is = 0.07220159974159371
learning rate =  0.06398052482828324
Initial Cost on Val dataset for this epoch 5082 = 0.07220159974159371
Error on this batch = 0.3798368066751479
Error on this batch = 0.3825676802082326
Cost on val dataset after 5083 epochs is = 0.07220137478810365
learning rate =  0.06397632883372581
Initial Cost on Val dataset for this epoch 5083 = 0.07220137478810365
Error on this batch = 0.37983125766708375
Error on this batch = 0.3825663245922395
Cost on val dataset after 5084 epochs is = 0.07220114970590467
learning rate =  0.06397213393968496
Initial Cost on Val dataset for this epoch 5084 = 0.07220114970590467
Error on this batch = 0.37982571045781466
Error on this batch = 0.38256497025676767
Cost on val dataset after 5085 epochs is = 0.07220092449531361
learning rate =  0.06396794014565566
Initial Cost on Val dataset for this epoch 5085 = 0.07220092449531361
Error on this batch = 0.37982

Error on this batch = 0.38252360123870405
Cost on val dataset after 5116 epochs is = 0.07219388122970395
learning rate =  0.06383847539634724
Initial Cost on Val dataset for this epoch 5116 = 0.07219388122970395
Error on this batch = 0.3796492108981978
Error on this batch = 0.3825222859204815
Cost on val dataset after 5117 epochs is = 0.07219365210590882
learning rate =  0.06383431653800346
Initial Cost on Val dataset for this epoch 5117 = 0.07219365210590882
Error on this batch = 0.37964372876301083
Error on this batch = 0.38252097175431937
Cost on val dataset after 5118 epochs is = 0.0721934228668238
learning rate =  0.06383015876318951
Initial Cost on Val dataset for this epoch 5118 = 0.0721934228668238
Error on this batch = 0.379638248767872
Error on this batch = 0.38251965873577
Cost on val dataset after 5119 epochs is = 0.07219319351292787
learning rate =  0.06382600207141147
Initial Cost on Val dataset for this epoch 5119 = 0.07219319351292787
Error on this batch = 0.37963277092

Error on this batch = 0.38248077834722927
Cost on val dataset after 5149 epochs is = 0.07218626208534297
learning rate =  0.06370180249299354
Initial Cost on Val dataset for this epoch 5149 = 0.07218626208534297
Error on this batch = 0.3794694812413254
Error on this batch = 0.38247949850955026
Cost on val dataset after 5150 epochs is = 0.07218602943660554
learning rate =  0.06369767913208532
Initial Cost on Val dataset for this epoch 5150 = 0.07218602943660554
Error on this batch = 0.3794640745990839
Error on this batch = 0.3824782196587242
Cost on val dataset after 5151 epochs is = 0.07218579669096505
learning rate =  0.06369355683857575
Initial Cost on Val dataset for this epoch 5151 = 0.07218579669096505
Error on this batch = 0.3794586703948681
Error on this batch = 0.38247694178913555
Cost on val dataset after 5152 epochs is = 0.07218556384906064
learning rate =  0.06368943561198136
Initial Cost on Val dataset for this epoch 5152 = 0.07218556384906064
Error on this batch = 0.379453

Error on this batch = 0.3824390326289618
Cost on val dataset after 5182 epochs is = 0.0721785371881916
learning rate =  0.06356629254450967
Initial Cost on Val dataset for this epoch 5182 = 0.0721785371881916
Error on this batch = 0.37929239356797106
Error on this batch = 0.38243778219343205
Cost on val dataset after 5183 epochs is = 0.07217830170406787
learning rate =  0.06356220415418809
Initial Cost on Val dataset for this epoch 5183 = 0.07217830170406787
Error on this batch = 0.3792870715989872
Error on this batch = 0.38243653254026216
Cost on val dataset after 5184 epochs is = 0.07217806614650844
learning rate =  0.06355811681547487
Initial Cost on Val dataset for this epoch 5184 = 0.07217806614650844
Error on this batch = 0.3792817523236218
Error on this batch = 0.38243528366265245
Cost on val dataset after 5185 epochs is = 0.07217783051629573
learning rate =  0.06355403052789671
Initial Cost on Val dataset for this epoch 5185 = 0.07217783051629573
Error on this batch = 0.3792764

Error on this batch = 0.38239691455353225
Cost on val dataset after 5216 epochs is = 0.07217049437570323
learning rate =  0.06342787440784128
Initial Cost on Val dataset for this epoch 5216 = 0.07217049437570323
Error on this batch = 0.3791129973048271
Error on this batch = 0.3823956867160191
Cost on val dataset after 5217 epochs is = 0.07217025685423153
learning rate =  0.06342382150850708
Initial Cost on Val dataset for this epoch 5217 = 0.07217025685423153
Error on this batch = 0.3791077705458691
Error on this batch = 0.38239445941270456
Cost on val dataset after 5218 epochs is = 0.07217001928776584
learning rate =  0.06341976964485914
Initial Cost on Val dataset for this epoch 5218 = 0.07217001928776584
Error on this batch = 0.3791025466902581
Error on this batch = 0.3823932326358437
Cost on val dataset after 5219 epochs is = 0.07216978167718903
learning rate =  0.06341571881643442
Initial Cost on Val dataset for this epoch 5219 = 0.07216978167718903
Error on this batch = 0.3790973

Cost on val dataset after 5249 epochs is = 0.07216263728867436
learning rate =  0.06329467305771035
Initial Cost on Val dataset for this epoch 5249 = 0.07216263728867436
Error on this batch = 0.378942073632528
Error on this batch = 0.3823554207219951
Cost on val dataset after 5250 epochs is = 0.07216239875603114
learning rate =  0.0632906540931267
Initial Cost on Val dataset for this epoch 5250 = 0.07216239875603114
Error on this batch = 0.37893694515495696
Error on this batch = 0.3823542066204242
Cost on val dataset after 5251 epochs is = 0.07216216020813618
learning rate =  0.06328663614910289
Initial Cost on Val dataset for this epoch 5251 = 0.07216216020813618
Error on this batch = 0.37893181972451445
Error on this batch = 0.3823529927833161
Cost on val dataset after 5252 epochs is = 0.0721619216458956
learning rate =  0.06328261922518545
Initial Cost on Val dataset for this epoch 5252 = 0.0721619216458956
Error on this batch = 0.37892669734457685
Error on this batch = 0.3823517792

Error on this batch = 0.38231545207880163
Cost on val dataset after 5283 epochs is = 0.07215452399140623
learning rate =  0.06315859809583471
Initial Cost on Val dataset for this epoch 5283 = 0.07215452399140623
Error on this batch = 0.3787694322952902
Error on this batch = 0.3823142425800259
Cost on val dataset after 5284 epochs is = 0.0721542854407737
learning rate =  0.06315461357761734
Initial Cost on Val dataset for this epoch 5284 = 0.0721542854407737
Error on this batch = 0.37876440896016006
Error on this batch = 0.38231303309229
Cost on val dataset after 5285 epochs is = 0.07215404690483473
learning rate =  0.06315063006470292
Initial Cost on Val dataset for this epoch 5285 = 0.07215404690483473
Error on this batch = 0.37875938875376347
Error on this batch = 0.38231182360840477
Cost on val dataset after 5286 epochs is = 0.07215380838442707
learning rate =  0.06314664755664767
Initial Cost on Val dataset for this epoch 5286 = 0.07215380838442707
Error on this batch = 0.378754371

Error on this batch = 0.382274294364713
Cost on val dataset after 5317 epochs is = 0.0721464263502827
learning rate =  0.06302368580869387
Initial Cost on Val dataset for this epoch 5317 = 0.0721464263502827
Error on this batch = 0.37860039966855086
Error on this batch = 0.38227308148327577
Cost on val dataset after 5318 epochs is = 0.07214618874586164
learning rate =  0.06301973522347644
Initial Cost on Val dataset for this epoch 5318 = 0.07214618874586164
Error on this batch = 0.3785954831392469
Error on this batch = 0.3822718683954186
Cost on val dataset after 5319 epochs is = 0.07214595118232917
learning rate =  0.06301578562862882
Initial Cost on Val dataset for this epoch 5319 = 0.07214595118232917
Error on this batch = 0.3785905697537746
Error on this batch = 0.38227065509572
Cost on val dataset after 5320 epochs is = 0.07214571366037303
learning rate =  0.06301183702371664
Initial Cost on Val dataset for this epoch 5320 = 0.07214571366037303
Error on this batch = 0.378585659511

Cost on val dataset after 5350 epochs is = 0.07213861052152049
learning rate =  0.06289383705278424
Initial Cost on Val dataset for this epoch 5350 = 0.07213861052152049
Error on this batch = 0.37843980951168654
Error on this batch = 0.3822329111060103
Cost on val dataset after 5351 epochs is = 0.07213837459940677
learning rate =  0.0628899189218837
Initial Cost on Val dataset for this epoch 5351 = 0.07213837459940677
Error on this batch = 0.37843499621253285
Error on this batch = 0.38223168854790845
Cost on val dataset after 5352 epochs is = 0.07213813873792708
learning rate =  0.06288600176716028
Initial Cost on Val dataset for this epoch 5352 = 0.07213813873792708
Error on this batch = 0.3784301860167163
Error on this batch = 0.3822304656351898
Cost on val dataset after 5353 epochs is = 0.07213790293758145
learning rate =  0.06288208558818843
Initial Cost on Val dataset for this epoch 5353 = 0.07213790293758145
Error on this batch = 0.3784253789223034
Error on this batch = 0.3822292

Error on this batch = 0.38219236442994814
Cost on val dataset after 5384 epochs is = 0.07213062590706028
learning rate =  0.06276116570335738
Initial Cost on Val dataset for this epoch 5384 = 0.07213062590706028
Error on this batch = 0.37827788518282246
Error on this batch = 0.3821911288049508
Cost on val dataset after 5385 epochs is = 0.07213039229225221
learning rate =  0.06275728052532942
Initial Cost on Val dataset for this epoch 5385 = 0.07213039229225221
Error on this batch = 0.3782731761160121
Error on this batch = 0.38218989275088505
Cost on val dataset after 5386 epochs is = 0.07213015875184349
learning rate =  0.06275339630915776
Initial Cost on Val dataset for this epoch 5386 = 0.07213015875184349
Error on this batch = 0.3782684700691421
Error on this batch = 0.38218865626642884
Cost on val dataset after 5387 epochs is = 0.07212992528614143
learning rate =  0.06274951305442575
Initial Cost on Val dataset for this epoch 5387 = 0.07212992528614143
Error on this batch = 0.37826

Cost on val dataset after 5418 epochs is = 0.07212272634554856
learning rate =  0.06262960677047853
Initial Cost on Val dataset for this epoch 5418 = 0.07212272634554856
Error on this batch = 0.3781194528295347
Error on this batch = 0.3821488559303049
Cost on val dataset after 5419 epochs is = 0.07212249540274283
learning rate =  0.0626257540633594
Initial Cost on Val dataset for this epoch 5419 = 0.07212249540274283
Error on this batch = 0.3781148447026221
Error on this batch = 0.3821476047837999
Cost on val dataset after 5420 epochs is = 0.07212226454203152
learning rate =  0.062621902304074
Initial Cost on Val dataset for this epoch 5420 = 0.07212226454203152
Error on this batch = 0.3781102394875125
Error on this batch = 0.38214635318642765
Cost on val dataset after 5421 epochs is = 0.07212203376356234
learning rate =  0.06261805149221433
Initial Cost on Val dataset for this epoch 5421 = 0.07212203376356234
Error on this batch = 0.3781056371808184
Error on this batch = 0.38214510113

Error on this batch = 0.38210859625823496
Cost on val dataset after 5451 epochs is = 0.07211514922893289
learning rate =  0.06250296567594377
Initial Cost on Val dataset for this epoch 5451 = 0.07211514922893289
Error on this batch = 0.3779689033582338
Error on this batch = 0.3821073307728803
Cost on val dataset after 5452 epochs is = 0.07211492105284466
learning rate =  0.06249914403327598
Initial Cost on Val dataset for this epoch 5452 = 0.07211492105284466
Error on this batch = 0.3779643895037934
Error on this batch = 0.3821060648451855
Cost on val dataset after 5453 epochs is = 0.07211469296181261
learning rate =  0.062495323325109396
Initial Cost on Val dataset for this epoch 5453 = 0.07211469296181261
Error on this batch = 0.37795987844702966
Error on this batch = 0.3821047984756277
Cost on val dataset after 5454 epochs is = 0.07211446495587229
learning rate =  0.0624915035510442
Initial Cost on Val dataset for this epoch 5454 = 0.07211446495587229
Error on this batch = 0.3779553

Cost on val dataset after 5485 epochs is = 0.0721074390538506
learning rate =  0.062373551699150695
Initial Cost on Val dataset for this epoch 5485 = 0.0721074390538506
Error on this batch = 0.3778169810519452
Error on this batch = 0.3820640444328353
Cost on val dataset after 5486 epochs is = 0.07210721377482573
learning rate =  0.062369761606676205
Initial Cost on Val dataset for this epoch 5486 = 0.07210721377482573
Error on this batch = 0.377812560378239
Error on this batch = 0.38206276377410237
Cost on val dataset after 5487 epochs is = 0.07210698858073293
learning rate =  0.062365972435244846
Initial Cost on Val dataset for this epoch 5487 = 0.07210698858073293
Error on this batch = 0.37780814238535093
Error on this batch = 0.3820614826913467
Cost on val dataset after 5488 epochs is = 0.07210676347153325
learning rate =  0.06236218418446499
Initial Cost on Val dataset for this epoch 5488 = 0.07210676347153325
Error on this batch = 0.3778037270699225
Error on this batch = 0.3820602

Error on this batch = 0.3820228553601072
Cost on val dataset after 5518 epochs is = 0.07210004941612441
learning rate =  0.062248962831255264
Initial Cost on Val dataset for this epoch 5518 = 0.07210004941612441
Error on this batch = 0.3776724962258788
Error on this batch = 0.3820215613568587
Cost on val dataset after 5519 epochs is = 0.07209982691130071
learning rate =  0.06224520292771682
Initial Cost on Val dataset for this epoch 5519 = 0.07209982691130071
Error on this batch = 0.3776681622809842
Error on this batch = 0.3820202669429611
Cost on val dataset after 5520 epochs is = 0.0720996044893843
learning rate =  0.06224144393242247
Initial Cost on Val dataset for this epoch 5520 = 0.0720996044893843
Error on this batch = 0.3776638309095746
Error on this batch = 0.382018972118722
Cost on val dataset after 5521 epochs is = 0.07209938215029407
learning rate =  0.06223768584498835
Initial Cost on Val dataset for this epoch 5521 = 0.07209938215029407
Error on this batch = 0.37765950210

Error on this batch = 0.3819799378093184
Cost on val dataset after 5551 epochs is = 0.07209275005584456
learning rate =  0.06212536348240361
Initial Cost on Val dataset for this epoch 5551 = 0.07209275005584456
Error on this batch = 0.3775308181021669
Error on this batch = 0.38197863037730045
Cost on val dataset after 5552 epochs is = 0.07209253023978426
learning rate =  0.06212163334949268
Initial Cost on Val dataset for this epoch 5552 = 0.07209253023978426
Error on this batch = 0.37752656747426466
Error on this batch = 0.38197732254096833
Cost on val dataset after 5553 epochs is = 0.0720923105035546
learning rate =  0.0621179041122794
Initial Cost on Val dataset for this epoch 5553 = 0.0720923105035546
Error on this batch = 0.3775223193208861
Error on this batch = 0.38197601430039724
Cost on val dataset after 5554 epochs is = 0.07209209084705197
learning rate =  0.06211417577038748
Initial Cost on Val dataset for this epoch 5554 = 0.07209209084705197
Error on this batch = 0.37751807

Cost on val dataset after 5584 epochs is = 0.07208553769474628
learning rate =  0.06200273998070186
Initial Cost on Val dataset for this epoch 5584 = 0.07208553769474628
Error on this batch = 0.37739183869219567
Error on this batch = 0.38193525848296916
Cost on val dataset after 5585 epochs is = 0.07208532045627838
learning rate =  0.06199903920834135
Initial Cost on Val dataset for this epoch 5585 = 0.07208532045627838
Error on this batch = 0.3773876682618373
Error on this batch = 0.38193393731285186
Cost on val dataset after 5586 epochs is = 0.07208510329402947
learning rate =  0.0619953393193782
Initial Cost on Val dataset for this epoch 5586 = 0.07208510329402947
Error on this batch = 0.37738350021625744
Error on this batch = 0.3819326157379713
Cost on val dataset after 5587 epochs is = 0.0720848862078854
learning rate =  0.061991640313443445
Initial Cost on Val dataset for this epoch 5587 = 0.0720848862078854
Error on this batch = 0.3773793345528836
Error on this batch = 0.3819312

Error on this batch = 0.38189277974048275
Cost on val dataset after 5617 epochs is = 0.0720784084391679
learning rate =  0.06188107892252602
Initial Cost on Val dataset for this epoch 5617 = 0.0720784084391679
Error on this batch = 0.3772554599746883
Error on this batch = 0.3818914455615262
Cost on val dataset after 5618 epochs is = 0.0720781936547793
learning rate =  0.061877407108660964
Initial Cost on Val dataset for this epoch 5618 = 0.0720781936547793
Error on this batch = 0.3772513669313068
Error on this batch = 0.38189011097386255
Cost on val dataset after 5619 epochs is = 0.07207797894276562
learning rate =  0.06187373616613285
Initial Cost on Val dataset for this epoch 5619 = 0.07207797894276562
Error on this batch = 0.37724727619241366
Error on this batch = 0.3818887759773492
Cost on val dataset after 5620 epochs is = 0.07207776430300875
learning rate =  0.061870066094579894
Initial Cost on Val dataset for this epoch 5620 = 0.07207776430300875
Error on this batch = 0.37724318

Error on this batch = 0.3818471875325649
Cost on val dataset after 5651 epochs is = 0.07207114566398673
learning rate =  0.06175672391611354
Initial Cost on Val dataset for this epoch 5651 = 0.07207114566398673
Error on this batch = 0.3771175760480573
Error on this batch = 0.3818458393803538
Cost on val dataset after 5652 epochs is = 0.07207093327391519
learning rate =  0.06175308152601862
Initial Cost on Val dataset for this epoch 5652 = 0.07207093327391519
Error on this batch = 0.3771135601274489
Error on this batch = 0.3818444908149823
Cost on val dataset after 5653 epochs is = 0.07207072095219089
learning rate =  0.06174943999507929
Initial Cost on Val dataset for this epoch 5653 = 0.07207072095219089
Error on this batch = 0.3771095464385122
Error on this batch = 0.3818431418363471
Cost on val dataset after 5654 epochs is = 0.07207050869869545
learning rate =  0.06174579932294096
Initial Cost on Val dataset for this epoch 5654 = 0.07207050869869545
Error on this batch = 0.377105534

Error on this batch = 0.3818011180839113
Cost on val dataset after 5685 epochs is = 0.07206396203783223
learning rate =  0.06163336252699255
Initial Cost on Val dataset for this epoch 5685 = 0.07206396203783223
Error on this batch = 0.3769822752007363
Error on this batch = 0.38179975584332715
Cost on val dataset after 5686 epochs is = 0.07206375190540842
learning rate =  0.061629749150509686
Initial Cost on Val dataset for this epoch 5686 = 0.07206375190540842
Error on this batch = 0.3769783340913935
Error on this batch = 0.3817983931877514
Cost on val dataset after 5687 epochs is = 0.07206354183733268
learning rate =  0.06162613662124289
Initial Cost on Val dataset for this epoch 5687 = 0.07206354183733268
Error on this batch = 0.37697439515082215
Error on this batch = 0.381797030117192
Cost on val dataset after 5688 epochs is = 0.07206333183348841
learning rate =  0.06162252493884457
Initial Cost on Val dataset for this epoch 5688 = 0.07206333183348841
Error on this batch = 0.3769704

Error on this batch = 0.38175594524676526
Cost on val dataset after 5718 epochs is = 0.0720570610107986
learning rate =  0.06151456654511686
Initial Cost on Val dataset for this epoch 5718 = 0.0720570610107986
Error on this batch = 0.3768533547499158
Error on this batch = 0.38175456933910895
Cost on val dataset after 5719 epochs is = 0.07205685294074553
learning rate =  0.061510980944354185
Initial Cost on Val dataset for this epoch 5719 = 0.07205685294074553
Error on this batch = 0.37684948435570664
Error on this batch = 0.3817531930190279
Cost on val dataset after 5720 epochs is = 0.07205664493123702
learning rate =  0.0615073961794446
Initial Cost on Val dataset for this epoch 5720 = 0.07205664493123702
Error on this batch = 0.3768456160783859
Error on this batch = 0.3817518162866783
Cost on val dataset after 5721 epochs is = 0.07205643698215941
learning rate =  0.061503812250047175
Initial Cost on Val dataset for this epoch 5721 = 0.07205643698215941
Error on this batch = 0.3768417

Error on this batch = 0.3817089341778639
Cost on val dataset after 5752 epochs is = 0.07205001991888524
learning rate =  0.06139312300183566
Initial Cost on Val dataset for this epoch 5752 = 0.07205001991888524
Error on this batch = 0.3767229406838828
Error on this batch = 0.3817075443647671
Cost on val dataset after 5753 epochs is = 0.07204981384418391
learning rate =  0.06138956563000003
Initial Cost on Val dataset for this epoch 5753 = 0.07204981384418391
Error on this batch = 0.3767191415040194
Error on this batch = 0.3817061541473214
Cost on val dataset after 5754 epochs is = 0.0720496078262207
learning rate =  0.06138600908253654
Initial Cost on Val dataset for this epoch 5754 = 0.0720496078262207
Error on this batch = 0.37671534439647936
Error on this batch = 0.3817047635258514
Cost on val dataset after 5755 epochs is = 0.07204940186488563
learning rate =  0.06138245335911091
Initial Cost on Val dataset for this epoch 5755 = 0.07204940186488563
Error on this batch = 0.3767115493

Error on this batch = 0.3816614558672835
Cost on val dataset after 5786 epochs is = 0.072043044556618
learning rate =  0.06127263284093727
Initial Cost on Val dataset for this epoch 5786 = 0.072043044556618
Error on this batch = 0.3765949243936064
Error on this batch = 0.38166005251826546
Cost on val dataset after 5787 epochs is = 0.07204284035004448
learning rate =  0.061269103311387935
Initial Cost on Val dataset for this epoch 5787 = 0.07204284035004448
Error on this batch = 0.37659119505121824
Error on this batch = 0.3816586487787182
Cost on val dataset after 5788 epochs is = 0.07204263619653911
learning rate =  0.06126557459495374
Initial Cost on Val dataset for this epoch 5788 = 0.07204263619653911
Error on this batch = 0.37658746774481744
Error on this batch = 0.38165724464913325
Cost on val dataset after 5789 epochs is = 0.07204243209599626
learning rate =  0.061262046691306936
Initial Cost on Val dataset for this epoch 5789 = 0.07204243209599626
Error on this batch = 0.3765837

Error on this batch = 0.38161352607119936
Cost on val dataset after 5820 epochs is = 0.0720361306783048
learning rate =  0.06115308304127764
Initial Cost on Val dataset for this epoch 5820 = 0.0720361306783048
Error on this batch = 0.3764692635152126
Error on this batch = 0.3816121097476661
Cost on val dataset after 5821 epochs is = 0.07203592821758427
learning rate =  0.061149580974894924
Initial Cost on Val dataset for this epoch 5821 = 0.07203592821758427
Error on this batch = 0.37646560289734804
Error on this batch = 0.3816106930528904
Cost on val dataset after 5822 epochs is = 0.07203572580641648
learning rate =  0.06114607971058867
Initial Cost on Val dataset for this epoch 5822 = 0.07203572580641648
Error on this batch = 0.376461944286375
Error on this batch = 0.3816092759875169
Cost on val dataset after 5823 epochs is = 0.07203552344470054
learning rate =  0.06114257924803749
Initial Cost on Val dataset for this epoch 5823 = 0.07203552344470054
Error on this batch = 0.376458287

Error on this batch = 0.38156659506248525
Cost on val dataset after 5853 epochs is = 0.07202947509400479
learning rate =  0.06103793660086268
Initial Cost on Val dataset for this epoch 5853 = 0.07202947509400479
Error on this batch = 0.376349518841338
Error on this batch = 0.3815651668504033
Cost on val dataset after 5854 epochs is = 0.07202927421606989
learning rate =  0.06103446083429284
Initial Cost on Val dataset for this epoch 5854 = 0.07202927421606989
Error on this batch = 0.37634592407860207
Error on this batch = 0.3815637382904625
Cost on val dataset after 5855 epochs is = 0.07202907338443783
learning rate =  0.06103098585928907
Initial Cost on Val dataset for this epoch 5855 = 0.07202907338443783
Error on this batch = 0.37634233130038997
Error on this batch = 0.38156230938343594
Cost on val dataset after 5856 epochs is = 0.07202887259901268
learning rate =  0.061027511675535946
Initial Cost on Val dataset for this epoch 5856 = 0.07202887259901268
Error on this batch = 0.37633

Error on this batch = 0.3815192847516866
Cost on val dataset after 5886 epochs is = 0.07202287005353242
learning rate =  0.060923652537329044
Initial Cost on Val dataset for this epoch 5886 = 0.07202287005353242
Error on this batch = 0.3762319364009223
Error on this batch = 0.3815178454868605
Cost on val dataset after 5887 epochs is = 0.07202267065384697
learning rate =  0.06092020272687222
Initial Cost on Val dataset for this epoch 5887 = 0.07202267065384697
Error on this batch = 0.3762284068334875
Error on this batch = 0.381516405901438
Cost on val dataset after 5888 epochs is = 0.07202247129738566
learning rate =  0.06091675369766671
Initial Cost on Val dataset for this epoch 5888 = 0.07202247129738566
Error on this batch = 0.3762248792329193
Error on this batch = 0.3815149659962976
Cost on val dataset after 5889 epochs is = 0.072022271984058
learning rate =  0.06091330544940294
Initial Cost on Val dataset for this epoch 5889 = 0.072022271984058
Error on this batch = 0.3762213535987

Error on this batch = 0.3814716246406304
Cost on val dataset after 5919 epochs is = 0.07201631219926695
learning rate =  0.06081021961110229
Initial Cost on Val dataset for this epoch 5919 = 0.07201631219926695
Error on this batch = 0.3761164967308116
Error on this batch = 0.3814701752744307
Cost on val dataset after 5920 epochs is = 0.07201611417910221
learning rate =  0.06080679541943949
Initial Cost on Val dataset for this epoch 5920 = 0.07201611417910221
Error on this batch = 0.37611303183729844
Error on this batch = 0.3814687256179909
Cost on val dataset after 5921 epochs is = 0.07201591619926352
learning rate =  0.06080337199890419
Initial Cost on Val dataset for this epoch 5921 = 0.07201591619926352
Error on this batch = 0.37610956889664793
Error on this batch = 0.38146727567227035
Cost on val dataset after 5922 epochs is = 0.072015718259666
learning rate =  0.06079994934919254
Initial Cost on Val dataset for this epoch 5922 = 0.072015718259666
Error on this batch = 0.3761061079

Error on this batch = 0.38142218919760057
Cost on val dataset after 5953 epochs is = 0.07200960163645564
learning rate =  0.06069422788675469
Initial Cost on Val dataset for this epoch 5953 = 0.07200960163645564
Error on this batch = 0.3759997837476967
Error on this batch = 0.38142073051297587
Cost on val dataset after 5954 epochs is = 0.07200940494077475
learning rate =  0.06069082974398477
Initial Cost on Val dataset for this epoch 5954 = 0.07200940494077475
Error on this batch = 0.3759963850524251
Error on this batch = 0.38141927157176975
Cost on val dataset after 5955 epochs is = 0.07200920828263017
learning rate =  0.06068743236210664
Initial Cost on Val dataset for this epoch 5955 = 0.07200920828263017
Error on this batch = 0.37599298829834055
Error on this batch = 0.3814178123749997
Cost on val dataset after 5956 epochs is = 0.07200901166194297
learning rate =  0.06068403574082221
Initial Cost on Val dataset for this epoch 5956 = 0.07200901166194297
Error on this batch = 0.37598

Cost on val dataset after 5986 epochs is = 0.07200313007447948
learning rate =  0.06058248930655453
Initial Cost on Val dataset for this epoch 5986 = 0.07200313007447948
Error on this batch = 0.3758886501061511
Error on this batch = 0.3813724561236861
Cost on val dataset after 5987 epochs is = 0.07200293457676074
learning rate =  0.0605791161167198
Initial Cost on Val dataset for this epoch 5987 = 0.07200293457676074
Error on this batch = 0.37588531531274577
Error on this batch = 0.38137098929317015
Cost on val dataset after 5988 epochs is = 0.07200273911406549
learning rate =  0.060575743678026824
Initial Cost on Val dataset for this epoch 5988 = 0.07200273911406549
Error on this batch = 0.37588198245090854
Error on this batch = 0.38136952224129095
Cost on val dataset after 5989 epochs is = 0.07200254368632059
learning rate =  0.06057237199018295
Initial Cost on Val dataset for this epoch 5989 = 0.07200254368632059
Error on this batch = 0.37587865152036515
Error on this batch = 0.3813

Error on this batch = 0.3813239395908156
Cost on val dataset after 6020 epochs is = 0.07199650237089647
learning rate =  0.06046822049892189
Initial Cost on Val dataset for this epoch 6020 = 0.07199650237089647
Error on this batch = 0.3757763491261836
Error on this batch = 0.38132246601270864
Cost on val dataset after 6021 epochs is = 0.07199630802401846
learning rate =  0.060464872684691875
Initial Cost on Val dataset for this epoch 6021 = 0.07199630802401846
Error on this batch = 0.37577307985556785
Error on this batch = 0.3813209922481335
Cost on val dataset after 6022 epochs is = 0.07199611370977753
learning rate =  0.060461525611743734
Initial Cost on Val dataset for this epoch 6022 = 0.07199611370977753
Error on this batch = 0.3757698125074771
Error on this batch = 0.3813195182981506
Cost on val dataset after 6023 epochs is = 0.0719959194281066
learning rate =  0.060458179279790275
Initial Cost on Val dataset for this epoch 6023 = 0.0719959194281066
Error on this batch = 0.375766

Error on this batch = 0.3812752188405018
Cost on val dataset after 6053 epochs is = 0.07199010579752795
learning rate =  0.060358132465718625
Initial Cost on Val dataset for this epoch 6053 = 0.07199010579752795
Error on this batch = 0.3756694768619222
Error on this batch = 0.38127373966818523
Cost on val dataset after 6054 epochs is = 0.07198991249321714
learning rate =  0.06035480896301572
Initial Cost on Val dataset for this epoch 6054 = 0.07198991249321714
Error on this batch = 0.375666270896261
Error on this batch = 0.3812722603442981
Cost on val dataset after 6055 epochs is = 0.07198971921942268
learning rate =  0.06035148619220065
Initial Cost on Val dataset for this epoch 6055 = 0.07198971921942268
Error on this batch = 0.3756630668444325
Error on this batch = 0.3812707808698911
Cost on val dataset after 6056 epochs is = 0.07198952597608338
learning rate =  0.06034816415299141
Initial Cost on Val dataset for this epoch 6056 = 0.07198952597608338
Error on this batch = 0.37565986

Cost on val dataset after 6087 epochs is = 0.0719835502119229
learning rate =  0.06024554228254087
Initial Cost on Val dataset for this epoch 6087 = 0.0719835502119229
Error on this batch = 0.37556154605009334
Error on this batch = 0.3812233644667411
Cost on val dataset after 6088 epochs is = 0.07198335791181881
learning rate =  0.06024224350674265
Initial Cost on Val dataset for this epoch 6088 = 0.07198335791181881
Error on this batch = 0.37555840499986076
Error on this batch = 0.381221880610039
Cost on val dataset after 6089 epochs is = 0.07198316564024378
learning rate =  0.06023894545333045
Initial Cost on Val dataset for this epoch 6089 = 0.07198316564024378
Error on this batch = 0.37555526585392046
Error on this batch = 0.38122039663809526
Cost on val dataset after 6090 epochs is = 0.0719829733971423
learning rate =  0.0602356481220275
Initial Cost on Val dataset for this epoch 6090 = 0.0719829733971423
Error on this batch = 0.3755521286119769
Error on this batch = 0.38121891255

Error on this batch = 0.3754619754659225
Error on this batch = 0.3811758289158271
Cost on val dataset after 6120 epochs is = 0.07197721907594526
learning rate =  0.06013706259702309
Initial Cost on Val dataset for this epoch 6120 = 0.07197721907594526
Error on this batch = 0.37545889520088
Error on this batch = 0.3811743418720306
Cost on val dataset after 6121 epochs is = 0.07197702768891305
learning rate =  0.06013378751464013
Initial Cost on Val dataset for this epoch 6121 = 0.07197702768891305
Error on this batch = 0.37545581683010026
Error on this batch = 0.38117285474506496
Cost on val dataset after 6122 epochs is = 0.0719768363286664
learning rate =  0.06013051314558848
Initial Cost on Val dataset for this epoch 6122 = 0.0719768363286664
Error on this batch = 0.3754527403532489
Error on this batch = 0.38117136753590986
Cost on val dataset after 6123 epochs is = 0.07197664499515527
learning rate =  0.0601272394895963
Initial Cost on Val dataset for this epoch 6123 = 0.071976644995

Error on this batch = 0.3811252285607105
Cost on val dataset after 6154 epochs is = 0.07197072665130202
learning rate =  0.06002610835495964
Initial Cost on Val dataset for this epoch 6154 = 0.07197072665130202
Error on this batch = 0.37535529101084464
Error on this batch = 0.3811237392295177
Cost on val dataset after 6155 epochs is = 0.07197053614783629
learning rate =  0.06002285737421481
Initial Cost on Val dataset for this epoch 6155 = 0.07197053614783629
Error on this batch = 0.37535227683638317
Error on this batch = 0.3811222498475786
Cost on val dataset after 6156 epochs is = 0.0719703456695419
learning rate =  0.06001960709764084
Initial Cost on Val dataset for this epoch 6156 = 0.0719703456695419
Error on this batch = 0.37534926454358697
Error on this batch = 0.3811207604158171
Cost on val dataset after 6157 epochs is = 0.07197015521637407
learning rate =  0.060016357524970844
Initial Cost on Val dataset for this epoch 6157 = 0.07197015521637407
Error on this batch = 0.3753462

Error on this batch = 0.3810745682744354
Cost on val dataset after 6188 epochs is = 0.07196426339344113
learning rate =  0.05991596845916763
Initial Cost on Val dataset for this epoch 6188 = 0.07196426339344113
Error on this batch = 0.3752538622854084
Error on this batch = 0.38107307772504795
Cost on val dataset after 6189 epochs is = 0.07196407372155701
learning rate =  0.05991274127137408
Initial Cost on Val dataset for this epoch 6189 = 0.07196407372155701
Error on this batch = 0.37525091185888637
Error on this batch = 0.38107158715527223
Cost on val dataset after 6190 epochs is = 0.07196388407340361
learning rate =  0.05990951477875804
Initial Cost on Val dataset for this epoch 6190 = 0.07196388407340361
Error on this batch = 0.37524796329981003
Error on this batch = 0.3810700965659667
Cost on val dataset after 6191 epochs is = 0.071963694448941
learning rate =  0.05990628898105749
Initial Cost on Val dataset for this epoch 6191 = 0.071963694448941
Error on this batch = 0.375245016

Error on this batch = 0.38102537398051256
Cost on val dataset after 6221 epochs is = 0.0719580165391166
learning rate =  0.05980983689187239
Initial Cost on Val dataset for this epoch 6221 = 0.0719580165391166
Error on this batch = 0.3751574816830582
Error on this batch = 0.3810238832004449
Cost on val dataset after 6222 epochs is = 0.07195782763003428
learning rate =  0.05980663250735557
Initial Cost on Val dataset for this epoch 6222 = 0.07195782763003428
Error on this batch = 0.37515459263363
Error on this batch = 0.3810223924272305
Cost on val dataset after 6223 epochs is = 0.07195763874343808
learning rate =  0.059803428809443546
Initial Cost on Val dataset for this epoch 6223 = 0.07195763874343808
Error on this batch = 0.3751517054357366
Error on this batch = 0.381020901661659
Cost on val dataset after 6224 epochs is = 0.07195744987929248
learning rate =  0.05980022579787888
Initial Cost on Val dataset for this epoch 6224 = 0.07195744987929248
Error on this batch = 0.375148820088

Error on this batch = 0.3809761860754975
Cost on val dataset after 6254 epochs is = 0.07195179422352273
learning rate =  0.05970445333129425
Initial Cost on Val dataset for this epoch 6254 = 0.07195179422352273
Error on this batch = 0.37506311776028556
Error on this batch = 0.3809746959265822
Cost on val dataset after 6255 epochs is = 0.07195160603841828
learning rate =  0.05970127146956731
Initial Cost on Val dataset for this epoch 6255 = 0.07195160603841828
Error on this batch = 0.37506028952754167
Error on this batch = 0.38097320580942284
Cost on val dataset after 6256 epochs is = 0.0719514178746935
learning rate =  0.05969809028602265
Initial Cost on Val dataset for this epoch 6256 = 0.0719514178746935
Error on this batch = 0.3750574631281931
Error on this batch = 0.38097171572473687
Cost on val dataset after 6257 epochs is = 0.0719512297323169
learning rate =  0.05969490978040736
Initial Cost on Val dataset for this epoch 6257 = 0.0719512297323169
Error on this batch = 0.375054638

Error on this batch = 0.3809270317513521
Cost on val dataset after 6287 epochs is = 0.07194559523614365
learning rate =  0.0595998086001206
Initial Cost on Val dataset for this epoch 6287 = 0.07194559523614365
Error on this batch = 0.3749707508309112
Error on this batch = 0.3809255430170331
Cost on val dataset after 6288 epochs is = 0.07194540774053143
learning rate =  0.05959664898559949
Initial Cost on Val dataset for this epoch 6288 = 0.07194540774053143
Error on this batch = 0.3749679827794474
Error on this batch = 0.3809240543369722
Cost on val dataset after 6289 epochs is = 0.07194522026531668
learning rate =  0.059593490040984985
Initial Cost on Val dataset for this epoch 6289 = 0.07194522026531668
Error on this batch = 0.37496521654093123
Error on this batch = 0.3809225657118138
Cost on val dataset after 6290 epochs is = 0.0719450328104714
learning rate =  0.05959033176602857
Initial Cost on Val dataset for this epoch 6290 = 0.0719450328104714
Error on this batch = 0.3749624521

Cost on val dataset after 6321 epochs is = 0.07193923166544477
learning rate =  0.059492756042942775
Initial Cost on Val dataset for this epoch 6321 = 0.07193923166544477
Error on this batch = 0.3748776501200326
Error on this batch = 0.3808749624367205
Cost on val dataset after 6322 epochs is = 0.07193904484823412
learning rate =  0.059489619066609885
Initial Cost on Val dataset for this epoch 6322 = 0.07193904484823412
Error on this batch = 0.37487494332797383
Error on this batch = 0.38087347597055116
Cost on val dataset after 6323 epochs is = 0.07193885805052364
learning rate =  0.059486482751807235
Initial Cost on Val dataset for this epoch 6323 = 0.07193885805052364
Error on this batch = 0.3748722383253599
Error on this batch = 0.3808719895798977
Cost on val dataset after 6324 epochs is = 0.07193867127228856
learning rate =  0.0594833470982907
Initial Cost on Val dataset for this epoch 6324 = 0.07193867127228856
Error on this batch = 0.3748695351114623
Error on this batch = 0.38087

Cost on val dataset after 6355 epochs is = 0.07193289067543919
learning rate =  0.059386468511364546
Initial Cost on Val dataset for this epoch 6355 = 0.07193289067543919
Error on this batch = 0.37478661861160784
Error on this batch = 0.3808244682569958
Cost on val dataset after 6356 epochs is = 0.07193270450782602
learning rate =  0.05938335389088833
Initial Cost on Val dataset for this epoch 6356 = 0.07193270450782602
Error on this batch = 0.3747839722394028
Error on this batch = 0.38082298466486847
Cost on val dataset after 6357 epochs is = 0.07193251835891622
learning rate =  0.05938023992371476
Initial Cost on Val dataset for this epoch 6357 = 0.07193251835891622
Error on this batch = 0.3747813276306966
Error on this batch = 0.38082150116642355
Cost on val dataset after 6358 epochs is = 0.07193233222868771
learning rate =  0.059377126609604106
Initial Cost on Val dataset for this epoch 6358 = 0.07193233222868771
Error on this batch = 0.3747786847846899
Error on this batch = 0.3808

Cost on val dataset after 6389 epochs is = 0.07192657134029984
learning rate =  0.05928093648899764
Initial Cost on Val dataset for this epoch 6389 = 0.07192657134029984
Error on this batch = 0.3746976264007353
Error on this batch = 0.38077408157300774
Cost on val dataset after 6390 epochs is = 0.07192638579662874
learning rate =  0.05927784394704989
Initial Cost on Val dataset for this epoch 6390 = 0.07192638579662874
Error on this batch = 0.3746950395267908
Error on this batch = 0.3807726014340873
Cost on val dataset after 6391 epochs is = 0.07192620027094847
learning rate =  0.059274752050322656
Initial Cost on Val dataset for this epoch 6391 = 0.07192620027094847
Error on this batch = 0.3746924543880236
Error on this batch = 0.3807711214047114
Cost on val dataset after 6392 epochs is = 0.0719260147632392
learning rate =  0.05927166079858039
Initial Cost on Val dataset for this epoch 6392 = 0.0719260147632392
Error on this batch = 0.3746898709835657
Error on this batch = 0.380769641

Cost on val dataset after 6422 epochs is = 0.07192045779232537
learning rate =  0.059179222001038395
Initial Cost on Val dataset for this epoch 6422 = 0.07192045779232537
Error on this batch = 0.3746131709180113
Error on this batch = 0.3807252971196107
Cost on val dataset after 6423 epochs is = 0.07192027283217507
learning rate =  0.059176150627667665
Initial Cost on Val dataset for this epoch 6423 = 0.07192027283217507
Error on this batch = 0.3746106408359317
Error on this batch = 0.3807238208147733
Cost on val dataset after 6424 epochs is = 0.07192008788938932
learning rate =  0.059173079891808915
Initial Cost on Val dataset for this epoch 6424 = 0.07192008788938932
Error on this batch = 0.37460811245935677
Error on this batch = 0.3807223446328487
Cost on val dataset after 6425 epochs is = 0.07191990296395
learning rate =  0.05917000979323062
Initial Cost on Val dataset for this epoch 6425 = 0.07191990296395
Error on this batch = 0.3746055857873541
Error on this batch = 0.38072086857

Error on this batch = 0.38067664594324013
Cost on val dataset after 6456 epochs is = 0.07191417878220543
learning rate =  0.05907515157070773
Initial Cost on Val dataset for this epoch 6456 = 0.07191417878220543
Error on this batch = 0.3745280992529923
Error on this batch = 0.3806751738842963
Cost on val dataset after 6457 epochs is = 0.07191399440252888
learning rate =  0.059072101743569076
Initial Cost on Val dataset for this epoch 6457 = 0.07191399440252888
Error on this batch = 0.3745256266241961
Error on this batch = 0.3806737019598355
Cost on val dataset after 6458 epochs is = 0.07191381003962327
learning rate =  0.05906905254613715
Initial Cost on Val dataset for this epoch 6458 = 0.07191381003962327
Error on this batch = 0.37452315566818356
Error on this batch = 0.38067223017018237
Cost on val dataset after 6459 epochs is = 0.07191362569347173
learning rate =  0.059066003978184484
Initial Cost on Val dataset for this epoch 6459 = 0.07191362569347173
Error on this batch = 0.3745

Error on this batch = 0.3806281407192954
Cost on val dataset after 6489 epochs is = 0.07190810301730924
learning rate =  0.058974838523859964
Initial Cost on Val dataset for this epoch 6489 = 0.07190810301730924
Error on this batch = 0.37444738023495155
Error on this batch = 0.38062667326212407
Cost on val dataset after 6490 epochs is = 0.07190791918234321
learning rate =  0.05897180935756341
Initial Cost on Val dataset for this epoch 6490 = 0.07190791918234321
Error on this batch = 0.37444496227335805
Error on this batch = 0.38062520594942256
Cost on val dataset after 6491 epochs is = 0.07190773536360716
learning rate =  0.0589687808135278
Initial Cost on Val dataset for this epoch 6491 = 0.07190773536360716
Error on this batch = 0.374442545950804
Error on this batch = 0.380623738781471
Cost on val dataset after 6492 epochs is = 0.07190755156108508
learning rate =  0.058965752891529485
Initial Cost on Val dataset for this epoch 6492 = 0.07190755156108508
Error on this batch = 0.374440

Error on this batch = 0.3805797923923649
Cost on val dataset after 6522 epochs is = 0.07190204494617332
learning rate =  0.05887520337423365
Initial Cost on Val dataset for this epoch 6522 = 0.07190204494617332
Error on this batch = 0.3743684471054591
Error on this batch = 0.38057832984414364
Cost on val dataset after 6523 epochs is = 0.07190186163842735
learning rate =  0.05887219462507805
Initial Cost on Val dataset for this epoch 6523 = 0.07190186163842735
Error on this batch = 0.3743660826674132
Error on this batch = 0.3805768674490183
Cost on val dataset after 6524 epochs is = 0.07190167834639052
learning rate =  0.05886918649086275
Initial Cost on Val dataset for this epoch 6524 = 0.07190167834639052
Error on this batch = 0.374363719832796
Error on this batch = 0.3805754052072313
Cost on val dataset after 6525 epochs is = 0.07190149507004717
learning rate =  0.058866178971367845
Initial Cost on Val dataset for this epoch 6525 = 0.07190149507004717
Error on this batch = 0.37436135

Cost on val dataset after 6555 epochs is = 0.07189600399982651
learning rate =  0.058776238145310876
Initial Cost on Val dataset for this epoch 6555 = 0.07189600399982651
Error on this batch = 0.3742912611512969
Error on this batch = 0.38053015304449955
Cost on val dataset after 6556 epochs is = 0.07189582120224172
learning rate =  0.0587732495736809
Initial Cost on Val dataset for this epoch 6556 = 0.07189582120224172
Error on this batch = 0.3742889490327791
Error on this batch = 0.3805286958316718
Cost on val dataset after 6557 epochs is = 0.07189563841985079
learning rate =  0.05877026160979301
Initial Cost on Val dataset for this epoch 6557 = 0.07189563841985079
Error on this batch = 0.3742866384802865
Error on this batch = 0.3805272387796404
Cost on val dataset after 6558 epochs is = 0.07189545565263807
learning rate =  0.05876727425343093
Initial Cost on Val dataset for this epoch 6558 = 0.07189545565263807
Error on this batch = 0.37428432949265833
Error on this batch = 0.3805257

Cost on val dataset after 6588 epochs is = 0.07188997961590356
learning rate =  0.058677934993981384
Initial Cost on Val dataset for this epoch 6588 = 0.07188997961590356
Error on this batch = 0.37421578170343994
Error on this batch = 0.3804821510386262
Cost on val dataset after 6589 epochs is = 0.07188979731137499
learning rate =  0.058674966364258924
Initial Cost on Val dataset for this epoch 6589 = 0.07188979731137499
Error on this batch = 0.37421352064222235
Error on this batch = 0.3804806992390335
Cost on val dataset after 6590 epochs is = 0.0718896150215162
learning rate =  0.05867199833520007
Initial Cost on Val dataset for this epoch 6590 = 0.0718896150215162
Error on this batch = 0.3742112611078895
Error on this batch = 0.3804792476067834
Cost on val dataset after 6591 epochs is = 0.07188943274631097
learning rate =  0.05866903090659219
Initial Cost on Val dataset for this epoch 6591 = 0.07188943274631097
Error on this batch = 0.37420900309922867
Error on this batch = 0.380477

Error on this batch = 0.38043433100103213
Cost on val dataset after 6622 epochs is = 0.07188378939373812
learning rate =  0.05857733728813436
Initial Cost on Val dataset for this epoch 6622 = 0.07188378939373812
Error on this batch = 0.37413975487489537
Error on this batch = 0.38043288481889426
Cost on val dataset after 6623 epochs is = 0.07188360757875624
learning rate =  0.05857438896231459
Initial Cost on Val dataset for this epoch 6623 = 0.07188360757875624
Error on this batch = 0.3741375450380744
Error on this batch = 0.38043143880994224
Cost on val dataset after 6624 epochs is = 0.07188342577787937
learning rate =  0.05857144122998803
Initial Cost on Val dataset for this epoch 6624 = 0.07188342577787937
Error on this batch = 0.3741353366860309
Error on this batch = 0.38042999297434427
Cost on val dataset after 6625 epochs is = 0.07188324399109036
learning rate =  0.05856849409094562
Initial Cost on Val dataset for this epoch 6625 = 0.07188324399109036
Error on this batch = 0.3741

Error on this batch = 0.3803852589521441
Cost on val dataset after 6656 epochs is = 0.07187761549282118
learning rate =  0.058477425913911155
Initial Cost on Val dataset for this epoch 6656 = 0.07187761549282118
Error on this batch = 0.374065445737904
Error on this batch = 0.38038381875016386
Cost on val dataset after 6657 epochs is = 0.07187743414755082
learning rate =  0.058474497650215035
Initial Cost on Val dataset for this epoch 6657 = 0.07187743414755082
Error on this batch = 0.37406328566424124
Error on this batch = 0.3803823787268604
Cost on val dataset after 6658 epochs is = 0.0718772528157795
learning rate =  0.05847156997296335
Initial Cost on Val dataset for this epoch 6658 = 0.0718772528157795
Error on this batch = 0.3740611270314561
Error on this batch = 0.38038093888238905
Cost on val dataset after 6659 epochs is = 0.07187707149748863
learning rate =  0.0584686428819506
Initial Cost on Val dataset for this epoch 6659 = 0.07187707149748863
Error on this batch = 0.37405896

Error on this batch = 0.3803335253411116
Cost on val dataset after 6692 epochs is = 0.07187109543147124
learning rate =  0.058372376419596736
Initial Cost on Val dataset for this epoch 6692 = 0.07187109543147124
Error on this batch = 0.3739885813347872
Error on this batch = 0.38033209166756593
Cost on val dataset after 6693 epochs is = 0.07187091456005676
learning rate =  0.058369469139164624
Initial Cost on Val dataset for this epoch 6693 = 0.07187091456005676
Error on this batch = 0.3739864722910917
Error on this batch = 0.38033065817813916
Cost on val dataset after 6694 epochs is = 0.07187073370143661
learning rate =  0.05836656243784318
Initial Cost on Val dataset for this epoch 6694 = 0.07187073370143661
Error on this batch = 0.37398436463983487
Error on this batch = 0.3803292248729789
Cost on val dataset after 6695 epochs is = 0.07187055285559009
learning rate =  0.0583636563154306
Initial Cost on Val dataset for this epoch 6695 = 0.07187055285559009
Error on this batch = 0.37398

Error on this batch = 0.38028345727863705
Cost on val dataset after 6727 epochs is = 0.0718647724056436
learning rate =  0.05827096485952381
Initial Cost on Val dataset for this epoch 6727 = 0.0718647724056436
Error on this batch = 0.3739155842291466
Error on this batch = 0.3802820301364554
Cost on val dataset after 6728 epochs is = 0.07186459196927324
learning rate =  0.0582680777285247
Initial Cost on Val dataset for this epoch 6728 = 0.07186459196927324
Error on this batch = 0.37391352309279463
Error on this batch = 0.3802806031835074
Cost on val dataset after 6729 epochs is = 0.07186441154493166
learning rate =  0.05826519116963122
Initial Cost on Val dataset for this epoch 6729 = 0.07186441154493166
Error on this batch = 0.3739114632998453
Error on this batch = 0.3802791764199384
Cost on val dataset after 6730 epochs is = 0.0718642311325956
learning rate =  0.05826230518264499
Initial Cost on Val dataset for this epoch 6730 = 0.0718642311325956
Error on this batch = 0.373909404848

Error on this batch = 0.380233620847912
Cost on val dataset after 6762 epochs is = 0.07185846413354477
learning rate =  0.058170254384239756
Initial Cost on Val dataset for this epoch 6762 = 0.07185846413354477
Error on this batch = 0.373844234344453
Error on this batch = 0.3802322004154833
Cost on val dataset after 6763 epochs is = 0.0718582841038255
learning rate =  0.058167387159551345
Initial Cost on Val dataset for this epoch 6763 = 0.0718582841038255
Error on this batch = 0.3738422193664874
Error on this batch = 0.3802307801773908
Cost on val dataset after 6764 epochs is = 0.0718581040852725
learning rate =  0.05816452050008391
Initial Cost on Val dataset for this epoch 6764 = 0.0718581040852725
Error on this batch = 0.37384020568096515
Error on this batch = 0.38022936013378167
Cost on val dataset after 6765 epochs is = 0.07185792407785949
learning rate =  0.058161654405642485
Initial Cost on Val dataset for this epoch 6765 = 0.07185792407785949
Error on this batch = 0.3738381932

Error on this batch = 0.3801840223110806
Cost on val dataset after 6797 epochs is = 0.07185216956059419
learning rate =  0.05807023655561211
Initial Cost on Val dataset for this epoch 6797 = 0.07185216956059419
Error on this batch = 0.37377446932445524
Error on this batch = 0.3801826087686652
Cost on val dataset after 6798 epochs is = 0.07185198990545652
learning rate =  0.05806738899828123
Initial Cost on Val dataset for this epoch 6798 = 0.07185198990545652
Error on this batch = 0.373772498688793
Error on this batch = 0.38018119542580653
Cost on val dataset after 6799 epochs is = 0.07185181026050924
learning rate =  0.058064541999404425
Initial Cost on Val dataset for this epoch 6799 = 0.07185181026050924
Error on this batch = 0.3737705292927087
Error on this batch = 0.3801797822826562
Cost on val dataset after 6800 epochs is = 0.07185163062572268
learning rate =  0.05806169555879006
Initial Cost on Val dataset for this epoch 6800 = 0.07185163062572268
Error on this batch = 0.3737685

Cost on val dataset after 6831 epochs is = 0.07184606682009215
learning rate =  0.05797373175669021
Initial Cost on Val dataset for this epoch 6831 = 0.07184606682009215
Error on this batch = 0.37370815384268613
Error on this batch = 0.3801346680671195
Cost on val dataset after 6832 epochs is = 0.07184588749412923
learning rate =  0.05797090307984958
Initial Cost on Val dataset for this epoch 6832 = 0.07184588749412923
Error on this batch = 0.3737062244809646
Error on this batch = 0.3801332616008544
Cost on val dataset after 6833 epochs is = 0.07184570817728668
learning rate =  0.057968074954999754
Initial Cost on Val dataset for this epoch 6833 = 0.07184570817728668
Error on this batch = 0.37370429630569946
Error on this batch = 0.3801318553395819
Cost on val dataset after 6834 epochs is = 0.07184552886953109
learning rate =  0.057965247381952245
Initial Cost on Val dataset for this epoch 6834 = 0.07184552886953109
Error on this batch = 0.3737023693153032
Error on this batch = 0.38013

Cost on val dataset after 6866 epochs is = 0.07183979561338405
learning rate =  0.05787505527301568
Initial Cost on Val dataset for this epoch 6866 = 0.07183979561338405
Error on this batch = 0.3736413216605283
Error on this batch = 0.38008556477290756
Cost on val dataset after 6867 epochs is = 0.07183961658600754
learning rate =  0.057872245804308685
Initial Cost on Val dataset for this epoch 6867 = 0.07183961658600754
Error on this batch = 0.3736394328711515
Error on this batch = 0.3800841655776836
Cost on val dataset after 6868 epochs is = 0.07183943756651374
learning rate =  0.057869436881050174
Initial Cost on Val dataset for this epoch 6868 = 0.07183943756651374
Error on this batch = 0.37363754521185627
Error on this batch = 0.38008276659316925
Cost on val dataset after 6869 epochs is = 0.0718392585548652
learning rate =  0.05786662850305483
Initial Cost on Val dataset for this epoch 6869 = 0.0718392585548652
Error on this batch = 0.3736356586810086
Error on this batch = 0.380081

Cost on val dataset after 6901 epochs is = 0.07183353409403798
learning rate =  0.057777047202060894
Initial Cost on Val dataset for this epoch 6901 = 0.07183353409403798
Error on this batch = 0.37357587567420764
Error on this batch = 0.38003671942472805
Cost on val dataset after 6902 epochs is = 0.07183335531949268
learning rate =  0.0577742567143868
Initial Cost on Val dataset for this epoch 6902 = 0.07183335531949268
Error on this batch = 0.3735740254577749
Error on this batch = 0.3800353277059429
Cost on val dataset after 6903 epochs is = 0.07183317655144864
learning rate =  0.05777146676572908
Initial Cost on Val dataset for this epoch 6903 = 0.07183317655144864
Error on this batch = 0.37357217631354844
Error on this batch = 0.3800339362038993
Cost on val dataset after 6904 epochs is = 0.07183299778986418
learning rate =  0.05776867735590553
Initial Cost on Val dataset for this epoch 6904 = 0.07183299778986418
Error on this batch = 0.37357032823985575
Error on this batch = 0.38003

Cost on val dataset after 6936 epochs is = 0.07182728057488456
learning rate =  0.05767969966042452
Initial Cost on Val dataset for this epoch 6936 = 0.07182728057488456
Error on this batch = 0.37351174506647594
Error on this batch = 0.3799881394013069
Cost on val dataset after 6937 epochs is = 0.07182710200237725
learning rate =  0.05767692793049904
Initial Cost on Val dataset for this epoch 6937 = 0.07182710200237725
Error on this batch = 0.37350993137617755
Error on this batch = 0.3799867553755578
Cost on val dataset after 6938 epochs is = 0.0718269234348469
learning rate =  0.05767415673326562
Initial Cost on Val dataset for this epoch 6938 = 0.0718269234348469
Error on this batch = 0.3735081186990282
Error on this batch = 0.37998537157290324
Cost on val dataset after 6939 epochs is = 0.07182674487224797
learning rate =  0.05767138606854511
Initial Cost on Val dataset for this epoch 6939 = 0.07182674487224797
Error on this batch = 0.373506307033327
Error on this batch = 0.379983987

Cost on val dataset after 6971 epochs is = 0.07182103319451674
learning rate =  0.057583004896863854
Initial Cost on Val dataset for this epoch 6971 = 0.07182103319451674
Error on this batch = 0.3734488575696181
Error on this batch = 0.3799398324719563
Cost on val dataset after 6972 epochs is = 0.07182085476838893
learning rate =  0.05758025170513617
Initial Cost on Val dataset for this epoch 6972 = 0.07182085476838893
Error on this batch = 0.37344707832414925
Error on this batch = 0.37993845636681783
Cost on val dataset after 6973 epochs is = 0.07182067634558031
learning rate =  0.0575774990398817
Initial Cost on Val dataset for this epoch 6973 = 0.07182067634558031
Error on this batch = 0.3734453000320122
Error on this batch = 0.3799370804914294
Cost on val dataset after 6974 epochs is = 0.0718204979260418
learning rate =  0.05757474690092431
Initial Cost on Val dataset for this epoch 6974 = 0.0718204979260418
Error on this batch = 0.37344352269149184
Error on this batch = 0.37993570

Cost on val dataset after 7006 epochs is = 0.07181478992848592
learning rate =  0.05748695528942901
Initial Cost on Val dataset for this epoch 7006 = 0.07181478992848592
Error on this batch = 0.37338713997850476
Error on this batch = 0.3798918067763642
Cost on val dataset after 7007 epochs is = 0.07181461158869559
learning rate =  0.05748422042000103
Initial Cost on Val dataset for this epoch 7007 = 0.07181461158869559
Error on this batch = 0.37338539307851604
Error on this batch = 0.3798904388294831
Cost on val dataset after 7008 epochs is = 0.07181443325045188
learning rate =  0.05748148607093064
Initial Cost on Val dataset for this epoch 7008 = 0.07181443325045188
Error on this batch = 0.3733836470717997
Error on this batch = 0.37988907111927767
Cost on val dataset after 7009 epochs is = 0.07181425491370269
learning rate =  0.05747875224204462
Initial Cost on Val dataset for this epoch 7009 = 0.07181425491370269
Error on this batch = 0.3733819019566419
Error on this batch = 0.379887

Cost on val dataset after 7041 epochs is = 0.07180854861176483
learning rate =  0.05739154334267323
Initial Cost on Val dataset for this epoch 7041 = 0.07180854861176483
Error on this batch = 0.3733265187791425
Error on this batch = 0.37984407078582316
Cost on val dataset after 7042 epochs is = 0.0718083702946988
learning rate =  0.057388826583221
Initial Cost on Val dataset for this epoch 7042 = 0.0718083702946988
Error on this batch = 0.37332480212665387
Error on this batch = 0.3798427112436601
Cost on val dataset after 7043 epochs is = 0.07180819197731991
learning rate =  0.057386110338111716
Initial Cost on Val dataset for this epoch 7043 = 0.07180819197731991
Error on this batch = 0.3733230863077246
Error on this batch = 0.3798413519453314
Cost on val dataset after 7044 epochs is = 0.07180801365957412
learning rate =  0.05738339460717501
Initial Cost on Val dataset for this epoch 7044 = 0.07180801365957412
Error on this batch = 0.3733213713206597
Error on this batch = 0.3798399928

Cost on val dataset after 7076 epochs is = 0.07180230697266989
learning rate =  0.057296761684937005
Initial Cost on Val dataset for this epoch 7076 = 0.07180230697266989
Error on this batch = 0.3732669208642993
Error on this batch = 0.37979663325895474
Cost on val dataset after 7077 epochs is = 0.07180212861227352
learning rate =  0.05729406282663414
Initial Cost on Val dataset for this epoch 7077 = 0.07180212861227352
Error on this batch = 0.3732652323840202
Error on this batch = 0.3797952823755912
Cost on val dataset after 7078 epochs is = 0.07180195024965447
learning rate =  0.057291364476758415
Initial Cost on Val dataset for this epoch 7078 = 0.07180195024965447
Error on this batch = 0.3732635446785576
Error on this batch = 0.3797939317434249
Cost on val dataset after 7079 epochs is = 0.07180177188475784
learning rate =  0.057288666635142226
Initial Cost on Val dataset for this epoch 7079 = 0.07180177188475784
Error on this batch = 0.3732618577462533
Error on this batch = 0.37979

Error on this batch = 0.37975084542994103
Cost on val dataset after 7111 epochs is = 0.07179606267710474
learning rate =  0.05720260306570341
Initial Cost on Val dataset for this epoch 7111 = 0.07179606267710474
Error on this batch = 0.3732082742920829
Error on this batch = 0.3797495032079024
Cost on val dataset after 7112 epochs is = 0.07179588420627543
learning rate =  0.05719992190314661
Initial Cost on Val dataset for this epoch 7112 = 0.07179588420627543
Error on this batch = 0.3732066119531848
Error on this batch = 0.37974816124440497
Cost on val dataset after 7113 epochs is = 0.07179570573130613
learning rate =  0.05719724124319783
Initial Cost on Val dataset for this epoch 7113 = 0.07179570573130613
Error on this batch = 0.37320495033194223
Error on this batch = 0.37974681953966755
Cost on val dataset after 7114 epochs is = 0.0717955272521424
learning rate =  0.057194561085692236
Initial Cost on Val dataset for this epoch 7114 = 0.0717955272521424
Error on this batch = 0.373203

Error on this batch = 0.3797053562421814
Cost on val dataset after 7145 epochs is = 0.07178999202243168
learning rate =  0.05711172451893234
Initial Cost on Val dataset for this epoch 7145 = 0.07178999202243168
Error on this batch = 0.3731521478594826
Error on this batch = 0.3797040229346316
Cost on val dataset after 7146 epochs is = 0.07178981338054853
learning rate =  0.05710906035302285
Initial Cost on Val dataset for this epoch 7146 = 0.07178981338054853
Error on this batch = 0.3731505090319645
Error on this batch = 0.37970268989318656
Cost on val dataset after 7147 epochs is = 0.07178963473269925
learning rate =  0.05710639668415923
Initial Cost on Val dataset for this epoch 7147 = 0.07178963473269925
Error on this batch = 0.37314887086861276
Error on this batch = 0.37970135711807246
Cost on val dataset after 7148 epochs is = 0.07178945607883115
learning rate =  0.05710373351217925
Initial Cost on Val dataset for this epoch 7148 = 0.07178945607883115
Error on this batch = 0.373147

Error on this batch = 0.37965885030691016
Cost on val dataset after 7180 epochs is = 0.0717837356655036
learning rate =  0.057018773396359745
Initial Cost on Val dataset for this epoch 7180 = 0.0717837356655036
Error on this batch = 0.373095174120833
Error on this batch = 0.37965752644961975
Cost on val dataset after 7181 epochs is = 0.07178355678393535
learning rate =  0.05701612653100471
Initial Cost on Val dataset for this epoch 7181 = 0.07178355678393535
Error on this batch = 0.373093557638138
Error on this batch = 0.37965620286653445
Cost on val dataset after 7182 epochs is = 0.07178337789460285
learning rate =  0.0570134801570612
Initial Cost on Val dataset for this epoch 7182 = 0.07178337789460285
Error on this batch = 0.3730919417670803
Error on this batch = 0.379654879557892
Cost on val dataset after 7183 epochs is = 0.07178319899745637
learning rate =  0.05701083427436959
Initial Cost on Val dataset for this epoch 7183 = 0.07178319899745637
Error on this batch = 0.37309032650

Error on this batch = 0.37961268002979387
Cost on val dataset after 7215 epochs is = 0.07177746987093564
learning rate =  0.05692642445782765
Initial Cost on Val dataset for this epoch 7215 = 0.07177746987093564
Error on this batch = 0.37303895172174645
Error on this batch = 0.37961136591375394
Cost on val dataset after 7216 epochs is = 0.07177729068873241
learning rate =  0.05692379469737406
Initial Cost on Val dataset for this epoch 7216 = 0.07177729068873241
Error on this batch = 0.37303735579348485
Error on this batch = 0.3796100520804919
Cost on val dataset after 7217 epochs is = 0.07177711149708993
learning rate =  0.05692116542278846
Initial Cost on Val dataset for this epoch 7217 = 0.07177711149708993
Error on this batch = 0.37303576042720055
Error on this batch = 0.37960873853026206
Cost on val dataset after 7218 epochs is = 0.07177693229596245
learning rate =  0.05691853663391377
Initial Cost on Val dataset for this epoch 7218 = 0.07177693229596245
Error on this batch = 0.373

Error on this batch = 0.3795668559158955
Cost on val dataset after 7250 epochs is = 0.07177119258521875
learning rate =  0.056834670909150814
Initial Cost on Val dataset for this epoch 7250 = 0.07177119258521875
Error on this batch = 0.3729834197394122
Error on this batch = 0.3795655518523182
Cost on val dataset after 7251 epochs is = 0.07177101304631582
learning rate =  0.05683205806109135
Initial Cost on Val dataset for this epoch 7251 = 0.07177101304631582
Error on this batch = 0.3729818426784237
Error on this batch = 0.37956424808079464
Cost on val dataset after 7252 epochs is = 0.07177083349645391
learning rate =  0.056829445693445245
Initial Cost on Val dataset for this epoch 7252 = 0.07177083349645391
Error on this batch = 0.372980266132757
Error on this batch = 0.3795629446016031
Cost on val dataset after 7253 epochs is = 0.07177065393559211
learning rate =  0.056826833806057915
Initial Cost on Val dataset for this epoch 7253 = 0.07177065393559211
Error on this batch = 0.372978

Cost on val dataset after 7284 epochs is = 0.07176508187662106
learning rate =  0.056746102665415896
Initial Cost on Val dataset for this epoch 7284 = 0.07176508187662106
Error on this batch = 0.37293008116064713
Error on this batch = 0.37952138932958945
Cost on val dataset after 7285 epochs is = 0.07176490194305196
learning rate =  0.05674350606512558
Initial Cost on Val dataset for this epoch 7285 = 0.07176490194305196
Error on this batch = 0.3729285209119299
Error on this batch = 0.379520095659282
Cost on val dataset after 7286 epochs is = 0.0717647219972165
learning rate =  0.05674090994003319
Initial Cost on Val dataset for this epoch 7286 = 0.0717647219972165
Error on this batch = 0.3729269611360535
Error on this batch = 0.37951880229131313
Cost on val dataset after 7287 epochs is = 0.07176454203907885
learning rate =  0.05673831428998657
Initial Cost on Val dataset for this epoch 7287 = 0.07176454203907885
Error on this batch = 0.3729254018318095
Error on this batch = 0.37951750

Cost on val dataset after 7319 epochs is = 0.07175877667671109
learning rate =  0.05665550340571734
Initial Cost on Val dataset for this epoch 7319 = 0.07175877667671109
Error on this batch = 0.3728757460020566
Error on this batch = 0.3794762928659696
Cost on val dataset after 7320 epochs is = 0.07175859629349829
learning rate =  0.05665292334726823
Initial Cost on Val dataset for this epoch 7320 = 0.07175859629349829
Error on this batch = 0.37287420159812473
Error on this batch = 0.37947500997117184
Cost on val dataset after 7321 epochs is = 0.0717584158968555
learning rate =  0.05665034375873232
Initial Cost on Val dataset for this epoch 7321 = 0.0717584158968555
Error on this batch = 0.372872657626035
Error on this batch = 0.3794737273903742
Cost on val dataset after 7322 epochs is = 0.07175823548675216
learning rate =  0.056647764639959866
Initial Cost on Val dataset for this epoch 7322 = 0.07175823548675216
Error on this batch = 0.37287111408465334
Error on this batch = 0.37947244

Cost on val dataset after 7354 epochs is = 0.07175245508114363
learning rate =  0.05656547998175851
Initial Cost on Val dataset for this epoch 7354 = 0.07175245508114363
Error on this batch = 0.3728219414921377
Error on this batch = 0.37943158079609873
Cost on val dataset after 7355 epochs is = 0.0717522742106578
learning rate =  0.05656291628161079
Initial Cost on Val dataset for this epoch 7355 = 0.0717522742106578
Error on this batch = 0.37282041154072204
Error on this batch = 0.37943030911391995
Cost on val dataset after 7356 epochs is = 0.07175209332575866
learning rate =  0.05656035304617511
Initial Cost on Val dataset for this epoch 7356 = 0.07175209332575866
Error on this batch = 0.37281888198259594
Error on this batch = 0.37942903775917186
Cost on val dataset after 7357 epochs is = 0.07175191242642064
learning rate =  0.05655779027530408
Initial Cost on Val dataset for this epoch 7357 = 0.07175191242642064
Error on this batch = 0.3728173528166901
Error on this batch = 0.379427

Cost on val dataset after 7389 epochs is = 0.07174611587752541
learning rate =  0.0564760260190433
Initial Cost on Val dataset for this epoch 7389 = 0.07174611587752541
Error on this batch = 0.3727686202835469
Error on this batch = 0.3793872695402057
Cost on val dataset after 7390 epochs is = 0.07174593448818868
learning rate =  0.056473478496552626
Initial Cost on Val dataset for this epoch 7390 = 0.07174593448818868
Error on this batch = 0.37276710347123315
Error on this batch = 0.37938600957580365
Cost on val dataset after 7391 epochs is = 0.07174575308361822
learning rate =  0.0564709314336547
Initial Cost on Val dataset for this epoch 7391 = 0.07174575308361822
Error on this batch = 0.3727655870156964
Error on this batch = 0.3793847499543759
Cost on val dataset after 7392 epochs is = 0.0717455716637927
learning rate =  0.0564683848302044
Initial Cost on Val dataset for this epoch 7392 = 0.0717455716637927
Error on this batch = 0.37276407091591923
Error on this batch = 0.3793834906

Cost on val dataset after 7423 epochs is = 0.0717399399712204
learning rate =  0.05638966722074584
Initial Cost on Val dataset for this epoch 7423 = 0.0717399399712204
Error on this batch = 0.3727172427864272
Error on this batch = 0.3793446261039572
Cost on val dataset after 7424 epochs is = 0.07173975805259028
learning rate =  0.05638713524340776
Initial Cost on Val dataset for this epoch 7424 = 0.07173975805259028
Error on this batch = 0.3727157375411514
Error on this batch = 0.3793433780798215
Cost on val dataset after 7425 epochs is = 0.07173957611805738
learning rate =  0.05638460372076616
Initial Cost on Val dataset for this epoch 7425 = 0.07173957611805738
Error on this batch = 0.37271423261874076
Error on this batch = 0.37934213041594605
Cost on val dataset after 7426 epochs is = 0.07173939416760364
learning rate =  0.056382072652678146
Initial Cost on Val dataset for this epoch 7426 = 0.07173939416760364
Error on this batch = 0.37271272801821753
Error on this batch = 0.3793408

Cost on val dataset after 7457 epochs is = 0.07173374571103355
learning rate =  0.05630383422396214
Initial Cost on Val dataset for this epoch 7457 = 0.07173374571103355
Error on this batch = 0.37266623977011554
Error on this batch = 0.3793023986999133
Cost on val dataset after 7458 epochs is = 0.0717335632419357
learning rate =  0.056301317626532246
Initial Cost on Val dataset for this epoch 7458 = 0.0717335632419357
Error on this batch = 0.37266474495968976
Error on this batch = 0.3793011632375429
Cost on val dataset after 7459 epochs is = 0.07173338075635387
learning rate =  0.05629880147897677
Initial Cost on Val dataset for this epoch 7459 = 0.07173338075635387
Error on this batch = 0.37266325043943566
Error on this batch = 0.37929992815469316
Cost on val dataset after 7460 epochs is = 0.07173319825427185
learning rate =  0.05629628578115497
Initial Cost on Val dataset for this epoch 7460 = 0.07173319825427185
Error on this batch = 0.37266175620840813
Error on this batch = 0.37929

Cost on val dataset after 7492 epochs is = 0.07172734937953401
learning rate =  0.05621602007147136
Initial Cost on Val dataset for this epoch 7492 = 0.07172734937953401
Error on this batch = 0.3726140879175888
Error on this batch = 0.379259387240425
Cost on val dataset after 7493 epochs is = 0.07172716632396339
learning rate =  0.0562135191363283
Initial Cost on Val dataset for this epoch 7493 = 0.07172716632396339
Error on this batch = 0.3726126027066195
Error on this batch = 0.37925816541768326
Cost on val dataset after 7494 epochs is = 0.07172698325134931
learning rate =  0.0562110186461717
Initial Cost on Val dataset for this epoch 7494 = 0.07172698325134931
Error on this batch = 0.372611117753274
Error on this batch = 0.3792569439955189
Cost on val dataset after 7495 epochs is = 0.07172680016167567
learning rate =  0.05620851860086303
Initial Cost on Val dataset for this epoch 7495 = 0.07172680016167567
Error on this batch = 0.37260963305663936
Error on this batch = 0.37925572297

Cost on val dataset after 7526 epochs is = 0.07172111583078847
learning rate =  0.056131237087842574
Initial Cost on Val dataset for this epoch 7526 = 0.07172111583078847
Error on this batch = 0.37256372985347197
Error on this batch = 0.37921807356451437
Cost on val dataset after 7527 epochs is = 0.07172093218647006
learning rate =  0.05612875120488592
Initial Cost on Val dataset for this epoch 7527 = 0.07172093218647006
Error on this batch = 0.3725622528954975
Error on this batch = 0.3792168656941321
Cost on val dataset after 7528 epochs is = 0.07172074852453429
learning rate =  0.05612626576223975
Initial Cost on Val dataset for this epoch 7528 = 0.07172074852453429
Error on this batch = 0.3725607761646839
Error on this batch = 0.37921565824415887
Cost on val dataset after 7529 epochs is = 0.07172056484496296
learning rate =  0.05612378075976762
Initial Cost on Val dataset for this epoch 7529 = 0.07172056484496296
Error on this batch = 0.37255929966015544
Error on this batch = 0.3792

Error on this batch = 0.37917724489547594
Cost on val dataset after 7561 epochs is = 0.07171467767177417
learning rate =  0.0560444922790984
Initial Cost on Val dataset for this epoch 7561 = 0.07171467767177417
Error on this batch = 0.37251216582417973
Error on this batch = 0.37917605159941786
Cost on val dataset after 7562 epochs is = 0.07171449339918029
learning rate =  0.05604202172620654
Initial Cost on Val dataset for this epoch 7562 = 0.07171449339918029
Error on this batch = 0.3725106963047979
Error on this batch = 0.37917485873972334
Cost on val dataset after 7563 epochs is = 0.07171430910825016
learning rate =  0.056039551608884665
Initial Cost on Val dataset for this epoch 7563 = 0.07171430910825016
Error on this batch = 0.37250922698277295
Error on this batch = 0.37917366631675536
Cost on val dataset after 7564 epochs is = 0.0717141247989601
learning rate =  0.05603708192699838
Initial Cost on Val dataset for this epoch 7564 = 0.0717141247989601
Error on this batch = 0.37250

Error on this batch = 0.3791357410835522
Cost on val dataset after 7596 epochs is = 0.07170821705302265
learning rate =  0.055958281215656096
Initial Cost on Val dataset for this epoch 7596 = 0.07170821705302265
Error on this batch = 0.3724608447846579
Error on this batch = 0.3791345632163396
Cost on val dataset after 7597 epochs is = 0.07170803212290924
learning rate =  0.055955825828148974
Initial Cost on Val dataset for this epoch 7597 = 0.07170803212290924
Error on this batch = 0.3724593816954723
Error on this batch = 0.3791333857924289
Cost on val dataset after 7598 epochs is = 0.07170784717347221
learning rate =  0.055953370871543874
Initial Cost on Val dataset for this epoch 7598 = 0.07170784717347221
Error on this batch = 0.37245791877598594
Error on this batch = 0.3791322088118051
Cost on val dataset after 7599 epochs is = 0.07170766220467766
learning rate =  0.05595091634570849
Initial Cost on Val dataset for this epoch 7599 = 0.07170766220467766
Error on this batch = 0.37245

Error on this batch = 0.3790947785757773
Cost on val dataset after 7631 epochs is = 0.0717017327577065
learning rate =  0.05587259817662528
Initial Cost on Val dataset for this epoch 7631 = 0.0717017327577065
Error on this batch = 0.37240973281936385
Error on this batch = 0.3790936161099538
Cost on val dataset after 7632 epochs is = 0.07170154712829822
learning rate =  0.05587015779233842
Initial Cost on Val dataset for this epoch 7632 = 0.07170154712829822
Error on this batch = 0.3724082752358357
Error on this batch = 0.3790924540769485
Cost on val dataset after 7633 epochs is = 0.07170136147811595
learning rate =  0.05586771783435679
Initial Cost on Val dataset for this epoch 7633 = 0.07170136147811595
Error on this batch = 0.3724068177969428
Error on this batch = 0.37909129247611434
Cost on val dataset after 7634 epochs is = 0.0717011758071091
learning rate =  0.05586527830255009
Initial Cost on Val dataset for this epoch 7634 = 0.0717011758071091
Error on this batch = 0.37240536050

Error on this batch = 0.3790566412417097
Cost on val dataset after 7664 epochs is = 0.07169559571691597
learning rate =  0.05579228987569171
Initial Cost on Val dataset for this epoch 7664 = 0.07169559571691597
Error on this batch = 0.37236170530397983
Error on this batch = 0.37905549258737614
Cost on val dataset after 7665 epochs is = 0.07169540937178248
learning rate =  0.055789863490985
Initial Cost on Val dataset for this epoch 7665 = 0.07169540937178248
Error on this batch = 0.37236025214484364
Error on this batch = 0.3790543443316313
Cost on val dataset after 7666 epochs is = 0.07169522300386041
learning rate =  0.05578743752831329
Initial Cost on Val dataset for this epoch 7666 = 0.07169522300386041
Error on this batch = 0.3723587991088262
Error on this batch = 0.37905319647298547
Cost on val dataset after 7667 epochs is = 0.07169503661307651
learning rate =  0.055785011987548136
Initial Cost on Val dataset for this epoch 7667 = 0.07169503661307651
Error on this batch = 0.372357

Error on this batch = 0.37901666419840707
Cost on val dataset after 7699 epochs is = 0.0716890595549608
learning rate =  0.05570761668401885
Initial Cost on Val dataset for this epoch 7699 = 0.0716890595549608
Error on this batch = 0.37231091385875337
Error on this batch = 0.37901552841297304
Cost on val dataset after 7700 epochs is = 0.07168887236314166
learning rate =  0.05570520499447586
Initial Cost on Val dataset for this epoch 7700 = 0.07168887236314166
Error on this batch = 0.3723094646324662
Error on this batch = 0.3790143929560479
Cost on val dataset after 7701 epochs is = 0.07168868514546292
learning rate =  0.05570279372250529
Initial Cost on Val dataset for this epoch 7701 = 0.07168868514546292
Error on this batch = 0.37230801550700543
Error on this batch = 0.3790132578250692
Cost on val dataset after 7702 epochs is = 0.07168849790182075
learning rate =  0.055700382867980645
Initial Cost on Val dataset for this epoch 7702 = 0.07168849790182075
Error on this batch = 0.372306

Error on this batch = 0.3789770887749501
Cost on val dataset after 7734 epochs is = 0.07168249172570372
learning rate =  0.05562345518045822
Initial Cost on Val dataset for this epoch 7734 = 0.07168249172570372
Error on this batch = 0.37226024643461986
Error on this batch = 0.37897596276086476
Cost on val dataset after 7735 epochs is = 0.07168230356084838
learning rate =  0.055621058031033586
Initial Cost on Val dataset for this epoch 7735 = 0.07168230356084838
Error on this batch = 0.37225880032130987
Error on this batch = 0.37897483696749135
Cost on val dataset after 7736 epochs is = 0.07168211536594196
learning rate =  0.055618661294785904
Initial Cost on Val dataset for this epoch 7736 = 0.07168211536594196
Error on this batch = 0.37225735428341283
Error on this batch = 0.37897371139123637
Cost on val dataset after 7737 epochs is = 0.07168192714084841
learning rate =  0.05561626497159057
Initial Cost on Val dataset for this epoch 7737 = 0.07168192714084841
Error on this batch = 0.3

Error on this batch = 0.37893778486504853
Cost on val dataset after 7769 epochs is = 0.0716758871428817
learning rate =  0.055539799978104425
Initial Cost on Val dataset for this epoch 7769 = 0.0716758871428817
Error on this batch = 0.37220967157708396
Error on this batch = 0.37893666428993755
Cost on val dataset after 7770 epochs is = 0.07167569784016017
learning rate =  0.05553741721607918
Initial Cost on Val dataset for this epoch 7770 = 0.07167569784016017
Error on this batch = 0.3722082275647925
Error on this batch = 0.378935543797093
Cost on val dataset after 7771 epochs is = 0.07167550850217963
learning rate =  0.05553503486290124
Initial Cost on Val dataset for this epoch 7771 = 0.07167550850217963
Error on this batch = 0.3722067835944504
Error on this batch = 0.3789344233822712
Cost on val dataset after 7772 epochs is = 0.07167531912878031
learning rate =  0.05553265291844784
Initial Cost on Val dataset for this epoch 7772 = 0.07167531912878031
Error on this batch = 0.37220533

Cost on val dataset after 7803 epochs is = 0.07166943009919635
learning rate =  0.05545901470047176
Initial Cost on Val dataset for this epoch 7803 = 0.07166943009919635
Error on this batch = 0.3721605915163895
Error on this batch = 0.3788985856048467
Cost on val dataset after 7804 epochs is = 0.07166923950632727
learning rate =  0.05545664577070507
Initial Cost on Val dataset for this epoch 7804 = 0.07166923950632727
Error on this batch = 0.3721591482405506
Error on this batch = 0.37889746535312696
Cost on val dataset after 7805 epochs is = 0.07166904887263842
learning rate =  0.05545427724564152
Initial Cost on Val dataset for this epoch 7805 = 0.07166904887263842
Error on this batch = 0.37215770496278566
Error on this batch = 0.3788963450334337
Cost on val dataset after 7806 epochs is = 0.07166885819796623
learning rate =  0.05545190912516011
Initial Cost on Val dataset for this epoch 7806 = 0.07166885819796623
Error on this batch = 0.3721562616816226
Error on this batch = 0.3788952

Cost on val dataset after 7838 epochs is = 0.07166273401082936
learning rate =  0.05537634216790002
Initial Cost on Val dataset for this epoch 7838 = 0.07166273401082936
Error on this batch = 0.3721100655858092
Error on this batch = 0.3788593096145191
Cost on val dataset after 7839 epochs is = 0.07166254189475103
learning rate =  0.05537398733109806
Initial Cost on Val dataset for this epoch 7839 = 0.07166254189475103
Error on this batch = 0.37210862130588124
Error on this batch = 0.37885818458864673
Cost on val dataset after 7840 epochs is = 0.07166234973242196
learning rate =  0.05537163289479559
Initial Cost on Val dataset for this epoch 7840 = 0.07166234973242196
Error on this batch = 0.3721071769671693
Error on this batch = 0.3788570593585146
Cost on val dataset after 7841 epochs is = 0.07166215752370028
learning rate =  0.05536927885887341
Initial Cost on Val dataset for this epoch 7841 = 0.07166215752370028
Error on this batch = 0.3721057325679124
Error on this batch = 0.3788559

Cost on val dataset after 7872 epochs is = 0.071656175319561
learning rate =  0.055296501685972936
Initial Cost on Val dataset for this epoch 7872 = 0.071656175319561
Error on this batch = 0.3720609163401128
Error on this batch = 0.37882092510324794
Cost on val dataset after 7873 epochs is = 0.07165598155810296
learning rate =  0.055294160399591695
Initial Cost on Val dataset for this epoch 7873 = 0.07165598155810296
Error on this batch = 0.3720594690481277
Error on this batch = 0.3788197914053441
Cost on val dataset after 7874 epochs is = 0.07165578774622543
learning rate =  0.05529181950968587
Initial Cost on Val dataset for this epoch 7874 = 0.07165578774622543
Error on this batch = 0.3720580216354082
Error on this batch = 0.37881865740571796
Cost on val dataset after 7875 epochs is = 0.07165559388382892
learning rate =  0.05528947901613796
Initial Cost on Val dataset for this epoch 7875 = 0.07165559388382892
Error on this batch = 0.3720565741001063
Error on this batch = 0.378817523

Error on this batch = 0.3787821987633717
Cost on val dataset after 7907 epochs is = 0.07164936309615724
learning rate =  0.05521479179918915
Initial Cost on Val dataset for this epoch 7907 = 0.07164936309615724
Error on this batch = 0.3720101773067253
Error on this batch = 0.3787810538086418
Cost on val dataset after 7908 epochs is = 0.07164916752067452
learning rate =  0.055212464319870994
Initial Cost on Val dataset for this epoch 7908 = 0.07164916752067452
Error on this batch = 0.3720087247065555
Error on this batch = 0.3787799084995618
Cost on val dataset after 7909 epochs is = 0.07164897189219306
learning rate =  0.05521013723294588
Initial Cost on Val dataset for this epoch 7909 = 0.07164897189219306
Error on this batch = 0.37200727192286875
Error on this batch = 0.3787787628352688
Cost on val dataset after 7910 epochs is = 0.0716487762106671
learning rate =  0.05520781053829805
Initial Cost on Val dataset for this epoch 7910 = 0.0716487762106671
Error on this batch = 0.372005818

Error on this batch = 0.3787419107519916
Cost on val dataset after 7942 epochs is = 0.0716424862004461
learning rate =  0.055133562741841675
Initial Cost on Val dataset for this epoch 7942 = 0.0716424862004461
Error on this batch = 0.3719592164450877
Error on this batch = 0.37874075311482175
Cost on val dataset after 7943 epochs is = 0.07164228875271773
learning rate =  0.05513124892772356
Initial Cost on Val dataset for this epoch 7943 = 0.07164228875271773
Error on this batch = 0.3719577564690515
Error on this batch = 0.37873959511382665
Cost on val dataset after 7944 epochs is = 0.07164209125128311
learning rate =  0.0551289355019759
Initial Cost on Val dataset for this epoch 7944 = 0.07164209125128311
Error on this batch = 0.3719562962554963
Error on this batch = 0.3787384367493033
Cost on val dataset after 7945 epochs is = 0.07164189369614779
learning rate =  0.055126622464484645
Initial Cost on Val dataset for this epoch 7945 = 0.07164189369614779
Error on this batch = 0.37195483

Error on this batch = 0.3787011802599596
Cost on val dataset after 7977 epochs is = 0.07163554367808674
learning rate =  0.05505280958408305
Initial Cost on Val dataset for this epoch 7977 = 0.07163554367808674
Error on this batch = 0.3719079677049147
Error on this batch = 0.37870001022919625
Cost on val dataset after 7978 epochs is = 0.07163534436220133
learning rate =  0.05505050929537507
Initial Cost on Val dataset for this epoch 7978 = 0.07163534436220133
Error on this batch = 0.3719064986868489
Error on this batch = 0.3786988398592601
Cost on val dataset after 7979 epochs is = 0.07163514499353663
learning rate =  0.055048209391073626
Initial Cost on Val dataset for this epoch 7979 = 0.07163514499353663
Error on this batch = 0.371905029390951
Error on this batch = 0.37869766915121506
Cost on val dataset after 7980 epochs is = 0.07163494557213912
learning rate =  0.055045909871066306
Initial Cost on Val dataset for this epoch 7980 = 0.07163494557213912
Error on this batch = 0.371903

Error on this batch = 0.3786612159516703
Cost on val dataset after 8011 epochs is = 0.07162873764953237
learning rate =  0.054974814749621514
Initial Cost on Val dataset for this epoch 8011 = 0.07162873764953237
Error on this batch = 0.3718578602267763
Error on this batch = 0.37866003506620904
Cost on val dataset after 8012 epochs is = 0.07162853657061084
learning rate =  0.05497252746795673
Initial Cost on Val dataset for this epoch 8012 = 0.07162853657061084
Error on this batch = 0.37185638131332854
Error on this batch = 0.37865885388479065
Cost on val dataset after 8013 epochs is = 0.07162833544097794
learning rate =  0.05497024056690293
Initial Cost on Val dataset for this epoch 8013 = 0.07162833544097794
Error on this batch = 0.3718549020971865
Error on this batch = 0.37865767240884846
Cost on val dataset after 8014 epochs is = 0.07162813426070759
learning rate =  0.05496795404634928
Initial Cost on Val dataset for this epoch 8014 = 0.07162813426070759
Error on this batch = 0.3718

Error on this batch = 0.3786197184914805
Cost on val dataset after 8046 epochs is = 0.07162167022836992
learning rate =  0.05489498563195962
Initial Cost on Val dataset for this epoch 8046 = 0.07162167022836992
Error on this batch = 0.3718059152499446
Error on this batch = 0.3786185281283676
Cost on val dataset after 8047 epochs is = 0.07162146742223523
learning rate =  0.0548927116060402
Initial Cost on Val dataset for this epoch 8047 = 0.07162146742223523
Error on this batch = 0.37180442549632375
Error on this batch = 0.3786173375216421
Cost on val dataset after 8048 epochs is = 0.07162126456829643
learning rate =  0.05489043795688024
Initial Cost on Val dataset for this epoch 8048 = 0.07162126456829643
Error on this batch = 0.37180293542761095
Error on this batch = 0.37861614667282445
Cost on val dataset after 8049 epochs is = 0.07162106166664446
learning rate =  0.05488816468437052
Initial Cost on Val dataset for this epoch 8049 = 0.07162106166664446
Error on this batch = 0.3718014

Cost on val dataset after 8080 epochs is = 0.0716147485596832
learning rate =  0.05481787946125366
Initial Cost on Val dataset for this epoch 8080 = 0.0716147485596832
Error on this batch = 0.37175508611821245
Error on this batch = 0.37857792070143503
Cost on val dataset after 8081 epochs is = 0.07161454418094453
learning rate =  0.054815618184150976
Initial Cost on Val dataset for this epoch 8081 = 0.07161454418094453
Error on this batch = 0.3717535855940311
Error on this batch = 0.3785767227047934
Cost on val dataset after 8082 epochs is = 0.07161433975764482
learning rate =  0.054813357280119365
Initial Cost on Val dataset for this epoch 8082 = 0.07161433975764482
Error on this batch = 0.37175208475269017
Error on this batch = 0.37857552451648374
Cost on val dataset after 8083 epochs is = 0.07161413528988304
learning rate =  0.054811096749051144
Initial Cost on Val dataset for this epoch 8083 = 0.07161413528988304
Error on this batch = 0.37175058359424484
Error on this batch = 0.378

Cost on val dataset after 8115 epochs is = 0.0716075694430834
learning rate =  0.054738956037378744
Initial Cost on Val dataset for this epoch 8115 = 0.0716075694430834
Error on this batch = 0.371702379697751
Error on this batch = 0.37853588583953096
Cost on val dataset after 8116 epochs is = 0.07160736356421908
learning rate =  0.0547367077539722
Initial Cost on Val dataset for this epoch 8116 = 0.07160736356421908
Error on this batch = 0.37170086813921366
Error on this batch = 0.37853468194917483
Cost on val dataset after 8117 epochs is = 0.07160715764430779
learning rate =  0.05473445983989354
Initial Cost on Val dataset for this epoch 8117 = 0.07160715764430779
Error on this batch = 0.3716993562683471
Error on this batch = 0.37853347791395037
Cost on val dataset after 8118 epochs is = 0.07160695168345066
learning rate =  0.05473221229503659
Initial Cost on Val dataset for this epoch 8118 = 0.07160695168345066
Error on this batch = 0.37169784408536116
Error on this batch = 0.3785322

Cost on val dataset after 8150 epochs is = 0.0716003399206364
learning rate =  0.05466048517569551
Initial Cost on Val dataset for this epoch 8150 = 0.0716003399206364
Error on this batch = 0.37164929083236986
Error on this batch = 0.37849367125701205
Cost on val dataset after 8151 epochs is = 0.07160013266512594
learning rate =  0.05465824975588377
Initial Cost on Val dataset for this epoch 8151 = 0.07160013266512594
Error on this batch = 0.3716477684846135
Error on this batch = 0.37849246299227574
Cost on val dataset after 8152 epochs is = 0.07159992537208756
learning rate =  0.054656014701710116
Initial Cost on Val dataset for this epoch 8152 = 0.07159992537208756
Error on this batch = 0.3716462458333446
Error on this batch = 0.37849125462231953
Cost on val dataset after 8153 epochs is = 0.07159971804162064
learning rate =  0.054653780013069884
Initial Cost on Val dataset for this epoch 8153 = 0.07159971804162064
Error on this batch = 0.3716447228788469
Error on this batch = 0.37849

Cost on val dataset after 8183 epochs is = 0.07159348121223741
learning rate =  0.054586908809603665
Initial Cost on Val dataset for this epoch 8183 = 0.07159348121223741
Error on this batch = 0.37159889468206897
Error on this batch = 0.378453748287118
Cost on val dataset after 8184 epochs is = 0.07159327277022193
learning rate =  0.05458468540082724
Initial Cost on Val dataset for this epoch 8184 = 0.07159327277022193
Error on this batch = 0.3715973624720681
Error on this batch = 0.3784525370561145
Cost on val dataset after 8185 epochs is = 0.07159306429390144
learning rate =  0.054582462354258
Initial Cost on Val dataset for this epoch 8185 = 0.07159306429390144
Error on this batch = 0.3715958299682213
Error on this batch = 0.3784513257503863
Cost on val dataset after 8186 epochs is = 0.07159285578337152
learning rate =  0.05458023966979269
Initial Cost on Val dataset for this epoch 8186 = 0.07159285578337152
Error on this batch = 0.37159429717082304
Error on this batch = 0.378450114

Error on this batch = 0.37841252917047
Cost on val dataset after 8218 epochs is = 0.07158616594770176
learning rate =  0.05450930434214024
Initial Cost on Val dataset for this epoch 8218 = 0.07158616594770176
Error on this batch = 0.371545094385135
Error on this batch = 0.3784113158263459
Cost on val dataset after 8219 epochs is = 0.0715859563608673
learning rate =  0.054507093549456834
Initial Cost on Val dataset for this epoch 8219 = 0.0715859563608673
Error on this batch = 0.3715435520608837
Error on this batch = 0.37841010243230755
Cost on val dataset after 8220 epochs is = 0.0715857467429799
learning rate =  0.054504883115391825
Initial Cost on Val dataset for this epoch 8220 = 0.0715857467429799
Error on this batch = 0.37154200945259436
Error on this batch = 0.37840888898898994
Cost on val dataset after 8221 epochs is = 0.07158553709412958
learning rate =  0.0545026730398434
Initial Cost on Val dataset for this epoch 8221 = 0.07158553709412958
Error on this batch = 0.371540466560

Error on this batch = 0.37837003631171184
Cost on val dataset after 8253 epochs is = 0.07157881251104449
learning rate =  0.05443213931210274
Initial Cost on Val dataset for this epoch 8253 = 0.07157881251104449
Error on this batch = 0.37149094563177687
Error on this batch = 0.3783688215647703
Cost on val dataset after 8254 epochs is = 0.07157860188966121
learning rate =  0.054429941010767126
Initial Cost on Val dataset for this epoch 8254 = 0.07157860188966121
Error on this batch = 0.3714893935080266
Error on this batch = 0.37836760678711934
Cost on val dataset after 8255 epochs is = 0.07157839124026145
learning rate =  0.054427743064511674
Initial Cost on Val dataset for this epoch 8255 = 0.07157839124026145
Error on this batch = 0.3714878411081572
Error on this batch = 0.3783663919792203
Cost on val dataset after 8256 epochs is = 0.07157818056292849
learning rate =  0.05442554547323604
Initial Cost on Val dataset for this epoch 8256 = 0.07157818056292849
Error on this batch = 0.3714

Error on this batch = 0.3783275046762185
Cost on val dataset after 8288 epochs is = 0.07157142462643334
learning rate =  0.05435540938333073
Initial Cost on Val dataset for this epoch 8288 = 0.07157142462643334
Error on this batch = 0.3714364581027364
Error on this batch = 0.37832628909913296
Cost on val dataset after 8289 epochs is = 0.07157121307259456
learning rate =  0.054353223450352296
Initial Cost on Val dataset for this epoch 8289 = 0.07157121307259456
Error on this batch = 0.37143489640946287
Error on this batch = 0.3783250735048
Cost on val dataset after 8290 epochs is = 0.07157100149352359
learning rate =  0.054351037868965474
Initial Cost on Val dataset for this epoch 8290 = 0.07157100149352359
Error on this batch = 0.37143333444506943
Error on this batch = 0.37832385789352607
Cost on val dataset after 8291 epochs is = 0.07157078988929592
learning rate =  0.05434885263907131
Initial Cost on Val dataset for this epoch 8291 = 0.07157078988929592
Error on this batch = 0.371431

Cost on val dataset after 8323 epochs is = 0.07156400571017427
learning rate =  0.054279110280530965
Initial Cost on Val dataset for this epoch 8323 = 0.07156400571017427
Error on this batch = 0.3713816379691406
Error on this batch = 0.3782837349783857
Cost on val dataset after 8324 epochs is = 0.07156379331651129
learning rate =  0.054276936594641785
Initial Cost on Val dataset for this epoch 8324 = 0.07156379331651129
Error on this batch = 0.3713800668218373
Error on this batch = 0.3782825189439487
Cost on val dataset after 8325 epochs is = 0.07156358090010985
learning rate =  0.05427476325690444
Initial Cost on Val dataset for this epoch 8325 = 0.07156358090010985
Error on this batch = 0.3713784954048298
Error on this batch = 0.37828130290071627
Cost on val dataset after 8326 epochs is = 0.07156336846103642
learning rate =  0.05427259026722133
Initial Cost on Val dataset for this epoch 8326 = 0.07156336846103642
Error on this batch = 0.37137692371810216
Error on this batch = 0.37828

Cost on val dataset after 8356 epochs is = 0.07155698506480469
learning rate =  0.054207561939588804
Initial Cost on Val dataset for this epoch 8356 = 0.07155698506480469
Error on this batch = 0.3713296474837495
Error on this batch = 0.37824360192309914
Cost on val dataset after 8357 epochs is = 0.07155677195434461
learning rate =  0.05420539969140384
Initial Cost on Val dataset for this epoch 8357 = 0.07155677195434461
Error on this batch = 0.37132806741002156
Error on this batch = 0.3782423856629727
Cost on val dataset after 8358 epochs is = 0.07155655882318891
learning rate =  0.05420323778817132
Initial Cost on Val dataset for this epoch 8358 = 0.07155655882318891
Error on this batch = 0.37132648706434823
Error on this batch = 0.3782411693973894
Cost on val dataset after 8359 epochs is = 0.07155634567139432
learning rate =  0.054201076229794955
Initial Cost on Val dataset for this epoch 8359 = 0.07155634567139432
Error on this batch = 0.37132490644660665
Error on this batch = 0.378

Cost on val dataset after 8390 epochs is = 0.07154972802299726
learning rate =  0.05413423844517604
Initial Cost on Val dataset for this epoch 8390 = 0.07154972802299726
Error on this batch = 0.3712757715347478
Error on this batch = 0.3782022460844687
Cost on val dataset after 8391 epochs is = 0.07154951423870964
learning rate =  0.05413208787158228
Initial Cost on Val dataset for this epoch 8391 = 0.07154951423870964
Error on this batch = 0.37127418212721025
Error on this batch = 0.3782010296393728
Cost on val dataset after 8392 epochs is = 0.07154930043546494
learning rate =  0.054129937639688396
Initial Cost on Val dataset for this epoch 8392 = 0.07154930043546494
Error on this batch = 0.37127259244180444
Error on this batch = 0.3781998131881673
Cost on val dataset after 8393 epochs is = 0.07154908661330807
learning rate =  0.054127787749399386
Initial Cost on Val dataset for this epoch 8393 = 0.07154908661330807
Error on this batch = 0.37127100247830364
Error on this batch = 0.3781

Error on this batch = 0.3781620996058881
Cost on val dataset after 8424 epochs is = 0.07154244897274736
learning rate =  0.0540613100698857
Initial Cost on Val dataset for this epoch 8424 = 0.07154244897274736
Error on this batch = 0.3712215743075407
Error on this batch = 0.3781608828997455
Cost on val dataset after 8425 epochs is = 0.07154223456674064
learning rate =  0.05405917106101783
Initial Cost on Val dataset for this epoch 8425 = 0.07154223456674064
Error on this batch = 0.37121997530872236
Error on this batch = 0.3781596661831856
Cost on val dataset after 8426 epochs is = 0.07154202014307878
learning rate =  0.05405703239064091
Initial Cost on Val dataset for this epoch 8426 = 0.07154202014307878
Error on this batch = 0.37121837602280694
Error on this batch = 0.37815844945602506
Cost on val dataset after 8427 epochs is = 0.07154180570179278
learning rate =  0.05405489405866121
Initial Cost on Val dataset for this epoch 8427 = 0.07154180570179278
Error on this batch = 0.3712167

Cost on val dataset after 8458 epochs is = 0.0715351494278496
learning rate =  0.053988773102643
Initial Cost on Val dataset for this epoch 8458 = 0.0715351494278496
Error on this batch = 0.3711670453058404
Error on this batch = 0.37811950733906785
Cost on val dataset after 8459 epochs is = 0.07153493443615822
learning rate =  0.05398664555010681
Initial Cost on Val dataset for this epoch 8459 = 0.07153493443615822
Error on this batch = 0.37116543635694726
Error on this batch = 0.37811829014021087
Cost on val dataset after 8460 epochs is = 0.07153471942760083
learning rate =  0.05398451833289549
Initial Cost on Val dataset for this epoch 8460 = 0.07153471942760083
Error on this batch = 0.3711638271091593
Error on this batch = 0.3781170729228159
Cost on val dataset after 8461 epochs is = 0.07153450440219201
learning rate =  0.05398239145091658
Initial Cost on Val dataset for this epoch 8461 = 0.07153450440219201
Error on this batch = 0.37116221756210155
Error on this batch = 0.378115855

Error on this batch = 0.37807811038637995
Cost on val dataset after 8493 epochs is = 0.07152761475086811
learning rate =  0.053914507678813985
Initial Cost on Val dataset for this epoch 8493 = 0.07152761475086811
Error on this batch = 0.37111055174090263
Error on this batch = 0.3780768923854696
Cost on val dataset after 8494 epochs is = 0.07152739917402005
learning rate =  0.05391239180786174
Initial Cost on Val dataset for this epoch 8494 = 0.07152739917402005
Error on this batch = 0.3711089321009516
Error on this batch = 0.37807567435516504
Cost on val dataset after 8495 epochs is = 0.07152718358049172
learning rate =  0.0539102762690192
Initial Cost on Val dataset for this epoch 8495 = 0.07152718358049172
Error on this batch = 0.37110731214845033
Error on this batch = 0.37807445629510966
Cost on val dataset after 8496 epochs is = 0.07152696797027806
learning rate =  0.05390816106219516
Initial Cost on Val dataset for this epoch 8496 = 0.07152696797027806
Error on this batch = 0.3711

Error on this batch = 0.37803546043699526
Cost on val dataset after 8528 epochs is = 0.0715200595709198
learning rate =  0.0538406492056352
Initial Cost on Val dataset for this epoch 8528 = 0.0715200595709198
Error on this batch = 0.3710536757843056
Error on this batch = 0.3780342411836446
Cost on val dataset after 8529 epochs is = 0.0715198434031658
learning rate =  0.05383854490448377
Initial Cost on Val dataset for this epoch 8529 = 0.0715198434031658
Error on this batch = 0.3710520449705335
Error on this batch = 0.3780330218874188
Cost on val dataset after 8530 epochs is = 0.07151962721817766
learning rate =  0.05383644093227074
Initial Cost on Val dataset for this epoch 8530 = 0.07151962721817766
Error on this batch = 0.3710504138305603
Error on this batch = 0.37803180254790725
Cost on val dataset after 8531 epochs is = 0.07151941101592753
learning rate =  0.053834337288906124
Initial Cost on Val dataset for this epoch 8531 = 0.07151941101592753
Error on this batch = 0.37104878236

Cost on val dataset after 8562 epochs is = 0.07151269999898606
learning rate =  0.053769286964158454
Initial Cost on Val dataset for this epoch 8562 = 0.07151269999898606
Error on this batch = 0.3709980429655554
Error on this batch = 0.37799275832267837
Cost on val dataset after 8563 epochs is = 0.07151248322531552
learning rate =  0.05376719379664762
Initial Cost on Val dataset for this epoch 8563 = 0.07151248322531552
Error on this batch = 0.3709964008624568
Error on this batch = 0.37799153731856183
Cost on val dataset after 8564 epochs is = 0.07151226643305233
learning rate =  0.053765100955035734
Initial Cost on Val dataset for this epoch 8564 = 0.07151226643305233
Error on this batch = 0.37099475842154783
Error on this batch = 0.3779903162567151
Cost on val dataset after 8565 epochs is = 0.07151204962214307
learning rate =  0.053763008439234004
Initial Cost on Val dataset for this epoch 8565 = 0.07151204962214307
Error on this batch = 0.3709931156425282
Error on this batch = 0.377

Cost on val dataset after 8597 epochs is = 0.07150510146448034
learning rate =  0.05369621943185003
Initial Cost on Val dataset for this epoch 8597 = 0.07150510146448034
Error on this batch = 0.3709403665545327
Error on this batch = 0.37794998599403357
Cost on val dataset after 8598 epochs is = 0.07150488400277268
learning rate =  0.053694137618199606
Initial Cost on Val dataset for this epoch 8598 = 0.07150488400277268
Error on this batch = 0.37093871246978993
Error on this batch = 0.3779487627120379
Cost on val dataset after 8599 epochs is = 0.0715046665200957
learning rate =  0.053692056127361026
Initial Cost on Val dataset for this epoch 8599 = 0.0715046665200957
Error on this batch = 0.37093705803863947
Error on this batch = 0.37794753935724823
Cost on val dataset after 8600 epochs is = 0.07150444901636538
learning rate =  0.05368997495924671
Initial Cost on Val dataset for this epoch 8600 = 0.07150444901636538
Error on this batch = 0.37093540326090013
Error on this batch = 0.3779

Cost on val dataset after 8632 epochs is = 0.0714974772273846
learning rate =  0.05362354745564705
Initial Cost on Val dataset for this epoch 8632 = 0.0714974772273846
Error on this batch = 0.37088226650673833
Error on this batch = 0.377907125117318
Cost on val dataset after 8633 epochs is = 0.07149725897537046
learning rate =  0.05362147688851796
Initial Cost on Val dataset for this epoch 8633 = 0.07149725897537046
Error on this batch = 0.37088060021721986
Error on this batch = 0.37790589904498545
Cost on val dataset after 8634 epochs is = 0.07149704069885739
learning rate =  0.05361940664115524
Initial Cost on Val dataset for this epoch 8634 = 0.07149704069885739
Error on this batch = 0.3708789335774683
Error on this batch = 0.3779046728858997
Cost on val dataset after 8635 epochs is = 0.07149682239772573
learning rate =  0.05361733671347248
Initial Cost on Val dataset for this epoch 8635 = 0.07149682239772573
Error on this batch = 0.3708772665874541
Error on this batch = 0.377903446

Cost on val dataset after 8667 epochs is = 0.0714898229878478
learning rate =  0.0535512673033019
Initial Cost on Val dataset for this epoch 8667 = 0.0714898229878478
Error on this batch = 0.3708237380388718
Error on this batch = 0.3778641586744941
Cost on val dataset after 8668 epochs is = 0.07148960379972978
learning rate =  0.053549207876798954
Initial Cost on Val dataset for this epoch 8668 = 0.07148960379972978
Error on this batch = 0.3708220595045569
Error on this batch = 0.37786292936184934
Cost on val dataset after 8669 epochs is = 0.07148938458223818
learning rate =  0.053547148767057746
Initial Cost on Val dataset for this epoch 8669 = 0.07148938458223818
Error on this batch = 0.3708203806217924
Error on this batch = 0.3778616999510113
Cost on val dataset after 8670 epochs is = 0.07148916533521184
learning rate =  0.05354508997399306
Initial Cost on Val dataset for this epoch 8670 = 0.07148916533521184
Error on this batch = 0.3708187013907147
Error on this batch = 0.377860470

Cost on val dataset after 8702 epochs is = 0.07148213280278073
learning rate =  0.053479375292672174
Initial Cost on Val dataset for this epoch 8702 = 0.07148213280278073
Error on this batch = 0.37076478315091294
Error on this batch = 0.3778210726898682
Cost on val dataset after 8703 epochs is = 0.07148191248202822
learning rate =  0.053477326902318836
Initial Cost on Val dataset for this epoch 8703 = 0.07148191248202822
Error on this batch = 0.37076309253284245
Error on this batch = 0.3778198398001631
Cost on val dataset after 8704 epochs is = 0.07148169212546417
learning rate =  0.0534752788257628
Initial Cost on Val dataset for this epoch 8704 = 0.07148169212546417
Error on this batch = 0.37076140157387594
Error on this batch = 0.3778186068048301
Cost on val dataset after 8705 epochs is = 0.07148147173287947
learning rate =  0.05347323106291996
Initial Cost on Val dataset for this epoch 8705 = 0.07148147173287947
Error on this batch = 0.37075971027431076
Error on this batch = 0.3778

Cost on val dataset after 8735 epochs is = 0.07147484210748403
learning rate =  0.053411943638371207
Initial Cost on Val dataset for this epoch 8735 = 0.07147484210748403
Error on this batch = 0.37070881455665267
Error on this batch = 0.377780331001081
Cost on val dataset after 8736 epochs is = 0.07147462048636852
learning rate =  0.05340990555909124
Initial Cost on Val dataset for this epoch 8736 = 0.07147462048636852
Error on this batch = 0.3707071128689063
Error on this batch = 0.3777790945793318
Cost on val dataset after 8737 epochs is = 0.07147439882174499
learning rate =  0.053407867790849696
Initial Cost on Val dataset for this epoch 8737 = 0.07147439882174499
Error on this batch = 0.3707054108522654
Error on this batch = 0.37777785805003355
Cost on val dataset after 8738 epochs is = 0.07147417711335394
learning rate =  0.053405830333563505
Initial Cost on Val dataset for this epoch 8738 = 0.07147417711335394
Error on this batch = 0.3707037085071607
Error on this batch = 0.37777

Cost on val dataset after 8770 epochs is = 0.07146705770054063
learning rate =  0.053340795389457255
Initial Cost on Val dataset for this epoch 8770 = 0.07146705770054063
Error on this batch = 0.37064906280128257
Error on this batch = 0.37773699280133133
Cost on val dataset after 8771 epochs is = 0.07146683439196912
learning rate =  0.05333876814730763
Initial Cost on Val dataset for this epoch 8771 = 0.07146683439196912
Error on this batch = 0.3706473498819428
Error on this batch = 0.37773575267738124
Cost on val dataset after 8772 epochs is = 0.07146661102987524
learning rate =  0.053336741213308066
Initial Cost on Val dataset for this epoch 8772 = 0.07146661102987524
Error on this batch = 0.37064563665088895
Error on this batch = 0.37773451245094963
Cost on val dataset after 8773 epochs is = 0.07146638761394508
learning rate =  0.05333471458737658
Initial Cost on Val dataset for this epoch 8773 = 0.07146638761394508
Error on this batch = 0.3706439231086729
Error on this batch = 0.37

Cost on val dataset after 8805 epochs is = 0.07145920792718667
learning rate =  0.05327002472869185
Initial Cost on Val dataset for this epoch 8805 = 0.07145920792718667
Error on this batch = 0.37058892894138595
Error on this batch = 0.3776935296949699
Cost on val dataset after 8806 epochs is = 0.07145898254958061
learning rate =  0.05326800822323024
Initial Cost on Val dataset for this epoch 8806 = 0.07145898254958061
Error on this batch = 0.3705872054623897
Error on this batch = 0.3776922862002164
Cost on val dataset after 8807 epochs is = 0.07145875710667582
learning rate =  0.05326599202306846
Initial Cost on Val dataset for this epoch 8807 = 0.07145875710667582
Error on this batch = 0.37058548169301636
Error on this batch = 0.3776910426175407
Cost on val dataset after 8808 epochs is = 0.07145853159811569
learning rate =  0.05326397612812565
Initial Cost on Val dataset for this epoch 8808 = 0.07145853159811569
Error on this batch = 0.3705837576339407
Error on this batch = 0.3776897

Cost on val dataset after 8840 epochs is = 0.07145127849307059
learning rate =  0.053199628163178386
Initial Cost on Val dataset for this epoch 8840 = 0.07145127849307059
Error on this batch = 0.3705284390483444
Error on this batch = 0.3776499594314971
Cost on val dataset after 8841 epochs is = 0.07145105061445012
learning rate =  0.053197622295287066
Initial Cost on Val dataset for this epoch 8841 = 0.07145105061445012
Error on this batch = 0.3705267058372258
Error on this batch = 0.37764871327631155
Cost on val dataset after 8842 epochs is = 0.07145082265782585
learning rate =  0.05319561672988289
Initial Cost on Val dataset for this epoch 8842 = 0.07145082265782585
Error on this batch = 0.37052497236217746
Error on this batch = 0.37764746706012575
Cost on val dataset after 8843 epochs is = 0.07145059462283881
learning rate =  0.05319361146688605
Initial Cost on Val dataset for this epoch 8843 = 0.07145059462283881
Error on this batch = 0.370523238624059
Error on this batch = 0.37764

Cost on val dataset after 8875 epochs is = 0.07144325405968499
learning rate =  0.05312960224599685
Initial Cost on Val dataset for this epoch 8875 = 0.07144325405968499
Error on this batch = 0.3704676256736415
Error on this batch = 0.3776063148453007
Cost on val dataset after 8876 epochs is = 0.07144302324849831
learning rate =  0.05312760691786015
Initial Cost on Val dataset for this epoch 8876 = 0.07144302324849831
Error on this batch = 0.37046588379178447
Error on this batch = 0.37760506722883763
Cost on val dataset after 8877 epochs is = 0.07144279234793967
learning rate =  0.05312561188943477
Initial Cost on Val dataset for this epoch 8877 = 0.07144279234793967
Error on this batch = 0.370464141681437
Error on this batch = 0.37760381959393996
Cost on val dataset after 8878 epochs is = 0.07144256135774334
learning rate =  0.05312361716064194
Initial Cost on Val dataset for this epoch 8878 = 0.07144256135774334
Error on this batch = 0.37046239934381403
Error on this batch = 0.377602

Cost on val dataset after 8910 epochs is = 0.07143512105773572
learning rate =  0.053059943575420244
Initial Cost on Val dataset for this epoch 8910 = 0.07143512105773572
Error on this batch = 0.3704065326311068
Error on this batch = 0.3775626476844496
Cost on val dataset after 8911 epochs is = 0.07143488700055488
learning rate =  0.05305795869050236
Initial Cost on Val dataset for this epoch 8911 = 0.07143488700055488
Error on this batch = 0.37040478358103995
Error on this batch = 0.377561400391298
Cost on val dataset after 8912 epochs is = 0.07143465284866071
learning rate =  0.05305597410255622
Initial Cost on Val dataset for this epoch 8912 = 0.07143465284866071
Error on this batch = 0.3704030343551522
Error on this batch = 0.3775601531394708
Cost on val dataset after 8913 epochs is = 0.07143441860205836
learning rate =  0.05305398981150409
Initial Cost on Val dataset for this epoch 8913 = 0.07143441860205836
Error on this batch = 0.370401284955316
Error on this batch = 0.377558905

Cost on val dataset after 8945 epochs is = 0.07142687338854564
learning rate =  0.052990648794147256
Initial Cost on Val dataset for this epoch 8945 = 0.07142687338854564
Error on this batch = 0.37034522487367477
Error on this batch = 0.3775190299950064
Cost on val dataset after 8946 epochs is = 0.07142663610351765
learning rate =  0.05298867425717057
Initial Cost on Val dataset for this epoch 8946 = 0.07142663610351765
Error on this batch = 0.37034347095616094
Error on this batch = 0.377517785334892
Cost on val dataset after 8947 epochs is = 0.07142639873196402
learning rate =  0.05298670001446169
Initial Cost on Val dataset for this epoch 8947 = 0.07142639873196402
Error on this batch = 0.370341716944449
Error on this batch = 0.3775165407861916
Cost on val dataset after 8948 epochs is = 0.0714261612744029
learning rate =  0.052984726065943885
Initial Cost on Val dataset for this epoch 8948 = 0.0714261612744029
Error on this batch = 0.3703399628414038
Error on this batch = 0.377515296

Cost on val dataset after 8978 epochs is = 0.07141900089040386
learning rate =  0.05292564402980304
Initial Cost on Val dataset for this epoch 8978 = 0.07141900089040386
Error on this batch = 0.3702873127722529
Error on this batch = 0.3774780249191501
Cost on val dataset after 8979 epochs is = 0.07141876112661509
learning rate =  0.05292367916330176
Initial Cost on Val dataset for this epoch 8979 = 0.07141876112661509
Error on this batch = 0.3702855574289899
Error on this batch = 0.3774767848531994
Cost on val dataset after 8980 epochs is = 0.07141852130347442
learning rate =  0.05292171458855096
Initial Cost on Val dataset for this epoch 8980 = 0.07141852130347442
Error on this batch = 0.37028380210233836
Error on this batch = 0.3774755449503522
Cost on val dataset after 8981 epochs is = 0.07141828142213767
learning rate =  0.05291975030547483
Initial Cost on Val dataset for this epoch 8981 = 0.07141828142213767
Error on this batch = 0.37028204679614407
Error on this batch = 0.3774743

Cost on val dataset after 9013 epochs is = 0.07141058235343879
learning rate =  0.052857046799135825
Initial Cost on Val dataset for this epoch 9013 = 0.07141058235343879
Error on this batch = 0.3702259119111727
Error on this batch = 0.377434722487584
Cost on val dataset after 9014 epochs is = 0.07141034131534432
learning rate =  0.052855092098982454
Initial Cost on Val dataset for this epoch 9014 = 0.07141034131534432
Error on this batch = 0.3702241595667684
Error on this batch = 0.377433488262836
Cost on val dataset after 9015 epochs is = 0.07141010026835157
learning rate =  0.05285313768794314
Initial Cost on Val dataset for this epoch 9015 = 0.07141010026835157
Error on this batch = 0.3702224073840314
Error on this batch = 0.37743225419344756
Cost on val dataset after 9016 epochs is = 0.07140985921413925
learning rate =  0.05285118356594306
Initial Cost on Val dataset for this epoch 9016 = 0.07140985921413925
Error on this batch = 0.37022065536726007
Error on this batch = 0.3774310

Cost on val dataset after 9048 epochs is = 0.0714021519705373
learning rate =  0.05278880382834607
Initial Cost on Val dataset for this epoch 9048 = 0.0714021519705373
Error on this batch = 0.3701647037649214
Error on this batch = 0.37739160144368783
Cost on val dataset after 9049 epochs is = 0.07140191164562279
learning rate =  0.05278685920284411
Initial Cost on Val dataset for this epoch 9049 = 0.07140191164562279
Error on this batch = 0.37016295957383355
Error on this batch = 0.3773903710795478
Cost on val dataset after 9050 epochs is = 0.07140167137192728
learning rate =  0.052784914863853706
Initial Cost on Val dataset for this epoch 9050 = 0.07140167137192728
Error on this batch = 0.3701612156876273
Error on this batch = 0.37738914076003893
Cost on val dataset after 9051 epochs is = 0.07140143115109363
learning rate =  0.052782970811300976
Initial Cost on Val dataset for this epoch 9051 = 0.07140143115109363
Error on this batch = 0.37015947211000355
Error on this batch = 0.37738

Cost on val dataset after 9083 epochs is = 0.07139378110840269
learning rate =  0.05272091192812275
Initial Cost on Val dataset for this epoch 9083 = 0.07139378110840269
Error on this batch = 0.37010386099448317
Error on this batch = 0.3773485325579863
Cost on val dataset after 9084 epochs is = 0.07139354345914635
learning rate =  0.05271897728675269
Initial Cost on Val dataset for this epoch 9084 = 0.07139354345914635
Error on this batch = 0.37010212945941573
Error on this batch = 0.37734730072282985
Cost on val dataset after 9085 epochs is = 0.07139330590934441
learning rate =  0.052717042929325005
Initial Cost on Val dataset for this epoch 9085 = 0.07139330590934441
Error on this batch = 0.37010039833866204
Error on this batch = 0.37734606874871335
Cost on val dataset after 9086 epochs is = 0.07139306846002462
learning rate =  0.052715108855766774
Initial Cost on Val dataset for this epoch 9086 = 0.07139306846002462
Error on this batch = 0.3700986676347133
Error on this batch = 0.37

Cost on val dataset after 9116 epochs is = 0.07138599588080087
learning rate =  0.05265721828772175
Initial Cost on Val dataset for this epoch 9116 = 0.07138599588080087
Error on this batch = 0.370046951342905
Error on this batch = 0.37730778070137505
Cost on val dataset after 9117 epochs is = 0.0713857619334123
learning rate =  0.05265529297809341
Initial Cost on Val dataset for this epoch 9117 = 0.0713857619334123
Error on this batch = 0.37004523461967226
Error on this batch = 0.3773065416549485
Cost on val dataset after 9118 epochs is = 0.0713855281075617
learning rate =  0.05265336795001514
Initial Cost on Val dataset for this epoch 9118 = 0.0713855281075617
Error on this batch = 0.3700435183765056
Error on this batch = 0.3773053023163541
Cost on val dataset after 9119 epochs is = 0.07138529440354996
learning rate =  0.052651443203414874
Initial Cost on Val dataset for this epoch 9119 = 0.07138529440354996
Error on this batch = 0.3700418026149424
Error on this batch = 0.37730406268

Cost on val dataset after 9151 epochs is = 0.07137788103517158
learning rate =  0.052589999502853584
Initial Cost on Val dataset for this epoch 9151 = 0.07137788103517158
Error on this batch = 0.3699871607408588
Error on this batch = 0.3772642221829723
Cost on val dataset after 9152 epochs is = 0.07137765140212672
learning rate =  0.05258808400478221
Initial Cost on Val dataset for this epoch 9152 = 0.07137765140212672
Error on this batch = 0.3699854616158828
Error on this batch = 0.3772629714340432
Cost on val dataset after 9153 epochs is = 0.07137742189082814
learning rate =  0.05258616878575492
Initial Cost on Val dataset for this epoch 9153 = 0.07137742189082814
Error on this batch = 0.3699837630147165
Error on this batch = 0.3772617203255038
Cost on val dataset after 9154 epochs is = 0.07137719250102376
learning rate =  0.05258425384570059
Initial Cost on Val dataset for this epoch 9154 = 0.07137719250102376
Error on this batch = 0.36998206493838764
Error on this batch = 0.3772604

Cost on val dataset after 9186 epochs is = 0.07136991417053325
learning rate =  0.052523122637386704
Initial Cost on Val dataset for this epoch 9186 = 0.07136991417053325
Error on this batch = 0.3699280094154483
Error on this batch = 0.37722023449753805
Cost on val dataset after 9187 epochs is = 0.0713696885892644
learning rate =  0.05252121686370442
Initial Cost on Val dataset for this epoch 9187 = 0.0713696885892644
Error on this batch = 0.3699263291948429
Error on this batch = 0.3772189715243133
Cost on val dataset after 9188 epochs is = 0.07136946311593578
learning rate =  0.052519311366592006
Initial Cost on Val dataset for this epoch 9188 = 0.07136946311593578
Error on this batch = 0.36992464953070625
Error on this batch = 0.37721770822788087
Cost on val dataset after 9189 epochs is = 0.07136923775004862
learning rate =  0.0525174061459792
Initial Cost on Val dataset for this epoch 9189 = 0.07136923775004862
Error on this batch = 0.36992297042391026
Error on this batch = 0.377216

Cost on val dataset after 9221 epochs is = 0.07136207965737956
learning rate =  0.05245658465923488
Initial Cost on Val dataset for this epoch 9221 = 0.07136207965737956
Error on this batch = 0.3698695384351018
Error on this batch = 0.377175854969781
Cost on val dataset after 9222 epochs is = 0.07136185754274668
learning rate =  0.05245468852387594
Initial Cost on Val dataset for this epoch 9222 = 0.07136185754274668
Error on this batch = 0.3698678782023197
Error on this batch = 0.3771745822827402
Cost on val dataset after 9223 epochs is = 0.07136163551750642
learning rate =  0.05245279266264385
Initial Cost on Val dataset for this epoch 9223 = 0.07136163551750642
Error on this batch = 0.3698662185559484
Error on this batch = 0.37717330937362975
Cost on val dataset after 9224 epochs is = 0.07136141358112055
learning rate =  0.05245089707546926
Initial Cost on Val dataset for this epoch 9224 = 0.07136141358112055
Error on this batch = 0.36986455949683517
Error on this batch = 0.37717203

Cost on val dataset after 9254 epochs is = 0.07135479417990934
learning rate =  0.05239415655403021
Initial Cost on Val dataset for this epoch 9254 = 0.07135479417990934
Error on this batch = 0.3698150649905139
Error on this batch = 0.3771337580299759
Cost on val dataset after 9255 epochs is = 0.07135457473723232
learning rate =  0.05239226942837831
Initial Cost on Val dataset for this epoch 9255 = 0.07135457473723232
Error on this batch = 0.3698134245551978
Error on this batch = 0.3771324798634019
Cost on val dataset after 9256 epochs is = 0.07135435536685791
learning rate =  0.052390382574577975
Initial Cost on Val dataset for this epoch 9256 = 0.07135435536685791
Error on this batch = 0.3698117847340965
Error on this batch = 0.37713120159132746
Cost on val dataset after 9257 epochs is = 0.07135413606829795
learning rate =  0.052388495992560694
Initial Cost on Val dataset for this epoch 9257 = 0.07135413606829795
Error on this batch = 0.36981014552804475
Error on this batch = 0.37712

Cost on val dataset after 9289 epochs is = 0.07134715362540911
learning rate =  0.052328268460686375
Initial Cost on Val dataset for this epoch 9289 = 0.07134715362540911
Error on this batch = 0.3697580206291113
Error on this batch = 0.37708898243108935
Cost on val dataset after 9290 epochs is = 0.07134693643725912
learning rate =  0.05232639080922757
Initial Cost on Val dataset for this epoch 9290 = 0.07134693643725912
Error on this batch = 0.3697564021818132
Error on this batch = 0.3770877026477388
Cost on val dataset after 9291 epochs is = 0.07134671930564904
learning rate =  0.052324513427236534
Initial Cost on Val dataset for this epoch 9291 = 0.07134671930564904
Error on this batch = 0.3697547843773307
Error on this batch = 0.3770864228789409
Cost on val dataset after 9292 epochs is = 0.0713465022301697
learning rate =  0.05232263631464561
Initial Cost on Val dataset for this epoch 9292 = 0.0713465022301697
Error on this batch = 0.3697531672164561
Error on this batch = 0.37708514

Cost on val dataset after 9324 epochs is = 0.07133958312058784
learning rate =  0.052262710551328015
Initial Cost on Val dataset for this epoch 9324 = 0.07133958312058784
Error on this batch = 0.36970176253235826
Error on this batch = 0.377044219015757
Cost on val dataset after 9325 epochs is = 0.07133936768244058
learning rate =  0.052260842291136554
Initial Cost on Val dataset for this epoch 9325 = 0.07133936768244058
Error on this batch = 0.3697001670419527
Error on this batch = 0.3770429415636816
Cost on val dataset after 9326 epochs is = 0.07133915228782142
learning rate =  0.05225897429805883
Initial Cost on Val dataset for this epoch 9326 = 0.07133915228782142
Error on this batch = 0.3696985722208597
Error on this batch = 0.37704166423004787
Cost on val dataset after 9327 epochs is = 0.0713389369363963
learning rate =  0.052257106572028006
Initial Cost on Val dataset for this epoch 9327 = 0.0713389369363963
Error on this batch = 0.3696969780697938
Error on this batch = 0.3770403

Cost on val dataset after 9359 epochs is = 0.07133206659702546
learning rate =  0.05219747994116489
Initial Cost on Val dataset for this epoch 9359 = 0.07133206659702546
Error on this batch = 0.36964632314174867
Error on this batch = 0.3769995953540196
Cost on val dataset after 9360 epochs is = 0.07133185249580559
learning rate =  0.05219562099034807
Initial Cost on Val dataset for this epoch 9360 = 0.07133185249580559
Error on this batch = 0.36964475148424314
Error on this batch = 0.37699832353555696
Cost on val dataset after 9361 epochs is = 0.07133163842751039
learning rate =  0.05219376230432021
Initial Cost on Val dataset for this epoch 9361 = 0.07133163842751039
Error on this batch = 0.36964318051937894
Error on this batch = 0.37699705191969346
Cost on val dataset after 9362 epochs is = 0.07133142439186729
learning rate =  0.052191903883015305
Initial Cost on Val dataset for this epoch 9362 = 0.07133142439186729
Error on this batch = 0.36964161024776926
Error on this batch = 0.37

Cost on val dataset after 9393 epochs is = 0.07132480406388367
learning rate =  0.05213442376615514
Initial Cost on Val dataset for this epoch 9393 = 0.07132480406388367
Error on this batch = 0.3695932788807653
Error on this batch = 0.3769564791726513
Cost on val dataset after 9394 epochs is = 0.07132459093792771
learning rate =  0.05213257378127845
Initial Cost on Val dataset for this epoch 9394 = 0.07132459093792771
Error on this batch = 0.36959173109812726
Error on this batch = 0.37695521534276766
Cost on val dataset after 9395 epochs is = 0.07132437783642982
learning rate =  0.05213072405895994
Initial Cost on Val dataset for this epoch 9395 = 0.07132437783642982
Error on this batch = 0.36959018402720417
Error on this batch = 0.3769539517793414
Cost on val dataset after 9396 epochs is = 0.07132416475916388
learning rate =  0.05212887459913439
Initial Cost on Val dataset for this epoch 9396 = 0.07132416475916388
Error on this batch = 0.3695886376685019
Error on this batch = 0.376952

Cost on val dataset after 9428 epochs is = 0.07131735778688521
learning rate =  0.052069830092030374
Initial Cost on Val dataset for this epoch 9428 = 0.07131735778688521
Error on this batch = 0.36953953310523624
Error on this batch = 0.3769124137326265
Cost on val dataset after 9429 epochs is = 0.07131714538968785
learning rate =  0.052067989258044116
Initial Cost on Val dataset for this epoch 9429 = 0.07131714538968785
Error on this batch = 0.3695380105131686
Error on this batch = 0.3769111601224185
Cost on val dataset after 9430 epochs is = 0.07131693300968096
learning rate =  0.05206614868434758
Initial Cost on Val dataset for this epoch 9430 = 0.07131693300968096
Error on this batch = 0.36953648864867883
Error on this batch = 0.37690990682880104
Cost on val dataset after 9431 epochs is = 0.07131672064667481
learning rate =  0.052064308370876354
Initial Cost on Val dataset for this epoch 9431 = 0.07131672064667481
Error on this batch = 0.36953496751216647
Error on this batch = 0.37

Cost on val dataset after 9463 epochs is = 0.07130993291653927
learning rate =  0.052005555354580964
Initial Cost on Val dataset for this epoch 9463 = 0.07130993291653927
Error on this batch = 0.3694866777585412
Error on this batch = 0.37686873330485493
Cost on val dataset after 9464 epochs is = 0.07130972101367497
learning rate =  0.05200372359256492
Initial Cost on Val dataset for this epoch 9464 = 0.07130972101367497
Error on this batch = 0.3694851808510973
Error on this batch = 0.37686749144599463
Cost on val dataset after 9465 epochs is = 0.07130950912185505
learning rate =  0.052001892088598047
Initial Cost on Val dataset for this epoch 9465 = 0.07130950912185505
Error on this batch = 0.36948368468357556
Error on this batch = 0.3768662499413004
Cost on val dataset after 9466 epochs is = 0.07130929724091764
learning rate =  0.05200006084261672
Initial Cost on Val dataset for this epoch 9466 = 0.07130929724091764
Error on this batch = 0.3694821892562821
Error on this batch = 0.3768

Cost on val dataset after 9498 epochs is = 0.07130252186328839
learning rate =  0.05194159680811371
Initial Cost on Val dataset for this epoch 9498 = 0.07130252186328839
Error on this batch = 0.36943472813320966
Error on this batch = 0.37682548462111526
Cost on val dataset after 9499 epochs is = 0.07130231025499188
learning rate =  0.05193977404011642
Initial Cost on Val dataset for this epoch 9499 = 0.07130231025499188
Error on this batch = 0.36943325729011706
Error on this batch = 0.37682425565931477
Cost on val dataset after 9500 epochs is = 0.07130209865246001
learning rate =  0.05193795152795519
Initial Cost on Val dataset for this epoch 9500 = 0.07130209865246001
Error on this batch = 0.36943178719631947
Error on this batch = 0.37682302707978177
Cost on val dataset after 9501 epochs is = 0.07130188705555277
learning rate =  0.0519361292715672
Initial Cost on Val dataset for this epoch 9501 = 0.07130188705555277
Error on this batch = 0.3694303178520472
Error on this batch = 0.3768

Error on this batch = 0.37678391826589347
Cost on val dataset after 9533 epochs is = 0.0712951181143781
learning rate =  0.05187795174057896
Initial Cost on Val dataset for this epoch 9533 = 0.0712951181143781
Error on this batch = 0.3693836958702892
Error on this batch = 0.37678270265828345
Cost on val dataset after 9534 epochs is = 0.07129490662799344
learning rate =  0.051876137889602325
Initial Cost on Val dataset for this epoch 9534 = 0.07129490662799344
Error on this batch = 0.3693822513781322
Error on this batch = 0.3767814874536532
Cost on val dataset after 9535 epochs is = 0.07129469514278265
learning rate =  0.051874324292275655
Initial Cost on Val dataset for this epoch 9535 = 0.07129469514278265
Error on this batch = 0.369380807642317
Error on this batch = 0.3767802726525349
Cost on val dataset after 9536 epochs is = 0.07129448365862344
learning rate =  0.0518725109485369
Initial Cost on Val dataset for this epoch 9536 = 0.07129448365862344
Error on this batch = 0.369379364

Cost on val dataset after 9567 epochs is = 0.07128792752462053
learning rate =  0.05181642273460443
Initial Cost on Val dataset for this epoch 9567 = 0.07128792752462053
Error on this batch = 0.3693350084248522
Error on this batch = 0.3767416150667042
Cost on val dataset after 9568 epochs is = 0.07128771601218627
learning rate =  0.05181461747303677
Initial Cost on Val dataset for this epoch 9568 = 0.07128771601218627
Error on this batch = 0.3693335897382731
Error on this batch = 0.3767404138568902
Cost on val dataset after 9569 epochs is = 0.07128750449701382
learning rate =  0.05181281246302092
Initial Cost on Val dataset for this epoch 9569 = 0.07128750449701382
Error on this batch = 0.36933217181326283
Error on this batch = 0.3767392130666475
Cost on val dataset after 9570 epochs is = 0.07128729297899553
learning rate =  0.05181100770449555
Initial Cost on Val dataset for this epoch 9570 = 0.07128729297899553
Error on this batch = 0.36933075464995696
Error on this batch = 0.3767380

Error on this batch = 0.37670219887178863
Cost on val dataset after 9601 epochs is = 0.07128073393995349
learning rate =  0.05175518459583003
Initial Cost on Val dataset for this epoch 9601 = 0.07128073393995349
Error on this batch = 0.36928720109255553
Error on this batch = 0.3767010117169541
Cost on val dataset after 9602 epochs is = 0.07128052227639782
learning rate =  0.05175338785274245
Initial Cost on Val dataset for this epoch 9602 = 0.07128052227639782
Error on this batch = 0.36928580837038816
Error on this batch = 0.37669982499436333
Cost on val dataset after 9603 epochs is = 0.07128031060666493
learning rate =  0.05175159135913324
Initial Cost on Val dataset for this epoch 9603 = 0.07128031060666493
Error on this batch = 0.3692844164139554
Error on this batch = 0.3766986387043667
Cost on val dataset after 9604 epochs is = 0.07128009893066037
learning rate =  0.05174979511494176
Initial Cost on Val dataset for this epoch 9604 = 0.07128009893066037
Error on this batch = 0.36928

Cost on val dataset after 9635 epochs is = 0.07127353336638265
learning rate =  0.05169423492646716
Initial Cost on Val dataset for this epoch 9635 = 0.07127353336638265
Error on this batch = 0.36924027875966803
Error on this batch = 0.37666090784785744
Cost on val dataset after 9636 epochs is = 0.0712733214421148
learning rate =  0.051692446631764574
Initial Cost on Val dataset for this epoch 9636 = 0.0712733214421148
Error on this batch = 0.3692389121321577
Error on this batch = 0.37665973601987185
Cost on val dataset after 9637 epochs is = 0.07127310950867277
learning rate =  0.051690658584491206
Initial Cost on Val dataset for this epoch 9637 = 0.07127310950867277
Error on this batch = 0.3692375462739243
Error on this batch = 0.376658564635501
Cost on val dataset after 9638 epochs is = 0.07127289756597506
learning rate =  0.05168887078458716
Initial Cost on Val dataset for this epoch 9638 = 0.07127289756597506
Error on this batch = 0.3692361811850665
Error on this batch = 0.3766573

Cost on val dataset after 9670 epochs is = 0.07126611004490196
learning rate =  0.0516317914412739
Initial Cost on Val dataset for this epoch 9670 = 0.07126611004490196
Error on this batch = 0.36919290515142167
Error on this batch = 0.37662015973467633
Cost on val dataset after 9671 epochs is = 0.07126589775355144
learning rate =  0.051630011771086316
Initial Cost on Val dataset for this epoch 9671 = 0.07126589775355144
Error on this batch = 0.36919156550605553
Error on this batch = 0.37661900360551065
Cost on val dataset after 9672 epochs is = 0.07126568545040371
learning rate =  0.051628232346243585
Initial Cost on Val dataset for this epoch 9672 = 0.07126568545040371
Error on this batch = 0.36919022663331985
Error on this batch = 0.37661784792988445
Cost on val dataset after 9673 epochs is = 0.07126547313539097
learning rate =  0.051626453166686515
Initial Cost on Val dataset for this epoch 9673 = 0.07126547313539097
Error on this batch = 0.3691888885333091
Error on this batch = 0.3

Cost on val dataset after 9704 epochs is = 0.07125888513413256
learning rate =  0.051571419940139286
Initial Cost on Val dataset for this epoch 9704 = 0.07125888513413256
Error on this batch = 0.3691477912188288
Error on this batch = 0.3765811073278908
Cost on val dataset after 9705 epochs is = 0.07125867240612287
learning rate =  0.05156964857859568
Initial Cost on Val dataset for this epoch 9705 = 0.07125867240612287
Error on this batch = 0.3691464778958337
Error on this batch = 0.3765799667627771
Cost on val dataset after 9706 epochs is = 0.07125845966424596
learning rate =  0.05156787746039603
Initial Cost on Val dataset for this epoch 9706 = 0.07125845966424596
Error on this batch = 0.36914516534868513
Error on this batch = 0.37657882665972514
Cost on val dataset after 9707 epochs is = 0.0712582469084486
learning rate =  0.05156610658548185
Initial Cost on Val dataset for this epoch 9707 = 0.0712582469084486
Error on this batch = 0.36914385357747864
Error on this batch = 0.3765776

Error on this batch = 0.37654483954633966
Cost on val dataset after 9737 epochs is = 0.07125185751643465
learning rate =  0.05151309317647765
Initial Cost on Val dataset for this epoch 9737 = 0.07125185751643465
Error on this batch = 0.36910486173261753
Error on this batch = 0.37654371387757074
Cost on val dataset after 9738 epochs is = 0.07125164430524634
learning rate =  0.05151132981450809
Initial Cost on Val dataset for this epoch 9738 = 0.07125164430524634
Error on this batch = 0.3691035740635816
Error on this batch = 0.37654258867777296
Cost on val dataset after 9739 epochs is = 0.07125143107868363
learning rate =  0.05150956669396268
Initial Cost on Val dataset for this epoch 9739 = 0.07125143107868363
Error on this batch = 0.36910228717360044
Error on this batch = 0.37654146394714133
Cost on val dataset after 9740 epochs is = 0.07125121783670922
learning rate =  0.05150780381478357
Initial Cost on Val dataset for this epoch 9740 = 0.07125121783670922
Error on this batch = 0.369

Error on this batch = 0.376505721371467
Cost on val dataset after 9772 epochs is = 0.07124438576097171
learning rate =  0.05145151877739441
Initial Cost on Val dataset for this epoch 9772 = 0.07124438576097171
Error on this batch = 0.3690602575062485
Error on this batch = 0.3765046122222443
Cost on val dataset after 9773 epochs is = 0.07124417199300828
learning rate =  0.05144976383098547
Initial Cost on Val dataset for this epoch 9773 = 0.07124417199300828
Error on this batch = 0.3690589971635405
Error on this batch = 0.37650350354783174
Cost on val dataset after 9774 epochs is = 0.07124395820868853
learning rate =  0.051448009123988066
Initial Cost on Val dataset for this epoch 9774 = 0.07124395820868853
Error on this batch = 0.36905773760339244
Error on this batch = 0.3765023953483619
Cost on val dataset after 9775 epochs is = 0.07124374440799466
learning rate =  0.051446254656345045
Initial Cost on Val dataset for this epoch 9775 = 0.07124374440799466
Error on this batch = 0.369056

Error on this batch = 0.3764693707747901
Cost on val dataset after 9805 epochs is = 0.07123732270970165
learning rate =  0.05139373164417271
Initial Cost on Val dataset for this epoch 9805 = 0.07123732270970165
Error on this batch = 0.36901907993386673
Error on this batch = 0.3764682773513067
Cost on val dataset after 9806 epochs is = 0.07123710839591166
learning rate =  0.05139198456827498
Initial Cost on Val dataset for this epoch 9806 = 0.07123710839591166
Error on this batch = 0.36901784546821176
Error on this batch = 0.3764671844057071
Cost on val dataset after 9807 epochs is = 0.07123689406550418
learning rate =  0.051390237729913045
Initial Cost on Val dataset for this epoch 9807 = 0.07123689406550418
Error on this batch = 0.3690166117883716
Error on this batch = 0.37646609193803726
Cost on val dataset after 9808 epochs is = 0.07123667971848224
learning rate =  0.05138849112903043
Initial Cost on Val dataset for this epoch 9808 = 0.07123667971848224
Error on this batch = 0.36901

Error on this batch = 0.3764313854181185
Cost on val dataset after 9840 epochs is = 0.0712298118952771
learning rate =  0.051332724952692435
Initial Cost on Val dataset for this epoch 9840 = 0.0712298118952771
Error on this batch = 0.368976341792772
Error on this batch = 0.37643030872551353
Cost on val dataset after 9841 epochs is = 0.07122959700617412
learning rate =  0.051330986157048786
Initial Cost on Val dataset for this epoch 9841 = 0.07122959700617412
Error on this batch = 0.36897513488392614
Error on this batch = 0.3764292325103087
Cost on val dataset after 9842 epochs is = 0.07122938210097166
learning rate =  0.051329247596974395
Initial Cost on Val dataset for this epoch 9842 = 0.07122938210097166
Error on this batch = 0.3689739287639007
Error on this batch = 0.37642815677241986
Cost on val dataset after 9843 epochs is = 0.07122916717969739
learning rate =  0.051327509272413425
Initial Cost on Val dataset for this epoch 9843 = 0.07122916717969739
Error on this batch = 0.36897

Error on this batch = 0.3763939844431134
Cost on val dataset after 9875 epochs is = 0.07122228141721366
learning rate =  0.051272006904361254
Initial Cost on Val dataset for this epoch 9875 = 0.07122228141721366
Error on this batch = 0.368934569748467
Error on this batch = 0.37639292437948796
Cost on val dataset after 9876 epochs is = 0.0712220659859398
learning rate =  0.05127027632053759
Initial Cost on Val dataset for this epoch 9876 = 0.0712220659859398
Error on this batch = 0.3689333904839435
Error on this batch = 0.3763918647875309
Cost on val dataset after 9877 epochs is = 0.07122185053997782
learning rate =  0.051268545970339835
Initial Cost on Val dataset for this epoch 9877 = 0.07122185053997782
Error on this batch = 0.36893221201004106
Error on this batch = 0.3763908056669868
Cost on val dataset after 9878 epochs is = 0.0712216350793817
learning rate =  0.051266815853712795
Initial Cost on Val dataset for this epoch 9878 = 0.0712216350793817
Error on this batch = 0.368931034

Error on this batch = 0.376357160892363
Cost on val dataset after 9910 epochs is = 0.07121473297613423
learning rate =  0.05121157511770968
Initial Cost on Val dataset for this epoch 9910 = 0.07121473297613423
Error on this batch = 0.36889376594575096
Error on this batch = 0.3763561171479455
Cost on val dataset after 9911 epochs is = 0.07121451706785309
learning rate =  0.05120985267807708
Initial Cost on Val dataset for this epoch 9911 = 0.07121451706785309
Error on this batch = 0.36889261435190585
Error on this batch = 0.37635507386282385
Cost on val dataset after 9912 epochs is = 0.07121430114722384
learning rate =  0.05120813047014983
Initial Cost on Val dataset for this epoch 9912 = 0.07121430114722384
Error on this batch = 0.3688914635480782
Error on this batch = 0.3763540310365352
Cost on val dataset after 9913 epochs is = 0.07121408521432684
learning rate =  0.05120640849387339
Initial Cost on Val dataset for this epoch 9913 = 0.07121408521432684
Error on this batch = 0.3688903

Error on this batch = 0.3763198717674061
Cost on val dataset after 9946 epochs is = 0.0712069531154424
learning rate =  0.05114971287696099
Initial Cost on Val dataset for this epoch 9946 = 0.0712069531154424
Error on this batch = 0.3688528055770957
Error on this batch = 0.3763188442201221
Cost on val dataset after 9947 epochs is = 0.07120673681842703
learning rate =  0.051147998744490636
Initial Cost on Val dataset for this epoch 9947 = 0.07120673681842703
Error on this batch = 0.36885168235429205
Error on this batch = 0.3763178171114005
Cost on val dataset after 9948 epochs is = 0.0712065205124074
learning rate =  0.05114628484177366
Initial Cost on Val dataset for this epoch 9948 = 0.0712065205124074
Error on this batch = 0.3688505599166068
Error on this batch = 0.3763167904405442
Cost on val dataset after 9949 epochs is = 0.07120630419748909
learning rate =  0.05114457116875617
Initial Cost on Val dataset for this epoch 9949 = 0.07120630419748909
Error on this batch = 0.36884943826

Error on this batch = 0.37628416365606016
Cost on val dataset after 9981 epochs is = 0.07119937808516527
learning rate =  0.05108985459201583
Initial Cost on Val dataset for this epoch 9981 = 0.07119937808516527
Error on this batch = 0.36881395820905666
Error on this batch = 0.37628315100152765
Cost on val dataset after 9982 epochs is = 0.07119916153975701
learning rate =  0.051088148468969245
Initial Cost on Val dataset for this epoch 9982 = 0.07119916153975701
Error on this batch = 0.36881286229917815
Error on this batch = 0.3762821387573554
Cost on val dataset after 9983 epochs is = 0.07119894498939093
learning rate =  0.05108644257380072
Initial Cost on Val dataset for this epoch 9983 = 0.07119894498939093
Error on this batch = 0.36881176716339226
Error on this batch = 0.3762811269226269
Cost on val dataset after 9984 epochs is = 0.07119872843419237
learning rate =  0.051084736906457
Initial Cost on Val dataset for this epoch 9984 = 0.07119872843419237
Error on this batch = 0.36881

Error on this batch = 0.3762489586213492
Cost on val dataset after 10016 epochs is = 0.07119179688913915
learning rate =  0.051030275525183466
Initial Cost on Val dataset for this epoch 10016 = 0.07119179688913915
Error on this batch = 0.36877605877726083
Error on this batch = 0.376247959748181
Cost on val dataset after 10017 epochs is = 0.07119158024756737
learning rate =  0.051028577346296185
Initial Cost on Val dataset for this epoch 10017 = 0.07119158024756737
Error on this batch = 0.36877498965912003
Error on this batch = 0.3762469612501168
Cost on val dataset after 10018 epochs is = 0.07119136360566677
learning rate =  0.05102687939343344
Initial Cost on Val dataset for this epoch 10018 = 0.07119136360566677
Error on this batch = 0.36877392129575115
Error on this batch = 0.37624596312606406
Cost on val dataset after 10019 epochs is = 0.07119114696357559
learning rate =  0.0510251816665426
Initial Cost on Val dataset for this epoch 10019 = 0.07119114696357559
Error on this batch =

Error on this batch = 0.3762142139068992
Cost on val dataset after 10051 epochs is = 0.07118421515404637
learning rate =  0.050970973405111475
Initial Cost on Val dataset for this epoch 10051 = 0.07118421515404637
Error on this batch = 0.368739083766532
Error on this batch = 0.3762132274894752
Cost on val dataset after 10052 epochs is = 0.0711839985844793
learning rate =  0.05096928310587611
Initial Cost on Val dataset for this epoch 10052 = 0.0711839985844793
Error on this batch = 0.36873804059895804
Error on this batch = 0.37621224140697485
Cost on val dataset after 10053 epochs is = 0.07118378201952313
learning rate =  0.05096759303083323
Initial Cost on Val dataset for this epoch 10053 = 0.07118378201952313
Error on this batch = 0.3687369981569337
Error on this batch = 0.3762112556582077
Cost on val dataset after 10054 epochs is = 0.0711835654593206
learning rate =  0.0509659031799308
Initial Cost on Val dataset for this epoch 10054 = 0.0711835654593206
Error on this batch = 0.3687

Error on this batch = 0.3761798807391814
Cost on val dataset after 10086 epochs is = 0.07117663889350136
learning rate =  0.050911945986751105
Initial Cost on Val dataset for this epoch 10086 = 0.07117663889350136
Error on this batch = 0.3687029976853776
Error on this batch = 0.3761789053310858
Cost on val dataset after 10087 epochs is = 0.07117642256975094
learning rate =  0.05091026350340597
Initial Cost on Val dataset for this epoch 10087 = 0.07117642256975094
Error on this batch = 0.3687019792681228
Error on this batch = 0.37617793021598106
Cost on val dataset after 10088 epochs is = 0.07117620625555089
learning rate =  0.050908581242442394
Initial Cost on Val dataset for this epoch 10088 = 0.07117620625555089
Error on this batch = 0.36870096153679427
Error on this batch = 0.37617695539267965
Cost on val dataset after 10089 epochs is = 0.07117598995103913
learning rate =  0.05090689920380897
Initial Cost on Val dataset for this epoch 10089 = 0.07117598995103913
Error on this batch 

Cost on val dataset after 10119 epochs is = 0.07116950599281809
learning rate =  0.050856541173904625
Initial Cost on Val dataset for this epoch 10119 = 0.07116950599281809
Error on this batch = 0.368669744805975
Error on this batch = 0.3761468742246132
Cost on val dataset after 10120 epochs is = 0.07116929005537875
learning rate =  0.050854866002087024
Initial Cost on Val dataset for this epoch 10120 = 0.07116929005537875
Error on this batch = 0.3686687483092886
Error on this batch = 0.3761459081256686
Cost on val dataset after 10121 epochs is = 0.07116907413186853
learning rate =  0.05085319105096263
Initial Cost on Val dataset for this epoch 10121 = 0.07116907413186853
Error on this batch = 0.3686677524525592
Error on this batch = 0.37614494228075585
Cost on val dataset after 10122 epochs is = 0.07116885822241333
learning rate =  0.050851516320480565
Initial Cost on Val dataset for this epoch 10122 = 0.07116885822241333
Error on this batch = 0.368666757234284
Error on this batch = 0

Error on this batch = 0.3761151213576115
Cost on val dataset after 10153 epochs is = 0.07116217266414586
learning rate =  0.050799708837329954
Initial Cost on Val dataset for this epoch 10153 = 0.07116217266414586
Error on this batch = 0.3686362136992126
Error on this batch = 0.3761141630947662
Cost on val dataset after 10154 epochs is = 0.07116195726786315
learning rate =  0.05079804114063996
Initial Cost on Val dataset for this epoch 10154 = 0.07116195726786315
Error on this batch = 0.36863523808763277
Error on this batch = 0.3761132050527589
Cost on val dataset after 10155 epochs is = 0.07116174188949521
learning rate =  0.05079637366292276
Initial Cost on Val dataset for this epoch 10155 = 0.07116174188949521
Error on this batch = 0.36863426306221264
Error on this batch = 0.3761122472306794
Cost on val dataset after 10156 epochs is = 0.0711615265291491
learning rate =  0.05079470640412801
Initial Cost on Val dataset for this epoch 10156 = 0.0711615265291491
Error on this batch = 0.

Error on this batch = 0.37608361058030426
Cost on val dataset after 10186 epochs is = 0.07115507460358415
learning rate =  0.050744790190368046
Initial Cost on Val dataset for this epoch 10186 = 0.07115507460358415
Error on this batch = 0.36860431892247797
Error on this batch = 0.37608265915955563
Cost on val dataset after 10187 epochs is = 0.07115485985146568
learning rate =  0.050743129693305654
Initial Cost on Val dataset for this epoch 10187 = 0.07115485985146568
Error on this batch = 0.36860336177238184
Error on this batch = 0.37608170793304607
Cost on val dataset after 10188 epochs is = 0.07115464512044604
learning rate =  0.05074146941356448
Initial Cost on Val dataset for this epoch 10188 = 0.07115464512044604
Error on this batch = 0.3686024051530943
Error on this batch = 0.37608075690008486
Cost on val dataset after 10189 epochs is = 0.07115443041060993
learning rate =  0.05073980935109475
Initial Cost on Val dataset for this epoch 10189 = 0.07115443041060993
Error on this bat

Cost on val dataset after 10219 epochs is = 0.07114799935597294
learning rate =  0.05069010826203338
Initial Cost on Val dataset for this epoch 10219 = 0.07114799935597294
Error on this batch = 0.36857300405115967
Error on this batch = 0.37605136743726003
Cost on val dataset after 10220 epochs is = 0.07114778534082869
learning rate =  0.0506884549103814
Initial Cost on Val dataset for this epoch 10220 = 0.07114778534082869
Error on this batch = 0.36857206353092964
Error on this batch = 0.3760504222753292
Cost on val dataset after 10221 epochs is = 0.07114757134918914
learning rate =  0.05068680177441681
Initial Cost on Val dataset for this epoch 10221 = 0.07114757134918914
Error on this batch = 0.3685711234863935
Error on this batch = 0.3760494772880956
Cost on val dataset after 10222 epochs is = 0.07114735738111444
learning rate =  0.050685148854090374
Initial Cost on Val dataset for this epoch 10222 = 0.07114735738111444
Error on this batch = 0.36857018391591767
Error on this batch =

Cost on val dataset after 10252 epochs is = 0.0711409495645318
learning rate =  0.050635661272483236
Initial Cost on Val dataset for this epoch 10252 = 0.0711409495645318
Error on this batch = 0.3685422092757868
Error on this batch = 0.3760202671301367
Cost on val dataset after 10253 epochs is = 0.07114073635274909
learning rate =  0.050634015012605865
Initial Cost on Val dataset for this epoch 10253 = 0.07114073635274909
Error on this batch = 0.36854128361004335
Error on this batch = 0.3760193275296001
Cost on val dataset after 10254 epochs is = 0.07114052316606362
learning rate =  0.05063236896679954
Initial Cost on Val dataset for this epoch 10254 = 0.07114052316606362
Error on this batch = 0.3685403583680316
Error on this batch = 0.376018388092218
Cost on val dataset after 10255 epochs is = 0.07114031000451108
learning rate =  0.05063072313501556
Initial Cost on Val dataset for this epoch 10255 = 0.07114031000451108
Error on this batch = 0.3685394335482511
Error on this batch = 0.3

Error on this batch = 0.3759902798160954
Cost on val dataset after 10285 epochs is = 0.07113392699262447
learning rate =  0.05058144746093104
Initial Cost on Val dataset for this epoch 10285 = 0.07113392699262447
Error on this batch = 0.3685118780599516
Error on this batch = 0.37598934534143647
Cost on val dataset after 10286 epochs is = 0.07113371462402449
learning rate =  0.05057980823976579
Initial Cost on Val dataset for this epoch 10286 = 0.07113371462402449
Error on this batch = 0.368510965613478
Error on this batch = 0.3759884110246851
Cost on val dataset after 10287 epochs is = 0.07113350228132778
learning rate =  0.05057816923107249
Initial Cost on Val dataset for this epoch 10287 = 0.07113350228132778
Error on this batch = 0.36851005354402105
Error on this batch = 0.37598747686575623
Cost on val dataset after 10288 epochs is = 0.07113328996454722
learning rate =  0.05057653043480296
Initial Cost on Val dataset for this epoch 10288 = 0.07113328996454722
Error on this batch = 0

Error on this batch = 0.37595859593537045
Cost on val dataset after 10319 epochs is = 0.0711267210395121
learning rate =  0.05052583285043177
Initial Cost on Val dataset for this epoch 10319 = 0.0711267210395121
Error on this batch = 0.3684810587688546
Error on this batch = 0.3759576668046522
Cost on val dataset after 10320 epochs is = 0.07112650955507643
learning rate =  0.050524200826371386
Initial Cost on Val dataset for this epoch 10320 = 0.07112650955507643
Error on this batch = 0.36848015843673126
Error on this batch = 0.3759567378310076
Cost on val dataset after 10321 epochs is = 0.07112629809663384
learning rate =  0.0505225690131532
Initial Cost on Val dataset for this epoch 10321 = 0.07112629809663384
Error on this batch = 0.3684792584401495
Error on this batch = 0.37595580901446624
Cost on val dataset after 10322 epochs is = 0.07112608666417675
learning rate =  0.05052093741072955
Initial Cost on Val dataset for this epoch 10322 = 0.07112608666417675
Error on this batch = 0.

Cost on val dataset after 10352 epochs is = 0.07111975571495151
learning rate =  0.05047208712169853
Initial Cost on Val dataset for this epoch 10352 = 0.07111975571495151
Error on this batch = 0.36845151914738417
Error on this batch = 0.3759270938891832
Cost on val dataset after 10353 epochs is = 0.07111954508149232
learning rate =  0.05047046203034826
Initial Cost on Val dataset for this epoch 10353 = 0.07111954508149232
Error on this batch = 0.36845062933310746
Error on this batch = 0.3759261701282954
Cost on val dataset after 10354 epochs is = 0.07111933447351096
learning rate =  0.050468837148275394
Initial Cost on Val dataset for this epoch 10354 = 0.07111933447351096
Error on this batch = 0.3684497398207883
Error on this batch = 0.3759252465265982
Cost on val dataset after 10355 epochs is = 0.07111912389098396
learning rate =  0.050467212475432774
Initial Cost on Val dataset for this epoch 10355 = 0.07111912389098396
Error on this batch = 0.36844885060950816
Error on this batch 

Cost on val dataset after 10386 epochs is = 0.07111260831168516
learning rate =  0.05041695113891622
Initial Cost on Val dataset for this epoch 10386 = 0.07111260831168516
Error on this batch = 0.36842142961746005
Error on this batch = 0.3758957758759357
Cost on val dataset after 10387 epochs is = 0.07111239852898822
learning rate =  0.050415333136632094
Initial Cost on Val dataset for this epoch 10387 = 0.07111239852898822
Error on this batch = 0.368420549586985
Error on this batch = 0.37589485757977176
Cost on val dataset after 10388 epochs is = 0.07111218877076023
learning rate =  0.05041371534203044
Initial Cost on Val dataset for this epoch 10388 = 0.07111218877076023
Error on this batch = 0.36841966983025304
Error on this batch = 0.3758939394460073
Cost on val dataset after 10389 epochs is = 0.0711119790369657
learning rate =  0.050412097755064625
Initial Cost on Val dataset for this epoch 10389 = 0.0711119790369657
Error on this batch = 0.36841879034652375
Error on this batch = 

Error on this batch = 0.37586647139233015
Cost on val dataset after 10419 epochs is = 0.07110569819784889
learning rate =  0.05036366646599859
Initial Cost on Val dataset for this epoch 10419 = 0.07110569819784889
Error on this batch = 0.3683925292991683
Error on this batch = 0.375865558336161
Cost on val dataset after 10420 epochs is = 0.0711054892024812
learning rate =  0.05036205529268849
Initial Cost on Val dataset for this epoch 10420 = 0.0711054892024812
Error on this batch = 0.36839165793644085
Error on this batch = 0.3758646454450144
Cost on val dataset after 10421 epochs is = 0.07110528023028057
learning rate =  0.050360444325529406
Initial Cost on Val dataset for this epoch 10421 = 0.07110528023028057
Error on this batch = 0.36839078682541593
Error on this batch = 0.37586373271895424
Cost on val dataset after 10422 epochs is = 0.07110507128120395
learning rate =  0.05035883356447521
Initial Cost on Val dataset for this epoch 10422 = 0.07110507128120395
Error on this batch = 0

Error on this batch = 0.3683647677151393
Error on this batch = 0.3758364279559361
Cost on val dataset after 10452 epochs is = 0.07109881334228009
learning rate =  0.050310606343282153
Initial Cost on Val dataset for this epoch 10452 = 0.07109881334228009
Error on this batch = 0.3683639041296797
Error on this batch = 0.37583552037060897
Cost on val dataset after 10453 epochs is = 0.07109860508794638
learning rate =  0.05030900194866587
Initial Cost on Val dataset for this epoch 10453 = 0.07109860508794638
Error on this batch = 0.3683630407786555
Error on this batch = 0.3758346129515461
Cost on val dataset after 10454 epochs is = 0.07109839685529044
learning rate =  0.05030739775868523
Initial Cost on Val dataset for this epoch 10454 = 0.07109839685529044
Error on this batch = 0.3683621776615827
Error on this batch = 0.37583370569875335
Cost on val dataset after 10455 epochs is = 0.07109818864426543
learning rate =  0.05030579377329457
Initial Cost on Val dataset for this epoch 10455 = 0

Error on this batch = 0.3758065653722862
Cost on val dataset after 10485 epochs is = 0.07109195213766865
learning rate =  0.05025776911997474
Initial Cost on Val dataset for this epoch 10485 = 0.07109195213766865
Error on this batch = 0.36833553456710527
Error on this batch = 0.37580566326498016
Cost on val dataset after 10486 epochs is = 0.07109174457373696
learning rate =  0.05025617145429925
Initial Cost on Val dataset for this epoch 10486 = 0.07109174457373696
Error on this batch = 0.36833467869819986
Error on this batch = 0.37580476132296947
Cost on val dataset after 10487 epochs is = 0.07109153702991829
learning rate =  0.05025457399175989
Initial Cost on Val dataset for this epoch 10487 = 0.07109153702991829
Error on this batch = 0.3683338230488583
Error on this batch = 0.3758038595461848
Cost on val dataset after 10488 epochs is = 0.07109132950616505
learning rate =  0.05025297673231148
Initial Cost on Val dataset for this epoch 10488 = 0.07109132950616505
Error on this batch =

Error on this batch = 0.3757768826278435
Cost on val dataset after 10518 epochs is = 0.07108511288918158
learning rate =  0.050205153162566685
Initial Cost on Val dataset for this epoch 10518 = 0.07108511288918158
Error on this batch = 0.36830740477368884
Error on this batch = 0.3757759859251135
Cost on val dataset after 10519 epochs is = 0.07108490596403375
learning rate =  0.050203562176599
Initial Cost on Val dataset for this epoch 10519 = 0.07108490596403375
Error on this batch = 0.36830655595523637
Error on this batch = 0.3757750893840661
Cost on val dataset after 10520 epochs is = 0.0710846990574488
learning rate =  0.050201971392283586
Initial Cost on Val dataset for this epoch 10520 = 0.0710846990574488
Error on this batch = 0.3683057073445346
Error on this batch = 0.37577419300454823
Cost on val dataset after 10521 epochs is = 0.07108449216938077
learning rate =  0.05020038080957571
Initial Cost on Val dataset for this epoch 10521 = 0.07108449216938077
Error on this batch = 0.

Error on this batch = 0.3682786596295034
Error on this batch = 0.375745593092376
Cost on val dataset after 10553 epochs is = 0.07107788125756476
learning rate =  0.05014958834482047
Initial Cost on Val dataset for this epoch 10553 = 0.07107788125756476
Error on this batch = 0.36827781770237067
Error on this batch = 0.37574470194009335
Cost on val dataset after 10554 epochs is = 0.07107767495534734
learning rate =  0.050148004390164015
Initial Cost on Val dataset for this epoch 10554 = 0.07107767495534734
Error on this batch = 0.36827697597286246
Error on this batch = 0.37574381094256015
Cost on val dataset after 10555 epochs is = 0.0710774686701392
learning rate =  0.05014642063560289
Initial Cost on Val dataset for this epoch 10555 = 0.0710774686701392
Error on this batch = 0.3682761344407072
Error on this batch = 0.3757429200995307
Cost on val dataset after 10556 epochs is = 0.07107726240189784
learning rate =  0.050144837081092866
Initial Cost on Val dataset for this epoch 10556 = 0

Cost on val dataset after 10587 epochs is = 0.0710708762762154
learning rate =  0.05009584587594981
Initial Cost on Val dataset for this epoch 10587 = 0.0710708762762154
Error on this batch = 0.36824930805700434
Error on this batch = 0.3757144930940967
Cost on val dataset after 10588 epochs is = 0.07107067052926334
learning rate =  0.05009426869980434
Initial Cost on Val dataset for this epoch 10588 = 0.07107067052926334
Error on this batch = 0.3682484728935627
Error on this batch = 0.37571360719483693
Cost on val dataset after 10589 epochs is = 0.07107046479795015
learning rate =  0.050092691722258116
Initial Cost on Val dataset for this epoch 10589 = 0.07107046479795015
Error on this batch = 0.368247637918993
Error on this batch = 0.375712721440183
Cost on val dataset after 10590 epochs is = 0.07107025908223791
learning rate =  0.05009111494326743
Initial Cost on Val dataset for this epoch 10590 = 0.07107025908223791
Error on this batch = 0.3682468031330663
Error on this batch = 0.37

Cost on val dataset after 10622 epochs is = 0.07106368419765326
learning rate =  0.05004076259174035
Initial Cost on Val dataset for this epoch 10622 = 0.07106368419765326
Error on this batch = 0.36822018827326247
Error on this batch = 0.3756835703307366
Cost on val dataset after 10623 epochs is = 0.0710634789764761
learning rate =  0.05003919234062247
Initial Cost on Val dataset for this epoch 10623 = 0.0710634789764761
Error on this batch = 0.3682193595910813
Error on this batch = 0.37568268927648646
Cost on val dataset after 10624 epochs is = 0.0710632737696995
learning rate =  0.05003762228658044
Initial Cost on Val dataset for this epoch 10624 = 0.0710632737696995
Error on this batch = 0.36821853109038183
Error on this batch = 0.3756818083536109
Cost on val dataset after 10625 epochs is = 0.07106306857729086
learning rate =  0.05003605242957098
Initial Cost on Val dataset for this epoch 10625 = 0.07106306857729086
Error on this batch = 0.3682177027709706
Error on this batch = 0.37

Cost on val dataset after 10657 epochs is = 0.07105650981905196
learning rate =  0.04998592078018303
Initial Cost on Val dataset for this epoch 10657 = 0.07105650981905196
Error on this batch = 0.3681912911614961
Error on this batch = 0.37565280871973794
Cost on val dataset after 10658 epochs is = 0.07105630508346274
learning rate =  0.049984357401056494
Initial Cost on Val dataset for this epoch 10658 = 0.07105630508346274
Error on this batch = 0.3681904687221542
Error on this batch = 0.3756519319976954
Cost on val dataset after 10659 epochs is = 0.07105610036122173
learning rate =  0.04998279421749902
Initial Cost on Val dataset for this epoch 10659 = 0.07105610036122173
Error on this batch = 0.36818964645807
Error on this batch = 0.37565105539091137
Cost on val dataset after 10660 epochs is = 0.07105589565230162
learning rate =  0.049981231229467814
Initial Cost on Val dataset for this epoch 10660 = 0.07105589565230162
Error on this batch = 0.3681888243690812
Error on this batch = 0

Cost on val dataset after 10690 epochs is = 0.07104976044930876
learning rate =  0.0499344322964566
Initial Cost on Val dataset for this epoch 10690 = 0.07104976044930876
Error on this batch = 0.36816424235063716
Error on this batch = 0.3756239349313835
Cost on val dataset after 10691 epochs is = 0.07104955614057369
learning rate =  0.04993287534857905
Initial Cost on Val dataset for this epoch 10691 = 0.07104955614057369
Error on this batch = 0.36816342561393683
Error on this batch = 0.37562306173710114
Cost on val dataset after 10692 epochs is = 0.07104935184436142
learning rate =  0.04993131859486491
Initial Cost on Val dataset for this epoch 10692 = 0.07104935184436142
Error on this batch = 0.3681626090475972
Error on this batch = 0.3756221886404562
Cost on val dataset after 10693 epochs is = 0.07104914756064938
learning rate =  0.049929762035271816
Initial Cost on Val dataset for this epoch 10693 = 0.07104914756064938
Error on this batch = 0.3681617926514838
Error on this batch = 

Cost on val dataset after 10723 epochs is = 0.07104302475524886
learning rate =  0.049883155304148837
Initial Cost on Val dataset for this epoch 10723 = 0.07104302475524886
Error on this batch = 0.36813737929034895
Error on this batch = 0.37559516789270414
Cost on val dataset after 10724 epochs is = 0.07104282084855756
learning rate =  0.04988160474136445
Initial Cost on Val dataset for this epoch 10724 = 0.07104282084855756
Error on this batch = 0.36813656810904377
Error on this batch = 0.37559429760990426
Cost on val dataset after 10725 epochs is = 0.0710426169537126
learning rate =  0.04988005437135222
Initial Cost on Val dataset for this epoch 10725 = 0.0710426169537126
Error on this batch = 0.3681357570940915
Error on this batch = 0.375593427404984
Cost on val dataset after 10726 epochs is = 0.07104241307069559
learning rate =  0.04987850419407021
Initial Cost on Val dataset for this epoch 10726 = 0.07104241307069559
Error on this batch = 0.368134946245384
Error on this batch = 0.

Cost on val dataset after 10756 epochs is = 0.07103630199377994
learning rate =  0.0498320882876834
Initial Cost on Val dataset for this epoch 10756 = 0.07103630199377994
Error on this batch = 0.3681106975832446
Error on this batch = 0.3755664861741309
Cost on val dataset after 10757 epochs is = 0.07103609846893925
learning rate =  0.04983054406430814
Initial Cost on Val dataset for this epoch 10757 = 0.07103609846893925
Error on this batch = 0.36810989183867665
Error on this batch = 0.3755656181194915
Cost on val dataset after 10758 epochs is = 0.07103589495539775
learning rate =  0.049829000032327965
Initial Cost on Val dataset for this epoch 10758 = 0.07103589495539775
Error on this batch = 0.36810908625729455
Error on this batch = 0.3755647501210544
Cost on val dataset after 10759 epochs is = 0.07103569145314055
learning rate =  0.04982745619170136
Initial Cost on Val dataset for this epoch 10759 = 0.07103569145314055
Error on this batch = 0.36810828083901453
Error on this batch = 

Cost on val dataset after 10791 epochs is = 0.07102918525517733
learning rate =  0.049778154078614606
Initial Cost on Val dataset for this epoch 10791 = 0.07102918525517733
Error on this batch = 0.36808259310880515
Error on this batch = 0.37553613314274087
Cost on val dataset after 10792 epochs is = 0.07102898211758057
learning rate =  0.0497766165294529
Initial Cost on Val dataset for this epoch 10792 = 0.07102898211758057
Error on this batch = 0.3680817930305753
Error on this batch = 0.3755352666380699
Cost on val dataset after 10793 epochs is = 0.07102877899081647
learning rate =  0.04977507917024105
Initial Cost on Val dataset for this epoch 10793 = 0.07102877899081647
Error on this batch = 0.3680809931130143
Error on this batch = 0.37553440016476186
Cost on val dataset after 10794 epochs is = 0.07102857587487309
learning rate =  0.04977354200093801
Initial Cost on Val dataset for this epoch 10794 = 0.07102857587487309
Error on this batch = 0.36808019335606185
Error on this batch =

Cost on val dataset after 10826 epochs is = 0.07102208180997976
learning rate =  0.0497244526100305
Initial Cost on Val dataset for this epoch 10826 = 0.07102208180997976
Error on this batch = 0.36805468560451354
Error on this batch = 0.37550581926289384
Cost on val dataset after 10827 epochs is = 0.07102187904483402
learning rate =  0.04972292168476444
Initial Cost on Val dataset for this epoch 10827 = 0.07102187904483402
Error on this batch = 0.368053891117567
Error on this batch = 0.37550495341034296
Cost on val dataset after 10828 epochs is = 0.0710216762901387
learning rate =  0.04972139094801857
Initial Cost on Val dataset for this epoch 10828 = 0.0710216762901387
Error on this batch = 0.3680530967895117
Error on this batch = 0.3755040875626922
Cost on val dataset after 10829 epochs is = 0.07102147354588378
learning rate =  0.04971986039975226
Initial Cost on Val dataset for this epoch 10829 = 0.07102147354588378
Error on this batch = 0.3680523026203061
Error on this batch = 0.37

Error on this batch = 0.3754772444360183
Cost on val dataset after 10860 epochs is = 0.07101519359923619
learning rate =  0.04967250666830406
Initial Cost on Val dataset for this epoch 10860 = 0.07101519359923619
Error on this batch = 0.36802776195566206
Error on this batch = 0.3754763783295475
Cost on val dataset after 10861 epochs is = 0.07101499118396873
learning rate =  0.04967098213000993
Initial Cost on Val dataset for this epoch 10861 = 0.07101499118396873
Error on this batch = 0.36802697284999525
Error on this batch = 0.3754755122018266
Cost on val dataset after 10862 epochs is = 0.0710147887788272
learning rate =  0.049669457778861816
Initial Cost on Val dataset for this epoch 10862 = 0.0710147887788272
Error on this batch = 0.368026183902014
Error on this batch = 0.37547464605204844
Cost on val dataset after 10863 epochs is = 0.07101458638380244
learning rate =  0.049667933614819526
Initial Cost on Val dataset for this epoch 10863 = 0.07101458638380244
Error on this batch = 0

Error on this batch = 0.37544778003053825
Cost on val dataset after 10894 epochs is = 0.07100831710603472
learning rate =  0.04962077711523949
Initial Cost on Val dataset for this epoch 10894 = 0.07100831710603472
Error on this batch = 0.3680010206522443
Error on this batch = 0.37544691274448466
Cost on val dataset after 10895 epochs is = 0.07100811502991522
learning rate =  0.04961925891743181
Initial Cost on Val dataset for this epoch 10895 = 0.07100811502991522
Error on this batch = 0.3680002368919276
Error on this batch = 0.3754460454093578
Cost on val dataset after 10896 epochs is = 0.07100791296360989
learning rate =  0.04961774090541026
Initial Cost on Val dataset for this epoch 10896 = 0.07100791296360989
Error on this batch = 0.36799945328834727
Error on this batch = 0.37544517802432953
Cost on val dataset after 10897 epochs is = 0.07100771090710946
learning rate =  0.049616223079135054
Initial Cost on Val dataset for this epoch 10897 = 0.07100771090710946
Error on this batch 

Error on this batch = 0.37541825980383764
Cost on val dataset after 10928 epochs is = 0.07100145196704132
learning rate =  0.04956926237829954
Initial Cost on Val dataset for this epoch 10928 = 0.07100145196704132
Error on this batch = 0.36797446056120153
Error on this batch = 0.37541739038283517
Cost on val dataset after 10929 epochs is = 0.07100125021924641
learning rate =  0.04956775047497467
Initial Cost on Val dataset for this epoch 10929 = 0.07100125021924641
Error on this batch = 0.3679736821137189
Error on this batch = 0.37541652088444355
Cost on val dataset after 10930 epochs is = 0.07100104848093525
learning rate =  0.04956623875609012
Initial Cost on Val dataset for this epoch 10930 = 0.07100104848093525
Error on this batch = 0.3679729038219437
Error on this batch = 0.3754156513078273
Cost on val dataset after 10931 epochs is = 0.0710008467520975
learning rate =  0.0495647272216065
Initial Cost on Val dataset for this epoch 10931 = 0.0710008467520975
Error on this batch = 0.

Cost on val dataset after 10961 epochs is = 0.07099479923919887
learning rate =  0.0495194667387396
Initial Cost on Val dataset for this epoch 10961 = 0.07099479923919887
Error on this batch = 0.36794885379285974
Error on this batch = 0.375388651078101
Cost on val dataset after 10962 epochs is = 0.07099459779867953
learning rate =  0.04951796090122191
Initial Cost on Val dataset for this epoch 10962 = 0.07099459779867953
Error on this batch = 0.36794808046204247
Error on this batch = 0.37538777855773237
Cost on val dataset after 10963 epochs is = 0.070994396367279
learning rate =  0.049516455246851575
Initial Cost on Val dataset for this epoch 10963 = 0.070994396367279
Error on this batch = 0.3679473072854592
Error on this batch = 0.3753869059316459
Cost on val dataset after 10964 epochs is = 0.07099419494498532
learning rate =  0.0495149497755896
Initial Cost on Val dataset for this epoch 10964 = 0.07099419494498532
Error on this batch = 0.3679465342630533
Error on this batch = 0.3753

Error on this batch = 0.37535979759335136
Cost on val dataset after 10995 epochs is = 0.07098795530266622
learning rate =  0.04946837077610476
Initial Cost on Val dataset for this epoch 10995 = 0.07098795530266622
Error on this batch = 0.36792264668686786
Error on this batch = 0.3753589211504331
Cost on val dataset after 10996 epochs is = 0.07098775416509309
learning rate =  0.04946687114380093
Initial Cost on Val dataset for this epoch 10996 = 0.07098775416509309
Error on this batch = 0.3679218785623435
Error on this batch = 0.37535804457481603
Cost on val dataset after 10997 epochs is = 0.07098755303619574
learning rate =  0.04946537169332582
Initial Cost on Val dataset for this epoch 10997 = 0.07098755303619574
Error on this batch = 0.3679211105895525
Error on this batch = 0.3753571678656969
Cost on val dataset after 10998 epochs is = 0.07098735191595991
learning rate =  0.049463872424640835
Initial Cost on Val dataset for this epoch 10998 = 0.07098735191595991
Error on this batch =

Error on this batch = 0.37532991933266785
Cost on val dataset after 11029 epochs is = 0.07098112140372907
learning rate =  0.04941748505337294
Initial Cost on Val dataset for this epoch 11029 = 0.07098112140372907
Error on this batch = 0.36789661494243553
Error on this batch = 0.37532903793398187
Cost on val dataset after 11030 epochs is = 0.07098092055270813
learning rate =  0.04941599158167288
Initial Cost on Val dataset for this epoch 11030 = 0.07098092055270813
Error on this batch = 0.36789585191492113
Error on this batch = 0.37532815637594275
Cost on val dataset after 11031 epochs is = 0.07098071970983752
learning rate =  0.049414498290496406
Initial Cost on Val dataset for this epoch 11031 = 0.07098071970983752
Error on this batch = 0.36789508903506274
Error on this batch = 0.3753272746577909
Cost on val dataset after 11032 epochs is = 0.0709805188751005
learning rate =  0.04941300517980535
Initial Cost on Val dataset for this epoch 11032 = 0.0709805188751005
Error on this batch 

Cost on val dataset after 11061 epochs is = 0.07097469812768178
learning rate =  0.049369783309700425
Initial Cost on Val dataset for this epoch 11061 = 0.07097469812768178
Error on this batch = 0.3678722704804143
Error on this batch = 0.37530074496339866
Cost on val dataset after 11062 epochs is = 0.07097449752877358
learning rate =  0.049368295595873704
Initial Cost on Val dataset for this epoch 11062 = 0.07097449752877358
Error on this batch = 0.36787151209307595
Error on this batch = 0.375299857914515
Cost on val dataset after 11063 epochs is = 0.0709742969374413
learning rate =  0.049366808061354434
Initial Cost on Val dataset for this epoch 11063 = 0.0709742969374413
Error on this batch = 0.3678707538475339
Error on this batch = 0.3752989706821599
Cost on val dataset after 11064 epochs is = 0.07097409635366572
learning rate =  0.049365320706104776
Initial Cost on Val dataset for this epoch 11064 = 0.07097409635366572
Error on this batch = 0.36786999574356677
Error on this batch =

Error on this batch = 0.3752713717697263
Cost on val dataset after 11095 epochs is = 0.07096788189683974
learning rate =  0.04931930140530174
Initial Cost on Val dataset for this epoch 11095 = 0.07096788189683974
Error on this batch = 0.3678465634140195
Error on this batch = 0.3752704783111189
Cost on val dataset after 11096 epochs is = 0.07096768154433374
learning rate =  0.049317819766781144
Initial Cost on Val dataset for this epoch 11096 = 0.07096768154433374
Error on this batch = 0.3678458097078579
Error on this batch = 0.37526958464730825
Cost on val dataset after 11097 epochs is = 0.07096748119871284
learning rate =  0.049316338306288614
Initial Cost on Val dataset for this epoch 11097 = 0.07096748119871284
Error on this batch = 0.3678450561344599
Error on this batch = 0.37526869077768
Cost on val dataset after 11098 epochs is = 0.07096728085995564
learning rate =  0.04931485702378671
Initial Cost on Val dataset for this epoch 11098 = 0.07096728085995564
Error on this batch = 0.

Error on this batch = 0.3752408755166897
Cost on val dataset after 11129 epochs is = 0.07096107364326625
learning rate =  0.04926902534587469
Initial Cost on Val dataset for this epoch 11129 = 0.07096107364326625
Error on this batch = 0.3678210098420368
Error on this batch = 0.37523997475411336
Cost on val dataset after 11130 epochs is = 0.07096087351249483
learning rate =  0.04926754973937628
Initial Cost on Val dataset for this epoch 11130 = 0.07096087351249483
Error on this batch = 0.36782026045287153
Error on this batch = 0.3752390737671236
Cost on val dataset after 11131 epochs is = 0.07096067338784953
learning rate =  0.04926607430963955
Initial Cost on Val dataset for this epoch 11131 = 0.07096067338784953
Error on this batch = 0.36781951118386
Error on this batch = 0.37523817255521075
Cost on val dataset after 11132 epochs is = 0.07096047326930725
learning rate =  0.049264599056627445
Initial Cost on Val dataset for this epoch 11132 = 0.07096047326930725
Error on this batch = 0

Error on this batch = 0.37521102922427785
Cost on val dataset after 11162 epochs is = 0.07095447243482755
learning rate =  0.049220423459894434
Initial Cost on Val dataset for this epoch 11162 = 0.07095447243482755
Error on this batch = 0.36779634092948504
Error on this batch = 0.37521012080618116
Cost on val dataset after 11163 epochs is = 0.07095427249382873
learning rate =  0.04921895366702866
Initial Cost on Val dataset for this epoch 11163 = 0.07095427249382873
Error on this batch = 0.36779559525657507
Error on this batch = 0.3752092121487286
Cost on val dataset after 11164 epochs is = 0.07095407255817678
learning rate =  0.049217484049707695
Initial Cost on Val dataset for this epoch 11164 = 0.07095407255817678
Error on this batch = 0.36779484968753423
Error on this batch = 0.3752083032515301
Cost on val dataset after 11165 epochs is = 0.07095387262784772
learning rate =  0.04921601460789486
Initial Cost on Val dataset for this epoch 11165 = 0.07095387262784772
Error on this batc

Cost on val dataset after 11195 epochs is = 0.07094787707392279
learning rate =  0.04917201278326695
Initial Cost on Val dataset for this epoch 11195 = 0.07094787707392279
Error on this batch = 0.367771785329644
Error on this batch = 0.3751800065778906
Cost on val dataset after 11196 epochs is = 0.07094767729668262
learning rate =  0.04917054876406868
Initial Cost on Val dataset for this epoch 11196 = 0.07094767729668262
Error on this batch = 0.36777104276475037
Error on this batch = 0.3751790898264286
Cost on val dataset after 11197 epochs is = 0.07094747752399669
learning rate =  0.04916908491921029
Initial Cost on Val dataset for this epoch 11197 = 0.07094747752399669
Error on this batch = 0.36777030028293595
Error on this batch = 0.3751781728245751
Cost on val dataset after 11198 epochs is = 0.07094727775584113
learning rate =  0.049167621248655466
Initial Cost on Val dataset for this epoch 11198 = 0.07094727775584113
Error on this batch = 0.3677695578834958
Error on this batch = 0

Error on this batch = 0.3751505452343413
Cost on val dataset after 11228 epochs is = 0.0709412867002077
learning rate =  0.04912379200341812
Initial Cost on Val dataset for this epoch 11228 = 0.0709412867002077
Error on this batch = 0.3677473205233968
Error on this batch = 0.37514962036592997
Cost on val dataset after 11229 epochs is = 0.07094108706077538
learning rate =  0.04912233371831356
Initial Cost on Val dataset for this epoch 11229 = 0.07094108706077538
Error on this batch = 0.3677465803032219
Error on this batch = 0.37514869524114686
Cost on val dataset after 11230 epochs is = 0.07094088742512315
learning rate =  0.04912087560635573
Initial Cost on Val dataset for this epoch 11230 = 0.07094088742512315
Error on this batch = 0.36774584014046224
Error on this batch = 0.3751477698598767
Cost on val dataset after 11231 epochs is = 0.07094068779322821
learning rate =  0.04911941766750867
Initial Cost on Val dataset for this epoch 11231 = 0.07094068779322821
Error on this batch = 0.

Error on this batch = 0.3751198887623551
Cost on val dataset after 11261 epochs is = 0.07093470047246449
learning rate =  0.04907575982060791
Initial Cost on Val dataset for this epoch 11261 = 0.07093470047246449
Error on this batch = 0.3677229187118812
Error on this batch = 0.3751189553966988
Cost on val dataset after 11262 epochs is = 0.07093450094601994
learning rate =  0.04907430723040962
Initial Cost on Val dataset for this epoch 11262 = 0.07093450094601994
Error on this batch = 0.36772217990905615
Error on this batch = 0.37511802177318315
Cost on val dataset after 11263 epochs is = 0.07093430142263213
learning rate =  0.04907285481217655
Initial Cost on Val dataset for this epoch 11263 = 0.07093430142263213
Error on this batch = 0.36772144113294664
Error on this batch = 0.3751170878918378
Cost on val dataset after 11264 epochs is = 0.07093410190228026
learning rate =  0.04907140256587307
Initial Cost on Val dataset for this epoch 11264 = 0.07093410190228026
Error on this batch = 

Error on this batch = 0.37508801005175124
Cost on val dataset after 11295 epochs is = 0.07092791816736305
learning rate =  0.04902646801366975
Initial Cost on Val dataset for this epoch 11295 = 0.07092791816736305
Error on this batch = 0.36769780815903913
Error on this batch = 0.3750870679616906
Cost on val dataset after 11296 epochs is = 0.0709277187336574
learning rate =  0.04902502125036694
Initial Cost on Val dataset for this epoch 11296 = 0.0709277187336574
Error on this batch = 0.3676970696718871
Error on this batch = 0.37508612561722365
Cost on val dataset after 11297 epochs is = 0.07092751930234731
learning rate =  0.049023574657824046
Initial Cost on Val dataset for this epoch 11297 = 0.07092751930234731
Error on this batch = 0.3676963311746598
Error on this batch = 0.3750851830185274
Cost on val dataset after 11298 epochs is = 0.07092731987341488
learning rate =  0.04902212823600579
Initial Cost on Val dataset for this epoch 11298 = 0.07092731987341488
Error on this batch = 0

Cost on val dataset after 11327 epochs is = 0.07092153739189498
learning rate =  0.04898025611034027
Initial Cost on Val dataset for this epoch 11327 = 0.07092153739189498
Error on this batch = 0.367674165684348
Error on this batch = 0.3750567878880552
Cost on val dataset after 11328 epochs is = 0.0709213380264786
learning rate =  0.04897881479391327
Initial Cost on Val dataset for this epoch 11328 = 0.0709213380264786
Error on this batch = 0.36767342628054794
Error on this batch = 0.37505583751855465
Cost on val dataset after 11329 epochs is = 0.07092113866293613
learning rate =  0.04897737364712276
Initial Cost on Val dataset for this epoch 11329 = 0.07092113866293613
Error on this batch = 0.36767268682765136
Error on this batch = 0.37505488690265754
Cost on val dataset after 11330 epochs is = 0.0709209393012531
learning rate =  0.04897593266993381
Initial Cost on Val dataset for this epoch 11330 = 0.0709209393012531
Error on this batch = 0.3676719473243766
Error on this batch = 0.37

Error on this batch = 0.37502625553079333
Cost on val dataset after 11360 epochs is = 0.07091495924781173
learning rate =  0.04893278204612186
Initial Cost on Val dataset for this epoch 11360 = 0.07091495924781173
Error on this batch = 0.3676497323067842
Error on this batch = 0.3750252974496362
Cost on val dataset after 11361 epochs is = 0.07091475993716771
learning rate =  0.04893134630930846
Initial Cost on Val dataset for this epoch 11361 = 0.07091475993716771
Error on this batch = 0.36764899058811057
Error on this batch = 0.37502433913389427
Cost on val dataset after 11362 epochs is = 0.07091456062797721
learning rate =  0.04892991074098405
Initial Cost on Val dataset for this epoch 11362 = 0.07091456062797721
Error on this batch = 0.3676482487763451
Error on this batch = 0.37502338058399604
Cost on val dataset after 11363 epochs is = 0.07091436132022944
learning rate =  0.04892847534111404
Initial Cost on Val dataset for this epoch 11363 = 0.07091436132022944
Error on this batch =

Cost on val dataset after 11392 epochs is = 0.07090858197880925
learning rate =  0.04888692186738406
Initial Cost on Val dataset for this epoch 11392 = 0.07090858197880925
Error on this batch = 0.36762594419234235
Error on this batch = 0.37499451746714896
Cost on val dataset after 11393 epochs is = 0.07090838270990216
learning rate =  0.0488854915051073
Initial Cost on Val dataset for this epoch 11393 = 0.07090838270990216
Error on this batch = 0.36762519879745126
Error on this batch = 0.3749935518890966
Cost on val dataset after 11394 epochs is = 0.07090818344216086
learning rate =  0.04888406131021736
Initial Cost on Val dataset for this epoch 11394 = 0.07090818344216086
Error on this batch = 0.3676244532641242
Error on this batch = 0.3749925860923309
Cost on val dataset after 11395 epochs is = 0.07090798417557824
learning rate =  0.04888263128267997
Initial Cost on Val dataset for this epoch 11395 = 0.07090798417557824
Error on this batch = 0.36762370759091395
Error on this batch = 

Error on this batch = 0.3749635132347494
Cost on val dataset after 11425 epochs is = 0.07090200668631493
learning rate =  0.04883980810589194
Initial Cost on Val dataset for this epoch 11425 = 0.07090200668631493
Error on this batch = 0.36760126512277774
Error on this batch = 0.374962540936255
Cost on val dataset after 11426 epochs is = 0.07090180745274506
learning rate =  0.048838383249337
Initial Cost on Val dataset for this epoch 11426 = 0.07090180745274506
Error on this batch = 0.367600514389342
Error on this batch = 0.3749615684374715
Cost on val dataset after 11427 epochs is = 0.07090160822016568
learning rate =  0.04883695855904304
Initial Cost on Val dataset for this epoch 11427 = 0.07090160822016568
Error on this batch = 0.36759976346918877
Error on this batch = 0.37496059573901663
Cost on val dataset after 11428 epochs is = 0.0709014089885733
learning rate =  0.048835534034976105
Initial Cost on Val dataset for this epoch 11428 = 0.0709014089885733
Error on this batch = 0.367

Error on this batch = 0.37493034652339147
Cost on val dataset after 11459 epochs is = 0.07089523328450469
learning rate =  0.04879145605274655
Initial Cost on Val dataset for this epoch 11459 = 0.07089523328450469
Error on this batch = 0.3675756266361803
Error on this batch = 0.37492936777476116
Cost on val dataset after 11460 epochs is = 0.07089503408326428
learning rate =  0.04879003683007922
Initial Cost on Val dataset for this epoch 11460 = 0.07089503408326428
Error on this batch = 0.3675748687299999
Error on this batch = 0.3749283888480508
Cost on val dataset after 11461 epochs is = 0.07089483488295202
learning rate =  0.04878861777252417
Initial Cost on Val dataset for this epoch 11461 = 0.07089483488295202
Error on this batch = 0.3675741105872652
Error on this batch = 0.3749274097439478
Cost on val dataset after 11462 epochs is = 0.07089463568356776
learning rate =  0.048787198880047795
Initial Cost on Val dataset for this epoch 11462 = 0.07089463568356776
Error on this batch = 

Cost on val dataset after 11492 epochs is = 0.07088866013615412
learning rate =  0.048744708701018526
Initial Cost on Val dataset for this epoch 11492 = 0.07088866013615412
Error on this batch = 0.36755048298540727
Error on this batch = 0.37489697335837835
Cost on val dataset after 11493 epochs is = 0.07088846096590941
learning rate =  0.048743294909362155
Initial Cost on Val dataset for this epoch 11493 = 0.07088846096590941
Error on this batch = 0.3675497165164694
Error on this batch = 0.37489598895051834
Cost on val dataset after 11494 epochs is = 0.07088826179663078
learning rate =  0.048741881281713986
Initial Cost on Val dataset for this epoch 11494 = 0.07088826179663078
Error on this batch = 0.3675489497640801
Error on this batch = 0.37489500438884593
Cost on val dataset after 11495 epochs is = 0.0708880626283207
learning rate =  0.048740467818040714
Initial Cost on Val dataset for this epoch 11495 = 0.0708880626283207
Error on this batch = 0.36754818272685696
Error on this batc

Cost on val dataset after 11526 epochs is = 0.07088188890762191
learning rate =  0.048696731594524846
Initial Cost on Val dataset for this epoch 11526 = 0.07088188890762191
Error on this batch = 0.36752425589161275
Error on this batch = 0.3748634216742642
Cost on val dataset after 11527 epochs is = 0.07088168977199776
learning rate =  0.0486953233605277
Initial Cost on Val dataset for this epoch 11527 = 0.07088168977199776
Error on this batch = 0.3675234790280679
Error on this batch = 0.3748624324577443
Cost on val dataset after 11528 epochs is = 0.0708814906374593
learning rate =  0.048693915289412186
Initial Cost on Val dataset for this epoch 11528 = 0.0708814906374593
Error on this batch = 0.36752270183584224
Error on this batch = 0.37486144311312386
Cost on val dataset after 11529 epochs is = 0.0708812915040111
learning rate =  0.048692507381145354
Initial Cost on Val dataset for this epoch 11529 = 0.0708812915040111
Error on this batch = 0.3675219243136678
Error on this batch = 0.

Error on this batch = 0.374830714134543
Cost on val dataset after 11560 epochs is = 0.07087511893477966
learning rate =  0.048648942818396254
Initial Cost on Val dataset for this epoch 11560 = 0.07087511893477966
Error on this batch = 0.36749765074767465
Error on this batch = 0.3748297211021997
Cost on val dataset after 11561 epochs is = 0.0708749198388877
learning rate =  0.04864754010393788
Initial Cost on Val dataset for this epoch 11561 = 0.0708749198388877
Error on this batch = 0.3674968620229832
Error on this batch = 0.3748287279675827
Cost on val dataset after 11562 epochs is = 0.07087472074426053
learning rate =  0.048646137551245625
Initial Cost on Val dataset for this epoch 11562 = 0.07087472074426053
Error on this batch = 0.367496072928921
Error on this batch = 0.37482773473148345
Cost on val dataset after 11563 epochs is = 0.07087452165090412
learning rate =  0.04864473516028684
Initial Cost on Val dataset for this epoch 11563 = 0.07087452165090412
Error on this batch = 0.3

Cost on val dataset after 11592 epochs is = 0.07086874852409983
learning rate =  0.04860413603014563
Initial Cost on Val dataset for this epoch 11592 = 0.07086874852409983
Error on this batch = 0.3674722229981562
Error on this batch = 0.3747978943976351
Cost on val dataset after 11593 epochs is = 0.07086854947176982
learning rate =  0.04860273847605807
Initial Cost on Val dataset for this epoch 11593 = 0.07086854947176982
Error on this batch = 0.36747142192553894
Error on this batch = 0.37479689840977515
Cost on val dataset after 11594 epochs is = 0.07086835042090821
learning rate =  0.048601341082696675
Initial Cost on Val dataset for this epoch 11594 = 0.07086835042090821
Error on this batch = 0.36747062045040857
Error on this batch = 0.37479590234590393
Cost on val dataset after 11595 epochs is = 0.07086815137152175
learning rate =  0.04859994385002909
Initial Cost on Val dataset for this epoch 11595 = 0.07086815137152175
Error on this batch = 0.3674698185718182
Error on this batch 

learning rate =  0.04855949385941943
Initial Cost on Val dataset for this epoch 11624 = 0.07086237961176539
Error on this batch = 0.36744638453260114
Error on this batch = 0.3747659890469256
Cost on val dataset after 11625 epochs is = 0.07086218060983047
learning rate =  0.04855810143257175
Initial Cost on Val dataset for this epoch 11625 = 0.07086218060983047
Error on this batch = 0.3674455701378051
Error on this batch = 0.37476499102289607
Cost on val dataset after 11626 epochs is = 0.07086198160958558
learning rate =  0.04855670916541978
Initial Cost on Val dataset for this epoch 11626 = 0.07086198160958558
Error on this batch = 0.3674447553130041
Error on this batch = 0.3747639929484048
Cost on val dataset after 11627 epochs is = 0.0708617826110378
learning rate =  0.04855531705793147
Initial Cost on Val dataset for this epoch 11627 = 0.0708617826110378
Error on this batch = 0.36744394005743447
Error on this batch = 0.37476299482424824
Cost on val dataset after 11628 epochs is = 0.

Cost on val dataset after 11657 epochs is = 0.0708558134789321
learning rate =  0.04851362791829829
Initial Cost on Val dataset for this epoch 11657 = 0.0708558134789321
Error on this batch = 0.3674192785357925
Error on this batch = 0.374733031946336
Cost on val dataset after 11658 epochs is = 0.07085561453651248
learning rate =  0.04851224074453725
Initial Cost on Val dataset for this epoch 11658 = 0.07085561453651248
Error on this batch = 0.36741844957973113
Error on this batch = 0.3747320326762674
Cost on val dataset after 11659 epochs is = 0.07085541559601684
learning rate =  0.048510853729419136
Initial Cost on Val dataset for this epoch 11659 = 0.07085541559601684
Error on this batch = 0.36741762017157487
Error on this batch = 0.3747310333818783
Cost on val dataset after 11660 epochs is = 0.07085521665745223
learning rate =  0.04850946687291221
Initial Cost on Val dataset for this epoch 11660 = 0.07085521665745223
Error on this batch = 0.36741679031075436
Error on this batch = 0.

Error on this batch = 0.3747000474621717
Cost on val dataset after 11691 epochs is = 0.07084905055792759
learning rate =  0.04846655281954774
Initial Cost on Val dataset for this epoch 11691 = 0.07084905055792759
Error on this batch = 0.36739083727051625
Error on this batch = 0.37469904780191793
Cost on val dataset after 11692 epochs is = 0.07084885168483955
learning rate =  0.04846517102188998
Initial Cost on Val dataset for this epoch 11692 = 0.07084885168483955
Error on this batch = 0.3673899926589349
Error on this batch = 0.3746980481430175
Cost on val dataset after 11693 epochs is = 0.07084865281391112
learning rate =  0.048463789381800806
Initial Cost on Val dataset for this epoch 11693 = 0.07084865281391112
Error on this batch = 0.3673891475791766
Error on this batch = 0.3746970484862388
Cost on val dataset after 11694 epochs is = 0.07084845394514908
learning rate =  0.048462407899248765
Initial Cost on Val dataset for this epoch 11694 = 0.07084845394514908
Error on this batch =

Cost on val dataset after 11725 epochs is = 0.07084229012433452
learning rate =  0.04841965990742921
Initial Cost on Val dataset for this epoch 11725 = 0.07084229012433452
Error on this batch = 0.36736185585760567
Error on this batch = 0.3746650651508116
Cost on val dataset after 11726 epochs is = 0.07084209132838729
learning rate =  0.048418283449518255
Initial Cost on Val dataset for this epoch 11726 = 0.07084209132838729
Error on this batch = 0.36736099515209936
Error on this batch = 0.3746640659904796
Cost on val dataset after 11727 epochs is = 0.07084189253482193
learning rate =  0.04841690714811191
Initial Cost on Val dataset for this epoch 11727 = 0.07084189253482193
Error on this batch = 0.36736013396883077
Error on this batch = 0.3746630668579659
Cost on val dataset after 11728 epochs is = 0.07084169374364474
learning rate =  0.048415531003179
Initial Cost on Val dataset for this epoch 11728 = 0.07084169374364474
Error on this batch = 0.36735927230761123
Error on this batch = 

Cost on val dataset after 11759 epochs is = 0.0708355324351405
learning rate =  0.04837294795161367
Initial Cost on Val dataset for this epoch 11759 = 0.0708355324351405
Error on this batch = 0.36733232295223944
Error on this batch = 0.37463111370616703
Cost on val dataset after 11760 epochs is = 0.07083533372359697
learning rate =  0.04837157679744327
Initial Cost on Val dataset for this epoch 11760 = 0.07083533372359697
Error on this batch = 0.36733144592714395
Error on this batch = 0.3746301159027297
Cost on val dataset after 11761 epochs is = 0.0708351350146391
learning rate =  0.04837020579872372
Initial Cost on Val dataset for this epoch 11761 = 0.0708351350146391
Error on this batch = 0.3673305684206683
Error on this batch = 0.37462911815184113
Cost on val dataset after 11762 epochs is = 0.0708349363082726
learning rate =  0.04836883495542416
Initial Cost on Val dataset for this epoch 11762 = 0.0708349363082726
Error on this batch = 0.36732969043278985
Error on this batch = 0.37

Cost on val dataset after 11794 epochs is = 0.07082857910580315
learning rate =  0.04832504984751697
Initial Cost on Val dataset for this epoch 11794 = 0.07082857910580315
Error on this batch = 0.36730134073478843
Error on this batch = 0.3745962264497405
Cost on val dataset after 11795 epochs is = 0.07082838048801607
learning rate =  0.04832368411583392
Initial Cost on Val dataset for this epoch 11795 = 0.07082838048801607
Error on this batch = 0.367300446876516
Error on this batch = 0.3745952309015577
Cost on val dataset after 11796 epochs is = 0.07082818187300231
learning rate =  0.04832231853852751
Initial Cost on Val dataset for this epoch 11796 = 0.07082818187300231
Error on this batch = 0.3672995525386398
Error on this batch = 0.37459423543017095
Cost on val dataset after 11797 epochs is = 0.07082798326076684
learning rate =  0.048320953115567236
Initial Cost on Val dataset for this epoch 11797 = 0.07082798326076684
Error on this batch = 0.36729865772128406
Error on this batch = 

Error on this batch = 0.3745624248669197
Cost on val dataset after 11829 epochs is = 0.07082162916516148
learning rate =  0.048277340893319115
Initial Cost on Val dataset for this epoch 11829 = 0.07082162916516148
Error on this batch = 0.3672697713402778
Error on this batch = 0.3745614323003072
Cost on val dataset after 11830 epochs is = 0.0708214306472951
learning rate =  0.04827598054670695
Initial Cost on Val dataset for this epoch 11830 = 0.0708214306472951
Error on this batch = 0.36726886079401655
Error on this batch = 0.37456043983270754
Cost on val dataset after 11831 epochs is = 0.07082123213236588
learning rate =  0.048274620353407835
Initial Cost on Val dataset for this epoch 11831 = 0.07082123213236588
Error on this batch = 0.36726794977466043
Error on this batch = 0.3745594474647528
Cost on val dataset after 11832 epochs is = 0.07082103362037809
learning rate =  0.04827326031339152
Initial Cost on Val dataset for this epoch 11832 = 0.07082103362037809
Error on this batch = 

Error on this batch = 0.37452774801881655
Cost on val dataset after 11864 epochs is = 0.07081468281472152
learning rate =  0.048229819785709914
Initial Cost on Val dataset for this epoch 11864 = 0.07081468281472152
Error on this batch = 0.3672376225014955
Error on this batch = 0.37452675928542734
Cost on val dataset after 11865 epochs is = 0.07081448440210068
learning rate =  0.048228464787119944
Initial Cost on Val dataset for this epoch 11865 = 0.07081448440210068
Error on this batch = 0.36723669556309907
Error on this batch = 0.37452577067241366
Cost on val dataset after 11866 epochs is = 0.07081428599255675
learning rate =  0.048227109940789846
Initial Cost on Val dataset for this epoch 11866 = 0.07081428599255675
Error on this batch = 0.36723576816196946
Error on this batch = 0.3745247821803624
Cost on val dataset after 11867 epochs is = 0.07081408758609335
learning rate =  0.04822575524668968
Initial Cost on Val dataset for this epoch 11867 = 0.07081408758609335
Error on this bat

Error on this batch = 0.37449321774489036
Cost on val dataset after 11899 epochs is = 0.07080774022656836
learning rate =  0.048182485234170375
Initial Cost on Val dataset for this epoch 11899 = 0.07080774022656836
Error on this batch = 0.3672049067042358
Error on this batch = 0.3744922335650851
Cost on val dataset after 11900 epochs is = 0.07080754192367915
learning rate =  0.04818113554691697
Initial Cost on Val dataset for this epoch 11900 = 0.07080754192367915
Error on this batch = 0.3672039637898001
Error on this batch = 0.37449124952539803
Cost on val dataset after 11901 epochs is = 0.07080734362398282
learning rate =  0.048179786010880556
Initial Cost on Val dataset for this epoch 11901 = 0.07080734362398282
Error on this batch = 0.36720302042599545
Error on this batch = 0.37449026562636856
Cost on val dataset after 11902 epochs is = 0.07080714532748232
learning rate =  0.04817843662603148
Initial Cost on Val dataset for this epoch 11902 = 0.07080714532748232
Error on this batch

Error on this batch = 0.37445885828218245
Cost on val dataset after 11934 epochs is = 0.07080080154362713
learning rate =  0.048135335960809975
Initial Cost on Val dataset for this epoch 11934 = 0.07080080154362713
Error on this batch = 0.36717164014855636
Error on this batch = 0.374457879318622
Cost on val dataset after 11935 epochs is = 0.07080060335412955
learning rate =  0.04813399154856593
Initial Cost on Val dataset for this epoch 11935 = 0.07080060335412955
Error on this batch = 0.36717068176575596
Error on this batch = 0.3744569005132503
Cost on val dataset after 11936 epochs is = 0.07080040516791741
learning rate =  0.04813264728650618
Initial Cost on Val dataset for this epoch 11936 = 0.07080040516791741
Error on this batch = 0.36716972294917244
Error on this batch = 0.37445592186655885
Cost on val dataset after 11937 epochs is = 0.07080020698499302
learning rate =  0.04813130317460136
Initial Cost on Val dataset for this epoch 11937 = 0.07080020698499302
Error on this batch 

Error on this batch = 0.3744246918246279
Cost on val dataset after 11969 epochs is = 0.07079386688016971
learning rate =  0.0480883707002065
Initial Cost on Val dataset for this epoch 11969 = 0.07079386688016971
Error on this batch = 0.36713784179087866
Error on this batch = 0.37442371868110486
Cost on val dataset after 11970 epochs is = 0.0707936688069129
learning rate =  0.048087031526998524
Initial Cost on Val dataset for this epoch 11970 = 0.0707936688069129
Error on this batch = 0.36713686851310046
Error on this batch = 0.37442274571215434
Cost on val dataset after 11971 epochs is = 0.07079347073701116
learning rate =  0.04808569250295219
Initial Cost on Val dataset for this epoch 11971 = 0.07079347073701116
Error on this batch = 0.3671358948186549
Error on this batch = 0.3744217729182197
Cost on val dataset after 11972 epochs is = 0.07079327267046616
learning rate =  0.04808435362803843
Initial Cost on Val dataset for this epoch 11972 = 0.07079327267046616
Error on this batch = 0

Error on this batch = 0.3743907385028546
Cost on val dataset after 12004 epochs is = 0.07078693632232651
learning rate =  0.04804158819924831
Initial Cost on Val dataset for this epoch 12004 = 0.07078693632232651
Error on this batch = 0.3671035324943967
Error on this batch = 0.374389771724304
Cost on val dataset after 12005 epochs is = 0.07078673836736221
learning rate =  0.048040254229452566
Initial Cost on Val dataset for this epoch 12005 = 0.07078673836736221
Error on this batch = 0.36710254493820965
Error on this batch = 0.37438880513503814
Cost on val dataset after 12006 epochs is = 0.07078654041580001
learning rate =  0.048038920407805726
Initial Cost on Val dataset for this epoch 12006 = 0.07078654041580001
Error on this batch = 0.3671015569834087
Error on this batch = 0.3743878387354531
Cost on val dataset after 12007 epochs is = 0.07078634246764093
learning rate =  0.048037586734279006
Initial Cost on Val dataset for this epoch 12007 = 0.07078634246764093
Error on this batch =

Error on this batch = 0.37435701640119007
Cost on val dataset after 12039 epochs is = 0.0707800099284995
learning rate =  0.04799498721697915
Initial Cost on Val dataset for this epoch 12039 = 0.0707800099284995
Error on this batch = 0.3670687343001323
Error on this batch = 0.37435605647463205
Cost on val dataset after 12040 epochs is = 0.07077981209309211
learning rate =  0.04799365841531683
Initial Cost on Val dataset for this epoch 12040 = 0.07077981209309211
Error on this batch = 0.36706773310626917
Error on this batch = 0.3743550967504373
Cost on val dataset after 12041 epochs is = 0.07077961426111146
learning rate =  0.04799232976080048
Initial Cost on Val dataset for this epoch 12041 = 0.07077961426111146
Error on this batch = 0.367066731532291
Error on this batch = 0.37435413722895616
Cost on val dataset after 12042 epochs is = 0.07077941643255792
learning rate =  0.04799100125340157
Initial Cost on Val dataset for this epoch 12042 = 0.07077941643255792
Error on this batch = 0.

Error on this batch = 0.37432449445996474
Cost on val dataset after 12073 epochs is = 0.07077328544845259
learning rate =  0.04794989033909327
Initial Cost on Val dataset for this epoch 12073 = 0.07077328544845259
Error on this batch = 0.3670344836403823
Error on this batch = 0.37432354160254433
Cost on val dataset after 12074 epochs is = 0.07077308772965658
learning rate =  0.04794856652444532
Initial Cost on Val dataset for this epoch 12074 = 0.07077308772965658
Error on this batch = 0.36703346982025653
Error on this batch = 0.3743225889586772
Cost on val dataset after 12075 epochs is = 0.07077289001428995
learning rate =  0.04794724285597832
Initial Cost on Val dataset for this epoch 12075 = 0.07077289001428995
Error on this batch = 0.36703245563803616
Error on this batch = 0.3743216365286706
Cost on val dataset after 12076 epochs is = 0.07077269230235243
learning rate =  0.04794591933366401
Initial Cost on Val dataset for this epoch 12076 = 0.07077269230235243
Error on this batch =

Cost on val dataset after 12106 epochs is = 0.07076676253684767
learning rate =  0.04790628148545885
Initial Cost on Val dataset for this epoch 12106 = 0.07076676253684767
Error on this batch = 0.36700083925864035
Error on this batch = 0.3742922188913063
Cost on val dataset after 12107 epochs is = 0.07076656493099617
learning rate =  0.04790496247991067
Initial Cost on Val dataset for this epoch 12107 = 0.07076656493099617
Error on this batch = 0.3669998137665463
Error on this batch = 0.3742912734595889
Cost on val dataset after 12108 epochs is = 0.07076636732855662
learning rate =  0.047903643619615424
Initial Cost on Val dataset for this epoch 12108 = 0.07076636732855662
Error on this batch = 0.36699878792962237
Error on this batch = 0.37429032825117586
Cost on val dataset after 12109 epochs is = 0.07076616972952822
learning rate =  0.04790232490454513
Initial Cost on Val dataset for this epoch 12109 = 0.07076616972952822
Error on this batch = 0.36699776174838616
Error on this batch 

Cost on val dataset after 12139 epochs is = 0.0707602433399766
learning rate =  0.04786283084351677
Initial Cost on Val dataset for this epoch 12139 = 0.0707602433399766
Error on this batch = 0.36696681875539144
Error on this batch = 0.37426113894964486
Cost on val dataset after 12140 epochs is = 0.07076004584618126
learning rate =  0.04786151661657843
Initial Cost on Val dataset for this epoch 12140 = 0.07076004584618126
Error on this batch = 0.3669657821547929
Error on this batch = 0.3742602010207146
Cost on val dataset after 12141 epochs is = 0.07075984835576085
learning rate =  0.04786020253397339
Initial Cost on Val dataset for this epoch 12141 = 0.07075984835576085
Error on this batch = 0.3669647452262086
Error on this batch = 0.3742592633232272
Cost on val dataset after 12142 epochs is = 0.07075965086871396
learning rate =  0.04785888859567392
Initial Cost on Val dataset for this epoch 12142 = 0.07075965086871396
Error on this batch = 0.3669637079701404
Error on this batch = 0.3

Error on this batch = 0.37422845056902665
Cost on val dataset after 12175 epochs is = 0.07075313567829758
learning rate =  0.04781560940612343
Initial Cost on Val dataset for this epoch 12175 = 0.07075313567829758
Error on this batch = 0.36692929805664265
Error on this batch = 0.37422752086795413
Cost on val dataset after 12176 epochs is = 0.07075293830497172
learning rate =  0.04781430035776386
Initial Cost on Val dataset for this epoch 12176 = 0.07075293830497172
Error on this batch = 0.36692824996104334
Error on this batch = 0.37422659140557607
Cost on val dataset after 12177 epochs is = 0.07075274093495804
learning rate =  0.04781299145274383
Initial Cost on Val dataset for this epoch 12177 = 0.07075274093495804
Error on this batch = 0.36692720155518826
Error on this batch = 0.3742256621820792
Cost on val dataset after 12178 epochs is = 0.07075254356825447
learning rate =  0.047811682691035876
Initial Cost on Val dataset for this epoch 12178 = 0.07075254356825447
Error on this batc

Error on this batch = 0.37419605421780316
Cost on val dataset after 12210 epochs is = 0.07074622956809934
learning rate =  0.047769877821200214
Initial Cost on Val dataset for this epoch 12210 = 0.07074622956809934
Error on this batch = 0.3668924332282997
Error on this batch = 0.374195132974772
Cost on val dataset after 12211 epochs is = 0.07074603230934558
learning rate =  0.04776857377343201
Initial Cost on Val dataset for this epoch 12211 = 0.07074603230934558
Error on this batch = 0.36689137455623894
Error on this batch = 0.3741942119763023
Cost on val dataset after 12212 epochs is = 0.07074583505382051
learning rate =  0.047767269868046525
Initial Cost on Val dataset for this epoch 12212 = 0.07074583505382051
Error on this batch = 0.3668903155904056
Error on this batch = 0.37419329122254125
Cost on val dataset after 12213 epochs is = 0.07074563780152146
learning rate =  0.047765966105016565
Initial Cost on Val dataset for this epoch 12213 = 0.07074563780152146
Error on this batch 

Error on this batch = 0.3741639571327623
Cost on val dataset after 12245 epochs is = 0.07073932741409111
learning rate =  0.047724320689374974
Initial Cost on Val dataset for this epoch 12245 = 0.07073932741409111
Error on this batch = 0.36685520788306164
Error on this batch = 0.3741630445298473
Cost on val dataset after 12246 epochs is = 0.07073913026660855
learning rate =  0.04772302160886452
Initial Cost on Val dataset for this epoch 12246 = 0.07073913026660855
Error on this batch = 0.36685413919791715
Error on this batch = 0.3741621321759927
Cost on val dataset after 12247 epochs is = 0.07073893312224887
learning rate =  0.04772172266978906
Initial Cost on Val dataset for this epoch 12247 = 0.07073893312224887
Error on this batch = 0.3668530702346811
Error on this batch = 0.3741612200713071
Cost on val dataset after 12248 epochs is = 0.07073873598100867
learning rate =  0.04772042387212165
Initial Cost on Val dataset for this epoch 12248 = 0.07073873598100867
Error on this batch = 

Error on this batch = 0.3741321648725082
Cost on val dataset after 12280 epochs is = 0.07073242908734834
learning rate =  0.047678936849363286
Initial Cost on Val dataset for this epoch 12280 = 0.07073242908734834
Error on this batch = 0.3668176412572863
Error on this batch = 0.3741312610438515
Cost on val dataset after 12281 epochs is = 0.07073223204704532
learning rate =  0.047677642703093465
Initial Cost on Val dataset for this epoch 12281 = 0.07073223204704532
Error on this batch = 0.3668165630940003
Error on this batch = 0.37413035746737805
Cost on val dataset after 12282 epochs is = 0.07073203500973645
learning rate =  0.04767634869731992
Initial Cost on Val dataset for this epoch 12282 = 0.07073203500973645
Error on this batch = 0.36681548466748715
Error on this batch = 0.37412945414315657
Cost on val dataset after 12283 epochs is = 0.07073183797541771
learning rate =  0.047675054832015944
Initial Cost on Val dataset for this epoch 12283 = 0.07073183797541771
Error on this batch

Error on this batch = 0.37410068130703134
Cost on val dataset after 12315 epochs is = 0.07072553443091366
learning rate =  0.047633725150892633
Initial Cost on Val dataset for this epoch 12315 = 0.07072553443091366
Error on this batch = 0.36677975159027315
Error on this batch = 0.3740997863380015
Cost on val dataset after 12316 epochs is = 0.07072533749288465
learning rate =  0.04763243590615898
Initial Cost on Val dataset for this epoch 12316 = 0.07072533749288465
Error on this batch = 0.36677866445545687
Error on this batch = 0.37409889162285986
Cost on val dataset after 12317 epochs is = 0.07072514055769731
learning rate =  0.047631146800991744
Initial Cost on Val dataset for this epoch 12317 = 0.07072514055769731
Error on this batch = 0.36677757707148173
Error on this batch = 0.3740979971616337
Cost on val dataset after 12318 epochs is = 0.07072494362534694
learning rate =  0.047629857835364484
Initial Cost on Val dataset for this epoch 12318 = 0.07072494362534694
Error on this bat

Error on this batch = 0.37406950856735094
Cost on val dataset after 12350 epochs is = 0.07071864325874332
learning rate =  0.04758868445456708
Initial Cost on Val dataset for this epoch 12350 = 0.07071864325874332
Error on this batch = 0.3667415561475278
Error on this batch = 0.37406862249242245
Cost on val dataset after 12351 epochs is = 0.07071844641723052
learning rate =  0.04758740007897392
Initial Cost on Val dataset for this epoch 12351 = 0.07071844641723052
Error on this batch = 0.3667404605206785
Error on this batch = 0.3740677366715868
Cost on val dataset after 12352 epochs is = 0.07071824957838188
learning rate =  0.04758611584202608
Initial Cost on Val dataset for this epoch 12352 = 0.07071824957838188
Error on this batch = 0.3667393646579907
Error on this batch = 0.3740668511048265
Cost on val dataset after 12353 epochs is = 0.0707180527421919
learning rate =  0.047584831743697376
Initial Cost on Val dataset for this epoch 12353 = 0.0707180527421919
Error on this batch = 0.

Error on this batch = 0.3740395243988119
Cost on val dataset after 12384 epochs is = 0.07071195210817088
learning rate =  0.04754509330803611
Initial Cost on Val dataset for this epoch 12384 = 0.07071195210817088
Error on this batch = 0.3667041747198466
Error on this batch = 0.3740386469451127
Cost on val dataset after 12385 epochs is = 0.07071175535404033
learning rate =  0.04754381363173393
Initial Cost on Val dataset for this epoch 12385 = 0.07071175535404033
Error on this batch = 0.36670307127899476
Error on this batch = 0.3740377697441417
Cost on val dataset after 12386 epochs is = 0.07071155860237548
learning rate =  0.0475425340931906
Initial Cost on Val dataset for this epoch 12386 = 0.07071155860237548
Error on this batch = 0.3667019676145958
Error on this batch = 0.3740368927958339
Cost on val dataset after 12387 epochs is = 0.0707113618531701
learning rate =  0.04754125469238015
Initial Cost on Val dataset for this epoch 12387 = 0.0707113618531701
Error on this batch = 0.366

Error on this batch = 0.37401070144932774
Cost on val dataset after 12417 epochs is = 0.07070546048866086
learning rate =  0.047502936585432295
Initial Cost on Val dataset for this epoch 12417 = 0.07070546048866086
Error on this batch = 0.3666676450436417
Error on this batch = 0.3740098322928766
Cost on val dataset after 12418 epochs is = 0.07070526381246434
learning rate =  0.047501661441508095
Initial Cost on Val dataset for this epoch 12418 = 0.07070526381246434
Error on this batch = 0.3666665344093907
Error on this batch = 0.3740089633861831
Cost on val dataset after 12419 epochs is = 0.0707050671385141
learning rate =  0.04750038643449005
Initial Cost on Val dataset for this epoch 12419 = 0.0707050671385141
Error on this batch = 0.36666542356298776
Error on this batch = 0.3740080947291296
Cost on val dataset after 12420 epochs is = 0.07070487046680302
learning rate =  0.04749911156435244
Initial Cost on Val dataset for this epoch 12420 = 0.07070487046680302
Error on this batch = 0

Error on this batch = 0.3739821504448497
Cost on val dataset after 12450 epochs is = 0.07069897132036658
learning rate =  0.04746092898233086
Initial Cost on Val dataset for this epoch 12450 = 0.07069897132036658
Error on this batch = 0.36663088392509474
Error on this batch = 0.37398128945863846
Cost on val dataset after 12451 epochs is = 0.07069877471439909
learning rate =  0.047459658342766574
Initial Cost on Val dataset for this epoch 12451 = 0.07069877471439909
Error on this batch = 0.36662976646616896
Error on this batch = 0.37398042871735626
Cost on val dataset after 12452 epochs is = 0.07069857811042886
learning rate =  0.04745838783926328
Initial Cost on Val dataset for this epoch 12452 = 0.07069857811042886
Error on this batch = 0.3666286488060442
Error on this batch = 0.37397956822082434
Cost on val dataset after 12453 epochs is = 0.07069838150844787
learning rate =  0.04745711747179549
Initial Cost on Val dataset for this epoch 12453 = 0.07069838150844787
Error on this batch

Error on this batch = 0.3739521603836286
Cost on val dataset after 12485 epochs is = 0.0706920912455856
learning rate =  0.04741653738736419
Initial Cost on Val dataset for this epoch 12485 = 0.0706920912455856
Error on this batch = 0.36659165526656495
Error on this batch = 0.3739513078500454
Cost on val dataset after 12486 epochs is = 0.07069189470452895
learning rate =  0.047415271494809806
Initial Cost on Val dataset for this epoch 12486 = 0.07069189470452895
Error on this batch = 0.36659053095764293
Error on this batch = 0.37395045555387824
Cost on val dataset after 12487 epochs is = 0.07069169816516895
learning rate =  0.047414005737428135
Initial Cost on Val dataset for this epoch 12487 = 0.07069169816516895
Error on this batch = 0.366589406458756
Error on this batch = 0.37394960349487194
Cost on val dataset after 12488 epochs is = 0.0706915016274964
learning rate =  0.04741274011519393
Initial Cost on Val dataset for this epoch 12488 = 0.0706915016274964
Error on this batch = 0.

Error on this batch = 0.37392415063357476
Cost on val dataset after 12518 epochs is = 0.07068560623490819
learning rate =  0.04737483416673946
Initial Cost on Val dataset for this epoch 12518 = 0.07068560623490819
Error on this batch = 0.36655445448766844
Error on this batch = 0.3739233057858752
Cost on val dataset after 12519 epochs is = 0.07068540974478849
learning rate =  0.04737357272158584
Initial Cost on Val dataset for this epoch 12519 = 0.07068540974478849
Error on this batch = 0.36655332407577396
Error on this batch = 0.37392246116578703
Cost on val dataset after 12520 epochs is = 0.07068521325604261
learning rate =  0.047372311410775005
Initial Cost on Val dataset for this epoch 12520 = 0.07068521325604261
Error on this batch = 0.36655219348424695
Error on this batch = 0.3739216167729656
Cost on val dataset after 12521 epochs is = 0.07068501676866018
learning rate =  0.04737105023428193
Initial Cost on Val dataset for this epoch 12521 = 0.07068501676866018
Error on this batch

Cost on val dataset after 12552 epochs is = 0.07067892627723397
learning rate =  0.04733202024831177
Initial Cost on Val dataset for this epoch 12552 = 0.07067892627723397
Error on this batch = 0.3665159215602427
Error on this batch = 0.37389471397461144
Cost on val dataset after 12553 epochs is = 0.07067872982772466
learning rate =  0.04733076335680742
Initial Cost on Val dataset for this epoch 12553 = 0.07067872982772466
Error on this batch = 0.3665147852141966
Error on this batch = 0.3738938768676286
Cost on val dataset after 12554 epochs is = 0.07067853337921096
learning rate =  0.047329506598798365
Initial Cost on Val dataset for this epoch 12554 = 0.07067853337921096
Error on this batch = 0.3665136486990066
Error on this batch = 0.3738930399742765
Cost on val dataset after 12555 epochs is = 0.070678336931681
learning rate =  0.047328249974259794
Initial Cost on Val dataset for this epoch 12555 = 0.070678336931681
Error on this batch = 0.36651251201498
Error on this batch = 0.3738

Cost on val dataset after 12587 epochs is = 0.07067205105654027
learning rate =  0.047288108313314474
Initial Cost on Val dataset for this epoch 12587 = 0.07067205105654027
Error on this batch = 0.36647605081794055
Error on this batch = 0.3738655391031965
Cost on val dataset after 12588 epochs is = 0.07067185463449557
learning rate =  0.04728685607942587
Initial Cost on Val dataset for this epoch 12588 = 0.07067185463449557
Error on this batch = 0.36647490873458344
Error on this batch = 0.3738647091714768
Cost on val dataset after 12589 epochs is = 0.07067165821300206
learning rate =  0.04728560397816808
Initial Cost on Val dataset for this epoch 12589 = 0.07067165821300206
Error on this batch = 0.36647376649283403
Error on this batch = 0.3738638794347247
Cost on val dataset after 12590 epochs is = 0.07067146179204616
learning rate =  0.047284352009516535
Initial Cost on Val dataset for this epoch 12590 = 0.07067146179204616
Error on this batch = 0.3664726240930002
Error on this batch 

Cost on val dataset after 12621 epochs is = 0.07066537293249917
learning rate =  0.04724560662019801
Initial Cost on Val dataset for this epoch 12621 = 0.07066537293249917
Error on this batch = 0.36643713297205666
Error on this batch = 0.37383742687885846
Cost on val dataset after 12622 epochs is = 0.07066517652125716
learning rate =  0.04724435888200595
Initial Cost on Val dataset for this epoch 12622 = 0.07066517652125716
Error on this batch = 0.36643598567664987
Error on this batch = 0.373836603199253
Cost on val dataset after 12623 epochs is = 0.07066498011007458
learning rate =  0.047243111275612586
Initial Cost on Val dataset for this epoch 12623 = 0.07066498011007458
Error on this batch = 0.3664348382333794
Error on this batch = 0.37383577969065174
Cost on val dataset after 12624 epochs is = 0.07066478369893603
learning rate =  0.047241863800993544
Initial Cost on Val dataset for this epoch 12624 = 0.07066478369893603
Error on this batch = 0.36643369064255743
Error on this batch

Error on this batch = 0.37380951253948336
Cost on val dataset after 12656 epochs is = 0.07065849847051582
learning rate =  0.047202014044586973
Initial Cost on Val dataset for this epoch 12656 = 0.07065849847051582
Error on this batch = 0.36639689171028367
Error on this batch = 0.3738086941776508
Cost on val dataset after 12657 epochs is = 0.07065830205180845
learning rate =  0.04720077090489599
Initial Cost on Val dataset for this epoch 12657 = 0.07065830205180845
Error on this batch = 0.36639573942705267
Error on this batch = 0.3738078759552316
Cost on val dataset after 12658 epochs is = 0.07065810563258695
learning rate =  0.04719952789615487
Initial Cost on Val dataset for this epoch 12658 = 0.07065810563258695
Error on this batch = 0.36639458700704
Error on this batch = 0.3738070578711561
Cost on val dataset after 12659 epochs is = 0.0706579092128339
learning rate =  0.04719828501833946
Initial Cost on Val dataset for this epoch 12659 = 0.0706579092128339
Error on this batch = 0.3

Error on this batch = 0.37378094533067185
Cost on val dataset after 12691 epochs is = 0.07065162339276175
learning rate =  0.04715858191291686
Initial Cost on Val dataset for this epoch 12691 = 0.07065162339276175
Error on this batch = 0.3663564825419704
Error on this batch = 0.3737801311489032
Cost on val dataset after 12692 epochs is = 0.07065142694531323
learning rate =  0.047157343342150354
Initial Cost on Val dataset for this epoch 12692 = 0.07065142694531323
Error on this batch = 0.3663553256658143
Error on this batch = 0.37377931706321343
Cost on val dataset after 12693 epochs is = 0.0706512304967065
learning rate =  0.04715610490149266
Initial Cost on Val dataset for this epoch 12693 = 0.0706512304967065
Error on this batch = 0.3663541686644146
Error on this batch = 0.3737785030721649
Cost on val dataset after 12694 epochs is = 0.07065103404692213
learning rate =  0.04715486659091985
Initial Cost on Val dataset for this epoch 12694 = 0.07065103404692213
Error on this batch = 0.

Error on this batch = 0.37375249603111377
Cost on val dataset after 12726 epochs is = 0.07064474691319103
learning rate =  0.0471153091946232
Initial Cost on Val dataset for this epoch 12726 = 0.07064474691319103
Error on this batch = 0.36631591961132026
Error on this batch = 0.37375168426814626
Cost on val dataset after 12727 epochs is = 0.07064455041332393
learning rate =  0.04711407516347558
Initial Cost on Val dataset for this epoch 12727 = 0.07064455041332393
Error on this batch = 0.3663147585579576
Error on this batch = 0.3737508725426294
Cost on val dataset after 12728 epochs is = 0.07064435391158988
learning rate =  0.04711284126160341
Initial Cost on Val dataset for this epoch 12728 = 0.07064435391158988
Error on this batch = 0.3663135973916542
Error on this batch = 0.37375006085260437
Cost on val dataset after 12729 epochs is = 0.0706441574079678
learning rate =  0.04711160748898299
Initial Cost on Val dataset for this epoch 12729 = 0.0706441574079678
Error on this batch = 0.

Error on this batch = 0.37372409279071805
Cost on val dataset after 12761 epochs is = 0.07063786816778943
learning rate =  0.047072194868572025
Initial Cost on Val dataset for this epoch 12761 = 0.07063786816778943
Error on this batch = 0.36627521800541274
Error on this batch = 0.37372328104547947
Cost on val dataset after 12762 epochs is = 0.07063767158988334
learning rate =  0.04707096534800548
Initial Cost on Val dataset for this epoch 12762 = 0.07063767158988334
Error on this batch = 0.3662740532251225
Error on this batch = 0.37372246925712743
Cost on val dataset after 12763 epochs is = 0.07063747500935982
learning rate =  0.04706973595588865
Initial Cost on Val dataset for this epoch 12763 = 0.07063747500935982
Error on this batch = 0.3662728883454625
Error on this batch = 0.37372165742294655
Cost on val dataset after 12764 epochs is = 0.07063727842619726
learning rate =  0.04706850669219803
Initial Cost on Val dataset for this epoch 12764 = 0.07063727842619726
Error on this batch

Error on this batch = 0.37369563680064605
Cost on val dataset after 12796 epochs is = 0.07063098624361752
learning rate =  0.04702923792294799
Initial Cost on Val dataset for this epoch 12796 = 0.07063098624361752
Error on this batch = 0.366234394378321
Error on this batch = 0.3736948217413975
Cost on val dataset after 12797 epochs is = 0.07063078956144068
learning rate =  0.047028012884189355
Initial Cost on Val dataset for this epoch 12797 = 0.07063078956144068
Error on this batch = 0.36623322637916245
Error on this batch = 0.3736940065261316
Cost on val dataset after 12798 epochs is = 0.07063059287591268
learning rate =  0.04702678797306218
Initial Cost on Val dataset for this epoch 12798 = 0.07063059287591268
Error on this batch = 0.3662320582963476
Error on this batch = 0.3736931911510017
Cost on val dataset after 12799 epochs is = 0.07063039618701389
learning rate =  0.047025563189543214
Initial Cost on Val dataset for this epoch 12799 = 0.07063039618701389
Error on this batch = 

Cost on val dataset after 12830 epochs is = 0.07062429705873252
learning rate =  0.04698765806745032
Initial Cost on Val dataset for this epoch 12830 = 0.07062429705873252
Error on this batch = 0.36619463858468565
Error on this batch = 0.3736669894602693
Cost on val dataset after 12831 epochs is = 0.07062410025252008
learning rate =  0.04698643735514416
Initial Cost on Val dataset for this epoch 12831 = 0.07062410025252008
Error on this batch = 0.3661934680387444
Error on this batch = 0.3736661663623881
Cost on val dataset after 12832 epochs is = 0.07062390344238526
learning rate =  0.04698521676968173
Initial Cost on Val dataset for this epoch 12832 = 0.07062390344238526
Error on this batch = 0.3661922974279611
Error on this batch = 0.3736653429464944
Cost on val dataset after 12833 epochs is = 0.07062370662831556
learning rate =  0.04698399631103996
Initial Cost on Val dataset for this epoch 12833 = 0.07062370662831556
Error on this batch = 0.3661911267529653
Error on this batch = 0.

Cost on val dataset after 12865 epochs is = 0.07061740644699975
learning rate =  0.046945008458112863
Initial Cost on Val dataset for this epoch 12865 = 0.07061740644699975
Error on this batch = 0.36615363532561246
Error on this batch = 0.3736379516900996
Cost on val dataset after 12866 epochs is = 0.07061720949887232
learning rate =  0.046943792171653315
Initial Cost on Val dataset for this epoch 12866 = 0.07061720949887232
Error on this batch = 0.3661524629247261
Error on this batch = 0.3736371136815538
Cost on val dataset after 12867 epochs is = 0.07061701254666262
learning rate =  0.04694257601123381
Initial Cost on Val dataset for this epoch 12867 = 0.07061701254666262
Error on this batch = 0.3661512904848642
Error on this batch = 0.37363627511624387
Cost on val dataset after 12868 epochs is = 0.07061681559037743
learning rate =  0.046941359976831507
Initial Cost on Val dataset for this epoch 12868 = 0.07061681559037743
Error on this batch = 0.36615011800690767
Error on this batch

Cost on val dataset after 12900 epochs is = 0.07061051092906091
learning rate =  0.046902513276578996
Initial Cost on Val dataset for this epoch 12900 = 0.07061051092906091
Error on this batch = 0.3661125844284057
Error on this batch = 0.3736082297554775
Cost on val dataset after 12901 epochs is = 0.07061031384879252
learning rate =  0.0469013013879609
Initial Cost on Val dataset for this epoch 12901 = 0.07061031384879252
Error on this batch = 0.36611141125977903
Error on this batch = 0.37360736655903837
Cost on val dataset after 12902 epochs is = 0.07061011676533889
learning rate =  0.04690008962458643
Initial Cost on Val dataset for this epoch 12902 = 0.07061011676533889
Error on this batch = 0.3661102380899531
Error on this batch = 0.3736065024439608
Cost on val dataset after 12903 epochs is = 0.07060991967875202
learning rate =  0.046898877986432934
Initial Cost on Val dataset for this epoch 12903 = 0.07060991967875202
Error on this batch = 0.36610906492026346
Error on this batch =

Cost on val dataset after 12933 epochs is = 0.07060400597184555
learning rate =  0.0468625869574804
Initial Cost on Val dataset for this epoch 12933 = 0.07060400597184555
Error on this batch = 0.36607387729460156
Error on this batch = 0.373579182110912
Cost on val dataset after 12934 epochs is = 0.0706038088263929
learning rate =  0.046861379189974975
Initial Cost on Val dataset for this epoch 12934 = 0.0706038088263929
Error on this batch = 0.3660727048980465
Error on this batch = 0.3735782808501153
Cost on val dataset after 12935 epochs is = 0.07060361168076344
learning rate =  0.04686017154696882
Initial Cost on Val dataset for this epoch 12935 = 0.07060361168076344
Error on this batch = 0.3660715325551328
Error on this batch = 0.37357737814890896
Cost on val dataset after 12936 epochs is = 0.07060341453510131
learning rate =  0.04685896402843949
Initial Cost on Val dataset for this epoch 12936 = 0.07060341453510131
Error on this batch = 0.3660703602679282
Error on this batch = 0.37

Cost on val dataset after 12968 epochs is = 0.07059710695354748
learning rate =  0.04682038902518161
Initial Cost on Val dataset for this epoch 12968 = 0.07059710695354748
Error on this batch = 0.3660328903818635
Error on this batch = 0.37354663964797125
Cost on val dataset after 12969 epochs is = 0.0705969099180219
learning rate =  0.04681918560182671
Initial Cost on Val dataset for this epoch 12969 = 0.0705969099180219
Error on this batch = 0.3660317212845935
Error on this batch = 0.3735456746296076
Cost on val dataset after 12970 epochs is = 0.07059671289028567
learning rate =  0.04681798230218852
Initial Cost on Val dataset for this epoch 12970 = 0.07059671289028567
Error on this batch = 0.36603055233093174
Error on this batch = 0.3735447073258109
Cost on val dataset after 12971 epochs is = 0.07059651587067385
learning rate =  0.046816779126244774
Initial Cost on Val dataset for this epoch 12971 = 0.07059651587067385
Error on this batch = 0.3660293835239795
Error on this batch = 0.

Cost on val dataset after 13002 epochs is = 0.07059041445470594
learning rate =  0.0467795419032747
Initial Cost on Val dataset for this epoch 13002 = 0.07059041445470594
Error on this batch = 0.3659932409395462
Error on this batch = 0.37351235780011877
Cost on val dataset after 13003 epochs is = 0.07059021791487745
learning rate =  0.04677834267383654
Initial Cost on Val dataset for this epoch 13003 = 0.07059021791487745
Error on this batch = 0.36599207853629445
Error on this batch = 0.3735112969725658
Cost on val dataset after 13004 epochs is = 0.07059002139815682
learning rate =  0.046777143567361595
Initial Cost on Val dataset for this epoch 13004 = 0.07059002139815682
Error on this batch = 0.3659909163842067
Error on this batch = 0.37351023272504075
Cost on val dataset after 13005 epochs is = 0.07058982490509275
learning rate =  0.04677594458382779
Initial Cost on Val dataset for this epoch 13005 = 0.07058982490509275
Error on this batch = 0.36598975448578785
Error on this batch =

Cost on val dataset after 13037 epochs is = 0.07058355266546938
learning rate =  0.046737641891945646
Initial Cost on Val dataset for this epoch 13037 = 0.07058355266546938
Error on this batch = 0.3659527125247295
Error on this batch = 0.37347299065011075
Cost on val dataset after 13038 epochs is = 0.07058335721439055
learning rate =  0.04673644695312959
Initial Cost on Val dataset for this epoch 13038 = 0.07058335721439055
Error on this batch = 0.36595155892988135
Error on this batch = 0.37347179333243374
Cost on val dataset after 13039 epochs is = 0.07058316179835855
learning rate =  0.046735252136507914
Initial Cost on Val dataset for this epoch 13039 = 0.07058316179835855
Error on this batch = 0.3659504055048189
Error on this batch = 0.3734705918761658
Cost on val dataset after 13040 epochs is = 0.0705829664170485
learning rate =  0.046734057442058746
Initial Cost on Val dataset for this epoch 13040 = 0.0705829664170485
Error on this batch = 0.3659492522372091
Error on this batch =

Cost on val dataset after 13072 epochs is = 0.07057672487054073
learning rate =  0.046695891596219986
Initial Cost on Val dataset for this epoch 13072 = 0.07057672487054073
Error on this batch = 0.36591229256526725
Error on this batch = 0.3734289705947971
Cost on val dataset after 13073 epochs is = 0.07057652967475946
learning rate =  0.04669470092123359
Initial Cost on Val dataset for this epoch 13073 = 0.07057652967475946
Error on this batch = 0.3659111290426351
Error on this batch = 0.3734276712368987
Cost on val dataset after 13074 epochs is = 0.0705763344232627
learning rate =  0.04669351036767959
Initial Cost on Val dataset for this epoch 13074 = 0.0705763344232627
Error on this batch = 0.3659099644189655
Error on this batch = 0.3734263717191048
Cost on val dataset after 13075 epochs is = 0.07057613911024006
learning rate =  0.04669231993553632
Initial Cost on Val dataset for this epoch 13075 = 0.07057613911024006
Error on this batch = 0.3659087986273796
Error on this batch = 0.3

Error on this batch = 0.37338885430322627
Cost on val dataset after 13105 epochs is = 0.07057021874864672
learning rate =  0.04665666331996146
Initial Cost on Val dataset for this epoch 13105 = 0.07057021874864672
Error on this batch = 0.3658729195571185
Error on this batch = 0.3733877103396277
Cost on val dataset after 13106 epochs is = 0.07057001837716015
learning rate =  0.04665547664083182
Initial Cost on Val dataset for this epoch 13106 = 0.07057001837716015
Error on this batch = 0.3658716819047868
Error on this batch = 0.37338657699325806
Cost on val dataset after 13107 epochs is = 0.07056981776802378
learning rate =  0.04665429008242234
Initial Cost on Val dataset for this epoch 13107 = 0.07056981776802378
Error on this batch = 0.36587044097478066
Error on this batch = 0.37338545457250966
Cost on val dataset after 13108 epochs is = 0.07056961692085663
learning rate =  0.04665310364471152
Initial Cost on Val dataset for this epoch 13108 = 0.07056961692085663
Error on this batch =

Cost on val dataset after 13138 epochs is = 0.0705635022991897
learning rate =  0.04661756653190068
Initial Cost on Val dataset for this epoch 13138 = 0.0705635022991897
Error on this batch = 0.36583034304883116
Error on this batch = 0.37335713903366236
Cost on val dataset after 13139 epochs is = 0.07056329684566345
learning rate =  0.04661638382521934
Initial Cost on Val dataset for this epoch 13139 = 0.07056329684566345
Error on this batch = 0.3658290020945006
Error on this batch = 0.3733564468135407
Cost on val dataset after 13140 epochs is = 0.07056309139669885
learning rate =  0.04661520123855186
Initial Cost on Val dataset for this epoch 13140 = 0.07056309139669885
Error on this batch = 0.3658276587327367
Error on this batch = 0.3733557678784642
Cost on val dataset after 13141 epochs is = 0.07056288596290165
learning rate =  0.046614018771876944
Initial Cost on Val dataset for this epoch 13141 = 0.07056288596290165
Error on this batch = 0.3658263130273979
Error on this batch = 0.

Cost on val dataset after 13172 epochs is = 0.07055656966245477
learning rate =  0.04657742170524481
Initial Cost on Val dataset for this epoch 13172 = 0.07055656966245477
Error on this batch = 0.36578377733117556
Error on this batch = 0.373339968492743
Cost on val dataset after 13173 epochs is = 0.07055636852032929
learning rate =  0.04657624306711218
Initial Cost on Val dataset for this epoch 13173 = 0.07055636852032929
Error on this batch = 0.3657823889939689
Error on this batch = 0.3733396227355786
Cost on val dataset after 13174 epochs is = 0.07055616756740608
learning rate =  0.04657506454827189
Initial Cost on Val dataset for this epoch 13174 = 0.07055616756740608
Error on this batch = 0.3657810001973272
Error on this batch = 0.3733392837276702
Cost on val dataset after 13175 epochs is = 0.07055596680353199
learning rate =  0.046573886148702816
Initial Cost on Val dataset for this epoch 13175 = 0.07055596680353199
Error on this batch = 0.36577961098287903
Error on this batch = 0

Error on this batch = 0.3733314739780409
Cost on val dataset after 13205 epochs is = 0.07055002406610426
learning rate =  0.046538589518169436
Initial Cost on Val dataset for this epoch 13205 = 0.07055002406610426
Error on this batch = 0.36573790623931246
Error on this batch = 0.37333127045686737
Cost on val dataset after 13206 epochs is = 0.0705498282414946
learning rate =  0.046537414805557724
Initial Cost on Val dataset for this epoch 13206 = 0.0705498282414946
Error on this batch = 0.3657365194525275
Error on this batch = 0.3733310694361528
Cost on val dataset after 13207 epochs is = 0.07054963253182536
learning rate =  0.04653624021154396
Initial Cost on Val dataset for this epoch 13207 = 0.07054963253182536
Error on this batch = 0.3657351330927891
Error on this batch = 0.37333087082782856
Cost on val dataset after 13208 epochs is = 0.07054943693401085
learning rate =  0.04653506573610719
Initial Cost on Val dataset for this epoch 13208 = 0.07054943693401085
Error on this batch = 

Cost on val dataset after 13237 epochs is = 0.07054380018557224
learning rate =  0.04650105743544125
Initial Cost on Val dataset for this epoch 13237 = 0.07054380018557224
Error on this batch = 0.3656937937503351
Error on this batch = 0.37332567915169124
Cost on val dataset after 13238 epochs is = 0.07054360663076616
learning rate =  0.04649988650759759
Initial Cost on Val dataset for this epoch 13238 = 0.07054360663076616
Error on this batch = 0.3656924253686256
Error on this batch = 0.37332552213961223
Cost on val dataset after 13239 epochs is = 0.07054341310597004
learning rate =  0.046498715697684025
Initial Cost on Val dataset for this epoch 13239 = 0.07054341310597004
Error on this batch = 0.3656910576559059
Error on this batch = 0.37332536567655217
Cost on val dataset after 13240 epochs is = 0.07054321960913489
learning rate =  0.046497545005679757
Initial Cost on Val dataset for this epoch 13240 = 0.07054321960913489
Error on this batch = 0.36568969061473466
Error on this batch

Error on this batch = 0.3733207821461463
Cost on val dataset after 13270 epochs is = 0.07053741902166695
learning rate =  0.04646247897050456
Initial Cost on Val dataset for this epoch 13270 = 0.07053741902166695
Error on this batch = 0.36564899767598874
Error on this batch = 0.3733206292285124
Cost on val dataset after 13271 epochs is = 0.07053722556829756
learning rate =  0.04646131192340845
Initial Cost on Val dataset for this epoch 13271 = 0.07053722556829756
Error on this batch = 0.36564765188328907
Error on this batch = 0.37332047611099856
Cost on val dataset after 13272 epochs is = 0.07053703209529581
learning rate =  0.04646014499355933
Initial Cost on Val dataset for this epoch 13272 = 0.07053703209529581
Error on this batch = 0.3656463067703858
Error on this batch = 0.3733203227799051
Cost on val dataset after 13273 epochs is = 0.07053683860164556
learning rate =  0.04645897818093657
Initial Cost on Val dataset for this epoch 13273 = 0.07053683860164556
Error on this batch = 

Error on this batch = 0.37331557039426244
Cost on val dataset after 13303 epochs is = 0.0705310199849999
learning rate =  0.046424028210489306
Initial Cost on Val dataset for this epoch 13303 = 0.0705310199849999
Error on this batch = 0.36560493739974975
Error on this batch = 0.37331540553957043
Cost on val dataset after 13304 epochs is = 0.07053082545474644
learning rate =  0.04642286502168847
Initial Cost on Val dataset for this epoch 13304 = 0.07053082545474644
Error on this batch = 0.3656036132222098
Error on this batch = 0.37331524020351653
Cost on val dataset after 13305 epochs is = 0.07053063088114964
learning rate =  0.04642170194945715
Initial Cost on Val dataset for this epoch 13305 = 0.07053063088114964
Error on this batch = 0.3656022896694174
Error on this batch = 0.3733150743816352
Cost on val dataset after 13306 epochs is = 0.07053043626373884
learning rate =  0.04642053899377489
Initial Cost on Val dataset for this epoch 13306 = 0.07053043626373884
Error on this batch = 

Cost on val dataset after 13335 epochs is = 0.0705247715320704
learning rate =  0.04638686388612812
Initial Cost on Val dataset for this epoch 13335 = 0.0705247715320704
Error on this batch = 0.3655628642919491
Error on this batch = 0.37330985761508345
Cost on val dataset after 13336 epochs is = 0.07052457542767888
learning rate =  0.04638570441743751
Initial Cost on Val dataset for this epoch 13336 = 0.07052457542767888
Error on this batch = 0.365561559183938
Error on this batch = 0.3733096752778045
Cost on val dataset after 13337 epochs is = 0.07052437926924364
learning rate =  0.04638454506466479
Initial Cost on Val dataset for this epoch 13337 = 0.07052437926924364
Error on this batch = 0.3655602546424062
Error on this batch = 0.37330949238036093
Cost on val dataset after 13338 epochs is = 0.07052418305654318
learning rate =  0.04638338582778967
Initial Cost on Val dataset for this epoch 13338 = 0.07052418305654318
Error on this batch = 0.36555895066568134
Error on this batch = 0.3

Cost on val dataset after 13368 epochs is = 0.07051827052875549
learning rate =  0.046348662513548106
Initial Cost on Val dataset for this epoch 13368 = 0.07051827052875549
Error on this batch = 0.3655200862272187
Error on this batch = 0.3733035430342377
Cost on val dataset after 13369 epochs is = 0.07051807254776792
learning rate =  0.04634750685946809
Initial Cost on Val dataset for this epoch 13369 = 0.07051807254776792
Error on this batch = 0.3655187990004756
Error on this batch = 0.3733033421235325
Cost on val dataset after 13370 epochs is = 0.07051787450759404
learning rate =  0.04634635132063944
Initial Cost on Val dataset for this epoch 13370 = 0.07051787450759404
Error on this batch = 0.3655175122919414
Error on this batch = 0.3733031406557231
Cost on val dataset after 13371 epochs is = 0.07051767640813214
learning rate =  0.04634519589704201
Initial Cost on Val dataset for this epoch 13371 = 0.07051767640813214
Error on this batch = 0.3655162261003852
Error on this batch = 0.

Cost on val dataset after 13401 epochs is = 0.07051170543739399
learning rate =  0.04631058667208975
Initial Cost on Val dataset for this epoch 13401 = 0.07051170543739399
Error on this batch = 0.3654778751954768
Error on this batch = 0.3732966239568174
Cost on val dataset after 13402 epochs is = 0.07051150546069822
learning rate =  0.04630943481071203
Initial Cost on Val dataset for this epoch 13402 = 0.07051150546069822
Error on this batch = 0.36547660449295616
Error on this batch = 0.37329640518829166
Cost on val dataset after 13403 epochs is = 0.07051130542247769
learning rate =  0.04630828306392458
Initial Cost on Val dataset for this epoch 13403 = 0.07051130542247769
Error on this batch = 0.3654753342750424
Error on this batch = 0.373296185899047
Cost on val dataset after 13404 epochs is = 0.07051110532268734
learning rate =  0.04630713143170745
Initial Cost on Val dataset for this epoch 13404 = 0.07051110532268734
Error on this batch = 0.36547406454093384
Error on this batch = 0

Error on this batch = 0.3732891375217379
Cost on val dataset after 13435 epochs is = 0.07050487149273482
learning rate =  0.046271487551248255
Initial Cost on Val dataset for this epoch 13435 = 0.07050487149273482
Error on this batch = 0.3654349389115889
Error on this batch = 0.3732889023423067
Cost on val dataset after 13436 epochs is = 0.07050466940491382
learning rate =  0.046270339574778406
Initial Cost on Val dataset for this epoch 13436 = 0.07050466940491382
Error on this batch = 0.36543368430427253
Error on this batch = 0.3732886666917601
Cost on val dataset after 13437 epochs is = 0.07050446725456827
learning rate =  0.046269191712223365
Initial Cost on Val dataset for this epoch 13437 = 0.07050446725456827
Error on this batch = 0.3654324301606014
Error on this batch = 0.3732884305716987
Cost on val dataset after 13438 epochs is = 0.07050426504168203
learning rate =  0.04626804396356335
Initial Cost on Val dataset for this epoch 13438 = 0.07050426504168203
Error on this batch =

Cost on val dataset after 13468 epochs is = 0.07049816951574231
learning rate =  0.04623366436704049
Initial Cost on Val dataset for this epoch 13468 = 0.07049816951574231
Error on this batch = 0.36539377968584236
Error on this batch = 0.3732808868345863
Cost on val dataset after 13469 epochs is = 0.07049796535895689
learning rate =  0.04623252013934198
Initial Cost on Val dataset for this epoch 13469 = 0.07049796535895689
Error on this batch = 0.36539254019953304
Error on this batch = 0.3732806365508239
Cost on val dataset after 13470 epochs is = 0.07049776113937936
learning rate =  0.046231376024908115
Initial Cost on Val dataset for this epoch 13470 = 0.07049776113937936
Error on this batch = 0.36539130116727175
Error on this batch = 0.37328038585164647
Cost on val dataset after 13471 epochs is = 0.0704975568570087
learning rate =  0.046230232023719266
Initial Cost on Val dataset for this epoch 13471 = 0.0704975568570087
Error on this batch = 0.3653900625889076
Error on this batch =

Error on this batch = 0.37327241716190257
Cost on val dataset after 13502 epochs is = 0.07049119296749952
learning rate =  0.046194824049576894
Initial Cost on Val dataset for this epoch 13502 = 0.07049119296749952
Error on this batch = 0.36535189130367796
Error on this batch = 0.3732721540424288
Cost on val dataset after 13503 epochs is = 0.07049098667709333
learning rate =  0.046193683661898695
Initial Cost on Val dataset for this epoch 13503 = 0.07049098667709333
Error on this batch = 0.36535066721291104
Error on this batch = 0.3732718905619515
Cost on val dataset after 13504 epochs is = 0.0704907803240306
learning rate =  0.04619254338682081
Initial Cost on Val dataset for this epoch 13504 = 0.0704907803240306
Error on this batch = 0.3653494435749633
Error on this batch = 0.3732716267221015
Cost on val dataset after 13505 epochs is = 0.07049057390831992
learning rate =  0.046191403224323746
Initial Cost on Val dataset for this epoch 13505 = 0.07049057390831992
Error on this batch =

Error on this batch = 0.37326327825726213
Cost on val dataset after 13536 epochs is = 0.0704841440052357
learning rate =  0.04615611392108924
Initial Cost on Val dataset for this epoch 13536 = 0.0704841440052357
Error on this batch = 0.3653105269536318
Error on this batch = 0.3732630037682084
Cost on val dataset after 13537 epochs is = 0.07048393559065519
learning rate =  0.046154977350935586
Initial Cost on Val dataset for this epoch 13537 = 0.07048393559065519
Error on this batch = 0.36530931833328856
Error on this batch = 0.3732627289725937
Cost on val dataset after 13538 epochs is = 0.07048372711383205
learning rate =  0.04615384089272345
Initial Cost on Val dataset for this epoch 13538 = 0.07048372711383205
Error on this batch = 0.3653081101712871
Error on this batch = 0.37326245387198415
Cost on val dataset after 13539 epochs is = 0.07048351857478197
learning rate =  0.046152704546433526
Initial Cost on Val dataset for this epoch 13539 = 0.07048351857478197
Error on this batch = 

Cost on val dataset after 13569 epochs is = 0.07047723355330462
learning rate =  0.046118666106070245
Initial Cost on Val dataset for this epoch 13569 = 0.07047723355330462
Error on this batch = 0.3652708859990284
Error on this batch = 0.3732537829165253
Cost on val dataset after 13570 epochs is = 0.07047702309402754
learning rate =  0.04611753321982581
Initial Cost on Val dataset for this epoch 13570 = 0.07047702309402754
Error on this batch = 0.36526969265608633
Error on this batch = 0.37325349886954795
Cost on val dataset after 13571 epochs is = 0.0704768125731243
learning rate =  0.04611640044488873
Initial Cost on Val dataset for this epoch 13571 = 0.0704768125731243
Error on this batch = 0.3652684997816914
Error on this batch = 0.3732532145679914
Cost on val dataset after 13572 epochs is = 0.07047660199061667
learning rate =  0.04611526778123987
Initial Cost on Val dataset for this epoch 13572 = 0.07047660199061667
Error on this batch = 0.36526730737621266
Error on this batch = 0

Error on this batch = 0.37324428297877854
Cost on val dataset after 13603 epochs is = 0.07047004350460397
learning rate =  0.04608021030288833
Initial Cost on Val dataset for this epoch 13603 = 0.07047004350460397
Error on this batch = 0.36523057753693294
Error on this batch = 0.3732439913044748
Cost on val dataset after 13604 epochs is = 0.07046983096337123
learning rate =  0.04607908119038628
Initial Cost on Val dataset for this epoch 13604 = 0.07046983096337123
Error on this batch = 0.36522940034864493
Error on this batch = 0.3732436994235399
Cost on val dataset after 13605 epochs is = 0.07046961836135729
learning rate =  0.046077952188543554
Initial Cost on Val dataset for this epoch 13605 = 0.07046961836135729
Error on this batch = 0.365228223642992
Error on this batch = 0.3732434073373902
Cost on val dataset after 13606 epochs is = 0.07046940569859031
learning rate =  0.04607682329734119
Initial Cost on Val dataset for this epoch 13606 = 0.07046940569859031
Error on this batch = 

Error on this batch = 0.37323455626939733
Cost on val dataset after 13636 epochs is = 0.07046299771285279
learning rate =  0.04604300791509551
Initial Cost on Val dataset for this epoch 13636 = 0.07046299771285279
Error on this batch = 0.3651919877153761
Error on this batch = 0.37323425851308173
Cost on val dataset after 13637 epochs is = 0.07046278318175235
learning rate =  0.04604188244435649
Initial Cost on Val dataset for this epoch 13637 = 0.07046278318175235
Error on this batch = 0.3651908267042438
Error on this batch = 0.3732339605958292
Cost on val dataset after 13638 epochs is = 0.070462568590902
learning rate =  0.04604075708365298
Initial Cost on Val dataset for this epoch 13638 = 0.070462568590902
Error on this batch = 0.36518966619166293
Error on this batch = 0.37323366251899337
Cost on val dataset after 13639 epochs is = 0.07046235394033637
learning rate =  0.04603963183296615
Initial Cost on Val dataset for this epoch 13639 = 0.07046235394033637
Error on this batch = 0.3

Cost on val dataset after 13669 epochs is = 0.07045588683596613
learning rate =  0.046005925376936874
Initial Cost on Val dataset for this epoch 13669 = 0.07045588683596613
Error on this batch = 0.36515394036767135
Error on this batch = 0.37322435029307216
Cost on val dataset after 13670 epochs is = 0.07045567035253018
learning rate =  0.04600480352745517
Initial Cost on Val dataset for this epoch 13670 = 0.07045567035253018
Error on this batch = 0.36515279607906437
Error on this batch = 0.37322404781464114
Cost on val dataset after 13671 epochs is = 0.07045545381059652
learning rate =  0.04600368178739017
Initial Cost on Val dataset for this epoch 13671 = 0.07045545381059652
Error on this batch = 0.3651516523059915
Error on this batch = 0.3732237452204168
Cost on val dataset after 13672 epochs is = 0.07045523721020663
learning rate =  0.04600256015672321
Initial Cost on Val dataset for this epoch 13672 = 0.07045523721020663
Error on this batch = 0.3651505090489724
Error on this batch 

Cost on val dataset after 13702 epochs is = 0.07044871223099519
learning rate =  0.04596896201436145
Initial Cost on Val dataset for this epoch 13702 = 0.07044871223099519
Error on this batch = 0.36511645386938973
Error on this batch = 0.37321431442705133
Cost on val dataset after 13703 epochs is = 0.070448493840216
learning rate =  0.04596784376579591
Initial Cost on Val dataset for this epoch 13703 = 0.070448493840216
Error on this batch = 0.36511532686599185
Error on this batch = 0.3732140088091005
Cost on val dataset after 13704 epochs is = 0.07044827539242154
learning rate =  0.04596672562603322
Initial Cost on Val dataset for this epoch 13704 = 0.07044827539242154
Error on this batch = 0.3651142003950588
Error on this batch = 0.37321370311775853
Cost on val dataset after 13705 epochs is = 0.07044805688766038
learning rate =  0.04596560759505486
Initial Cost on Val dataset for this epoch 13705 = 0.07044805688766038
Error on this batch = 0.3651130744570908
Error on this batch = 0.3

Cost on val dataset after 13735 epochs is = 0.07044147550462942
learning rate =  0.0459321171587209
Initial Cost on Val dataset for this epoch 13735 = 0.07044147550462942
Error on this batch = 0.3650795465745945
Error on this batch = 0.37320419716553177
Cost on val dataset after 13736 epochs is = 0.07044125525911421
learning rate =  0.04593100249089318
Initial Cost on Val dataset for this epoch 13736 = 0.07044125525911421
Error on this batch = 0.36507843739923473
Error on this batch = 0.37320388979039704
Cost on val dataset after 13737 epochs is = 0.07044103495829501
learning rate =  0.04592988793125938
Initial Cost on Val dataset for this epoch 13737 = 0.07044103495829501
Error on this batch = 0.3650773287720797
Error on this batch = 0.3732035823832682
Cost on val dataset after 13738 epochs is = 0.0704408146022271
learning rate =  0.04592877347980112
Initial Cost on Val dataset for this epoch 13738 = 0.0704408146022271
Error on this batch = 0.36507622069357554
Error on this batch = 0.

Cost on val dataset after 13768 epochs is = 0.07043417851144955
learning rate =  0.045895390146713365
Initial Cost on Val dataset for this epoch 13768 = 0.07043417851144955
Error on this batch = 0.3650432355759809
Error on this batch = 0.3731940436375652
Cost on val dataset after 13769 epochs is = 0.07043395647110985
learning rate =  0.0458942790396063
Initial Cost on Val dataset for this epoch 13769 = 0.07043395647110985
Error on this batch = 0.365042144713581
Error on this batch = 0.3731937358582299
Cost on val dataset after 13770 epochs is = 0.0704337343773874
learning rate =  0.04589316804008906
Initial Cost on Val dataset for this epoch 13770 = 0.0704337343773874
Error on this batch = 0.3650410544127675
Error on this batch = 0.3731934280874898
Cost on val dataset after 13771 epochs is = 0.07043351223034332
learning rate =  0.04589205714814343
Initial Cost on Val dataset for this epoch 13771 = 0.07043351223034332
Error on this batch = 0.3650399646738966
Error on this batch = 0.3731

Error on this batch = 0.37318420496936827
Cost on val dataset after 13801 epochs is = 0.07042682333362345
learning rate =  0.04585878032032801
Initial Cost on Val dataset for this epoch 13801 = 0.07042682333362345
Error on this batch = 0.3650075354363591
Error on this batch = 0.37318389806456453
Cost on val dataset after 13802 epochs is = 0.07042659956470869
learning rate =  0.04585767275408388
Initial Cost on Val dataset for this epoch 13802 = 0.07042659956470869
Error on this batch = 0.36500646327521935
Error on this batch = 0.3731835912068646
Cost on val dataset after 13803 epochs is = 0.07042637574450132
learning rate =  0.045856565294830315
Initial Cost on Val dataset for this epoch 13803 = 0.07042637574450132
Error on this batch = 0.36500539168551827
Error on this batch = 0.373183284397455
Cost on val dataset after 13804 epochs is = 0.07042615187306676
learning rate =  0.045855457942549215
Initial Cost on Val dataset for this epoch 13804 = 0.07042615187306676
Error on this batch 

Error on this batch = 0.3731741084017369
Cost on val dataset after 13834 epochs is = 0.07041941223687039
learning rate =  0.0458222870267903
Initial Cost on Val dataset for this epoch 13834 = 0.07041941223687039
Error on this batch = 0.36497245691842767
Error on this batch = 0.373173803670523
Cost on val dataset after 13835 epochs is = 0.07041918681023218
learning rate =  0.04582118298170924
Initial Cost on Val dataset for this epoch 13835 = 0.07041918681023218
Error on this batch = 0.364971403712357
Error on this batch = 0.3731734990248298
Cost on val dataset after 13836 epochs is = 0.07041896133449571
learning rate =  0.04582007904302421
Initial Cost on Val dataset for this epoch 13836 = 0.07041896133449571
Error on this batch = 0.3649703510829411
Error on this batch = 0.3731731944657935
Cost on val dataset after 13837 epochs is = 0.07041873580972818
learning rate =  0.045818975210717275
Initial Cost on Val dataset for this epoch 13837 = 0.07041873580972818
Error on this batch = 0.36

Error on this batch = 0.3731638022209997
Cost on val dataset after 13868 epochs is = 0.07041172058943117
learning rate =  0.045784809075046264
Initial Cost on Val dataset for this epoch 13868 = 0.07041172058943117
Error on this batch = 0.3649369716208086
Error on this batch = 0.37316350102223217
Cost on val dataset after 13869 epochs is = 0.07041149353114526
learning rate =  0.04578370863739081
Initial Cost on Val dataset for this epoch 13869 = 0.07041149353114526
Error on this batch = 0.364935938031429
Error on this batch = 0.37316319994637837
Cost on val dataset after 13870 epochs is = 0.07041126642603983
learning rate =  0.04578260830552379
Initial Cost on Val dataset for this epoch 13870 = 0.07041126642603983
Error on this batch = 0.3649349050182534
Error on this batch = 0.3731628989944937
Cost on val dataset after 13871 epochs is = 0.0704110392741811
learning rate =  0.0457815080794274
Initial Cost on Val dataset for this epoch 13871 = 0.0704110392741811
Error on this batch = 0.36

Error on this batch = 0.3731536365870335
Cost on val dataset after 13902 epochs is = 0.07040397473372052
learning rate =  0.04574745343577589
Initial Cost on Val dataset for this epoch 13902 = 0.07040397473372052
Error on this batch = 0.3649021519047011
Error on this batch = 0.3731533401402824
Cost on val dataset after 13903 epochs is = 0.0704037461201101
learning rate =  0.04574635658496901
Initial Cost on Val dataset for this epoch 13903 = 0.0704037461201101
Error on this batch = 0.36490113780980893
Error on this batch = 0.37315304385048625
Cost on val dataset after 13904 epochs is = 0.0704035174618698
learning rate =  0.04574525983934789
Initial Cost on Val dataset for this epoch 13904 = 0.0704035174618698
Error on this batch = 0.36490012428412427
Error on this batch = 0.3731527477185825
Cost on val dataset after 13905 epochs is = 0.07040328875906153
learning rate =  0.045744163198894884
Initial Cost on Val dataset for this epoch 13905 = 0.07040328875906153
Error on this batch = 0.3

Cost on val dataset after 13935 epochs is = 0.07039640725001171
learning rate =  0.04571131280109822
Initial Cost on Val dataset for this epoch 13935 = 0.07039640725001171
Error on this batch = 0.3648689854028892
Error on this batch = 0.3731436508638922
Cost on val dataset after 13936 epochs is = 0.0703961771952438
learning rate =  0.045710219412124636
Initial Cost on Val dataset for this epoch 13936 = 0.0703961771952438
Error on this batch = 0.3648679898983484
Error on this batch = 0.37314336025402045
Cost on val dataset after 13937 epochs is = 0.07039594709778266
learning rate =  0.04570912612775656
Initial Cost on Val dataset for this epoch 13937 = 0.07039594709778266
Error on this batch = 0.36486699494987235
Error on this batch = 0.37314306983053613
Cost on val dataset after 13938 epochs is = 0.07039571695768312
learning rate =  0.04570803294797646
Initial Cost on Val dataset for this epoch 13938 = 0.07039571695768312
Error on this batch = 0.3648660005569691
Error on this batch = 0

Cost on val dataset after 13968 epochs is = 0.07038879318874819
learning rate =  0.045675286101297674
Initial Cost on Val dataset for this epoch 13968 = 0.07038879318874819
Error on this batch = 0.36483642446089404
Error on this batch = 0.37313416321543136
Cost on val dataset after 13969 epochs is = 0.07038856175256762
learning rate =  0.045674196155081885
Initial Cost on Val dataset for this epoch 13969 = 0.07038856175256762
Error on this batch = 0.3648354470189896
Error on this batch = 0.37313387914179413
Cost on val dataset after 13970 epochs is = 0.07038833027535704
learning rate =  0.0456731063128959
Initial Cost on Val dataset for this epoch 13970 = 0.07038833027535704
Error on this batch = 0.364834470114277
Error on this batch = 0.37313359527763257
Cost on val dataset after 13971 epochs is = 0.07038809875716184
learning rate =  0.04567201657472235
Initial Cost on Val dataset for this epoch 13971 = 0.07038809875716184
Error on this batch = 0.36483349374610524
Error on this batch 

Error on this batch = 0.37312490246332114
Cost on val dataset after 14002 epochs is = 0.07038090159756816
learning rate =  0.045638286186900176
Initial Cost on Val dataset for this epoch 14002 = 0.07038090159756816
Error on this batch = 0.36480348872335894
Error on this batch = 0.37312462559125614
Cost on val dataset after 14003 epochs is = 0.0703806687899066
learning rate =  0.04563719976797512
Initial Cost on Val dataset for this epoch 14003 = 0.0703806687899066
Error on this batch = 0.36480252915772954
Error on this batch = 0.37312434894563085
Cost on val dataset after 14004 epochs is = 0.07038043594257615
learning rate =  0.04563611345249143
Initial Cost on Val dataset for this epoch 14004 = 0.07038043594257615
Error on this batch = 0.36480157010506925
Error on this batch = 0.37312407252685953
Cost on val dataset after 14005 epochs is = 0.07038020305561091
learning rate =  0.04563502724043188
Initial Cost on Val dataset for this epoch 14005 = 0.07038020305561091
Error on this batch

Error on this batch = 0.37311588726319644
Cost on val dataset after 14035 epochs is = 0.0703731981717503
learning rate =  0.04560248888555645
Initial Cost on Val dataset for this epoch 14035 = 0.0703731981717503
Error on this batch = 0.36477208961897467
Error on this batch = 0.37311561805008336
Cost on val dataset after 14036 epochs is = 0.07037296407115827
learning rate =  0.04560140587111936
Initial Cost on Val dataset for this epoch 14036 = 0.07037296407115827
Error on this batch = 0.3647711465636161
Error on this batch = 0.3731153490739612
Cost on val dataset after 14037 epochs is = 0.07037272993184107
learning rate =  0.045600322959557064
Initial Cost on Val dataset for this epoch 14037 = 0.07037272993184107
Error on this batch = 0.36477020399479304
Error on this batch = 0.3731150803350501
Cost on val dataset after 14038 epochs is = 0.0703724957538214
learning rate =  0.04559924015085247
Initial Cost on Val dataset for this epoch 14038 = 0.0703724957538214
Error on this batch = 0.

Cost on val dataset after 14068 epochs is = 0.07036545251474155
learning rate =  0.045566803633895175
Initial Cost on Val dataset for this epoch 14068 = 0.07036545251474155
Error on this batch = 0.3647412211636994
Error on this batch = 0.37310686802198056
Cost on val dataset after 14069 epochs is = 0.07036521714629056
learning rate =  0.04556572400531983
Initial Cost on Val dataset for this epoch 14069 = 0.07036521714629056
Error on this batch = 0.3647402937265706
Error on this batch = 0.3731066069560538
Cost on val dataset after 14070 epochs is = 0.07036498173967952
learning rate =  0.04556464447905713
Initial Cost on Val dataset for this epoch 14070 = 0.07036498173967952
Error on this batch = 0.36473936674866153
Error on this batch = 0.37310634613143684
Cost on val dataset after 14071 epochs is = 0.07036474629491979
learning rate =  0.045563565055090106
Initial Cost on Val dataset for this epoch 14071 = 0.07036474629491979
Error on this batch = 0.36473844022915247
Error on this batch

Error on this batch = 0.3730983802156421
Cost on val dataset after 14102 epochs is = 0.0703574286320572
learning rate =  0.045530153558384336
Initial Cost on Val dataset for this epoch 14102 = 0.0703574286320572
Error on this batch = 0.36470994108319277
Error on this batch = 0.3730981271012654
Cost on val dataset after 14103 epochs is = 0.07035719197054742
learning rate =  0.04552907739894463
Initial Cost on Val dataset for this epoch 14103 = 0.07035719197054742
Error on this batch = 0.36470902880861344
Error on this batch = 0.3730978742265345
Cost on val dataset after 14104 epochs is = 0.07035695527107921
learning rate =  0.04552800134124294
Initial Cost on Val dataset for this epoch 14104 = 0.07035695527107921
Error on this batch = 0.3647081169663083
Error on this batch = 0.37309762159131654
Cost on val dataset after 14105 epochs is = 0.07035671853365293
learning rate =  0.04552692538526246
Initial Cost on Val dataset for this epoch 14105 = 0.07035671853365293
Error on this batch = 0

Error on this batch = 0.37309015306209065
Cost on val dataset after 14135 epochs is = 0.07034959874984385
learning rate =  0.045494693922888546
Initial Cost on Val dataset for this epoch 14135 = 0.07034959874984385
Error on this batch = 0.3646800602750352
Error on this batch = 0.3730899077611951
Cost on val dataset after 14136 epochs is = 0.0703493608342633
learning rate =  0.04549362111193895
Initial Cost on Val dataset for this epoch 14136 = 0.0703493608342633
Error on this batch = 0.36467916188447946
Error on this batch = 0.3730896626932123
Cost on val dataset after 14137 epochs is = 0.07034912288057954
learning rate =  0.045492548402174066
Initial Cost on Val dataset for this epoch 14137 = 0.07034912288057954
Error on this batch = 0.3646782639030199
Error on this batch = 0.37308941785786737
Cost on val dataset after 14138 epochs is = 0.07034888488878334
learning rate =  0.045491475793577194
Initial Cost on Val dataset for this epoch 14138 = 0.07034888488878334
Error on this batch =

Error on this batch = 0.373082179453213
Cost on val dataset after 14168 epochs is = 0.07034172735574079
learning rate =  0.04545934449612784
Initial Cost on Val dataset for this epoch 14168 = 0.07034172735574079
Error on this batch = 0.3646506260621197
Error on this batch = 0.3730819416725414
Cost on val dataset after 14169 epochs is = 0.07034148817641593
learning rate =  0.04545827501547774
Initial Cost on Val dataset for this epoch 14169 = 0.07034148817641593
Error on this batch = 0.3646497408561821
Error on this batch = 0.37308170411373787
Cost on val dataset after 14170 epochs is = 0.0703412489585435
learning rate =  0.04545720563546334
Initial Cost on Val dataset for this epoch 14170 = 0.0703412489585435
Error on this batch = 0.36464885604043873
Error on this batch = 0.37308146677640835
Cost on val dataset after 14171 epochs is = 0.07034100970210572
learning rate =  0.045456136356068065
Initial Cost on Val dataset for this epoch 14171 = 0.07034100970210572
Error on this batch = 0.

Error on this batch = 0.37307421687373493
Cost on val dataset after 14202 epochs is = 0.07033357351648013
learning rate =  0.045423038511669366
Initial Cost on Val dataset for this epoch 14202 = 0.07033357351648013
Error on this batch = 0.3646207452125452
Error on this batch = 0.3730739863968058
Cost on val dataset after 14203 epochs is = 0.07033333301519656
learning rate =  0.04542197244335993
Initial Cost on Val dataset for this epoch 14203 = 0.07033333301519656
Error on this batch = 0.3646198730236567
Error on this batch = 0.3730737561266246
Cost on val dataset after 14204 epochs is = 0.07033309247463397
learning rate =  0.045420906475124964
Initial Cost on Val dataset for this epoch 14204 = 0.07033309247463397
Error on this batch = 0.3646190012108065
Error on this batch = 0.37307352606269645
Cost on val dataset after 14205 epochs is = 0.07033285189476704
learning rate =  0.04541984060694803
Initial Cost on Val dataset for this epoch 14205 = 0.07033285189476704
Error on this batch =

Error on this batch = 0.3730667174940116
Cost on val dataset after 14235 epochs is = 0.07032561608863737
learning rate =  0.0453879110072459
Initial Cost on Val dataset for this epoch 14235 = 0.07032561608863737
Error on this batch = 0.36459215992860367
Error on this batch = 0.3730664935647398
Cost on val dataset after 14236 epochs is = 0.07032537427671466
learning rate =  0.04538684823273531
Initial Cost on Val dataset for this epoch 14236 = 0.07032537427671466
Error on this batch = 0.3645913000016492
Error on this batch = 0.37306626982457813
Cost on val dataset after 14237 epochs is = 0.07032513242457576
learning rate =  0.04538578555775873
Initial Cost on Val dataset for this epoch 14237 = 0.07032513242457576
Error on this batch = 0.3645904404424465
Error on this batch = 0.3730660462729528
Cost on val dataset after 14238 epochs is = 0.07032489053218913
learning rate =  0.04538472298229987
Initial Cost on Val dataset for this epoch 14238 = 0.07032489053218913
Error on this batch = 0.

Cost on val dataset after 14267 epochs is = 0.07031785799763664
learning rate =  0.04535395151101061
Initial Cost on Val dataset for this epoch 14267 = 0.07031785799763664
Error on this batch = 0.36456482400899526
Error on this batch = 0.37305942446385887
Cost on val dataset after 14268 epochs is = 0.07031761488235297
learning rate =  0.04535289191351681
Initial Cost on Val dataset for this epoch 14268 = 0.07031761488235297
Error on this batch = 0.3645639757902866
Error on this batch = 0.3730592064612177
Cost on val dataset after 14269 epochs is = 0.07031737172576465
learning rate =  0.04535183241503692
Initial Cost on Val dataset for this epoch 14269 = 0.07031737172576465
Error on this batch = 0.3645631279362868
Error on this batch = 0.3730589886277031
Cost on val dataset after 14270 epochs is = 0.07031712852783513
learning rate =  0.04535077301555477
Initial Cost on Val dataset for this epoch 14270 = 0.07031712852783513
Error on this batch = 0.3645622804469761
Error on this batch = 0

Error on this batch = 0.37305252904921005
Cost on val dataset after 14300 epochs is = 0.07030981317954896
learning rate =  0.04531903698488545
Initial Cost on Val dataset for this epoch 14300 = 0.07030981317954896
Error on this batch = 0.3645370254253666
Error on this batch = 0.3730523161348259
Cost on val dataset after 14301 epochs is = 0.07030956868118969
learning rate =  0.04531798064632312
Initial Cost on Val dataset for this epoch 14301 = 0.07030956868118969
Error on this batch = 0.3645361892548492
Error on this batch = 0.37305210336841416
Cost on val dataset after 14302 epochs is = 0.07030932414025041
learning rate =  0.04531692440624241
Initial Cost on Val dataset for this epoch 14302 = 0.07030932414025041
Error on this batch = 0.36453535345061255
Error on this batch = 0.37305189074929007
Cost on val dataset after 14303 epochs is = 0.07030907955669041
learning rate =  0.04531586826462727
Initial Cost on Val dataset for this epoch 14303 = 0.07030907955669041
Error on this batch =

Cost on val dataset after 14332 epochs is = 0.07030196790637337
learning rate =  0.04528528291822397
Initial Cost on Val dataset for this epoch 14332 = 0.07030196790637337
Error on this batch = 0.36451045035254326
Error on this batch = 0.3730455772171544
Cost on val dataset after 14333 epochs is = 0.0703017220247117
learning rate =  0.0452842297231234
Initial Cost on Val dataset for this epoch 14333 = 0.0703017220247117
Error on this batch = 0.36450962597874087
Error on this batch = 0.3730453688176385
Cost on val dataset after 14334 epochs is = 0.07030147609911391
learning rate =  0.045283176625992176
Initial Cost on Val dataset for this epoch 14334 = 0.07030147609911391
Error on this batch = 0.36450880197672364
Error on this batch = 0.37304516054286857
Cost on val dataset after 14335 epochs is = 0.07030123012953603
learning rate =  0.04528212362681437
Initial Cost on Val dataset for this epoch 14335 = 0.07030123012953603
Error on this batch = 0.36450797834671933
Error on this batch = 

Error on this batch = 0.37303876192236785
Cost on val dataset after 14366 epochs is = 0.07029358301458026
learning rate =  0.045249529150359485
Initial Cost on Val dataset for this epoch 14366 = 0.07029358301458026
Error on this batch = 0.3644826317217003
Error on this batch = 0.3730385572533765
Cost on val dataset after 14367 epochs is = 0.0702933356139162
learning rate =  0.045248479277292974
Initial Cost on Val dataset for this epoch 14367 = 0.0702933356139162
Error on this batch = 0.36448182013467645
Error on this batch = 0.3730383526848493
Cost on val dataset after 14368 epochs is = 0.07029308816777494
learning rate =  0.0452474295016557
Initial Cost on Val dataset for this epoch 14368 = 0.07029308816777494
Error on this batch = 0.36448100892885976
Error on this batch = 0.3730381482160378
Cost on val dataset after 14369 epochs is = 0.07029284067610989
learning rate =  0.04524637982343183
Initial Cost on Val dataset for this epoch 14369 = 0.07029284067610989
Error on this batch = 0

Error on this batch = 0.37303205678199747
Cost on val dataset after 14399 epochs is = 0.070285394523912
learning rate =  0.04521493469562756
Initial Cost on Val dataset for this epoch 14399 = 0.070285394523912
Error on this batch = 0.36445605250068625
Error on this batch = 0.37303185502988856
Cost on val dataset after 14400 epochs is = 0.07028514559756302
learning rate =  0.04521388802939196
Initial Cost on Val dataset for this epoch 14400 = 0.07028514559756302
Error on this batch = 0.3644552536782446
Error on this batch = 0.37303165335324523
Cost on val dataset after 14401 epochs is = 0.07028489662416791
learning rate =  0.04521284146006541
Initial Cost on Val dataset for this epoch 14401 = 0.07028489662416791
Error on this batch = 0.36445445524897657
Error on this batch = 0.3730314517513022
Cost on val dataset after 14402 epochs is = 0.0702846476036782
learning rate =  0.04521179498763222
Initial Cost on Val dataset for this epoch 14402 = 0.0702846476036782
Error on this batch = 0.36

Cost on val dataset after 14432 epochs is = 0.07027715484812265
learning rate =  0.0451804457923167
Initial Cost on Val dataset for this epoch 14432 = 0.07027715484812265
Error on this batch = 0.3644299012087804
Error on this batch = 0.37302523495059875
Cost on val dataset after 14433 epochs is = 0.07027690434346115
learning rate =  0.04517940231580951
Initial Cost on Val dataset for this epoch 14433 = 0.07027690434346115
Error on this batch = 0.3644291155817318
Error on this batch = 0.37302503533299314
Cost on val dataset after 14434 epochs is = 0.0702766537901349
learning rate =  0.04517835893569515
Initial Cost on Val dataset for this epoch 14434 = 0.0702766537901349
Error on this batch = 0.3644283303619675
Error on this batch = 0.37302483576466144
Cost on val dataset after 14435 epochs is = 0.0702764031880943
learning rate =  0.045177315651958046
Initial Cost on Val dataset for this epoch 14435 = 0.0702764031880943
Error on this batch = 0.36442754554994217
Error on this batch = 0.3

Error on this batch = 0.37301886778400795
Cost on val dataset after 14465 epochs is = 0.07026886222768781
learning rate =  0.045146061878128065
Initial Cost on Val dataset for this epoch 14465 = 0.07026886222768781
Error on this batch = 0.36440419310152045
Error on this batch = 0.3730186693587831
Cost on val dataset after 14466 epochs is = 0.07026861009077298
learning rate =  0.04514502157437679
Initial Cost on Val dataset for this epoch 14466 = 0.07026861009077298
Error on this batch = 0.3644034211625285
Error on this batch = 0.3730184709580266
Cost on val dataset after 14467 epochs is = 0.07026835790354509
learning rate =  0.04514398136650605
Initial Cost on Val dataset for this epoch 14467 = 0.07026835790354509
Error on this batch = 0.36440264964653507
Error on this batch = 0.37301827258096326
Cost on val dataset after 14468 epochs is = 0.07026810566595389
learning rate =  0.045142941254500364
Initial Cost on Val dataset for this epoch 14468 = 0.07026810566595389
Error on this batch

Error on this batch = 0.3730123284449243
Cost on val dataset after 14498 epochs is = 0.07026051486986377
learning rate =  0.045111782395032364
Initial Cost on Val dataset for this epoch 14498 = 0.07026051486986377
Error on this batch = 0.36437894520598185
Error on this batch = 0.3730121304183771
Cost on val dataset after 14499 epochs is = 0.07026026104606185
learning rate =  0.04511074524719323
Initial Cost on Val dataset for this epoch 14499 = 0.07026026104606185
Error on this batch = 0.36437818749325457
Error on this batch = 0.3730119323907763
Cost on val dataset after 14500 epochs is = 0.07026000717028649
learning rate =  0.045109708194726206
Initial Cost on Val dataset for this epoch 14500 = 0.07026000717028649
Error on this batch = 0.3643774302203989
Error on this batch = 0.37301173436135104
Cost on val dataset after 14501 epochs is = 0.07025975324248736
learning rate =  0.04510867123761595
Initial Cost on Val dataset for this epoch 14501 = 0.07025975324248736
Error on this batch 

Error on this batch = 0.37300578881886287
Cost on val dataset after 14531 epochs is = 0.07025211096809841
learning rate =  0.045077606789227835
Initial Cost on Val dataset for this epoch 14531 = 0.07025211096809841
Error on this batch = 0.3643541758326807
Error on this batch = 0.37300559035212205
Cost on val dataset after 14532 epochs is = 0.07025185540263905
learning rate =  0.04507657278058452
Initial Cost on Val dataset for this epoch 14532 = 0.07025185540263905
Error on this batch = 0.36435343291676725
Error on this batch = 0.37300539185904313
Cost on val dataset after 14533 epochs is = 0.07025159978354985
learning rate =  0.04507553886680875
Initial Cost on Val dataset for this epoch 14533 = 0.07025159978354985
Error on this batch = 0.36435269045840346
Error on this batch = 0.3730051933388653
Cost on val dataset after 14534 epochs is = 0.07025134411078085
learning rate =  0.04507450504788527
Initial Cost on Val dataset for this epoch 14534 = 0.07025134411078085
Error on this batch

Error on this batch = 0.3729990216386156
Cost on val dataset after 14565 epochs is = 0.07024339135802224
learning rate =  0.045042503625061035
Initial Cost on Val dataset for this epoch 14565 = 0.07024339135802224
Error on this batch = 0.36432917665968434
Error on this batch = 0.37299882185183963
Cost on val dataset after 14566 epochs is = 0.0702431339412361
learning rate =  0.045041472833390196
Initial Cost on Val dataset for this epoch 14566 = 0.0702431339412361
Error on this batch = 0.3643284496088666
Error on this batch = 0.37299862201307726
Cost on val dataset after 14567 epochs is = 0.07024287646913314
learning rate =  0.045040442136071
Initial Cost on Val dataset for this epoch 14567 = 0.07024287646913314
Error on this batch = 0.364327723034365
Error on this batch = 0.3729984221215816
Cost on val dataset after 14568 epochs is = 0.07024261894166427
learning rate =  0.045039411533088325
Initial Cost on Val dataset for this epoch 14568 = 0.07024261894166427
Error on this batch = 0.

Error on this batch = 0.3729921952768332
Cost on val dataset after 14599 epochs is = 0.07023460786187738
learning rate =  0.045007509549249564
Initial Cost on Val dataset for this epoch 14599 = 0.07023460786187738
Error on this batch = 0.36430472749617354
Error on this batch = 0.37299199330641963
Cost on val dataset after 14600 epochs is = 0.07023434853693718
learning rate =  0.04500648195707703
Initial Cost on Val dataset for this epoch 14600 = 0.07023434853693718
Error on this batch = 0.36430401695462367
Error on this batch = 0.37299179125890114
Cost on val dataset after 14601 epochs is = 0.07023408915502781
learning rate =  0.045005454458744244
Initial Cost on Val dataset for this epoch 14601 = 0.07023408915502781
Error on this batch = 0.36430330690849316
Error on this batch = 0.372991589133548
Cost on val dataset after 14602 epochs is = 0.07023382971610129
learning rate =  0.045004427054236205
Initial Cost on Val dataset for this epoch 14602 = 0.07023382971610129
Error on this batc

Error on this batch = 0.3729852806856881
Cost on val dataset after 14633 epochs is = 0.0702257585694115
learning rate =  0.04497262396990797
Initial Cost on Val dataset for this epoch 14633 = 0.0702257585694115
Error on this batch = 0.3642808504009692
Error on this batch = 0.37298507568763783
Cost on val dataset after 14634 epochs is = 0.07022549728089679
learning rate =  0.04497159955989486
Initial Cost on Val dataset for this epoch 14634 = 0.07022549728089679
Error on this batch = 0.36428015702138483
Error on this batch = 0.3729848705879853
Cost on val dataset after 14635 epochs is = 0.0702252359338089
learning rate =  0.04497057524321357
Initial Cost on Val dataset for this epoch 14635 = 0.0702252359338089
Error on this batch = 0.3642794641564916
Error on this batch = 0.37298466538601976
Cost on val dataset after 14636 epochs is = 0.07022497452810154
learning rate =  0.04496955101984921
Initial Cost on Val dataset for this epoch 14636 = 0.07022497452810154
Error on this batch = 0.36

Error on this batch = 0.3729782495298343
Cost on val dataset after 14667 epochs is = 0.07021684162584277
learning rate =  0.04493784629972792
Initial Cost on Val dataset for this epoch 14667 = 0.07021684162584277
Error on this batch = 0.3642575676363913
Error on this batch = 0.37297804068237367
Cost on val dataset after 14668 epochs is = 0.07021657832019815
learning rate =  0.04493682505466928
Initial Cost on Val dataset for this epoch 14668 = 0.07021657832019815
Error on this batch = 0.3642568920750178
Error on this batch = 0.37297783170949705
Cost on val dataset after 14669 epochs is = 0.07021631495443792
learning rate =  0.04493580390243845
Initial Cost on Val dataset for this epoch 14669 = 0.07021631495443792
Error on this batch = 0.36425621704765
Error on this batch = 0.37297762261051476
Cost on val dataset after 14670 epochs is = 0.07021605152851777
learning rate =  0.04493478284302065
Initial Cost on Val dataset for this epoch 14670 = 0.07021605152851777
Error on this batch = 0.

Cost on val dataset after 14700 epochs is = 0.07020812055949412
learning rate =  0.04490419414545113
Initial Cost on Val dataset for this epoch 14700 = 0.07020812055949412
Error on this batch = 0.36423555916115663
Error on this batch = 0.37297107426047243
Cost on val dataset after 14701 epochs is = 0.07020785524700336
learning rate =  0.04490317595593227
Initial Cost on Val dataset for this epoch 14701 = 0.07020785524700336
Error on this batch = 0.3642349015213921
Error on this batch = 0.3729708607657821
Cost on val dataset after 14702 epochs is = 0.07020758987297042
learning rate =  0.04490215785875573
Initial Cost on Val dataset for this epoch 14702 = 0.07020758987297042
Error on this batch = 0.3642342444343332
Error on this batch = 0.37297064712258476
Cost on val dataset after 14703 epochs is = 0.07020732443735325
learning rate =  0.044901139853906855
Initial Cost on Val dataset for this epoch 14703 = 0.07020732443735325
Error on this batch = 0.3642335879005449
Error on this batch =

Error on this batch = 0.37296416548069417
Cost on val dataset after 14733 epochs is = 0.07019933252679035
learning rate =  0.04487064256825314
Initial Cost on Val dataset for this epoch 14733 = 0.07019933252679035
Error on this batch = 0.36421415195662554
Error on this batch = 0.3729639469056371
Cost on val dataset after 14734 epochs is = 0.07019906516162916
learning rate =  0.0448696274183112
Initial Cost on Val dataset for this epoch 14734 = 0.07019906516162916
Error on this batch = 0.3642135128539485
Error on this batch = 0.3729637281610157
Cost on val dataset after 14735 epochs is = 0.07019879773357925
learning rate =  0.04486861236022973
Initial Cost on Val dataset for this epoch 14735 = 0.07019879773357925
Error on this batch = 0.3642128743225663
Error on this batch = 0.3729635092461824
Cost on val dataset after 14736 epochs is = 0.07019853024260116
learning rate =  0.044867597393994166
Initial Cost on Val dataset for this epoch 14736 = 0.07019853024260116
Error on this batch = 0

Error on this batch = 0.3729568594674669
Cost on val dataset after 14766 epochs is = 0.07019047605941803
learning rate =  0.04483719104326493
Initial Cost on Val dataset for this epoch 14766 = 0.07019047605941803
Error on this batch = 0.3641933662668437
Error on this batch = 0.3729566349584107
Cost on val dataset after 14767 epochs is = 0.07019020659855969
learning rate =  0.04483617891705594
Initial Cost on Val dataset for this epoch 14767 = 0.07019020659855969
Error on this batch = 0.3641927463122899
Error on this batch = 0.37295641025876025
Cost on val dataset after 14768 epochs is = 0.07018993707355572
learning rate =  0.044835166882229126
Initial Cost on Val dataset for this epoch 14768 = 0.07018993707355572
Error on this batch = 0.36419212694746234
Error on this batch = 0.37295618536788894
Cost on val dataset after 14769 epochs is = 0.07018966748436956
learning rate =  0.04483415493877006
Initial Cost on Val dataset for this epoch 14769 = 0.07018966748436956
Error on this batch =

Error on this batch = 0.3729493466417652
Cost on val dataset after 14799 epochs is = 0.07018154978629346
learning rate =  0.044803839049521725
Initial Cost on Val dataset for this epoch 14799 = 0.07018154978629346
Error on this batch = 0.36417322216625536
Error on this batch = 0.37294911551565746
Cost on val dataset after 14800 epochs is = 0.07018127818980979
learning rate =  0.04480282993131941
Initial Cost on Val dataset for this epoch 14800 = 0.07018127818980979
Error on this batch = 0.36417262196505257
Error on this batch = 0.37294888417860494
Cost on val dataset after 14801 epochs is = 0.07018100652802181
learning rate =  0.04480182090402455
Initial Cost on Val dataset for this epoch 14801 = 0.07018100652802181
Error on this batch = 0.3641720223718141
Error on this batch = 0.3729486526300012
Cost on val dataset after 14802 epochs is = 0.07018073480089602
learning rate =  0.04480081196762281
Initial Cost on Val dataset for this epoch 14802 = 0.07018073480089602
Error on this batch 

Error on this batch = 0.37294136641298126
Cost on val dataset after 14833 epochs is = 0.07017227867423506
learning rate =  0.04476957994411997
Initial Cost on Val dataset for this epoch 14833 = 0.07017227867423506
Error on this batch = 0.3641531596715866
Error on this batch = 0.3729411277781001
Cost on val dataset after 14834 epochs is = 0.07017200483920233
learning rate =  0.04476857390875089
Initial Cost on Val dataset for this epoch 14834 = 0.07017200483920233
Error on this batch = 0.36415258044816307
Error on this batch = 0.3729408889119859
Cost on val dataset after 14835 epochs is = 0.07017173093778194
learning rate =  0.04476756796380383
Initial Cost on Val dataset for this epoch 14835 = 0.07017173093778194
Error on this batch = 0.36415200185126784
Error on this batch = 0.37294064981405173
Cost on val dataset after 14836 epochs is = 0.0701714569699438
learning rate =  0.04476656210926457
Initial Cost on Val dataset for this epoch 14836 = 0.0701714569699438
Error on this batch = 0

Error on this batch = 0.37293336619182677
Cost on val dataset after 14866 epochs is = 0.0701632069054978
learning rate =  0.044736428442304174
Initial Cost on Val dataset for this epoch 14866 = 0.0701632069054978
Error on this batch = 0.3641343790558534
Error on this batch = 0.37293311961941433
Cost on val dataset after 14867 epochs is = 0.07016293086434712
learning rate =  0.04473542538337014
Initial Cost on Val dataset for this epoch 14867 = 0.07016293086434712
Error on this batch = 0.3641338207930538
Error on this batch = 0.3729328727966909
Cost on val dataset after 14868 epochs is = 0.07016265475587052
learning rate =  0.04473442241439052
Initial Cost on Val dataset for this epoch 14868 = 0.07016265475587052
Error on this batch = 0.3641332631745667
Error on this batch = 0.37293262572308733
Cost on val dataset after 14869 epochs is = 0.07016237858004143
learning rate =  0.04473341953535117
Initial Cost on Val dataset for this epoch 14869 = 0.07016237858004143
Error on this batch = 0

Error on this batch = 0.3729250940524684
Cost on val dataset after 14899 epochs is = 0.07015406185849046
learning rate =  0.04470337491651002
Initial Cost on Val dataset for this epoch 14899 = 0.07015406185849046
Error on this batch = 0.36411629949215196
Error on this batch = 0.3729248389219127
Cost on val dataset after 14900 epochs is = 0.07015378358210483
learning rate =  0.04470237481863464
Initial Cost on Val dataset for this epoch 14900 = 0.07015378358210483
Error on this batch = 0.3641157627731257
Error on this batch = 0.37292458352251723
Cost on val dataset after 14901 epochs is = 0.07015350523757095
learning rate =  0.04470137481024948
Initial Cost on Val dataset for this epoch 14901 = 0.07015350523757095
Error on this batch = 0.364115226715964
Error on this batch = 0.3729243278537286
Cost on val dataset after 14902 epochs is = 0.07015322682486576
learning rate =  0.044700374891340516
Initial Cost on Val dataset for this epoch 14902 = 0.07015322682486576
Error on this batch = 0

Error on this batch = 0.37291626550185797
Cost on val dataset after 14933 epochs is = 0.07014456209717924
learning rate =  0.0446694217090507
Initial Cost on Val dataset for this epoch 14933 = 0.07014456209717924
Error on this batch = 0.3640984255036139
Error on this batch = 0.372916000922748
Cost on val dataset after 14934 epochs is = 0.07014428149146025
learning rate =  0.04466842464600024
Initial Cost on Val dataset for this epoch 14934 = 0.07014428149146025
Error on this batch = 0.3640979115831933
Error on this batch = 0.3729157360562125
Cost on val dataset after 14935 epochs is = 0.07014400081686971
learning rate =  0.04466742767196531
Initial Cost on Val dataset for this epoch 14935 = 0.07014400081686971
Error on this batch = 0.3640973983424904
Error on this batch = 0.3729154709017118
Cost on val dataset after 14936 epochs is = 0.07014372007338825
learning rate =  0.04466643078693201
Initial Cost on Val dataset for this epoch 14936 = 0.07014372007338825
Error on this batch = 0.36

Error on this batch = 0.37290737970050053
Cost on val dataset after 14966 epochs is = 0.07013526564314271
learning rate =  0.04463656555284277
Initial Cost on Val dataset for this epoch 14966 = 0.07013526564314271
Error on this batch = 0.36408182786042204
Error on this batch = 0.372907105353308
Cost on val dataset after 14967 epochs is = 0.07013498275503972
learning rate =  0.04463557141997935
Initial Cost on Val dataset for this epoch 14967 = 0.07013498275503972
Error on this batch = 0.36408133664524905
Error on this batch = 0.37290683070106256
Cost on val dataset after 14968 epochs is = 0.07013469979748424
learning rate =  0.04463457737567419
Initial Cost on Val dataset for this epoch 14968 = 0.07013469979748424
Error on this batch = 0.3640808461269235
Error on this batch = 0.3729065557432354
Cost on val dataset after 14969 epochs is = 0.07013441677046052
learning rate =  0.044633583419913476
Initial Cost on Val dataset for this epoch 14969 = 0.07013441677046052
Error on this batch =

Error on this batch = 0.3728981623019574
Cost on val dataset after 14999 epochs is = 0.07012589358333744
learning rate =  0.04460380585191793
Initial Cost on Val dataset for this epoch 14999 = 0.07012589358333744
Error on this batch = 0.3640659885072619
Error on this batch = 0.37289787761035664
Cost on val dataset after 15000 epochs is = 0.07012560839557767
learning rate =  0.04460281463420494
Initial Cost on Val dataset for this epoch 15000 = 0.07012560839557767
Error on this batch = 0.3640655205600087
Error on this batch = 0.37289759259639665
Cost on val dataset after 15001 epochs is = 0.07012532313790279
learning rate =  0.04460182350459629
Initial Cost on Val dataset for this epoch 15001 = 0.07012532313790279
Error on this batch = 0.36406505332657796
Error on this batch = 0.3728973072595572
Cost on val dataset after 15002 epochs is = 0.07012503781030059
learning rate =  0.04460083246307825
Initial Cost on Val dataset for this epoch 15002 = 0.07012503781030059
Error on this batch = 

Error on this batch = 0.37288859444238426
Cost on val dataset after 15032 epochs is = 0.07011644540985448
learning rate =  0.044571142111830844
Initial Cost on Val dataset for this epoch 15032 = 0.07011644540985448
Error on this batch = 0.3640509259394591
Error on this batch = 0.37288829883921965
Cost on val dataset after 15033 epochs is = 0.0701161579090307
learning rate =  0.04457015379434168
Initial Cost on Val dataset for this epoch 15033 = 0.0701161579090307
Error on this batch = 0.3640504818185681
Error on this batch = 0.3728880028966208
Cost on val dataset after 15034 epochs is = 0.07011587033794478
learning rate =  0.044569165564506213
Initial Cost on Val dataset for this epoch 15034 = 0.07011587033794478
Error on this batch = 0.36405003842837746
Error on this batch = 0.3728877066140723
Cost on val dataset after 15035 epochs is = 0.07011558269658794
learning rate =  0.04456817742231087
Initial Cost on Val dataset for this epoch 15035 = 0.07011558269658794
Error on this batch = 

Error on this batch = 0.3728786575119713
Cost on val dataset after 15065 epochs is = 0.07010692074063778
learning rate =  0.04453857384174909
Initial Cost on Val dataset for this epoch 15065 = 0.07010692074063778
Error on this batch = 0.36403665854600903
Error on this batch = 0.37287835043619183
Cost on val dataset after 15066 epochs is = 0.07010663091709213
learning rate =  0.044537588409666075
Initial Cost on Val dataset for this epoch 15066 = 0.07010663091709213
Error on this batch = 0.36403623880787367
Error on this batch = 0.37287804300403793
Cost on val dataset after 15067 epochs is = 0.07010634102304857
learning rate =  0.04453660306478943
Initial Cost on Val dataset for this epoch 15067 = 0.07010634102304857
Error on this batch = 0.3640358198172951
Error on this batch = 0.3728777352149973
Cost on val dataset after 15068 epochs is = 0.0701060510585016
learning rate =  0.04453561780710567
Initial Cost on Val dataset for this epoch 15068 = 0.0701060510585016
Error on this batch = 

Error on this batch = 0.3728683330512243
Cost on val dataset after 15098 epochs is = 0.07009731931481103
learning rate =  0.04450610055441896
Initial Cost on Val dataset for this epoch 15098 = 0.07009731931481103
Error on this batch = 0.3640232046913133
Error on this batch = 0.37286801394466473
Cost on val dataset after 15099 epochs is = 0.07009702716246714
learning rate =  0.044505117993032334
Initial Cost on Val dataset for this epoch 15099 = 0.07009702716246714
Error on this batch = 0.36402280989307584
Error on this batch = 0.37286769446482676
Cost on val dataset after 15100 epochs is = 0.07009673493949498
learning rate =  0.044504135518408
Initial Cost on Val dataset for this epoch 15100 = 0.07009673493949498
Error on this batch = 0.3640224158593247
Error on this batch = 0.37286737461119757
Cost on val dataset after 15101 epochs is = 0.0700964426458921
learning rate =  0.04450315313053257
Initial Cost on Val dataset for this epoch 15101 = 0.0700964426458921
Error on this batch = 0.

Cost on val dataset after 15130 epochs is = 0.07008793539955623
learning rate =  0.04447470155775943
Initial Cost on Val dataset for this epoch 15130 = 0.07008793539955623
Error on this batch = 0.3640109528959093
Error on this batch = 0.37285760264280365
Cost on val dataset after 15131 epochs is = 0.07008764098636487
learning rate =  0.04447372176613156
Initial Cost on Val dataset for this epoch 15131 = 0.07008764098636487
Error on this batch = 0.3640105828178279
Error on this batch = 0.3728572709468494
Cost on val dataset after 15132 epochs is = 0.07008734650251261
learning rate =  0.044472742060838455
Initial Cost on Val dataset for this epoch 15132 = 0.07008734650251261
Error on this batch = 0.36401021352082763
Error on this batch = 0.3728569388606459
Cost on val dataset after 15133 epochs is = 0.07008705194799983
learning rate =  0.0444717624418668
Initial Cost on Val dataset for this epoch 15133 = 0.07008705194799983
Error on this batch = 0.3640098450054314
Error on this batch = 0

Error on this batch = 0.372846447799911
Cost on val dataset after 15164 epochs is = 0.07007788571636768
learning rate =  0.04444143699668939
Initial Cost on Val dataset for this epoch 15164 = 0.07007788571636768
Error on this batch = 0.3639988115705951
Error on this batch = 0.3728461029520693
Cost on val dataset after 15165 epochs is = 0.07007758890140585
learning rate =  0.04444046013299284
Initial Cost on Val dataset for this epoch 15165 = 0.07007758890140585
Error on this batch = 0.36399846834449134
Error on this batch = 0.372845757696836
Cost on val dataset after 15166 epochs is = 0.07007729201584112
learning rate =  0.04443948335518008
Initial Cost on Val dataset for this epoch 15166 = 0.07007729201584112
Error on this batch = 0.3639981259174328
Error on this batch = 0.3728454120336875
Cost on val dataset after 15167 epochs is = 0.07007699505967652
learning rate =  0.04443850666323789
Initial Cost on Val dataset for this epoch 15167 = 0.07007699505967652
Error on this batch = 0.36

Error on this batch = 0.3728344912822507
Cost on val dataset after 15198 epochs is = 0.0700677544210186
learning rate =  0.0444082717328812
Initial Cost on Val dataset for this epoch 15198 = 0.0700677544210186
Error on this batch = 0.3639875933719286
Error on this batch = 0.3728341322878294
Cost on val dataset after 15199 epochs is = 0.07006745520769624
learning rate =  0.044407297781838405
Initial Cost on Val dataset for this epoch 15199 = 0.07006745520769624
Error on this batch = 0.3639872776164825
Error on this batch = 0.37283377286806074
Cost on val dataset after 15200 epochs is = 0.07006715592391426
learning rate =  0.04440632391623179
Initial Cost on Val dataset for this epoch 15200 = 0.07006715592391426
Error on this batch = 0.363986962678556
Error on this batch = 0.3728334130224109
Cost on val dataset after 15201 epochs is = 0.07006685656967804
learning rate =  0.04440535013604824
Initial Cost on Val dataset for this epoch 15201 = 0.07006685656967804
Error on this batch = 0.363

Error on this batch = 0.3728220436399887
Cost on val dataset after 15232 epochs is = 0.0700575416753627
learning rate =  0.04437520524877603
Initial Cost on Val dataset for this epoch 15232 = 0.0700575416753627
Error on this batch = 0.3639773196545237
Error on this batch = 0.3728216698819671
Cost on val dataset after 15233 epochs is = 0.07005724006980675
learning rate =  0.04437423419522307
Initial Cost on Val dataset for this epoch 15233 = 0.07005724006980675
Error on this batch = 0.36397703200891407
Error on this batch = 0.3728212956802257
Cost on val dataset after 15234 epochs is = 0.07005693839400852
learning rate =  0.044373263226662005
Initial Cost on Val dataset for this epoch 15234 = 0.07005693839400852
Error on this batch = 0.36397674520000356
Error on this batch = 0.3728209210342167
Cost on val dataset after 15235 epochs is = 0.07005663664797539
learning rate =  0.0443722923430798
Initial Cost on Val dataset for this epoch 15235 = 0.07005663664797539
Error on this batch = 0.3

Error on this batch = 0.37280947233173367
Cost on val dataset after 15265 epochs is = 0.07004755164651774
learning rate =  0.04434320528633859
Initial Cost on Val dataset for this epoch 15265 = 0.07004755164651774
Error on this batch = 0.3639682723087783
Error on this batch = 0.37280908363895493
Cost on val dataset after 15266 epochs is = 0.07004724772714607
learning rate =  0.04434223703066025
Initial Cost on Val dataset for this epoch 15266 = 0.07004724772714607
Error on this batch = 0.3639680125835065
Error on this batch = 0.37280869448410875
Cost on val dataset after 15267 epochs is = 0.07004694373780236
learning rate =  0.04434126885954574
Initial Cost on Val dataset for this epoch 15267 = 0.07004694373780236
Error on this batch = 0.3639677537144308
Error on this batch = 0.3728083048666299
Cost on val dataset after 15268 epochs is = 0.07004663967849559
learning rate =  0.044340300772982115
Initial Cost on Val dataset for this epoch 15268 = 0.07004663967849559
Error on this batch =

Error on this batch = 0.37279599414777986
Cost on val dataset after 15299 epochs is = 0.07003717918914996
learning rate =  0.04431033195637035
Initial Cost on Val dataset for this epoch 15299 = 0.07003717918914996
Error on this batch = 0.3639599256543184
Error on this batch = 0.3727955894240415
Cost on val dataset after 15300 epochs is = 0.07003687289600237
learning rate =  0.04430936656862835
Initial Cost on Val dataset for this epoch 15300 = 0.07003687289600237
Error on this batch = 0.3639596953863814
Error on this batch = 0.37279518421867613
Cost on val dataset after 15301 epochs is = 0.07003656653320964
learning rate =  0.044308401265012315
Initial Cost on Val dataset for this epoch 15301 = 0.07003656653320964
Error on this batch = 0.3639594659958715
Error on this batch = 0.3727947785310971
Cost on val dataset after 15302 epochs is = 0.07003626010078202
learning rate =  0.044307436045509456
Initial Cost on Val dataset for this epoch 15302 = 0.07003626010078202
Error on this batch =

Error on this batch = 0.37278195980419704
Cost on val dataset after 15333 epochs is = 0.07002672621372452
learning rate =  0.044277555891184754
Initial Cost on Val dataset for this epoch 15333 = 0.07002672621372452
Error on this batch = 0.36395259269761904
Error on this batch = 0.37278153837171163
Cost on val dataset after 15334 epochs is = 0.07002641755854536
learning rate =  0.04427659335654626
Initial Cost on Val dataset for this epoch 15334 = 0.07002641755854536
Error on this batch = 0.36395239263154777
Error on this batch = 0.37278111643724293
Cost on val dataset after 15335 epochs is = 0.07002610883408371
learning rate =  0.04427563090559916
Initial Cost on Val dataset for this epoch 15335 = 0.07002610883408371
Error on this batch = 0.363952193465556
Error on this batch = 0.3727806940001788
Cost on val dataset after 15336 epochs is = 0.07002580004035064
learning rate =  0.044274668538330716
Initial Cost on Val dataset for this epoch 15336 = 0.07002580004035064
Error on this batch

Error on this batch = 0.37276778411087913
Cost on val dataset after 15366 epochs is = 0.07001650407265757
learning rate =  0.04424583636780477
Initial Cost on Val dataset for this epoch 15366 = 0.07001650407265757
Error on this batch = 0.3639464695896477
Error on this batch = 0.372767345785136
Cost on val dataset after 15367 epochs is = 0.0700161931370795
learning rate =  0.04424487658827372
Initial Cost on Val dataset for this epoch 15367 = 0.0700161931370795
Error on this batch = 0.36394629959924046
Error on this batch = 0.372766906936759
Cost on val dataset after 15368 epochs is = 0.07001588213259141
learning rate =  0.04424391689201529
Initial Cost on Val dataset for this epoch 15368 = 0.07001588213259141
Error on this batch = 0.3639461305325272
Error on this batch = 0.37276646756510723
Cost on val dataset after 15369 epochs is = 0.07001557105920474
learning rate =  0.04424295727901685
Initial Cost on Val dataset for this epoch 15369 = 0.07001557105920474
Error on this batch = 0.36

Error on this batch = 0.3727530398746883
Cost on val dataset after 15399 epochs is = 0.07000620687669207
learning rate =  0.04421420754236227
Initial Cost on Val dataset for this epoch 15399 = 0.07000620687669207
Error on this batch = 0.36394135174657366
Error on this batch = 0.37275258395836885
Cost on val dataset after 15400 epochs is = 0.07000589367314211
learning rate =  0.04421325050416759
Initial Cost on Val dataset for this epoch 15400 = 0.07000589367314211
Error on this batch = 0.36394121264048496
Error on this batch = 0.3727521274977512
Cost on val dataset after 15401 epochs is = 0.07000558040106057
learning rate =  0.04421229354882977
Initial Cost on Val dataset for this epoch 15401 = 0.07000558040106057
Error on this batch = 0.36394107448359
Error on this batch = 0.3727516704921613
Cost on val dataset after 15402 epochs is = 0.07000526706045891
learning rate =  0.044211336676336255
Initial Cost on Val dataset for this epoch 15402 = 0.07000526706045891
Error on this batch = 0

Error on this batch = 0.37273722928609304
Cost on val dataset after 15433 epochs is = 0.06999551957752338
learning rate =  0.044181714651453485
Initial Cost on Val dataset for this epoch 15433 = 0.06999551957752338
Error on this batch = 0.36393715955110106
Error on this batch = 0.37273675447908744
Cost on val dataset after 15434 epochs is = 0.06999520405022744
learning rate =  0.04418076042336629
Initial Cost on Val dataset for this epoch 15434 = 0.06999520405022744
Error on this batch = 0.36393705318173675
Error on this batch = 0.3727362791042544
Cost on val dataset after 15435 epochs is = 0.06999488845478026
learning rate =  0.04417980627771068
Initial Cost on Val dataset for this epoch 15435 = 0.06999488845478026
Error on this batch = 0.3639369477901216
Error on this batch = 0.3727358031608815
Cost on val dataset after 15436 epochs is = 0.0699945727911927
learning rate =  0.044178852214474214
Initial Cost on Val dataset for this epoch 15436 = 0.0699945727911927
Error on this batch =

Error on this batch = 0.37272125690590746
Cost on val dataset after 15466 epochs is = 0.06998507125106866
learning rate =  0.044150268580579605
Initial Cost on Val dataset for this epoch 15466 = 0.06998507125106866
Error on this batch = 0.3639341705021727
Error on this batch = 0.3727207629778089
Cost on val dataset after 15467 epochs is = 0.06998475348036573
learning rate =  0.04414931706617025
Initial Cost on Val dataset for this epoch 15467 = 0.06998475348036573
Error on this batch = 0.3639340968752232
Error on this batch = 0.37272026845772127
Cost on val dataset after 15468 epochs is = 0.06998443564185458
learning rate =  0.044148365633782695
Initial Cost on Val dataset for this epoch 15468 = 0.06998443564185458
Error on this batch = 0.36393402425627314
Error on this batch = 0.3727197733448906
Cost on val dataset after 15469 epochs is = 0.06998411773554508
learning rate =  0.04414741428340457
Initial Cost on Val dataset for this epoch 15469 = 0.06998411773554508
Error on this batch 

Error on this batch = 0.37270412666814984
Cost on val dataset after 15500 epochs is = 0.06997422906340448
learning rate =  0.04411796303097897
Initial Cost on Val dataset for this epoch 15500 = 0.06997422906340448
Error on this batch = 0.36393223853611845
Error on this batch = 0.3727036121810742
Cost on val dataset after 15501 epochs is = 0.06997390899245438
learning rate =  0.04411701429838587
Initial Cost on Val dataset for this epoch 15501 = 0.06997390899245438
Error on this batch = 0.3639321997352144
Error on this batch = 0.3727030970755854
Cost on val dataset after 15502 epochs is = 0.06997358885400336
learning rate =  0.04411606564739539
Initial Cost on Val dataset for this epoch 15502 = 0.06997358885400336
Error on this batch = 0.3639321619764355
Error on this batch = 0.372702581350881
Cost on val dataset after 15503 epochs is = 0.06997326864805944
learning rate =  0.04411511707799527
Initial Cost on Val dataset for this epoch 15503 = 0.06997326864805944
Error on this batch = 0.

Error on this batch = 0.37268681763148315
Cost on val dataset after 15533 epochs is = 0.06996363112317351
learning rate =  0.04408669787469234
Initial Cost on Val dataset for this epoch 15533 = 0.06996363112317351
Error on this batch = 0.36393151421498177
Error on this batch = 0.37268628230466105
Cost on val dataset after 15534 epochs is = 0.0699633088286195
learning rate =  0.04408575182851548
Initial Cost on Val dataset for this epoch 15534 = 0.0699633088286195
Error on this batch = 0.3639315103764942
Error on this batch = 0.37268574633212376
Cost on val dataset after 15535 epochs is = 0.06996298646679694
learning rate =  0.044084805863537316
Initial Cost on Val dataset for this epoch 15535 = 0.06996298646679694
Error on this batch = 0.36393150761647475
Error on this batch = 0.37268520971301655
Cost on val dataset after 15536 epochs is = 0.06996266403771169
learning rate =  0.044083859979745676
Initial Cost on Val dataset for this epoch 15536 = 0.06996266403771169
Error on this batch

Error on this batch = 0.37266880617651565
Cost on val dataset after 15566 epochs is = 0.06995295991321668
learning rate =  0.0440555211573437
Initial Cost on Val dataset for this epoch 15566 = 0.06995295991321668
Error on this batch = 0.3639319634110613
Error on this batch = 0.37266824908055646
Cost on val dataset after 15567 epochs is = 0.06995263540138642
learning rate =  0.04405457778429935
Initial Cost on Val dataset for this epoch 15567 = 0.06995263540138642
Error on this batch = 0.36393199578980684
Error on this batch = 0.3726676913097728
Cost on val dataset after 15568 epochs is = 0.06995231082243186
learning rate =  0.04405363449205265
Initial Cost on Val dataset for this epoch 15568 = 0.06995231082243186
Error on this batch = 0.3639320292868944
Error on this batch = 0.3726671328632524
Cost on val dataset after 15569 epochs is = 0.06995198617635572
learning rate =  0.04405269128059148
Initial Cost on Val dataset for this epoch 15569 = 0.06995198617635572
Error on this batch = 0

Error on this batch = 0.37264948079196386
Cost on val dataset after 15600 epochs is = 0.06994188886536157
learning rate =  0.04402349172896921
Initial Cost on Val dataset for this epoch 15600 = 0.06994188886536157
Error on this batch = 0.3639336994310278
Error on this batch = 0.3726489002289803
Cost on val dataset after 15601 epochs is = 0.0699415620721554
learning rate =  0.04402255109626671
Initial Cost on Val dataset for this epoch 15601 = 0.0699415620721554
Error on this batch = 0.36393377056631193
Error on this batch = 0.37264831895909484
Cost on val dataset after 15602 epochs is = 0.06994123521185268
learning rate =  0.044021610543951574
Initial Cost on Val dataset for this epoch 15602 = 0.06994123521185268
Error on this batch = 0.3639338428651027
Error on this batch = 0.37264773698132986
Cost on val dataset after 15603 epochs is = 0.06994090828445208
learning rate =  0.0440206700720118
Initial Cost on Val dataset for this epoch 15603 = 0.06994090828445208
Error on this batch = 0

Cost on val dataset after 15633 epochs is = 0.06993106924927525
learning rate =  0.04399249322883217
Initial Cost on Val dataset for this epoch 15633 = 0.06993106924927525
Error on this batch = 0.363936669004066
Error on this batch = 0.3726293391330141
Cost on val dataset after 15634 epochs is = 0.06993074024037337
learning rate =  0.04399155524257938
Initial Cost on Val dataset for this epoch 15634 = 0.06993074024037337
Error on this batch = 0.36393677929381285
Error on this batch = 0.37262873397454754
Cost on val dataset after 15635 epochs is = 0.06993041116425215
learning rate =  0.043990617336318594
Initial Cost on Val dataset for this epoch 15635 = 0.06993041116425215
Error on this batch = 0.36393689079525954
Error on this batch = 0.3726281280747709
Cost on val dataset after 15636 epochs is = 0.06993008202090523
learning rate =  0.04398967951003788
Initial Cost on Val dataset for this epoch 15636 = 0.06993008202090523
Error on this batch = 0.3639370035099377
Error on this batch = 

Cost on val dataset after 15666 epochs is = 0.06992017642148216
learning rate =  0.04396158185324275
Initial Cost on Val dataset for this epoch 15666 = 0.06992017642148216
Error on this batch = 0.36394095687966344
Error on this batch = 0.37260897167290424
Cost on val dataset after 15667 epochs is = 0.06991984518997908
learning rate =  0.04396064650043707
Initial Cost on Val dataset for this epoch 15667 = 0.06991984518997908
Error on this batch = 0.3639411079882891
Error on this batch = 0.3726083414846435
Cost on val dataset after 15668 epochs is = 0.06991951389094854
learning rate =  0.043959711227230816
Initial Cost on Val dataset for this epoch 15668 = 0.06991951389094854
Error on this batch = 0.363941260361506
Error on this batch = 0.37260771051922925
Cost on val dataset after 15669 epochs is = 0.06991918252437786
learning rate =  0.043958776033612125
Initial Cost on Val dataset for this epoch 15669 = 0.06991918252437786
Error on this batch = 0.3639414140009945
Error on this batch =

Error on this batch = 0.37258841450589486
Cost on val dataset after 15699 epochs is = 0.06990921004918112
learning rate =  0.04393075717457499
Initial Cost on Val dataset for this epoch 15699 = 0.06990921004918112
Error on this batch = 0.36394662053302607
Error on this batch = 0.3725877588778493
Cost on val dataset after 15700 epochs is = 0.06990887658136168
learning rate =  0.04392982444230492
Initial Cost on Val dataset for this epoch 15700 = 0.06990887658136168
Error on this batch = 0.36394681428667797
Error on this batch = 0.37258710243537435
Cost on val dataset after 15701 epochs is = 0.06990854304547665
learning rate =  0.043928891789244424
Initial Cost on Val dataset for this epoch 15701 = 0.06990854304547665
Error on this batch = 0.3639470093629325
Error on this batch = 0.3725864451772629
Cost on val dataset after 15702 epochs is = 0.06990820944150555
learning rate =  0.04392795921538173
Initial Cost on Val dataset for this epoch 15702 = 0.06990820944150555
Error on this batch 

Cost on val dataset after 15731 epochs is = 0.06989850520733208
learning rate =  0.04390094897158494
Initial Cost on Val dataset for this epoch 15731 = 0.06989850520733208
Error on this batch = 0.3639534859862814
Error on this batch = 0.3725663420648143
Cost on val dataset after 15732 epochs is = 0.06989816954992202
learning rate =  0.04390001876819507
Initial Cost on Val dataset for this epoch 15732 = 0.06989816954992202
Error on this batch = 0.3639537230029555
Error on this batch = 0.3725656589084214
Cost on val dataset after 15733 epochs is = 0.06989783382365458
learning rate =  0.043899088643639346
Initial Cost on Val dataset for this epoch 15733 = 0.06989783382365458
Error on this batch = 0.36395396140379604
Error on this batch = 0.3725649748963214
Cost on val dataset after 15734 epochs is = 0.06989749802850008
learning rate =  0.04389815859790606
Initial Cost on Val dataset for this epoch 15734 = 0.06989749802850008
Error on this batch = 0.3639542011908156
Error on this batch = 0

Error on this batch = 0.372544050082956
Cost on val dataset after 15764 epochs is = 0.06988739198091497
learning rate =  0.043870293820456985
Initial Cost on Val dataset for this epoch 15764 = 0.06988739198091497
Error on this batch = 0.3639620495881903
Error on this batch = 0.37254333888520036
Cost on val dataset after 15765 epochs is = 0.06988705403374673
learning rate =  0.04386936621243411
Initial Cost on Val dataset for this epoch 15765 = 0.06988705403374673
Error on this batch = 0.36396233337590533
Error on this batch = 0.37254262678870737
Cost on val dataset after 15766 epochs is = 0.06988671601656432
learning rate =  0.043868438682860875
Initial Cost on Val dataset for this epoch 15766 = 0.06988671601656432
Error on this batch = 0.3639626186172097
Error on this batch = 0.37254191379208146
Cost on val dataset after 15767 epochs is = 0.06988637792932653
learning rate =  0.04386751123172565
Initial Cost on Val dataset for this epoch 15767 = 0.06988637792932653
Error on this batch 

Cost on val dataset after 15797 epochs is = 0.069876202515688
learning rate =  0.043839724113863096
Initial Cost on Val dataset for this epoch 15797 = 0.069876202515688
Error on this batch = 0.36397219434630773
Error on this batch = 0.37251935666884367
Cost on val dataset after 15798 epochs is = 0.06987586223408877
learning rate =  0.04383879908856132
Initial Cost on Val dataset for this epoch 15798 = 0.06987586223408877
Error on this batch = 0.36397252729897217
Error on this batch = 0.37251861411189807
Cost on val dataset after 15799 epochs is = 0.06987552188089827
learning rate =  0.043837874141327345
Initial Cost on Val dataset for this epoch 15799 = 0.06987552188089827
Error on this batch = 0.3639728617811721
Error on this batch = 0.37251787060694513
Cost on val dataset after 15800 epochs is = 0.06987518145606121
learning rate =  0.043836949272149636
Initial Cost on Val dataset for this epoch 15800 = 0.06987518145606121
Error on this batch = 0.3639731977953167
Error on this batch =

Error on this batch = 0.37249434336853043
Cost on val dataset after 15831 epochs is = 0.06986459242534718
learning rate =  0.04380831698075744
Initial Cost on Val dataset for this epoch 15831 = 0.06986459242534718
Error on this batch = 0.3639843875262199
Error on this batch = 0.3724935687116235
Cost on val dataset after 15832 epochs is = 0.06986424967569989
learning rate =  0.04380739460330731
Initial Cost on Val dataset for this epoch 15832 = 0.06986424967569989
Error on this batch = 0.3639847738740258
Error on this batch = 0.37249279305504374
Cost on val dataset after 15833 epochs is = 0.0698639068523018
learning rate =  0.04380647230353434
Initial Cost on Val dataset for this epoch 15833 = 0.0698639068523018
Error on this batch = 0.3639851618370043
Error on this batch = 0.37249201639716234
Cost on val dataset after 15834 epochs is = 0.06986356395508003
learning rate =  0.04380555008142709
Initial Cost on Val dataset for this epoch 15834 = 0.06986356395508003
Error on this batch = 0.

Error on this batch = 0.37246743428840595
Cost on val dataset after 15865 epochs is = 0.06985289709973792
learning rate =  0.0437769996559538
Initial Cost on Val dataset for this epoch 15865 = 0.06985289709973792
Error on this batch = 0.3639984455833614
Error on this batch = 0.3724666247052402
Cost on val dataset after 15866 epochs is = 0.06985255179811725
learning rate =  0.043776079913119825
Initial Cost on Val dataset for this epoch 15866 = 0.06985255179811725
Error on this batch = 0.3639988883701376
Error on this batch = 0.37246581406478674
Cost on val dataset after 15867 epochs is = 0.06985220641992808
learning rate =  0.04377516024757517
Initial Cost on Val dataset for this epoch 15867 = 0.06985220641992808
Error on this batch = 0.3639993328658454
Error on this batch = 0.3724650023652788
Cost on val dataset after 15868 epochs is = 0.06985186096507623
learning rate =  0.04377424065930845
Initial Cost on Val dataset for this epoch 15868 = 0.06985186096507623
Error on this batch = 0

Error on this batch = 0.37244014997681774
Cost on val dataset after 15898 epochs is = 0.0698414611750262
learning rate =  0.04374668888927838
Initial Cost on Val dataset for this epoch 15898 = 0.0698414611750262
Error on this batch = 0.36401397593245577
Error on this batch = 0.372439304546216
Cost on val dataset after 15899 epochs is = 0.06984111329298735
learning rate =  0.04374577169100384
Initial Cost on Val dataset for this epoch 15899 = 0.06984111329298735
Error on this batch = 0.36401447668082315
Error on this batch = 0.3724384579976706
Cost on val dataset after 15900 epochs is = 0.06984076533088583
learning rate =  0.043744854569644824
Initial Cost on Val dataset for this epoch 15900 = 0.06984076533088583
Error on this batch = 0.3640149792375275
Error on this batch = 0.372437610329265
Cost on val dataset after 15901 epochs is = 0.06984041728860257
learning rate =  0.043743937525190034
Initial Cost on Val dataset for this epoch 15901 = 0.06984041728860257
Error on this batch = 0.

Error on this batch = 0.3724107664724262
Cost on val dataset after 15932 epochs is = 0.06982958751849369
learning rate =  0.043715547230116225
Initial Cost on Val dataset for this epoch 15932 = 0.06982958751849369
Error on this batch = 0.36403203508120047
Error on this batch = 0.3724098819268474
Cost on val dataset after 15933 epochs is = 0.06982923684237718
learning rate =  0.043714632640651334
Initial Cost on Val dataset for this epoch 15933 = 0.06982923684237718
Error on this batch = 0.36403259912941255
Error on this batch = 0.3724089961953482
Cost on val dataset after 15934 epochs is = 0.06982888608165948
learning rate =  0.04371371812771952
Initial Cost on Val dataset for this epoch 15934 = 0.06982888608165948
Error on this batch = 0.3640331650977368
Error on this batch = 0.3724081092758378
Cost on val dataset after 15935 epochs is = 0.06982853523619087
learning rate =  0.04371280369130959
Initial Cost on Val dataset for this epoch 15935 = 0.06982853523619087
Error on this batch =

Error on this batch = 0.3723809386658955
Cost on val dataset after 15965 epochs is = 0.06981796967895752
learning rate =  0.04368540612619348
Initial Cost on Val dataset for this epoch 15965 = 0.06981796967895752
Error on this batch = 0.36405168162120083
Error on this batch = 0.37238001385024583
Cost on val dataset after 15966 epochs is = 0.06981761612637921
learning rate =  0.043684494056415855
Initial Cost on Val dataset for this epoch 15966 = 0.06981761612637921
Error on this batch = 0.36405231089801077
Error on this batch = 0.37237908777666806
Cost on val dataset after 15967 epochs is = 0.06981726248371174
learning rate =  0.04368358206280272
Initial Cost on Val dataset for this epoch 15967 = 0.06981726248371174
Error on this batch = 0.3640529422132052
Error on this batch = 0.3723781604428788
Cost on val dataset after 15968 epochs is = 0.06981690875077017
learning rate =  0.04368267014534295
Initial Cost on Val dataset for this epoch 15968 = 0.06981690875077017
Error on this batch 

Error on this batch = 0.3723497428425484
Cost on val dataset after 15998 epochs is = 0.06980625381908856
learning rate =  0.0436553479777537
Initial Cost on Val dataset for this epoch 15998 = 0.06980625381908856
Error on this batch = 0.36407354483354737
Error on this batch = 0.3723487752743614
Cost on val dataset after 15999 epochs is = 0.06980589718937401
learning rate =  0.043654438415540384
Initial Cost on Val dataset for this epoch 15999 = 0.06980589718937401
Error on this batch = 0.3640742434055681
Error on this batch = 0.3723478063694381
Cost on val dataset after 16000 epochs is = 0.06980554046282501
learning rate =  0.043653528929125496
Initial Cost on Val dataset for this epoch 16000 = 0.06980554046282501
Error on this batch = 0.36407494414459274
Error on this batch = 0.37234683612527375
Cost on val dataset after 16001 epochs is = 0.0698051836392154
learning rate =  0.04365261951849799
Initial Cost on Val dataset for this epoch 16001 = 0.0698051836392154
Error on this batch = 0

Error on this batch = 0.37231709332583235
Cost on val dataset after 16031 epochs is = 0.0697944326192638
learning rate =  0.04362537238607172
Initial Cost on Val dataset for this epoch 16031 = 0.0697944326192638
Error on this batch = 0.3640977644947189
Error on this batch = 0.37231608028201424
Cost on val dataset after 16032 epochs is = 0.06979407266698301
learning rate =  0.043624465319382896
Initial Cost on Val dataset for this epoch 16032 = 0.06979407266698301
Error on this batch = 0.3640985367811056
Error on this batch = 0.3723150658148008
Cost on val dataset after 16033 epochs is = 0.06979371260965854
learning rate =  0.04362355832812896
Initial Cost on Val dataset for this epoch 16033 = 0.06979371260965854
Error on this batch = 0.36409931137414975
Error on this batch = 0.37231404992143063
Cost on val dataset after 16034 epochs is = 0.06979335244701648
learning rate =  0.04362265141229892
Initial Cost on Val dataset for this epoch 16034 = 0.06979335244701648
Error on this batch = 

Error on this batch = 0.37228077134447274
Cost on val dataset after 16066 epochs is = 0.06978176991253308
learning rate =  0.04359366986398493
Initial Cost on Val dataset for this epoch 16066 = 0.06978176991253308
Error on this batch = 0.3641261964166265
Error on this batch = 0.37227970678154554
Cost on val dataset after 16067 epochs is = 0.06978140611027754
learning rate =  0.04359276543099994
Initial Cost on Val dataset for this epoch 16067 = 0.06978140611027754
Error on this batch = 0.36412705213162644
Error on this batch = 0.3722786406932967
Cost on val dataset after 16068 epochs is = 0.06978104219242871
learning rate =  0.043591861073066965
Initial Cost on Val dataset for this epoch 16068 = 0.06978104219242871
Error on this batch = 0.3641279103141528
Error on this batch = 0.37227757307664516
Cost on val dataset after 16069 epochs is = 0.06978067815865477
learning rate =  0.043590956790175085
Initial Cost on Val dataset for this epoch 16069 = 0.06978067815865477
Error on this batch

Cost on val dataset after 16100 epochs is = 0.0697693337189277
learning rate =  0.043562961181560944
Initial Cost on Val dataset for this epoch 16100 = 0.0697693337189277
Error on this batch = 0.3641567042254137
Error on this batch = 0.3722425833760286
Cost on val dataset after 16101 epochs is = 0.06976896578952095
learning rate =  0.043562059294243775
Initial Cost on Val dataset for this epoch 16101 = 0.06976896578952095
Error on this batch = 0.3641576465938492
Error on this batch = 0.3722414635262187
Cost on val dataset after 16102 epochs is = 0.06976859773221478
learning rate =  0.04356115748160933
Initial Cost on Val dataset for this epoch 16102 = 0.06976859773221478
Error on this batch = 0.3641585915991806
Error on this batch = 0.37224034203702916
Cost on val dataset after 16103 epochs is = 0.0697682295466142
learning rate =  0.043560255743646796
Initial Cost on Val dataset for this epoch 16103 = 0.0697682295466142
Error on this batch = 0.3641595392465881
Error on this batch = 0.3

Error on this batch = 0.37220591746606846
Cost on val dataset after 16133 epochs is = 0.06975712228155363
learning rate =  0.04353323827362353
Initial Cost on Val dataset for this epoch 16133 = 0.06975712228155363
Error on this batch = 0.3641892233936488
Error on this batch = 0.3722047433808753
Cost on val dataset after 16134 epochs is = 0.06975674991206969
learning rate =  0.043532338845134744
Initial Cost on Val dataset for this epoch 16134 = 0.06975674991206969
Error on this batch = 0.3641902555768526
Error on this batch = 0.37220356753956196
Cost on val dataset after 16135 epochs is = 0.06975637740059583
learning rate =  0.04353143949097274
Initial Cost on Val dataset for this epoch 16135 = 0.06975637740059583
Error on this batch = 0.3641912905738978
Error on this batch = 0.372202389938268
Cost on val dataset after 16136 epochs is = 0.06975600474667025
learning rate =  0.043530540211126775
Initial Cost on Val dataset for this epoch 16136 = 0.06975600474667025
Error on this batch = 

Cost on val dataset after 16165 epochs is = 0.0697451336649248
learning rate =  0.04350449337483888
Initial Cost on Val dataset for this epoch 16165 = 0.0697451336649248
Error on this batch = 0.3642236768671664
Error on this batch = 0.3721662237924994
Cost on val dataset after 16166 epochs is = 0.06974475651225678
learning rate =  0.04350359631948727
Initial Cost on Val dataset for this epoch 16166 = 0.06974475651225678
Error on this batch = 0.36422480190278506
Error on this batch = 0.3721649896407609
Cost on val dataset after 16167 epochs is = 0.06974437920177243
learning rate =  0.0435026993381196
Initial Cost on Val dataset for this epoch 16167 = 0.06974437920177243
Error on this batch = 0.36422592993541353
Error on this batch = 0.37216375359804266
Cost on val dataset after 16168 epochs is = 0.06974400173294182
learning rate =  0.043501802430725205
Initial Cost on Val dataset for this epoch 16168 = 0.06974400173294182
Error on this batch = 0.36422706097095237
Error on this batch = 0

Cost on val dataset after 16200 epochs is = 0.069731835840479
learning rate =  0.043473140388211894
Initial Cost on Val dataset for this epoch 16200 = 0.069731835840479
Error on this batch = 0.3642648754829116
Error on this batch = 0.3721218741325398
Cost on val dataset after 16201 epochs is = 0.06973145283492883
learning rate =  0.04347224591595971
Initial Cost on Val dataset for this epoch 16201 = 0.06973145283492883
Error on this batch = 0.36426610899027384
Error on this batch = 0.3721205711117038
Cost on val dataset after 16202 epochs is = 0.06973106965174238
learning rate =  0.04347135151731907
Initial Cost on Val dataset for this epoch 16202 = 0.06973106965174238
Error on this batch = 0.3642673457070369
Error on this batch = 0.3721192660385312
Cost on val dataset after 16203 epochs is = 0.06973068629031517
learning rate =  0.04347045719227937
Initial Cost on Val dataset for this epoch 16203 = 0.06973068629031517
Error on this batch = 0.3642685856394354
Error on this batch = 0.372

Cost on val dataset after 16235 epochs is = 0.06971832088841518
learning rate =  0.04344187758898535
Initial Cost on Val dataset for this epoch 16235 = 0.06971832088841518
Error on this batch = 0.3643099990591743
Error on this batch = 0.3720750142158035
Cost on val dataset after 16236 epochs is = 0.06971793129251994
learning rate =  0.043440985686843944
Initial Cost on Val dataset for this epoch 16236 = 0.06971793129251994
Error on this batch = 0.3643113486565725
Error on this batch = 0.3720736363273075
Cost on val dataset after 16237 epochs is = 0.06971754149662464
learning rate =  0.043440093857944354
Initial Cost on Val dataset for this epoch 16237 = 0.06971754149662464
Error on this batch = 0.3643127016860966
Error on this batch = 0.37207225620415285
Cost on val dataset after 16238 epochs is = 0.06971715150005531
learning rate =  0.04343920210227605
Initial Cost on Val dataset for this epoch 16238 = 0.06971715150005531
Error on this batch = 0.36431405815422685
Error on this batch =

Cost on val dataset after 16270 epochs is = 0.06970456152855013
learning rate =  0.04341070452414168
Initial Cost on Val dataset for this epoch 16270 = 0.06970456152855013
Error on this batch = 0.3643593197268611
Error on this batch = 0.37202542132488037
Cost on val dataset after 16271 epochs is = 0.06970416452024229
learning rate =  0.04340981517921551
Initial Cost on Val dataset for this epoch 16271 = 0.06970416452024229
Error on this batch = 0.3643607933316109
Error on this batch = 0.372023961811408
Cost on val dataset after 16272 epochs is = 0.06970376728733257
learning rate =  0.04340892590716407
Initial Cost on Val dataset for this epoch 16272 = 0.06970376728733257
Error on this batch = 0.36436227059751497
Error on this batch = 0.3720224998587373
Cost on val dataset after 16273 epochs is = 0.0697033698290904
learning rate =  0.043408036707976896
Initial Cost on Val dataset for this epoch 16273 = 0.0697033698290904
Error on this batch = 0.3643637515311529
Error on this batch = 0.3

Cost on val dataset after 16304 epochs is = 0.0696909328155238
learning rate =  0.043380507616942
Initial Cost on Val dataset for this epoch 16304 = 0.0696909328155238
Error on this batch = 0.3644115155169071
Error on this batch = 0.3719743917289894
Cost on val dataset after 16305 epochs is = 0.06969052775375037
learning rate =  0.04337962074390711
Initial Cost on Val dataset for this epoch 16305 = 0.06969052775375037
Error on this batch = 0.36441311728330034
Error on this batch = 0.3719728457157026
Cost on val dataset after 16306 epochs is = 0.06969012244190068
learning rate =  0.04337873394339285
Initial Cost on Val dataset for this epoch 16306 = 0.06969012244190068
Error on this batch = 0.36441472293339444
Error on this batch = 0.37197129704365794
Cost on val dataset after 16307 epochs is = 0.06968971687921008
learning rate =  0.04337784721538885
Initial Cost on Val dataset for this epoch 16307 = 0.06968971687921008
Error on this batch = 0.36441633247365657
Error on this batch = 0.3

Cost on val dataset after 16339 epochs is = 0.06967660183159076
learning rate =  0.04334951014269448
Initial Cost on Val dataset for this epoch 16339 = 0.06967660183159076
Error on this batch = 0.36446993000618366
Error on this batch = 0.37191865437212557
Cost on val dataset after 16340 epochs is = 0.06967618755978829
learning rate =  0.043348625801721496
Initial Cost on Val dataset for this epoch 16340 = 0.06967618755978829
Error on this batch = 0.3644716714888789
Error on this batch = 0.37191701119303344
Cost on val dataset after 16341 epochs is = 0.06967577301098173
learning rate =  0.04334774153290722
Initial Cost on Val dataset for this epoch 16341 = 0.06967577301098173
Error on this batch = 0.3644734170755702
Error on this batch = 0.37191536511067846
Cost on val dataset after 16342 epochs is = 0.06967535818440311
learning rate =  0.043346857336241336
Initial Cost on Val dataset for this epoch 16342 = 0.06967535818440311
Error on this batch = 0.36447516677230907
Error on this batc

Initial Cost on Val dataset for this epoch 16371 = 0.06966320395509328
Error on this batch = 0.36452772236076725
Error on this batch = 0.37186459656077553
Cost on val dataset after 16372 epochs is = 0.0696627804419077
learning rate =  0.04332036493425269
Initial Cost on Val dataset for this epoch 16372 = 0.0696627804419077
Error on this batch = 0.36452959807504653
Error on this batch = 0.37186285686088866
Cost on val dataset after 16373 epochs is = 0.06966235662756017
learning rate =  0.04331948296908826
Initial Cost on Val dataset for this epoch 16373 = 0.06966235662756017
Error on this batch = 0.3645314780767139
Error on this batch = 0.37186111402325767
Cost on val dataset after 16374 epochs is = 0.06966193251131633
learning rate =  0.04331860107574364
Initial Cost on Val dataset for this epoch 16374 = 0.06966193251131633
Error on this batch = 0.3645333623710965
Error on this batch = 0.3718593680404696
Cost on val dataset after 16375 epochs is = 0.06966150809244355
learning rate =  0

Error on this batch = 0.37180548927916407
Cost on val dataset after 16405 epochs is = 0.06964863125099645
learning rate =  0.043291297948931905
Initial Cost on Val dataset for this epoch 16405 = 0.06964863125099645
Error on this batch = 0.3645939323073597
Error on this batch = 0.37180364212117895
Cost on val dataset after 16406 epochs is = 0.06964819709845167
learning rate =  0.04329041834842881
Initial Cost on Val dataset for this epoch 16406 = 0.06964819709845167
Error on this batch = 0.36459595659108784
Error on this batch = 0.37180179158146354
Cost on val dataset after 16407 epochs is = 0.06964776262098406
learning rate =  0.04328953881940889
Initial Cost on Val dataset for this epoch 16407 = 0.06964776262098406
Error on this batch = 0.3645979853248116
Error on this batch = 0.37179993765270897
Cost on val dataset after 16408 epochs is = 0.06964732781794107
learning rate =  0.043288659361861985
Initial Cost on Val dataset for this epoch 16408 = 0.06964732781794107
Error on this batc

Error on this batch = 0.3717387793979194
Cost on val dataset after 16440 epochs is = 0.06963323849438621
learning rate =  0.043260554397388365
Initial Cost on Val dataset for this epoch 16440 = 0.06963323849438621
Error on this batch = 0.3646674545697753
Error on this batch = 0.3717368096122803
Cost on val dataset after 16441 epochs is = 0.06963279260462714
learning rate =  0.04325967729276144
Initial Cost on Val dataset for this epoch 16441 = 0.06963279260462714
Error on this batch = 0.36466963675137437
Error on this batch = 0.3717348361972773
Cost on val dataset after 16442 epochs is = 0.06963234636931202
learning rate =  0.043258800259263125
Initial Cost on Val dataset for this epoch 16442 = 0.06963234636931202
Error on this batch = 0.3646718234987398
Error on this batch = 0.37173285914617266
Cost on val dataset after 16443 epochs is = 0.06963189978792794
learning rate =  0.04325792329688332
Initial Cost on Val dataset for this epoch 16443 = 0.06963189978792794
Error on this batch =

Error on this batch = 0.3716676347532405
Cost on val dataset after 16475 epochs is = 0.06961742365911328
learning rate =  0.04322989799101068
Initial Cost on Val dataset for this epoch 16475 = 0.06961742365911328
Error on this batch = 0.3647465596971449
Error on this batch = 0.37166553411441583
Cost on val dataset after 16476 epochs is = 0.06961696540391289
learning rate =  0.04322902336989261
Initial Cost on Val dataset for this epoch 16476 = 0.06961696540391289
Error on this batch = 0.3647489026684433
Error on this batch = 0.37166342962795085
Cost on val dataset after 16477 epochs is = 0.06961650678839836
learning rate =  0.04322814881955108
Initial Cost on Val dataset for this epoch 16477 = 0.06961650678839836
Error on this batch = 0.3647512502496388
Error on this batch = 0.37166132128824186
Cost on val dataset after 16478 epochs is = 0.06961604781225456
learning rate =  0.04322727433997607
Initial Cost on Val dataset for this epoch 16478 = 0.06961604781225456
Error on this batch = 

Error on this batch = 0.37159178869354315
Cost on val dataset after 16510 epochs is = 0.06960116863178527
learning rate =  0.04319932829843035
Initial Cost on Val dataset for this epoch 16510 = 0.06960116863178527
Error on this batch = 0.36483130259849866
Error on this batch = 0.3715895503491187
Cost on val dataset after 16511 epochs is = 0.06960069762287248
learning rate =  0.04319845614854118
Initial Cost on Val dataset for this epoch 16511 = 0.06960069762287248
Error on this batch = 0.36483380641071933
Error on this batch = 0.37158730798871853
Cost on val dataset after 16512 epochs is = 0.06960022624676489
learning rate =  0.04319758406907898
Initial Cost on Val dataset for this epoch 16512 = 0.06960022624676489
Error on this batch = 0.3648363147859189
Error on this batch = 0.3715850616084774
Cost on val dataset after 16513 epochs is = 0.06959975450340038
learning rate =  0.04319671206003379
Initial Cost on Val dataset for this epoch 16513 = 0.06959975450340038
Error on this batch =

Error on this batch = 0.3715157843068827
Cost on val dataset after 16543 epochs is = 0.06958543145458755
learning rate =  0.04317058448330921
Initial Cost on Val dataset for this epoch 16543 = 0.06958543145458755
Error on this batch = 0.36491631834818483
Error on this batch = 0.37151341173240937
Cost on val dataset after 16544 epochs is = 0.06958494834040393
learning rate =  0.04316971465226621
Initial Cost on Val dataset for this epoch 16544 = 0.06958494834040393
Error on this batch = 0.36491897073152874
Error on this batch = 0.37151103504779104
Cost on val dataset after 16545 epochs is = 0.06958446486137793
learning rate =  0.043168844891322826
Initial Cost on Val dataset for this epoch 16545 = 0.06958446486137793
Error on this batch = 0.36492162753486107
Error on this batch = 0.37150865425133134
Cost on val dataset after 16546 epochs is = 0.06958398101772874
learning rate =  0.043167975200469166
Initial Cost on Val dataset for this epoch 16546 = 0.06958398101772874
Error on this bat

Error on this batch = 0.3714302915074
Cost on val dataset after 16578 epochs is = 0.06956830729365807
learning rate =  0.04314018204147592
Initial Cost on Val dataset for this epoch 16578 = 0.06956830729365807
Error on this batch = 0.3650117374766071
Error on this batch = 0.3714277745658888
Cost on val dataset after 16579 epochs is = 0.06956781160091847
learning rate =  0.04313931465805037
Initial Cost on Val dataset for this epoch 16579 = 0.06956781160091847
Error on this batch = 0.3650145403637567
Error on this batch = 0.37142525350068556
Cost on val dataset after 16580 epochs is = 0.06956731555654454
learning rate =  0.04313844734437961
Initial Cost on Val dataset for this epoch 16580 = 0.06956731555654454
Error on this batch = 0.36501734740290176
Error on this batch = 0.3714227283128582
Cost on val dataset after 16581 epochs is = 0.06956681916108495
learning rate =  0.043137580100453835
Initial Cost on Val dataset for this epoch 16581 = 0.06956681916108495
Error on this batch = 0.3

Error on this batch = 0.37133975643910605
Cost on val dataset after 16613 epochs is = 0.06955075292572267
learning rate =  0.043109865061539514
Initial Cost on Val dataset for this epoch 16613 = 0.06955075292572267
Error on this batch = 0.36511224155657673
Error on this batch = 0.3713370963674561
Cost on val dataset after 16614 epochs is = 0.06955024531592766
learning rate =  0.04310900011370394
Initial Cost on Val dataset for this epoch 16614 = 0.06955024531592766
Error on this batch = 0.3651151834543363
Error on this batch = 0.37133443226104357
Cost on val dataset after 16615 epochs is = 0.06954973737949235
learning rate =  0.04310813523528075
Initial Cost on Val dataset for this epoch 16615 = 0.06954973737949235
Error on this batch = 0.3651181291117165
Error on this batch = 0.37133176412400143
Cost on val dataset after 16616 epochs is = 0.06954922911730571
learning rate =  0.04310727042626021
Initial Cost on Val dataset for this epoch 16616 = 0.06954922911730571
Error on this batch 

Cost on val dataset after 16647 epochs is = 0.0695333167083288
learning rate =  0.04308049571724372
Initial Cost on Val dataset for this epoch 16647 = 0.0695333167083288
Error on this batch = 0.3652142928554454
Error on this batch = 0.3712442849954519
Cost on val dataset after 16648 epochs is = 0.06953279854820138
learning rate =  0.043079633123972824
Initial Cost on Val dataset for this epoch 16648 = 0.06953279854820138
Error on this batch = 0.36521735470309125
Error on this batch = 0.37124148675324214
Cost on val dataset after 16649 epochs is = 0.06953228009694078
learning rate =  0.04307877059978399
Initial Cost on Val dataset for this epoch 16649 = 0.06953228009694078
Error on this batch = 0.3652204198144304
Error on this batch = 0.37123868467422827
Cost on val dataset after 16650 epochs is = 0.06953176135575119
learning rate =  0.04307790814466754
Initial Cost on Val dataset for this epoch 16650 = 0.06953176135575119
Error on this batch = 0.3652234881732818
Error on this batch = 0

Cost on val dataset after 16682 epochs is = 0.06951501620689755
learning rate =  0.04305034599333072
Initial Cost on Val dataset for this epoch 16682 = 0.06951501620689755
Error on this batch = 0.36532328915926193
Error on this batch = 0.37114411561732247
Cost on val dataset after 16683 epochs is = 0.06951448862880488
learning rate =  0.043049485812184526
Initial Cost on Val dataset for this epoch 16683 = 0.06951448862880488
Error on this batch = 0.36532645507021794
Error on this batch = 0.3711411879647649
Cost on val dataset after 16684 epochs is = 0.06951396080666157
learning rate =  0.043048625699782704
Initial Cost on Val dataset for this epoch 16684 = 0.06951396080666157
Error on this batch = 0.36532962363057864
Error on this batch = 0.37113825677984535
Cost on val dataset after 16685 epochs is = 0.06951343274195194
learning rate =  0.04304776565611564
Initial Cost on Val dataset for this epoch 16685 = 0.06951343274195194
Error on this batch = 0.36533279482146136
Error on this bat

Cost on val dataset after 16717 epochs is = 0.06949641582785858
learning rate =  0.043020280493293386
Initial Cost on Val dataset for this epoch 16717 = 0.06949641582785858
Error on this batch = 0.3654355455974834
Error on this batch = 0.3710396164630385
Cost on val dataset after 16718 epochs is = 0.06949588063407039
learning rate =  0.043019422712492546
Initial Cost on Val dataset for this epoch 16718 = 0.06949588063407039
Error on this batch = 0.36543879260394635
Error on this batch = 0.371036571709389
Cost on val dataset after 16719 epochs is = 0.06949534525196384
learning rate =  0.04301856500010073
Initial Cost on Val dataset for this epoch 16719 = 0.06949534525196384
Error on this batch = 0.3654420415639663
Error on this batch = 0.3710335238222581
Cost on val dataset after 16720 epochs is = 0.06949480968323125
learning rate =  0.043017707356108395
Initial Cost on Val dataset for this epoch 16720 = 0.06949480968323125
Error on this batch = 0.3654452924567569
Error on this batch = 

Cost on val dataset after 16752 epochs is = 0.06947758329078886
learning rate =  0.042990298806250486
Initial Cost on Val dataset for this epoch 16752 = 0.06947758329078886
Error on this batch = 0.36555021524729386
Error on this batch = 0.37093127108526447
Cost on val dataset after 16753 epochs is = 0.06947704253919282
learning rate =  0.04298944341409765
Initial Cost on Val dataset for this epoch 16753 = 0.06947704253919282
Error on this batch = 0.365553518032703
Error on this batch = 0.3709281245192892
Cost on val dataset after 16754 epochs is = 0.06947650166076753
learning rate =  0.042988588090020825
Initial Cost on Val dataset for this epoch 16754 = 0.06947650166076753
Error on this batch = 0.3655568220248646
Error on this batch = 0.3709249752927266
Cost on val dataset after 16755 epochs is = 0.0694759606573213
learning rate =  0.04298773283401053
Initial Cost on Val dataset for this epoch 16755 = 0.0694759606573213
Error on this batch = 0.36556012720202447
Error on this batch = 0

Cost on val dataset after 16787 epochs is = 0.06945859342620792
learning rate =  0.04296040052417834
Initial Cost on Val dataset for this epoch 16787 = 0.06945859342620792
Error on this batch = 0.3656663879255864
Error on this batch = 0.3708196539247938
Cost on val dataset after 16788 epochs is = 0.06945804931923999
learning rate =  0.04295954750905741
Initial Cost on Val dataset for this epoch 16788 = 0.06945804931923999
Error on this batch = 0.3656697199567163
Error on this batch = 0.37081642299139506
Cost on val dataset after 16789 epochs is = 0.06945750514928921
learning rate =  0.0429586945616818
Initial Cost on Val dataset for this epoch 16789 = 0.06945750514928921
Error on this batch = 0.3656730524303069
Error on this batch = 0.3708131899180438
Cost on val dataset after 16790 epochs is = 0.06945696091817948
learning rate =  0.04295784168204208
Initial Cost on Val dataset for this epoch 16790 = 0.06945696091817948
Error on this batch = 0.36567638532459273
Error on this batch = 0.

Cost on val dataset after 16822 epochs is = 0.06943952406794529
learning rate =  0.04293058524188501
Initial Cost on Val dataset for this epoch 16822 = 0.06943952406794529
Error on this batch = 0.36578313072892893
Error on this batch = 0.3707053986344141
Cost on val dataset after 16823 epochs is = 0.06943897883128994
learning rate =  0.04292973459226044
Initial Cost on Val dataset for this epoch 16823 = 0.06943897883128994
Error on this batch = 0.36578646541667426
Error on this batch = 0.3707021019641307
Cost on val dataset after 16824 epochs is = 0.06943843359440159
learning rate =  0.042928884010052765
Initial Cost on Val dataset for this epoch 16824 = 0.06943843359440159
Error on this batch = 0.3657897997972562
Error on this batch = 0.37069880369364866
Cost on val dataset after 16825 epochs is = 0.06943788835902512
learning rate =  0.04292803349525264
Initial Cost on Val dataset for this epoch 16825 = 0.06943788835902512
Error on this batch = 0.36579313384975964
Error on this batch 

Cost on val dataset after 16857 epochs is = 0.06942045186381335
learning rate =  0.04290085255698483
Initial Cost on Val dataset for this epoch 16857 = 0.06942045186381335
Error on this batch = 0.36589952717290547
Error on this batch = 0.37058916342565384
Cost on val dataset after 16858 epochs is = 0.06941990763118305
learning rate =  0.04290000426140086
Initial Cost on Val dataset for this epoch 16858 = 0.06941990763118305
Error on this batch = 0.36590283892449166
Error on this batch = 0.37058581984669103
Cost on val dataset after 16859 epochs is = 0.06941936345692538
learning rate =  0.04289915603290765
Initial Cost on Val dataset for this epoch 16859 = 0.06941936345692538
Error on this batch = 0.36590614966132123
Error on this batch = 0.37058247519922816
Cost on val dataset after 16860 epochs is = 0.06941881934262895
learning rate =  0.042898307871495914
Initial Cost on Val dataset for this epoch 16860 = 0.06941881934262895
Error on this batch = 0.36590945936401703
Error on this bat

Cost on val dataset after 16892 epochs is = 0.0694014485580761
learning rate =  0.042871202069873174
Initial Cost on Val dataset for this epoch 16892 = 0.0694014485580761
Error on this batch = 0.3660147107140081
Error on this batch = 0.3704715980148703
Cost on val dataset after 16893 epochs is = 0.06940090727668516
learning rate =  0.042870356116953154
Initial Cost on Val dataset for this epoch 16893 = 0.06940090727668516
Error on this batch = 0.3660179757786281
Error on this batch = 0.3704682256750262
Cost on val dataset after 16894 epochs is = 0.06940036610584183
learning rate =  0.0428695102308
Initial Cost on Val dataset for this epoch 16894 = 0.06940036610584183
Error on this batch = 0.36602123918350643
Error on this batch = 0.37046485276793933
Cost on val dataset after 16895 epochs is = 0.06939982504692661
learning rate =  0.04286866441140449
Initial Cost on Val dataset for this epoch 16895 = 0.06939982504692661
Error on this batch = 0.3660245009112848
Error on this batch = 0.370

Cost on val dataset after 16927 epochs is = 0.06938257817832778
learning rate =  0.042841633383701554
Initial Cost on Val dataset for this epoch 16927 = 0.06938257817832778
Error on this batch = 0.3661278901754041
Error on this batch = 0.3703533162157811
Cost on val dataset after 16928 epochs is = 0.06938204154456487
learning rate =  0.0428407897621472
Initial Cost on Val dataset for this epoch 16928 = 0.06938204154456487
Error on this batch = 0.36613108724257065
Error on this batch = 0.3703499319131729
Cost on val dataset after 16929 epochs is = 0.06938150506574314
learning rate =  0.04283994620703805
Initial Cost on Val dataset for this epoch 16929 = 0.06938150506574314
Error on this batch = 0.3661342820811089
Error on this batch = 0.3703465474987119
Cost on val dataset after 16930 epochs is = 0.0693809687430103
learning rate =  0.04283910271836495
Initial Cost on Val dataset for this epoch 16930 = 0.0693809687430103
Error on this batch = 0.36613747467596935
Error on this batch = 0.3

Cost on val dataset after 16962 epochs is = 0.069363895344964
learning rate =  0.042812146104352904
Initial Cost on Val dataset for this epoch 16962 = 0.069363895344964
Error on this batch = 0.3662383666142584
Error on this batch = 0.37023487683804845
Cost on val dataset after 16963 epochs is = 0.0693633647729156
learning rate =  0.04281130480294362
Initial Cost on Val dataset for this epoch 16963 = 0.0693633647729156
Error on this batch = 0.366241477178545
Error on this batch = 0.37023149560941676
Cost on val dataset after 16964 epochs is = 0.069362834391979
learning rate =  0.0428104635676601
Initial Cost on Val dataset for this epoch 16964 = 0.069362834391979
Error on this batch = 0.3662445850288361
Error on this batch = 0.37022811467050765
Cost on val dataset after 16965 epochs is = 0.06936230420306821
learning rate =  0.04280962239849323
Initial Cost on Val dataset for this epoch 16965 = 0.06936230420306821
Error on this batch = 0.36624769015252945
Error on this batch = 0.37022473

Error on this batch = 0.37012687244198517
Cost on val dataset after 16995 epochs is = 0.06934649209906528
learning rate =  0.042784418022694455
Initial Cost on Val dataset for this epoch 16995 = 0.06934649209906528
Error on this batch = 0.36633951638738554
Error on this batch = 0.3701235055448867
Cost on val dataset after 16996 epochs is = 0.0693459682814939
learning rate =  0.04278357889864257
Initial Cost on Val dataset for this epoch 16996 = 0.0693459682814939
Error on this batch = 0.36634253112252096
Error on this batch = 0.37012013925610704
Cost on val dataset after 16997 epochs is = 0.06934544468185282
learning rate =  0.04278273984041724
Initial Cost on Val dataset for this epoch 16997 = 0.06934544468185282
Error on this batch = 0.36634554276521897
Error on this batch = 0.37011677358486966
Cost on val dataset after 16998 epochs is = 0.06934492130085033
learning rate =  0.04278190084800943
Initial Cost on Val dataset for this epoch 16998 = 0.06934492130085033
Error on this batch 

Error on this batch = 0.37000945110958733
Cost on val dataset after 17030 epochs is = 0.06932829248580964
learning rate =  0.042755087788630866
Initial Cost on Val dataset for this epoch 17030 = 0.06932829248580964
Error on this batch = 0.3664431286361932
Error on this batch = 0.37000611071132844
Cost on val dataset after 17031 epochs is = 0.06932777667966737
learning rate =  0.042754250963143524
Initial Cost on Val dataset for this epoch 17031 = 0.06932777667966737
Error on this batch = 0.36644602949034394
Error on this batch = 0.3700027712189016
Cost on val dataset after 17032 epochs is = 0.06932726111286971
learning rate =  0.042753414203167514
Initial Cost on Val dataset for this epoch 17032 = 0.06932726111286971
Error on this batch = 0.36644892693466036
Error on this batch = 0.36999943264006235
Cost on val dataset after 17033 epochs is = 0.06932674578593105
learning rate =  0.04275257750869387
Initial Cost on Val dataset for this epoch 17033 = 0.06932674578593105
Error on this bat

Error on this batch = 0.3698931248291907
Cost on val dataset after 17065 epochs is = 0.0693103847860938
learning rate =  0.042725837817175066
Initial Cost on Val dataset for this epoch 17065 = 0.0693103847860938
Error on this batch = 0.36654258211718926
Error on this batch = 0.369889820491885
Cost on val dataset after 17066 epochs is = 0.06930987763066412
learning rate =  0.04272500327925628
Initial Cost on Val dataset for this epoch 17066 = 0.06930987763066412
Error on this batch = 0.3665453594243705
Error on this batch = 0.36988651730860567
Cost on val dataset after 17067 epochs is = 0.06930937072967717
learning rate =  0.04272416880653577
Initial Cost on Val dataset for this epoch 17067 = 0.06930937072967717
Error on this batch = 0.3665481330886749
Error on this batch = 0.36988321528576124
Cost on val dataset after 17068 epochs is = 0.06930886408348122
learning rate =  0.04272333439900462
Initial Cost on Val dataset for this epoch 17068 = 0.06930886408348122
Error on this batch = 0.

Error on this batch = 0.3697781997142886
Cost on val dataset after 17100 epochs is = 0.06929278779835198
learning rate =  0.04269666772474459
Initial Cost on Val dataset for this epoch 17100 = 0.06929278779835198
Error on this batch = 0.36663758806065533
Error on this batch = 0.36977493934465244
Cost on val dataset after 17101 epochs is = 0.06929228972832434
learning rate =  0.042695835463473386
Initial Cost on Val dataset for this epoch 17101 = 0.06929228972832434
Error on this batch = 0.3666402350455077
Error on this batch = 0.3697716803320069
Cost on val dataset after 17102 epochs is = 0.06929179192248379
learning rate =  0.04269500326708952
Initial Cost on Val dataset for this epoch 17102 = 0.06929179192248379
Error on this batch = 0.3666428782348568
Error on this batch = 0.3697684226815227
Cost on val dataset after 17103 epochs is = 0.06929129438103916
learning rate =  0.042694171135584144
Initial Cost on Val dataset for this epoch 17103 = 0.06929129438103916
Error on this batch =

Error on this batch = 0.3696649262764373
Cost on val dataset after 17135 epochs is = 0.06927551372240143
learning rate =  0.042667577130370477
Initial Cost on Val dataset for this epoch 17135 = 0.06927551372240143
Error on this batch = 0.36672795619529425
Error on this batch = 0.369661716259857
Cost on val dataset after 17136 epochs is = 0.06927502500050986
learning rate =  0.042666747134900186
Initial Cost on Val dataset for this epoch 17136 = 0.06927502500050986
Error on this batch = 0.3667304688052749
Error on this batch = 0.369658507761536
Cost on val dataset after 17137 epochs is = 0.06927453654808712
learning rate =  0.04266591720400842
Initial Cost on Val dataset for this epoch 17137 = 0.06927453654808712
Error on this batch = 0.3667329775418488
Error on this batch = 0.369655300785501
Cost on val dataset after 17138 epochs is = 0.06927404836522666
learning rate =  0.04266508733768638
Initial Cost on Val dataset for this epoch 17138 = 0.06927404836522666
Error on this batch = 0.3

Cost on val dataset after 17169 epochs is = 0.06925904877607914
learning rate =  0.042639393460396996
Initial Cost on Val dataset for this epoch 17169 = 0.06925904877607914
Error on this batch = 0.3668112072826872
Error on this batch = 0.36955350379616997
Cost on val dataset after 17170 epochs is = 0.06925856925153916
learning rate =  0.04263856565567411
Initial Cost on Val dataset for this epoch 17170 = 0.06925856925153916
Error on this batch = 0.366813587844829
Error on this batch = 0.3695503491130507
Cost on val dataset after 17171 epochs is = 0.06925808999807742
learning rate =  0.04263773791523176
Initial Cost on Val dataset for this epoch 17171 = 0.06925808999807742
Error on this batch = 0.3668159645224944
Error on this batch = 0.3695471960709406
Cost on val dataset after 17172 epochs is = 0.06925761101569561
learning rate =  0.04263691023906121
Initial Cost on Val dataset for this epoch 17172 = 0.06925761101569561
Error on this batch = 0.36681833731624197
Error on this batch = 0

Cost on val dataset after 17204 epochs is = 0.06924242659784907
learning rate =  0.042610458484934596
Initial Cost on Val dataset for this epoch 17204 = 0.06924242659784907
Error on this batch = 0.3668922217936433
Error on this batch = 0.3694440842687255
Cost on val dataset after 17205 epochs is = 0.06924195654698827
learning rate =  0.042609632924844404
Initial Cost on Val dataset for this epoch 17205 = 0.06924195654698827
Error on this batch = 0.36689446703432893
Error on this batch = 0.3694409886062417
Cost on val dataset after 17206 epochs is = 0.06924148676592187
learning rate =  0.04260880742873003
Initial Cost on Val dataset for this epoch 17206 = 0.06924148676592187
Error on this batch = 0.3668967084372287
Error on this batch = 0.36943789467228216
Cost on val dataset after 17207 epochs is = 0.06924101725457563
learning rate =  0.042607981996582805
Initial Cost on Val dataset for this epoch 17207 = 0.06924101725457563
Error on this batch = 0.3668989460044512
Error on this batch 

Cost on val dataset after 17239 epochs is = 0.06922613476358766
learning rate =  0.04258160189070233
Initial Cost on Val dataset for this epoch 17239 = 0.06922613476358766
Error on this batch = 0.3669685375700456
Error on this batch = 0.3693367764475835
Cost on val dataset after 17240 epochs is = 0.06922567409952597
learning rate =  0.042580778564615174
Initial Cost on Val dataset for this epoch 17240 = 0.06922567409952597
Error on this batch = 0.3669706499944511
Error on this batch = 0.3693337423209793
Cost on val dataset after 17241 epochs is = 0.06922521370160792
learning rate =  0.042579955302201204
Initial Cost on Val dataset for this epoch 17241 = 0.06922521370160792
Error on this batch = 0.3669727586773401
Error on this batch = 0.3693307099788602
Cost on val dataset after 17242 epochs is = 0.06922475356970006
learning rate =  0.0425791321034518
Initial Cost on Val dataset for this epoch 17242 = 0.06922475356970006
Error on this batch = 0.3669748636220952
Error on this batch = 0.

Error on this batch = 0.36923462325992495
Cost on val dataset after 17274 epochs is = 0.0692101689305196
learning rate =  0.04255282330688479
Initial Cost on Val dataset for this epoch 17274 = 0.0692101689305196
Error on this batch = 0.36704026998026507
Error on this batch = 0.36923165033567446
Cost on val dataset after 17275 epochs is = 0.0692097174930985
learning rate =  0.04255200220424282
Initial Cost on Val dataset for this epoch 17275 = 0.0692097174930985
Error on this batch = 0.36704225366401605
Error on this batch = 0.3692286792238999
Cost on val dataset after 17276 epochs is = 0.06920926631635371
learning rate =  0.04255118116497343
Initial Cost on Val dataset for this epoch 17276 = 0.06920926631635371
Error on this batch = 0.3670442337424062
Error on this batch = 0.3692257099250712
Cost on val dataset after 17277 epochs is = 0.06920881540010725
learning rate =  0.04255036018906806
Initial Cost on Val dataset for this epoch 17277 = 0.06920881540010725
Error on this batch = 0.3

Error on this batch = 0.3671038527735701
Error on this batch = 0.36913456259162347
Cost on val dataset after 17308 epochs is = 0.06919496518545291
learning rate =  0.04252494131793372
Initial Cost on Val dataset for this epoch 17308 = 0.06919496518545291
Error on this batch = 0.367105719919238
Error on this batch = 0.3691316514458998
Cost on val dataset after 17309 epochs is = 0.0691945225051995
learning rate =  0.0425241223651677
Initial Cost on Val dataset for this epoch 17309 = 0.0691945225051995
Error on this batch = 0.36710758361596363
Error on this batch = 0.369128742118542
Cost on val dataset after 17310 epochs is = 0.06919408007902605
learning rate =  0.04252330347548417
Initial Cost on Val dataset for this epoch 17310 = 0.06919408007902605
Error on this batch = 0.36710944386882943
Error on this batch = 0.36912583460942755
Cost on val dataset after 17311 epochs is = 0.06919363790672348
learning rate =  0.04252248464887465
Initial Cost on Val dataset for this epoch 17311 = 0.069

Cost on val dataset after 17343 epochs is = 0.06917962114840791
learning rate =  0.04249631544964307
Initial Cost on Val dataset for this epoch 17343 = 0.06917962114840791
Error on this batch = 0.367168934450264
Error on this batch = 0.3690309051463928
Cost on val dataset after 17344 epochs is = 0.06917918723199326
learning rate =  0.04249549869971592
Initial Cost on Val dataset for this epoch 17344 = 0.06917918723199326
Error on this batch = 0.3671706807519186
Error on this batch = 0.3690280592773506
Cost on val dataset after 17345 epochs is = 0.06917875356194182
learning rate =  0.042494682012574614
Initial Cost on Val dataset for this epoch 17345 = 0.06917875356194182
Error on this batch = 0.3671724237974645
Error on this batch = 0.3690252152129665
Cost on val dataset after 17346 epochs is = 0.06917832013802275
learning rate =  0.04249386538821074
Initial Cost on Val dataset for this epoch 17346 = 0.06917832013802275
Error on this batch = 0.36717416359250693
Error on this batch = 0.

Cost on val dataset after 17378 epochs is = 0.06916457912578595
learning rate =  0.04246776650458845
Initial Cost on Val dataset for this epoch 17378 = 0.06916457912578595
Error on this batch = 0.3672281548391019
Error on this batch = 0.3689323687123533
Cost on val dataset after 17379 epochs is = 0.06916415369189113
learning rate =  0.04246695194715184
Initial Cost on Val dataset for this epoch 17379 = 0.06916415369189113
Error on this batch = 0.3672297905763463
Error on this batch = 0.3689295855501936
Cost on val dataset after 17380 epochs is = 0.06916372849603318
learning rate =  0.042466137452206454
Initial Cost on Val dataset for this epoch 17380 = 0.06916372849603318
Error on this batch = 0.3672314232592235
Error on this batch = 0.36892680416347423
Cost on val dataset after 17381 epochs is = 0.06916330353796805
learning rate =  0.04246532301974389
Initial Cost on Val dataset for this epoch 17381 = 0.06916330353796805
Error on this batch = 0.36723305289363195
Error on this batch = 

Error on this batch = 0.36883873060696654
Cost on val dataset after 17413 epochs is = 0.06914982896144241
learning rate =  0.042439294121759064
Initial Cost on Val dataset for this epoch 17413 = 0.06914982896144241
Error on this batch = 0.3672836271503088
Error on this batch = 0.368836007174713
Cost on val dataset after 17414 epochs is = 0.06914941171210662
learning rate =  0.042438481746534015
Initial Cost on Val dataset for this epoch 17414 = 0.06914941171210662
Error on this batch = 0.36728515952390445
Error on this batch = 0.36883328547763866
Cost on val dataset after 17415 epochs is = 0.06914899469213642
learning rate =  0.042437669433507505
Initial Cost on Val dataset for this epoch 17415 = 0.06914899469213642
Error on this batch = 0.36728668905202055
Error on this batch = 0.3688305655144093
Cost on val dataset after 17416 epochs is = 0.06914857790128114
learning rate =  0.042436857182671206
Initial Cost on Val dataset for this epoch 17416 = 0.06914857790128114
Error on this batc

Cost on val dataset after 17447 epochs is = 0.06913576965406967
learning rate =  0.04241170820769957
Initial Cost on Val dataset for this epoch 17447 = 0.06913576965406967
Error on this batch = 0.3673341675362704
Error on this batch = 0.36874443378827393
Cost on val dataset after 17448 epochs is = 0.06913536006205544
learning rate =  0.04241089794255967
Initial Cost on Val dataset for this epoch 17448 = 0.06913536006205544
Error on this batch = 0.3673356065376091
Error on this batch = 0.36874177024539834
Cost on val dataset after 17449 epochs is = 0.06913495069084466
learning rate =  0.042410087739335876
Initial Cost on Val dataset for this epoch 17449 = 0.06913495069084466
Error on this batch = 0.36733704289778873
Error on this batch = 0.3687391083867296
Cost on val dataset after 17450 epochs is = 0.06913454154018496
learning rate =  0.04240927759801991
Initial Cost on Val dataset for this epoch 17450 = 0.06913454154018496
Error on this batch = 0.367338476622793
Error on this batch = 

Cost on val dataset after 17482 epochs is = 0.06912156366108851
learning rate =  0.04238338571378156
Initial Cost on Val dataset for this epoch 17482 = 0.06912156366108851
Error on this batch = 0.36738300008487396
Error on this batch = 0.36865220128755527
Cost on val dataset after 17483 epochs is = 0.06912116164721817
learning rate =  0.04238257761078956
Initial Cost on Val dataset for this epoch 17483 = 0.06912116164721817
Error on this batch = 0.36738435018343796
Error on this batch = 0.3686495957184353
Cost on val dataset after 17484 epochs is = 0.06912075984533944
learning rate =  0.04238176956942484
Initial Cost on Val dataset for this epoch 17484 = 0.06912075984533944
Error on this batch = 0.3673856978480734
Error on this batch = 0.36864699177516763
Cost on val dataset after 17485 epochs is = 0.0691203582552017
learning rate =  0.04238096158967916
Initial Cost on Val dataset for this epoch 17485 = 0.0691203582552017
Error on this batch = 0.36738704308461695
Error on this batch = 

Cost on val dataset after 17517 epochs is = 0.06910761767558823
learning rate =  0.042355138723542525
Initial Cost on Val dataset for this epoch 17517 = 0.06910761767558823
Error on this batch = 0.3674288432304785
Error on this batch = 0.36856196203913627
Cost on val dataset after 17518 epochs is = 0.06910722293298431
learning rate =  0.042354332772621864
Initial Cost on Val dataset for this epoch 17518 = 0.06910722293298431
Error on this batch = 0.3674301115738609
Error on this batch = 0.3685594123031695
Cost on val dataset after 17519 epochs is = 0.06910682839366503
learning rate =  0.04235352688304155
Initial Cost on Val dataset for this epoch 17519 = 0.06910682839366503
Error on this batch = 0.36743137768379913
Error on this batch = 0.3685568641292188
Cost on val dataset after 17520 epochs is = 0.06910643405738398
learning rate =  0.04235272105479342
Initial Cost on Val dataset for this epoch 17520 = 0.06910643405738398
Error on this batch = 0.3674326415658929
Error on this batch =

Cost on val dataset after 17552 epochs is = 0.06909392103356077
learning rate =  0.04232696688544195
Initial Cost on Val dataset for this epoch 17552 = 0.06909392103356077
Error on this batch = 0.3674719425746811
Error on this batch = 0.3684736381664981
Cost on val dataset after 17553 epochs is = 0.06909353326026883
learning rate =  0.04232616307658298
Initial Cost on Val dataset for this epoch 17553 = 0.06909353326026883
Error on this batch = 0.3674731360256153
Error on this batch = 0.3684711419586034
Cost on val dataset after 17554 epochs is = 0.06909314568172775
learning rate =  0.04232535932877936
Initial Cost on Val dataset for this epoch 17554 = 0.06909314568172775
Error on this batch = 0.3674743274341247
Error on this batch = 0.3684686472449646
Cost on val dataset after 17555 epochs is = 0.0690927582976967
learning rate =  0.04232455564202296
Initial Cost on Val dataset for this epoch 17555 = 0.0690927582976967
Error on this batch = 0.367475516805507
Error on this batch = 0.3684

Cost on val dataset after 17587 epochs is = 0.06908046327835685
learning rate =  0.042298869850272834
Initial Cost on Val dataset for this epoch 17587 = 0.06908046327835685
Error on this batch = 0.3675125322873459
Error on this batch = 0.36838714690853097
Cost on val dataset after 17588 epochs is = 0.0690800821792414
learning rate =  0.042298068173532315
Initial Cost on Val dataset for this epoch 17588 = 0.0690800821792414
Error on this batch = 0.3675136573425691
Error on this batch = 0.36838470180877236
Cost on val dataset after 17589 epochs is = 0.06907970126655667
learning rate =  0.04229726655756401
Initial Cost on Val dataset for this epoch 17589 = 0.06907970126655667
Error on this batch = 0.36751478053500675
Error on this batch = 0.3683822581328649
Cost on val dataset after 17590 epochs is = 0.06907932054006838
learning rate =  0.042296465002359854
Initial Cost on Val dataset for this epoch 17590 = 0.06907932054006838
Error on this batch = 0.36751590186961053
Error on this batch 

Cost on val dataset after 17622 epochs is = 0.06906723421200739
learning rate =  0.04227084727114167
Initial Cost on Val dataset for this epoch 17622 = 0.06906723421200739
Error on this batch = 0.36755083272285616
Error on this batch = 0.3683024022006141
Cost on val dataset after 17623 epochs is = 0.06906685949980591
learning rate =  0.04227004771664213
Initial Cost on Val dataset for this epoch 17623 = 0.06906685949980591
Error on this batch = 0.36755189545805633
Error on this batch = 0.3683000057149819
Cost on val dataset after 17624 epochs is = 0.06906648496594889
learning rate =  0.042269248222633574
Initial Cost on Val dataset for this epoch 17624 = 0.06906648496594889
Error on this batch = 0.36755295649749825
Error on this batch = 0.36829761058120386
Cost on val dataset after 17625 epochs is = 0.06906611061020884
learning rate =  0.042268448789107974
Initial Cost on Val dataset for this epoch 17625 = 0.06906611061020884
Error on this batch = 0.3675540158457612
Error on this batch

Cost on val dataset after 17657 epochs is = 0.06905422392106439
learning rate =  0.042242898803448496
Initial Cost on Val dataset for this epoch 17657 = 0.06905422392106439
Error on this batch = 0.3675870488760326
Error on this batch = 0.36821931597575946
Cost on val dataset after 17658 epochs is = 0.06905385531682617
learning rate =  0.04224210136137775
Initial Cost on Val dataset for this epoch 17658 = 0.06905385531682617
Error on this batch = 0.3675880549125829
Error on this batch = 0.36821696556978384
Cost on val dataset after 17659 epochs is = 0.06905348688308545
learning rate =  0.042241303979518585
Initial Cost on Val dataset for this epoch 17659 = 0.06905348688308545
Error on this batch = 0.367589059407118
Error on this batch = 0.36821461644294273
Cost on val dataset after 17660 epochs is = 0.06905311861962148
learning rate =  0.04224050665786303
Initial Cost on Val dataset for this epoch 17660 = 0.06905311861962148
Error on this batch = 0.3675900623638325
Error on this batch =

Error on this batch = 0.36814010732025476
Cost on val dataset after 17692 epochs is = 0.06904142278412888
learning rate =  0.04221502410486728
Initial Cost on Val dataset for this epoch 17692 = 0.06904142278412888
Error on this batch = 0.36762136952953933
Error on this batch = 0.3681377992382848
Cost on val dataset after 17693 epochs is = 0.06904106001722113
learning rate =  0.0422142287654778
Initial Cost on Val dataset for this epoch 17693 = 0.06904106001722113
Error on this batch = 0.3676223240188824
Error on this batch = 0.3681354923647973
Cost on val dataset after 17694 epochs is = 0.06904069741319842
learning rate =  0.042213433486022335
Initial Cost on Val dataset for this epoch 17694 = 0.06904069741319842
Error on this batch = 0.36762327710644344
Error on this batch = 0.3681331866977196
Cost on val dataset after 17695 epochs is = 0.06904033497184661
learning rate =  0.042212638266492984
Initial Cost on Val dataset for this epoch 17695 = 0.06904033497184661
Error on this batch =

Error on this batch = 0.3680600299645383
Cost on val dataset after 17727 epochs is = 0.06902882146677636
learning rate =  0.042187222835326416
Initial Cost on Val dataset for this epoch 17727 = 0.06902882146677636
Error on this batch = 0.36765396695280145
Error on this batch = 0.36805776295024495
Cost on val dataset after 17728 epochs is = 0.06902846427460074
learning rate =  0.0421864295889348
Initial Cost on Val dataset for this epoch 17728 = 0.06902846427460074
Error on this batch = 0.3676548745746428
Error on this batch = 0.36805549707227014
Cost on val dataset after 17729 epochs is = 0.06902810723792113
learning rate =  0.04218563640220147
Initial Cost on Val dataset for this epoch 17729 = 0.06902810723792113
Error on this batch = 0.36765578092154044
Error on this batch = 0.3680532323285654
Cost on val dataset after 17730 epochs is = 0.06902775035652955
learning rate =  0.04218484327511857
Initial Cost on Val dataset for this epoch 17730 = 0.06902775035652955
Error on this batch =

Cost on val dataset after 17761 epochs is = 0.06901676292930448
learning rate =  0.0421602858793952
Initial Cost on Val dataset for this epoch 17761 = 0.06901676292930448
Error on this batch = 0.3676841308887148
Error on this batch = 0.36798134721585757
Cost on val dataset after 17762 epochs is = 0.06901641090831703
learning rate =  0.04215949465698947
Initial Cost on Val dataset for this epoch 17762 = 0.06901641090831703
Error on this batch = 0.3676849970237396
Error on this batch = 0.36797911876013245
Cost on val dataset after 17763 epochs is = 0.06901605903584739
learning rate =  0.04215870349397591
Initial Cost on Val dataset for this epoch 17763 = 0.06901605903584739
Error on this batch = 0.3676858619945522
Error on this batch = 0.36797689136961353
Cost on val dataset after 17764 epochs is = 0.06901570731169313
learning rate =  0.042157912390346705
Initial Cost on Val dataset for this epoch 17764 = 0.06901570731169313
Error on this batch = 0.3676867258042282
Error on this batch = 

Cost on val dataset after 17796 epochs is = 0.06900452924608098
learning rate =  0.04213262838254841
Initial Cost on Val dataset for this epoch 17796 = 0.06900452924608098
Error on this batch = 0.36771377250572435
Error on this batch = 0.36790397195701174
Cost on val dataset after 17797 epochs is = 0.0690041823037819
learning rate =  0.04213183923423613
Initial Cost on Val dataset for this epoch 17797 = 0.0690041823037819
Error on this batch = 0.3677145996569878
Error on this batch = 0.3679017795943095
Cost on val dataset after 17798 epochs is = 0.06900383550300654
learning rate =  0.04213105014504384
Initial Cost on Val dataset for this epoch 17798 = 0.06900383550300654
Error on this batch = 0.36771542574568755
Error on this batch = 0.36789958822713364
Cost on val dataset after 17799 epochs is = 0.06900348884355775
learning rate =  0.042130261114963784
Initial Cost on Val dataset for this epoch 17799 = 0.06900348884355775
Error on this batch = 0.3677162507745497
Error on this batch = 

Cost on val dataset after 17831 epochs is = 0.06899246918907792
learning rate =  0.04210504331735819
Initial Cost on Val dataset for this epoch 17831 = 0.06899246918907792
Error on this batch = 0.3677421079135626
Error on this batch = 0.3678278188040376
Cost on val dataset after 17832 epochs is = 0.06899212708370518
learning rate =  0.04210425623364332
Initial Cost on Val dataset for this epoch 17832 = 0.06899212708370518
Error on this batch = 0.3677428994343733
Error on this batch = 0.36782566012406775
Cost on val dataset after 17833 epochs is = 0.06899178511303757
learning rate =  0.04210346920877803
Initial Cost on Val dataset for this epoch 17833 = 0.06899178511303757
Error on this batch = 0.3677436899823543
Error on this batch = 0.367823502371662
Cost on val dataset after 17834 epochs is = 0.06899144327688266
learning rate =  0.04210268224275461
Initial Cost on Val dataset for this epoch 17834 = 0.06899144327688266
Error on this batch = 0.3677444795599007
Error on this batch = 0.3

Cost on val dataset after 17866 epochs is = 0.06898057439799617
learning rate =  0.04207753035251808
Initial Cost on Val dataset for this epoch 17866 = 0.06898057439799617
Error on this batch = 0.36776924750718615
Error on this batch = 0.36775280445879077
Cost on val dataset after 17867 epochs is = 0.06898023689365593
learning rate =  0.04207674532396655
Initial Cost on Val dataset for this epoch 17867 = 0.06898023689365593
Error on this batch = 0.36777000634459067
Error on this batch = 0.36775067711488435
Cost on val dataset after 17868 epochs is = 0.06897989951736187
learning rate =  0.04207596035399596
Initial Cost on Val dataset for this epoch 17868 = 0.06897989951736187
Error on this batch = 0.36777076428761507
Error on this batch = 0.3677485506324872
Cost on val dataset after 17869 epochs is = 0.0689795622689259
learning rate =  0.04207517544259865
Initial Cost on Val dataset for this epoch 17869 = 0.0689795622689259
Error on this batch = 0.3677715213383412
Error on this batch = 

Cost on val dataset after 17901 epochs is = 0.06896883670886696
learning rate =  0.04205008915888226
Initial Cost on Val dataset for this epoch 17901 = 0.06896883670886696
Error on this batch = 0.3677952878325387
Error on this batch = 0.367678847945012
Cost on val dataset after 17902 epochs is = 0.06896850357501268
learning rate =  0.04204930617612146
Initial Cost on Val dataset for this epoch 17902 = 0.06896850357501268
Error on this batch = 0.3677960165488579
Error on this batch = 0.36767674965913316
Cost on val dataset after 17903 epochs is = 0.06896817056269149
learning rate =  0.04204852325167472
Initial Cost on Val dataset for this epoch 17903 = 0.06896817056269149
Error on this batch = 0.36779674443855215
Error on this batch = 0.36767465217070916
Cost on val dataset after 17904 epochs is = 0.06896783767171931
learning rate =  0.042047740385534424
Initial Cost on Val dataset for this epoch 17904 = 0.06896783767171931
Error on this batch = 0.36779747150340525
Error on this batch =

Cost on val dataset after 17935 epochs is = 0.06895757724148043
learning rate =  0.04202350041378541
Initial Cost on Val dataset for this epoch 17935 = 0.06895757724148043
Error on this batch = 0.367819610743165
Error on this batch = 0.36760794289764326
Cost on val dataset after 17936 epochs is = 0.06895724813713848
learning rate =  0.04202271940944725
Initial Cost on Val dataset for this epoch 17936 = 0.06895724813713848
Error on this batch = 0.36782031230574097
Error on this batch = 0.36760587072476086
Cost on val dataset after 17937 epochs is = 0.06895691914813176
learning rate =  0.04202193846316553
Initial Cost on Val dataset for this epoch 17937 = 0.06895691914813176
Error on this batch = 0.3678210130977399
Error on this batch = 0.36760379928901493
Cost on val dataset after 17938 epochs is = 0.0689565902742798
learning rate =  0.04202115757493271
Initial Cost on Val dataset for this epoch 17938 = 0.0689565902742798
Error on this batch = 0.3678217131206685
Error on this batch = 0.

Error on this batch = 0.36753994093322717
Cost on val dataset after 17969 epochs is = 0.06894645132187982
learning rate =  0.041996978790827956
Initial Cost on Val dataset for this epoch 17969 = 0.06894645132187982
Error on this batch = 0.36784304022067765
Error on this batch = 0.36753789217185867
Cost on val dataset after 17970 epochs is = 0.0689461260383173
learning rate =  0.04199619975618067
Initial Cost on Val dataset for this epoch 17970 = 0.0689461260383173
Error on this batch = 0.36784371638093705
Error on this batch = 0.3675358440907744
Cost on val dataset after 17971 epochs is = 0.06894580086401204
learning rate =  0.04199542077933385
Initial Cost on Val dataset for this epoch 17971 = 0.06894580086401204
Error on this batch = 0.36784439181736306
Error on this batch = 0.3675337966882757
Cost on val dataset after 17972 epochs is = 0.06894547579878701
learning rate =  0.04199464186027999
Initial Cost on Val dataset for this epoch 17972 = 0.06894547579878701
Error on this batch =

Error on this batch = 0.36747065458191003
Cost on val dataset after 18003 epochs is = 0.06893545191952073
learning rate =  0.041970523994038056
Initial Cost on Val dataset for this epoch 18003 = 0.06893545191952073
Error on this batch = 0.36786563062848693
Error on this batch = 0.3674686280049802
Cost on val dataset after 18004 epochs is = 0.06893513025200308
learning rate =  0.041969746920404845
Initial Cost on Val dataset for this epoch 18004 = 0.06893513025200308
Error on this batch = 0.36786628282448947
Error on this batch = 0.3674666020512903
Cost on val dataset after 18005 epochs is = 0.06893480868778046
learning rate =  0.04196896990431772
Initial Cost on Val dataset for this epoch 18005 = 0.06893480868778046
Error on this batch = 0.36786693433424517
Error on this batch = 0.3674645767191829
Cost on val dataset after 18006 epochs is = 0.06893448722667925
learning rate =  0.04196819294576923
Initial Cost on Val dataset for this epoch 18006 = 0.06893448722667925
Error on this batch

Error on this batch = 0.36740008442147526
Cost on val dataset after 18038 epochs is = 0.06892425388578259
learning rate =  0.041943360608063314
Initial Cost on Val dataset for this epoch 18038 = 0.06892425388578259
Error on this batch = 0.36788805508019323
Error on this batch = 0.3673980786781476
Cost on val dataset after 18039 epochs is = 0.06892393573093646
learning rate =  0.04194258554411517
Initial Cost on Val dataset for this epoch 18039 = 0.06892393573093646
Error on this batch = 0.3678886837803137
Error on this batch = 0.36739607350060705
Cost on val dataset after 18040 epochs is = 0.06892361767336784
learning rate =  0.04194181053745293
Initial Cost on Val dataset for this epoch 18040 = 0.06892361767336784
Error on this batch = 0.3678893118232399
Error on this batch = 0.36739406888722614
Cost on val dataset after 18041 epochs is = 0.06892329971290671
learning rate =  0.04194103558806919
Initial Cost on Val dataset for this epoch 18041 = 0.06892329971290671
Error on this batch 

Error on this batch = 0.3673302094258989
Cost on val dataset after 18073 epochs is = 0.06891317523828666
learning rate =  0.04191626740656904
Initial Cost on Val dataset for this epoch 18073 = 0.06891317523828666
Error on this batch = 0.3679096725897648
Error on this batch = 0.36732822252022473
Cost on val dataset after 18074 epochs is = 0.0689128603876101
learning rate =  0.04191549434322797
Initial Cost on Val dataset for this epoch 18074 = 0.0689128603876101
Error on this batch = 0.36791027863988524
Error on this batch = 0.36732623612365833
Cost on val dataset after 18075 epochs is = 0.06891254562831858
learning rate =  0.0419147213369143
Initial Cost on Val dataset for this epoch 18075 = 0.06891254562831858
Error on this batch = 0.36791088405171535
Error on this batch = 0.3673242502345853
Cost on val dataset after 18076 epochs is = 0.06891223096024564
learning rate =  0.04191394838762065
Initial Cost on Val dataset for this epoch 18076 = 0.06891223096024564
Error on this batch = 0.

Cost on val dataset after 18107 epochs is = 0.06890252059124266
learning rate =  0.041890015201351945
Initial Cost on Val dataset for this epoch 18107 = 0.06890252059124266
Error on this batch = 0.3679299220961453
Error on this batch = 0.36726096007671727
Cost on val dataset after 18108 epochs is = 0.06890220875492793
learning rate =  0.04188924407282055
Initial Cost on Val dataset for this epoch 18108 = 0.06890220875492793
Error on this batch = 0.367930506605905
Error on this batch = 0.36725899002858897
Cost on val dataset after 18109 epochs is = 0.06890189700440044
learning rate =  0.04188847300106702
Initial Cost on Val dataset for this epoch 18109 = 0.06890189700440044
Error on this batch = 0.36793109048532047
Error on this batch = 0.36725702043296915
Cost on val dataset after 18110 epochs is = 0.06890158533949753
learning rate =  0.04188770198608404
Initial Cost on Val dataset for this epoch 18110 = 0.06890158533949753
Error on this batch = 0.3679316737344597
Error on this batch =

Cost on val dataset after 18142 epochs is = 0.06889165630527809
learning rate =  0.04186305943777136
Initial Cost on Val dataset for this epoch 18142 = 0.06889165630527809
Error on this batch = 0.36795000480108203
Error on this batch = 0.36719226695943163
Cost on val dataset after 18143 epochs is = 0.06889134737551975
learning rate =  0.04186229029212119
Initial Cost on Val dataset for this epoch 18143 = 0.06889134737551975
Error on this batch = 0.36795056722291347
Error on this batch = 0.3671903117768167
Cost on val dataset after 18144 epochs is = 0.06889103852592762
learning rate =  0.04186152120299365
Initial Cost on Val dataset for this epoch 18144 = 0.06889103852592762
Error on this batch = 0.36795112901067617
Error on this batch = 0.3671883569892843
Cost on val dataset after 18145 epochs is = 0.06889072975634337
learning rate =  0.04186075217038145
Initial Cost on Val dataset for this epoch 18145 = 0.06889072975634337
Error on this batch = 0.3679516901640696
Error on this batch =

Error on this batch = 0.3671298887586902
Cost on val dataset after 18175 epochs is = 0.06888150309259386
learning rate =  0.04183770743565518
Initial Cost on Val dataset for this epoch 18175 = 0.06888150309259386
Error on this batch = 0.3679682278184074
Error on this batch = 0.3671279453841514
Cost on val dataset after 18176 epochs is = 0.06888119672545302
learning rate =  0.04183694015142085
Initial Cost on Val dataset for this epoch 18176 = 0.06888119672545302
Error on this batch = 0.367968769093319
Error on this batch = 0.3671260023504745
Cost on val dataset after 18177 epochs is = 0.06888089043332521
learning rate =  0.041836172923469986
Initial Cost on Val dataset for this epoch 18177 = 0.06888089043332521
Error on this batch = 0.36796930971795605
Error on this batch = 0.36712405965592626
Cost on val dataset after 18178 epochs is = 0.06888058421605671
learning rate =  0.041835405751795354
Initial Cost on Val dataset for this epoch 18178 = 0.06888058421605671
Error on this batch = 

Error on this batch = 0.36706592779395936
Cost on val dataset after 18208 epochs is = 0.06887143175138441
learning rate =  0.04181241673422423
Initial Cost on Val dataset for this epoch 18208 = 0.06887143175138441
Error on this batch = 0.3679857421256017
Error on this batch = 0.3670639947358312
Cost on val dataset after 18209 epochs is = 0.06887112777935273
learning rate =  0.04181165130353658
Initial Cost on Val dataset for this epoch 18209 = 0.06887112777935273
Error on this batch = 0.3679862614921078
Error on this batch = 0.36706206195966623
Cost on val dataset after 18210 epochs is = 0.06887082387734432
learning rate =  0.041810885928894675
Initial Cost on Val dataset for this epoch 18210 = 0.06887082387734432
Error on this batch = 0.3679867801774933
Error on this batch = 0.367060129463616
Cost on val dataset after 18211 epochs is = 0.06887052004521084
learning rate =  0.041810120610291324
Initial Cost on Val dataset for this epoch 18211 = 0.06887052004521084
Error on this batch = 

Error on this batch = 0.3670022754774437
Cost on val dataset after 18241 epochs is = 0.06886143684470337
learning rate =  0.04178718707455611
Initial Cost on Val dataset for this epoch 18241 = 0.06886143684470337
Error on this batch = 0.3680025143620006
Error on this batch = 0.3670003507222514
Cost on val dataset after 18242 epochs is = 0.06886113510612088
learning rate =  0.04178642348959341
Initial Cost on Val dataset for this epoch 18242 = 0.06886113510612088
Error on this batch = 0.36800301052519485
Error on this batch = 0.36699842618542683
Cost on val dataset after 18243 epochs is = 0.0688608334327659
learning rate =  0.04178565996044016
Initial Cost on Val dataset for this epoch 18243 = 0.0688608334327659
Error on this batch = 0.36800350595841025
Error on this batch = 0.36699650186494737
Cost on val dataset after 18244 epochs is = 0.06886053182449658
learning rate =  0.04178489648708923
Initial Cost on Val dataset for this epoch 18244 = 0.06886053182449658
Error on this batch = 0

Error on this batch = 0.3669388625369484
Cost on val dataset after 18274 epochs is = 0.06885151314707957
learning rate =  0.041762018199287904
Initial Cost on Val dataset for this epoch 18274 = 0.06885151314707957
Error on this batch = 0.3680184915343213
Error on this batch = 0.3669369438815944
Cost on val dataset after 18275 epochs is = 0.06885121348738096
learning rate =  0.04176125645227549
Initial Cost on Val dataset for this epoch 18275 = 0.06885121348738096
Error on this batch = 0.3680189625294612
Error on this batch = 0.3669350253740224
Cost on val dataset after 18276 epochs is = 0.06885091388835504
learning rate =  0.041760494760837666
Initial Cost on Val dataset for this epoch 18276 = 0.06885091388835504
Error on this batch = 0.36801943272267124
Error on this batch = 0.36693310701195303
Cost on val dataset after 18277 epochs is = 0.06885061434986821
learning rate =  0.04175973312496734
Initial Cost on Val dataset for this epoch 18277 = 0.06885061434986821
Error on this batch =

Error on this batch = 0.36687561209188957
Cost on val dataset after 18307 epochs is = 0.0688416556959597
learning rate =  0.04173690985260406
Initial Cost on Val dataset for this epoch 18307 = 0.0688416556959597
Error on this batch = 0.3680335954627292
Error on this batch = 0.36687369705387385
Cost on val dataset after 18308 epochs is = 0.0688413579696323
learning rate =  0.041736149935813975
Initial Cost on Val dataset for this epoch 18308 = 0.0688413579696323
Error on this batch = 0.36803403844712684
Error on this batch = 0.3668717820828154
Cost on val dataset after 18309 epochs is = 0.06884106029973891
learning rate =  0.041735390074365025
Initial Cost on Val dataset for this epoch 18309 = 0.06884106029973891
Error on this batch = 0.36803448052670723
Error on this batch = 0.3668698671760599
Cost on val dataset after 18310 epochs is = 0.06884076268615713
learning rate =  0.04173463026825016
Initial Cost on Val dataset for this epoch 18310 = 0.06884076268615713
Error on this batch = 0

Error on this batch = 0.3668124361052755
Cost on val dataset after 18340 epochs is = 0.06883185987434269
learning rate =  0.04171186178022426
Initial Cost on Val dataset for this epoch 18340 = 0.06883185987434269
Error on this batch = 0.36804771419437865
Error on this batch = 0.36681052179320367
Cost on val dataset after 18341 epochs is = 0.06883156394813196
learning rate =  0.04171110368597488
Initial Cost on Val dataset for this epoch 18341 = 0.06883156394813196
Error on this batch = 0.3680481251217815
Error on this batch = 0.3668086074522641
Cost on val dataset after 18342 epochs is = 0.06883126807455796
learning rate =  0.04171034564683458
Initial Cost on Val dataset for this epoch 18342 = 0.06883126807455796
Error on this batch = 0.3680485349981323
Error on this batch = 0.3668066930792513
Cost on val dataset after 18343 epochs is = 0.06883097225351445
learning rate =  0.041709587662796345
Initial Cost on Val dataset for this epoch 18343 = 0.06883097225351445
Error on this batch = 

Error on this batch = 0.36674923018316874
Cost on val dataset after 18373 epochs is = 0.0688221215484532
learning rate =  0.041686873729391505
Initial Cost on Val dataset for this epoch 18373 = 0.0688221215484532
Error on this batch = 0.3680606884159093
Error on this batch = 0.3667473131044389
Cost on val dataset after 18374 epochs is = 0.06882182730677747
learning rate =  0.041686117450047175
Initial Cost on Val dataset for this epoch 18374 = 0.06882182730677747
Error on this batch = 0.3680610615166267
Error on this batch = 0.36674539587887756
Cost on val dataset after 18375 epochs is = 0.06882153311459659
learning rate =  0.04168536122558126
Initial Cost on Val dataset for this epoch 18375 = 0.06882153311459659
Error on this batch = 0.36806143335677177
Error on this batch = 0.3667434785024629
Cost on val dataset after 18376 epochs is = 0.06882123897182896
learning rate =  0.041684605055986784
Initial Cost on Val dataset for this epoch 18376 = 0.06882123897182896
Error on this batch =

Cost on val dataset after 18406 epochs is = 0.06881243730278629
learning rate =  0.041661945448860226
Initial Cost on Val dataset for this epoch 18406 = 0.06881243730278629
Error on this batch = 0.3680722896499712
Error on this batch = 0.36668394156288897
Cost on val dataset after 18407 epochs is = 0.0688121446570449
learning rate =  0.04166119097683093
Initial Cost on Val dataset for this epoch 18407 = 0.0688121446570449
Error on this batch = 0.36807261659537405
Error on this batch = 0.36668201703736225
Cost on val dataset after 18408 epochs is = 0.06881185205868062
learning rate =  0.041660436559450745
Initial Cost on Val dataset for this epoch 18408 = 0.06881185205868062
Error on this batch = 0.368072941975481
Error on this batch = 0.3666800922090036
Cost on val dataset after 18409 epochs is = 0.06881155950765108
learning rate =  0.04165968219671275
Initial Cost on Val dataset for this epoch 18409 = 0.06881155950765108
Error on this batch = 0.3680732657791046
Error on this batch = 0

Error on this batch = 0.36662024160946016
Cost on val dataset after 18440 epochs is = 0.06880251374874997
learning rate =  0.041636324016625585
Initial Cost on Val dataset for this epoch 18440 = 0.06880251374874997
Error on this batch = 0.36808245409165324
Error on this batch = 0.36661830404215884
Cost on val dataset after 18441 epochs is = 0.06880222270016612
learning rate =  0.04163557139878773
Initial Cost on Val dataset for this epoch 18441 = 0.06880222270016612
Error on this batch = 0.36808272066717024
Error on this batch = 0.36661636597169533
Cost on val dataset after 18442 epochs is = 0.06880193169851032
learning rate =  0.04163481883536418
Initial Cost on Val dataset for this epoch 18442 = 0.06880193169851032
Error on this batch = 0.36808298521212895
Error on this batch = 0.3666144273910576
Cost on val dataset after 18443 epochs is = 0.06880164074380583
learning rate =  0.04163406632634805
Initial Cost on Val dataset for this epoch 18443 = 0.06880164074380583
Error on this batc

Error on this batch = 0.3665559953675161
Cost on val dataset after 18473 epochs is = 0.06879293415858886
learning rate =  0.04161151632121835
Initial Cost on Val dataset for this epoch 18473 = 0.06879293415858886
Error on this batch = 0.36809007939354443
Error on this batch = 0.36655403714484003
Cost on val dataset after 18474 epochs is = 0.06879264468566303
learning rate =  0.041610765495424334
Initial Cost on Val dataset for this epoch 18474 = 0.06879264468566303
Error on this batch = 0.3680902690020363
Error on this batch = 0.36655207815347196
Cost on val dataset after 18475 epochs is = 0.06879235526191288
learning rate =  0.04161001472381809
Initial Cost on Val dataset for this epoch 18475 = 0.06879235526191288
Error on this batch = 0.36809045591605066
Error on this batch = 0.366550118384233
Cost on val dataset after 18476 epochs is = 0.06879206588746084
learning rate =  0.041609264006392775
Initial Cost on Val dataset for this epoch 18476 = 0.06879206588746084
Error on this batch 

Cost on val dataset after 18506 epochs is = 0.06878340833724052
learning rate =  0.041586767643872695
Initial Cost on Val dataset for this epoch 18506 = 0.06878340833724052
Error on this batch = 0.36809477303105353
Error on this batch = 0.36648892677078365
Cost on val dataset after 18507 epochs is = 0.06878312057229491
learning rate =  0.04158601860266858
Initial Cost on Val dataset for this epoch 18507 = 0.06878312057229491
Error on this batch = 0.36809485975931183
Error on this batch = 0.36648693690509726
Cost on val dataset after 18508 epochs is = 0.06878283286258008
learning rate =  0.041585269615427044
Initial Cost on Val dataset for this epoch 18508 = 0.06878283286258008
Error on this batch = 0.3680949428795985
Error on this batch = 0.3664849459271583
Cost on val dataset after 18509 epochs is = 0.0687825452083465
learning rate =  0.041584520682141295
Initial Cost on Val dataset for this epoch 18509 = 0.0687825452083465
Error on this batch = 0.3680950223608761
Error on this batch 

Error on this batch = 0.3680956019719857
Error on this batch = 0.36642464659195284
Cost on val dataset after 18539 epochs is = 0.06877394276727473
learning rate =  0.04156207773931696
Initial Cost on Val dataset for this epoch 18539 = 0.06877394276727473
Error on this batch = 0.36809555755845424
Error on this batch = 0.36642261591964265
Cost on val dataset after 18540 epochs is = 0.06877365697578718
learning rate =  0.04156133047529301
Initial Cost on Val dataset for this epoch 18540 = 0.06877365697578718
Error on this batch = 0.368095508533268
Error on this batch = 0.36642058381074116
Cost on val dataset after 18541 epochs is = 0.06877337124946123
learning rate =  0.041560583265007785
Initial Cost on Val dataset for this epoch 18541 = 0.06877337124946123
Error on this batch = 0.3680954548674433
Error on this batch = 0.36641855025687264
Cost on val dataset after 18542 epochs is = 0.06877308558863097
learning rate =  0.04155983610845455
Initial Cost on Val dataset for this epoch 18542 =

Error on this batch = 0.36635684038277205
Cost on val dataset after 18572 epochs is = 0.06876454782129429
learning rate =  0.04153744636373305
Initial Cost on Val dataset for this epoch 18572 = 0.06876454782129429
Error on this batch = 0.3680913559227186
Error on this batch = 0.36635475933303824
Cost on val dataset after 18573 epochs is = 0.06876426434663774
learning rate =  0.04153670086952341
Initial Cost on Val dataset for this epoch 18573 = 0.06876426434663774
Error on this batch = 0.36809114192223497
Error on this batch = 0.3663526767259872
Cost on val dataset after 18574 epochs is = 0.06876398094677916
learning rate =  0.04153595542882998
Initial Cost on Val dataset for this epoch 18574 = 0.06876398094677916
Error on this batch = 0.3680909226969179
Error on this batch = 0.36635059256485647
Cost on val dataset after 18575 epochs is = 0.06876369762190938
learning rate =  0.041535210041646044
Initial Cost on Val dataset for this epoch 18575 = 0.06876369762190938
Error on this batch 

Cost on val dataset after 18605 epochs is = 0.06875523321987556
learning rate =  0.04151287327474528
Initial Cost on Val dataset for this epoch 18605 = 0.06875523321987556
Error on this batch = 0.3680815907531401
Error on this batch = 0.3662852561119789
Cost on val dataset after 18606 epochs is = 0.06875495224883546
learning rate =  0.04151212954302764
Initial Cost on Val dataset for this epoch 18606 = 0.06875495224883546
Error on this batch = 0.3680812119989597
Error on this batch = 0.3662831272870811
Cost on val dataset after 18607 epochs is = 0.06875467135218252
learning rate =  0.041511385864605004
Initial Cost on Val dataset for this epoch 18607 = 0.06875467135218252
Error on this batch = 0.3680808287890568
Error on this batch = 0.3662809973341453
Cost on val dataset after 18608 epochs is = 0.06875439052964863
learning rate =  0.04151064223947067
Initial Cost on Val dataset for this epoch 18608 = 0.06875439052964863
Error on this batch = 0.3680804411731195
Error on this batch = 0.

Cost on val dataset after 18638 epochs is = 0.06874599833782122
learning rate =  0.04148835823140931
Initial Cost on Val dataset for this epoch 18638 = 0.06874599833782122
Error on this batch = 0.36806706234210423
Error on this batch = 0.3662145523673962
Cost on val dataset after 18639 epochs is = 0.06874571959448236
learning rate =  0.04148761625490455
Initial Cost on Val dataset for this epoch 18639 = 0.06874571959448236
Error on this batch = 0.36806656937219084
Error on this batch = 0.3662124007293984
Cost on val dataset after 18640 epochs is = 0.06874544090866484
learning rate =  0.04148687433147488
Initial Cost on Val dataset for this epoch 18640 = 0.06874544090866484
Error on this batch = 0.36806607416457404
Error on this batch = 0.3662102489245679
Cost on val dataset after 18641 epochs is = 0.06874516227963896
learning rate =  0.041486132461113656
Initial Cost on Val dataset for this epoch 18641 = 0.06874516227963896
Error on this batch = 0.3680655767971436
Error on this batch =

Cost on val dataset after 18671 epochs is = 0.06873682590314907
learning rate =  0.04146390099420113
Initial Cost on Val dataset for this epoch 18671 = 0.06873682590314907
Error on this batch = 0.3680500301676993
Error on this batch = 0.36614363674396155
Cost on val dataset after 18672 epochs is = 0.06873654863703253
learning rate =  0.041463160765673
Initial Cost on Val dataset for this epoch 18672 = 0.06873654863703253
Error on this batch = 0.3680495030829509
Error on this batch = 0.3661414964028951
Cost on val dataset after 18673 epochs is = 0.06873627140147785
learning rate =  0.04146242059000135
Initial Cost on Val dataset for this epoch 18673 = 0.06873627140147785
Error on this batch = 0.36804897613799875
Error on this batch = 0.36613935690007354
Cost on val dataset after 18674 epochs is = 0.06873599419564677
learning rate =  0.041461680467179574
Initial Cost on Val dataset for this epoch 18674 = 0.06873599419564677
Error on this batch = 0.3680484493921635
Error on this batch = 0

Error on this batch = 0.36803298913805566
Error on this batch = 0.3660735635445134
Cost on val dataset after 18705 epochs is = 0.06872741120420647
learning rate =  0.04143876283726104
Initial Cost on Val dataset for this epoch 18705 = 0.06872741120420647
Error on this batch = 0.3680324919953476
Error on this batch = 0.366071461243897
Cost on val dataset after 18706 epochs is = 0.06872713453797617
learning rate =  0.041438024402155
Initial Cost on Val dataset for this epoch 18706 = 0.06872713453797617
Error on this batch = 0.368031996368089
Error on this batch = 0.36606936032941156
Cost on val dataset after 18707 epochs is = 0.06872685787710109
learning rate =  0.04143728601968153
Initial Cost on Val dataset for this epoch 18707 = 0.06872685787710109
Error on this batch = 0.3680315022795454
Error on this batch = 0.366067260808441
Cost on val dataset after 18708 epochs is = 0.06872658122098309
learning rate =  0.041436547689834076
Initial Cost on Val dataset for this epoch 18708 = 0.0687

Error on this batch = 0.36801747349572694
Error on this batch = 0.36600493962826924
Cost on val dataset after 18738 epochs is = 0.06871828118429932
learning rate =  0.04141442223299512
Initial Cost on Val dataset for this epoch 18738 = 0.06871828118429932
Error on this batch = 0.3680170338510276
Error on this batch = 0.3660028843415157
Cost on val dataset after 18739 epochs is = 0.06871800443234632
learning rate =  0.041413685531304464
Initial Cost on Val dataset for this epoch 18739 = 0.06871800443234632
Error on this batch = 0.36801659604839143
Error on this batch = 0.3660008304406341
Cost on val dataset after 18740 epochs is = 0.0687177276711265
learning rate =  0.04141294888203038
Initial Cost on Val dataset for this epoch 18740 = 0.0687177276711265
Error on this batch = 0.3680161600864048
Error on this batch = 0.3659987779191312
Cost on val dataset after 18741 epochs is = 0.06871745090035188
learning rate =  0.041412212285166325
Initial Cost on Val dataset for this epoch 18741 = 0

Error on this batch = 0.36593780162395106
Cost on val dataset after 18771 epochs is = 0.06870914220609263
learning rate =  0.04139013871758942
Initial Cost on Val dataset for this epoch 18771 = 0.06870914220609263
Error on this batch = 0.3680035315752671
Error on this batch = 0.36593578740018956
Cost on val dataset after 18772 epochs is = 0.06870886503513648
learning rate =  0.04138940374220584
Initial Cost on Val dataset for this epoch 18772 = 0.06870886503513648
Error on this batch = 0.3680031514509492
Error on this batch = 0.36593377423904627
Cost on val dataset after 18773 epochs is = 0.06870858784897867
learning rate =  0.04138866881902407
Initial Cost on Val dataset for this epoch 18773 = 0.06870858784897867
Error on this batch = 0.36800277292241873
Error on this batch = 0.365931762128563
Cost on val dataset after 18774 epochs is = 0.06870831064753047
learning rate =  0.041387933948037615
Initial Cost on Val dataset for this epoch 18774 = 0.06870831064753047
Error on this batch =

Error on this batch = 0.3679920999401773
Error on this batch = 0.36587182700870263
Cost on val dataset after 18804 epochs is = 0.06869998719114274
learning rate =  0.041365912057135315
Initial Cost on Val dataset for this epoch 18804 = 0.06869998719114274
Error on this batch = 0.36799176488693625
Error on this batch = 0.3658698414365189
Cost on val dataset after 18805 epochs is = 0.06869970948909462
learning rate =  0.04136517880099205
Initial Cost on Val dataset for this epoch 18805 = 0.06869970948909462
Error on this batch = 0.3679914310355449
Error on this batch = 0.3658678565284791
Cost on val dataset after 18806 epochs is = 0.06869943177046633
learning rate =  0.04136444559683711
Initial Cost on Val dataset for this epoch 18806 = 0.06869943177046633
Error on this batch = 0.3679910983734024
Error on this batch = 0.36586587227305317
Cost on val dataset after 18807 epochs is = 0.06869915403525027
learning rate =  0.04136371244466402
Initial Cost on Val dataset for this epoch 18807 = 

Error on this batch = 0.36580659384718567
Cost on val dataset after 18837 epochs is = 0.06869081426474564
learning rate =  0.04134174201909092
Initial Cost on Val dataset for this epoch 18837 = 0.06869081426474564
Error on this batch = 0.3679813085504091
Error on this batch = 0.3658046245097371
Cost on val dataset after 18838 epochs is = 0.06869053601608285
learning rate =  0.041341010475162476
Initial Cost on Val dataset for this epoch 18838 = 0.06869053601608285
Error on this batch = 0.36798100751331786
Error on this batch = 0.36580265549679075
Cost on val dataset after 18839 epochs is = 0.06869025775096979
learning rate =  0.0413402789830101
Initial Cost on Val dataset for this epoch 18839 = 0.06869025775096979
Error on this batch = 0.36798070727299625
Error on this batch = 0.3658006867995286
Cost on val dataset after 18840 epochs is = 0.06868997946941596
learning rate =  0.04133954754262737
Initial Cost on Val dataset for this epoch 18840 = 0.06868997946941596
Error on this batch =

Cost on val dataset after 18869 epochs is = 0.06868190219212753
learning rate =  0.04131835826254027
Initial Cost on Val dataset for this epoch 18869 = 0.06868190219212753
Error on this batch = 0.3679720188282349
Error on this batch = 0.3657417326529641
Cost on val dataset after 18870 epochs is = 0.06868162342121428
learning rate =  0.04131762837227068
Initial Cost on Val dataset for this epoch 18870 = 0.06868162342121428
Error on this batch = 0.3679717382091485
Error on this batch = 0.365739769861442
Cost on val dataset after 18871 epochs is = 0.06868134463408201
learning rate =  0.04131689853357251
Initial Cost on Val dataset for this epoch 18871 = 0.06868134463408201
Error on this batch = 0.3679714580714368
Error on this batch = 0.36573780715074444
Cost on val dataset after 18872 epochs is = 0.06868106583073355
learning rate =  0.041316168746439376
Initial Cost on Val dataset for this epoch 18872 = 0.06868106583073355
Error on this batch = 0.3679711784063933
Error on this batch = 0.

Error on this batch = 0.3656769740145623
Cost on val dataset after 18903 epochs is = 0.06867241488189792
learning rate =  0.041293570886835176
Initial Cost on Val dataset for this epoch 18903 = 0.06867241488189792
Error on this batch = 0.36796269885113037
Error on this batch = 0.36567501117391243
Cost on val dataset after 18904 epochs is = 0.06867213555887443
learning rate =  0.04129284274642334
Initial Cost on Val dataset for this epoch 18904 = 0.06867213555887443
Error on this batch = 0.3679624301141209
Error on this batch = 0.36567304825750413
Cost on val dataset after 18905 epochs is = 0.06867185621951384
learning rate =  0.041292114657366756
Initial Cost on Val dataset for this epoch 18905 = 0.06867185621951384
Error on this batch = 0.36796216159945455
Error on this batch = 0.3656710852617278
Cost on val dataset after 18906 epochs is = 0.06867157686380573
learning rate =  0.04129138661965909
Initial Cost on Val dataset for this epoch 18906 = 0.06867157686380573
Error on this batch

Error on this batch = 0.3656121428629618
Cost on val dataset after 18936 epochs is = 0.06866318852336753
learning rate =  0.04126956933428088
Initial Cost on Val dataset for this epoch 18936 = 0.06866318852336753
Error on this batch = 0.36795391461221444
Error on this batch = 0.36561017591546097
Cost on val dataset after 18937 epochs is = 0.06866290865361196
learning rate =  0.04126884288525178
Initial Cost on Val dataset for this epoch 18937 = 0.06866290865361196
Error on this batch = 0.3679536500688973
Error on this batch = 0.3656082088011496
Cost on val dataset after 18938 epochs is = 0.06866262876697438
learning rate =  0.04126811648736937
Initial Cost on Val dataset for this epoch 18938 = 0.06866262876697438
Error on this batch = 0.36795338556124807
Error on this batch = 0.36560624151806564
Cost on val dataset after 18939 epochs is = 0.06866234886343216
learning rate =  0.041267390140627316
Initial Cost on Val dataset for this epoch 18939 = 0.06866234886343216
Error on this batch 

Error on this batch = 0.36554713636777125
Cost on val dataset after 18969 epochs is = 0.06865394377133605
learning rate =  0.04124562348743011
Initial Cost on Val dataset for this epoch 18969 = 0.06865394377133605
Error on this batch = 0.3679451791065965
Error on this batch = 0.36554516308254564
Cost on val dataset after 18970 epochs is = 0.06865366333082436
learning rate =  0.04124489872292018
Initial Cost on Val dataset for this epoch 18970 = 0.06865366333082436
Error on this batch = 0.36794491344062513
Error on this batch = 0.36554318958475784
Cost on val dataset after 18971 epochs is = 0.068653382872523
learning rate =  0.04124417400934955
Initial Cost on Val dataset for this epoch 18971 = 0.068653382872523
Error on this batch = 0.3679446476741778
Error on this batch = 0.365541215873555
Cost on val dataset after 18972 epochs is = 0.06865310239639975
learning rate =  0.04124344934671197
Initial Cost on Val dataset for this epoch 18972 = 0.06865310239639975
Error on this batch = 0.36

Error on this batch = 0.3654819020977243
Cost on val dataset after 19002 epochs is = 0.06864467965636406
learning rate =  0.041221733120421086
Initial Cost on Val dataset for this epoch 19002 = 0.06864467965636406
Error on this batch = 0.36793634151620597
Error on this batch = 0.36547992148196257
Cost on val dataset after 19003 epochs is = 0.06864439861055506
learning rate =  0.04122101003360644
Initial Cost on Val dataset for this epoch 19003 = 0.06864439861055506
Error on this batch = 0.3679360708821671
Error on this batch = 0.3654779406380564
Cost on val dataset after 19004 epochs is = 0.06864411754577791
learning rate =  0.041220286997524946
Initial Cost on Val dataset for this epoch 19004 = 0.06864411754577791
Error on this batch = 0.36793580005010257
Error on this batch = 0.36547595956588597
Cost on val dataset after 19005 epochs is = 0.06864383646199369
learning rate =  0.041219564012170376
Initial Cost on Val dataset for this epoch 19005 = 0.06864383646199369
Error on this batc

Error on this batch = 0.36541642134460645
Cost on val dataset after 19035 epochs is = 0.0686353949106619
learning rate =  0.04119789800869795
Initial Cost on Val dataset for this epoch 19035 = 0.0686353949106619
Error on this batch = 0.3679272936216721
Error on this batch = 0.3654144332258554
Cost on val dataset after 19036 epochs is = 0.06863511321752043
learning rate =  0.04119717659279417
Initial Cost on Val dataset for this epoch 19036 = 0.06863511321752043
Error on this batch = 0.36792701528667054
Error on this batch = 0.3654124448833478
Cost on val dataset after 19037 epochs is = 0.06863483150404984
learning rate =  0.041196455227418556
Initial Cost on Val dataset for this epoch 19037 = 0.06863483150404984
Error on this batch = 0.36792673668502857
Error on this batch = 0.3654104563174526
Cost on val dataset after 19038 epochs is = 0.06863454977020687
learning rate =  0.04119573391256492
Initial Cost on Val dataset for this epoch 19038 = 0.06863454977020687
Error on this batch = 0

Error on this batch = 0.3653506977594329
Cost on val dataset after 19068 epochs is = 0.06862608806365093
learning rate =  0.041174117929001046
Initial Cost on Val dataset for this epoch 19068 = 0.06862608806365093
Error on this batch = 0.36791795920273923
Error on this batch = 0.36534870251235063
Cost on val dataset after 19069 epochs is = 0.06862580567635233
learning rate =  0.041173398177262815
Initial Cost on Val dataset for this epoch 19069 = 0.06862580567635233
Error on this batch = 0.36791767126470504
Error on this batch = 0.3653467070594095
Cost on val dataset after 19070 epochs is = 0.06862552326725567
learning rate =  0.041172678475848955
Initial Cost on Val dataset for this epoch 19070 = 0.06862552326725567
Error on this batch = 0.3679173830129635
Error on this batch = 0.36534471140131686
Cost on val dataset after 19071 epochs is = 0.06862524083631542
learning rate =  0.04117195882475331
Initial Cost on Val dataset for this epoch 19071 = 0.06862524083631542
Error on this batc

Error on this batch = 0.3652847501056348
Cost on val dataset after 19101 epochs is = 0.06861675752364045
learning rate =  0.04115039265935719
Initial Cost on Val dataset for this epoch 19101 = 0.06861675752364045
Error on this batch = 0.3679082858906628
Error on this batch = 0.36528274848267644
Cost on val dataset after 19102 epochs is = 0.0686164743927966
learning rate =  0.041149674565078016
Initial Cost on Val dataset for this epoch 19102 = 0.0686164743927966
Error on this batch = 0.3679079870663889
Error on this batch = 0.36528074668136823
Cost on val dataset after 19103 epochs is = 0.0686161912386378
learning rate =  0.0411489565209206
Initial Cost on Val dataset for this epoch 19103 = 0.0686161912386378
Error on this batch = 0.36790768789741735
Error on this batch = 0.36527874470266675
Cost on val dataset after 19104 epochs is = 0.06861590806111792
learning rate =  0.04114823852687881
Initial Cost on Val dataset for this epoch 19104 = 0.06861590806111792
Error on this batch = 0.3

Error on this batch = 0.3652186078623689
Cost on val dataset after 19134 epochs is = 0.06860740164389349
learning rate =  0.041126721979070004
Initial Cost on Val dataset for this epoch 19134 = 0.06860740164389349
Error on this batch = 0.3678982391145321
Error on this batch = 0.36521660089375796
Cost on val dataset after 19135 epochs is = 0.06860711771932919
learning rate =  0.04112600553558196
Initial Cost on Val dataset for this epoch 19135 = 0.06860711771932919
Error on this batch = 0.3678979285841143
Error on this batch = 0.365214593781678
Cost on val dataset after 19136 epochs is = 0.06860683376993294
learning rate =  0.04112528914201421
Initial Cost on Val dataset for this epoch 19136 = 0.06860683376993294
Error on this batch = 0.3678976176901046
Error on this batch = 0.36521258652728655
Cost on val dataset after 19137 epochs is = 0.06860654979565925
learning rate =  0.04112457279836066
Initial Cost on Val dataset for this epoch 19137 = 0.06860654979565925
Error on this batch = 0

Error on this batch = 0.36515230868569676
Cost on val dataset after 19167 epochs is = 0.06859801877522437
learning rate =  0.04110310566871045
Initial Cost on Val dataset for this epoch 19167 = 0.06859801877522437
Error on this batch = 0.3678877976885346
Error on this batch = 0.3651502976234539
Cost on val dataset after 19168 epochs is = 0.06859773400739472
learning rate =  0.04110239086938386
Initial Cost on Val dataset for this epoch 19168 = 0.06859773400739472
Error on this batch = 0.3678874749826592
Error on this batch = 0.365148286458788
Cost on val dataset after 19169 epochs is = 0.06859744921325436
learning rate =  0.04110167611977725
Initial Cost on Val dataset for this epoch 19169 = 0.06859744921325436
Error on this batch = 0.3678871519035175
Error on this batch = 0.36514627519303167
Cost on val dataset after 19170 epochs is = 0.06859716439275945
learning rate =  0.04110096141988457
Initial Cost on Val dataset for this epoch 19170 = 0.06859716439275945
Error on this batch = 0.

Error on this batch = 0.36508589702870126
Cost on val dataset after 19200 epochs is = 0.06858860730836029
learning rate =  0.041079543510107355
Initial Cost on Val dataset for this epoch 19200 = 0.06858860730836029
Error on this batch = 0.36787695061967157
Error on this batch = 0.36508388331745606
Cost on val dataset after 19201 epochs is = 0.06858832164950905
learning rate =  0.0410788303483505
Initial Cost on Val dataset for this epoch 19201 = 0.06858832164950905
Error on this batch = 0.36787661553803797
Error on this batch = 0.36508186955039745
Cost on val dataset after 19202 epochs is = 0.06858803596293865
learning rate =  0.041078117236114466
Initial Cost on Val dataset for this epoch 19202 = 0.06858803596293865
Error on this batch = 0.36787628008059176
Error on this batch = 0.36507985572902124
Cost on val dataset after 19203 epochs is = 0.06858775024860779
learning rate =  0.04107740417339324
Initial Cost on Val dataset for this epoch 19203 = 0.06858775024860779
Error on this bat

Error on this batch = 0.36501942344594457
Cost on val dataset after 19233 epochs is = 0.06857916570923844
learning rate =  0.04105603528633805
Initial Cost on Val dataset for this epoch 19233 = 0.06857916570923844
Error on this batch = 0.3678656947853617
Error on this batch = 0.36501740870882615
Cost on val dataset after 19234 epochs is = 0.0685788791143871
learning rate =  0.04105532375559691
Initial Cost on Val dataset for this epoch 19234 = 0.0685788791143871
Error on this batch = 0.36786534733676507
Error on this batch = 0.365015393967783
Cost on val dataset after 19235 epochs is = 0.06857859249050652
learning rate =  0.04105461227417857
Initial Cost on Val dataset for this epoch 19235 = 0.06857859249050652
Error on this batch = 0.36786499951537666
Error on this batch = 0.3650133792244681
Cost on val dataset after 19236 epochs is = 0.06857830583755883
learning rate =  0.04105390084207705
Initial Cost on Val dataset for this epoch 19236 = 0.06857830583755883
Error on this batch = 0.

Error on this batch = 0.36495294425756625
Cost on val dataset after 19266 epochs is = 0.06856969254988576
learning rate =  0.041032580781719105
Initial Cost on Val dataset for this epoch 19266 = 0.06856969254988576
Error on this batch = 0.36785403320134824
Error on this batch = 0.3649509302890174
Cost on val dataset after 19267 epochs is = 0.06856940497772279
learning rate =  0.041031870875477064
Initial Cost on Val dataset for this epoch 19267 = 0.06856940497772279
Error on this batch = 0.3678536735579533
Error on this batch = 0.3649489163735342
Cost on val dataset after 19268 epochs is = 0.06856911737534489
learning rate =  0.041031161018360934
Initial Cost on Val dataset for this epoch 19268 = 0.06856911737534489
Error on this batch = 0.3678533135491255
Error on this batch = 0.36494690251292156
Cost on val dataset after 19269 epochs is = 0.06856882974271836
learning rate =  0.04103045121036476
Initial Cost on Val dataset for this epoch 19269 = 0.06856882974271836
Error on this batch

Error on this batch = 0.36488652134105837
Cost on val dataset after 19299 epochs is = 0.0685601865368362
learning rate =  0.041009179781797134
Initial Cost on Val dataset for this epoch 19299 = 0.0685601865368362
Error on this batch = 0.3678419736602854
Error on this batch = 0.36488451010055417
Cost on val dataset after 19300 epochs is = 0.06855989795055452
learning rate =  0.04100847149357471
Initial Cost on Val dataset for this epoch 19300 = 0.06855989795055452
Error on this batch = 0.3678416021202388
Error on this batch = 0.3648824989749797
Cost on val dataset after 19301 epochs is = 0.06855960933302152
learning rate =  0.041007763254282424
Initial Cost on Val dataset for this epoch 19301 = 0.06855960933302152
Error on this batch = 0.3678412302254019
Error on this batch = 0.3648804879662821
Cost on val dataset after 19302 epochs is = 0.06855932068420838
learning rate =  0.04100705506391437
Initial Cost on Val dataset for this epoch 19302 = 0.06855932068420838
Error on this batch = 0

Error on this batch = 0.36481821547688437
Cost on val dataset after 19333 epochs is = 0.06855035690641428
learning rate =  0.04098512539669427
Initial Cost on Val dataset for this epoch 19333 = 0.06855035690641428
Error on this batch = 0.36782914452363513
Error on this batch = 0.3648162092601319
Cost on val dataset after 19334 epochs is = 0.06855006724240822
learning rate =  0.04098441876878433
Initial Cost on Val dataset for this epoch 19334 = 0.06855006724240822
Error on this batch = 0.3678287611356366
Error on this batch = 0.36481420322671476
Cost on val dataset after 19335 epochs is = 0.06854977754626472
learning rate =  0.04098371218960398
Initial Cost on Val dataset for this epoch 19335 = 0.06854977754626472
Error on this batch = 0.36782837740619606
Error on this batch = 0.3648121973787097
Cost on val dataset after 19336 epochs is = 0.06854948781796076
learning rate =  0.04098300565914735
Initial Cost on Val dataset for this epoch 19336 = 0.06854948781796076
Error on this batch =

Error on this batch = 0.36475011954035724
Cost on val dataset after 19367 epochs is = 0.06854049017204118
learning rate =  0.04096112734991928
Initial Cost on Val dataset for this epoch 19367 = 0.06854049017204118
Error on this batch = 0.3678159203279737
Error on this batch = 0.36474812074169977
Cost on val dataset after 19368 epochs is = 0.06854019940357305
learning rate =  0.04096042237552281
Initial Cost on Val dataset for this epoch 19368 = 0.06854019940357305
Error on this batch = 0.3678155255707863
Error on this batch = 0.36474612219877955
Cost on val dataset after 19369 epochs is = 0.06853990860229102
learning rate =  0.04095971744965657
Initial Cost on Val dataset for this epoch 19369 = 0.06853990860229102
Error on this batch = 0.367815130486819
Error on this batch = 0.36474412391377614
Cost on val dataset after 19370 epochs is = 0.06853961776817864
learning rate =  0.04095901257231472
Initial Cost on Val dataset for this epoch 19370 = 0.06853961776817864
Error on this batch = 

Error on this batch = 0.3646823170025248
Cost on val dataset after 19401 epochs is = 0.0685305855464997
learning rate =  0.04093718541096226
Initial Cost on Val dataset for this epoch 19401 = 0.0685305855464997
Error on this batch = 0.3678023178834715
Error on this batch = 0.3646803281348952
Cost on val dataset after 19402 epochs is = 0.06853029365435397
learning rate =  0.04093648208331997
Initial Cost on Val dataset for this epoch 19402 = 0.06853029365435397
Error on this batch = 0.36780191226121156
Error on this batch = 0.3646783395982878
Cost on val dataset after 19403 epochs is = 0.06853000172895093
learning rate =  0.04093577880400972
Initial Cost on Val dataset for this epoch 19403 = 0.06853000172895093
Error on this batch = 0.3678015063269638
Error on this batch = 0.36467635139494625
Cost on val dataset after 19404 epochs is = 0.06852970977028129
learning rate =  0.04093507557302568
Initial Cost on Val dataset for this epoch 19404 = 0.06852970977028129
Error on this batch = 0.3

Error on this batch = 0.36461489467488817
Cost on val dataset after 19435 epochs is = 0.068520642510805
learning rate =  0.04091329935065831
Initial Cost on Val dataset for this epoch 19435 = 0.068520642510805
Error on this batch = 0.36778835418590883
Error on this batch = 0.36461291832571235
Cost on val dataset after 19436 epochs is = 0.06852034948402154
learning rate =  0.040912597663050325
Initial Cost on Val dataset for this epoch 19436 = 0.06852034948402154
Error on this batch = 0.3677879381867013
Error on this batch = 0.3646109423842405
Cost on val dataset after 19437 epochs is = 0.06852005642378968
learning rate =  0.04091189602357732
Initial Cost on Val dataset for this epoch 19437 = 0.06852005642378968
Error on this batch = 0.36778752188918545
Error on this batch = 0.36460896685273114
Cost on val dataset after 19438 epochs is = 0.0685197633301078
learning rate =  0.040911194432233504
Initial Cost on Val dataset for this epoch 19438 = 0.0685197633301078
Error on this batch = 0.

In [42]:
test_accuracy

[3.8461538461538463,
 14.046153846153848,
 68.27692307692308,
 86.01538461538462,
 80.07692307692308,
 77.04615384615384,
 80.87692307692308,
 79.93846153846154,
 3.8461538461538463,
 3.8461538461538463,
 20.200000000000003,
 69.15384615384616,
 84.96923076923078,
 84.43076923076923]

In [43]:
epochs

[52,
 4437,
 9803,
 7909,
 14918,
 9888,
 10596,
 6803,
 145,
 180,
 2414,
 1578,
 3823,
 2796]

In [44]:
valid_accuracy


[3.128205128205128,
 14.000000000000002,
 69.94871794871796,
 86.56410256410257,
 80.35897435897435,
 77.64102564102564,
 81.8974358974359,
 80.82051282051282,
 3.897435897435898,
 3.128205128205128,
 19.692307692307693,
 69.28205128205128,
 87.02564102564102,
 84.1025641025641]

In [40]:
1.5/np.power(2039, (1/3))

0.118291130898163