In [1]:
import torch
import numpy as np
import pandas as pd
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 1024, 288, 128, 1
from sklearn.utils import shuffle

# Create random Tensors to hold inputs and outputs
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

### Data preprocessing. Inspiration - https://www.kaggle.com/vikassingh1996/extensive-data-preprocessing-and-modeling#4.-Feature-Engineering-

In [2]:
df = pd.read_csv('train.csv') ## Import data
labe = df['SalePrice'].to_numpy()
df.drop(columns = ['SalePrice','Id'], inplace=True)
df = pd.get_dummies(df, dummy_na=True, drop_first=True)
df.fillna(df.median(),inplace=True)
a = df.to_numpy()

# col = [i for i in df] 
# print(col)

RS = RobustScaler().fit(a)
scaledA = RS.transform(a)
data = scaledA
scF = np.max(labe)
labe = labe/np.max(labe)
train_data, test_data, train_labels, test_labels = train_test_split(data, labe, test_size=0.2)

# print(len(data),len(data[0]))
# print(data)
# print(labe)

In [3]:
# Use the nn package to define our model and loss function.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.LeakyReLU(),
    torch.nn.Linear(H, D_out),
)

loss_fn = torch.nn.MSELoss(reduction='mean')
learning_rate = 5e-3
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
epochs = 1500
for t in range(epochs):
    j = 0
    data, labe = shuffle(train_data, train_labels)
    while j < len(train_data):
        x = data[j:j+N]
        x = torch.tensor(x, dtype=torch.float32)
        y = labe[j:j+N]
        y = torch.tensor(y, dtype=torch.float32)
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        j += N
    print(t, loss.item())

0 124.95574188232422
1 40.523963928222656
2 11.767199516296387
3 26.439525604248047
4 18.34813690185547
5 5.183289051055908
6 21.496036529541016
7 5.053553581237793
8 0.6327288746833801
9 11.064962387084961
10 2.853367805480957
11 0.5255699753761292
12 0.17256441712379456
13 8.445733070373535
14 0.955461323261261
15 0.5380237102508545
16 0.14092251658439636
17 0.2393057942390442
18 0.8907908201217651
19 0.6954675912857056
20 0.22845816612243652
21 0.5129785537719727
22 1.8421268463134766
23 1.2161661386489868
24 0.03002006933093071
25 0.04648520424962044
26 0.1197071373462677
27 0.09689708799123764
28 0.056115828454494476
29 0.04952005669474602
30 0.90177983045578
31 0.7804391980171204
32 0.044132474809885025
33 0.30222970247268677
34 0.07248362898826599
35 0.16252578794956207
36 0.11256521195173264
37 0.19442646205425262
38 0.30732962489128113
39 0.056061752140522
40 0.09959154576063156
41 0.09563735127449036
42 0.14734527468681335
43 0.11596790701150894
44 0.11193390190601349
45 0.03

358 0.0069670346565544605
359 0.011066083796322346
360 0.008161421865224838
361 0.008940350264310837
362 0.01535840891301632
363 0.011671733111143112
364 0.01936340518295765
365 0.011076010763645172
366 0.02463407814502716
367 0.012312213890254498
368 0.010207880288362503
369 0.0484325997531414
370 0.017664140090346336
371 0.02290445752441883
372 0.01539277657866478
373 0.01182077918201685
374 0.012171292677521706
375 0.035270895808935165
376 0.012513411231338978
377 0.01949094980955124
378 0.010594535619020462
379 0.013541643507778645
380 0.016445770859718323
381 0.029328756034374237
382 0.010789581574499607
383 0.3811105191707611
384 0.5838192105293274
385 1.2766714096069336
386 0.36234521865844727
387 0.1993764191865921
388 3.7427618503570557
389 0.35581308603286743
390 0.17321822047233582
391 0.13751491904258728
392 0.3364578187465668
393 0.5446943640708923
394 0.5414497256278992
395 0.43368539214134216
396 0.30594396591186523
397 10.229581832885742
398 1.7094117403030396
399 2.939

696 0.012255732901394367
697 0.009347283281385899
698 0.008762314915657043
699 0.013924679718911648
700 0.00942133367061615
701 0.01271222997456789
702 0.009547160938382149
703 0.015146036632359028
704 0.008181664161384106
705 0.01926719769835472
706 0.013444339856505394
707 0.013762959279119968
708 0.007253074552863836
709 0.01652638614177704
710 0.009257485158741474
711 0.013175093568861485
712 0.014870759099721909
713 0.010552085936069489
714 0.011294201016426086
715 0.01257442869246006
716 0.01010206714272499
717 0.011973543092608452
718 0.010741636157035828
719 0.0091824596747756
720 0.009493841789662838
721 0.014002257958054543
722 0.012160449288785458
723 0.010312479920685291
724 0.013784498907625675
725 0.009219927713274956
726 0.012844827957451344
727 0.014087487943470478
728 0.012857175432145596
729 0.0115877790376544
730 0.008488871157169342
731 0.016648638993501663
732 0.011312590911984444
733 0.01133287139236927
734 0.009730692952871323
735 0.009451497346162796
736 0.01698

1036 0.008596978150308132
1037 0.011618425138294697
1038 0.00830928236246109
1039 0.013774188235402107
1040 0.009287811815738678
1041 0.008841753005981445
1042 0.008326956070959568
1043 0.007968708872795105
1044 0.008573812432587147
1045 0.0075485012494027615
1046 0.012458217330276966
1047 0.008127047680318356
1048 0.008078960701823235
1049 0.014461023733019829
1050 0.009897907264530659
1051 0.011107398197054863
1052 0.009483253583312035
1053 0.01339881680905819
1054 0.007776280865073204
1055 0.009262179024517536
1056 0.006291226949542761
1057 0.014291830360889435
1058 0.013198275119066238
1059 0.012546784244477749
1060 0.014403233304619789
1061 0.011110642924904823
1062 0.012735764496028423
1063 0.012198586948215961
1064 0.00829677376896143
1065 0.007077235262840986
1066 0.009933357127010822
1067 0.009870720095932484
1068 0.013038749806582928
1069 0.009105532430112362
1070 0.012110047973692417
1071 0.009511876851320267
1072 0.01120125874876976
1073 0.009113832376897335
1074 0.01008386

1356 0.008071096614003181
1357 0.01644287072122097
1358 0.012082585133612156
1359 0.012892009690403938
1360 0.009264642372727394
1361 0.013154802843928337
1362 0.014885051175951958
1363 0.014431961812078953
1364 0.009269188158214092
1365 0.009416764602065086
1366 0.015291531570255756
1367 0.014214674942195415
1368 0.010080416686832905
1369 0.009306362830102444
1370 0.009118558838963509
1371 0.014648549258708954
1372 0.014068957418203354
1373 0.009770389646291733
1374 0.010090518742799759
1375 0.012437167577445507
1376 0.008132949471473694
1377 0.007609936408698559
1378 0.007568021770566702
1379 0.008522969670593739
1380 0.00986472237855196
1381 0.00996408425271511
1382 0.014842864125967026
1383 0.009698566049337387
1384 0.01392416376620531
1385 0.009829512797296047
1386 0.01111197005957365
1387 0.013824711553752422
1388 0.00867523904889822
1389 0.010144311003386974
1390 0.010296113789081573
1391 0.010481052100658417
1392 0.010739766992628574
1393 0.011602254584431648
1394 0.01454524509

In [4]:
with torch.no_grad():
    x = torch.tensor(test_data, dtype=torch.float32)
    y = torch.tensor(test_labels, dtype=torch.float32)
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    print(loss.item()*scF)

9738.261383026838


### Thats a lot of error when thinking about the loss - that it's averaged over all the samples and still the error is that high. Introducing more layers and reducing the depth of each layer might help

Also reducing the learning rate just to see if that is the issue

In [5]:
N, D_in, H1, H2, D_out = 256, 288, 32, 32, 1
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H1),
    torch.nn.LeakyReLU(),
    torch.nn.Linear(H1, H2),
    torch.nn.LeakyReLU(),
    torch.nn.Linear(H2, D_out)
)

loss_fn = torch.nn.MSELoss(reduction='mean')
learning_rate = 5e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
epochs = 1500
for t in range(epochs):
    j = 0
    data, labe = shuffle(train_data, train_labels)
    while j < len(train_data):
        x = data[j:j+N]
        x = torch.tensor(x, dtype=torch.float32)
        y = labe[j:j+N]
        y = torch.tensor(y, dtype=torch.float32)
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        j += N
    print(t, loss.item())

0 0.3592989444732666
1 0.26195335388183594
2 0.1298445165157318
3 0.10053277015686035
4 0.07124651968479156
5 0.07228346168994904
6 0.049221962690353394
7 0.049608014523983
8 0.032699499279260635
9 0.04856226593255997
10 0.022759869694709778
11 0.026396850124001503
12 0.21838366985321045
13 0.014511722140014172
14 0.015534425154328346
15 0.015524135902523994
16 0.0335916131734848
17 0.026985108852386475
18 0.012655477039515972
19 0.012072873301804066
20 0.014790750108659267
21 0.017557986080646515
22 0.016182612627744675
23 0.022716769948601723
24 0.011299481615424156
25 0.040731266140937805
26 0.015760567039251328
27 0.015008032321929932
28 0.01412620022892952
29 0.012797820381820202
30 0.014731219038367271
31 0.011722671799361706
32 0.01461601909250021
33 0.008543485775589943
34 0.009354121051728725
35 0.018966322764754295
36 0.03519495576620102
37 0.011872726492583752
38 0.033644601702690125
39 0.019199173897504807
40 0.016702808439731598
41 0.015424980781972408
42 0.011603211984038

337 0.009124184027314186
338 0.01015901193022728
339 0.013349313288927078
340 0.010048795491456985
341 0.0136214978992939
342 0.012975076213479042
343 0.00909969862550497
344 0.009181381203234196
345 0.016256555914878845
346 0.010362439788877964
347 0.012131315656006336
348 0.008857040666043758
349 0.010346428491175175
350 0.013328582979738712
351 0.010392355732619762
352 0.01144661009311676
353 0.009203111752867699
354 0.011147367767989635
355 0.010253840126097202
356 0.013791105709969997
357 0.008127463981509209
358 0.00886694435030222
359 0.007933464832603931
360 0.008451307192444801
361 0.009769024327397346
362 0.012399767525494099
363 0.014602682553231716
364 0.009323189966380596
365 0.01020089816302061
366 0.013228930532932281
367 0.008287717588245869
368 0.00843851175159216
369 0.008478846400976181
370 0.008086876012384892
371 0.00745589192956686
372 0.009152082726359367
373 0.010015977546572685
374 0.009687569923698902
375 0.011200079694390297
376 0.008620858192443848
377 0.008

672 0.009907529689371586
673 0.011136721819639206
674 0.008054513484239578
675 0.013057084754109383
676 0.013910207897424698
677 0.009425368160009384
678 0.012611469253897667
679 0.007425905670970678
680 0.005845761392265558
681 0.020332230255007744
682 0.009158128872513771
683 0.012494435533881187
684 0.008727139793336391
685 0.008411427028477192
686 0.010129077360033989
687 0.00800181832164526
688 0.01271362416446209
689 0.01152283139526844
690 0.010773121379315853
691 0.00998991634696722
692 0.00792612973600626
693 0.009341361001133919
694 0.016163300722837448
695 0.0124745424836874
696 0.010712242685258389
697 0.013699670322239399
698 0.008464970625936985
699 0.007188206072896719
700 0.009930431842803955
701 0.010040110908448696
702 0.012108853086829185
703 0.008675704710185528
704 0.008580478839576244
705 0.012335317209362984
706 0.011388396844267845
707 0.00921452883630991
708 0.01186183001846075
709 0.010717007331550121
710 0.007741489913314581
711 0.014551257714629173
712 0.009

1008 0.12304665893316269
1009 0.17028477787971497
1010 0.016006458550691605
1011 0.009487520903348923
1012 0.010846557095646858
1013 0.014728539623320103
1014 0.010543852113187313
1015 0.009771501645445824
1016 0.014796454459428787
1017 0.012591870501637459
1018 0.01279013603925705
1019 0.013578166253864765
1020 0.013446980156004429
1021 0.009315596893429756
1022 0.009005321189761162
1023 0.007567057851701975
1024 0.009058356285095215
1025 0.008586346171796322
1026 0.010531640611588955
1027 0.011655978858470917
1028 0.009595818817615509
1029 0.013176338747143745
1030 0.013587388209998608
1031 0.01622101664543152
1032 0.010688410140573978
1033 0.008844725787639618
1034 0.009450595825910568
1035 0.011072926223278046
1036 0.010642962530255318
1037 0.008634286932647228
1038 0.012819578871130943
1039 0.007376651745289564
1040 0.00951082818210125
1041 0.009589564055204391
1042 0.007872103713452816
1043 0.010266023688018322
1044 0.010463256388902664
1045 0.009542621672153473
1046 0.0126866335

1332 0.012184740044176579
1333 0.01021733507514
1334 0.011804083362221718
1335 0.01494548562914133
1336 0.011960855685174465
1337 0.011293113231658936
1338 0.010244021192193031
1339 0.010386321693658829
1340 0.014591885730624199
1341 0.009326042607426643
1342 0.010983016341924667
1343 0.010129122994840145
1344 0.008204331621527672
1345 0.015919581055641174
1346 0.01339248288422823
1347 0.018066802993416786
1348 0.015179691836237907
1349 0.012589246034622192
1350 0.009908990934491158
1351 0.00927075743675232
1352 0.010727551765739918
1353 0.008663362823426723
1354 0.008122947998344898
1355 0.014617309905588627
1356 0.0106986528262496
1357 0.012937763705849648
1358 0.007221024949103594
1359 0.010606259107589722
1360 0.009765559807419777
1361 0.012809136882424355
1362 0.014381451532244682
1363 0.009080495685338974
1364 0.008681527338922024
1365 0.009337298572063446
1366 0.010448507964611053
1367 0.008625214919447899
1368 0.011577315628528595
1369 0.019892390817403793
1370 0.00857642758637

In [6]:
with torch.no_grad():
    x = torch.tensor(test_data, dtype=torch.float32)
    y = torch.tensor(test_labels, dtype=torch.float32)
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    print(loss.item()*scF)
    print(loss.item())    

13243.724070489407
0.01754135638475418


Sometimes this model gives worse results than the previos network. Tweaking the second network to improve results

Trying to introduce weights decay and dropout and see if that helps.

In [7]:
N, D_in, H1, H2, D_out = 32, 288, 64, 64, 1
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H1),
    torch.nn.Dropout(p=0.2),
    torch.nn.LeakyReLU(),
    torch.nn.Linear(H1, H2),
    torch.nn.Dropout(p=0.2),
    torch.nn.LeakyReLU(),
    torch.nn.Linear(H2, D_out)
)

loss_fn = torch.nn.MSELoss(reduction='mean')
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
epochs = 1500
for t in range(epochs):
    j = 0
    data, labe = shuffle(train_data, train_labels)
    while j < len(train_data):
        x = data[j:j+N]
        x = torch.tensor(x, dtype=torch.float32)
        y = labe[j:j+N]
        y = torch.tensor(y, dtype=torch.float32)
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        j += N
    print(t, loss.item())

0 1.4796336889266968
1 0.8869594931602478
2 0.04903794825077057
3 1.214467167854309
4 1.0119497776031494
5 2.4056503772735596
6 0.1814238578081131
7 0.7662807106971741
8 0.046974342316389084
9 528.9219360351562
10 22.21978759765625
11 0.03511042892932892
12 0.044189631938934326
13 0.20137056708335876
14 0.0518653504550457
15 0.04865237697958946
16 0.19937919080257416
17 0.27900564670562744
18 0.27209553122520447
19 0.03930036351084709
20 0.17531630396842957
21 0.045842431485652924
22 0.046023935079574585
23 0.533784031867981
24 3.008406639099121
25 0.020223384723067284
26 0.04383789747953415
27 0.06634587049484253
28 2.950766086578369
29 0.12124144285917282
30 0.1852816492319107
31 0.2473178207874298
32 0.37987056374549866
33 0.2054332047700882
34 0.4648343324661255
35 0.04380173236131668
36 152.45162963867188
37 0.04457535594701767
38 0.09829502552747726
39 0.03136123716831207
40 1.024716854095459
41 0.08557074517011642
42 0.4599197804927826
43 0.040617793798446655
44 0.10177089273929

345 0.010708114132285118
346 0.025649819523096085
347 0.00879107415676117
348 0.05282554030418396
349 0.03164074942469597
350 0.006696443073451519
351 0.004018863197416067
352 0.010971075855195522
353 0.007618536241352558
354 0.010668132454156876
355 0.028802894055843353
356 0.01909288763999939
357 0.008732836693525314
358 0.003517336444929242
359 0.006297588814049959
360 0.008564193733036518
361 0.008425364270806313
362 0.05597934499382973
363 0.0255319494754076
364 0.004795894958078861
365 0.03379899635910988
366 0.012943279929459095
367 0.006702263839542866
368 0.008934821002185345
369 0.0076048718765378
370 0.009113074280321598
371 0.012967129237949848
372 0.013273106887936592
373 0.008953251875936985
374 0.00472600432112813
375 0.017689881846308708
376 0.01782473735511303
377 0.004314419813454151
378 0.24448391795158386
379 0.002530753379687667
380 0.009478853084146976
381 0.031877290457487106
382 0.016583411023020744
383 0.02238965593278408
384 0.00874832458794117
385 0.008369195

675 0.01026697363704443
676 0.0041256737895309925
677 0.012899533845484257
678 0.014394395053386688
679 0.01043898705393076
680 0.011785188689827919
681 0.0095005352050066
682 0.010855761356651783
683 0.006567884236574173
684 0.011681275442242622
685 0.015136494301259518
686 0.015266072005033493
687 0.006584291812032461
688 0.007088526152074337
689 0.043661873787641525
690 0.003749685827642679
691 0.01332833245396614
692 0.003772031283006072
693 0.0017838585190474987
694 0.010893656872212887
695 0.003060123650357127
696 0.01917271874845028
697 0.015776388347148895
698 0.004573107697069645
699 0.006154042202979326
700 0.004650171380490065
701 0.00801083818078041
702 0.019037676975131035
703 0.014861095696687698
704 0.0052647097036242485
705 0.0035236780531704426
706 0.003702618181705475
707 0.011700646951794624
708 0.00861949473619461
709 0.03817860037088394
710 0.010761894285678864
711 0.008632464334368706
712 0.0034611173905432224
713 0.013786066323518753
714 0.0038336357101798058
715

1006 0.01766812801361084
1007 0.01495040487498045
1008 0.0027355621568858624
1009 0.0054706791415810585
1010 0.009575806558132172
1011 0.01121492125093937
1012 0.012777150608599186
1013 0.011552796699106693
1014 0.026212124153971672
1015 0.01639140583574772
1016 0.01343977078795433
1017 0.005378151778131723
1018 0.006303888279944658
1019 0.011656795628368855
1020 0.009801945649087429
1021 0.011359975673258305
1022 0.010201861150562763
1023 0.01620548963546753
1024 0.012783796526491642
1025 0.00401498656719923
1026 0.01046916376799345
1027 0.0061399610713124275
1028 0.00830824300646782
1029 0.00903621781617403
1030 0.004641668405383825
1031 0.008105942979454994
1032 0.0035374732688069344
1033 0.008355772122740746
1034 0.017253847792744637
1035 0.009127333760261536
1036 0.011954814195632935
1037 0.009462174959480762
1038 0.011026611551642418
1039 0.0160409826785326
1040 0.007068802136927843
1041 0.029540497809648514
1042 0.01314124558120966
1043 0.008200750686228275
1044 0.00897769443690

1323 0.0069551835767924786
1324 0.007282059174031019
1325 0.006972624454647303
1326 0.006407918408513069
1327 0.005439934320747852
1328 0.01022075954824686
1329 0.008641756139695644
1330 0.04717861860990524
1331 0.003699331544339657
1332 0.009495516307651997
1333 0.017892658710479736
1334 0.02883565053343773
1335 0.005598417948931456
1336 0.008485973812639713
1337 0.004411309491842985
1338 0.005237003322690725
1339 0.004311491269618273
1340 0.005392557941377163
1341 0.005581061355769634
1342 0.005575233139097691
1343 0.017005613073706627
1344 0.0036709897685796022
1345 0.008106428198516369
1346 0.004988905508071184
1347 0.007604948244988918
1348 0.005261463578790426
1349 0.0076679945923388
1350 0.006385669577866793
1351 0.005287314299494028
1352 0.004826528485864401
1353 0.010205449536442757
1354 0.004298175685107708
1355 0.00720023550093174
1356 0.019187118858098984
1357 0.013789293356239796
1358 0.010638141073286533
1359 0.007561953738331795
1360 0.013232353143393993
1361 0.008461000

In [8]:
with torch.no_grad():
    x = torch.tensor(test_data, dtype=torch.float32)
    y = torch.tensor(test_labels, dtype=torch.float32)
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    print(loss.item()*scF)
    print(loss.item())    

9370.00350560993
0.012410600669682026


Much better than the previous case. Dropout indeed works :)

In [9]:
N, D_in, H1, H2, D_out = 32, 288, 64, 64, 1
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H1),
    torch.nn.Dropout(p=0.2),
    torch.nn.LeakyReLU(),
    torch.nn.Linear(H1, H2),
    torch.nn.Dropout(p=0.2),
    torch.nn.LeakyReLU(),
    torch.nn.Linear(H2, D_out)
)

loss_fn = torch.nn.MSELoss(reduction='mean')
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-2) ## Has inbuilt weights decay param of 1e-2
epochs = 1500
for t in range(epochs):
    j = 0
    data, labe = shuffle(train_data, train_labels)
    while j < len(train_data):
        x = data[j:j+N]
        x = torch.tensor(x, dtype=torch.float32)
        y = labe[j:j+N]
        y = torch.tensor(y, dtype=torch.float32)
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        j += N
    print(t, loss.item())

0 0.1670459657907486
1 0.7326758503913879
2 1.0717443227767944
3 0.08153651654720306
4 0.5891566276550293
5 0.09926246851682663
6 0.38560378551483154
7 0.058878857642412186
8 2.048258066177368
9 0.48684167861938477
10 3.587800979614258
11 0.08290214836597443
12 1.6586703062057495
13 0.32852527499198914
14 0.07316479086875916
15 0.34542468190193176
16 0.7723342776298523
17 0.17463287711143494
18 0.299919456243515
19 1.805903673171997
20 1.1351232528686523
21 0.028882591053843498
22 0.10534294694662094
23 0.5238568782806396
24 0.019840650260448456
25 0.0223807692527771
26 0.18464826047420502
27 0.023757057264447212
28 0.28383323550224304
29 0.19861949980258942
30 0.06883785873651505
31 0.15224473178386688
32 0.7435864210128784
33 0.25381365418434143
34 0.31164810061454773
35 0.8471640348434448
36 0.19364963471889496
37 0.03148633986711502
38 0.039537813514471054
39 0.11205454915761948
40 0.036386143416166306
41 0.37027037143707275
42 0.056171149015426636
43 0.043661732226610184
44 0.0408

341 0.024858420714735985
342 0.009143463335931301
343 0.014701664447784424
344 0.011248395778238773
345 0.010598312132060528
346 0.010971345007419586
347 0.005620581563562155
348 0.006321257911622524
349 0.00978265330195427
350 0.009198617190122604
351 0.009532505646348
352 0.007423121016472578
353 0.053393617272377014
354 0.01869712583720684
355 0.01361826341599226
356 0.005752609111368656
357 0.009155032224953175
358 0.011656884104013443
359 0.004526384174823761
360 0.013663112185895443
361 0.02175493910908699
362 0.01880713365972042
363 0.004791497718542814
364 0.0032098093070089817
365 0.011925962753593922
366 0.006831835024058819
367 0.020897338166832924
368 0.02934986911714077
369 0.005046581383794546
370 0.010981392115354538
371 0.004913384094834328
372 0.026104459539055824
373 0.0021580832544714212
374 0.005199291743338108
375 0.006734601221978664
376 0.008911960758268833
377 0.01272822916507721
378 0.009163526818156242
379 0.009118343703448772
380 0.014996514655649662
381 0.01

673 0.015559041872620583
674 0.046811819076538086
675 0.01407474372535944
676 0.028371095657348633
677 0.011475888080894947
678 0.004438743926584721
679 0.005860783625394106
680 0.021018119528889656
681 0.00506998598575592
682 0.007734448183327913
683 0.020707208663225174
684 0.005697459913790226
685 0.014407586306333542
686 0.0038912901654839516
687 0.004384489730000496
688 0.00759143615141511
689 0.00472381291911006
690 0.013800464570522308
691 0.00787564180791378
692 0.004978900775313377
693 0.010563882067799568
694 0.017068514600396156
695 0.022373707965016365
696 0.01967436447739601
697 0.005696531385183334
698 0.0049332682974636555
699 0.007088841870427132
700 0.003990976605564356
701 0.01975301094353199
702 0.014281491748988628
703 0.005726523231714964
704 0.0036242902278900146
705 0.0032106414437294006
706 0.020632758736610413
707 0.006358184386044741
708 0.004512525629252195
709 0.003984585404396057
710 0.00448672054335475
711 0.00861890148371458
712 0.00645800493657589
713 0.

1005 0.009269685484468937
1006 0.008551597595214844
1007 0.013899844139814377
1008 0.004536855034530163
1009 0.0150988157838583
1010 0.006327131763100624
1011 0.012689569033682346
1012 0.0048086317256093025
1013 0.02460479363799095
1014 0.007852367125451565
1015 0.009771736338734627
1016 0.012569528073072433
1017 0.00388672249391675
1018 0.006717042066156864
1019 0.008334479294717312
1020 0.004712922498583794
1021 0.005243954248726368
1022 0.00869228970259428
1023 0.023379480466246605
1024 0.005688880570232868
1025 0.010959796607494354
1026 0.007854996249079704
1027 0.006620793137699366
1028 0.012075318954885006
1029 0.00491303438320756
1030 0.010587924160063267
1031 0.007194621488451958
1032 0.005699654575437307
1033 0.007635089103132486
1034 0.008736144751310349
1035 0.007076899521052837
1036 0.00880495272576809
1037 0.005738469772040844
1038 0.007250962313264608
1039 0.018866993486881256
1040 0.0071625616401433945
1041 0.007756927516311407
1042 0.012727239169180393
1043 0.0110747953

1323 0.013214676640927792
1324 0.003268200671300292
1325 0.022206297144293785
1326 0.015559375286102295
1327 0.004502526018768549
1328 0.002303245011717081
1329 0.010063608177006245
1330 0.004030169919133186
1331 0.0026833254378288984
1332 0.01004472840577364
1333 0.009288152679800987
1334 0.01116891484707594
1335 0.008412742987275124
1336 0.013477517291903496
1337 0.008741209283471107
1338 0.010755001567304134
1339 0.014816821552813053
1340 0.006814063526690006
1341 0.004393916577100754
1342 0.008692317642271519
1343 0.008697607554495335
1344 0.02490098401904106
1345 0.007690830156207085
1346 0.04278477281332016
1347 0.042104821652173996
1348 0.013767695054411888
1349 0.0031977896578609943
1350 0.009126903489232063
1351 0.006364502012729645
1352 0.0199204683303833
1353 0.0464433915913105
1354 0.009310813620686531
1355 0.00977188628166914
1356 0.006602826062589884
1357 0.01718025654554367
1358 0.006423007231205702
1359 0.0074876234866678715
1360 0.006471989676356316
1361 0.004880981985

In [10]:
with torch.no_grad():
    x = torch.tensor(test_data, dtype=torch.float32)
    y = torch.tensor(test_labels, dtype=torch.float32)
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    print(loss.item()*scF)
    print(loss.item())

9385.779346339405
0.012431495822966099


Weights decay + dropout + more depth with shallower layers has given the best results till now. This is mainly because the data has lesser samples and more features per sample. We have to base the network architecture accordingly to get the maximum benifit.