In [2]:
from functools import partial
from typing import Callable
from modules import dataloaders, schedulers, model, train
from torch import nn, optim
from torchvision import transforms
import torch
import pandas as pd

In [3]:
print(torch.cuda.is_available(), torch.backends.cudnn.enabled)
cuda_flag = torch.cuda.is_available()
device = torch.device('cuda') if cuda_flag else torch.device('cpu')

True True


In [4]:
dl, _ = dataloaders.get_cifar10_data_loaders(data_dir='./data/cifar10', batch_size=100)
rgb_ave = dataloaders.channel_avg(dl)
tfms = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=rgb_ave, std=[1,1,1])])
train_dl, test_dl = dataloaders.get_cifar10_data_loaders(data_dir='./data/cifar10',
                                             batch_size=100,
                                             num_workers=8,
                                             pin_memory=cuda_flag,
                                             train_transform=tfms,
                                             test_transform=tfms)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [5]:
def run_experiment(train_dl:torch.utils.data.DataLoader,
                   test_dl:torch.utils.data.DataLoader,
                   net:nn.Module, 
                   scheduler_init:Callable[[optim.Optimizer], optim.lr_scheduler._LRScheduler],
                   device:torch.device,
                   iterations:int,
                   result_path:str,
                   criterion:nn.Module=nn.CrossEntropyLoss(),
                   validate_it=500):
    'TODO: docstring'
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.004)
    scheduler = scheduler_init(optimizer)
    recorder = {
        'iteration' : [],
        'trn_loss' : [],
        'lr' : [],
        'val_loss' : [],
        'val_acc' : []}
    obe = partial(train.on_batch_end,
                  recorder,
                  test_dl,
                  net,
                  criterion,
                  device,
                  validate_it=validate_it)
    train.train_run(net,
                    train_dl,
                    criterion,
                    optimizer,
                    scheduler,
                    iterations,
                    obe,
                    device)
    return pd.DataFrame(recorder).to_csv(result_path, index=False)

In [None]:
# Cifar10Net_full EXPERIMENTS

# EXPERIMENT: learning rate range test

net = model.Cifar10Net_full().to(device)
model.init_weights(net)
sched_init = partial(schedulers.TriangularScheduler, 4000, 0.001, 0.04)
run_experiment(train_dl,
               test_dl,
               net,
               sched_init,
               device,
               4000,
               './results/full/lrrt.csv',
               validate_it=25)

# EXPERIMENT: fixed learning rate policy

net = model.Cifar10Net_full().to(device)
model.init_weights(net)
run_experiment(train_dl,
               test_dl,
               net,
               schedulers.FixedScheduler,
               device,
               70000,
               './results/full/fixed.csv')

# EXPERIMENT: triangular2 (CLR) learning rate policy 

net = model.Cifar10Net_full().to(device)
model.init_weights(net)
sched_init = partial(schedulers.Triangular2Scheduler, 2000, 0.001, 0.006)

run_experiment(train_dl,
               test_dl,
               net,
               sched_init,
               device,
               25000,
               './results/full/triangular2_2.csv')

# EXPERIMENT: decay learning rate policy

net = model.Cifar10Net_full().to(device)
model.init_weights(net)
sched_init = partial(schedulers.DecayScheduler, 4000, 0.001, 0.007)
run_experiment(train_dl,
               test_dl,
               net,
               sched_init,
               device,
               25000,
               './results/full/decay_2.csv')

# EXPERIMENT: exp learning rate policy

net = model.Cifar10Net_full().to(device)
model.init_weights(net)
crit = nn.CrossEntropyLoss()
sched_init = partial(schedulers.ExpScheduler, 0.99994, 0.001)

run_experiment(train_dl,
               test_dl,
               net,
               sched_init,
               device,
               70000,
               './results/full/exp_3.csv')

# EXPERIMENT: exp_range learning rate policy

net = model.Cifar10Net_full().to(device)
model.init_weights(net)
sched = partial(schedulers.ExpRangeScheduler,0.99994, 2000,0.001, 0.006)
run_experiment(train_dl,
               test_dl,
               net,
               sched_init,
               device,
               42000,
               './results/full/exp_range_2.csv')

HBox(children=(IntProgress(value=0, max=4000), HTML(value='')))

2.30465030670166 | 2.303711314201355 | 0.1
2.2985925674438477 | 2.3035845804214476 | 0.1
2.301647186279297 | 2.3033492517471315 | 0.1
2.3030312061309814 | 2.303114347457886 | 0.1
2.2966582775115967 | 2.302780387401581 | 0.1
2.3018317222595215 | 2.302462773323059 | 0.1
2.299055337905884 | 2.3020982003211974 | 0.1
2.3008315563201904 | 2.3017457056045534 | 0.1
2.299175977706909 | 2.3013187313079833 | 0.101
2.3081653118133545 | 2.300796992778778 | 0.1727
2.2984344959259033 | 2.299976770877838 | 0.1797
2.2962646484375 | 2.2988478207588194 | 0.1723
2.2952635288238525 | 2.2971551036834716 | 0.1785
2.291926860809326 | 2.2945047998428345 | 0.2088
2.2897047996520996 | 2.289475178718567 | 0.1849
2.2819466590881348 | 2.278887469768524 | 0.1594
2.25984525680542 | 2.251675727367401 | 0.1844
2.159640073776245 | 2.178405840396881 | 0.2028
2.2562553882598877 | 2.1036225390434264 | 0.2165
2.023164749145508 | 2.0780276000499724 | 0.2423
2.0153701305389404 | 2.0400673723220826 | 0.2339
1.911908745765686 |

HBox(children=(IntProgress(value=0, max=70000), HTML(value='')))

2.285419464111328 | 2.305841493606567 | 0.0985
2.294179677963257 | 2.2940831208229064 | 0.1754
2.099827527999878 | 2.125884523391724 | 0.2152
1.8946256637573242 | 1.9919047474861145 | 0.2633
1.9036554098129272 | 1.8771346306800842 | 0.3271
1.616281270980835 | 1.7252174282073975 | 0.3646
1.6381661891937256 | 1.6438821613788606 | 0.3926
1.5122538805007935 | 1.5747453129291535 | 0.4185
1.4543827772140503 | 1.5425221991539002 | 0.4313
1.6052641868591309 | 1.5154413068294526 | 0.437
1.32778000831604 | 1.4798981201648713 | 0.4539
1.5600225925445557 | 1.4666446721553803 | 0.4574
1.532966136932373 | 1.448083268404007 | 0.4639
1.3878753185272217 | 1.3930725359916687 | 0.4895
1.626566767692566 | 1.3742545688152312 | 0.501
1.3297207355499268 | 1.3633630168437958 | 0.5046
1.2833503484725952 | 1.3447593033313752 | 0.5114
1.2672240734100342 | 1.3279960131645203 | 0.5154
1.2740700244903564 | 1.302495356798172 | 0.53
1.3121875524520874 | 1.2885657680034637 | 0.5365
1.3540982007980347 | 1.2839729022979

In [None]:
# Cifar10Net_quick EXPERIMENTS

# EXPERIMENT: learning rate range test

net = model.Cifar10Net_quick().to(device)
model.init_weights(net)
sched_init = partial(schedulers.TriangularScheduler, 4000, 0.001, 0.04)
run_experiment(train_dl,
               test_dl,
               net,
               sched_init,
               device,
               4000,
               './results/full/lrrt.csv',
               validate_it=25)

# EXPERIMENT: fixed learning rate policy

net = model.Cifar10Net_quick().to(device)
model.init_weights(net)
run_experiment(train_dl,
               test_dl,
               net,
               schedulers.FixedScheduler,
               device,
               70000,
               './results/full/fixed.csv')

# EXPERIMENT: triangular2 (CLR) learning rate policy 

net = model.Cifar10Net_quick().to(device)
model.init_weights(net)
sched_init = partial(schedulers.Triangular2Scheduler, 2000, 0.001, 0.006)

run_experiment(train_dl,
               test_dl,
               net,
               sched_init,
               device,
               25000,
               './results/full/triangular2_2.csv')

# EXPERIMENT: decay learning rate policy

net = model.Cifar10Net_quick().to(device)
model.init_weights(net)
sched_init = partial(schedulers.DecayScheduler, 4000, 0.001, 0.007)
run_experiment(train_dl,
               test_dl,
               net,
               sched_init,
               device,
               25000,
               './results/full/decay_2.csv')

# EXPERIMENT: exp learning rate policy

net = model.Cifar10Net_quick().to(device)
model.init_weights(net)
crit = nn.CrossEntropyLoss()
sched_init = partial(schedulers.ExpScheduler, 0.99994, 0.001)

run_experiment(train_dl,
               test_dl,
               net,
               sched_init,
               device,
               70000,
               './results/full/exp_3.csv')

# EXPERIMENT: exp_range learning rate policy

net = model.Cifar10Net_quick().to(device)
model.init_weights(net)
sched = partial(schedulers.ExpRangeScheduler,0.99994, 2000,0.001, 0.006)
run_experiment(train_dl,
               test_dl,
               net,
               sched_init,
               device,
               42000,
               './results/full/exp_range_2.csv')

In [None]:
# TODO: DELETE BELOW

In [9]:
# EXPERIMENT: learning rate range test

net = model.Cifar10Net_quick().to(device)
model.init_weights(net)
sched_init = partial(schedulers.TriangularScheduler, 4000, 0.001, 0.04)
run_experiment(train_dl,
               test_dl,
               net,
               sched_init,
               device,
               4000,
               './results/lrrt2.csv',
               validate_it=25)

HBox(children=(IntProgress(value=0, max=4000), HTML(value='')))

2.304516077041626 | 2.3048609638214113 | 0.1054
2.3069167137145996 | 2.304488797187805 | 0.1007
2.3039209842681885 | 2.3039527559280395 | 0.1028
2.295208692550659 | 2.3033598685264587 | 0.1181
2.302865982055664 | 2.3027776288986206 | 0.1088
2.300081253051758 | 2.3019788885116577 | 0.1273
2.295717716217041 | 2.3011337399482725 | 0.1353
2.298647403717041 | 2.3001773381233215 | 0.1606
2.293503761291504 | 2.298734428882599 | 0.1265
2.2935144901275635 | 2.297237000465393 | 0.1499
2.2919416427612305 | 2.2943431997299193 | 0.1296
2.2887930870056152 | 2.2895711350440977 | 0.1286
2.2857391834259033 | 2.281714277267456 | 0.1801
2.2836966514587402 | 2.265476853847504 | 0.1974
2.2485594749450684 | 2.2282635164260864 | 0.2065
2.159878730773926 | 2.1658349990844727 | 0.2033
2.151170253753662 | 2.108147109746933 | 0.2308
2.1795685291290283 | 2.0764776492118835 | 0.2257
2.062422275543213 | 2.0214650619029997 | 0.2495
2.0164906978607178 | 2.011452604532242 | 0.2498
1.8957984447479248 | 2.00157815337181

In [10]:
# EXPERIMENT: fixed learning rate policy

net = model.Cifar10Net_quick().to(device)
model.init_weights(net)
run_experiment(train_dl,
               test_dl,
               net,
               schedulers.FixedScheduler,
               device,
               70000,
               'fixed2.csv')

HBox(children=(IntProgress(value=0, max=70000), HTML(value='')))

2.292389392852783 | 2.305676529407501 | 0.1
2.2962806224823 | 2.301499183177948 | 0.1009
2.2934370040893555 | 2.2942102193832397 | 0.1704
2.1653144359588623 | 2.1828091287612916 | 0.2097
1.927932620048523 | 1.991353269815445 | 0.2674
1.8914573192596436 | 1.854973202943802 | 0.3144
1.6109402179718018 | 1.7514321291446686 | 0.3598
1.5970404148101807 | 1.672984594106674 | 0.3814
1.6003726720809937 | 1.622306238412857 | 0.3915
1.548377275466919 | 1.5781875145435333 | 0.4125
1.6007107496261597 | 1.5399190866947174 | 0.4292
1.4526408910751343 | 1.5022450697422027 | 0.4417
1.4007147550582886 | 1.4866573119163513 | 0.4504
1.3464469909667969 | 1.444975861310959 | 0.4614
1.4099258184432983 | 1.4501166594028474 | 0.4641
1.5757185220718384 | 1.3966692447662354 | 0.4834
1.3880709409713745 | 1.3730267000198364 | 0.4972
1.3430217504501343 | 1.360551611185074 | 0.5023
1.4896854162216187 | 1.3289223742485046 | 0.5183
1.177941918373108 | 1.3132964408397674 | 0.5232
1.2616137266159058 | 1.290154372453689

In [11]:
# EXPERIMENT: triangular2 (CLR) learning rate policy 

net = model.Cifar10Net_quick().to(device)
model.init_weights(net)
sched_init = partial(schedulers.Triangular2Scheduler, 2000, 0.001, 0.006)

run_experiment(train_dl,
               test_dl,
               net,
               sched_init,
               device,
               25000,
               './results/triangular2_2.csv')

HBox(children=(IntProgress(value=0, max=25000), HTML(value='')))

2.2968926429748535 | 2.30457909822464 | 0.1
2.2868664264678955 | 2.286785531044006 | 0.197
1.9440335035324097 | 1.8334182965755463 | 0.3164
1.7572338581085205 | 1.6157864546775818 | 0.4007
1.5581908226013184 | 1.5078701889514923 | 0.4485
1.358525037765503 | 1.3846300208568574 | 0.4987
1.4854240417480469 | 1.2941029393672943 | 0.5328
1.1617599725723267 | 1.24062604367733 | 0.5534
1.2970151901245117 | 1.2233579629659652 | 0.5633
1.0547809600830078 | 1.1899817889928819 | 0.5721
1.2854118347167969 | 1.2040015226602554 | 0.5681
1.2484716176986694 | 1.1965202814340592 | 0.5689
1.1591776609420776 | 1.1221379524469375 | 0.6
0.9968650341033936 | 1.0945366656780242 | 0.6129
1.1042494773864746 | 1.060657023191452 | 0.6247
1.0013238191604614 | 1.0282442647218704 | 0.6414
1.151883602142334 | 1.0122625637054443 | 0.6456
1.0135200023651123 | 1.0158803641796113 | 0.6443
1.0705336332321167 | 0.996675221323967 | 0.6509
0.762789785861969 | 1.0033236515522004 | 0.6489
0.9662367105484009 | 0.98576820611953

In [12]:
# EXPERIMENT: decay learning rate policy

net = model.Cifar10Net_quick().to(device)
model.init_weights(net)
sched_init = partial(schedulers.DecayScheduler, 4000, 0.001, 0.007)
run_experiment(train_dl,
               test_dl,
               net,
               sched_init,
               device,
               25000,
               './results/decay_2.csv')

HBox(children=(IntProgress(value=0, max=25000), HTML(value='')))

2.3068320751190186 | 2.305737099647522 | 0.1
1.8455226421356201 | 1.741349081993103 | 0.341
1.3168511390686035 | 1.5426330029964448 | 0.4306
1.2807995080947876 | 1.4034880530834197 | 0.4856
1.3582720756530762 | 1.3313095653057099 | 0.5173
1.4101901054382324 | 1.2592725324630738 | 0.5426
1.1797466278076172 | 1.2313115227222442 | 0.556
1.3030451536178589 | 1.1897713398933412 | 0.5725
1.2574032545089722 | 1.1579323160648345 | 0.5857
1.0271835327148438 | 1.1512867194414138 | 0.5916
1.120173692703247 | 1.1208481669425965 | 0.5996
1.1658633947372437 | 1.1313316065073014 | 0.598
1.0303394794464111 | 1.0943329101800918 | 0.6124
1.1335058212280273 | 1.0864201718568802 | 0.6163
1.1425772905349731 | 1.1019526296854019 | 0.6139
1.238189935684204 | 1.105303453207016 | 0.6112
1.2152143716812134 | 1.066099419593811 | 0.6215
0.9749917387962341 | 1.0517398589849472 | 0.6299
1.1790443658828735 | 1.0545223528146743 | 0.6269
1.1043736934661865 | 1.0503594851493836 | 0.6256
1.143110990524292 | 1.0239750176

In [None]:
# EXPERIMENT: exp learning rate policy

net = model.Cifar10Net_quick().to(device)
model.init_weights(net)
crit = nn.CrossEntropyLoss()
sched_init = partial(schedulers.ExpScheduler, 0.99994, 0.001)

run_experiment(train_dl,
               test_dl,
               net,
               sched_init,
               device,
               70000,
               './results/exp_3.csv')

In [None]:
# EXPERIMENT: exp_range learning rate policy

net = model.Cifar10Net_quick().to(device)
model.init_weights(net)
sched = partial(schedulers.ExpRangeScheduler,0.99994, 2000,0.001, 0.006)
run_experiment(train_dl,
               test_dl,
               net,
               sched_init,
               device,
               42000,
               './results/exp_range_2.csv')

HBox(children=(IntProgress(value=0, max=42000), HTML(value='')))

2.3033275604248047 | 2.3048804426193237 | 0.125
1.7304213047027588 | 1.7981905615329743 | 0.3349
1.6655535697937012 | 1.5733483064174651 | 0.4171
1.2522352933883667 | 1.4444288444519042 | 0.4615
1.2984336614608765 | 1.373805477619171 | 0.5014
1.1318652629852295 | 1.3159936928749085 | 0.5188
1.2138344049453735 | 1.2724424839019775 | 0.5443
1.2376002073287964 | 1.2243223750591279 | 0.5564
1.1638312339782715 | 1.2007113236188889 | 0.5742
1.2266716957092285 | 1.1739650183916093 | 0.5832
1.2556490898132324 | 1.1645180624723435 | 0.5815
1.066576600074768 | 1.1505553632974626 | 0.594
1.0201787948608398 | 1.1410143041610719 | 0.5931
0.980394721031189 | 1.1188978105783463 | 0.6053
1.1203137636184692 | 1.1156153786182403 | 0.6074
1.0896766185760498 | 1.10135098695755 | 0.613
1.017928957939148 | 1.0824932366609574 | 0.6237
1.1794638633728027 | 1.0765542471408844 | 0.6213
0.9724977612495422 | 1.0764544117450714 | 0.6193
1.0801877975463867 | 1.0585443252325057 | 0.6263
1.2273309230804443 | 1.045135

In [21]:
# TODO: DELETE

class DemoNet(nn.Module):
    'Port of caffe architecture: https://github.com/BVLC/caffe/blob/master/examples/cifar10/cifar10_quick_train_test.prototxt'

    def __init__(self, num_classes=10):
        'TODO: docstring'
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.AvgPool2d(kernel_size=3, stride=2)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.AvgPool2d(kernel_size=3, stride=2)
        self.ip1 = nn.Linear(64 * 3 * 3, 64)
        self.ip2 = nn.Linear(64, 10)

    def forward(self, x):
        'TODO: docstring'
        out = self.conv1(x)
        out = self.pool1(out)
        out = self.relu1(out)
        out = self.conv2(out)
        out = self.relu2(out)
        out = self.pool2(out)
        out = self.conv3(out)
        out = self.relu3(out)
        out = self.pool3(out)
        out = self.ip1(out.view(out.shape[0], out.shape[1] * out.shape[2] * out.shape[3]))
        out = self.ip2(out)
        return out


inp = next(iter(train_dl))[0]
net = DemoNet()
net(inp).shape

torch.Size([100, 10])

In [None]:
 #  ------- LEGACY -------

In [None]:

def record_run(train_dl:torch.utils.data.DataLoader,
               test_dl:torch.utils.data.DataLoader,
               net:nn.Module,
               crit:nn.Module,
               optimizer:optim.Optimizer,
               scheduler:optim.lr_scheduler._LRScheduler,
               device:torch.device,
               iterations:int,
               result_path:str,
               validate_it=500):
    'TODO:docstring'
    recorder = {
        'iteration' : [],
        'trn_loss' : [],
        'lr' : [],
        'val_loss' : [],
        'val_acc' : []}
    obe = partial(train.on_batch_end, recorder, test_dl, net, crit, device, validate_it=validate_it)
    train.train_run(net, train_dl, crit, opt, scheduler, iterations, obe, device)
    return pd.DataFrame(recorder).to_csv(result_path, index=False)

In [6]:
# learning rate range test
tfms = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=rgb_ave, std=[1,1,1])])
train_dl, test_dl = dataloaders.get_cifar10_data_loaders(data_dir='./data/cifar10',
                                             batch_size=100,
                                             num_workers=8,
                                             pin_memory=cuda_flag,
                                             train_transform=tfms,
                                             test_transform=tfms)

net = model.Cifar10Net_quick().to(device)
model.init_weights(net)
crit = nn.CrossEntropyLoss()
opt = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.004)
sched = schedulers.TriangularScheduler(4000, 0.001, 0.04, opt)
record_run(train_dl,
           test_dl,
           net,
           crit,
           opt,
           sched,
           device,
           4000,
           './results/lrrt2.csv',
           25)

Files already downloaded and verified
Files already downloaded and verified


HBox(children=(IntProgress(value=0, max=4000), HTML(value='')))

2.297976016998291 | 2.3042796659469604 | 0.1
2.309221029281616 | 2.303995385169983 | 0.1
2.30049204826355 | 2.3035417294502256 | 0.1
2.299346685409546 | 2.3030977058410644 | 0.1
2.3032021522521973 | 2.302693500518799 | 0.1
2.3029372692108154 | 2.3023087000846862 | 0.1
2.3073477745056152 | 2.301745216846466 | 0.1
2.3032102584838867 | 2.3009733629226683 | 0.1
2.293759346008301 | 2.3001813006401064 | 0.1005
2.298952341079712 | 2.299080331325531 | 0.1007
2.3037867546081543 | 2.297426505088806 | 0.1068
2.2963027954101562 | 2.294880123138428 | 0.1372
2.2922909259796143 | 2.290413691997528 | 0.1531
2.2857234477996826 | 2.2822949862480164 | 0.1855
2.2633185386657715 | 2.2613956046104433 | 0.1829
2.2346177101135254 | 2.2079072213172912 | 0.1883
2.1203038692474365 | 2.129856433868408 | 0.2057
2.1327919960021973 | 2.0665711760520935 | 0.2266
2.0027883052825928 | 2.0484903216362 | 0.2313
2.0461008548736572 | 2.0263922429084777 | 0.24
1.928764820098877 | 2.0085027587413786 | 0.2555
1.97331178188323

In [19]:
# fixed
tfms = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=rgb_ave, std=[1,1,1])])
train_dl, test_dl = dataloaders.get_cifar10_data_loaders(data_dir='./data/cifar10',
                                             batch_size=100,
                                             num_workers=8,
                                             pin_memory=cuda_flag,
                                             train_transform=tfms,
                                             test_transform=tfms)

net = model.Cifar10Net().to(device)
model.init_weights(net)
crit = nn.CrossEntropyLoss()
opt = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.004)
sched = schedulers.FixedScheduler(optimizer=opt)
record_run(train_dl,
           test_dl,
           net,
           crit,
           opt,
           sched,
           device,
           70000,
           './results/fixed.csv')

Files already downloaded and verified
Files already downloaded and verified


HBox(children=(IntProgress(value=0, max=70000), HTML(value='')))

2.2959251403808594 | 2.3048656678199766 | 0.1
2.2366421222686768 | 2.252965657711029 | 0.2115
1.9379587173461914 | 1.9531116092205048 | 0.2997
1.8152180910110474 | 1.7316903233528138 | 0.3808
1.6335580348968506 | 1.598079216480255 | 0.4195
1.3986411094665527 | 1.522326169013977 | 0.4467
1.389978289604187 | 1.475188820362091 | 0.4663
1.5397993326187134 | 1.4426518404483795 | 0.4792
1.3852248191833496 | 1.3794143879413605 | 0.5059
1.40769624710083 | 1.3441275775432586 | 0.521
1.2174060344696045 | 1.3135958635807037 | 0.5323
1.3884704113006592 | 1.2910792154073716 | 0.5377
1.1617053747177124 | 1.2626908934116363 | 0.5492
1.2419323921203613 | 1.2411016094684602 | 0.5572
0.9752600789070129 | 1.2126768308877944 | 0.5731
1.1910371780395508 | 1.2149149960279464 | 0.569
1.2233253717422485 | 1.1825607550144195 | 0.5805
1.1781970262527466 | 1.2094527328014373 | 0.5747
1.2020012140274048 | 1.1470334786176681 | 0.5952
1.2650545835494995 | 1.1130068135261535 | 0.6075
1.0533565282821655 | 1.144056512

In [8]:
# triangular 2
tfms = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=rgb_ave, std=[1,1,1])])
train_dl, test_dl = dataloaders.get_cifar10_data_loaders(data_dir='./data/cifar10',
                                             batch_size=100,
                                             num_workers=8,
                                             pin_memory=cuda_flag,
                                             train_transform=tfms,
                                             test_transform=tfms)

net = model.Cifar10Net_quick().to(device)
model.init_weights(net)
crit = nn.CrossEntropyLoss()
opt = optim.SGD(net.parameters(), lr=0.0001, momentum=0.9, weight_decay=0.004)
sched = schedulers.Triangular2Scheduler(step_size=2000, min_lr=0.001, max_lr=0.006, optimizer=opt)
record_run(train_dl,
           test_dl,
           net,
           crit,
           opt,
           sched,
           device,
           25000,
           './results/triangular2_2.csv')

Files already downloaded and verified
Files already downloaded and verified


HBox(children=(IntProgress(value=0, max=25000), HTML(value='')))

2.3118653297424316 | 2.305169324874878 | 0.1
2.2890708446502686 | 2.289864807128906 | 0.1631
1.8108723163604736 | 1.889981905221939 | 0.3069
1.554026484489441 | 1.6907937335968017 | 0.3728
1.5032235383987427 | 1.481263747215271 | 0.4569
1.3109405040740967 | 1.4180316984653474 | 0.4863
1.328588604927063 | 1.2947986006736756 | 0.5335
1.1822865009307861 | 1.2496675336360932 | 0.5459
1.0985366106033325 | 1.1947410076856613 | 0.5725
1.110060691833496 | 1.2258172476291656 | 0.5624
1.214227318763733 | 1.1856699573993683 | 0.5734
1.2990254163742065 | 1.167282493710518 | 0.5839
1.0115678310394287 | 1.157571920156479 | 0.5914
1.1854610443115234 | 1.0996272665262223 | 0.6075
1.1256828308105469 | 1.0671267771720887 | 0.6234
1.1380622386932373 | 1.0444374054670333 | 0.6326
0.8972554802894592 | 1.009858409166336 | 0.6486
1.071244239807129 | 1.0174080002307893 | 0.6416
1.0800983905792236 | 1.0005807238817215 | 0.6503
1.025336742401123 | 1.0176855820417403 | 0.6436
1.0522180795669556 | 0.9715644073486

In [16]:
# decay
tfms = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=rgb_ave, std=[1,1,1])])
train_dl, test_dl = dataloaders.get_cifar10_data_loaders(data_dir='./data/cifar10',
                                             batch_size=100,
                                             num_workers=8,
                                             pin_memory=cuda_flag,
                                             train_transform=tfms,
                                             test_transform=tfms)

net = model.Cifar10Net().to(device)
model.init_weights(net)
crit = nn.CrossEntropyLoss()
opt = optim.SGD(net.parameters(), lr=0.0001, momentum=0.9, weight_decay=0.004)
sched = schedulers.DecayScheduler(step_size=4000, min_lr=0.001, max_lr=0.007, optimizer=opt)
record_run(train_dl,
           test_dl,
           net,
           crit,
           opt,
           sched,
           device,
           25000,
           './results/decay.csv')

Files already downloaded and verified
Files already downloaded and verified


HBox(children=(IntProgress(value=0, max=25000), HTML(value='')))

2.3158750534057617 | 2.3052439451217652 | 0.0939
1.55889892578125 | 1.5285473811626433 | 0.4411
1.4037697315216064 | 1.3739276313781739 | 0.5096
1.0877312421798706 | 1.2430369985103606 | 0.5566
1.0989784002304077 | 1.1535150969028474 | 0.6035
1.1141140460968018 | 1.0960009902715684 | 0.6123
1.0204178094863892 | 1.0451411986351014 | 0.6345
1.0428575277328491 | 1.0014646846055983 | 0.6501
1.1275478601455688 | 0.9752665412425995 | 0.6587
0.8905993700027466 | 0.9625169438123703 | 0.6675
0.9804762005805969 | 0.958123340010643 | 0.664
0.8910219073295593 | 0.9456889796257019 | 0.671
1.0942773818969727 | 0.9407467526197434 | 0.6734
0.8029584288597107 | 0.9291936069726944 | 0.6726
0.8100183606147766 | 0.9213050878047944 | 0.6782
0.9432716965675354 | 0.9157968461513519 | 0.6824
0.9402356147766113 | 0.9094270581007003 | 0.6816
0.7231302857398987 | 0.9107110375165939 | 0.6791
0.7717517018318176 | 0.9013521921634674 | 0.6847
0.9524957537651062 | 0.8880109769105912 | 0.6909
0.8856656551361084 | 0.89

In [13]:
# exp
tfms = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=rgb_ave, std=[1,1,1])])
train_dl, test_dl = dataloaders.get_cifar10_data_loaders(data_dir='./data/cifar10',
                                             batch_size=100,
                                             num_workers=8,
                                             pin_memory=cuda_flag,
                                             train_transform=tfms,
                                             test_transform=tfms)

net = model.Cifar10Net().to(device)
model.init_weights(net)
crit = nn.CrossEntropyLoss()
opt = optim.SGD(net.parameters(), lr=0.0001, momentum=0.9, weight_decay=0.004)
sched = schedulers.ExpScheduler(gamma= 0.99994, initial_lr=0.001, optimizer=opt)
record_run(train_dl,
           test_dl,
           net,
           crit,
           opt,
           sched,
           device,
           70000,
           './results/exp_2.csv')

Files already downloaded and verified
Files already downloaded and verified


HBox(children=(IntProgress(value=0, max=70000), HTML(value='')))

2.3062984943389893 | 2.304504623413086 | 0.1
2.039564609527588 | 2.1313498187065125 | 0.2341
1.9713293313980103 | 1.9188920211791993 | 0.315
1.667871117591858 | 1.722483547925949 | 0.3802
1.608794093132019 | 1.6095246386528015 | 0.4151
1.4804493188858032 | 1.5190627157688141 | 0.4431
1.4638692140579224 | 1.4656637454032897 | 0.466
1.3903836011886597 | 1.4305872213840485 | 0.4819
1.1998200416564941 | 1.4089925062656403 | 0.4867
1.3671257495880127 | 1.372826428413391 | 0.5058
1.4028111696243286 | 1.346828166246414 | 0.5166
1.2423450946807861 | 1.3199831914901734 | 0.5302
1.1036854982376099 | 1.320092191696167 | 0.5275
1.2234302759170532 | 1.2978788781166077 | 0.541
1.2314274311065674 | 1.2897173726558686 | 0.5384
1.1907132863998413 | 1.247455985546112 | 0.56
1.2181692123413086 | 1.2384896421432494 | 0.5607
1.1905916929244995 | 1.2258339285850526 | 0.5674
1.1279654502868652 | 1.2145337605476378 | 0.5695
1.3476121425628662 | 1.2191427284479142 | 0.5657
1.1123565435409546 | 1.19009255766868

In [14]:
# exp_range
tfms = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=rgb_ave, std=[1,1,1])])
train_dl, test_dl = dataloaders.get_cifar10_data_loaders(data_dir='./data/cifar10',
                                             batch_size=100,
                                             num_workers=8,
                                             pin_memory=cuda_flag,
                                             train_transform=tfms,
                                             test_transform=tfms)

net = model.Cifar10Net().to(device)
model.init_weights(net)
crit = nn.CrossEntropyLoss()
opt = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.004)
sched = schedulers.ExpRangeScheduler(gamma=0.99994, step_size=2000, min_lr=0.001, max_lr=0.006, optimizer=opt)
record_run(train_dl,
           test_dl,
           net,
           crit,
           opt,
           sched,
           device,
           42000,
           './results/exp_range_2.csv')

Files already downloaded and verified
Files already downloaded and verified


HBox(children=(IntProgress(value=0, max=42000), HTML(value='')))

2.3022027015686035 | 2.3050201654434206 | 0.1
2.094893455505371 | 2.0143145072460173 | 0.2726
1.4727064371109009 | 1.60831512093544 | 0.4069
1.4026302099227905 | 1.4376909470558166 | 0.4797
1.4212661981582642 | 1.321196415424347 | 0.5283
1.3423069715499878 | 1.223569244146347 | 0.5675
1.3500639200210571 | 1.1336965662240983 | 0.6035
0.9937301874160767 | 1.0745982599258423 | 0.626
0.9549305438995361 | 1.0473684185743333 | 0.6372
1.0692508220672607 | 1.035099944472313 | 0.6449
1.079912781715393 | 1.0299227404594422 | 0.6426
0.9949414134025574 | 1.040691353082657 | 0.6359
0.8612189292907715 | 0.9839828884601594 | 0.6582
0.9411316514015198 | 0.9726246029138566 | 0.6592
1.0404269695281982 | 0.9285584843158722 | 0.6766
0.9078772068023682 | 0.8814988321065903 | 0.6983
0.6949732899665833 | 0.8608512270450592 | 0.7049
0.7867686748504639 | 0.8890821141004562 | 0.6884
0.7824012041091919 | 0.879164976477623 | 0.7014
0.9796743988990784 | 0.8612118291854859 | 0.7041
0.9537234306335449 | 0.8866164934