## CIFAR 10

In [1]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [2]:
from fastai.conv_learner import *
from fastai.models.cifar10.wideresnet import wrn_22_cat, wrn_22, WideResNetConcat
torch.backends.cudnn.benchmark = True
PATH = Path("data/cifar10/")
os.makedirs(PATH,exist_ok=True)

In [3]:
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
stats = (np.array([ 0.4914 ,  0.48216,  0.44653]), np.array([ 0.24703,  0.24349,  0.26159]))

bs=512
sz=32
workers=7

In [4]:
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
def pad(img, p=4, padding_mode='reflect'):
    return Image.fromarray(np.pad(np.asarray(img), ((p, p), (p, p), (0, 0)), padding_mode))

def torch_loader(data_path, size, prefetcher=True):
    if not os.path.exists(data_path/'train'): download_cifar10(data_path)

    # Data loading code
    traindir = str(data_path/'train')
    valdir = str(data_path/'test')
    tfms = [transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))]

    train_tfms = transforms.Compose([
        pad, # TODO: use `padding` rather than assuming 4
        transforms.RandomCrop(size),
        transforms.RandomHorizontalFlip(),
    ] + tfms)

    train_dataset = datasets.ImageFolder(traindir, train_tfms)
    val_dataset = datasets.ImageFolder(valdir, transforms.Compose(tfms))

    train_loader = DataLoader(
        train_dataset, batch_size=bs, shuffle=True,
        num_workers=workers, pin_memory=True)

    val_loader = DataLoader(
        val_dataset, batch_size=bs, shuffle=False,
        num_workers=workers, pin_memory=True)
    
    aug_loader = DataLoader(
        datasets.ImageFolder(valdir, train_tfms),
        batch_size=bs, shuffle=False,
        num_workers=workers, pin_memory=True)

    if prefetcher:
        train_loader = DataPrefetcher(train_loader)
        val_loader = DataPrefetcher(val_loader)
        aug_loader = DataPrefetcher(aug_loader)
    
    data = ModelData(data_path, train_loader, val_loader)
    data.sz = size
    data.aug_dl = aug_loader
    return data

# Seems to speed up training by ~2%
class DataPrefetcher():
    def __init__(self, loader, stop_after=None):
        self.loader = loader
        self.dataset = loader.dataset
        self.stream = torch.cuda.Stream()
        self.stop_after = stop_after
        self.next_input = None
        self.next_target = None

    def __len__(self):
        return len(self.loader)

    def preload(self):
        try:
            self.next_input, self.next_target = next(self.loaditer)
        except StopIteration:
            self.next_input = None
            self.next_target = None
            return
        with torch.cuda.stream(self.stream):
            self.next_input = self.next_input.cuda(async=True)
            self.next_target = self.next_target.cuda(async=True)

    def __iter__(self):
        count = 0
        self.loaditer = iter(self.loader)
        self.preload()
        while self.next_input is not None:
            torch.cuda.current_stream().wait_stream(self.stream)
            input = self.next_input
            target = self.next_target
            self.preload()
            count += 1
            yield input, target
            if type(self.stop_after) is int and (count > self.stop_after):
                break

In [5]:
data = torch_loader(PATH, sz)

In [6]:
'''Pre-activation ResNet in PyTorch.

Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Identity Mappings in Deep Residual Networks. arXiv:1603.05027
'''
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.autograd import Variable


class AdaptiveConcatPool2d(nn.Module):
    def __init__(self, sz=None):
        super().__init__()
        sz = sz or (1,1)
        self.ap = nn.AdaptiveAvgPool2d(sz)
        self.mp = nn.AdaptiveMaxPool2d(sz)
    def forward(self, x): return torch.cat([self.mp(x), self.ap(x)], 1)
    

class PreActBlock(nn.Module):
    '''Pre-activation version of the BasicBlock.'''
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.bn2.bias.data.zero_()
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)

        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
            )

    def forward(self, x):
        out = F.relu(self.bn1(x), inplace=True)
        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out), inplace=True))
        out += shortcut
        return out


class PreActBottleneck(nn.Module):
    '''Pre-activation version of the original Bottleneck module.'''
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)

        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
            )

    def forward(self, x):
        out = F.relu(self.bn1(x))
        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))
        out = self.conv3(F.relu(self.bn3(out)))
        out += shortcut
        return out


class PreActResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10, concatpool=False):
        super(PreActResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.pool = AdaptiveConcatPool2d() if concatpool else nn.AdaptiveMaxPool2d((1,1))
        
        self.linear = nn.Linear(512*block.expansion*(concatpool+1), num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
#         out = F.adaptive_max_pool2d(out, 1)
        out = self.pool(out)
        out = out.view(out.size(0), -1)
        return F.log_softmax(self.linear(out))

def preact_resnet18(): return PreActResNet(PreActBlock, [2,2,2,2])
def preact_resnet2332(): return PreActResNet(PreActBlock, [2,3,3,2])
def preact_resnet3333(): return PreActResNet(PreActBlock, [3,3,3,3])
def preact_resnet34(): return PreActResNet(PreActBlock, [3,4,6,3])
def preact_resnet50(): return PreActResNet(PreActBottleneck, [3,4,6,3])
def preActResNet101(): return PreActResNet(PreActBottleneck, [3,4,23,3])
def preActResNet152(): return PreActResNet(PreActBottleneck, [3,8,36,3])


In [7]:
# m = WideResNetConcat(num_groups=3, N=3, num_classes=10, k=1, drop_p=0.)

In [8]:
def get_TTA_accuracy(learn):
    preds, targs = learn.TTA()
    # combining the predictions across augmented and non augmented inputs
    preds = 0.6 * preds[0] + 0.4 * preds[1:].sum(0)
    return accuracy_np(preds, targs)

def get_TTA_accuracy_2(learn):
    log_preds,y = learn.TTA()
    preds = np.mean(np.exp(log_preds),0)
    acc = accuracy(torch.FloatTensor(preds),torch.LongTensor(y))
    print('TTA acc:', acc)

In [16]:
m = PreActResNet(PreActBlock, [2,2,2,2], concatpool=True)
learn = ConvLearner.from_model_data(m, data)
learn.half()
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=4e-4
# learn.clip = 1e-1
def_phase = {'opt_fn':optim.SGD, 'wds':wd}
# TODO: add momentum
# %time learn.fit(lr, 1, wds=wd, cycle_len=23, use_clr_beta=(20,22,0.95,0.85), loss_scale=512)
phases = [
    TrainingPhase(**def_phase, epochs=3, lr=(.04, .08), lr_decay=DecayType.LINEAR, momentum=(0.75,0.95), momentum_decay=DecayType.LINEAR),
    TrainingPhase(**def_phase, epochs=12, lr=(.08,.7), lr_decay=DecayType.LINEAR, momentum=(0.95,0.85), momentum_decay=DecayType.LINEAR),
    TrainingPhase(**def_phase, epochs=12, lr=(.7,.04), lr_decay=DecayType.LINEAR, momentum=(0.85,0.95), momentum_decay=DecayType.LINEAR),
]

learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=27), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                 
    0      1.412257   1.403386   0.5082    
    1      1.059565   1.679631   0.4933                   
    2      0.918801   1.390327   0.5803                    
    3      0.88158    0.789558   0.7298                    
    4      0.665848   0.786746   0.7241                    
    5      0.583451   0.895092   0.7075                    
    6      0.525627   0.800659   0.7372                    
    7      0.485268   0.634775   0.7827                    
    8      0.478545   0.624845   0.7952                    
    9      0.477403   0.514541   0.824                     
    10     0.466429   0.689879   0.7887                    
    11     0.455498   0.855484   0.7408                    
    12     0.433913   0.924021   0.7304                    
    13     0.448637   0.790786   0.7451                    
    14     0.439411   0.782915   0.7535                    
    15     0.431638   0.541921   0.8207                   

[array([0.26132]), 0.9122000007629395]

In [23]:
learn.load('att6-tta')

In [24]:
phases = [TrainingPhase(**def_phase, epochs=4, lr=(.04,.001), lr_decay=DecayType.LINEAR, momentum=(0.95))]
learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                   
    0      0.098208   0.243453   0.9231    
    1      0.08352    0.228644   0.9297                     
    2      0.068049   0.218568   0.9307                     
    3      0.05425    0.213954   0.9319                     



[array([0.21395]), 0.9319000005722046]

In [25]:
tta_data = torch_loader(PATH, sz, prefetcher=False)

In [26]:
learn.data_ = tta_data

In [27]:
get_TTA_accuracy(learn)

                                             

0.9391

In [28]:
get_TTA_accuracy_2(learn)

TTA acc: 0.9383                              


In [31]:
m = PreActResNet(PreActBlock, [2,2,2,2], concatpool=True)
learn = ConvLearner.from_model_data(m, data)
learn.half()
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=2e-4
learn.clip = 3e-1
def_phase = {'opt_fn':optim.SGD, 'wds':wd}
# TODO: add momentum
# %time learn.fit(lr, 1, wds=wd, cycle_len=23, use_clr_beta=(20,22,0.95,0.85), loss_scale=512)
phases = [
    TrainingPhase(**def_phase, epochs=3, lr=(.04, .08), lr_decay=DecayType.LINEAR, momentum=(0.75,0.95), momentum_decay=DecayType.LINEAR),
    TrainingPhase(**def_phase, epochs=12, lr=(.08,1), lr_decay=DecayType.LINEAR, momentum=(0.95,0.85), momentum_decay=DecayType.LINEAR),
    TrainingPhase(**def_phase, epochs=12, lr=(1,.04), lr_decay=DecayType.LINEAR, momentum=(0.85,0.95), momentum_decay=DecayType.LINEAR),
]

learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=27), HTML(value='')))

 40%|███▉      | 39/98 [00:03<00:05, 11.67it/s, loss=1.76]
 42%|████▏     | 41/98 [00:03<00:04, 11.51it/s, loss=1.73]

Exception in thread Thread-154:
Traceback (most recent call last):
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/tqdm/_monitor.py", line 62, in run
    for instance in self.tqdm_cls._instances:
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
RuntimeError: Set changed size during iteration



epoch      trn_loss   val_loss   accuracy                 
    0      1.383582   1.218085   0.5638    
    1      1.036185   1.419456   0.5673                   
    2      0.910228   1.551581   0.5425                    
    3      0.948948   1.163493   0.6279                    
    4      0.700445   0.846213   0.7439                    
    5      0.581425   0.622831   0.7913                    
    6      0.54283    0.678693   0.7741                    
    7      0.511617   0.695092   0.7629                    
    8      0.476352   0.672634   0.7776                    
    9      0.480119   0.597247   0.8089                    
    10     0.477554   0.970222   0.7235                    
    11     0.466223   0.655534   0.7844                    
    12     0.456882   0.973388   0.6928                    
    13     0.473979   0.657018   0.789                     
    14     0.441907   0.71927    0.7778                    
    15     0.450824   0.669261   0.7734                   

[array([0.25681]), 0.9188000005722046]

In [37]:
learn.save('att6-lr1')

In [38]:
phases = [TrainingPhase(**def_phase, epochs=4, lr=(.04,.001), lr_decay=DecayType.LINEAR, momentum=(0.95))]
learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                   
    0      0.066247   0.247223   0.9285    
    1      0.057653   0.241219   0.9292                     
    2      0.050102   0.238205   0.9301                     
    3      0.042713   0.235338   0.9313                     



[array([0.23534]), 0.9312999992370605]

In [40]:
tta_data = torch_loader(PATH, sz, prefetcher=False)
learn.data_ = tta_data
get_TTA_accuracy(learn), get_TTA_accuracy_2(learn)

TTA acc: 0.9358                              


(0.9385, None)

In [42]:
m = PreActResNet(PreActBlock, [2,2,2,2], concatpool=True)
learn = ConvLearner.from_model_data(m, data)
learn.half()
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=2e-4
lr=1.2
learn.clip = 3e-1
def_phase = {'opt_fn':optim.SGD, 'wds':wd}
# TODO: add momentum
# %time learn.fit(lr, 1, wds=wd, cycle_len=23, use_clr_beta=(20,22,0.95,0.85), loss_scale=512)
phases = [
    TrainingPhase(**def_phase, epochs=2, lr=(.04, .08), lr_decay=DecayType.LINEAR, momentum=(0.85,0.95), momentum_decay=DecayType.LINEAR),
    TrainingPhase(**def_phase, epochs=13, lr=(.08,.7), lr_decay=DecayType.LINEAR, momentum=(0.95,0.85), momentum_decay=DecayType.LINEAR),
    TrainingPhase(**def_phase, epochs=13, lr=(.7,.04), lr_decay=DecayType.LINEAR, momentum=(0.85,0.95), momentum_decay=DecayType.LINEAR)
]

learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=28), HTML(value='')))

 36%|███▌      | 35/98 [00:03<00:05, 11.21it/s, loss=1.79] 
 40%|███▉      | 39/98 [00:03<00:05, 11.72it/s, loss=1.77]

Exception in thread Thread-354:
Traceback (most recent call last):
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/tqdm/_monitor.py", line 62, in run
    for instance in self.tqdm_cls._instances:
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
RuntimeError: Set changed size during iteration



epoch      trn_loss   val_loss   accuracy                 
    0      1.404054   1.398838   0.5089    
    1      1.125469   1.860868   0.5295                   
    2      0.980382   1.011459   0.6895                    
    3      0.740379   0.648137   0.7669                    
    4      0.613539   0.834201   0.7225                    
    5      0.548261   0.656363   0.7798                    
    6      0.494997   0.787101   0.7415                    
    7      0.472443   0.612136   0.7947                    
    8      0.445408   0.723651   0.7686                    
    9      0.430274   0.687877   0.7783                    
    10     0.400086   0.58851    0.8105                    
    11     0.386951   0.633652   0.7998                    
    12     0.391113   0.604603   0.8081                    
    13     0.396721   0.642429   0.8031                    
    14     0.37233    0.483478   0.8406                    
    15     0.355568   0.871443   0.7599                   

[array([0.24767]), 0.9233000000953674]

In [43]:
learn.save('9233')

In [44]:
phases = [TrainingPhase(**def_phase, epochs=5, lr=(.04,.001), lr_decay=DecayType.LINEAR, momentum=(0.95))]
learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=5), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                   
    0      0.055303   0.235548   0.9271    
    1      0.048198   0.240492   0.9281                     
    2      0.038738   0.236836   0.9313                     
    3      0.034984   0.233395   0.9323                     
    4      0.02779    0.232784   0.9327                     



[array([0.23278]), 0.9327000002861023]

In [46]:
tta_data = torch_loader(PATH, sz, prefetcher=False)
learn.data_ = tta_data
get_TTA_accuracy(learn), get_TTA_accuracy_2(learn)

TTA acc: 0.9391                              


(0.9396, None)

In [52]:
learn.load('9233')

In [54]:
phases = [TrainingPhase(**def_phase, epochs=5, lr=(.04,.001), lr_decay=DecayType.LINEAR, momentum=(0.95))]
learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=5), HTML(value='')))

 83%|████████▎ | 81/98 [00:06<00:01, 12.82it/s, loss=0.0544]

Process Process-2596:
Process Process-2595:
Process Process-2597:
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

Process Process-2593:
Traceback (most recent call last):
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
Process Process-2592:
Process Process-2591:
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 50, in _worker_loop
    r = index_queue.get()
Process Process-2594:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/paperspace/anacon

Traceback (most recent call last):
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-54-e8bcff9ff780>", line 2, in <module>
    learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)
  File "/home/paperspace/fastai/courses/dl2/fastai/learner.py", line 426, in fit_opt_sched
    metrics=metrics, callbacks=callbacks, reg_fn=self.reg_fn, clip=self.clip, fp16=self.fp16, **kwargs)
  File "/home/paperspace/fastai/courses/dl2/fastai/model.py", line 129, in fit
    loss = model_stepper.step(V(x),V(y), epoch)
  File "/home/paperspace/fastai/courses/dl2/fastai/model.py", line 60, in step
    nn.utils.clip_grad_norm(trainable_params_(self.m), self.clip)
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/utils/clip_grad.py", line 26, in clip_grad_norm
    param_norm = p.grad.data.norm(norm_type)
Keyboa

Traceback (most recent call last):
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


KeyboardInterrupt: 

  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 50, in _worker_loop
    r = index_queue.get()
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
KeyboardInterrupt
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  F

In [56]:
m = PreActResNet(PreActBlock, [2,2,2,2], concatpool=True)
learn = ConvLearner.from_model_data(m, data)
learn.half()
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=2e-4
lr=1.2
learn.clip = 3e-1
def_phase = {'opt_fn':optim.SGD, 'wds':wd}
# TODO: add momentum
# %time learn.fit(lr, 1, wds=wd, cycle_len=23, use_clr_beta=(20,22,0.95,0.85), loss_scale=512)
phases = [
    TrainingPhase(**def_phase, epochs=2, lr=(.04, .07), lr_decay=DecayType.LINEAR, momentum=(0.85,0.95), momentum_decay=DecayType.LINEAR),
    TrainingPhase(**def_phase, epochs=11, lr=(.07,.6), lr_decay=DecayType.LINEAR, momentum=(0.95,0.85), momentum_decay=DecayType.LINEAR),
    TrainingPhase(**def_phase, epochs=13, lr=(.6,.04), lr_decay=DecayType.LINEAR, momentum=(0.85,0.95), momentum_decay=DecayType.LINEAR)
]

learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=26), HTML(value='')))

  0%|          | 0/98 [00:00<?, ?it/s]                   


Exception in thread Thread-535:
Traceback (most recent call last):
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/tqdm/_monitor.py", line 62, in run
    for instance in self.tqdm_cls._instances:
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
RuntimeError: Set changed size during iteration



epoch      trn_loss   val_loss   accuracy                 
    0      1.406422   1.363898   0.5187    
    1      1.143778   1.443159   0.5407                   
    2      0.958711   0.92218    0.6963                    
    3      0.726452   0.822333   0.7298                    
    4      0.592805   0.730651   0.7525                    
    5      0.523831   0.721372   0.7676                    
    6      0.492072   0.555648   0.8184                    
    7      0.454976   0.671316   0.7736                    
    8      0.433841   0.5424     0.8194                    
    9      0.408343   0.630389   0.7996                    
    10     0.380653   0.501859   0.8373                    
    11     0.36229    0.790254   0.7638                    
    12     0.36454    0.798706   0.759                     
    13     0.352369   0.544393   0.8193                    
    14     0.320008   0.490618   0.8425                    
    15     0.310419   0.677558   0.8042                   

[array([0.25713]), 0.9229000001907348]

In [57]:
learn.save('9229')

In [58]:
phases = [TrainingPhase(**def_phase, epochs=6, lr=(.04,.001), lr_decay=DecayType.LINEAR, momentum=(0.95))]
learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=6), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                   
    0      0.055875   0.239902   0.9263    
    1      0.04695    0.248908   0.9281                     
    2      0.036955   0.241113   0.9321                     
    3      0.029449   0.241012   0.9338                     
    4      0.025771   0.246175   0.9335                     
    5      0.021908   0.240626   0.9351                     



[array([0.24063]), 0.935100000667572]

In [60]:
tta_data = torch_loader(PATH, sz, prefetcher=False)
learn.data_ = tta_data
get_TTA_accuracy(learn), get_TTA_accuracy_2(learn)

TTA acc: 0.9393                              


(0.9414, None)

In [61]:
m = PreActResNet(PreActBlock, [2,2,2,2], concatpool=True)
learn = ConvLearner.from_model_data(m, data)
learn.half()
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=2e-4
lr=1.2
learn.clip = 1e-1
def_phase = {'opt_fn':optim.SGD, 'wds':wd}
# TODO: add momentum
# %time learn.fit(lr, 1, wds=wd, cycle_len=23, use_clr_beta=(20,22,0.95,0.85), loss_scale=512)
phases = [
    TrainingPhase(**def_phase, epochs=2, lr=(.04, .07), lr_decay=DecayType.LINEAR, momentum=(0.85,0.95), momentum_decay=DecayType.LINEAR),
    TrainingPhase(**def_phase, epochs=10, lr=(.07,.5), lr_decay=DecayType.LINEAR, momentum=(0.95,0.85), momentum_decay=DecayType.LINEAR),
    TrainingPhase(**def_phase, epochs=13, lr=(.5,.04), lr_decay=DecayType.LINEAR, momentum=(0.85,0.95), momentum_decay=DecayType.LINEAR)
]

learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=26), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                 
    0      1.401288   1.431436   0.5146    
    1      1.128926   2.922088   0.3794                   
    2      0.948805   1.142918   0.666                     
    3      0.744348   0.710946   0.7587                    
    4      0.61139    0.679687   0.77                      
    5      0.542358   0.716951   0.771                     
    6      0.470684   0.574388   0.8077                    
    7      0.430563   0.605658   0.8065                    
    8      0.404186   0.527993   0.8192                    
    9      0.37285    0.484941   0.8285                    
    10     0.363053   0.862881   0.7338                    
    11     0.350714   0.499591   0.8293                    
    12     0.346253   0.487284   0.8268                    
    13     0.333914   0.552516   0.8181                    
    14     0.297226   0.560681   0.8258                    
    15     0.28224    0.472441   0.8566                   

[array([0.25818]), 0.9226000002861023]

In [62]:
learn.save('9226')

In [63]:
phases = [TrainingPhase(**def_phase, epochs=6, lr=(.04,.001), lr_decay=DecayType.LINEAR, momentum=(0.95))]
learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=6), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                   
    0      0.054579   0.259938   0.9234    
    1      0.04359    0.254944   0.9286                     
    2      0.035823   0.251349   0.9305                     
    3      0.028213   0.246712   0.9333                     
    4      0.022769   0.248657   0.9321                     
    5      0.019539   0.247696   0.9339                     



[array([0.2477]), 0.9339000003814697]

In [64]:
tta_data = torch_loader(PATH, sz, prefetcher=False)
learn.data_ = tta_data
get_TTA_accuracy(learn), get_TTA_accuracy_2(learn)

TTA acc: 0.9395                              


(0.9393, None)

In [65]:
m = PreActResNet(PreActBlock, [2,2,2,2], concatpool=True)
learn = ConvLearner.from_model_data(m, data)
learn.half()
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=5e-4
lr=1.2
learn.clip = 3e-1
def_phase = {'opt_fn':optim.SGD, 'wds':wd}
# TODO: add momentum
# %time learn.fit(lr, 1, wds=wd, cycle_len=23, use_clr_beta=(20,22,0.95,0.85), loss_scale=512)
phases = [
    TrainingPhase(**def_phase, epochs=2, lr=(.04, .07), lr_decay=DecayType.LINEAR, momentum=(0.85,0.95), momentum_decay=DecayType.LINEAR),
    TrainingPhase(**def_phase, epochs=11, lr=(.07,.6), lr_decay=DecayType.LINEAR, momentum=(0.95,0.85), momentum_decay=DecayType.LINEAR),
    TrainingPhase(**def_phase, epochs=13, lr=(.6,.04), lr_decay=DecayType.LINEAR, momentum=(0.85,0.95), momentum_decay=DecayType.LINEAR)
]

learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=26), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                 
    0      1.414748   1.664819   0.4772    
    1      1.134363   1.457289   0.5645                   
    2      1.015202   0.965186   0.6746                   
    3      0.765109   0.752077   0.7422                    
    4      0.642791   0.590479   0.8004                    
    5      0.569069   0.658552   0.774                     
    6      0.526559   0.699446   0.7631                    
    7      0.506984   0.762919   0.7462                    
    8      0.495577   0.660882   0.7777                    
    9      0.470361   0.681891   0.7611                    
    10     0.469142   0.791516   0.7358                    
    11     0.450069   0.658498   0.7799                    
    12     0.442187   0.752826   0.761                     
    13     0.421857   0.599802   0.7982                    
    14     0.407977   0.645302   0.7831                    
    15     0.395819   0.4686     0.8443                    

[array([0.28032]), 0.9066999999046326]

In [66]:
phases = [TrainingPhase(**def_phase, epochs=6, lr=(.04,.001), lr_decay=DecayType.LINEAR, momentum=(0.95))]
learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=6), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                  
    0      0.110044   0.258751   0.9148    
    1      0.093127   0.245846   0.9266                     
    2      0.07675    0.243798   0.9239                     
    3      0.063608   0.224439   0.9331                     
    4      0.050007   0.22053    0.9326                     
    5      0.039199   0.217854   0.9353                     



[array([0.21785]), 0.935300000667572]

In [67]:
tta_data = torch_loader(PATH, sz, prefetcher=False)
learn.data_ = tta_data
get_TTA_accuracy(learn), get_TTA_accuracy_2(learn)

TTA acc: 0.9407                              


(0.9398, None)

### END TESTING

In [None]:
m = pre_resnet18()
learn = ConvLearner.from_model_data(m, data)
learn.half()
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=1e-4
lr=.6
learn.clip = 3e-1
def_phase = {'opt_fn':optim.SGD, 'wds':wd}
# TODO: add momentum
lr=0.6
phases = [
    TrainingPhase(**def_phase, epochs=1, lr=(.005,.05), lr_decay=DecayType.EXPONENTIAL, momentum=0.95),
    TrainingPhase(**def_phase, epochs=6, lr=(.05,.9), lr_decay=DecayType.COSINE, momentum=(0.95,0.85), momentum_decay=DecayType.COSINE),
    TrainingPhase(**def_phase, epochs=4, lr=1, momentum=0.85),
    TrainingPhase(**def_phase, epochs=7, lr=(.9,.01), lr_decay=DecayType.COSINE, momentum=(0.85,0.95), momentum_decay=DecayType.COSINE),
    TrainingPhase(**def_phase, epochs=3, lr=(.01,.0005), lr_decay=DecayType.LINEAR, momentum=(0.95))]

learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

In [None]:
learn.sched.plot_lr()

In [None]:
m = WideResNetConcat(num_groups=3, N=3, num_classes=10, k=1, drop_p=0.)
learn = ConvLearner.from_model_data(m, data)
learn.half()
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=1e-4
lr=1.5
learn.clip = 3e-1
%time learn.fit(lr, 1, wds=wd, cycle_len=23, use_clr_beta=(12,22,0.95,0.85), loss_scale=512)