## CIFAR-10

In [1]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [2]:
from fastai.conv_learner import *
from fastai.models.cifar10.wideresnet import wrn_22_cat, wrn_22, WideResNetConcat
# Switch cuDNN into benchmark mode before any model is created.
torch.backends.cudnn.benchmark = True

# Root folder of the CIFAR-10 image tree; create it up front if it is missing.
PATH = Path("data/cifar10/")
PATH.mkdir(parents=True, exist_ok=True)

In [3]:
# The ten CIFAR-10 label names.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

# (mean, std) pair with one entry per colour channel.
stats = (
    np.array([0.4914, 0.48216, 0.44653]),
    np.array([0.24703, 0.24349, 0.26159]),
)

# Batch size, image side length and number of data-loading workers.
bs, sz, workers = 512, 32, 7

In [4]:
import torchvision.transforms as transforms
import torchvision.datasets as datasets

def pad(img, p=4, padding_mode='reflect'):
    """Return `img` enlarged by `p` pixels on every side of its first two axes.

    The image is converted to a NumPy array, padded with ``np.pad`` using
    `padding_mode`, and wrapped back into an image with ``Image.fromarray``.
    The pad widths describe three axes and leave the last one unpadded, so
    the input must convert to a three-dimensional array.
    """
    pixels = np.asarray(img)
    border = ((p, p), (p, p), (0, 0))
    padded = np.pad(pixels, border, mode=padding_mode)
    return Image.fromarray(padded)


def get_loaders(bs, num_workers):
    """Create plain torchvision loaders for the folders under ``PATH``.

    Args:
        bs: batch size used by all three loaders.
        num_workers: number of worker processes per loader.

    Returns:
        A ``(train_loader, val_loader, aug_loader)`` tuple.  The training
        loader reads ``PATH/'train'`` shuffled and with random-crop/flip
        augmentation; the validation loader reads ``PATH/'test'`` without
        augmentation; the third loader reads ``PATH/'test'`` with the
        training augmentation and no shuffling.
    """
    base_tfms = [
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ]
    plain = transforms.Compose(base_tfms)
    augmented = transforms.Compose(
        [transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip()] + base_tfms)

    def make_loader(folder, tfm, shuffle):
        # All loaders share batch size, worker count and pinned memory.
        return torch.utils.data.DataLoader(
            datasets.ImageFolder(str(folder), tfm),
            batch_size=bs, shuffle=shuffle, num_workers=num_workers, pin_memory=True)

    train_loader = make_loader(PATH/'train', augmented, True)
    val_loader = make_loader(PATH/'test', plain, False)
    aug_loader = make_loader(PATH/'test', augmented, False)
    return train_loader, val_loader, aug_loader

def torch_loader(data_path, size, batch_size=None, num_workers=None, padding=4):
    """Build a ``ModelData`` object backed by prefetching torchvision loaders.

    Args:
        data_path: dataset root; ``train`` and ``test`` sub-folders are read
            with ``ImageFolder``.  If ``train`` does not exist,
            ``download_cifar10(data_path)`` is called first.
        size: side length handed to ``RandomCrop``; also stored as ``data.sz``.
        batch_size: training batch size (validation and augmented loaders use
            twice this value).  Defaults to the module-level ``bs``.
        num_workers: worker processes per loader.  Defaults to the
            module-level ``workers``.
        padding: pixels of padding added by ``pad`` before the random crop.

    Returns:
        The ``ModelData`` instance, with ``sz`` set to `size` and ``aug_dl``
        set to a loader that applies the training augmentation to the
        ``test`` folder.
    """
    # Local import keeps this cell self-contained.
    from functools import partial

    if batch_size is None:
        batch_size = bs
    if num_workers is None:
        num_workers = workers

    # os.path.join works for both str and path-like roots, unlike `+ '/train'`.
    traindir = os.path.join(data_path, 'train')
    valdir = os.path.join(data_path, 'test')
    if not os.path.exists(traindir): download_cifar10(data_path)

    normalize = transforms.Normalize(mean=[0.4914 , 0.48216, 0.44653], std=[0.24703, 0.24349, 0.26159])
    tfms = [transforms.ToTensor(), normalize]

    train_tfms = transforms.Compose([
        partial(pad, p=padding),
        transforms.RandomCrop(size),
        transforms.RandomHorizontalFlip(),
    ] + tfms)
    val_tfms = transforms.Compose(tfms)

    train_dataset = datasets.ImageFolder(traindir, train_tfms)
    val_dataset = datasets.ImageFolder(valdir, val_tfms)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True,
        num_workers=num_workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=batch_size*2, shuffle=False,
        num_workers=num_workers, pin_memory=True)

    # Same images as the validation loader, but with the training augmentation.
    aug_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, train_tfms),
        batch_size=batch_size*2, shuffle=False,
        num_workers=num_workers, pin_memory=True)

    train_loader = DataPrefetcher(train_loader)
    val_loader = DataPrefetcher(val_loader)
    aug_loader = DataPrefetcher(aug_loader)

    data = ModelData(data_path, train_loader, val_loader)
    data.sz = size
    data.aug_dl = aug_loader
    return data

# Seems to speed up training by ~2%
class DataPrefetcher():
    """Iterate over a data loader while staging the next batch on the GPU.

    Each batch is expected to unpack into an ``(input, target)`` pair.  The
    copy of the following batch to the GPU is issued on a separate CUDA
    stream while the caller works on the current one.

    Args:
        loader: the wrapped loader; its ``dataset`` attribute is re-exposed.
        stop_after: optional int.  The check runs after each yield with a
            strict ``>``, so iteration ends once ``stop_after + 1`` batches
            have been produced.  Any non-int value disables the limit.
    """
    def __init__(self, loader, stop_after=None):
        self.loader = loader
        self.dataset = loader.dataset
        self.stream = torch.cuda.Stream()
        self.stop_after = stop_after
        self.next_input = None
        self.next_target = None

    def __len__(self):
        """Return the length of the wrapped loader (``stop_after`` is ignored)."""
        return len(self.loader)

    def preload(self):
        """Fetch the next batch and start copying it to the GPU.

        When the underlying iterator is exhausted, both ``next_input`` and
        ``next_target`` are set to ``None``.
        """
        try:
            self.next_input, self.next_target = next(self.loaditer)
        except StopIteration:
            self.next_input = None
            self.next_target = None
            return
        with torch.cuda.stream(self.stream):
            # `async` is a reserved word from Python 3.7 on; PyTorch exposes
            # the same flag under the name `non_blocking`.
            self.next_input = self.next_input.cuda(non_blocking=True)
            self.next_target = self.next_target.cuda(non_blocking=True)

    def __iter__(self):
        """Yield ``(input, target)`` pairs, prefetching one batch ahead."""
        count = 0
        self.loaditer = iter(self.loader)
        self.preload()
        while self.next_input is not None:
            # Make the current stream wait for the pending copy to finish.
            torch.cuda.current_stream().wait_stream(self.stream)
            batch_input = self.next_input
            batch_target = self.next_target
            self.preload()
            count += 1
            yield batch_input, batch_target
            if type(self.stop_after) is int and (count > self.stop_after):
                break

In [5]:
# Build the model data from the image tree under PATH at the configured size.
data = torch_loader(data_path=str(PATH), size=sz)

In [6]:
'''Pre-activation ResNet in PyTorch.

Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Identity Mappings in Deep Residual Networks. arXiv:1603.05027
'''
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.autograd import Variable


class AdaptiveConcatPool2d(nn.Module):
    """Adaptive max-pool and average-pool of the input, concatenated on dim 1.

    `sz` is the target output size for both pools; a falsy value selects
    ``(1, 1)``.  The max-pooled result comes first in the concatenation.
    """
    def __init__(self, sz=None):
        super().__init__()
        if not sz:
            sz = (1, 1)
        self.ap = nn.AdaptiveAvgPool2d(sz)
        self.mp = nn.AdaptiveMaxPool2d(sz)

    def forward(self, x):
        pooled_max = self.mp(x)
        pooled_avg = self.ap(x)
        return torch.cat([pooled_max, pooled_avg], 1)
    

class PreActBlock(nn.Module):
    """Two-convolution residual block with BN + ReLU placed before each conv.

    A 1x1 convolution ``shortcut`` is created only when the stride is not 1
    or the channel count changes; otherwise the input itself is added back.
    """
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.bn2.bias.data.zero_()
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)

        needs_projection = not (stride == 1 and in_planes == out_planes)
        if needs_projection:
            projection = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
            self.shortcut = nn.Sequential(projection)

    def forward(self, x):
        pre = F.relu(self.bn1(x), inplace=True)
        # The projection (when present) consumes the pre-activated tensor.
        if hasattr(self, 'shortcut'):
            residual = self.shortcut(pre)
        else:
            residual = x
        y = self.conv1(pre)
        y = F.relu(self.bn2(y), inplace=True)
        y = self.conv2(y)
        y += residual
        return y


class PreActBottleneck(nn.Module):
    """Three-convolution (1x1, 3x3, 1x1) residual block with pre-activation.

    The output has ``expansion * planes`` channels.  A 1x1 convolution
    ``shortcut`` is created only when the stride is not 1 or the input
    channel count differs from the output; otherwise the input is added back.
    """
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)

        needs_projection = not (stride == 1 and in_planes == out_planes)
        if needs_projection:
            projection = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
            self.shortcut = nn.Sequential(projection)

    def forward(self, x):
        pre = F.relu(self.bn1(x))
        # The projection (when present) consumes the pre-activated tensor.
        if hasattr(self, 'shortcut'):
            residual = self.shortcut(pre)
        else:
            residual = x
        y = self.conv1(pre)
        y = self.conv2(F.relu(self.bn2(y)))
        y = self.conv3(F.relu(self.bn3(y)))
        y += residual
        return y


class PreActResNet(nn.Module):
    """Pre-activation ResNet: a 3x3 stem, four stages of blocks, pool, linear.

    Args:
        block: block constructor called as ``block(in_planes, planes, stride)``;
            must expose an ``expansion`` attribute.
        num_blocks: sequence of four ints, the number of blocks per stage.
        num_classes: size of the final linear layer's output.
        concatpool: when true, pool with ``AdaptiveConcatPool2d`` (which
            doubles the feature count fed to the linear layer); otherwise
            use adaptive max pooling to 1x1.

    ``forward`` returns log-probabilities over the classes.
    """
    def __init__(self, block, num_blocks, num_classes=10, concatpool=False):
        super(PreActResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.pool = AdaptiveConcatPool2d() if concatpool else nn.AdaptiveMaxPool2d((1,1))

        # `concatpool+1` is 2 with concat pooling (max and avg features) and 1 without.
        self.linear = nn.Linear(512*block.expansion*(concatpool+1), num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack `num_blocks` blocks; only the first one uses `stride`."""
        layers = []
        for block_stride in [stride] + [1]*(num_blocks-1):
            layers.append(block(self.in_planes, planes, block_stride))
            # The next block's input width is this block's output width.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.pool(out)
        out = out.view(out.size(0), -1)
        # Name the class dimension explicitly: the implicit choice is deprecated.
        return F.log_softmax(self.linear(out), dim=1)

# Factory helpers: each returns a PreActResNet with the listed blocks per stage
# (default num_classes and pooling).
def preact_resnet18(): return PreActResNet(PreActBlock, [2,2,2,2])
def preact_resnet2332(): return PreActResNet(PreActBlock, [2,3,3,2])
def preact_resnet3333(): return PreActResNet(PreActBlock, [3,3,3,3])
def preact_resnet34(): return PreActResNet(PreActBlock, [3,4,6,3])
def preact_resnet50(): return PreActResNet(PreActBottleneck, [3,4,6,3])
def preact_resnet101(): return PreActResNet(PreActBottleneck, [3,4,23,3])
def preact_resnet152(): return PreActResNet(PreActBottleneck, [3,8,36,3])

# The original camelCase names stay available as aliases.
preActResNet101 = preact_resnet101
preActResNet152 = preact_resnet152


In [7]:
# m = WideResNetConcat(num_groups=3, N=3, num_classes=10, k=1, drop_p=0.)

In [8]:
def get_TTA_accuracy(learn):
    """Return the accuracy of a weighted blend of test-time-augmentation predictions.

    ``learn.TTA()`` supplies stacked predictions and targets.  The first
    entry (presumably the un-augmented pass — TODO confirm) is weighted 0.6
    and the sum of the remaining entries is weighted 0.4 before scoring
    with ``accuracy_np``.
    """
    all_preds, targets = learn.TTA()
    first, rest = all_preds[0], all_preds[1:]
    blended = 0.6 * first + 0.4 * rest.sum(0)
    return accuracy_np(blended, targets)

def get_TTA_accuracy_2(learn):
    """Print and return the accuracy of averaged test-time-augmentation predictions.

    The log-predictions from ``learn.TTA()`` are exponentiated, averaged over
    the first axis and scored with ``accuracy``.

    Returns:
        The accuracy value that is also printed, so callers can use it
        instead of receiving ``None``.
    """
    log_preds, y = learn.TTA()
    preds = np.mean(np.exp(log_preds), 0)
    acc = accuracy(torch.FloatTensor(preds), torch.LongTensor(y))
    print('TTA acc:', acc)
    return acc

In [17]:
# Hyper-parameters for this run; `lr` is not used by the phase schedule below.
wd = 2e-4
lr = 1.2

# PreActResNet with four stages of two basic blocks and concat pooling.
m = PreActResNet(PreActBlock, [2,2,2,2], concatpool=True)
learn = ConvLearner.from_model_data(m, data)
learn.half()
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
learn.clip = 3e-1

# TODO: add momentum
# %time learn.fit(lr, 1, wds=wd, cycle_len=23, use_clr_beta=(20,22,0.95,0.85), loss_scale=512)

# 35 epochs in four linear phases: short warm-up, ramp the learning rate up,
# ramp it back down, then anneal to a very small value.
def_phase = dict(opt_fn=optim.SGD, wds=wd)
phases = [
    TrainingPhase(epochs=2, lr=(.04, .08), lr_decay=DecayType.LINEAR,
                  momentum=0.95, **def_phase),
    TrainingPhase(epochs=13, lr=(.08, .7), lr_decay=DecayType.LINEAR,
                  momentum=(0.95, 0.85), momentum_decay=DecayType.LINEAR, **def_phase),
    TrainingPhase(epochs=13, lr=(.7, .04), lr_decay=DecayType.LINEAR,
                  momentum=(0.85, 0.95), momentum_decay=DecayType.LINEAR, **def_phase),
    TrainingPhase(epochs=7, lr=(.04, .001), lr_decay=DecayType.LINEAR,
                  momentum=0.95, **def_phase),
]

learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=35), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                 
    0      1.54745    1.341417   0.516     
    1      1.199921   1.162127   0.6105                   
    2      0.952421   0.844205   0.7047                    
    3      0.754694   0.783398   0.7247                    
    4      0.655994   0.763181   0.7423                    
    5      0.574407   0.60161    0.7945                    
    6      0.525335   0.675541   0.7738                    
    7      0.488829   0.673738   0.7684                    
    8      0.471731   0.550068   0.8135                    
    9      0.431304   0.626316   0.8019                    
    10     0.414254   0.625968   0.8013                    
    11     0.406064   0.612051   0.8159                    
    12     0.401669   0.573889   0.819                     
    13     0.386631   0.62647    0.8072                    
    14     0.386211   0.556812   0.8231                    
    15     0.368219   0.810465   0.7484                   

[array([0.24418]), 0.9328000016212463]

In [18]:
# get_TTA_accuracy_2 prints its own result, so it is called on its own line
# instead of being embedded in print().
print(get_TTA_accuracy(learn))
get_TTA_accuracy_2(learn)

TTA acc: 0.6618                              
0.8267 None


In [9]:
# Weight decay for this run.
wd = 2e-4

# PreActResNet with four stages of two basic blocks and concat pooling.
m = PreActResNet(PreActBlock, [2,2,2,2], concatpool=True)
learn = ConvLearner.from_model_data(m, data)
learn.half()
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
learn.clip = 3e-1

# TODO: add momentum
# %time learn.fit(lr, 1, wds=wd, cycle_len=23, use_clr_beta=(20,22,0.95,0.85), loss_scale=512)

# 28 epochs in four linear phases; momentum also warms up from 0.75 in the first.
def_phase = dict(opt_fn=optim.SGD, wds=wd)
phases = [
    TrainingPhase(epochs=3, lr=(.04, .08), lr_decay=DecayType.LINEAR,
                  momentum=(0.75, 0.95), momentum_decay=DecayType.LINEAR, **def_phase),
    TrainingPhase(epochs=10, lr=(.08, .7), lr_decay=DecayType.LINEAR,
                  momentum=(0.95, 0.85), momentum_decay=DecayType.LINEAR, **def_phase),
    TrainingPhase(epochs=10, lr=(.7, .04), lr_decay=DecayType.LINEAR,
                  momentum=(0.85, 0.95), momentum_decay=DecayType.LINEAR, **def_phase),
    TrainingPhase(epochs=5, lr=(.04, .001), lr_decay=DecayType.LINEAR,
                  momentum=0.95, **def_phase),
]

learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

In [10]:
# Load the state stored under the name '27epoch9288'. Presumably a checkpoint
# saved by hand after an earlier run; no matching save call is visible here.
learn.load('27epoch9288')

In [11]:
# Single 5-epoch phase: learning rate falls linearly from .01 to .001 at
# constant momentum, reusing the optimizer settings in `def_phase`.
phases = [
    TrainingPhase(epochs=5, lr=(.01, .001), lr_decay=DecayType.LINEAR,
                  momentum=0.95, **def_phase),
]

learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=5), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                   
    0      0.040664   0.243845   0.9292    
                                                            

Process Process-28:
Process Process-24:
Process Process-23:
Process Process-26:
Process Process-22:
Process Process-25:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Process Process-27:
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 55, in _worker_loop
    samples =

  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
KeyboardInterrupt
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 55, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torchvision/datasets/folder.py", line 124, in __getitem__
    img = self.transform(img)
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 55, in <listcomp>
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torchvision/transforms/transforms.py", line 42, in __call__
    img = t(img)
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torchvision/datasets/folder.py", line 122, in __geti

Traceback (most recent call last):
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-11-6f55e2b490a8>", line 3, in <module>
    learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)
  File "/home/paperspace/fastai/courses/dl2/fastai/learner.py", line 426, in fit_opt_sched
    metrics=metrics, callbacks=callbacks, reg_fn=self.reg_fn, clip=self.clip, fp16=self.fp16, **kwargs)
  File "/home/paperspace/fastai/courses/dl2/fastai/model.py", line 150, in fit
    vals = validate(model_stepper, cur_data.val_dl, metrics)
  File "/home/paperspace/fastai/courses/dl2/fastai/model.py", line 206, in validate
    for (*x,y) in iter(dl):
  File "<ipython-input-4-9b4bf4427d44>", line 106, in __iter__
    self.preload()
  File "<ipython-input-4-9b4bf4427d44>", line 94, in preload
    self.next_input, self.next_target = next(self.loaditer)


KeyboardInterrupt: 

In [17]:
# get_TTA_accuracy_2 prints its own result, so it is called on its own line
# instead of being embedded in print().
print(get_TTA_accuracy(learn))
get_TTA_accuracy_2(learn)

TTA acc: 0.6647                              
0.6592 None


In [9]:
# Hyper-parameters for this run; `lr` is not used by the phase schedule below.
wd = 2e-4
lr = 1.2

# PreActResNet with four stages of two basic blocks and concat pooling.
m = PreActResNet(PreActBlock, [2,2,2,2], concatpool=True)
learn = ConvLearner.from_model_data(m, data)
learn.half()
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
learn.clip = 3e-1

# TODO: add momentum
# %time learn.fit(lr, 1, wds=wd, cycle_len=23, use_clr_beta=(20,22,0.95,0.85), loss_scale=512)

# 28 epochs in three linear phases: warm-up, ramp up, ramp down.
def_phase = dict(opt_fn=optim.SGD, wds=wd)
phases = [
    TrainingPhase(epochs=3, lr=(.04, .08), lr_decay=DecayType.LINEAR,
                  momentum=(0.75, 0.95), momentum_decay=DecayType.LINEAR, **def_phase),
    TrainingPhase(epochs=12, lr=(.08, .7), lr_decay=DecayType.LINEAR,
                  momentum=(0.95, 0.85), momentum_decay=DecayType.LINEAR, **def_phase),
    TrainingPhase(epochs=13, lr=(.7, .04), lr_decay=DecayType.LINEAR,
                  momentum=(0.85, 0.95), momentum_decay=DecayType.LINEAR, **def_phase),
]

learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

In [None]:
# The learner object in this notebook is `learn` (`learner` is never defined).
# NOTE(review): the checkpoint name is taken from the `learn.load('e279269')`
# call that follows; confirm it is the intended name before running.
learn.save('e279269')

In [11]:
# Load the state stored under the name 'e279269' before fine-tuning.
learn.load('e279269')

In [13]:
# Fine-tuning settings; `lr` is not used by the phase below.
wd = 5e-4
lr = 1.2
learn.clip = 3e-1

# Single 10-epoch phase: learning rate falls linearly from .03 to .0005.
def_phase = dict(opt_fn=optim.SGD, wds=wd)
phases = [
    TrainingPhase(epochs=10, lr=(.03, .0005), lr_decay=DecayType.LINEAR,
                  momentum=0.95, **def_phase),
]
learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                   
    0      0.021264   0.247426   0.9341    
    1      0.021122   0.248507   0.9342                     
    2      0.019623   0.250664   0.9344                     
    3      0.018247   0.244344   0.9352                     
    4      0.014671   0.246918   0.9361                     
    5      0.013124   0.243003   0.9383                     
    6      0.010386   0.242575   0.9388                     
    7      0.010377   0.245571   0.9377                     
    8      0.010275   0.245191   0.9374                      
    9      0.009524   0.244672   0.9379                      



[array([0.24467]), 0.9379000016212463]

In [14]:
# Hyper-parameters for this run (larger weight decay); `lr` is not used by
# the phase schedule below.
wd = 5e-4
lr = 1.2

# PreActResNet with four stages of two basic blocks and concat pooling.
m = PreActResNet(PreActBlock, [2,2,2,2], concatpool=True)
learn = ConvLearner.from_model_data(m, data)
learn.half()
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
learn.clip = 3e-1

# TODO: add momentum
# %time learn.fit(lr, 1, wds=wd, cycle_len=23, use_clr_beta=(20,22,0.95,0.85), loss_scale=512)

# 28 epochs in three linear phases: warm-up, ramp up, ramp down.
def_phase = dict(opt_fn=optim.SGD, wds=wd)
phases = [
    TrainingPhase(epochs=3, lr=(.04, .08), lr_decay=DecayType.LINEAR,
                  momentum=(0.75, 0.95), momentum_decay=DecayType.LINEAR, **def_phase),
    TrainingPhase(epochs=12, lr=(.08, .7), lr_decay=DecayType.LINEAR,
                  momentum=(0.95, 0.85), momentum_decay=DecayType.LINEAR, **def_phase),
    TrainingPhase(epochs=13, lr=(.7, .04), lr_decay=DecayType.LINEAR,
                  momentum=(0.85, 0.95), momentum_decay=DecayType.LINEAR, **def_phase),
]

learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=28), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                 
    0      1.391143   1.334314   0.5177    
    1      1.042856   1.109197   0.6287                   
    2      0.904438   1.469603   0.5758                    
    3      0.874192   0.962976   0.705                     
    4      0.715085   0.694865   0.7622                    
    5      0.586988   0.598435   0.796                     
    6      0.5375     0.745907   0.7434                    
    7      0.516125   0.606515   0.7944                    
    8      0.494006   0.886046   0.7204                    
    9      0.484716   0.610714   0.7865                    
    10     0.475833   1.010698   0.6963                    
    11     0.462307   0.678418   0.7723                    
    12     0.475521   0.613303   0.8007                    
    13     0.47077    1.558636   0.6062                    
    14     0.456944   0.552482   0.814                     
    15     0.442594   0.936427   0.7178                   

[array([0.26992]), 0.9150000018119812]

In [18]:
# Load the state stored under the name '27e915wd5'. Presumably saved by hand
# after the preceding run; no matching save call is visible here.
learn.load('27e915wd5')

In [19]:
# Single 12-epoch phase: learning rate falls linearly from .04 to .0001.
phases = [
    TrainingPhase(epochs=12, lr=(.04, .0001), lr_decay=DecayType.LINEAR,
                  momentum=0.95, **def_phase),
]
learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=12), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                  
    0      0.110968   0.238117   0.9238    
    1      0.098732   0.241428   0.9235                     
    2      0.090655   0.290853   0.9096                     
    3      0.080642   0.239033   0.9274                     
    4      0.073385   0.240833   0.9264                     
    5      0.059446   0.227871   0.9319                     
    6      0.048171   0.239564   0.9307                     
    7      0.041601   0.231649   0.9352                     
    8      0.034683   0.223164   0.9371                     
    9      0.027196   0.223093   0.939                      
    10     0.022635   0.226913   0.9393                     
    11     0.018472   0.223484   0.9399                     



[array([0.22348]), 0.9399000015258789]

In [20]:
# Load the state stored under the name '27e915wd5' again, so the next
# schedule starts from that state rather than from the weights just trained.
learn.load('27e915wd5')

In [22]:
# Single 13-epoch phase: learning rate falls linearly from .04 to .00001.
phases = [
    TrainingPhase(epochs=13, lr=(.04, .00001), lr_decay=DecayType.LINEAR,
                  momentum=0.95, **def_phase),
]
learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=13), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                   
    0      0.083213   0.32967    0.9061    
    1      0.094038   0.282612   0.9148                     
    2      0.081213   0.317804   0.9092                     
    3      0.076014   0.288641   0.9167                     
    4      0.066762   0.256036   0.9232                     
    5      0.057202   0.263993   0.9229                     
    6      0.04991    0.268894   0.9249                     
    7      0.037379   0.242746   0.933                      
    8      0.02749    0.241933   0.9349                     
    9      0.02153    0.232156   0.9385                     
    10     0.016212   0.234194   0.9365                     
    11     0.01265    0.231261   0.9384                     
    12     0.010391   0.23264    0.9399                     



[array([0.23264]), 0.9398999988555908]

In [23]:
# Load the state stored under the name '27e915wd5' once more.
learn.load('27e915wd5')

In [9]:
# Hyper-parameters for this run; `lr` is not used by the phase schedule below.
wd = 2e-4
lr = 1.2

# PreActResNet with four stages of two basic blocks and concat pooling.
m = PreActResNet(PreActBlock, [2,2,2,2], concatpool=True)
learn = ConvLearner.from_model_data(m, data)
learn.half()
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
learn.clip = 3e-1

# TODO: add momentum
# %time learn.fit(lr, 1, wds=wd, cycle_len=23, use_clr_beta=(20,22,0.95,0.85), loss_scale=512)

# 30 epochs in three linear phases; the final ramp-down phase overrides the
# shared weight decay with 6e-4.
def_phase = dict(opt_fn=optim.SGD, wds=wd)
phases = [
    TrainingPhase(epochs=3, lr=(.04, .08), lr_decay=DecayType.LINEAR,
                  momentum=(0.75, 0.95), momentum_decay=DecayType.LINEAR, **def_phase),
    TrainingPhase(epochs=13, lr=(.08, .6), lr_decay=DecayType.LINEAR,
                  momentum=(0.95, 0.85), momentum_decay=DecayType.LINEAR, **def_phase),
    TrainingPhase(epochs=14, lr=(.6, .04), lr_decay=DecayType.LINEAR,
                  momentum=(0.85, 0.95), momentum_decay=DecayType.LINEAR,
                  opt_fn=optim.SGD, wds=6e-4),
]

learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=30), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                 
    0      1.395335   1.396503   0.5316    
    1      1.041985   1.463319   0.5623                   
    2      0.904097   1.009868   0.6577                    
    3      1.001244   1.388565   0.5683                    
    4      0.704542   0.734299   0.7588                    
    5      0.553996   0.776914   0.7565                    
    6      0.493651   0.708688   0.7639                    
    7      0.466777   0.492839   0.8291                    
    8      0.42863    0.524811   0.8235                    
    9      0.40446    0.593429   0.8122                    
    10     0.38921    0.504208   0.8254                    
    11     0.373812   0.53962    0.8265                    
    12     0.356916   0.441853   0.8549                    
    13     0.361846   0.614352   0.7995                    
    14     0.340386   0.771667   0.7689                    
    15     0.32771    0.906955   0.7456                   

[array([0.26517]), 0.9100999982833863]

In [12]:
# Load the state stored under the name 'Crap'. Presumably a scratch checkpoint
# of the preceding run; no matching save call is visible here.
learn.load('Crap')

In [13]:
# Single 12-epoch phase with weight decay 6e-4: learning rate falls linearly
# from .04 to .0001.
phases = [
    TrainingPhase(epochs=12, lr=(.04, .0001), lr_decay=DecayType.LINEAR,
                  momentum=0.95, opt_fn=optim.SGD, wds=6e-4),
]
learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

HBox(children=(IntProgress(value=0, description='Epoch', max=12), HTML(value='')))

 93%|█████████▎| 91/98 [00:07<00:00, 12.93it/s, loss=0.115] 
 97%|█████████▋| 95/98 [00:07<00:00, 13.12it/s, loss=0.116]

Exception in thread Thread-95:
Traceback (most recent call last):
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/tqdm/_monitor.py", line 62, in run
    for instance in self.tqdm_cls._instances:
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
RuntimeError: Set changed size during iteration



epoch      trn_loss   val_loss   accuracy                  
    0      0.115448   0.250468   0.9168    
    1      0.110093   0.241932   0.9189                    
    2      0.095553   0.242897   0.9249                     
    3      0.0861     0.262625   0.9206                     
    4      0.07784    0.248432   0.9247                     
    5      0.064775   0.244981   0.9226                     
    6      0.051766   0.227596   0.9307                     
    7      0.042212   0.229271   0.931                      
    8      0.033536   0.231184   0.9325                     
    9      0.026059   0.214287   0.9357                     
    10     0.021885   0.208243   0.9383                     
    11     0.017362   0.205271   0.9375                     



[array([0.20527]), 0.9375000012397766]

### END TESTING

In [None]:
# Hyper-parameters for this run; `lr` is not used by the phase schedule below.
wd = 1e-4
lr = 0.6

# The factory defined in this notebook is `preact_resnet18`; `pre_resnet18`
# does not exist.
m = preact_resnet18()
learn = ConvLearner.from_model_data(m, data)
learn.half()
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
learn.clip = 3e-1

# TODO: add momentum
# 21 epochs: exponential warm-up, cosine ramp up, constant plateau,
# cosine ramp down, then a short linear anneal.
def_phase = {'opt_fn': optim.SGD, 'wds': wd}
phases = [
    TrainingPhase(**def_phase, epochs=1, lr=(.005, .05), lr_decay=DecayType.EXPONENTIAL,
                  momentum=0.95),
    TrainingPhase(**def_phase, epochs=6, lr=(.05, .9), lr_decay=DecayType.COSINE,
                  momentum=(0.95, 0.85), momentum_decay=DecayType.COSINE),
    TrainingPhase(**def_phase, epochs=4, lr=1, momentum=0.85),
    TrainingPhase(**def_phase, epochs=7, lr=(.9, .01), lr_decay=DecayType.COSINE,
                  momentum=(0.85, 0.95), momentum_decay=DecayType.COSINE),
    TrainingPhase(**def_phase, epochs=3, lr=(.01, .0005), lr_decay=DecayType.LINEAR,
                  momentum=0.95),
]

learn.fit_opt_sched(phases, data_list=[data], loss_scale=512)

In [None]:
# Plot the learning rate via the learner's scheduler object; presumably this
# shows the schedule of the most recent fit (TODO confirm).
learn.sched.plot_lr()

In [None]:
m = WideResNetConcat(num_groups=3, N=3, num_classes=10, k=1, drop_p=0.)
learn = ConvLearner.from_model_data(m, data)
learn.half()
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=1e-4
lr=1.5
learn.clip = 3e-1
%time learn.fit(lr, 1, wds=wd, cycle_len=23, use_clr_beta=(12,22,0.95,0.85), loss_scale=512)