## CIFAR 10

In [8]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [11]:
from time import time
from fastai.conv_learner import *
PATH = "data/cifar10/"

from fp16utils import *

### Load classes

In [17]:
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
stats = (np.array([ 0.4914 ,  0.48216,  0.44653]), np.array([ 0.24703,  0.24349,  0.26159]))

In [18]:
def get_data(sz,bs):
    tfms = tfms_from_stats(stats, sz, aug_tfms=[RandomFlip()], pad=sz//8)
    return ImageClassifierData.from_paths(PATH, val_name='test', tfms=tfms, bs=bs)

In [19]:
bs=128

## Measure fp16 - half

In [28]:
from fastai.models.cifar10.resnext import resnext29_8_64

m = resnext29_8_64()
# m = resnet50(False)
bm = BasicModel(network_to_half(m).cuda(), name='cifar10_resnet50')

In [22]:
data = get_data(8,bs*4*4)

In [23]:
learn = ConvLearner(data, bm)
learn.unfreeze()

In [24]:
lr=4e-2; wd=5e-4

In [37]:
t1 = time.time()
%time learn.fit(lr, 1, cycle_len=1, use_clr=(20,8))
t2 = time.time()

epoch      trn_loss   val_loss   accuracy                 
    0      3.742317   2.416797   0.149995  



In [39]:
t2 - t1

33.93567514419556

### Measure time on 32x32

In [43]:
from fastai.models.cifar10.resnext import resnext29_8_64

m = resnext29_8_64()
# m = resnet50(False)
bm = BasicModel(network_to_half(m).cuda(), name='cifar10_resnet50')

In [44]:
data = get_data(32,bs*4)

In [45]:
t1_32 = time.time()
%time learn.fit(lr, 1, cycle_len=1, use_clr=(20,8))
t2_32 = time.time()

  8%|▊         | 2/25 [00:05<01:07,  2.94s/it, loss=1.85]
 12%|█▏        | 3/25 [00:06<00:50,  2.31s/it, loss=1.8] 

Exception in thread Thread-7:
Traceback (most recent call last):
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/tqdm/_monitor.py", line 63, in run
    for instance in self.tqdm_cls._instances:
  File "/home/paperspace/anaconda3/envs/fastai/lib/python3.6/_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
RuntimeError: Set changed size during iteration



epoch      trn_loss   val_loss   accuracy                 
    0      1.802143   1.641016   0.402595  

CPU times: user 33.4 s, sys: 15.1 s, total: 48.5 s
Wall time: 32.6 s


In [46]:
t2_32 - t1_32

32.553186893463135

## Measure fp32 - full

In [51]:
from fastai.models.cifar10.resnext import resnext29_8_64

mf = resnext29_8_64()
# m = resnet50(False)
bmf = BasicModel(mf.cuda(), name='cifar10_resnet50')

In [52]:
dataf = get_data(8,bs*4*4)

In [53]:
learnf = ConvLearner(dataf, bmf)
learnf.unfreeze()

In [54]:
lr=4e-2; wd=5e-4

In [55]:
t1_full = time.time()
%time learnf.fit(lr, 1, cycle_len=1, use_clr=(20,8))
t2_full = time.time()

epoch      trn_loss   val_loss   accuracy                 
    0      3.086542   2.172181   0.239295  

CPU times: user 27.2 s, sys: 15.5 s, total: 42.7 s
Wall time: 30.8 s


In [56]:
t2_full - t1_full

30.818873643875122

### Measure time on 32x32

In [22]:
data = get_data(32,bs*4)

In [23]:
%time learnf.fit(lr, 1, cycle_len=3, use_clr=(20,8))

epoch      trn_loss   val_loss   accuracy                 
    0      1.606625   1.529435   0.445549  
    1      1.535117   1.434556   0.485208                 
    2      1.466418   1.392065   0.501437                 

CPU times: user 1min 20s, sys: 47.4 s, total: 2min 8s
Wall time: 1min 33s


[1.3920648, 0.5014371871948242]

### Results:

FP16 is actually slower in these tests. Will have to look at why this is.
Possible reasons:
* Image size or batch size is too small
    * Training isn't long enough to make a difference?
* Data loader is too slow
    * Training time doesn't increase when training on 8x8 vs 32x32