## CIFAR 10

In [1]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [2]:
from time import time
from fastai.conv_learner import *
PATH = "data/cifar10/"

from fp16utils import *

In [3]:
# PATH = "data/cifar10/tmp/8"

### Load classes

In [4]:
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
stats = (np.array([ 0.4914 ,  0.48216,  0.44653]), np.array([ 0.24703,  0.24349,  0.26159]))

In [5]:
def get_data(sz,bs):
    tfms = tfms_from_stats(stats, sz, aug_tfms=[RandomFlip()], pad=sz//8)
    return ImageClassifierData.from_paths(PATH, val_name='test', tfms=tfms, bs=bs, num_workers=8)

In [6]:
bs=128

In [None]:
# data = get_data(32,bs*4*4)

In [None]:
# data.trn_ds.resize_imgs(8, 'tmp')

In [None]:
# data.val_ds.resize_imgs(8, 'tmp')

## Measure fp16 - half

In [7]:
fp16 = True

In [8]:
from fastai.models.cifar10.resnext import resnext29_8_64

m = resnext29_8_64()
if fp16:
    m = network_to_half(m)
bm = BasicModel(m.cuda(), name='cifar10_resnet50')

In [9]:
data = get_data(8,bs*4*4)

In [10]:
learn = ConvLearner(data, bm)
learn.unfreeze()

In [11]:
lr=4e-2; wd=5e-4

In [None]:
import subprocess

subprocess.Popen("timeout 120 nvidia-smi --query-gpu=utilization.gpu,utilization.memory --format=csv -l 1 | sed s/%//g > ./GPU-stats.log",shell=True)

In [12]:
t1 = time.time()
%time learn.fit(lr, 1, cycle_len=1, use_clr=(20,8))
t2 = time.time()

epoch      trn_loss   val_loss   accuracy                 
    0      4.196475   2.624609   0.215559  

CPU times: user 1min 1s, sys: 21.7 s, total: 1min 23s
Wall time: 1min 9s


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
sb.set_style("darkgrid")

gpu = pd.read_csv("./GPU-stats.log")   # make sure that 120 seconds have expired before running this cell
gpu.plot()
plt.show()

In [13]:
t2 - t1

69.40062093734741

In [None]:
t2 - t1

### Measure time on 32x32

In [None]:
from fastai.models.cifar10.resnext import resnext29_8_64

m = resnext29_8_64()
if fp16:
    m = network_to_half(m)
bm = BasicModel(m.cuda(), name='cifar10_resnet50')

In [None]:
data = get_data(32,bs*4)

In [None]:
t1_32 = time.time()
%time learn.fit(lr, 1, cycle_len=1, use_clr=(20,8))
t2_32 = time.time()

In [None]:
t2_32 - t1_32

## Measure fp32 - full

In [None]:
from fastai.models.cifar10.resnext import resnext29_8_64

mf = resnext29_8_64()
# m = resnet50(False)
bmf = BasicModel(mf.cuda(), name='cifar10_resnet50')

In [None]:
dataf = get_data(8,bs*4*4)

In [None]:
learnf = ConvLearner(dataf, bmf)
learnf.unfreeze()

In [None]:
lr=4e-2; wd=5e-4

In [None]:
t1_full = time.time()
%time learnf.fit(lr, 1, cycle_len=1, use_clr=(20,8))
t2_full = time.time()

In [None]:
t2_full - t1_full

### Measure time on 32x32

In [None]:
data = get_data(32,bs*4)

In [None]:
%time learnf.fit(lr, 1, cycle_len=3, use_clr=(20,8))

### Results:

FP16 is actually slower in these tests. Will have to look at why this is.
Possible reasons:
* Image size or batch size is too small
    * Training isn't long enough to make a difference?
* Data loader is too slow
    * Training time doesn't increase when training on 8x8 vs 32x32