## CIFAR 10

In [1]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [3]:
from fastai.conv_learner import *
# Data directory for this notebook; created here if it is missing.
PATH = "../fp16/data/cifar10/"
os.makedirs(PATH, exist_ok=True)

In [4]:
from fastai.fp16 import *

In [17]:
# NOTE(review): scratch cell. `a` is only assigned in the following cell (In [19]),
# so on a fresh kernel (Restart & Run All) this line raises NameError.
iter(a)

<list_iterator at 0x7f87846a50f0>

In [19]:
# Scratch list of dicts (the value `iter(a)` is tried on).
a = [dict(a=1), dict(a=3)]

In [27]:
def copy_model_to_fp32(m, optim):
    """Swap the optimizer's parameters for detached `torch.cuda.FloatTensor` copies.

    Every parameter yielded by `m.parameters()` is cloned, converted with
    `.type(torch.cuda.FloatTensor)` and detached. The copies are then written
    into `optim.param_groups[...]['params']` in place of the existing entries.

    Copies are matched to optimizer slots purely by position: the n-th slot
    (walking the groups in order) receives the n-th copy. Each copy takes its
    `requires_grad` flag from the optimizer entry it replaces.

    Args:
        m: object exposing `parameters()`.
        optim: object exposing `param_groups`, a list of dicts with a
            mutable `'params'` list.

    Returns:
        The list of copies, in `m.parameters()` order.
    """
    masters = []
    for p in m.parameters():
        masters.append(p.clone().type(torch.cuda.FloatTensor).detach())

    remaining = iter(masters)
    for group in optim.param_groups:
        slots = group['params']
        for idx, replaced in enumerate(slots):
            master = next(remaining)
            master.requires_grad = replaced.requires_grad
            slots[idx] = master
    return masters

In [28]:
class StepperFP16():
    """Runs single training / evaluation steps, optionally with an fp32 parameter copy.

    Default mode (`fp16=False`): `step` does forward, loss, backward, optional
    gradient clipping and `opt.step()` directly on the model.

    `fp16=True` mode: the optimizer works on `self.fp32_params`, the list
    returned by `copy_model_to_fp32`. After each backward pass the gradients
    are transferred with `update_fp32_grads`, divided by `loss_scale`,
    optionally clipped, stepped, and pushed back with `copy_fp32_to_model`.
    NOTE(review): `update_fp32_grads` and `copy_fp32_to_model` are not defined
    in this notebook; presumably they come from `fastai.fp16` — confirm.

    Args:
        m: the model; called as `m(*xs)`.
        opt: the optimizer (`zero_grad()`, `step()`, `param_groups`).
        crit: loss function, called as `crit(output, y)`.
        clip: gradient-norm clipping threshold; falsy disables clipping.
        reg_fn: optional callable `reg_fn(output, xtra, raw_loss)` whose return
            value replaces the loss that is back-propagated.
        loss_scale: factor applied to the loss before `backward()` in fp16
            mode; forced to 1 when `fp16` is False.
        fp16: enable the fp32-copy code path.
    """

    def __init__(self, m, opt, crit, clip=0, reg_fn=None, loss_scale=1, fp16=False):
        self.m,self.opt,self.crit,self.clip,self.reg_fn = m,opt,crit,clip,reg_fn
        # These must exist before reset(): reset() reads self.fp16 whenever the
        # model has a `reset` method.
        self.fp16 = fp16
        self.loss_scale = loss_scale if fp16 else 1
        self.fp32_params = None
        self.reset(True)
        # reset() already builds the fp32 copy for models that define `reset`;
        # only build it here when that did not happen.
        if self.fp16 and self.fp32_params is None:
            self.fp32_params = copy_model_to_fp32(m, opt)

    def reset(self, train=True):
        """Put the model in train or eval mode and call its `reset()` if it has one."""
        if train: apply_leaf(self.m, set_train_mode)
        else: self.m.eval()
        if hasattr(self.m, 'reset'):
            self.m.reset()
            # Rebuild the fp32 copy from the model after it has been reset.
            if self.fp16: self.fp32_params = copy_model_to_fp32(self.m, self.opt)

    def step(self, xs, y, epoch):
        """Run one training step on inputs `xs` / target `y`.

        Delegates to `step_fp16` in fp16 mode. Returns `raw_loss.data[0]`,
        i.e. the criterion value before `reg_fn` is applied.
        """
        if self.fp16: return self.step_fp16(xs, y, epoch)
        xtra = []
        output = self.m(*xs)
        # A tuple output is split into the prediction and extra values for reg_fn.
        if isinstance(output,tuple): output,*xtra = output
        self.opt.zero_grad()
        loss = raw_loss = self.crit(output, y)
        if self.reg_fn: loss = self.reg_fn(output, xtra, raw_loss)
        loss.backward()
        if self.clip:   # Gradient clipping
            nn.utils.clip_grad_norm(trainable_params_(self.m), self.clip)
        self.opt.step()
        return raw_loss.data[0]

    def step_fp16(self, xs, y, epoch):
        """Run one training step through the fp32 parameter copy.

        Returns `raw_loss.data[0]` (unscaled, before `reg_fn`).
        """
        xtra = []
        output = self.m(*xs)
        if isinstance(output,tuple): output,*xtra = output
        # The optimizer holds the fp32 copies, so gradients are cleared on the model.
        self.m.zero_grad()
        loss = raw_loss = self.crit(output, y)
        # Apply reg_fn first and scale afterwards. The other order discards the
        # scaling, while the gradients are still divided by loss_scale below.
        if self.reg_fn: loss = self.reg_fn(output, xtra, raw_loss)
        if self.loss_scale != 1: loss = loss*self.loss_scale
        loss.backward()
        # Use this stepper's own model, not a global `m`.
        update_fp32_grads(self.fp32_params, self.m)
        if self.loss_scale != 1:
            # Undo the loss scaling on the transferred gradients.
            for param in self.fp32_params: param.grad.data.div_(self.loss_scale)
        if self.clip:   # Gradient clipping
            # fp32_params is a plain list, so filter it directly.
            # NOTE(review): `step` passes a module to `trainable_params_`;
            # presumably it cannot take a list — confirm.
            trainable = [p for p in self.fp32_params if p.requires_grad]
            nn.utils.clip_grad_norm(trainable, self.clip)
        self.opt.step()
        copy_fp32_to_model(self.m, self.fp32_params)
        return raw_loss.data[0]

    def evaluate(self, xs, y):
        """Forward `xs` through the model; return `(preds, crit(preds, y))`.

        If the model returns a tuple, only its first element is used.
        """
        preds = self.m(*xs)
        if isinstance(preds,tuple): preds=preds[0]
        return preds, self.crit(preds, y)

### Load Data

In [29]:
# Class labels, plus the pair of 3-element arrays that `get_data` hands to
# `tfms_from_stats` (presumably per-channel mean and std — confirm).
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)
stats = (
    np.array([0.4914, 0.48216, 0.44653]),
    np.array([0.24703, 0.24349, 0.26159]),
)

In [30]:
def get_data(sz,bs):
    """Build the `ImageClassifierData` object for image size `sz` and batch size `bs`.

    Transforms come from `tfms_from_stats` using the notebook-level `stats`,
    a single `RandomFlip()` augmentation and a padding of `sz//8`. Data is
    read from `PATH`, with the folder named 'test' passed as `val_name`.
    """
    augmentations = [RandomFlip()]
    padding = sz//8
    transforms = tfms_from_stats(stats, sz, aug_tfms=augmentations, pad=padding)
    model_data = ImageClassifierData.from_paths(PATH, val_name='test', tfms=transforms, bs=bs)
    return model_data

In [31]:
# Base batch size; the model-setup cells call get_data with bs*4.
bs = 128

## Initial model

In [None]:
# Make sure the automatic post-mortem debugger is disabled for the cells below.
%pdb off

### Let's try to copy params into fp16

In [32]:
from fastai.models.cifar10.resnext import resnext29_8_64

# fp16 variant: the resnext29_8_64 network wrapped in `FP16`, then moved to the GPU.
m = FP16(resnext29_8_64())
bm = BasicModel(m.cuda(), name='cifar10_rn29_8_64')
# Image size 8, batch size 4x the base `bs`.
data = get_data(8, bs*4)

TypeError: super(type, obj): obj must be an instance or subtype of type

In [None]:
learn = ConvLearner(data, bm)
learn.unfreeze()
# Hyper-parameters. NOTE(review): `wd` (presumably weight decay) is not passed
# to any `fit` call in the cells shown.
lr = 1e-2
wd = 5e-4

In [None]:
# Switch the learner to half precision before the fp16 fit
# (presumably casts the underlying model to fp16 — confirm against fastai).
learn.half()

In [11]:
# Timed fit using the custom StepperFP16; `loss_scale` and `fp16` are extra keyword
# arguments (presumably forwarded to the stepper's constructor — confirm).
%time learn.fit(lr, 1, stepper=StepperFP16, cycle_len=.5, loss_scale=128, fp16=True)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

50it [00:32,  1.52it/s, loss=3.29]                        epoch      trn_loss   val_loss   accuracy   
    0      3.291506   2.956299   0.116883  

CPU times: user 39.2 s, sys: 10.7 s, total: 49.8 s
Wall time: 35.6 s


[2.956298828125, 0.11688304245471955]

In [None]:
# Save the weights from the fp16 run as 'fp16_model'; the fp32 section loads this name.
learn.save('fp16_model')

## Testing fp32

In [None]:
from fastai.models.cifar10.resnext import resnext29_8_64

# fp32 baseline: same resnext29_8_64 network, deliberately without the `FP16` wrapper.
m = resnext29_8_64()
bm = BasicModel(m.cuda(), name='cifar10_rn29_8_64')
# Image size 8, batch size 4x the base `bs`.
data = get_data(8, bs*4)

In [None]:
learn = ConvLearner(data, bm)
learn.unfreeze()
# Same hyper-parameters as the fp16 run. NOTE(review): `wd` (presumably weight
# decay) is not passed to any `fit` call in the cells shown.
lr = 1e-2
wd = 5e-4

In [None]:
# Load the weights saved under 'fp16_model' into this (non-FP16) learner.
learn.load('fp16_model')

In [None]:
# Timed fit with the same lr / cycle settings, but without the custom stepper,
# loss scaling or fp16 flag.
%time learn.fit(lr, 1, cycle_len=.5)

In [None]:
# Save the weights after the fp32 fit under the name '8x8_8'.
learn.save('8x8_8')

### Test

In [None]:
# Scratch: plain SGD optimizer over the parameters returned after `m.half()`.
# NOTE(review): `m.half()` is called on the shared `m`; for a torch module this
# presumably converts the model itself, not a copy — confirm.
new_optim = torch.optim.SGD(m.half().parameters(), lr=.05)

In [None]:
# Show which keys the optimizer's first param group carries.
new_optim.param_groups[0].keys()

In [None]:
# Pick the 4th entry of the first group's 'params' list and show its data type and shape.
t1 = new_optim.param_groups[0]['params'][3]
type(t1.data), t1.shape

In [None]:
# Type of the parameter object itself (as opposed to its `.data`).
type(t1)

In [None]:
# Overwrite the optimizer's first parameter slot with the first parameter yielded by the model.
new_optim.param_groups[0]['params'][0] = next(m.parameters())

In [None]:
# Two [0] indexes: the first slot of the 'params' list, then element 0 of that entry.
t2 = new_optim.param_groups[0]['params'][0][0]
type(t2.data), t2.shape

In [None]:
# Keep the object returned by m.parameters() so `next(it)` can be called on it.
it = m.parameters()

In [None]:
# First parameter from `it`. NOTE(review): this rebinds `a`, which an earlier
# scratch cell used for a list of dicts.
a = next(it)

In [None]:
# Type of the data held by that parameter.
type(a.data)