In [1]:
import fastai
from fastai.vision import *

In [2]:
gpu_device = 1
defaults.device = torch.device(f'cuda:{gpu_device}')
torch.cuda.set_device(gpu_device)

In [3]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
seed_everything(42)

In [4]:
path = untar_data(URLs.PETS)

In [5]:
path_anno = path/'annotations'
path_img = path/'images'
fnames = get_image_files(path_img)
pat = re.compile(r'/([^/]+)_\d+.jpg$')

### No Acc

In [6]:
seed_everything(2)
data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), size=224, bs=64
                                  ).normalize(imagenet_stats)
learn = create_cnn(data, models.resnet34, metrics=error_rate)
learn.fit(1)

epoch,train_loss,valid_loss,error_rate,time
1,0.639602,0.256227,0.087957,00:17


In [7]:
data.batch_size

64

### Naive Acc 

In [8]:
seed_everything(2)
data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), size=224, bs=2
                                  ).normalize(imagenet_stats)
learn = create_cnn(data=data, arch=models.resnet34, metrics=error_rate,
                   callback_fns=[partial(AccumulateStepper, n_step=32)])
learn.loss_func = CrossEntropyFlat(reduction='sum')
learn.fit(1)

epoch,train_loss,valid_loss,error_rate,time
1,4.862456,2.239424,0.270636,00:36


### Acc + BnFreeze

In [9]:
seed_everything(2)
data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), size=224, bs=2
                                  ).normalize(imagenet_stats)
learn = create_cnn(data=data, arch=models.resnet34, metrics=error_rate,
                   callback_fns=[partial(AccumulateStepper, n_step=32), BnFreeze])
learn.loss_func = CrossEntropyFlat(reduction='sum')
learn.fit(1)

epoch,train_loss,valid_loss,error_rate,time
1,4.862456,2.239424,0.270636,00:36


### Increase Momentum 

In [10]:
def set_bn_mom(m:nn.Module, mom=0.9):
    "Set bn layers in eval mode for all recursive children of `m`."
    for l in m.children():
        if isinstance(l, bn_types):
            l.momentum = mom
        set_bn_mom(l, mom)

In [11]:
seed_everything(2)
data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), size=224, bs=2
                                  ).normalize(imagenet_stats)
learn = create_cnn(data=data, arch=models.resnet34, metrics=error_rate,
                   callback_fns=[partial(AccumulateStepper, n_step=32)])
learn.loss_func = CrossEntropyFlat(reduction='sum')
set_bn_mom(learn.model, mom=0.9)
learn.fit(1)

epoch,train_loss,valid_loss,error_rate,time
1,4.862456,19.069668,0.755751,00:37


### Decrease Momentum

In [12]:
seed_everything(2)
data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), size=224, bs=2
                                  ).normalize(imagenet_stats)
learn = create_cnn(data=data, arch=models.resnet34, metrics=error_rate,
                   callback_fns=[partial(AccumulateStepper, n_step=32)])
learn.loss_func = CrossEntropyFlat(reduction='sum')
set_bn_mom(learn.model, mom=0.01)
learn.fit(1)

epoch,train_loss,valid_loss,error_rate,time
1,4.862456,2.231589,0.253045,00:37


### InstanceNorm

In [13]:
def bn2instance(bn):
    if isinstance(bn, nn.BatchNorm1d): inst = nn.InstanceNorm1d(bn.num_features, affine=True)
    elif isinstance(bn, nn.BatchNorm2d): inst = nn.InstanceNorm2d(bn.num_features, affine=True)
    elif isinstance(bn, nn.BatchNorm3d): inst = nn.InstanceNorm3d(bn.num_features, affine=True)
    
    inst.weight = bn.weight
    inst.bias = bn.bias
    inst.running_mean = nn.Parameter(bn.running_mean, requires_grad=False)
    inst.running_var = nn.Parameter(bn.running_var, requires_grad=False)
    inst.momentum = bn.momentum
    inst.eps = bn.eps
    inst.track_running_stats = bn.track_running_stats
    return (inst).to(bn.weight.device)

In [14]:
def convert_bn(list_mods, func=bn2instance):
    for i in range(len(list_mods)):
        if isinstance(list_mods[i], bn_types):
            list_mods[i] = func(list_mods[i])
        elif list_mods[i].__class__.__name__ in ("Sequential", "BasicBlock"):
            list_mods[i] = nn.Sequential(*convert_bn(list(list_mods[i].children()), func))
    return list_mods

In [15]:
seed_everything(2)
data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), size=224, bs=2
                                  ).normalize(imagenet_stats)
learn = create_cnn(data=data, arch=models.resnet34, metrics=error_rate,
                   callback_fns=[partial(AccumulateStepper, n_step=32)])
learn.loss_func = CrossEntropyFlat(reduction='sum')

In [16]:
learn.model = nn.Sequential(*convert_bn(list(learn.model.children()), bn2instance))

In [17]:
learn.freeze()

In [18]:
learn.fit(1)

epoch,train_loss,valid_loss,error_rate,time


RuntimeError: Given groups=1, weight of size [128, 64, 1, 1], expected input[2, 128, 28, 28] to have 64 channels, but got 128 channels instead

### GroupNorm

In [30]:
def bn2group(bn):
    groupnorm = nn.GroupNorm(4, bn.num_features, affine=True)
    groupnorm.weight = bn.weight
    groupnorm.bias = bn.bias
    groupnorm.eps = bn.eps
    return (groupnorm).to(bn.weight.device)

In [31]:
seed_everything(2)
data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), size=224, bs=2
                                  ).normalize(imagenet_stats)
learn = create_cnn(data=data, arch=models.resnet34, metrics=error_rate,
                   callback_fns=[partial(AccumulateStepper, n_step=32)])
learn.loss_func = CrossEntropyFlat(reduction='sum')

In [32]:
learn.model = nn.Sequential(*convert_bn(list(learn.model.children()), bn2group))

In [33]:
gnorm = nn.GroupNorm(2,5)

In [34]:
learn.freeze()

In [35]:
learn.fit(1)

epoch,train_loss,valid_loss,error_rate,time


RuntimeError: Given groups=1, weight of size [128, 64, 1, 1], expected input[2, 128, 28, 28] to have 64 channels, but got 128 channels instead

### Resnet + GroupNorm

In [19]:
seed_everything(2)
data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), size=224, bs=2
                                  ).normalize(imagenet_stats)
learn = create_cnn(data=data, arch=models.resnet34, metrics=error_rate,
                   callback_fns=[partial(AccumulateStepper, n_step=32)])
learn.loss_func = CrossEntropyFlat(reduction='sum')

In [20]:
def change_all_BN(module):
    for i in range(5):
        atr = 'bn'+str(i)
        if hasattr(module, atr):
            setattr(module, atr, bn2group(getattr(module,atr)))

def wrap_BN(model):
    for i in range(len(model)):
        for j in range(len(model[i])):
            if isinstance(model[i][j], bn_types):
                model[i][j] = bn2group(model[i][j])
            elif model[i][j].__class__.__name__ == "Sequential":
                for k in range(len(model[i][j])):
                    if isinstance(model[i][j][k], bn_types):
                        model[i][j][k] = bn2group(model[i][j][k])
                    elif model[i][j][k].__class__.__name__ == "BasicBlock":
                        change_all_BN(model[i][j][k])
                        if hasattr(model[i][j][k],'downsample'):
                            if model[i][j][k].downsample is not None:
                                for l in range(len(model[i][j][k].downsample)):
                                     if isinstance(model[i][j][k].downsample[l], bn_types):
                                        model[i][j][k].downsample[l] = bn2group(model[i][j][k].downsample[l])
    

In [23]:
wrap_BN(learn.model)

In [24]:
learn.freeze()

In [25]:
learn.fit(1)

epoch,train_loss,valid_loss,error_rate,time
1,7.304383,7.247257,0.9682,00:45


### Resnet + GroupNorm (No Acc)

In [26]:
seed_everything(2)
data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), size=224, bs=2
                                  ).normalize(imagenet_stats)
learn = create_cnn(data=data, arch=models.resnet34, metrics=error_rate)

In [27]:
wrap_BN(learn.model)
learn.freeze()
learn.fit(1)

epoch,train_loss,valid_loss,error_rate,time
1,3.708869,3.646787,0.97429,00:57


### Resnet + GroupNorm (No Acc) bs = 1

In [28]:
seed_everything(2)
data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), size=224, bs=1
                                  ).normalize(imagenet_stats)
learn = create_cnn(data=data, arch=models.resnet34, metrics=error_rate)

In [29]:
wrap_BN(learn.model)
learn.freeze()
learn.fit(1)

epoch,train_loss,valid_loss,error_rate,time
1,3.804727,3.727457,0.965494,01:48
