# Transfer Learning Prototype

## Initialization

In [1]:
import importlib

import os
import sys
import numpy as np
import time

import torch
from torch import nn
import torch.optim as optim
from torch.utils import data
from torchvision import transforms
import distiller.apputils as apputils

import ai8x

In [2]:
# Use the first CUDA GPU when PyTorch reports one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Running on device: {}'.format(device))

Running on device: cuda:0


## Helper Functions

In [3]:
# Count Model Parameters
def count_params(model):
    """Return the number of trainable scalar elements in ``model``.

    Only parameters whose ``requires_grad`` flag is set are counted, so the
    result shrinks as parameters are frozen.

    Args:
        model: Object exposing ``parameters()`` that yields tensors
            (e.g. a ``torch.nn.Module``).

    Returns:
        int: Sum of ``numel()`` over all parameters with ``requires_grad``
        set; ``0`` when there are none.
    """
    # numel() yields a plain Python int for every shape (including 0-dim
    # parameters), so the total never degrades to a NumPy scalar or a float.
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# Freezing of Layer
def freeze_layer(layer):
    """Disable gradient tracking for the weight and bias of ``layer.op``.

    Args:
        layer: Object whose ``op`` attribute exposes ``weight`` and ``bias``
            attributes (tensors supporting ``requires_grad_``).

    A ``weight`` or ``bias`` that is ``None`` (for example an op created
    without a bias term) is skipped instead of raising ``AttributeError``.
    """
    for tensor in (layer.op.weight, layer.op.bias):
        if tensor is not None:
            tensor.requires_grad_(False)

# Unfreezing of Layer
def unfreeze_layer(layer):
    """Enable gradient tracking for the weight and bias of ``layer.op``.

    Args:
        layer: Object whose ``op`` attribute exposes ``weight`` and ``bias``
            attributes (tensors supporting ``requires_grad_``).

    A ``weight`` or ``bias`` that is ``None`` (for example an op created
    without a bias term) is skipped instead of raising ``AttributeError``.
    """
    for tensor in (layer.op.weight, layer.op.bias):
        if tensor is not None:
            tensor.requires_grad_(True)

# Print Model Parameters
def print_model_params(model):
    """Print one line per named parameter of ``model``.

    A header line is printed first; each following line shows the parameter
    name, its size, and its ``requires_grad`` flag, separated by `` , ``.

    Args:
        model: Object exposing ``named_parameters()`` that yields
            ``(name, tensor)`` pairs.
    """
    print('Layer, Size, Requires Grad:')
    for param_name, tensor in model.named_parameters():
        row = (param_name, tensor.size(), tensor.requires_grad)
        print(*row, sep=' , ')

## Loading of Model from Checkpoint

In [4]:
# Configure ai8x for device 85 (reported as MAX78000 in the cell output),
# with simulate and round_avg both turned off.
ai8x.set_device(device=85, simulate=False, round_avg=False)

# The module name contains hyphens, so it is imported by string.
mod = importlib.import_module("models.ai85net-nas-cifar")

model = mod.AI85NASCifarNet(
    num_classes=100,
    num_channels=3,
    dimensions=(32, 32),
    bias=True,
)
print(f'Number of Model Params: {count_params(model)}')

# load_checkpoint hands back four values; unpack them into the notebook
# globals, rebinding `model` to the returned model.
checkpoint_state = apputils.load_checkpoint(
    model, "trained/ai85-cifar100-new.pth.tar", model_device=device)
model, compression_scheduler, optimizer, start_epoch = checkpoint_state

Configuring device: MAX78000, simulate=False.
Number of Model Params: 348772


In [5]:
# Baseline listing: print every parameter's name, size, and requires_grad flag
# before any layer is frozen.
print_model_params(model)

Layer, Size, Requires Grad:
conv1_1.output_shift , torch.Size([1]) , False
conv1_1.weight_bits , torch.Size([1]) , False
conv1_1.bias_bits , torch.Size([1]) , False
conv1_1.quantize_activation , torch.Size([1]) , False
conv1_1.adjust_output_shift , torch.Size([1]) , False
conv1_1.shift_quantile , torch.Size([1]) , False
conv1_1.op.weight , torch.Size([64, 3, 3, 3]) , True
conv1_1.op.bias , torch.Size([64]) , True
conv1_2.output_shift , torch.Size([1]) , False
conv1_2.weight_bits , torch.Size([1]) , False
conv1_2.bias_bits , torch.Size([1]) , False
conv1_2.quantize_activation , torch.Size([1]) , False
conv1_2.adjust_output_shift , torch.Size([1]) , False
conv1_2.shift_quantile , torch.Size([1]) , False
conv1_2.op.weight , torch.Size([32, 64, 1, 1]) , True
conv1_2.op.bias , torch.Size([32]) , True
conv1_3.output_shift , torch.Size([1]) , False
conv1_3.weight_bits , torch.Size([1]) , False
conv1_3.bias_bits , torch.Size([1]) , False
conv1_3.quantize_activation , torch.Size([1]) , False
co

## Freezing of Layers
Note: The layers listed in the cell below must match the layer names of the model architecture.<br>
The effect is visible in the `requires_grad` flag of each listed layer's `op.weight` and `op.bias`.

In [6]:
# CIFAR 100 NAS Model

# Layers whose op.weight / op.bias are frozen, in call order. conv5_1 and fc
# are intentionally absent, so this cell leaves their requires_grad flags
# untouched.
for layer_name in (
    'conv1_1', 'conv1_2', 'conv1_3',
    'conv2_1', 'conv2_2',
    'conv3_1', 'conv3_2',
    'conv4_1', 'conv4_2',
):
    freeze_layer(getattr(model, layer_name))
model.to(device)

AI85NASCifarNet(
  (conv1_1): FusedConv2dBNReLU(
    (activate): ReLU(inplace=True)
    (op): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn): BatchNorm2d(64, eps=1e-05, momentum=0.05, affine=False, track_running_stats=True)
    (calc_out_shift): OutputShiftSqueeze()
    (calc_weight_scale): One()
    (scale): Scaler()
    (calc_out_scale): OutputScale()
    (quantize_weight): Empty()
    (quantize_bias): Empty()
    (clamp_weight): Empty()
    (clamp_bias): Empty()
    (quantize): Empty()
    (clamp): Clamp()
    (quantize_pool): Empty()
    (clamp_pool): Empty()
  )
  (conv1_2): FusedConv2dBNReLU(
    (activate): ReLU(inplace=True)
    (op): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1))
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.05, affine=False, track_running_stats=True)
    (calc_out_shift): OutputShiftSqueeze()
    (calc_weight_scale): One()
    (scale): Scaler()
    (calc_out_scale): OutputScale()
    (quantize_weight): Empty()
    (quantize_bia

In [7]:
# Print the model parameters after the freeze step; the frozen layers'
# op.weight / op.bias rows should now report requires_grad False.
print_model_params(model)

Layer, Size, Requires Grad:
conv1_1.output_shift , torch.Size([1]) , False
conv1_1.weight_bits , torch.Size([1]) , False
conv1_1.bias_bits , torch.Size([1]) , False
conv1_1.quantize_activation , torch.Size([1]) , False
conv1_1.adjust_output_shift , torch.Size([1]) , False
conv1_1.shift_quantile , torch.Size([1]) , False
conv1_1.op.weight , torch.Size([64, 3, 3, 3]) , False
conv1_1.op.bias , torch.Size([64]) , False
conv1_2.output_shift , torch.Size([1]) , False
conv1_2.weight_bits , torch.Size([1]) , False
conv1_2.bias_bits , torch.Size([1]) , False
conv1_2.quantize_activation , torch.Size([1]) , False
conv1_2.adjust_output_shift , torch.Size([1]) , False
conv1_2.shift_quantile , torch.Size([1]) , False
conv1_2.op.weight , torch.Size([32, 64, 1, 1]) , False
conv1_2.op.bias , torch.Size([32]) , False
conv1_3.output_shift , torch.Size([1]) , False
conv1_3.weight_bits , torch.Size([1]) , False
conv1_3.bias_bits , torch.Size([1]) , False
conv1_3.quantize_activation , torch.Size([1]) , Fals

## Unfreezing of Layers
Note: The layers listed in the cell below must match the layer names of the model architecture.<br>
The effect is visible in the `requires_grad` flag of each listed layer's `op.weight` and `op.bias`.

In [9]:
# CIFAR 100 NAS Model

# Layers whose op.weight / op.bias are made trainable again, in call order.
# conv5_1 and fc are intentionally absent, so this cell leaves their
# requires_grad flags untouched.
for layer_name in (
    'conv1_1', 'conv1_2', 'conv1_3',
    'conv2_1', 'conv2_2',
    'conv3_1', 'conv3_2',
    'conv4_1', 'conv4_2',
):
    unfreeze_layer(getattr(model, layer_name))
model.to(device)

AI85NASCifarNet(
  (conv1_1): FusedConv2dBNReLU(
    (activate): ReLU(inplace=True)
    (op): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn): BatchNorm2d(64, eps=1e-05, momentum=0.05, affine=False, track_running_stats=True)
    (calc_out_shift): OutputShiftSqueeze()
    (calc_weight_scale): One()
    (scale): Scaler()
    (calc_out_scale): OutputScale()
    (quantize_weight): Empty()
    (quantize_bias): Empty()
    (clamp_weight): Empty()
    (clamp_bias): Empty()
    (quantize): Empty()
    (clamp): Clamp()
    (quantize_pool): Empty()
    (clamp_pool): Empty()
  )
  (conv1_2): FusedConv2dBNReLU(
    (activate): ReLU(inplace=True)
    (op): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1))
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.05, affine=False, track_running_stats=True)
    (calc_out_shift): OutputShiftSqueeze()
    (calc_weight_scale): One()
    (scale): Scaler()
    (calc_out_scale): OutputScale()
    (quantize_weight): Empty()
    (quantize_bia

In [10]:
# Print the model parameters after the unfreeze step; the unfrozen layers'
# op.weight / op.bias rows should report requires_grad True again.
print_model_params(model)

Layer, Size, Requires Grad:
conv1_1.output_shift , torch.Size([1]) , False
conv1_1.weight_bits , torch.Size([1]) , False
conv1_1.bias_bits , torch.Size([1]) , False
conv1_1.quantize_activation , torch.Size([1]) , False
conv1_1.adjust_output_shift , torch.Size([1]) , False
conv1_1.shift_quantile , torch.Size([1]) , False
conv1_1.op.weight , torch.Size([64, 3, 3, 3]) , True
conv1_1.op.bias , torch.Size([64]) , True
conv1_2.output_shift , torch.Size([1]) , False
conv1_2.weight_bits , torch.Size([1]) , False
conv1_2.bias_bits , torch.Size([1]) , False
conv1_2.quantize_activation , torch.Size([1]) , False
conv1_2.adjust_output_shift , torch.Size([1]) , False
conv1_2.shift_quantile , torch.Size([1]) , False
conv1_2.op.weight , torch.Size([32, 64, 1, 1]) , True
conv1_2.op.bias , torch.Size([32]) , True
conv1_3.output_shift , torch.Size([1]) , False
conv1_3.weight_bits , torch.Size([1]) , False
conv1_3.bias_bits , torch.Size([1]) , False
conv1_3.quantize_activation , torch.Size([1]) , False
co