In [1]:
import caffe

from mxnet import nd
from gluoncv.model_zoo import mobilenet1_0

from collections import OrderedDict
import functools
import numpy as np

In [2]:
def _is_number(s):
    try:
        _ = int(s)
        return True
    except:
        return False

In [3]:
# Map dynamic (dotted-path) parameter names to static (block-name) ones,
# for example:
# feature.0.weight -> conv0_weight
def recover_pnames(net, gluon_params):
    """Re-key ``gluon_params`` from dotted attribute paths to block-based names.

    Each key is split on ".". Every component except the last is resolved
    starting from ``net`` (integer-like components index into the current
    block, the others are looked up with ``getattr``). The new key is the
    resolved block's ``name`` with the ``net.name`` prefix (plus one separator
    character) removed, followed by ``_`` and the last path component.

    Args:
        net: root block; must expose ``name`` and the children named in the keys.
        gluon_params: mapping of dotted path -> object providing ``asnumpy()``.

    Returns:
        OrderedDict mapping the rewritten names to ``value.asnumpy()``, in the
        iteration order of ``gluon_params``.
    """
    # Block names are prefixed with "<net.name>" and one separator character.
    prefix_len = len(net.name) + 1
    params = OrderedDict()
    for key, value in gluon_params.items():
        *path, attr_name = key.split(".")
        # Resolve from the ``net`` argument, not from a notebook-level global,
        # so the function works for whichever network it is handed.
        atom_block = functools.reduce(
            lambda block, part: block[int(part)] if _is_number(part) else getattr(block, part),
            path, net
        )
        params[f'{atom_block.name[prefix_len:]}_{attr_name}'] = value.asnumpy()
    return params

In [4]:
# swap in_channel for the first convolution
def rgb2bgr(conv_wt):
    """Return a new array with channels 0 and 2 of axis 1 exchanged.

    ``conv_wt`` is indexed with four indices, so it must be 4-D. Only the
    first three entries along axis 1 are read, and the result always has
    exactly three entries on that axis, in the order (2, 1, 0).
    """
    reversed_channels = [conv_wt[:, idx, :, :] for idx in (2, 1, 0)]
    return np.stack(reversed_channels, axis=1)

In [5]:
# map gluon layer names to caffe
# Caffe layer names in network order: the stem "conv1", then a depthwise
# ("dw") and a pointwise ("sep") convolution for each listed stage.
caffe_conv_list = ["conv1"] + [
    f"conv{stage}/{kind}"
    for stage in ("2_1", "2_2", "3_1", "3_2", "4_1", "4_2",
                  "5_1", "5_2", "5_3", "5_4", "5_5", "5_6", "6")
    for kind in ("dw", "sep")
]
# The i-th entry of the list is the Caffe counterpart of the block "conv{i}".
layer_map = dict(
    (f"conv{idx}", caffe_name) for idx, caffe_name in enumerate(caffe_conv_list)
)
layer_map['dense0'] = 'fc7'

In [6]:
# Build the Caffe network from its prototxt in TEST phase. No weights file is
# passed here; the parameter blobs are overwritten by a later cell.
caffe_net = caffe.Net("models/mobilenet_no_bn.prototxt", caffe.TEST)
# Created without pretrained weights: recover_pnames() only walks this object's
# block hierarchy to read block names.
gluon_net = mobilenet1_0(pretrained=False)

# Load the checkpoint, then re-key it from dotted paths to block-based names.
# After this, values are whatever `.asnumpy()` returned for each entry.
gluon_params = nd.load("tmp/checkpoints/mobilenet1.0_quantize_without_fc-006000.params")
gluon_params = recover_pnames(gluon_net, gluon_params)

In [7]:
def _fold_batchnorm(weight, bias, gamma, beta, mean, var, eps=1e-10):
    """Fold batch-norm parameters into a layer's weight and bias.

    Per output channel (axis 0 of ``weight``) this computes
    ``weight * gamma / sqrt(var + eps)`` and
    ``gamma * (bias - mean) / sqrt(var + eps) + beta``.

    Args:
        weight: array whose first axis is the output channel.
        bias: per-channel array, or a scalar such as ``0.`` for a bias-free layer.
        gamma, beta, mean, var: per-channel batch-norm parameters / statistics.
        eps: value added to ``var`` before taking the square root.

    Returns:
        ``(folded_weight, folded_bias)``; ``folded_weight`` keeps ``weight``'s shape.
    """
    std = np.sqrt(var + eps)
    cout = weight.shape[0]
    # Flatten to (cout, -1) so the per-channel factors broadcast over each filter.
    folded_weight = (weight.reshape(cout, -1) * gamma.reshape(-1, 1)
                     / std.reshape(-1, 1)).reshape(weight.shape)
    folded_bias = gamma * (bias - mean) / std + beta
    return folded_weight, folded_bias


# NOTE: this cell is not idempotent. The gamma/beta/running_* entries stay in
# `gluon_params` and the folded weights are written back, so running it a
# second time folds the statistics again. Re-run the loading cell first.
update_params = {}
for k in gluon_params.keys():
    # The suffix is stripped below, so the key must END with "_weight";
    # a substring match would mangle names that merely contain it.
    if not k.endswith("_weight"):
        continue
    basic_name = k[:-len("_weight")]
    bn_name = basic_name.replace("conv", "batchnorm")
    if f'{basic_name}_gamma' in gluon_params:
        # BN parameters are stored under the layer's own name.
        print(f"Merge fake BN: {basic_name}")
        stats_name = basic_name
        bias = gluon_params[f'{basic_name}_bias']
    elif f'{bn_name}_gamma' in gluon_params:
        # BN parameters live in a separate "batchnorm<N>" block; the layer
        # may have no bias of its own, in which case 0 is used.
        print(f"Merge BN: {basic_name}")
        stats_name = bn_name
        bias = gluon_params.get(f'{basic_name}_bias', 0.)
    else:
        # No batch-norm parameters for this layer: leave it untouched.
        continue

    folded_weight, folded_bias = _fold_batchnorm(
        gluon_params[k],
        bias,
        gamma=gluon_params[f'{stats_name}_gamma'],
        beta=gluon_params[f'{stats_name}_beta'],
        mean=gluon_params[f'{stats_name}_running_mean'],
        var=gluon_params[f'{stats_name}_running_var'],
    )
    update_params[f'{basic_name}_weight'] = folded_weight
    update_params[f'{basic_name}_bias'] = folded_bias
# Applied after the loop so the dict is not modified while it is being iterated.
gluon_params.update(update_params)

Merge BN: conv0
Merge fake BN: conv1
Merge fake BN: conv2
Merge fake BN: conv3
Merge fake BN: conv4
Merge fake BN: conv5
Merge fake BN: conv6
Merge fake BN: conv7
Merge fake BN: conv8
Merge fake BN: conv9
Merge fake BN: conv10
Merge fake BN: conv11
Merge fake BN: conv12
Merge fake BN: conv13
Merge fake BN: conv14
Merge fake BN: conv15
Merge fake BN: conv16
Merge fake BN: conv17
Merge fake BN: conv18
Merge fake BN: conv19
Merge fake BN: conv20
Merge fake BN: conv21
Merge fake BN: conv22
Merge fake BN: conv23
Merge fake BN: conv24
Merge fake BN: conv25
Merge fake BN: conv26


In [8]:
# Scales map the largest observed magnitude onto this value (2**7 - 1 = 127).
INT8_MAX = 2 ** 7 - 1
# Weight magnitudes at or below this are treated as zero (scale 0).
MIN_WEIGHT_MAX = 1e-4
# When True, conv0's input channels 0 and 2 are exchanged with rgb2bgr()
# before the weights are copied. Off by default.
SWAP_FIRST_CONV_CHANNELS = False

# For each Caffe layer: one scale string per output filter, then the input scale.
scale_list = OrderedDict()
for glayer, clayer in layer_map.items():
    if "conv" in glayer:
        # Convolution
        weight = gluon_params[f'{glayer}_weight']
        if SWAP_FIRST_CONV_CHANNELS and glayer == 'conv0':
            weight = rgb2bgr(weight)
        caffe_net.params[clayer][0].data[...] = weight
        caffe_net.params[clayer][1].data[...] = gluon_params[f'{glayer}_bias']
    elif "dense" in glayer:   # Dense to Convolution_1x1
        dense_weight = gluon_params[f'{glayer}_weight']
        # Take the unit count from the weight itself instead of assuming 1000.
        caffe_net.params[clayer][0].data[...] = dense_weight.reshape(dense_weight.shape[0], -1, 1, 1)
        caffe_net.params[clayer][1].data[...] = gluon_params[f'{glayer}_bias']

    # generate scales list (only for layers that carry a recorded input maximum)
    if f'{glayer}_input_max' in gluon_params:
        # per-channel quantize for weights: one scale per entry along axis 0
        weight = gluon_params[f'{glayer}_weight']
        wt_scales = []
        for filter_ in weight:
            max_ = abs(filter_).max()
            wt_scale = INT8_MAX / max_ if max_ > MIN_WEIGHT_MAX else np.float32(0.)
            wt_scales.append(str(wt_scale))

        # calculate scale for inputs; a zero maximum yields scale 0 rather
        # than a division by zero
        max_ = gluon_params[f'{glayer}_input_max'][0]
        in_scale = INT8_MAX / max_ if max_ != 0. else 0.
        scale_list[clayer] = wt_scales + [str(in_scale),]

In [9]:
# Write caffe_net, with the parameter blobs filled in above, to a .caffemodel file.
caffe_net.save("tmp/retrained_mobilenet.caffemodel")

In [10]:
# Write the scale table: first one "<layer>_param_0 <weight scales...>" line
# per layer, then one "<layer> <input scale>" line per layer.
with open("tmp/mobilenet_int8.table", "w") as table_file:
    table_file.writelines(
        f"{layer}_param_0 {' '.join(scales[:-1])}\n"
        for layer, scales in scale_list.items()
    )
    table_file.writelines(
        f"{layer} {scales[-1]}\n"
        for layer, scales in scale_list.items()
    )