# Hyperparameter Search Algorithm (HSA)

We will use the ResNet18 (imagenet) to demonstrate our hyperparameter search algorithm (HSA) in this notebook.

* We have provided the "imagenet_resnet18_r1.pth" checkpoint file in the "./experiments-resnet/experiments_save_ckpt/imagenet/" folder.


- Step1: Load the pretrained ResNet18 (the ImageNet checkpoint in this notebook; the same procedure applies to the other configured datasets such as Stanford Dogs). We provide this pretrained checkpoint.
- Step2: The KernelAnaylsor class calculates the explained_variance_ratio.
- Step3: Find the candidates that satisfy the given conditions (variance, MAdds, and Params).
- Step4: Use the Selection Strategy (SS) to select the final BlkSConv-based ResNet.

In [20]:
import numpy as np
import sklearn.decomposition
import matplotlib.pyplot as plt

class KernelAnaylsor(object):
    """Per-filter PCA analysis of a convolution layer's weights.

    For every output filter of ``conv_kernel`` the weights are reshaped into a
    2-D matrix and fitted with PCA; the summed explained-variance ratio of the
    leading components is reported per filter.

    Args:
        conv_kernel: object exposing a 4-D ``weight`` tensor (its first two
            dimensions are read as the number of filters and the number of
            input channels).
    """

    def __init__(self, conv_kernel):
        self.conv_kernel = conv_kernel

    def kernel_pca_slice(self, n_components=1, blk_depth=1):
        """Return the summed explained-variance ratio of each filter.

        Args:
            n_components: number of principal components to keep (num_basis).
            blk_depth: each filter is reshaped to
                ``(in_channels // blk_depth, -1)`` before PCA.

        Returns:
            A list with one value per filter (the sum of the first
            ``n_components`` explained-variance ratios), or ``None`` when PCA
            cannot be fitted for this configuration.
        """
        num_kernels, in_channels, _, _ = self.conv_kernel.weight.shape
        # .cpu() makes the conversion work for weights that live on another device.
        kernel_filter_weights = self.conv_kernel.weight.detach().cpu().numpy()

        pc_N_list = []
        for idx in range(num_kernels):
            kernel = kernel_filter_weights[idx].reshape(in_channels // blk_depth, -1)
            pca = sklearn.decomposition.PCA(n_components=n_components)
            try:
                pca.fit(kernel)
                pc_N_list.append(np.sum(pca.explained_variance_ratio_[:n_components]))
            except Exception:
                # Best-effort: an infeasible configuration is reported as None.
                # Catching Exception (not a bare except) keeps Ctrl-C working.
                return None
        return pc_N_list

    def plot_histogram(self, pc_N_list):
        """Show a histogram of the ratios (in percent), bucketed in steps of 5."""
        x = np.array(pc_N_list) * 100
        # Map every value to the upper edge of its 5%-wide bucket.
        y = x // 5 + 1
        y = y.astype('int')
        y = y.reshape(-1) * 5
        plt.hist(y, bins=np.arange(5, 105, 5), range=(0, 100))
        plt.show()

    def calculate_explained_ratio_mean_std(self, num_basis=1, blk_depth=1, plot=False):
        """Print and return the mean/std (in percent) of the per-filter ratios.

        Args:
            num_basis: number of principal components passed to the PCA.
            blk_depth: block depth passed to ``kernel_pca_slice``.
            plot: when truthy, also show the histogram of the ratios.

        Returns:
            ``(mean, std)`` in percent, or ``(-1, -1)`` when the PCA could not
            be fitted.
        """
        # Configurations with num_basis == blk_depth are highlighted with '=>'.
        if num_basis == blk_depth:
            print('=>', end='')
        print(f"s{num_basis}-t{blk_depth}: ", end='')
        pc_N_list = self.kernel_pca_slice(n_components=num_basis, blk_depth=blk_depth)

        if pc_N_list is None:
            print("None")
            return (-1, -1)

        mean_pct = np.mean(pc_N_list) * 100
        std_pct = np.std(pc_N_list) * 100
        print(f"Mean: {mean_pct:.2f}, Std: {std_pct:.2f}")
        if plot:
            self.plot_histogram(pc_N_list)
        return (mean_pct, std_pct)

In [21]:
# Params
def count_blksconv_parameters(in_channels, out_channels, kernel_size, num_basis, blk_depth):
    """Parameter count of a BlkSConv layer as modelled in this notebook.

    Per basis the count is ``(in_channels / blk_depth) * out_channels`` for the
    pointwise part plus ``kh * kw * blk_depth * out_channels`` for the
    groupwise part; the sum is multiplied by ``num_basis``. True division is
    used, so the result is a float.
    """
    kernel_area = kernel_size[0] * kernel_size[1]
    per_basis = (in_channels / blk_depth) * out_channels + kernel_area * blk_depth * out_channels
    return per_basis * num_basis

def count_conv_params(in_channels, out_channels, kernel_size):
    """Weight count of a standard convolution: ``kh * kw * in_channels * out_channels``."""
    kernel_h, kernel_w = kernel_size[0], kernel_size[1]
    return kernel_h * kernel_w * in_channels * out_channels

# MAdds
def count_blksconv_madds(in_channels, out_channels, kernel_size, num_basis, blk_depth, HW_shape):
    """Multiply-add count of a BlkSConv layer as modelled in this notebook.

    Args:
        in_channels, out_channels: channel counts of the layer.
        kernel_size: sequence whose first two items are the kernel height/width.
        num_basis: number of bases; the per-basis cost is multiplied by it.
        blk_depth: block depth used by the groupwise part.
        HW_shape: ``(Hi, Wi, stride)`` - input height, input width and stride.

    Returns:
        ``((Hi*Wi*in_channels + Ho*Wo*kh*kw*blk_depth) * out_channels) * num_basis``
        where the output size equals the input size for stride 1 and is halved
        (floor) for stride 2.

    Raises:
        ValueError: if ``stride`` is neither 1 nor 2.
    """
    (Hi, Wi, stride) = HW_shape
    if stride == 1:
        Ho, Wo = Hi, Wi
    elif stride == 2:
        Ho, Wo = Hi // 2, Wi // 2
    else:
        # Previously this fell through and failed later with an unbound local.
        raise ValueError(f"unsupported stride {stride!r}: expected 1 or 2")

    # The pointwise part is counted at input resolution, the groupwise part at
    # output resolution.
    pointwise = Hi * Wi * in_channels
    groupwise = Ho * Wo * kernel_size[0] * kernel_size[1] * blk_depth

    return ((pointwise + groupwise) * out_channels) * num_basis

def count_conv_madds(in_channels, out_channels, kernel_size, HW_shape):
    """Multiply-add count of a standard convolution.

    Args:
        in_channels, out_channels: channel counts of the layer.
        kernel_size: sequence whose first two items are the kernel height/width.
        HW_shape: ``(Hi, Wi, stride)`` - input height, input width and stride.

    Returns:
        ``Ho * Wo * kh * kw * in_channels * out_channels`` where the output size
        equals the input size for stride 1 and is halved (floor) for stride 2.

    Raises:
        ValueError: if ``stride`` is neither 1 nor 2.
    """
    (Hi, Wi, stride) = HW_shape
    if stride == 1:
        Ho, Wo = Hi, Wi
    elif stride == 2:
        Ho, Wo = Hi // 2, Wi // 2
    else:
        # Previously this fell through and failed later with an unbound local.
        raise ValueError(f"unsupported stride {stride!r}: expected 1 or 2")

    return Ho * Wo * kernel_size[0] * kernel_size[1] * in_channels * out_channels

### Step1: Load pretrained standard ResNet

In [22]:
import torch
from resnet import get_resnet
from collections import OrderedDict

# Residual units per stage for each backbone:
# "resnet10": [1, 1, 1, 1],
# "resnet18": [2, 2, 2, 2],
# "resnet26": [3, 3, 3, 3],

config = {
    'imagenet': {'db_name':'imagenet', 'num_classes':1000, 'backbone':['resnet10', 'resnet18', 'resnet26']},
    'dogs': {'db_name':'dogs', 'num_classes':120, 'backbone':['resnet10', 'resnet18', 'resnet26']},
    'flowers': {'db_name':'flowers', 'num_classes':102, 'backbone':['resnet10', 'resnet18', 'resnet26']},
}

# Use ResNet18 (imagenet) for example
db_name = 'imagenet'
num_classes = config[db_name]['num_classes']
backbone = config[db_name]['backbone'][1]
exp_round = 1 # experiment round

arch = f'{db_name}_{backbone}'
pretrained_model = get_resnet(architecture=arch, num_classes=num_classes)

PATH = f'../experiments-resnet/experiments_save_ckpt/{db_name}/{arch}_r{exp_round}.pth'

# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
state_dict = torch.load(PATH, map_location=torch.device('cpu'))
ordereddict = state_dict['state_dict']

# Strip only a *leading* 'module.' from each key (presumably added by a
# DataParallel-style wrapper at training time - confirm). str.replace would
# also rewrite any later 'module.' occurrence inside the key.
_prefix = 'module.'
modified_ordereddict = OrderedDict(
    (k[len(_prefix):] if k.startswith(_prefix) else k, v)
    for k, v in ordereddict.items()
)

pretrained_model.load_state_dict(modified_ordereddict)

imagenet_resnet18


<All keys matched successfully>

#### Select the layers to analyse

In [23]:
# imagenet, dogs, flowers
# Residual units per stage for each backbone
# ("resnet10": [1, 1, 1, 1], "resnet18": [2, 2, 2, 2], "resnet26": [3, 3, 3, 3]).
units_per_stage = {'resnet10': 1, 'resnet18': 2, 'resnet26': 3}[backbone]

# (stage index, H == W of the feature map entering that stage) for a 224x224
# input; only stages 2-4 are analysed.
analysed_stages = ((2, 56), (3, 28), (4, 14))

# The three lists are built together so they cannot get out of sync when the
# backbone changes (they are zipped element-wise by the analysis loop).
conv_kernel_list = []        # conv modules to analyse
conv_kernel_name_list = []   # e.g. 'stage2_blk0_conv1'
input_shape_list = []        # (Hi, Wi, stride) of each conv

for stage_idx, in_size in analysed_stages:
    stage = getattr(pretrained_model.backbone, f'stage{stage_idx}')
    for unit_idx in range(1, units_per_stage + 1):
        unit = getattr(stage, f'unit{unit_idx}')
        for conv_idx in (1, 2):
            conv_kernel_list.append(getattr(unit, f'conv{conv_idx}').conv)
            conv_kernel_name_list.append(f'stage{stage_idx}_blk{unit_idx - 1}_conv{conv_idx}')
            # Only the first conv of a stage is strided; every later conv
            # works on the already-halved feature map.
            if unit_idx == 1 and conv_idx == 1:
                input_shape_list.append((in_size, in_size, 2))
            else:
                input_shape_list.append((in_size // 2, in_size // 2, 1))

assert len(conv_kernel_list) == len(conv_kernel_name_list) == len(input_shape_list)

#### Check the tensor shape

In [24]:
# Push a random 224x224 image through the backbone stage by stage and print
# the feature-map shape after each step.
img = torch.randn(1,3,224,224)
print(img.shape)
for _stage_attr, _label in (
    ('init_unit', 'stage1 input'),
    ('stage1', 'stage2 input'),
    ('stage2', 'stage3 input'),
    ('stage3', 'stage4 input'),
    ('stage4', 'stage4 output'),
):
    img = getattr(pretrained_model.backbone, _stage_attr)(img)
    print(_label, img.shape)


torch.Size([1, 3, 224, 224])
stage1 input torch.Size([1, 64, 56, 56])
stage2 input torch.Size([1, 64, 56, 56])
stage3 input torch.Size([1, 128, 28, 28])
stage4 input torch.Size([1, 256, 14, 14])
stage4 output torch.Size([1, 512, 7, 7])


### Step2: Calculate the explained variance ratio

In [25]:
num_basis_set = {1, 2, 3, 4, 5, 6, 7, 8, 9}
blk_depth_set = {1, 2, 4}

# One list per (layer, metric); entry i of every list of a layer describes the
# same (num_basis, blk_depth) configuration.
metric_names = (
    'blksconv', 'num_basis', 'blk_depth', 'mean',
    'madds', 'madds_conv', 'maddsratio',
    'params', 'params_conv', 'paramsratio',
)
kernel_dict = {f'{k}_{metric}': [] for k in conv_kernel_name_list for metric in metric_names}

# zip() would silently drop layers if the three lists disagreed in length.
assert len(conv_kernel_list) == len(conv_kernel_name_list) == len(input_shape_list), \
    "conv_kernel_list, conv_kernel_name_list and input_shape_list must have the same length"

for conv_kernel, conv_kernel_name, input_shape in zip(conv_kernel_list, conv_kernel_name_list, input_shape_list):
    kernel_analysor = KernelAnaylsor(conv_kernel)

    # Cost of the standard convolution: independent of the configuration.
    conv_madds = count_conv_madds(
                  in_channels=conv_kernel.in_channels,
                  out_channels=conv_kernel.out_channels,
                  kernel_size=conv_kernel.kernel_size,
                  HW_shape=input_shape)
    conv_params = count_conv_params(
                  in_channels=conv_kernel.in_channels,
                  out_channels=conv_kernel.out_channels,
                  kernel_size=conv_kernel.kernel_size)

    # sorted() fixes the iteration order: set order is not guaranteed, and the
    # later argmax/argmin selection breaks ties by position.
    for blk_depth in sorted(blk_depth_set):
        for num_basis in sorted(num_basis_set):
            (mean, std) = kernel_analysor.calculate_explained_ratio_mean_std(num_basis=num_basis, blk_depth=blk_depth, plot=False)
            kernel_dict[f'{conv_kernel_name}_blksconv'].append(f's{num_basis}t{blk_depth}')
            kernel_dict[f'{conv_kernel_name}_num_basis'].append(f'{num_basis}')
            kernel_dict[f'{conv_kernel_name}_blk_depth'].append(f'{blk_depth}')
            kernel_dict[f'{conv_kernel_name}_mean'].append(mean)

            # compute MAdds
            blksconv_madds = count_blksconv_madds(
                          in_channels=conv_kernel.in_channels,
                          out_channels=conv_kernel.out_channels,
                          kernel_size=conv_kernel.kernel_size,
                          num_basis=num_basis,
                          blk_depth=blk_depth,
                          HW_shape=input_shape)
            madds_ratio = blksconv_madds/conv_madds

            kernel_dict[f'{conv_kernel_name}_madds'].append(blksconv_madds)
            kernel_dict[f'{conv_kernel_name}_madds_conv'].append(conv_madds)
            kernel_dict[f'{conv_kernel_name}_maddsratio'].append(madds_ratio)

            # compute params
            blksconv_params = count_blksconv_parameters(
                          in_channels=conv_kernel.in_channels,
                          out_channels=conv_kernel.out_channels,
                          kernel_size=conv_kernel.kernel_size,
                          num_basis=num_basis,
                          blk_depth=blk_depth)
            params_ratio = blksconv_params/conv_params

            kernel_dict[f'{conv_kernel_name}_params'].append(blksconv_params)
            kernel_dict[f'{conv_kernel_name}_params_conv'].append(conv_params)
            kernel_dict[f'{conv_kernel_name}_paramsratio'].append(params_ratio)

            print(f'\tMAdd_ratio:{madds_ratio},\tParams_ratio:{params_ratio}')
    print()

=>s1-t1: Mean: 57.97, Std: 9.73
	MAdd_ratio:0.4600694444444444,	Params_ratio:0.1267361111111111
s2-t1: Mean: 82.57, Std: 4.16
	MAdd_ratio:0.9201388888888888,	Params_ratio:0.2534722222222222
s3-t1: Mean: 88.91, Std: 2.36
	MAdd_ratio:1.3802083333333333,	Params_ratio:0.3802083333333333
s4-t1: Mean: 92.93, Std: 1.54
	MAdd_ratio:1.8402777777777777,	Params_ratio:0.5069444444444444
s5-t1: Mean: 95.56, Std: 1.00
	MAdd_ratio:2.3003472222222223,	Params_ratio:0.6336805555555556
s6-t1: Mean: 97.41, Std: 0.65
	MAdd_ratio:2.7604166666666665,	Params_ratio:0.7604166666666666
s7-t1: Mean: 98.65, Std: 0.38
	MAdd_ratio:3.220486111111111,	Params_ratio:0.8871527777777778
s8-t1: Mean: 99.46, Std: 0.17
	MAdd_ratio:3.6805555555555554,	Params_ratio:1.0138888888888888
s9-t1: Mean: 100.00, Std: 0.00
	MAdd_ratio:4.140625,	Params_ratio:1.140625
s1-t2: Mean: 38.27, Std: 7.99
	MAdd_ratio:0.4756944444444444,	Params_ratio:0.08680555555555555
=>s2-t2: Mean: 62.58, Std: 8.05
	MAdd_ratio:0.9513888888888888,	Params_ratio:

In [17]:
def find_candidate(blksconv_type, explained_ratio_mean, madds_ratio, params_ratio, mean_threshold, madds_threshold, params_threshold, model_type, kernel_info=None):
    """Pick one BlkSConv configuration for a layer.

    All list arguments are parallel: entry ``i`` of each describes the same
    configuration.

    Args:
        blksconv_type: configuration names (e.g. ``'s2t1'``).
        explained_ratio_mean: mean explained-variance ratio per configuration.
        madds_ratio: MAdds relative to the standard convolution.
        params_ratio: parameter count relative to the standard convolution.
        mean_threshold: keep configurations whose mean is strictly greater.
        madds_threshold: keep configurations whose MAdds ratio is <= this.
        params_threshold: keep configurations whose params ratio is <= this.
        model_type: ``'big'`` selects the surviving candidate with the largest
            params ratio, ``'small'`` the one with the smallest.
        kernel_info: optional tuple ``(madds_list, params_list,
            madds_conv_list, params_conv_list)`` of absolute costs.

    Returns:
        ``(name, layer_info)`` where ``layer_info`` has the keys
        ``'conv_param'``, ``'conv_madds'``, ``'params'`` and ``'madds'`` (all
        ``None`` when ``kernel_info`` is not given).

    Raises:
        ValueError: if ``model_type`` is unknown, or if no configuration
            satisfies all three thresholds.
    """
    if model_type not in ('big', 'small'):
        # Previously an unknown strategy failed later with an unbound local.
        raise ValueError(f"unknown model_type {model_type!r}: expected 'big' or 'small'")

    names = np.asarray(blksconv_type)
    means = np.asarray(explained_ratio_mean)
    madds_ratios = np.asarray(madds_ratio)
    param_ratios = np.asarray(params_ratio)

    # Steps 1-3: variance, MAdds and Params filters. Applying them as one mask
    # keeps the same candidates, in the same order, as filtering in sequence.
    keep = (
        (means > mean_threshold)
        & (madds_ratios <= madds_threshold)
        & (param_ratios <= params_threshold)
    )
    if not keep.any():
        raise ValueError(
            "no BlkSConv candidate satisfies "
            f"mean>{mean_threshold}, madds<={madds_threshold}, params<={params_threshold}"
        )

    # Step 4: selection strategy (ties resolve to the first candidate).
    kept_param_ratios = param_ratios[keep]
    if model_type == 'big':
        idx = np.argmax(kept_param_ratios)
    else:
        idx = np.argmin(kept_param_ratios)

    layer_info = {'conv_param': None, 'conv_madds': None, 'params': None, 'madds': None}
    if kernel_info is not None:
        madds_list, params_list, madds_conv_list, params_conv_list = (
            np.asarray(values)[keep] for values in kernel_info
        )
        layer_info['conv_param'] = params_conv_list[idx]
        layer_info['conv_madds'] = madds_conv_list[idx]
        layer_info['params'] = params_list[idx]
        layer_info['madds'] = madds_list[idx]

    return names[keep][idx], layer_info

In [18]:
# Search settings to evaluate. The key encodes the value:
#   V<var_thres>M<MAdd_thres*100>P<Params_thres*100><b|s>
# and each value is (var_thres, MAdd_thres, Params_thres, selection-strategy).
select_model_hyperparameter = dict(
    V50M50P50b=(50, 0.5, 0.5, 'big'),
    # V50M50P75b=(50, 0.5, 0.75, 'big'),
    # V50M75P50b=(50, 0.75, 0.5, 'big'),
    V50M75P75b=(50, 0.75, 0.75, 'big'),
    # V75M50P50b=(75, 0.5, 0.5, 'big'),
    # V75M50P75b=(75, 0.5, 0.75, 'big'),
    # V75M75P50b=(75, 0.75, 0.5, 'big'),
    # V75M75P75b=(75, 0.75, 0.75, 'big'),
    # V50M50P50s=(50, 0.5, 0.5, 'small'),
    # V50M50P75s=(50, 0.5, 0.75, 'small'),
    # V50M75P50s=(50, 0.75, 0.5, 'small'),
    # V50M75P75s=(50, 0.75, 0.75, 'small'),
    V75M50P50s=(75, 0.5, 0.5, 'small'),
    # V75M50P75s=(75, 0.5, 0.75, 'small'),
    # V75M75P50s=(75, 0.75, 0.5, 'small'),
    V75M75P75s=(75, 0.75, 0.75, 'small'),

    # dogs (ablation study)
    # V00M75P75s=(0, 0.75, 0.75, 'small'),
    # V10M75P75s=(10, 0.75, 0.75, 'small'),
    # V20M75P75s=(20, 0.75, 0.75, 'small'),
    # V30M75P75s=(30, 0.75, 0.75, 'small'),
    # V40M75P75s=(40, 0.75, 0.75, 'small'),
)

### Step3 + Step4: Find the final BlkSConv-based ResNet

In [26]:
# imagenet, dogs, or flowers
# Prefix printed before the per-layer list: one False per stage-1 unit.
# NOTE(review): presumably this marks the stage-1 units as left unconverted - confirm.
stage1_prefix = {
    'resnet10': '[False], ',
    'resnet18': '[False, False], ',
    'resnet26': '[False, False, False], ',
}

for k in select_model_hyperparameter.keys():
    total_params = 0
    total_madds = 0
    total_conv_params = 0
    total_conv_madds = 0

    print(f'"{db_name}_{backbone}_{k}": [',end='')
    print(stage1_prefix.get(backbone, ''), end='')

    (var_thres,madd_thres,params_thres,model_size)=select_model_hyperparameter[k]
    print('[')
    for conv_name in conv_kernel_name_list:
        blksconv_type = kernel_dict[f'{conv_name}_blksconv']
        mean = kernel_dict[f'{conv_name}_mean']
        madds_ratio = kernel_dict[f'{conv_name}_maddsratio']
        params_ratio = kernel_dict[f'{conv_name}_paramsratio']

        madds = kernel_dict[f'{conv_name}_madds']
        params = kernel_dict[f'{conv_name}_params']
        madds_conv = kernel_dict[f'{conv_name}_madds_conv']
        params_conv = kernel_dict[f'{conv_name}_params_conv']

        kernel_info = (madds, params, madds_conv, params_conv)

        try:
            replace_name, layer_info = find_candidate(blksconv_type=blksconv_type, explained_ratio_mean=mean, madds_ratio=madds_ratio, params_ratio=params_ratio, mean_threshold=var_thres, madds_threshold=madd_thres, params_threshold=params_thres, model_type=model_size, kernel_info=kernel_info)
        except ValueError:
            # No configuration met the thresholds: keep the standard conv.
            # The relaxed call is only used to fetch the standard-conv cost.
            # Only ValueError (the "no candidate" case) is caught, so genuine
            # bugs and Ctrl-C are no longer hidden.
            _, layer_info = find_candidate(blksconv_type=blksconv_type, explained_ratio_mean=mean, madds_ratio=madds_ratio, params_ratio=params_ratio, mean_threshold=10, madds_threshold=0.75, params_threshold=0.75, model_type='big', kernel_info=kernel_info)
            replace_name = 'conv'

        if 'conv' in replace_name:
            print(f"{conv_name} | conv: {layer_info['conv_madds']}, {layer_info['conv_param']}")
            total_params += layer_info['conv_param']
            total_madds += layer_info['conv_madds']
        else:
            print(f"{conv_name} | {replace_name}: {layer_info['madds']}/{layer_info['conv_madds']}={layer_info['madds']/layer_info['conv_madds']}, {layer_info['params']}/{layer_info['conv_param']}={layer_info['params']/layer_info['conv_param']}")
            total_params += layer_info['params']
            total_madds += layer_info['madds']

        total_conv_params += layer_info['conv_param']
        total_conv_madds += layer_info['conv_madds']

    print(']', end='\n')
    # outer ]
    print(f'@@@ Total(BlkSConv/Standard)\n@ MAdds: {total_madds}/{total_conv_madds}={total_madds/total_conv_madds}\n@ Params: {total_params}/{total_conv_params}={total_params/total_conv_params}')
    print('],', end='\n\n')

"imagenet_resnet18_V50M50P50b": [[False, False], [
stage2_blk0_conv1 | s1t1: 26593280/57802752=0.4600694444444444, 9344.0/73728=0.1267361111111111
stage2_blk0_conv2 | s4t1: 54992896/115605504=0.4756944444444444, 70144.0/147456=0.4756944444444444
stage2_blk1_conv1 | s4t1: 54992896/115605504=0.4756944444444444, 70144.0/147456=0.4756944444444444
stage2_blk1_conv2 | s4t1: 54992896/115605504=0.4756944444444444, 70144.0/147456=0.4756944444444444
stage3_blk0_conv1 | s1t1: 26141696/57802752=0.4522569444444444, 35072.0/294912=0.1189236111111111
stage3_blk0_conv2 | s4t1: 53186560/115605504=0.4600694444444444, 271360.0/589824=0.4600694444444444
stage3_blk1_conv1 | s4t1: 53186560/115605504=0.4600694444444444, 271360.0/589824=0.4600694444444444
stage3_blk1_conv2 | s4t1: 53186560/115605504=0.4600694444444444, 271360.0/589824=0.4600694444444444
stage4_blk0_conv1 | s1t1: 25915904/57802752=0.4483506944444444, 135680.0/1179648=0.1150173611111111
stage4_blk0_conv2 | s4t1: 52283392/115605504=0.45225694444