## Model DenseNet 

In [1]:
import torch

# Sanity check: show whether PyTorch can use CUDA in this environment
# (the cell's last expression is displayed as its output).
torch.cuda.is_available()

True

### 1. Model 

In [2]:
import torch.nn as nn
import numpy

class channel_selection(nn.Module):
    """
    Select channels from the output of a BatchNorm2d layer. It should be put directly after the BatchNorm2d layer.
    The number of output channels equals the number of non-zero entries in `self.indexes`.
    """
    def __init__(self, num_channels):
        """
        Initialize `indexes` as an all-ones vector whose length equals the number of channels.
        During pruning, the places in `indexes` which correspond to the channels to be pruned will be set to 0.

        Parameter
        ---------
        num_channels: length of the `indexes` mask (one entry per input channel).
        """
        super(channel_selection, self).__init__()
        self.indexes = nn.Parameter(torch.ones(num_channels))

    def forward(self, input_tensor):
        """
        Parameter
        ---------
        input_tensor: (N,C,H,W). It should be the output of BatchNorm2d layer.

        Returns
        -------
        Tensor of shape (N,K,H,W) holding the channels whose entry in `self.indexes`
        is non-zero, in ascending channel order (K may be 0 or 1).
        """
        # Build the index with torch instead of numpy: this cell only runs
        # `import numpy`, so the name `np` is not guaranteed to exist when the
        # layer is called. `reshape(-1)` always yields a 1-D index, which covers
        # the zero- and single-channel cases without special handling.
        selected_index = torch.nonzero(self.indexes.detach()).reshape(-1)
        # The mask may live on a different device than the activations.
        selected_index = selected_index.to(input_tensor.device)
        return input_tensor[:, selected_index, :, :]

In [3]:
import math
import numpy as np
import torch.nn.functional as F

class BasicBlock(nn.Module):
    """
    One dense-block unit: BatchNorm2d -> channel selection -> ReLU -> 3x3 conv
    (-> optional dropout). The result is concatenated to the block input along
    the channel dimension.
    """

    def __init__(self, inplanes, cfg, growthRate=12, dropRate=0):
        """
        :param inplanes: channel count of the block input (size of the BatchNorm2d and of the selection mask)
        :param cfg: number of input channels of the 3x3 convolution
        :param growthRate: number of channels produced by the convolution; output has `inplanes` + growthRate channels
        :param dropRate: dropout rate, applied only when it is > 0
        """
        super(BasicBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.select = channel_selection(inplanes)
        self.conv1 = nn.Conv2d(cfg, growthRate, kernel_size=3, padding=1, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.dropRate = dropRate

    def mask_bn(self, cfg_mask):
        """Multiply the BatchNorm2d weight and bias in place by `cfg_mask`."""
        for affine_param in (self.bn1.weight, self.bn1.bias):
            affine_param.data.mul_(cfg_mask)

    def _new_features(self, normalized):
        """Run select -> ReLU -> conv (-> dropout) on an already batch-normalized tensor."""
        features = self.conv1(self.relu(self.select(normalized)))
        if self.dropRate > 0:
            features = F.dropout(features, p=self.dropRate, training=self.training)
        return features

    def forward(self, x):
        """Return `x` concatenated (dim 1) with the newly computed feature maps."""
        return torch.cat((x, self._new_features(self.bn1(x))), 1)

    def forward_bn(self, x):
        """Same as `forward`, but also return a copy of the BatchNorm2d output."""
        normalized = self.bn1(x)
        # Clone before the rest of the pipeline runs, so the returned value is
        # the raw BatchNorm output.
        bn_value = normalized.clone()
        return torch.cat((x, self._new_features(normalized)), 1), bn_value


class Transition(nn.Module):
    """
    Transition layer between dense blocks: BatchNorm2d -> channel selection ->
    ReLU -> 1x1 conv -> 2x2 average pooling.
    """

    def __init__(self, inplanes, outplanes, cfg):
        """
        :param inplanes: number of the input channel (size of the BatchNorm2d and of the selection mask)
        :param outplanes: number of the output channel of the 1x1 convolution
        :param cfg: number of input channels of the 1x1 convolution
        """
        super(Transition, self).__init__()
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.select = channel_selection(inplanes)
        self.conv1 = nn.Conv2d(cfg, outplanes, kernel_size=1, bias=False)
        self.relu = nn.ReLU(inplace=True)

    def mask_bn(self, cfg_mask):
        """Multiply the BatchNorm2d weight and bias in place by `cfg_mask`."""
        for affine_param in (self.bn1.weight, self.bn1.bias):
            affine_param.data.mul_(cfg_mask)

    def _reduce(self, normalized):
        """Run select -> ReLU -> 1x1 conv -> avg_pool2d(2) on a batch-normalized tensor."""
        squeezed = self.conv1(self.relu(self.select(normalized)))
        return F.avg_pool2d(squeezed, 2)

    def forward(self, x):
        """Return the pooled output of the transition layer."""
        return self._reduce(self.bn1(x))

    def forward_bn(self, x):
        """Same as `forward`, but also return a copy of the BatchNorm2d output."""
        normalized = self.bn1(x)
        # Clone first so the returned value is the raw BatchNorm output.
        bn_value = normalized.clone()
        return self._reduce(normalized), bn_value



class densenet(nn.Module):
    """
    DenseNet for 3-channel images with three dense stages and two transitions.
    Every BatchNorm2d is followed by a `channel_selection` layer.
    """

    def __init__(self, depth=40, dropRate=0, dataset='cifar10',
                 growthRate=12, compressionRate=1, cfg=None, conv_cfg=None):
        """
        :param depth: must be 3n + 4, where n is the number of BasicBlocks per dense stage
        :param dropRate: dropout rate forwarded to every BasicBlock
        :param dataset: 'cifar10' (10 classes) or 'cifar100' (100 classes); anything else raises ValueError
        :param growthRate: number of channels every BasicBlock appends to its input
        :param compressionRate: divisor applied to the channel count at each transition
        :param cfg:
            flat sequence of 3n + 3 channel counts (conv input channels after selection):
                cfg[0:n]            dense stage 1
                cfg[n]              transition 1
                cfg[n+1:2n+1]       dense stage 2
                cfg[2n+1]           transition 2
                cfg[2n+2:3n+2]      dense stage 3
                cfg[-1]             input features of the final Linear layer
            default (cfg is None), shown for depth = 40, growthRate = 12:
                [24, 36, 48, ..., 168,
                 168, 180, 192, ..., 312,
                 312, 324, 336, ..., 456]
        :param conv_cfg:
            1-based block indexes within a dense stage (the same indexes are used for
            every stage); the output of each listed block is cloned and returned by `forward`.
            examples for n = 12:
                3 indexes / stage: [3, 6, 9]
                2 indexes / stage: [4, 8]
                1 index / stage: [6]

        conv2d distribution for depth = 40:
            1 conv2d
            dense stage 1 (12 conv2ds)
            transition 1 (1 conv2d)
            dense stage 2 (12 conv2ds)
            transition 2 (1 conv2d)
            dense stage 3 (12 conv2ds)
            final classifier: 1 Linear layer
        """
        super(densenet, self).__init__()

        assert (depth - 4) % 3 == 0, 'depth should be 3n+4'
        n = (depth - 4) // 3
        block = BasicBlock

        self.growthRate = growthRate
        self.dropRate = dropRate
        self.block_cfg = conv_cfg

        if cfg is None:
            # Unpruned layout: each stage contributes n + 1 entries that grow by
            # growthRate; the next stage starts where the previous one ended.
            cfg = []
            stage_start = growthRate * 2
            for _ in range(3):
                cfg.extend(stage_start + growthRate * i for i in range(n + 1))
                stage_start += growthRate * n
        assert len(cfg) == 3 * n + 3, 'length of config variable cfg should be 3n+3'

        # self.inplanes tracks the running channel count; the _make_* helpers
        # read and update it while the layers are created in order.
        self.inplanes = growthRate * 2
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, padding=1, bias=False)

        trans1_at = n
        trans2_at = 2 * n + 1
        self.dense1 = self._make_dense_block(block, n, cfg[0:trans1_at])
        self.trans1 = self._make_transition(compressionRate, cfg[trans1_at])
        self.dense2 = self._make_dense_block(block, n, cfg[trans1_at + 1:trans2_at])
        self.trans2 = self._make_transition(compressionRate, cfg[trans2_at])
        self.dense3 = self._make_dense_block(block, n, cfg[trans2_at + 1:3 * n + 2])
        self.bn = nn.BatchNorm2d(self.inplanes)
        self.select = channel_selection(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.avgpool = nn.AvgPool2d(8)

        # classifier size depends on the dataset
        if dataset == 'cifar10':
            num_classes = 10
        elif dataset == 'cifar100':
            num_classes = 100
        else:
            raise ValueError('Model `dataset` parameter is Error!')
        self.fc = nn.Linear(cfg[-1], num_classes)

        # weight initialization
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                kernel_h, kernel_w = module.kernel_size[0], module.kernel_size[1]
                fan_out = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(0.5)
                module.bias.data.zero_()

    def _make_dense_block(self, block, blocks, cfg):
        """
        Build one dense stage as an nn.Sequential.

        :param block: block class/factory (1 block means 1 conv2d)
        :param blocks: number of blocks (n) in the stage
        :param cfg: channel config of the stage, one entry per block
        """
        assert blocks == len(cfg), 'Length of the cfg parameter is not right.'
        stage = []
        for position in range(blocks):
            unit = block(self.inplanes, cfg=cfg[position],
                         growthRate=self.growthRate, dropRate=self.dropRate)
            stage.append(unit)
            # every block appends growthRate channels to what the next one sees
            self.inplanes += self.growthRate
        return nn.Sequential(*stage)

    def _make_transition(self, compressionRate, cfg):
        """
        Build a Transition and update the running channel count.

        :param compressionRate: divisor for the channel count (outplanes = inplanes // compressionRate)
        :param cfg: a single number, the input channel count of the transition's conv2d
        """
        channels_in = self.inplanes
        channels_out = int(math.floor(self.inplanes // compressionRate))
        self.inplanes = channels_out
        return Transition(channels_in, channels_out, cfg)

    def forward(self, x):
        """
        :param x: input batch fed to the first 3-input-channel conv2d
        :return: logits `y`, or `(y, block_value)` when `conv_cfg` selected at least
                 one block; `block_value` is the list of cloned block outputs in
                 execution order
        """
        x = self.conv1(x)

        block_value = []
        stages = (
            (self.dense1, self.trans1),
            (self.dense2, self.trans2),
            (self.dense3, None),
        )
        for dense, transition in stages:
            if self.block_cfg:
                # Step through the stage block by block to capture the outputs
                # at the requested 1-based positions.
                for position, unit in enumerate(dense, start=1):
                    x = unit(x)
                    if position in self.block_cfg:
                        block_value.append(x.clone())
            else:
                x = dense(x)
            if transition is not None:
                x = transition(x)

        x = self.relu(self.select(self.bn(x)))

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        y = self.fc(x)

        if len(block_value):
            return y, block_value
        return y


### 2. load file

In [4]:
import os

def resume_model(resume_file):
    """
    Load a checkpoint with `torch.load` and pull out its well-known entries.

    :param resume_file: path of the checkpoint file
    :return: tuple `(state_dict, opti_dict, start_epoch, best_prec1, cfg)` taken from
             the keys 'state_dict', 'optimizer', 'epoch', 'best_prec1' and 'cfg';
             an entry is None when its key is absent from the checkpoint
    :raises ValueError: if `resume_file` is not an existing file

    NOTE(review): `torch.load` unpickles the file, so only load checkpoints
    from a trusted source.
    """
    if not os.path.isfile(resume_file):
        raise ValueError("Resume model file is not found at '{}'".format(resume_file))
    print("=> loading checkpoint '{}'".format(resume_file))
    checkpoint = torch.load(resume_file)

    # Optional entries: fall back to None when the key is missing.
    start_epoch = checkpoint['epoch'] if 'epoch' in checkpoint else None
    best_prec1 = checkpoint['best_prec1'] if 'best_prec1' in checkpoint else None
    state_dict = checkpoint['state_dict'] if 'state_dict' in checkpoint else None
    opti_dict = checkpoint['optimizer'] if 'optimizer' in checkpoint else None

    # The channel config additionally gets reported on stdout.
    cfg = None
    if 'cfg' not in checkpoint:
        print("-> not found model cfg...")
    else:
        cfg = checkpoint['cfg']
        print("-> model cfg is loading...\n cfg: {}".format(list(cfg)))

    print("=>  epoch {} Prec1: {}".format(start_epoch, best_prec1))
    return state_dict, opti_dict, start_epoch, best_prec1, cfg


# NOTE(review): both roots are absolute, machine-specific Windows paths;
# adjust them before running this notebook on another machine.

# First candidate location: <root_path>/<run directory>/model_best.pth.tar.
# The run names suggest sparsity-training and fine-tuning runs - confirm
# against the logs directory.
root_path = r'D:\Project\Pycharm\network-slimming\logs'
file_name = 'model_best.pth.tar'
name = [
    'sparsity_densenet40_cifar100_s_1e_4', 
    'ft_inherit_bn_densenet40_cifar10_percent_0.4_seed_2',
    'ft_inherit_bn_densenet40_cifar100_percent_0.4_seed_2',
    'ft_inherit_at_densenet40_cifar10_percent_0.4_seed_2',
    'ft_inherit_at_densenet40_cifar100_percent_0.4_seed_2',
    'ft_inherit_at_densenet40_cifar100_percent_0.6_seed_2',
]
# NOTE(review): file_path is built here but not used in this cell; only
# file_path2 is passed to resume_model below.
file_path = os.path.join(root_path, name[0], file_name)

# Second candidate location: <root_path2>/<run directory>/pruned.pt.
root_path2 = r'D:\Project\Gitee\network-slimming\logs'
file_name2 = 'pruned.pt'
name2 = [
    'at_prune_densenet40_cifar10_percent_0.4',
    'at_prune_densenet40_cifar10_percent_0.7',
    'at_prune_densenet40_cifar100_percent_0.4',
    'at_prune_densenet40_cifar100_percent_0.6'
]
# Index 2 selects 'at_prune_densenet40_cifar100_percent_0.4'.
file_path2 = os.path.join(root_path2, name2[2], file_name2)


# Load the selected checkpoint; entries missing from the file come back as None.
state_dict, opti_dict, start_epoch, best_prec1, cfg = resume_model(file_path2)

=> loading checkpoint 'D:\Project\Gitee\network-slimming\logs\at_prune_densenet40_cifar100_percent_0.4\pruned.pt'
-> model cfg is loading...
 cfg: [22, 27, 26, 37, 51, 59, 57, 74, 79, 83, 83, 83, 145, 116, 115, 126, 130, 135, 144, 138, 150, 164, 170, 186, 173, 264, 191, 220, 220, 212, 214, 206, 217, 210, 209, 222, 222, 231, 204]
=>  epoch None Prec1: None


In [5]:
# Rebuild the network with the channel configuration `cfg` returned by
# resume_model, so the layer shapes can match the stored weights.
# dataset='cifar100' gives a 100-class classifier head.
model = densenet(depth=40, dropRate=0, dataset='cifar100',
                 growthRate=12, compressionRate=1, cfg=cfg, conv_cfg=None)
# Restore the weights from the checkpoint's state dict.
model.load_state_dict(state_dict)
# Switch to evaluation mode; as the last expression of the cell this also
# displays the module structure.
model.eval()

densenet(
  (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (dense1): Sequential(
    (0): BasicBlock(
      (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (select): channel_selection()
      (conv1): Conv2d(22, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (relu): ReLU(inplace=True)
    )
    (1): BasicBlock(
      (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (select): channel_selection()
      (conv1): Conv2d(27, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (relu): ReLU(inplace=True)
    )
    (2): BasicBlock(
      (bn1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (select): channel_selection()
      (conv1): Conv2d(26, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (relu): ReLU(inplace=True)
    )
    (3): BasicBlock(
      (bn1)

### 3. Params 

#### 3.1 thop
    Compute parameters

In [6]:
import torch
from thop import profile
from thop import clever_format


# Random batch of one 3x32x32 image, used only to run the model once for profiling.
# NOTE(review): the name `input` shadows the Python builtin `input` for the
# rest of the kernel session.
input = torch.randn(1, 3, 32, 32)
# thop's profile returns two numbers, named flops and params here.
flops, params = profile(model, inputs=(input, ))
print(flops, params)
# Rebind both names to human-readable strings with two decimals
# (from here on they are str, not numbers).
flops, params = clever_format([flops, params], "%.2f")
flops, params

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_bn() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[91m[WARN] Cannot find rule for <class '__main__.channel_selection'>. Treat it as zero Macs and zero Params.[00m
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[91m[WARN] Cannot find rule for <class '__main__.BasicBlock'>. Treat it as zero Macs and zero Params.[00m
[91m[WARN] Cannot find rule for <class 'torch.nn.modules.container.Sequential'>. Treat it as zero Macs and zero Params.[00m
[91m[WARN] Cannot find rule for <class '__main__.Transition'>. Treat it as zero Macs and zero Params.[00m
[INFO] Register count_avgpool() for <class 'torch.nn.modules.pooling.AvgPool2d'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[91m[WARN] Cannot find rule for <class '__main__.densenet'>. Treat it as zero Macs and zero Params.[00m
192913020.0 686812.0


('192.91M', '686.81K')

#### 3.2 torchstat
    Compute FLOPs

In [7]:
from torchstat import stat

# Run torchstat on the model for a single 3x32x32 input.
# NOTE(review): the captured output reports channel_selection as unsupported,
# so that layer presumably does not contribute to the reported totals - confirm.
stat(model, (3, 32, 32))

[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: ch

#### 3.3  ptflops

In [8]:
import torchvision.models as models
import torch
from ptflops import get_model_complexity_info

# Run the ptflops estimate with CUDA device 0 selected as the current device.
with torch.cuda.device(0):
    # as_strings=True: both results come back as formatted strings.
    # NOTE(review): this rebinds `params`, replacing the value left by the thop cell.
    macs, params = get_model_complexity_info(model, (3, 32, 32), as_strings=True,
                                           print_per_layer_stat=True, verbose=True)
print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
print('{:<30}  {:<8}'.format('Number of parameters: ', params))
macs, params

[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: ch

('0.19 GMac', '696.17 k')

#### 3.4 torchsummary 

In [9]:
from torchsummary import summary

# Summarize the model with torchsummary for a 3x32x32 input, on the CPU.
# NOTE(review): `model` is not moved to the CPU in this notebook's visible
# cells; confirm it lives on the CPU before calling with device='cpu'.
summary(model, (3, 32, 32), device='cpu')

[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: channel_selection is not supported!
[Memory]: channel_selection is not supported!
[MAdd]: channel_selection is not supported!
[Flops]: ch