## Model Resnet: modules & parameters

In [2]:
import torch

# Report whether a CUDA device is usable; the notebook displays the returned bool.
torch.cuda.is_available()

True

In [3]:
import torch
from torchvision import datasets, transforms

# Extra DataLoader options are only passed when CUDA is available.
if torch.cuda.is_available():
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}

# Convert images to tensors, then normalise each of the three channels.
# NOTE(review): the constants look like per-channel CIFAR-10 mean/std — confirm.
cifar_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# Test split (train=False); downloaded into ./data.cifar10 when missing.
cifar_test_set = datasets.CIFAR10(
    './data.cifar10', train=False, download=True, transform=cifar_transform)

test_loader = torch.utils.data.DataLoader(
    cifar_test_set, batch_size=8, shuffle=True, **kwargs)

Files already downloaded and verified


In [4]:
# Pull the first batch from the loader; `idx` is the position of that batch (0).
idx, data = 0, next(iter(test_loader))
# Element 0 of the batch holds the image tensor.
one_batch = data[0]
one_batch.size()

torch.Size([8, 3, 32, 32])

### 1. Channel Selection & Bottleneck

In [1]:
import torch
import torch.nn as nn

class channel_selection(nn.Module):
    """
    Select channels from the output of a BatchNorm2d layer.

    It should be placed directly after a BatchNorm2d layer. The number of output
    channels equals the number of non-zero entries in `self.indexes`.
    """

    def __init__(self, num_channels):
        """
        Initialize `indexes` as an all-ones vector of length `num_channels`.

        During pruning, the entries of `indexes` that correspond to the channels
        to be pruned are set to 0.
        """
        super(channel_selection, self).__init__()
        # Registered as a parameter, so it is part of the module's state_dict.
        self.indexes = nn.Parameter(torch.ones(num_channels))

    def forward(self, input_tensor):
        """
        Keep only the channels whose entry in `indexes` is non-zero.

        Parameter
        ---------
        input_tensor: (N,C,H,W). It should be the output of BatchNorm2d layer.

        Returns
        -------
        Tensor of shape (N,K,H,W), where K is the number of non-zero entries in
        `indexes`; the kept channels stay in their original order.
        """
        # torch.nonzero replaces the former NumPy round-trip: `np` was never
        # imported next to this class, and the conversion forced a copy to the
        # host on every forward pass.
        selected_index = torch.nonzero(self.indexes.data, as_tuple=False).reshape(-1)
        return input_tensor[:, selected_index, :, :]

    
class Bottleneck(nn.Module):
    """
    Pre-activation bottleneck block: three (BatchNorm -> ReLU -> conv) groups
    followed by a residual addition.

    A `channel_selection` layer sits directly after the first BatchNorm, so the
    number of channels fed into `conv1` (`cfg[0]`) may differ from `inplanes`.
    """
    expansion = 4

    def __init__(self, inplanes, planes, cfg, stride=1, down_sample=None):
        """
        :param inplanes: number of channels of the block input (size of bn1/select)
        :param planes: base width; the block outputs `planes * expansion` channels
        :param cfg: three channel counts:
            cfg[0] = input channels of conv1 (after channel selection),
            cfg[1] = output channels of conv1 / input channels of conv2,
            cfg[2] = output channels of conv2 / input channels of conv3
        :param stride: stride of the 3x3 convolution (conv2)
        :param down_sample: optional module applied to the block input to build
            the shortcut branch; when None the input itself is the shortcut
        """
        super(Bottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.select = channel_selection(inplanes)
        self.conv1 = nn.Conv2d(cfg[0], cfg[1], kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(cfg[1])
        self.conv2 = nn.Conv2d(cfg[1], cfg[2], kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(cfg[2])
        # Use the class attribute instead of a literal 4 so the width stays in
        # sync with `expansion`.
        self.conv3 = nn.Conv2d(cfg[2], planes * self.expansion, kernel_size=1, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.down_sample = down_sample
        self.stride = stride

    def _forward_impl(self, x, bn_value=None, bn_weight=None):
        """
        Shared body of `forward`, `forward_bn` and `forward_ge`.

        :param x: block input
        :param bn_value: optional list; a clone of every BatchNorm output is
            appended to it (taken before selection and before the in-place ReLU)
        :param bn_weight: optional list; the `.weight.data` of every BatchNorm
            is appended to it (a reference, not a copy)
        :return: block output (main branch + shortcut)
        """
        # Only the first group has a channel selection layer.
        groups = (
            (self.bn1, self.select, self.conv1),
            (self.bn2, None, self.conv2),
            (self.bn3, None, self.conv3),
        )

        out = x
        for bn, select, conv in groups:
            out = bn(out)
            if bn_value is not None:
                # Clone first: the ReLU below works in place.
                bn_value.append(out.clone())
            if bn_weight is not None:
                bn_weight.append(bn.weight.data)
            if select is not None:
                out = select(out)
            out = self.relu(out)
            out = conv(out)

        # Shortcut branch: identity unless a down-sampling module was supplied.
        residual = x if self.down_sample is None else self.down_sample(x)

        out += residual
        return out

    def forward(self, x):
        """Return the block output for input `x`."""
        return self._forward_impl(x)

    def mask(self, index, cfg_mask):
        """
        Multiply the weight and bias of one BatchNorm layer by `cfg_mask` in place.

        :param index: 0, 1 or 2, selecting bn1, bn2 or bn3
        :param cfg_mask: tensor multiplied element-wise into weight and bias
        :raises ValueError: if `index` is not 0, 1 or 2
        """
        if index == 0:
            bn = self.bn1
        elif index == 1:
            bn = self.bn2
        elif index == 2:
            bn = self.bn3
        else:
            raise ValueError("Index is not including.")
        bn.weight.data.mul_(cfg_mask)
        bn.bias.data.mul_(cfg_mask)

    def forward_bn(self, x):
        """
        Same computation as `forward`, additionally returning the BatchNorm outputs.

        :return: (out, bn_value) where bn_value is a list of three tensors, the
            cloned outputs of bn1, bn2 and bn3
        """
        bn_value = []
        out = self._forward_impl(x, bn_value=bn_value)
        return out, bn_value

    def forward_ge(self, x):
        """
        Same computation as `forward`, additionally returning the BatchNorm
        outputs and the BatchNorm weights.

        :return: (out, bn_value, bn_weight); bn_value holds the cloned outputs of
            bn1, bn2, bn3 and bn_weight holds their `.weight.data` tensors
        """
        bn_value, bn_weight = [], []
        out = self._forward_impl(x, bn_value=bn_value, bn_weight=bn_weight)
        return out, bn_value, bn_weight

### 2. ResNet Model 

In [6]:
import math
import torch.nn as nn


"""
preactivation resnet with bottleneck design.
"""

class resnet(nn.Module):
    """Pre-activation ResNet assembled from `Bottleneck` blocks in three layers."""

    def __init__(self, depth=164, dataset='cifar10', cfg=None, block_cfg=None):
        """
        :param depth:
            must satisfy depth = 9n + 2, where n = (depth - 2) // 9 is the number
            of blocks per layer (9 = 3 layers × 3 conv2d per block);
            depth = 164 gives n = 18
        :param dataset:
            'cifar10' (10 classes) or 'cifar100' (100 classes); anything else
            raises ValueError
        :param cfg:
            flat list of channel counts: three entries per block (9n in total)
            followed by one final entry used as the classifier input size, i.e.
            9n + 1 entries (163 when depth = 164). Built automatically when None.
        :param block_cfg:
            1-based block positions inside every layer whose outputs `forward`
            should collect, e.g. [6, 12, 18], [9, 18] or [18]; values should be
            <= n
        """
        super(resnet, self).__init__()
        assert (depth - 2) % 9 == 0, 'depth should be 9n+2'

        blocks_per_layer = (depth - 2) // 9  # depth = 164 -> 18
        block = Bottleneck
        self.block_cfg = block_cfg
        self.inplanes = 16

        # Default channel configuration: for every layer, one "first block"
        # triple followed by (n - 1) copies of the "regular block" triple.
        if cfg is None:
            layer_templates = (
                ([16, 16, 16], [64, 16, 16]),
                ([64, 32, 32], [128, 32, 32]),
                ([128, 64, 64], [256, 64, 64]),
            )
            cfg = []
            for first_block, other_blocks in layer_templates:
                cfg.extend(first_block)
                cfg.extend(other_blocks * (blocks_per_layer - 1))
            cfg.append(256)

        # Feature extractor: stem conv, three layers, then BN -> select -> ReLU -> pool.
        per_layer = 3 * blocks_per_layer
        stem = nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False)
        stage1 = self._make_layer(block, 16, blocks_per_layer,
                                  cfg=cfg[:per_layer])
        stage2 = self._make_layer(block, 32, blocks_per_layer,
                                  cfg=cfg[per_layer:2 * per_layer], stride=2)
        stage3 = self._make_layer(block, 64, blocks_per_layer,
                                  cfg=cfg[2 * per_layer:3 * per_layer], stride=2)
        final_channels = 64 * block.expansion
        self.feature = nn.Sequential(
            stem,
            stage1,
            stage2,
            stage3,
            nn.BatchNorm2d(final_channels),
            channel_selection(final_channels),
            nn.ReLU(inplace=True),
            # NOTE(review): an 8x8 pooling window presumably assumes 32x32 inputs — confirm.
            nn.AvgPool2d(8),
        )

        # Classifier head.
        if dataset not in ('cifar10', 'cifar100'):
            raise ValueError('Model `dataset` parameter is Error!')
        num_classes = 10 if dataset == 'cifar10' else 100
        self.classifier = nn.Linear(cfg[-1], num_classes)

        self._initialize_weights()

    def _make_layer(self, block, planes, num_block, cfg, stride=1):
        """
        Build one layer as a Sequential of `num_block` blocks.

        :param block: block class (Bottleneck)
        :param planes: base width; every block outputs `planes * block.expansion` channels
        :param num_block: how many blocks the layer contains
        :param cfg: channel counts for this layer, three per block
        :param stride: stride of the first block; the remaining blocks use the default
        """
        out_planes = planes * block.expansion

        # The first block gets a 1x1 conv shortcut whenever the spatial size or
        # the channel count changes.
        shortcut = None
        if not (stride == 1 and self.inplanes == out_planes):
            shortcut = nn.Sequential(nn.Conv2d(
                self.inplanes, out_planes, kernel_size=1, stride=stride, bias=False))

        blocks = [block(self.inplanes, planes, cfg[0:3], stride, shortcut)]
        self.inplanes = out_planes
        blocks.extend(
            block(self.inplanes, planes, cfg[3 * pos: 3 * (pos + 1)])
            for pos in range(1, num_block)
        )
        return nn.Sequential(*blocks)

    def _initialize_weights(self):
        """Initialise every Conv2d, BatchNorm2d and Linear module of the model."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                kernel_h, kernel_w = module.kernel_size[0], module.kernel_size[1]
                fan = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan))
                if module.bias is not None:
                    module.bias.data.zero_()
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(0.5)
                module.bias.data.zero_()
            elif isinstance(module, nn.Linear):
                module.weight.data.normal_(0, 0.01)
                module.bias.data.zero_()

    def forward(self, x):
        """
        :return: the logits `y`; when `block_cfg` is set and at least one block
            output was collected, the tuple `(y, block_value)` instead
        """
        block_value = []
        if not self.block_cfg:
            x = self.feature(x)
        else:
            # Step through the layers block by block so the outputs of the
            # requested blocks can be recorded.
            for module in self.feature:
                if not isinstance(module, nn.Sequential):
                    x = module(x)
                    continue
                for block_no, block_item in enumerate(module, start=1):
                    x = block_item(x)
                    if block_no in self.block_cfg:
                        block_value.append(x)

        # Flatten and classify.
        x = x.view(x.size(0), -1)
        y = self.classifier(x)

        return (y, block_value) if block_value else y

# Build the network with the default depth and the 100-class head, then switch
# it to evaluation mode. eval() returns the module, so the notebook shows its repr.
model_resnet = resnet(dataset='cifar100')
model_resnet.eval()

resnet(
  (feature): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): Sequential(
      (0): Bottleneck(
        (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (select): channel_selection()
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn3): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu3): ReLU(inplace=True)
        (conv3): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (down_sample): Sequential(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        )
      )
      (1): Bottleneck(


In [6]:
# Show the BatchNorm2d layers found among the first ten entries of modules().
for position, module in enumerate(model_resnet.modules()):
    if position >= 10:
        break
    if not isinstance(module, nn.BatchNorm2d):
        continue
    # `module.type` is not called, so its bound-method repr is what gets printed.
    print(position, module.weight.data, module.type)

5 tensor([0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000,
        0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000]) <bound method Module.type of BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)>
9 tensor([0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000,
        0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000]) <bound method Module.type of BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)>


In [7]:
# NOTE(security): torch.load unpickles the file, so only open checkpoints that
# come from a trusted source.
# map_location='cpu' lets a checkpoint written during a GPU run be opened on a
# machine without CUDA; load_state_dict then copies the values into the model's
# own parameters.
checkpoint = torch.load(
    r'G:\lizulin\network-slimming\logs\sparsity_resnet164_cifar100_s_1e_4\\model_best.pth.tar',
    map_location='cpu')
best_prec1 = checkpoint['best_prec1']
epoch1 = checkpoint['epoch']
# Only restore weights when the checkpoint actually carries them.
if checkpoint['state_dict'] is not None:
    model_resnet.load_state_dict(checkpoint['state_dict'])

epoch1, best_prec1

(86, 0.7687)

#### enumerate()迭代访问model.modules()，返回type如下：
- __整个模型的type__。即这里的resnet；`feature`和`classifier`只是属性名，遍历时返回的是它们对应的模块本身（`Sequential`和`Linear`）。
- __依次遍历模型__。对Sequential类访问一次，然后对Sequential内的模块再访问。
- 此处列举出`model_resnet.modules()`，__粗体__表示在feature过程有效的模块：
    1. resnet (model type)
    2. __Conv2d (input_channel=3, output_channel=16, kernel_size=3)__
    3. Bottleneck
    4. Bottleneck的items：__BatchNorm2d__、(__channel_selection__仅三个一组中的第一个有)、__ReLU__、__Conv2d__
    5. AvgPool
    6. Linear
- 总的来说，数据流动的四个type：*Conv2d*、*BatchNorm2d*、*channel_selection*、*ReLU*

In [None]:
# Enumerate everything modules() yields and print its index.
# `m.type` is referenced without being called, so the printed value is the
# bound-method repr, which embeds the module's own repr.
for k, m in enumerate(model_resnet.modules()):
    print(k, "-->", m.type)

#### enumerate()迭代访问Model.children()，返回type如下：
- feature命名的Sequential
- classifier命名的Linear

In [None]:
# Enumerate only what children() yields and print its index.
# `m.type` is referenced without being called, so the bound-method repr is printed.
for k, m in enumerate(model_resnet.children()):
    print(k, "-->", m.type)

### 3. Model modules and parameters

1. model.modules() 递归返回模型自身及其所有子模块（`子层`和`子层的子层`）
2. model.named_modules() 同上，但返回`(名称, 模块)`二元组
3. model.children() 只返回模型的直接子层
4. model.named_children() 同上，但返回`(名称, 模块)`二元组
5. model.parameters() 返回模型所有参数的迭代器，可使用list()展开
6. model.named_parameters() 同上，但返回`(名称, 参数)`二元组
7. model.state_dict() 返回模型的字典形式（名称 → 张量）

#### 3.1 model.modules() 返回模型的所有`nn.Module`的子层以及子层的子层

In [None]:
# The method is referenced, not called: the notebook displays the bound-method
# repr (which embeds the model's repr) rather than iterating the modules.
model_resnet.modules

In [None]:
# Print the index of every entry yielded by modules(), followed by the
# bound-method repr of `m.type` (the attribute is not called).
for k, m in enumerate(model_resnet.modules()):
    print(k, m.type)

#### 3.3 model.children() 返回模型的子层，常见的比如`feature`和`classifier`

In [None]:
# The method is referenced, not called: the notebook displays the bound-method
# repr rather than iterating the child modules.
model_resnet.children

#### 3.5 model.parameters()  返回模型的所有参数，使用list()迭代

In [None]:
# Materialise the parameters() iterator into a list so the notebook displays every parameter.
list(model_resnet.parameters())

#### 3.7 model.state_dict() 返回模型的字典形式

In [None]:
# Display the model's state_dict.
model_resnet.state_dict()

### 4. Access Model feature

#### 4.1 module -> feature

In [None]:
# The Sequential holding the stem conv, the three layers, and the final
# BatchNorm / channel selection / ReLU / average pooling.
model_resnet.feature

#### 4.2 module -> feature -> layer (18 bottlenecks)

In [None]:
# Index 0 of `feature` is the stem conv, so index 1 is the first layer built by _make_layer.
model_resnet.feature[1]

#### 4.3 module -> feature -> layer -> bottleneck 

In [None]:
# First block of the first layer.
model_resnet.feature[1][0]

#### 4.4 module -> feature -> layer -> bottleneck -> batchNorm2d 

In [None]:
# For every block of the first layer, print the block index next to each of
# the block's direct sub-modules.
first_layer = model_resnet.feature[1]
for k, block in enumerate(first_layer.children()):
    for sub_module in block.children():
        print(k, "->", sub_module)

#### 4.5 foreach: module -> feature -> layer -> bottleneck -> batchNorm2d

In [None]:
# Walk feature -> layers -> blocks -> BatchNorm2d and print a 1-based index at
# every level. Only the Sequential entries of `feature` are layers.
layers = (entry for entry in model_resnet.feature if isinstance(entry, nn.Sequential))
for layer_idx, layer in enumerate(layers, start=1):
    print("layer {}".format(layer_idx))
    for block_idx, block in enumerate(layer.children(), start=1):
        print("\tblock {}".format(block_idx))
        bn_layers = (child for child in block.children() if isinstance(child, nn.BatchNorm2d))
        for bn_idx, _ in enumerate(bn_layers, start=1):
            print("\t\tbn {}".format(bn_idx))

### 5. Access Model forward

#### 5.1 model block config

- block_cfg = [6, 12, 18], block value: 3 × 3 = 9
- block_cfg = [9, 18], block value: 3 × 2 = 6

In [None]:
# List which (layer, block) pairs a given block_cfg selects. Block numbers are
# 1-based; the layer number is the position of the layer inside `feature`.
block_cfg = [6, 12, 18]
block_value = [
    "layer {} block {} value".format(layer_pos, block_no)
    for layer_pos, layer in enumerate(model_resnet.feature)
    if isinstance(layer, nn.Sequential)
    for block_no, _ in enumerate(layer.children(), start=1)
    if block_no in block_cfg
]
block_value, len(block_value)

#### 5.2 `forward_bn(x)`: model > bottleneck > batchNorm2d => Feature map

- 实现方式：`forward_bn(x)` return `data`, `bn_value`

In [None]:
import torch 
import torch.nn as nn
import numpy as np

# Push a random batch through `feature`, running every block with forward_bn
# so the shape of its first BatchNorm output can be printed.
data = torch.randn(32, 3, 32, 32)

layer_idx = 0
for k, m in enumerate(model_resnet.feature):     # modules (-classifier) -> feature
    if isinstance(m, nn.Sequential):             # feature (-conv2d) -> layers
        layer_idx += 1
        print("layer {}".format(layer_idx))
        # Iterate the layer directly (no re-indexing of `feature`) and let
        # enumerate produce the 1-based block number.
        for block_idx, n in enumerate(m, start=1):   # layers -> layer (many Bottlenecks)
            print("\tblock {}".format(block_idx))
            data, bn_value = n.forward_bn(data)
            print("\t\tbn_value: ", list(bn_value[0].shape))
    else:
        data = m(data)
        print(k, '->', list(data.shape))

#### 5.3 `bn_value()`: model > bottleneck > batchNorm2d => Feature map

- 实现方式：bn_value() return bn_data
- Model访问方式：
    - model_resnet.feature => k, m => model_resnet.feature[k]
    - model_resnet.feature => k, m => m.children()
    

In [None]:
import torch 
import torch.nn as nn
import numpy as np

# Run a random batch through `feature` block by block and collect, for every
# block, whatever `bn_value()` returns.
# NOTE(review): the Bottleneck class shown in this file defines forward, mask,
# forward_bn and forward_ge but no `bn_value()` method, so this cell presumably
# relies on another revision of the class — confirm before running.
data = torch.randn(32, 3, 32, 32)
data1 = data.clone()  # copy of the input, taken before `data` is overwritten below

record1 = []
for k, m in enumerate(model_resnet.feature):     # modules (-classifier) -> feature
    if isinstance(m, nn.Sequential):             # feature (-conv2d) -> layers
        print("layer {}".format(k))
        for j, n in enumerate(m.children()):     # layers -> layer (many Bottlenecks)
            print("\tblock {}".format(j))
            
            data = n(data)
            bn_value = n.bn_value()
            
            record1.append(bn_value)
            print("\t\tbn_value: ", list(bn_value[0].shape))

    else:
        data = m(data)
        print(k, '->', data.shape)

#### 5.4 相同的输入data，验证两种方式的结果是相同的

In [None]:
# Replay the saved input through `feature`, this time gathering the BatchNorm
# outputs of every block through forward_bn.
data = data1.clone()

record2 = []
for layer_pos, stage in enumerate(model_resnet.feature):
    # Anything that is not a layer (stem conv, final BN, ...) is applied as is.
    if not isinstance(stage, nn.Sequential):
        data = stage(data)
        print(layer_pos, '->', data.shape)
        continue

    print("layer {}".format(layer_pos))
    for block_pos, block in enumerate(stage.children()):
        print("\tblock {}".format(block_pos))
        data, bn_value = block.forward_bn(data)
        record2.append(bn_value)
        print("\t\tbn_value: ", list(bn_value[0].shape))

In [None]:
# Display the first five entries collected through bn_value().
record1[:5]

In [None]:
# Display the first five entries collected through forward_bn().
record2[:5]

#### 5.5 Bottleneck => BatchNorm2d

    由于Bottleneck的forward后的`out`需要与输入的数据`x`残差连接，所以使用`data=m(data)`遍历Bottleneck会出现问题

In [None]:
import torch 
import torch.nn as nn
import numpy as np

# Naive traversal: feed the data through every child module of every block, one
# after another. children() yields the sub-modules in the order they were
# registered in Bottleneck.__init__, which is not the order forward() applies
# them, and the residual addition is never performed, so this does not
# reproduce the block's forward pass.
data = torch.randn(32, 3, 32, 32)

layer_idx = 0
for k, m in enumerate(model_resnet.feature):     # modules (-classifier) -> feature
    if isinstance(m, nn.Sequential):             # feature (-conv2d) -> layers
        layer_idx += 1
        print("layer {}".format(layer_idx))
        block_idx = 0
        for j, n in enumerate(model_resnet.feature[k].children()):     # layers -> layer (many Bottlenecks)
            block_idx += 1
            print("\tblock {}".format(block_idx))
            bn_idx = 0
            for i, o in enumerate(n.children()): # Bottlenecks -> batchNorm2d
                # A Sequential child (the down_sample branch) is unrolled and
                # applied module by module.
                if isinstance(o, nn.Sequential):
                    print(o.type)
                    for l, p in enumerate(o):
                        print(p.type)
                        data = p(data)
                        print('\t\t-', k, '->', p.type)
                else:
                    data = o(data)
                    print('\t\t', k, '->', o.type)
                if isinstance(o, nn.BatchNorm2d):
                    bn_idx += 1
                    print("\t\tbn {}".format(bn_idx))
    else:
        data = m(data)
        print(k, '->', data.shape)

### 6. Access Model BatchNorm2d  Number: 163

$$163 = 162\ (3\ \text{layers} \times 18\ \text{Bottlenecks} \times 3\ \text{BatchNorm2ds}) + 1\ \text{BatchNorm2d}$$

In [None]:
# Count the BatchNorm2d layers of the whole model and add up their channel
# counts (the length of each weight vector).
bn_layers = [module for module in model_resnet.modules() if isinstance(module, nn.BatchNorm2d)]
bn_count = len(bn_layers)
bn_size = sum(layer.weight.data.shape[0] for layer in bn_layers)
bn_count, bn_size

In [None]:
# Count the BatchNorm layers and their total channel count by actually running
# a random batch through `feature`.
data = torch.randn(32, 3, 32, 32)

bn_count, bn_size = 0, 0
for k, m in enumerate(model_resnet.feature):     # modules (-classifier) -> feature
    if isinstance(m, nn.Sequential):             # feature (-conv2d) -> layers
        for j, n in enumerate(m.children()):     # layers -> layer (many Bottlenecks)
            # Bottleneck has no bn_value() method; forward_bn returns the block
            # output together with the outputs of its BatchNorm layers.
            data, bn_value = n.forward_bn(data)

            # Count what was actually returned instead of assuming 3.
            bn_count += len(bn_value)
            # Dimension 1 of each BatchNorm output is its channel count.
            bn_size += sum(bn_item.shape[1] for bn_item in bn_value)
    else:
        data = m(data)
        if isinstance(m, nn.BatchNorm2d):
            bn_count += 1
            bn_size += data.shape[1]
bn_count, bn_size

In [None]:
import torch

def gammas(feature_map):
    """
    Compute one value per channel by comparing the full feature map with the
    feature map that has this channel removed.

    NOTE(review): `at_loss` is not defined in the visible part of this file; it
    is assumed to return a scalar — confirm.

    :param feature_map: [B, C, H, W]
    :return: [C], entry j is at_loss(feature_map, feature_map without channel j)
    """
    _, num_channels, _, _ = feature_map.shape
    channel_ids = torch.arange(num_channels)
    gamma = torch.zeros(num_channels)
    for j in range(num_channels):
        # Boolean mask that keeps every channel except j.
        keep = channel_ids != j
        gamma[j] = at_loss(feature_map, feature_map[:, keep, :, :])
    return gamma

In [None]:
# Slice the first 23 samples of a random 4-D tensor and show the resulting shape.
shape = (64, 3, 32, 32)
b, c, h, w = shape
gamma = torch.randn(*shape)
# Slicing only the leading dimension keeps all remaining dimensions intact.
value = gamma[:23]
value.shape

In [None]:
# Tensor.view() takes target sizes, not slices: writing `0:23` inside the call
# is a SyntaxError. narrow(dim, start, length) is the method-call form of
# slicing one dimension; here it keeps the first 23 entries of dimension 0 and
# leaves the other dimensions untouched.
value = gamma.narrow(0, 0, 23)
value.shape

In [2]:
import torch

# Element-wise product of an integer tensor and a float tensor; the result is
# a float tensor.
x = torch.arange(2, 6)
y = torch.tensor([2.2, 2.2, 4.4, 2.])
z = torch.mul(x, y)
z

tensor([ 4.4000,  6.6000, 17.6000, 10.0000])