## Resnet152
### Reference
Kaiming He, et al., Deep Residual Learning for Image Recognition, CVPR, 2016. [link](https://arxiv.org/pdf/1512.03385.pdf)
### Contents
* up to 8x depper than VGG nets with lower complexity (152 layers)
* as a network is deeper with more stacked layers, problems such as gradient vanishing/exploding, degradation become problematic
* gradient vanishing/exploding has been largely addressed by normalized initialization and intermediate normalization layers
* degradation of training accuracy is stronger with deeper networks and in not caused by overfitting.
** <img src='../etc/images/Resnet-1.png' width='500'>
* solve it with shortcut connections which simply perform identity mapping
### Keys
* 224 * 224 crop is randomly sampled from an image
* batch normalization right after each convolution and before activation
* 256 batch SGD
* learning rate starts from 0.1 and is divided by 10 when the error plateaus
* weight decay of 1e-4 and momentum of 0.9
* the bottleneck design was used for building blocks to reduce training time
* <img src='../etc/images/Resnet-2.png' width='500'>
* when channel increased, stride is set to 2, and the conv layer for dimension matching of residual connection has stride of 2, too
* the residual connection is applied with dimension matching at the first iteration, and after then it just be added without dimension matching
* no bias for conv layers

In [1]:
import torch
from torch import nn
import torchvision
from torchinfo import summary

In [2]:
class conv_residual_bottleneck(nn.Module):
    def __init__(
        self,
        in_channels: int,
        inner_channel_1: int,
        inner_channel_2: int,
        inner_channel_3: int,
        reduce: bool,
        first_iter: bool
    ) -> None:
        super().__init__()
        if reduce:
            stride = 2
        else:
            stride = 1
        self.conv_sequence = nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=inner_channel_1,
                kernel_size=1,
                bias=False
            ),
            nn.BatchNorm2d(num_features=inner_channel_1),
            nn.Conv2d(
                in_channels=inner_channel_1,
                out_channels=inner_channel_2,
                kernel_size=3,
                padding=1,
                stride=stride,
                bias=False
            ),
            nn.BatchNorm2d(num_features=inner_channel_2),
            nn.Conv2d(
                in_channels=inner_channel_2,
                out_channels=inner_channel_3,
                kernel_size=1,
                bias=False
            ),
            nn.BatchNorm2d(num_features=inner_channel_3),
            nn.ReLU(inplace=True)
        )

        self.dim_match_conv = None
        if first_iter:
            self.dim_match_conv = nn.Sequential(
                nn.Conv2d(
                    in_channels=in_channels,
                    out_channels=inner_channel_3,
                    kernel_size=1,
                    stride=stride,
                    bias=False
                ),
                nn.BatchNorm2d(num_features=inner_channel_3),
            )

    def forward(self, x):
        skip = x
        x = self.conv_sequence(x)
        if self.dim_match_conv:
            skip = self.dim_match_conv(skip)
        return x + skip


In [3]:
def conv_iter(
    conv_name: str,
    in_channels: int,
    inner_channel_1: int,
    inner_channel_2: int,
    inner_channel_3: int,
    iter_num: int,
    reduce: bool = True
):
    convN = nn.Sequential()
    prev_channels = in_channels
    for i in range(iter_num):
        if i == 0 and reduce:
            reduce = True
        else:
            reduce = False

        if i == 0:
            first_iter = True
        else:
            first_iter = False

        convN.add_module(conv_name + '_' + str(i + 1), conv_residual_bottleneck(
            in_channels=prev_channels,
            inner_channel_1=inner_channel_1,
            inner_channel_2=inner_channel_2,
            inner_channel_3=inner_channel_3,
            reduce=reduce,
            first_iter=first_iter
        ))
        prev_channels = inner_channel_3
    return convN

In [4]:
class Resnet50(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=3,
                out_channels=64,
                kernel_size=7,
                padding=3,
                stride=2,
                bias=False
            ),
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(
                kernel_size=3,
                padding=1,
                stride=2
            )
        )
        self.conv2 = conv_iter(
            conv_name='conv2',
            in_channels=64,
            inner_channel_1=64,
            inner_channel_2=64,
            inner_channel_3=256,
            iter_num=3,
            reduce=False
        )
        self.conv3 = conv_iter(
            conv_name='conv3',
            in_channels=256,
            inner_channel_1=128,
            inner_channel_2=128,
            inner_channel_3=512,
            iter_num=4
        )
        self.conv4 = conv_iter(
            conv_name='conv4',
            in_channels=512,
            inner_channel_1=256,
            inner_channel_2=256,
            inner_channel_3=1024,
            iter_num=6
        )
        self.conv5 = conv_iter(
            conv_name='conv5',
            in_channels=1024,
            inner_channel_1=512,
            inner_channel_2=512,
            inner_channel_3=2048,
            iter_num=3
        )
        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(in_features=2048, out_features=1000)
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.classifier(x)
        return x

In [5]:
model = Resnet50()
model

Resnet50(
  (conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (conv2_1): conv_residual_bottleneck(
      (conv_sequence): Sequential(
        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (4): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (5): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (6): ReLU(inplace=True)
      )
      (dim_matc

In [6]:
summary(model)

Layer (type:depth-idx)                        Param #
Resnet50                                      --
├─Sequential: 1-1                             --
│    └─Conv2d: 2-1                            9,408
│    └─BatchNorm2d: 2-2                       128
│    └─ReLU: 2-3                              --
│    └─MaxPool2d: 2-4                         --
├─Sequential: 1-2                             --
│    └─conv_residual_bottleneck: 2-5          --
│    │    └─Sequential: 3-1                   58,112
│    │    └─Sequential: 3-2                   16,896
│    └─conv_residual_bottleneck: 2-6          --
│    │    └─Sequential: 3-3                   70,400
│    └─conv_residual_bottleneck: 2-7          --
│    │    └─Sequential: 3-4                   70,400
├─Sequential: 1-3                             --
│    └─conv_residual_bottleneck: 2-8          --
│    │    └─Sequential: 3-5                   247,296
│    │    └─Sequential: 3-6                   132,096
│    └─conv_residual_bottleneck: 2

In [7]:
model(torch.randn((2, 3, 224, 224))).shape

torch.Size([2, 1000])

In [8]:
model2 = torchvision.models.resnet50()
model2

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [9]:
summary(model2)

Layer (type:depth-idx)                   Param #
ResNet                                   --
├─Conv2d: 1-1                            9,408
├─BatchNorm2d: 1-2                       128
├─ReLU: 1-3                              --
├─MaxPool2d: 1-4                         --
├─Sequential: 1-5                        --
│    └─Bottleneck: 2-1                   --
│    │    └─Conv2d: 3-1                  4,096
│    │    └─BatchNorm2d: 3-2             128
│    │    └─Conv2d: 3-3                  36,864
│    │    └─BatchNorm2d: 3-4             128
│    │    └─Conv2d: 3-5                  16,384
│    │    └─BatchNorm2d: 3-6             512
│    │    └─ReLU: 3-7                    --
│    │    └─Sequential: 3-8              16,896
│    └─Bottleneck: 2-2                   --
│    │    └─Conv2d: 3-9                  16,384
│    │    └─BatchNorm2d: 3-10            128
│    │    └─Conv2d: 3-11                 36,864
│    │    └─BatchNorm2d: 3-12            128
│    │    └─Conv2d: 3-13               

In [11]:
import sys
sys.path.append('..')
from python_scripts import model_builder

model3 = model_builder.Resnet152(
    in_channels=3,
    num_classes=101
)
summary(model3)

Layer (type:depth-idx)                        Param #
Resnet152                                     --
├─Sequential: 1-1                             --
│    └─Conv2d: 2-1                            9,408
│    └─BatchNorm2d: 2-2                       128
│    └─ReLU: 2-3                              --
│    └─MaxPool2d: 2-4                         --
├─Sequential: 1-2                             --
│    └─conv_residual_bottleneck: 2-5          --
│    │    └─Sequential: 3-1                   58,112
│    │    └─Sequential: 3-2                   16,896
│    └─conv_residual_bottleneck: 2-6          --
│    │    └─Sequential: 3-3                   70,400
│    └─conv_residual_bottleneck: 2-7          --
│    │    └─Sequential: 3-4                   70,400
├─Sequential: 1-3                             --
│    └─conv_residual_bottleneck: 2-8          --
│    │    └─Sequential: 3-5                   247,296
│    │    └─Sequential: 3-6                   132,096
│    └─conv_residual_bottleneck: 2