## ResNet Implementation
[Paper: Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions (used by ResNet-18/34).

    Computes ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``.

    Args:
        in_channels (int): Number of channels of the input feature map.
        out_channels (int): Base channel width; the block outputs
            ``out_channels * expansion`` channels.
        stride (int): Stride of the first convolution and of the projection
            shortcut. Defaults to 1.
    """
    # Scale factor of the number of output channels
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock, self).__init__()

        # Block 1: the only place where the main path applies `stride`
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)

        # Block 2: always stride 1. Striding here as well would shrink the main
        # path twice while the shortcut shrinks once, and the residual addition
        # in forward() would fail with a shape mismatch whenever stride != 1.
        self.conv2 = nn.Conv2d(out_channels, out_channels * self.expansion, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels * self.expansion)

        # Identity shortcut by default (an empty Sequential returns its input)
        self.shortcut = nn.Sequential()

        # Use a 1x1 projection when the spatial size or channel count changes
        if stride != 1 or in_channels != out_channels * self.expansion:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * self.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * self.expansion)
            )

    def forward(self, x):
        """Apply the two conv/BN stages, add the shortcut and apply the final ReLU."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

In [3]:
class Bottleneck(nn.Module):
    """Three-stage residual block: 1x1 conv, 3x3 conv, then 1x1 conv.

    The last 1x1 convolution widens the feature map to
    ``out_channels * expansion`` channels. ``stride`` is applied by the 3x3
    convolution and by the projection shortcut.
    """
    # Scale factor of the number of output channels
    expansion = 4

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        widened = out_channels * self.expansion

        # Stage 1: 1x1 convolution to the base width
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)

        # Stage 2: 3x3 convolution, carries the stride
        self.conv2 = nn.Conv2d(
            out_channels, out_channels,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Stage 3: 1x1 convolution to the widened channel count
        self.conv3 = nn.Conv2d(out_channels, widened, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened)

        # Shortcut: identity when input and output shapes agree, otherwise a
        # strided 1x1 projection followed by batch norm.
        if stride == 1 and in_channels == widened:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, widened, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(widened),
            )

    def forward(self, x):
        """Run the three conv/BN stages, add the shortcut and apply the final ReLU."""
        y = self.conv1(x)
        y = F.relu(self.bn1(y))
        y = self.conv2(y)
        y = F.relu(self.bn2(y))
        y = self.bn3(self.conv3(y))
        y += self.shortcut(x)
        return F.relu(y)

In [32]:
def test_bottleneck():
    """Smoke-test a stride-1 Bottleneck: check and print its output shape."""
    block = Bottleneck(256, 128, stride=1)
    x = torch.randn(1, 256, 112, 112)
    out = block(x)
    # stride=1 keeps the 112x112 spatial size; the block emits
    # 128 * Bottleneck.expansion = 512 channels.
    assert out.shape == (1, 512, 112, 112)
    print(out.shape) # Expected output shape: (1, 512, 112, 112)

test_bottleneck()

torch.Size([1, 512, 112, 112])


In [19]:
def test_basic_block():
    """Push a random 64-channel 56x56 input through a stride-1 BasicBlock and print the result's shape."""
    residual_block = BasicBlock(64, 64, stride=1)
    sample = torch.randn(1, 64, 56, 56)
    result = residual_block(sample)
    # Expected output shape: (1, 64, 56, 56)
    print(result.shape)

test_basic_block()

torch.Size([1, 64, 56, 56])


In [36]:
class ResNet(nn.Module):
    """ResNet classifier: stem, four residual stages, global average pool and a linear head."""

    def __init__(self, resnet_type, in_channels, num_classes):
        """
        Initialize the ResNet model based on the specified type.
        Args:
            resnet_type (sequence): Positionally indexed configuration, e.g.
                ([64, 128, 256, 512], [3, 4, 6, 3], 4) for ResNet-50.
                - [0]: List of base channel sizes, one per stage.
                - [1]: List of block repeat counts, one per stage.
                - [2]: Expansion factor (1 for BasicBlock, 4 for Bottleneck).
            in_channels (int): Number of input channels.
            num_classes (int): Number of output classes for the final fully connected layer.
        Raises:
            ValueError: If the expansion factor is neither 1 nor 4.
        """
        super(ResNet, self).__init__()
        self.channels_list = resnet_type[0]
        self.repeat_list = resnet_type[1]
        self.expansion = resnet_type[2]

        # Any other value would select BasicBlock while channel widths are
        # still multiplied by `expansion`, producing a model that only fails
        # at forward time. Reject it up front instead.
        if self.expansion not in (1, 4):
            raise ValueError(
                f"expansion must be 1 (BasicBlock) or 4 (Bottleneck), got {self.expansion!r}"
            )

        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False) # Feature map halved
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) # Feature map halved

        # Stage 1 keeps the resolution (conv1 and maxpool already halved it twice);
        # stages 2-4 each halve it through the stride of their first block.
        self.block1 = self._make_layer(64, self.channels_list[0], self.repeat_list[0], self.expansion, stride=1)
        self.block2 = self._make_layer(self.channels_list[0] * self.expansion, self.channels_list[1], self.repeat_list[1], self.expansion, stride=2)
        self.block3 = self._make_layer(self.channels_list[1] * self.expansion, self.channels_list[2], self.repeat_list[2], self.expansion, stride=2)
        self.block4 = self._make_layer(self.channels_list[2] * self.expansion, self.channels_list[3], self.repeat_list[3], self.expansion, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) # Feature map to 1x1
        self.fc = nn.Linear(self.channels_list[3] * self.expansion, num_classes) # fully connected layer

    def _make_layer(self, in_channels, out_channels, num_repeat, expansion, stride):
        """
        Create a layer of blocks (either BasicBlock or Bottleneck) based on the specified parameters.
        Args:
            in_channels (int): Number of input channels of the first block.
            out_channels (int): Base number of output channels of every block.
            num_repeat (int): Number of blocks in the layer (at least one block is always created).
            expansion (int): Expansion factor for the block type (1 for BasicBlock, 4 for Bottleneck).
            stride (int): Stride for the first block in the layer.
        Returns:
            nn.Sequential: A sequential container of blocks.
        """
        block_cls = Bottleneck if expansion == 4 else BasicBlock

        # Only the first block may change resolution / channel count
        layers = [block_cls(in_channels, out_channels, stride)]
        # The remaining blocks consume the expanded width and keep the resolution
        for _ in range(1, num_repeat):
            layers.append(block_cls(out_channels * expansion, out_channels, stride=1))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class scores produced by the stem, the four stages, pooling and the linear head."""
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.maxpool(x)

        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)

        x = self.avgpool(x)

        # Collapse everything but the batch dimension before the linear head
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

In [37]:
# ResNet variants: name -> (channels per stage, blocks per stage, expansion factor)
models = {
    'resnet18': ([64, 128, 256, 512], [2, 2, 2, 2], 1),
    'resnet34': ([64, 128, 256, 512], [3, 4, 6, 3], 1),
    'resnet50': ([64, 128, 256, 512], [3, 4, 6, 3], 4),
    'resnet101': ([64, 128, 256, 512], [3, 4, 23, 3], 4),
    'resnet152': ([64, 128, 256, 512], [3, 8, 36, 3], 4),
}

In [47]:
def test_ResNet(args):
    """Build a 3-channel, 1000-class ResNet from ``args``, run one random 224x224 input through it, print the output shape and return the model."""
    net = ResNet(args, 3, 1000)
    scores = net(torch.randn(1, 3, 224, 224))
    print(scores.shape)
    return net

# Pick the architecture to instantiate from the `models` table
architecture_type = 'resnet50'
model = test_ResNet(models[architecture_type])

torch.Size([1, 1000])


In [48]:
from torchsummary import summary

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# cell also runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)
summary(model, (3, 224, 224), device=device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
            Conv2d-7           [-1, 64, 56, 56]          36,864
       BatchNorm2d-8           [-1, 64, 56, 56]             128
            Conv2d-9          [-1, 256, 56, 56]          16,384
      BatchNorm2d-10          [-1, 256, 56, 56]             512
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
       Bottleneck-13          [-1, 256, 56, 56]               0
           Conv2d-14           [-1, 64,

In [46]:
from torchvision.models import (
    resnet152, resnet50, resnet18, resnet34,
    ResNet50_Weights, ResNet18_Weights, ResNet34_Weights, ResNet152_Weights,
)

# Reference model from torchvision, for a layer-by-layer comparison with the
# implementation above. Use the GPU when available, otherwise the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
torchvision_models = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
torchvision_models = torchvision_models.to(device)

summary(torchvision_models, (3, 224, 224), device=device)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\amanc/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:16<00:00, 6.25MB/s]


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256,