# ***ResNet Architecture***

- **ResNet** is motivated by the difficulty of training very deep networks due to **vanishing gradients**. It introduces *residual learning* through skip connections.
- *Residual connections* allow gradients to flow directly through identity paths, enabling the training of deeper models without degradation. Instead of learning a full mapping H(x), each block learns the residual function F(x) = H(x) - x and outputs F(x) + x, which improves convergence and accuracy.

## Importing libraries and modules

In [None]:
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
from torch.optim import Adam, SGD
from torchvision import datasets
import numpy as np
import matplotlib.pyplot as plt
import os
from torchvision.transforms import v2

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

<p align="center">
  <img src="Images/Difference.png" alt = "ResBlock" width="70%"/> <br>
  <em> Left: block for ResNet-34. Right: bottleneck for ResNet-50/101/152. </em>
</p>

## ***ResNet 50+***

### Bottleneck block class (ResNet 50/101/152)

In [56]:
class Bottleneck(nn.Module):
    """Bottleneck residual block: 1x1 conv -> 3x3 conv -> 1x1 conv plus a skip path.

    The block emits ``out_channels * expansion`` channels. The 3x3 convolution
    carries ``stride``, so it is the only place where the spatial size of the
    main path can change.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Width of the first two convolutions; the last 1x1
            convolution widens it to ``out_channels * expansion``.
        identity_downsample: Optional module applied to the input on the skip
            path so that its shape matches the main path before the addition.
            When ``None`` the input is added unchanged.
        stride: Stride of the 3x3 convolution.
    """

    # Ratio between the block's output channels and ``out_channels``.
    expansion = 4

    def __init__(self, in_channels, out_channels, identity_downsample = None, stride = 1):
        super().__init__()
        widened = out_channels * self.expansion
        self.model = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            # The only strided convolution of the block.
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, widened, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(widened),
        )
        self.stride = stride
        self.relu = nn.ReLU()
        self.identity_downsample = identity_downsample

    def forward(self, x):
        """Return ``relu(model(x) + shortcut(x))``."""
        out = self.model(x)

        # The input is never written to in place (the addition below targets
        # the freshly computed ``out``), so it can be reused for the skip path
        # without cloning it first.
        shortcut = x
        if self.identity_downsample is not None:
            shortcut = self.identity_downsample(x)

        out += shortcut
        return self.relu(out)

### Making layers of blocks function

In [66]:
def make_layer(ResBlock, blocks, in_channels, planes, stride=1):
    """Assemble one stage made of ``blocks`` residual blocks.

    The first block receives ``stride`` and, when the skip path would not
    match the main path (stride other than 1, or ``in_channels`` different
    from ``planes * ResBlock.expansion``), a 1x1 convolution + batch-norm
    projection for the shortcut. The remaining ``blocks - 1`` blocks are built
    with ``planes * ResBlock.expansion`` input channels and default arguments.

    Returns:
        ``nn.Sequential`` holding the blocks in order.
    """
    stage_channels = planes * ResBlock.expansion

    projection = None
    shapes_already_match = stride == 1 and in_channels == stage_channels
    if not shapes_already_match:
        projection = nn.Sequential(
            nn.Conv2d(in_channels, stage_channels, kernel_size=1, stride=stride, padding=0),
            nn.BatchNorm2d(stage_channels),
        )

    # Only the opening block may change resolution / channel count.
    opening = ResBlock(in_channels, planes, identity_downsample=projection, stride=stride)
    followers = [ResBlock(stage_channels, planes) for _ in range(blocks - 1)]

    # Equivalent to nn.Sequential(block1, block2, block3, ...).
    return nn.Sequential(opening, *followers)

### ResNet 50+ class defined

In [None]:
class ResNet(nn.Module):
    """ResNet built from blocks that expose an ``expansion`` attribute.

    Layout: 7x7 stride-2 convolution -> batch norm -> ReLU -> 3x3 stride-2
    max pool -> one stage per entry of ``block_list`` (the first with stride 1,
    the others with stride 2) -> global average pool -> linear classifier.

    Args:
        ResBlock: Block class passed to ``make_layer``; must provide
            ``expansion``.
        block_list: Number of blocks in each stage.
        num_classes: Size of the classifier output.
        num_channels: Channels of the input images.
        planes: Base width of each stage; stage ``i`` outputs
            ``planes[i] * ResBlock.expansion`` channels.
    """

    def __init__(self, ResBlock, block_list, num_classes, num_channels = 3, planes = [64,128,256,512] ):
        super().__init__()
        # Copy so that instances never alias the shared default list.
        self.planes = list(planes)
        stem_width = self.planes[0]

        # Shape notes below assume a 224x224 input and the default planes.
        self.conv1 = nn.Conv2d(num_channels, stem_width, kernel_size=7, stride=2, padding=3)  # [64,112,112]
        # Must match the channel count produced by conv1 (was hard-coded to 64).
        self.BN1 = nn.BatchNorm2d(stem_width)
        self.relu = nn.ReLU()
        self.max_pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # [64,56,56]

        self.layer_1 = make_layer(ResBlock, block_list[0], stem_width, planes=stem_width)  # [256,56,56] with expansion 4

        # Every later stage halves the resolution and takes the expanded
        # channel count of the previous stage as input.
        self.layers = nn.ModuleList()
        for i in range(1, len(block_list)):
            layer = make_layer(ResBlock, block_list[i], self.planes[i-1]*ResBlock.expansion, self.planes[i], stride=2)
            self.layers.append(layer)

        # (B,C,1,1): each feature map is pooled on its own into a single value.
        self.avg_pool = nn.AdaptiveAvgPool2d((1,1))
        # Classifier input follows the last stage actually built, so a
        # block_list with a stage count other than 4 stays consistent.
        last_stage_width = self.planes[len(block_list) - 1] * ResBlock.expansion
        self.fc = nn.Linear(last_stage_width, num_classes)

    def forward(self, x):
        """Return the classifier output of shape ``[B, num_classes]``."""
        x = self.relu(self.BN1(self.conv1(x)))
        x = self.max_pool1(x)
        x = self.layer_1(x)
        for layer in self.layers:
            x = layer(x)
        x = self.avg_pool(x)
        # Flatten [B,C,1,1] to [B,C], keeping the batch dimension.
        x = x.reshape(x.shape[0], -1)
        x = self.fc(x)
        return x

    def train_batch(self, x, y, optimizer, loss_fn):
        """Run one optimisation step on a single batch.

        Puts the module in training mode, computes ``loss_fn(self(x), y)``,
        back-propagates, steps ``optimizer`` and then clears the gradients.

        Returns:
            Tuple ``(loss as a Python number, prediction)``.
        """
        self.train()
        prediction = self(x)  # same as self.forward(x)
        batch_loss = loss_fn(prediction, y)
        batch_loss.backward()
        optimizer.step()
        # Gradients are cleared after the step, so the next call starts clean.
        optimizer.zero_grad()
        return batch_loss.item(), prediction

## ***ResNet 18/34*** (or lighter)

### Residual Block for the 18/34-layers ResNet

In [None]:
class Block(nn.Module):
    """Basic residual block: two 3x3 convolutions plus a skip path.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels produced by both convolutions.
        identity_downsample: Optional module applied to the input on the skip
            path so that its shape matches the main path before the addition.
            When ``None`` the input is added unchanged.
        stride: Stride of the first 3x3 convolution.
    """

    # Output channels equal ``out_channels`` (no widening).
    expansion = 1

    def __init__(self, in_channels, out_channels, identity_downsample = None, stride = 1):
        super().__init__()
        self.model = nn.Sequential(
            # The first convolution applies ``stride`` so that the main path
            # is downsampled together with a strided shortcut. It used to be
            # fixed at 1, which made the addition in ``forward`` fail with a
            # shape mismatch whenever stride != 1.
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
        )
        self.relu = nn.ReLU()
        self.identity_downsample = identity_downsample

    def forward(self, x):
        """Return ``relu(model(x) + shortcut(x))``."""
        out = self.model(x)

        # The input is never written to in place (the addition below targets
        # the freshly computed ``out``), so no clone is needed for the skip.
        shortcut = x
        if self.identity_downsample is not None:
            shortcut = self.identity_downsample(x)

        out += shortcut
        return self.relu(out)

### Making layers of blocks function

In [49]:
def make_layer_(ResBlock, blocks, in_channels, out_channels, stride = 1):
    """Assemble one stage made of ``blocks`` residual blocks.

    The first block maps ``in_channels`` to ``out_channels`` and receives
    ``stride``. When stride is not 1 or the channel counts differ, it also
    gets a 1x1 convolution + batch-norm projection for its shortcut. The
    remaining ``blocks - 1`` blocks keep ``out_channels`` and use defaults.

    Returns:
        ``nn.Sequential`` holding the blocks in order.
    """
    projection = None
    shapes_already_match = stride == 1 and in_channels == out_channels
    if not shapes_already_match:
        projection = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride),
            nn.BatchNorm2d(out_channels),
        )

    # Only the opening block may change resolution / channel count.
    opening = ResBlock(in_channels, out_channels, identity_downsample=projection, stride=stride)
    followers = [ResBlock(out_channels, out_channels) for _ in range(blocks - 1)]
    return nn.Sequential(opening, *followers)

### Lite ResNet (18/34) class defined

In [None]:
class ResNet_lite(nn.Module):
    """ResNet variant whose stages are built with ``make_layer_``.

    Layout: 7x7 stride-2 convolution -> batch norm -> ReLU -> 3x3 stride-2
    max pool -> one stage per entry of ``block_list`` (the first with stride 1,
    the others with stride 2) -> global average pool -> linear classifier.

    Args:
        ResBlock: Block class passed to ``make_layer_``.
        block_list: Number of blocks in each stage.
        num_classes: Size of the classifier output.
        num_channels: Channels of the input images.
        planes: Channel width of each stage.
    """

    def __init__ (self, ResBlock, block_list, num_classes, num_channels = 3, planes = [64,128,256,512]):
        super().__init__()
        # Copy so that instances never alias the shared default list.
        self.planes = list(planes)
        stem_width = self.planes[0]

        self.conv1 = nn.Conv2d(num_channels, stem_width, kernel_size=7, stride=2, padding=3)
        self.BN1 = nn.BatchNorm2d(stem_width)
        self.relu = nn.ReLU()
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # downsample

        self.layer1 = make_layer_(ResBlock, block_list[0], stem_width, stem_width)

        # Every later stage is built with stride 2 and takes the previous
        # stage's width as its input channel count.
        self.layers = nn.ModuleList()
        for i in range(1, len(block_list)):
            layer = make_layer_(ResBlock, block_list[i], self.planes[i-1], self.planes[i], stride=2)
            self.layers.append(layer)

        self.avg_pool = nn.AdaptiveAvgPool2d((1,1))
        # Classifier input follows the last stage actually built, so a
        # block_list with a stage count other than 4 stays consistent.
        self.fc = nn.Linear(self.planes[len(block_list) - 1], num_classes)

    def forward(self, x):
        """Return the classifier output of shape ``[B, num_classes]``."""
        x = self.relu(self.BN1(self.conv1(x)))
        x = self.max_pool(x)
        x = self.layer1(x)
        for layer in self.layers:
            x = layer(x)
        x = self.avg_pool(x)
        # Flatten [B,C,1,1] to [B,C], keeping the batch dimension.
        x = x.reshape(x.shape[0], -1)
        x = self.fc(x)
        return x

    def train_batch(self, x, y, optimizer, loss_fn):
        """Run one optimisation step on a single batch.

        Puts the module in training mode, computes ``loss_fn(self(x), y)``,
        back-propagates, steps ``optimizer`` and then clears the gradients.

        Returns:
            Tuple ``(loss as a Python number, prediction)``.
        """
        self.train()
        prediction = self(x)  # same as self.forward(x)
        batch_loss = loss_fn(prediction, y)
        batch_loss.backward()
        optimizer.step()
        # Gradients are cleared after the step, so the next call starts clean.
        optimizer.zero_grad()
        return batch_loss.item(), prediction

## Defining Residual Networks of different depths: 18/34/50/101/152

In [73]:
def ResNet18(num_classes = 100, num_in_channels = 3):
    """Build a ``ResNet_lite`` of ``Block`` units with 2, 2, 2, 2 blocks per stage."""
    return ResNet_lite(Block, [2, 2, 2, 2], num_classes, num_channels=num_in_channels)

def ResNet34(num_classes = 100, num_in_channels = 3):
    """Build a ``ResNet_lite`` of ``Block`` units with 3, 4, 6, 3 blocks per stage."""
    return ResNet_lite(Block, [3, 4, 6, 3], num_classes, num_channels=num_in_channels)

def ResNet50(num_classes = 1000, num_in_channels = 3):
    """Build a ``ResNet`` of ``Bottleneck`` units with 3, 4, 6, 3 blocks per stage."""
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes, num_channels=num_in_channels)

def ResNet101(num_classes = 1000, num_in_channels = 3):
    """Build a ``ResNet`` of ``Bottleneck`` units with 3, 4, 23, 3 blocks per stage."""
    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes, num_channels=num_in_channels)

def ResNet152(num_classes = 1000, num_in_channels = 3):
    """Build a ``ResNet`` of ``Bottleneck`` units with 3, 8, 36, 3 blocks per stage."""
    return ResNet(Bottleneck, [3, 8, 36, 3], num_classes, num_channels=num_in_channels)

In [74]:
def test():
    """Smoke test: push a random batch through ResNet50 and print the shapes.

    The model and the input are both placed on ``device`` before the forward
    pass. Previously only the output was moved there, so the computation
    always ran on the CPU.
    """
    net = ResNet50().to(device)
    # NOTE(review): width 244 is kept from the original (possibly meant to be
    # 224). The network ends in adaptive average pooling, so a non-square
    # input still yields a [batch, num_classes] output.
    x = torch.randn(2, 3, 224, 244, device=device)
    y = net(x)
    print( y.shape, x.shape)

test()

torch.Size([2, 1000]) torch.Size([2, 3, 224, 244])
