In [None]:
import torch
from torch import nn


In [None]:
class FCN_8s(nn.Module):
    """VGG16-style (batch-normalised) fully-convolutional backbone for FCN-8s.

    Five conv blocks each halve the spatial resolution (the max-pools use
    ``ceil_mode=True``, so odd sizes round up), giving a cumulative stride of
    32. Two 1x1 conv blocks then widen the features to 4096 channels.

    Three 1x1 scoring heads (``score_pool5``, ``score_pool4``, ``score_pool3``)
    are created for the 4096-, 512- and 256-channel feature maps, but
    ``forward`` does not apply them: it returns the 4096-channel ``conv7``
    features.

    Args:
        num_classes: Number of output channels of each scoring head.
    """

    def __init__(self, num_classes=21):
        super().__init__()
        # Conv blocks 1-5: cumulative down-sampling of 1/2, 1/4, 1/8, 1/16, 1/32.
        self.conv1 = self._conv_block(3, 64, num_convs=2)
        self.conv2 = self._conv_block(64, 128, num_convs=2)
        self.conv3 = self._conv_block(128, 256, num_convs=3)
        self.conv4 = self._conv_block(256, 512, num_convs=3)
        self.conv5 = self._conv_block(512, 512, num_convs=3)

        # Conv blocks 6-7: 1x1 convolutions that raise / keep the channel
        # count at 4096.
        self.conv6 = self._pointwise_block(512, 4096)
        self.conv7 = self._pointwise_block(4096, 4096)

        # Per-class 1x1 scoring heads for the conv7, conv4 and conv3 outputs.
        self.score_pool5 = nn.Conv2d(4096, num_classes, kernel_size=1)
        self.score_pool4 = nn.Conv2d(512, num_classes, kernel_size=1)
        self.score_pool3 = nn.Conv2d(256, num_classes, kernel_size=1)

    @staticmethod
    def _conv_block(in_channels, out_channels, num_convs):
        """Build ``num_convs`` x (3x3 conv -> BatchNorm -> ReLU) plus a 2x2 max-pool."""
        layers = []
        for _ in range(num_convs):
            layers += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ]
            # Only the first conv of a block changes the channel count.
            in_channels = out_channels
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True))
        return nn.Sequential(*layers)

    @staticmethod
    def _pointwise_block(in_channels, out_channels):
        """Build 1x1 conv -> ReLU -> Dropout2d(0.5)."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Dropout2d(0.5),
        )

    def forward(self, x):
        """Run the backbone and return the ``conv7`` feature map.

        Args:
            x: Input tensor with 3 channels (the first conv expects
                ``in_channels=3``).

        Returns:
            The 4096-channel output of ``conv7``.
        """
        x = self.conv1(x)
        x = self.conv2(x)
        pool_3 = self.conv3(x)
        pool_4 = self.conv4(pool_3)
        pool_5 = self.conv5(pool_4)
        x = self.conv6(pool_5)
        x = self.conv7(x)
        # The return must live inside ``forward``; at class level it is a
        # SyntaxError and the whole cell fails to run.
        return x

In [1]:
import torch
import torch.nn as nn
import torchvision.models as models

In [2]:
class FCN8s(nn.Module):
    """FCN-8s segmentation network built on torchvision's ``vgg16_bn`` features.

    The pool5 features are scored, upsampled 2x and fused with the scored
    pool4 features; that sum is upsampled 2x and fused with the scored pool3
    features; the result is upsampled 8x to produce per-class logits at the
    input resolution.

    Args:
        num_classes: Number of output channels (segmentation classes).
        weights: Passed through to ``models.vgg16_bn``. The default loads the
            ImageNet checkpoint; pass ``None`` to build the backbone with
            random initialisation (no download).
    """

    # Positions inside ``vgg16_bn().features`` after which the pool3 and pool4
    # feature maps are captured for the skip connections.
    _POOL3_INDEX = 23
    _POOL4_INDEX = 33

    def __init__(self, num_classes, weights="IMAGENET1K_V1"):
        super().__init__()

        vgg16 = models.vgg16_bn(weights=weights, progress=True)

        # Use the features from vgg16
        self.features = vgg16.features

        # 1x1 scoring heads for the skip connections
        self.score_pool4 = nn.Conv2d(512, num_classes, kernel_size=1)
        self.score_pool3 = nn.Conv2d(256, num_classes, kernel_size=1)

        # Convolutional replacement for the VGG classifier. padding=3 keeps
        # the 7x7 conv size-preserving; without it the pool5 score map loses
        # 6 pixels per dimension and can never line up with pool4 after the
        # 2x upsampling.
        self.score_fr = nn.Sequential(
            nn.Conv2d(512, 4096, kernel_size=7, padding=3),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),
            nn.Conv2d(4096, 4096, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),
            nn.Conv2d(4096, num_classes, kernel_size=1)
        )

        # Transposed convolutions for upsampling. With these
        # kernel/stride/padding values the output is exactly 2x, 2x and 8x
        # the input size respectively.
        self.upscore_pool5 = nn.ConvTranspose2d(
            num_classes, num_classes, kernel_size=4, stride=2, padding=1
        )  # score_fr -> score_fr*2
        self.upscore_pool4 = nn.ConvTranspose2d(
            num_classes, num_classes, kernel_size=4, stride=2, padding=1
        )  # (score_fr*2 + score_pool4) -> *2
        self.upscore_pool3 = nn.ConvTranspose2d(
            num_classes, num_classes, kernel_size=16, stride=8, padding=4
        )  # [(score_fr*2 + score_pool4)*2 + score_pool3] -> *8

    @staticmethod
    def _resize_to(x, size):
        """Return ``x`` unchanged if its last two dims equal ``size``, else resize bilinearly."""
        if tuple(x.shape[-2:]) == tuple(size):
            return x
        return nn.functional.interpolate(
            x, size=tuple(size), mode="bilinear", align_corners=False
        )

    def forward(self, x):
        """Compute per-class logits for a batch of images.

        Args:
            x: Image batch with 3 channels. Each spatial side must be at
                least 32 so that the pool5 feature map is non-empty.

        Returns:
            Tensor with ``num_classes`` channels and the same spatial size as
            ``x``.
        """
        input_size = x.shape[-2:]

        # Store intermediate outputs for skip connections
        pool3 = None
        pool4 = None
        for index, layer in enumerate(self.features):
            x = layer(x)
            if index == self._POOL3_INDEX:
                pool3 = x
            elif index == self._POOL4_INDEX:
                pool4 = x

        # x is now the output of the last pooling layer (pool5)
        x = self.score_fr(x)

        # Upsample the pool5 score by 2 and add the pool4 skip connection.
        # The backbone pools with floor rounding, so when the input size is
        # not a multiple of 32 the upsampled map can be smaller than the skip
        # map; _resize_to aligns them (and is a no-op otherwise).
        x = self.upscore_pool5(x)
        pool4_score = self.score_pool4(pool4)
        x = self._resize_to(x, pool4_score.shape[-2:]) + pool4_score

        # Upsample the fused pool4+pool5 score by 2 and add the pool3 skip
        x = self.upscore_pool4(x)
        pool3_score = self.score_pool3(pool3)
        x = self._resize_to(x, pool3_score.shape[-2:]) + pool3_score

        # Finally, upsample the fused pool3+pool4+pool5 score by 8
        x = self.upscore_pool3(x)

        # Guarantee the output matches the input's spatial dimensions
        return self._resize_to(x, input_size)
        

In [3]:
# Stand-alone copy of the batch-norm VGG16 backbone (ImageNet checkpoint),
# kept at module level so its layers can be inspected in the next cell.
vgg16 = models.vgg16_bn(
    progress=True,
    weights="IMAGENET1K_V1",
)

In [7]:
# Display the ordered layer list of the feature extractor so the integer
# positions that FCN8s.forward compares against when capturing pool3/pool4
# can be checked by eye.
vgg16.features

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): ReLU(inplace=True)
  (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (9): ReLU(inplace=True)
  (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (12): ReLU(inplace=True)
  (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (14): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 