In [26]:
import torch
import torch.nn as nn
from torchvision.models.densenet import DenseNet121_Weights, densenet121
from torchvision.models.mobilenetv2 import mobilenet_v2,MobileNet_V2_Weights

## Visualising MobileNetV2 architecture

In [27]:
# Print the MobileNetV2 feature extractor to inspect its layer layout.
# `weights` expects a concrete enum member passed by keyword; handing over the
# enum class positionally only works through torchvision's deprecated legacy
# `pretrained` handling (which warns and falls back to IMAGENET1K_V1).
print(mobilenet_v2(weights=MobileNet_V2_Weights.IMAGENET1K_V1).features)



Sequential(
  (0): Conv2dNormActivation(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU6(inplace=True)
  )
  (1): InvertedResidual(
    (conv): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
      )
      (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (2): InvertedResidual(
    (conv): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (

## Original DenseNet-121 backbone

In [28]:
# ImageNet-pretrained DenseNet-121; only the convolutional feature extractor
# (`.features`) is kept, the classification head is discarded.
backbone_dense = densenet121(
    weights=DenseNet121_Weights.IMAGENET1K_V1,
).features

In [29]:
# Split the DenseNet-121 feature extractor into five consecutive stages and
# push a dummy image through them to inspect each stage's output shape.
backbones = nn.ModuleList([
    backbone_dense[:4],
    backbone_dense.denseblock1,
    nn.Sequential(backbone_dense.transition1, backbone_dense.denseblock2),
    nn.Sequential(backbone_dense.transition2, backbone_dense.denseblock3),
    nn.Sequential(backbone_dense.transition3, backbone_dense.denseblock4),
])

# Shape probing only: switch to eval mode so the random dummy input does not
# overwrite the pretrained BatchNorm running statistics, and disable autograd
# so no graph is built for tensors that are never back-propagated.
backbones.eval()
with torch.no_grad():
    output_part_1 = backbones[0](torch.rand(1, 3, 224, 224))  # first four layers of the feature extractor
    output_part_2 = backbones[1](output_part_1)  # denseblock1
    output_part_3 = backbones[2](output_part_2)  # transition1 + denseblock2
    output_part_4 = backbones[3](output_part_3)  # transition2 + denseblock3
    output_part_5 = backbones[4](output_part_4)  # transition3 + denseblock4

# Print the sizes of the outputs
print("Output Size - Part 1:", output_part_1.size())
print("Output Size - Part 2:", output_part_2.size())
print("Output Size - Part 3:", output_part_3.size())
print("Output Size - Part 4:", output_part_4.size())
print("Output Size - Part 5:", output_part_5.size())

Output Size - Part 1: torch.Size([1, 64, 56, 56])
Output Size - Part 2: torch.Size([1, 256, 56, 56])
Output Size - Part 3: torch.Size([1, 512, 28, 28])
Output Size - Part 4: torch.Size([1, 1024, 14, 14])
Output Size - Part 5: torch.Size([1, 1024, 7, 7])


## Modified backbone using MobileNetV2 instead of DenseNet (the version included in retinanet.py)

In [30]:
# ImageNet-pretrained MobileNetV2; only the feature extractor is kept.
# `weights` expects a concrete enum member passed by keyword; handing over the
# enum class positionally only works through torchvision's deprecated legacy
# `pretrained` handling (which warns and falls back to IMAGENET1K_V1).
backbone_mobile = mobilenet_v2(weights=MobileNet_V2_Weights.IMAGENET1K_V1).features

In [40]:
# Modified MobileNetV2 backbone, split into five stages.
#
# Every stage except the second one follows the same recipe:
#   1. run a slice of the MobileNetV2 feature extractor,
#   2. bilinearly upsample the result back to the stage's input resolution,
#   3. project the channels back to the stage's input channel count with a
#      freshly initialised 1x1 convolution,
#   4. run the very same slice again (same module objects, so the weights are
#      shared between both passes).
# The second stage simply applies `backbone_mobile[3:4]` twice, because that
# slice keeps both the resolution and the channel count unchanged.
# NOTE(review): `F` is not used in this cell; consider moving this import to
# the imports cell at the top of the notebook.
import torch.nn.functional as F
backbones_mobile = nn.ModuleList([
    nn.Sequential(
        backbone_mobile[:3],
        nn.Upsample(size=(224, 224), mode='bilinear', align_corners=False),
        nn.Conv2d(in_channels=24, out_channels=3, kernel_size=1),
        backbone_mobile[:3],
    ),
    nn.Sequential(
        backbone_mobile[3:4],
        backbone_mobile[3:4],
    ),
    nn.Sequential(
        backbone_mobile[4:7],
        nn.Upsample(size=(56, 56), mode='bilinear', align_corners=False),
        nn.Conv2d(in_channels=32, out_channels=24, kernel_size=1),
        backbone_mobile[4:7],
    ),
    nn.Sequential(
        backbone_mobile[7:14],
        nn.Upsample(size=(28, 28), mode='bilinear', align_corners=False),
        nn.Conv2d(in_channels=96, out_channels=32, kernel_size=1),
        backbone_mobile[7:14],
    ),
    nn.Sequential(
        backbone_mobile[14:],
        nn.Upsample(size=(14, 14), mode='bilinear', align_corners=False),
        nn.Conv2d(in_channels=1280, out_channels=96, kernel_size=1),
        backbone_mobile[14:],
    ),
])

# Shape probing only: switch to eval mode so the random dummy input does not
# overwrite the pretrained BatchNorm running statistics, and disable autograd
# so no graph is built for tensors that are never back-propagated.
backbones_mobile.eval()
with torch.no_grad():
    output_part_1 = backbones_mobile[0](torch.rand(1, 3, 224, 224))
    output_part_2 = backbones_mobile[1](output_part_1)
    output_part_3 = backbones_mobile[2](output_part_2)
    output_part_4 = backbones_mobile[3](output_part_3)
    output_part_5 = backbones_mobile[4](output_part_4)

# Print the sizes of the outputs
print("Output Size - Part 1:", output_part_1.size())
print("Output Size - Part 2:", output_part_2.size())
print("Output Size - Part 3:", output_part_3.size())
print("Output Size - Part 4:", output_part_4.size())
print("Output Size - Part 5:", output_part_5.size())

Output Size - Part 1: torch.Size([1, 24, 56, 56])
Output Size - Part 2: torch.Size([1, 24, 56, 56])
Output Size - Part 3: torch.Size([1, 32, 28, 28])
Output Size - Part 4: torch.Size([1, 96, 14, 14])
Output Size - Part 5: torch.Size([1, 1280, 7, 7])


## Original MobileNetV2 architecture without changes

In [32]:
# Unmodified MobileNetV2 feature extractor, split into five consecutive stages
# so that each stage's output shape can be inspected.
backbones_mobile = nn.ModuleList([
    nn.Sequential(backbone_mobile[:3]),    # out channels: 24
    nn.Sequential(backbone_mobile[3:4]),   # out channels: 24
    nn.Sequential(backbone_mobile[4:7]),   # out channels: 32
    nn.Sequential(backbone_mobile[7:14]),  # out channels: 96
    nn.Sequential(backbone_mobile[14:]),   # out channels: 1280
])

# Shape probing only: switch to eval mode so the random dummy input does not
# overwrite the pretrained BatchNorm running statistics, and disable autograd
# so no graph is built for tensors that are never back-propagated.
backbones_mobile.eval()
with torch.no_grad():
    output_part_1 = backbones_mobile[0](torch.rand(1, 3, 224, 224))  # Output of backbone_mobile[:3]
    output_part_2 = backbones_mobile[1](output_part_1)  # Output of backbone_mobile[3:4]
    output_part_3 = backbones_mobile[2](output_part_2)  # Output of backbone_mobile[4:7]
    output_part_4 = backbones_mobile[3](output_part_3)  # Output of backbone_mobile[7:14]
    output_part_5 = backbones_mobile[4](output_part_4)  # Output of backbone_mobile[14:]

# Print the sizes of the outputs
print("Output Size - Part 1:", output_part_1.size())
print("Output Size - Part 2:", output_part_2.size())
print("Output Size - Part 3:", output_part_3.size())
print("Output Size - Part 4:", output_part_4.size())
print("Output Size - Part 5:", output_part_5.size())

Output Size - Part 1: torch.Size([1, 24, 56, 56])
Output Size - Part 2: torch.Size([1, 24, 56, 56])
Output Size - Part 3: torch.Size([1, 32, 28, 28])
Output Size - Part 4: torch.Size([1, 96, 14, 14])
Output Size - Part 5: torch.Size([1, 1280, 7, 7])


In [None]:
# Unmodified MobileNetV2 feature extractor, split into five consecutive stages.
# NOTE(review): this cell repeats the previous "original architecture" cell;
# consider keeping only one of them.
backbones_mobile = nn.ModuleList([
    nn.Sequential(backbone_mobile[:3]),
    nn.Sequential(backbone_mobile[3:4]),
    nn.Sequential(backbone_mobile[4:7]),
    nn.Sequential(backbone_mobile[7:14]),
    nn.Sequential(backbone_mobile[14:]),
])

# Shape probing only: switch to eval mode so the random dummy input does not
# overwrite the pretrained BatchNorm running statistics, and disable autograd
# so no graph is built for tensors that are never back-propagated.
backbones_mobile.eval()
with torch.no_grad():
    output_part_1 = backbones_mobile[0](torch.rand(1, 3, 224, 224))
    output_part_2 = backbones_mobile[1](output_part_1)
    output_part_3 = backbones_mobile[2](output_part_2)
    output_part_4 = backbones_mobile[3](output_part_3)
    output_part_5 = backbones_mobile[4](output_part_4)

# Print the sizes of the outputs (expected values noted alongside)
print("Output Size - Part 1:", output_part_1.size()) # torch.Size([1, 24, 56, 56])
print("Output Size - Part 2:", output_part_2.size()) # torch.Size([1, 24, 56, 56])
print("Output Size - Part 3:", output_part_3.size()) # torch.Size([1, 32, 28, 28])
print("Output Size - Part 4:", output_part_4.size()) # torch.Size([1, 96, 14, 14])
print("Output Size - Part 5:", output_part_5.size()) # torch.Size([1, 1280, 7, 7])

Output Size - Part 1: torch.Size([1, 24, 56, 56])
Output Size - Part 2: torch.Size([1, 24, 56, 56])
Output Size - Part 3: torch.Size([1, 32, 28, 28])
Output Size - Part 4: torch.Size([1, 96, 14, 14])
Output Size - Part 5: torch.Size([1, 1280, 7, 7])
