In [7]:
import sys
sys.path.append('../')

import torch
from networks.ResNet import ResNetMel
from torchsummary import summary
import torchvision.models as models
import torch.nn as nn
from networks.VGG16 import VGG16_BN, VGG16_BN_Attention
import torch.nn.functional as F

# To allow auto reload to this notebook after modifying any external file imported
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

In [3]:
# Build the project's ResNet wrapper with num_classes=2, move it to the
# selected device, then print its module tree.
resnet_mel = ResNetMel(num_classes=2)
resnet_mel = resnet_mel.to(device)
print(resnet_mel)



ResNetMel(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          

In [4]:
# Per-layer output shapes and parameter counts for a channels-first 3 x 224 x 224 input.
input_shape = (3, 224, 224)
summary(resnet_mel, input_size=input_shape)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256,

In [5]:
# List every named parameter of resnet_mel together with its requires_grad flag,
# which shows which layers are frozen and which are trainable.
for layer_name, tensor in resnet_mel.named_parameters():
    print(f"Layer: {layer_name}, Requires Grad: {tensor.requires_grad}")


Layer: model.conv1.weight, Requires Grad: False
Layer: model.bn1.weight, Requires Grad: False
Layer: model.bn1.bias, Requires Grad: False
Layer: model.layer1.0.conv1.weight, Requires Grad: False
Layer: model.layer1.0.bn1.weight, Requires Grad: False
Layer: model.layer1.0.bn1.bias, Requires Grad: False
Layer: model.layer1.0.conv2.weight, Requires Grad: False
Layer: model.layer1.0.bn2.weight, Requires Grad: False
Layer: model.layer1.0.bn2.bias, Requires Grad: False
Layer: model.layer1.0.conv3.weight, Requires Grad: False
Layer: model.layer1.0.bn3.weight, Requires Grad: False
Layer: model.layer1.0.bn3.bias, Requires Grad: False
Layer: model.layer1.0.downsample.0.weight, Requires Grad: False
Layer: model.layer1.0.downsample.1.weight, Requires Grad: False
Layer: model.layer1.0.downsample.1.bias, Requires Grad: False
Layer: model.layer1.1.conv1.weight, Requires Grad: False
Layer: model.layer1.1.bn1.weight, Requires Grad: False
Layer: model.layer1.1.bn1.bias, Requires Grad: False
Layer: model

In [15]:
# DenseNet-161 from torchvision with pretrained weights; printed to inspect its layers.
# NOTE(review): torchvision >= 0.13 deprecates `pretrained=` in favor of `weights=` —
# confirm the installed version before migrating this call.
densenet_model = models.densenet161(pretrained=True)
print(densenet_model)

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(96, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(192, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (rel

In [16]:
# Display the pretrained DenseNet's `classifier` module (its final Linear layer).
densenet_model.classifier

Linear(in_features=2208, out_features=1000, bias=True)

In [53]:
from networks.DenseNet import DenseNetMel

In [54]:
# Build the project's DenseNet wrapper with num_classes=2.
# Unlike the other models in this notebook, no `.to(device)` call is made here.
densenet_mel = DenseNetMel(num_classes=2)

In [55]:
# Print the module tree of the DenseNetMel instance.
print(densenet_mel)

DenseNetMel(
  (features): Sequential(
    (conv0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(96, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(192, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (

In [56]:
# List every named parameter of densenet_mel together with its requires_grad flag,
# which shows which layers are frozen and which are trainable.
for layer_name, tensor in densenet_mel.named_parameters():
    print(f"Layer: {layer_name}, Requires Grad: {tensor.requires_grad}")


Layer: features.conv0.weight, Requires Grad: False
Layer: features.norm0.weight, Requires Grad: False
Layer: features.norm0.bias, Requires Grad: False
Layer: features.denseblock1.denselayer1.norm1.weight, Requires Grad: False
Layer: features.denseblock1.denselayer1.norm1.bias, Requires Grad: False
Layer: features.denseblock1.denselayer1.conv1.weight, Requires Grad: False
Layer: features.denseblock1.denselayer1.norm2.weight, Requires Grad: False
Layer: features.denseblock1.denselayer1.norm2.bias, Requires Grad: False
Layer: features.denseblock1.denselayer1.conv2.weight, Requires Grad: False
Layer: features.denseblock1.denselayer2.norm1.weight, Requires Grad: False
Layer: features.denseblock1.denselayer2.norm1.bias, Requires Grad: False
Layer: features.denseblock1.denselayer2.conv1.weight, Requires Grad: False
Layer: features.denseblock1.denselayer2.norm2.weight, Requires Grad: False
Layer: features.denseblock1.denselayer2.norm2.bias, Requires Grad: False
Layer: features.denseblock1.dens

In [56]:
# Load torchvision's pretrained VGG16 with batch normalization and place it on the selected device.
vgg16_bn_base = models.vgg16_bn(pretrained=True)
vgg16_bn_base = vgg16_bn_base.to(device)

# Per-layer output shapes and parameter counts for a (3, 224, 224) input.
summary(vgg16_bn_base, input_size=(3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
       BatchNorm2d-2         [-1, 64, 224, 224]             128
              ReLU-3         [-1, 64, 224, 224]               0
            Conv2d-4         [-1, 64, 224, 224]          36,928
       BatchNorm2d-5         [-1, 64, 224, 224]             128
              ReLU-6         [-1, 64, 224, 224]               0
         MaxPool2d-7         [-1, 64, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]          73,856
       BatchNorm2d-9        [-1, 128, 112, 112]             256
             ReLU-10        [-1, 128, 112, 112]               0
           Conv2d-11        [-1, 128, 112, 112]         147,584
      BatchNorm2d-12        [-1, 128, 112, 112]             256
             ReLU-13        [-1, 128, 112, 112]               0
        MaxPool2d-14          [-1, 128,

In [57]:
# vgg16_base = models.vgg16(pretrained=True).to(device)
# summary(vgg16_base, input_size=(3, 224, 224))

In [58]:
# Print the module tree of the pretrained VGG16-BN; the numeric indices shown under
# `features` are the ones used when slicing it into conv blocks.
print(vgg16_bn_base)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

In [59]:
# VGG16-BN's `features` stack consists of five conv blocks, each followed by a
# MaxPool2d at indices 6, 13, 23, 33 and 43. The slices below keep only the
# Conv2d/BatchNorm2d/ReLU layers of each block and leave those pooling layers out.
feature_layers = list(vgg16_bn_base.features.children())  # materialize once, slice many times

# Guard the hard-coded slice bounds: every skipped index must really be a pooling layer.
pool_indices = (6, 13, 23, 33, 43)
assert len(feature_layers) > max(pool_indices) and all(
    isinstance(feature_layers[idx], nn.MaxPool2d) for idx in pool_indices
), "unexpected vgg16_bn.features layout; update the slice bounds below"

conv_block1 = nn.Sequential(*feature_layers[0:6])
conv_block2 = nn.Sequential(*feature_layers[7:13])
conv_block3 = nn.Sequential(*feature_layers[14:23])
conv_block4 = nn.Sequential(*feature_layers[24:33])
conv_block5 = nn.Sequential(*feature_layers[34:43])

# Pretrained fully connected head with its last layer dropped.
classifier  = nn.Sequential(*list(vgg16_bn_base.classifier.children())[:-1])

# Fresh pooling modules; the max-pool settings mirror the MaxPool2d layers
# printed for vgg16_bn_base.features.
avgpool2d = nn.AdaptiveAvgPool2d(output_size=(7, 7))
maxpool2d = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)

In [52]:
# Print each extracted piece in order: the five conv blocks, the two pooling
# modules, and the truncated classifier head.
for module in (
    conv_block1,
    conv_block2,
    conv_block3,
    conv_block4,
    conv_block5,
    avgpool2d,
    maxpool2d,
    classifier,
):
    print(module)

# Input width of the base model's last classifier layer.
print(vgg16_bn_base.classifier[-1].in_features)

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): ReLU(inplace=True)
)
Sequential(
  (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): ReLU(inplace=True)
)
Sequential(
  (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): Conv2d(256, 256,

In [65]:
# vgg16_bn_base.classifier[-1].out_features = 2
# Output width of the pretrained head's last layer, before any replacement.
print(vgg16_bn_base.classifier[-1].out_features)

# Wrap the pretrained head's layers in a new Sequential container and show it.
head_layers = list(vgg16_bn_base.classifier.children())
classifier  = nn.Sequential(*head_layers)
print(classifier)

# New final fully connected layer: same input width as the layer it replaces,
# with 2 outputs to match our problem.
in_features = head_layers[-1].in_features
fc = nn.Linear(in_features, 2)

# Swap the last entry of the new container for the fresh layer and show the result.
classifier[-1] = fc
print(classifier)

1000
Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)
Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=2, bias=True)
)


In [43]:
# Create the project's VGG16_BN model with num_classes=2, then move it to the selected device.
vgg16_bn = VGG16_BN(num_classes=2)
vgg16_bn = vgg16_bn.to(device)
# print(vgg16_bn)

[32m2023-11-24 22:42:27.315[0m | [1mINFO    [0m | [36mnetworks.VGG16[0m:[36m__init__[0m:[36m70[0m - [1mUsing VGG16_BN with configurations: num_classes='2'[0m


In [44]:
# Per-layer output shapes and parameter counts for a channels-first 3 x 224 x 224 input.
# NOTE(review): the `torch.Size(...)` line printed before the table in this cell's
# saved output looks like a leftover debug print in the model's forward pass —
# confirm in networks.VGG16.
input_shape = (3, 224, 224)
summary(vgg16_bn, input_size=input_shape)

torch.Size([2, 256, 28, 28]) torch.Size([2, 512, 7, 7])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
       BatchNorm2d-2         [-1, 64, 224, 224]             128
              ReLU-3         [-1, 64, 224, 224]               0
            Conv2d-4         [-1, 64, 224, 224]          36,928
       BatchNorm2d-5         [-1, 64, 224, 224]             128
              ReLU-6         [-1, 64, 224, 224]               0
         MaxPool2d-7         [-1, 64, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]          73,856
       BatchNorm2d-9        [-1, 128, 112, 112]             256
             ReLU-10        [-1, 128, 112, 112]               0
           Conv2d-11        [-1, 128, 112, 112]         147,584
      BatchNorm2d-12        [-1, 128, 112, 112]             256
             ReLU-13        [-1, 128, 112, 112]

In [5]:
# Loop over all layers and check if they require gradients
for name, param in vgg16_bn.named_parameters():
    print(f"Layer: {name}, Requires Grad: {param.requires_grad}")

Layer: conv_block1.0.weight, Requires Grad: False
Layer: conv_block1.0.bias, Requires Grad: False
Layer: conv_block1.1.weight, Requires Grad: False
Layer: conv_block1.1.bias, Requires Grad: False
Layer: conv_block1.3.weight, Requires Grad: False
Layer: conv_block1.3.bias, Requires Grad: False
Layer: conv_block1.4.weight, Requires Grad: False
Layer: conv_block1.4.bias, Requires Grad: False
Layer: conv_block2.0.weight, Requires Grad: False
Layer: conv_block2.0.bias, Requires Grad: False
Layer: conv_block2.1.weight, Requires Grad: False
Layer: conv_block2.1.bias, Requires Grad: False
Layer: conv_block2.3.weight, Requires Grad: False
Layer: conv_block2.3.bias, Requires Grad: False
Layer: conv_block2.4.weight, Requires Grad: False
Layer: conv_block2.4.bias, Requires Grad: False
Layer: conv_block3.0.weight, Requires Grad: False
Layer: conv_block3.0.bias, Requires Grad: False
Layer: conv_block3.1.weight, Requires Grad: False
Layer: conv_block3.1.bias, Requires Grad: False
Layer: conv_block3.3

In [9]:
# Instantiate VGG16_BN_Attention (from networks.VGG16) with num_classes=2 and move it to the selected device.
vgg16bn_attention = VGG16_BN_Attention(num_classes=2).to(device)

[32m2023-11-25 12:46:37.070[0m | [1mINFO    [0m | [36mnetworks.VGG16[0m:[36m__init__[0m:[36m69[0m - [1mUsing VGG16_BN_Attention with configurations: num_classes='2'[0m


In [10]:
# Print the module tree of the VGG16_BN_Attention instance.
print(vgg16bn_attention)

VGG16_BN_Attention(
  (conv_block1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
  )
  (conv_block2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
  )
  (conv_block3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(256, eps=1e-05,

In [11]:
# List every named parameter of vgg16bn_attention together with its requires_grad flag,
# which shows which layers are frozen and which are trainable.
for layer_name, tensor in vgg16bn_attention.named_parameters():
    print(f"Layer: {layer_name}, Requires Grad: {tensor.requires_grad}")

Layer: conv_block1.0.weight, Requires Grad: True
Layer: conv_block1.0.bias, Requires Grad: True
Layer: conv_block1.1.weight, Requires Grad: True
Layer: conv_block1.1.bias, Requires Grad: True
Layer: conv_block1.3.weight, Requires Grad: True
Layer: conv_block1.3.bias, Requires Grad: True
Layer: conv_block1.4.weight, Requires Grad: True
Layer: conv_block1.4.bias, Requires Grad: True
Layer: conv_block2.0.weight, Requires Grad: True
Layer: conv_block2.0.bias, Requires Grad: True
Layer: conv_block2.1.weight, Requires Grad: True
Layer: conv_block2.1.bias, Requires Grad: True
Layer: conv_block2.3.weight, Requires Grad: True
Layer: conv_block2.3.bias, Requires Grad: True
Layer: conv_block2.4.weight, Requires Grad: True
Layer: conv_block2.4.bias, Requires Grad: True
Layer: conv_block3.0.weight, Requires Grad: True
Layer: conv_block3.0.bias, Requires Grad: True
Layer: conv_block3.1.weight, Requires Grad: True
Layer: conv_block3.1.bias, Requires Grad: True
Layer: conv_block3.3.weight, Requires Gr

In [40]:
# summary(vgg16bn_attention, input_size=(3, 224, 224))