In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn as nn
from torchsummary import summary

In [2]:
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

### Load the dataset

In [3]:
import sys

# Make the sibling `util` directory importable so `pocovid_dataset` resolves.
# The path is relative to the notebook's working directory. The membership
# check keeps sys.path free of duplicate entries when this cell is re-run.
if '../util' not in sys.path:
    sys.path.append('../util')
from pocovid_dataset import PocovidDataset

In [4]:
# Per-image pipeline: resize to 224x224, apply a small random affine jitter
# (RandomAffine with degrees=10 and translate=(0.1, 0.1)), then convert the
# image to a tensor.
image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomAffine(10, translate=(0.1, 0.1)),
    transforms.ToTensor(),
])

# The pipeline is handed to PocovidDataset through its `transform` argument.
dataset = PocovidDataset(root_dir='../data/image_dataset', transform=image_transform)

In [5]:
# Sanity check: shape of the 'image' entry of the first dataset sample.
dataset[0]['image'].shape

torch.Size([3, 224, 224])

### models/vgg.py

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

class VGG16_model_2(nn.Module):
    """VGG16 convolutional backbone followed by a small classification head.

    The backbone is torchvision's VGG16 `features` stack loaded with
    pretrained weights. All backbone parameters are frozen except the weight
    and bias of its last convolutional layer; the head
    (avgpool -> flatten -> fc1 -> batchnorm -> relu -> dropout -> fc2)
    is fully trainable.
    """

    def __init__(self,
                 input_size: tuple = (3, 224, 224),
                 hidden_size: int = 64,
                 dropout: float = 0.5,
                 num_classes: int = 3,
                 **kwargs
                ):
        """
        Initialize a new network

        Inputs:
        - input_size: Tuple, size of input data. Accepted for interface
          compatibility but not read by the constructor; `fc1` is fixed at
          512 input features, so the pooled feature map must flatten to
          512 values per sample.
        - hidden_size: Integer, number of units to use in hidden layer
        - dropout: Float, dropout coefficient
        - num_classes: Integer, number of classes
        - **kwargs: accepted and ignored
        """

        super(VGG16_model_2, self).__init__()

        # load the VGG16 network
        # NOTE(review): newer torchvision releases prefer the `weights=`
        # argument over `pretrained=True` -- confirm against the installed
        # version before changing.
        backbone = models.vgg16(pretrained=True)

        # Freeze the backbone except its last conv layer. Only parameter
        # tensors are listed (pooling/activation layers have none), so the
        # final two tensors are that layer's weight and bias; for VGG16 this
        # freezes the first 24 of the 26 feature tensors.
        trainable_tail = 2
        feature_params = list(backbone.features.parameters())
        for param in feature_params[:-trainable_tail]:
            param.requires_grad = False

        # Keep only the convolutional (sequential) part; the original VGG
        # classifier is discarded and replaced by the head below.
        self.model = backbone.features

        self.avgpool = nn.AvgPool2d(4)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(512, hidden_size)
        self.bn = nn.BatchNorm1d(hidden_size)
        # nn.ReLU's only argument is the boolean `inplace` flag, not a layer
        # width; state it explicitly instead of passing `hidden_size`.
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """
        Run the backbone and the classification head.

        Inputs:
        - x: batch of images accepted by the VGG16 feature extractor

        Returns:
        - Tensor with one row per sample and `num_classes` columns
          (raw scores; no softmax is applied here)
        """
        x = self.model(x)
        x = self.avgpool(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [8]:
# Build the classifier and place it on the selected device
# (nn.Module.to returns the module itself, so the call can be chained).
model_2 = VGG16_model_2(input_size=(3, 224, 224), num_classes=3).to(device)

# Print a layer-by-layer summary for a single 3x224x224 input.
summary(model_2, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256,

In [9]:
# Collect (element count, requires_grad) for every parameter tensor once,
# then derive both totals from that list.
param_sizes = [(p.numel(), p.requires_grad) for p in model_2.parameters()]

total_params = sum(size for size, _ in param_sizes)
print(f'{total_params:,} total parameters.')

total_trainable_params = sum(size for size, trainable in param_sizes if trainable)
print(f'{total_trainable_params:,} training parameters.')

14,747,843 total parameters.
2,392,963 training parameters.


### Note

#### The original Pocovidnet (Keras) model reports the following trainable/non-trainable parameter counts:

Total:         14,747,971

Trainable:      2,392,963

Non trainable: 12,355,008

##### Potential reason

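I am able to replicate everything except the parameter count of the BatchNorm layer, and I looked into the reason for that.
Keras and PyTorch treat the running mean and running variance of BatchNorm differently: Keras counts them as (non-trainable) parameters, whereas PyTorch stores them as buffers that `model.parameters()` does not include.
This makes the BatchNorm parameter count in PyTorch half of the Keras count (2 × 64 = 128 instead of 4 × 64 = 256), which accounts exactly for the gap in the totals (14,747,971 − 14,747,843 = 128). The number of trainable parameters is the same in both (2,392,963).
The extra parameters Keras reports are these running statistics, which are not changed during backpropagation.
So, I think we are good.
If an identical total is needed, the PyTorch count can be made to match Keras by manually adding the BatchNorm `running_mean` and `running_var` buffers to the sum.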