In [1]:
import torchvision
import torch
import torch.nn as nn

In [2]:
# Random dummy batch of shape (1, 3, 224, 224); the cells below feed it through
# each network to probe output shapes.
input_sample = torch.randn((1, 3, 224, 224))

In [30]:
# Build VGG16 with default constructor arguments and report how many
# trainable parameters it has, in millions.
model = torchvision.models.vgg16()
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"million of params: {trainable_params / 1000 / 1000:.2f}")
print(model)

# Rebuild the network from all of its top-level children except the last one,
# then push the dummy batch through to see the resulting output shape.
top_level_children = list(model.children())
model = nn.Sequential(*top_level_children[:-1])
print(f"last layer shape:{model(input_sample).shape}")

million of params: 138.36
VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, strid

In [21]:
# Build VGG19 with default constructor arguments and report how many
# trainable parameters it has, in millions.
model = torchvision.models.vgg19()
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"million of params: {trainable_params / 1000 / 1000:.2f}")

# Rebuild the network from all of its top-level children except the last one,
# then push the dummy batch through to see the resulting output shape.
top_level_children = list(model.children())
model = nn.Sequential(*top_level_children[:-1])
print(f"last layer shape:{model(input_sample).shape}")

million of params: 143.67
last layer shape:torch.Size([1, 512, 7, 7])


In [37]:
# Build ResNet50 with default constructor arguments and report how many
# trainable parameters it has, in millions.
model = torchvision.models.resnet50()
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"million of params: {trainable_params / 1000 / 1000:.2f}")
print(model)

# Rebuild the network from all of its top-level children except the last one,
# then push the dummy batch through to see the resulting output shape.
top_level_children = list(model.children())
model = nn.Sequential(*top_level_children[:-1])
print(f"last layer shape:{model(input_sample).shape}")

million of params: 25.56
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel

In [28]:
# Build ResNet152 with the "DEFAULT" weights (downloaded on first use) and
# report how many trainable parameters it has, in millions.
model = torchvision.models.resnet152(weights="DEFAULT")
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"million of params: {trainable_params / 1000 / 1000:.2f}")

# Rebuild the network from all of its top-level children except the last one,
# then push the dummy batch through to see the resulting output shape.
top_level_children = list(model.children())
model = nn.Sequential(*top_level_children[:-1])
print(f"last layer shape:{model(input_sample).shape}")

Downloading: "https://download.pytorch.org/models/resnet152-f82ba261.pth" to /home/giovani/.cache/torch/hub/checkpoints/resnet152-f82ba261.pth
100%|██████████| 230M/230M [00:04<00:00, 59.4MB/s] 


million of params: 60.19
last layer shape:torch.Size([1, 2048, 1, 1])


In [11]:
# Number of values in one flattened (512, 7, 7) feature map.
512 * 7 * 7

25088

In [20]:
class PetrainedEncoder(nn.Module):
    """
    Convolutional encoder that uses a pretrained torchvision model as its base.

    The last top-level child of the pretrained network is dropped and a new
    head is appended that maps the remaining feature map to the latent space:

    * VGG variants only: ``Conv2d(512, 128, 3, padding=1)`` + ``ReLU`` to reduce
      the number of channels before flattening.
    * All variants: ``Flatten`` -> ``Linear(flat, 4096)`` -> ``ReLU`` ->
      ``Dropout(0.5)`` -> ``Linear(4096, latent_dim)``.

    The pretrained model can be one of: vgg16, vgg19, resnet50, resnet152.

    NOTE(review): the class name looks like a typo of "PretrainedEncoder"; it
    is kept unchanged so existing callers keep working.

    Inputs:
        latent_dim: int with the dimension of the latent space
        arch: string with the name of the pretrained model
        lock_weights: bool to lock the weights of the pretrained model. When
            True the pretrained layers have ``requires_grad=False`` *and* are
            kept in eval mode even after ``.train()`` is called, so that
            normalization layers keep their pretrained running statistics.
            The newly added head is always trainable.

    Raises:
        AssertionError: if ``arch`` is not one of the supported names.
    """

    # Number of features entering the first Linear layer, per architecture.
    # VGG: 128 channels (after the extra conv) * 7 * 7 = 6272.
    # ResNet: 2048 values after the truncated network's final pooling.
    # These sizes were checked in this notebook with 224x224 inputs only.
    _FLAT_FEATURES = {
        "vgg16": 6272,
        "vgg19": 6272,
        "resnet50": 2048,
        "resnet152": 2048,
    }

    def __init__(self, latent_dim, arch="vgg16", lock_weights=True):
        super().__init__()
        supported = tuple(self._FLAT_FEATURES)
        assert arch in supported, (
            f"unsupported arch {arch!r}; expected one of {list(supported)}"
        )
        self.latent_dim = latent_dim
        self.arch = arch
        self.lock_weights = lock_weights
        # How many leading entries of self.model come from the pretrained
        # network; set by _get_model() and used by train() to keep them frozen.
        self._num_backbone_layers = 0
        self.model = self._get_model()

    def _get_model(self):
        """Assemble the truncated pretrained backbone plus the new head."""
        # `arch` was validated in __init__, so the constructor lookup by name
        # cannot silently fall through the way an if/elif chain could.
        backbone = getattr(torchvision.models, self.arch)(weights="DEFAULT")

        if self.lock_weights:
            for param in backbone.parameters():
                param.requires_grad = False
            # Freezing gradients alone does not stop BatchNorm layers from
            # updating their running statistics; eval mode does.
            backbone.eval()

        # Drop the last top-level child and keep the rest as a flat list so
        # the resulting Sequential (and its state_dict keys) stay flat.
        layers = list(backbone.children())[:-1]
        self._num_backbone_layers = len(layers)

        if "vgg" in self.arch:
            # add a convolutional layer to reduce the number of channels
            layers += [
                nn.Conv2d(512, 128, kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
            ]
        layers += [
            nn.Flatten(),
            nn.Linear(self._FLAT_FEATURES[self.arch], 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, self.latent_dim),
        ]
        return nn.Sequential(*layers)

    def train(self, mode=True):
        """
        Set the training mode, keeping a locked backbone in eval mode.

        Behaves like ``nn.Module.train`` for the head. When ``lock_weights`` is
        True the pretrained layers are switched back to eval mode afterwards.
        Returns ``self`` like the base implementation.
        """
        super().train(mode)
        if self.lock_weights:
            frozen = list(self.model.children())[: self._num_backbone_layers]
            for layer in frozen:
                layer.eval()
        return self

    def forward(self, x):
        """Encode the batch ``x`` into a ``(batch, latent_dim)`` tensor."""
        return self.model(x)


In [21]:
# Build a VGG16-based encoder with a 100-dimensional latent space, then check
# the output shape for the dummy batch and the trainable-parameter count
# (in millions).
t = PetrainedEncoder(latent_dim=100, arch="vgg16")
latent = t(input_sample)
print(latent.shape)
trainable_params = sum(p.numel() for p in t.parameters() if p.requires_grad)
print(trainable_params / 1000 / 1000)

torch.Size([1, 100])
26.69386


: 

In [15]:
# Shape of the encoder output for the dummy batch.
t(input_sample).size()

torch.Size([1, 100])

In [15]:
# NOTE(review): `model` is whichever truncated backbone an earlier cell assigned
# last (several cells rebind it), so this result depends on execution order.
# The cell also displays the whole output tensor rather than a summary.
model(input_sample)

tensor([[[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.0827],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0

In [23]:
# List every encoder parameter that still requires gradients.
list(filter(lambda p: p.requires_grad, t.parameters()))

[Parameter containing:
 tensor([[ 1.8034e-02,  2.0798e-02, -1.7046e-02,  ...,  1.2112e-02,
          -2.0329e-02, -1.4446e-02],
         [-1.4846e-05, -1.0145e-02,  1.1176e-02,  ..., -1.6961e-02,
          -1.8129e-02,  7.3319e-03],
         [-1.9054e-02,  5.1273e-03,  1.7095e-02,  ..., -1.0513e-02,
          -1.7733e-02,  1.1119e-02],
         ...,
         [-6.8120e-03, -2.1270e-02, -2.4200e-03,  ..., -1.2071e-02,
          -2.0373e-02, -2.1279e-02],
         [-1.4741e-02, -2.0560e-02,  1.3354e-02,  ...,  9.2922e-03,
          -9.9112e-03,  7.4200e-03],
         [ 1.5476e-02,  1.8514e-02, -1.6555e-02,  ...,  1.3659e-02,
          -1.7627e-02,  7.4661e-03]], requires_grad=True),
 Parameter containing:
 tensor([-0.0184, -0.0043, -0.0114,  ..., -0.0091, -0.0181, -0.0015],
        requires_grad=True),
 Parameter containing:
 tensor([[ 0.0153,  0.0073,  0.0148,  ..., -0.0121,  0.0050,  0.0156],
         [-0.0139,  0.0075,  0.0131,  ...,  0.0128, -0.0144,  0.0057],
         [ 0.0009,  0.00

In [61]:
# Number of values in one flattened (128, 7, 7) feature map; equals the 6272
# used as the VGG input size of the encoder's first linear layer.
128 * 7 * 7

6272

In [59]:
# NOTE(review): scratch arithmetic; what this product is used for is not
# visible in the notebook.
32 * 224

7168

In [77]:
# Random tensor of shape (1, 100), passed to the decoder in the next cell.
sample = torch.randn((1, 100))

In [81]:

# NOTE(review): `Decoder` is not defined in any visible cell; this relies on a
# definition made elsewhere in the kernel session.
# Instantiate it, check the output shape for `sample`, and report the
# trainable-parameter count (in millions).
d = Decoder(100, 512)
decoded = d(sample)
print(decoded.shape)
trainable_params = sum(p.numel() for p in d.parameters() if p.requires_grad)
print(trainable_params / 1000 / 1000)

torch.Size([1, 3, 224, 224])
64.233923


torch.Size([1, 3, 224, 224])

14.999426999999999