<a href="https://colab.research.google.com/github/arkalim/PyTorch/blob/master/Model_Creation_Notes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Today, we are going to see how to use the three main building blocks of PyTorch: Module, Sequential and ModuleList. We are going to start with an example and iteratively we will make it better.

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models

## Simplest version of a classifier model

In [2]:
class Network(nn.Module):
    def __init__(self, in_c, n_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(in_c, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(32)

        self.fc1 = nn.Linear(32 * 28 * 28, 1024)
        self.fc2 = nn.Linear(1024, n_classes)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        
        x = self.conv2(x)
        x = self.bn2(x)
        x = F.relu(x)

        x = x.view(x.size(0), -1) # flat
        
        x = self.fc1(x)
        x = F.sigmoid(x)
        x = self.fc2(x)
        
        return x
    
model = Network(1,10)
print(model)

Network(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=25088, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=10, bias=True)
)


If we want to add a layer we have to again write lots of code in the __init__ and in the forward function. Also, if we have some common block that we want to use in another model, e.g. the 3x3 conv + batchnorm + relu, we have to write it again.
## Sequential: stack and merge layers

In [3]:
class Network(nn.Module):
    
    def __init__(self, channel_in, num_classes):
        super().__init__()
        
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(channel_in, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )
        
        self.conv_block2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        
        self.decoder = nn.Sequential(
            nn.Linear(32 * 28 * 28, 1024),
            nn.Sigmoid(),
            nn.Linear(1024, num_classes)
        )
        
    def forward(self, x):
        x = self.conv_block1(x)
        x = self.conv_block2(x)

        x = x.view(x.size(dim = 0), -1) # flatten
        
        x = self.decoder(x)
        
        return x
    
model = Network(1,10) 
print(model)

Network(
  (conv_block1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (conv_block2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=25088, out_features=1024, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)


Still, conv_block1 and conv_block2 looks almost the same. We could create a function that reteurns a nn.Sequential to even simplify the code!

In [4]:
class Network(nn.Module):
    def __init__(self, in_channel, num_classes):
        super().__init__()
        
        # defining the conv blocks as attributes
        self.conv_block1 = self.conv_block(in_channel, 32, kernel_size=3, padding=1)
        
        self.conv_block2 = self.conv_block(32, 64, kernel_size=3, padding=1)
        
        self.decoder = nn.Sequential(
            nn.Linear(32 * 28 * 28, 1024),
            nn.Sigmoid(),
            nn.Linear(1024, num_classes)
        )
        
    # function that returns conv blocks    
    def conv_block(self,in_filters, out_filters, *args, **kwargs):
        return nn.Sequential(
            nn.Conv2d(in_filters, out_filters, *args, **kwargs),
            nn.BatchNorm2d(out_filters),
            nn.ReLU()
        )    

        
    def forward(self, x):
        x = self.conv_block1(x)
        x = self.conv_block2(x)

        x = x.view(x.size(0), -1) # flat
        
        x = self.decoder(x)
        
        return x
    
network_2 = Network(1,10)
print(network_2)

Network(
  (conv_block1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (conv_block2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=25088, out_features=1024, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)


Still conv_block1 and conv_block2 are almost the same! We can merge them using nn.Sequential

In [5]:
class Network(nn.Module):
    def __init__(self, in_channel, num_classes):
        super().__init__()
        
        # defining the encoder as a Sequential combination of conv blocks
        self.encoder = nn.Sequential(
            self.conv_block(in_channel, 32, kernel_size=3, padding=1),
            self.conv_block(32, 64, kernel_size=3, padding=1)
        )
        
        self.decoder = nn.Sequential(
            nn.Linear(32 * 28 * 28, 1024),
            nn.Sigmoid(),
            nn.Linear(1024, num_classes)
        )
        
    # function that returns conv blocks    
    def conv_block(self,in_filters, out_filters, *args, **kwargs):
        return nn.Sequential(
            nn.Conv2d(in_filters, out_filters, *args, **kwargs),
            nn.BatchNorm2d(out_filters),
            nn.ReLU()
        )    

        
    def forward(self, x):
        x = self.encoder(x)

        x = x.view(x.size(0), -1) # flat
        
        x = self.decoder(x)
        
        return x
    
network_3 = Network(1,10)
print(network_3)

Network(
  (encoder): Sequential(
    (0): Sequential(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (decoder): Sequential(
    (0): Linear(in_features=25088, out_features=1024, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)


## ModuleList : when we need to iterate
ModuleList allows you to store Module as a list. It can be useful when you need to iterate through layer and store/use some information, like in U-net.

The main difference between Sequential is that ModuleList have not a forward method so the inner layers are not connected. Assuming we need each output of each layer in the decoder, we can store it by:

In [6]:
class Network(nn.Module):
    
    def __init__(self, sizes):
        super().__init__()
        # use this form to make a list of layers
        self.layers = nn.ModuleList([nn.Linear(filters_in, filters_out) for filters_in, filters_out in zip(sizes, sizes[1:])])
        self.trace = []
        
    def forward(self,x):
        for layer in self.layers:
            x = layer(x)
            # append the output of each layer to trace
            self.trace.append(x)
        return x
    
model = Network([1,16,32,64])    

print(model)

# feed a random tensor into the model to see the output of each layer
out = model(torch.rand((4,1)))

[print(x.shape) for x in model.trace]

Network(
  (layers): ModuleList(
    (0): Linear(in_features=1, out_features=16, bias=True)
    (1): Linear(in_features=16, out_features=32, bias=True)
    (2): Linear(in_features=32, out_features=64, bias=True)
  )
)
torch.Size([4, 16])
torch.Size([4, 32])
torch.Size([4, 64])


[None, None, None]

## ModuleDict: when we need to choose
What if we want to switch to LearkyRelu in our conv_block? We can use ModuleDict to create a dictionary of Module and dynamically switch Module when we want

In [7]:
def conv_block(in_f, out_f, activation='relu', *args, **kwargs):
    
    activations = nn.ModuleDict([
                ['lrelu', nn.LeakyReLU()],
                ['relu', nn.ReLU()]
    ])
    
    return nn.Sequential(
        nn.Conv2d(in_f, out_f, *args, **kwargs),
        nn.BatchNorm2d(out_f),
        activations[activation]
    )

print(conv_block(1, 32,'lrelu', kernel_size=3, padding=1))

Sequential(
  (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): LeakyReLU(negative_slope=0.01)
)


## Accessing weights of model

In [8]:
for name, param in network_2.named_parameters():
    print('Name: {}      Shape: {}'.format(name, param.shape))

Name: conv_block1.0.weight      Shape: torch.Size([32, 1, 3, 3])
Name: conv_block1.0.bias      Shape: torch.Size([32])
Name: conv_block1.1.weight      Shape: torch.Size([32])
Name: conv_block1.1.bias      Shape: torch.Size([32])
Name: conv_block2.0.weight      Shape: torch.Size([64, 32, 3, 3])
Name: conv_block2.0.bias      Shape: torch.Size([64])
Name: conv_block2.1.weight      Shape: torch.Size([64])
Name: conv_block2.1.bias      Shape: torch.Size([64])
Name: decoder.0.weight      Shape: torch.Size([1024, 25088])
Name: decoder.0.bias      Shape: torch.Size([1024])
Name: decoder.2.weight      Shape: torch.Size([10, 1024])
Name: decoder.2.bias      Shape: torch.Size([10])


In [9]:
print(network_2.conv_block1[0].weight)

Parameter containing:
tensor([[[[ 0.1813,  0.3016,  0.1065],
          [-0.2300,  0.2665,  0.0304],
          [-0.0722,  0.0862, -0.1226]]],


        [[[-0.0597,  0.2799, -0.2240],
          [-0.1560, -0.3205, -0.0806],
          [-0.1788,  0.2126, -0.3080]]],


        [[[ 0.0028, -0.1531, -0.1594],
          [-0.3124, -0.0508,  0.1177],
          [-0.1244, -0.0753, -0.1156]]],


        [[[-0.2866, -0.1170, -0.0453],
          [ 0.2169, -0.2491, -0.2628],
          [-0.2521,  0.1010,  0.2865]]],


        [[[ 0.1071,  0.0961, -0.0265],
          [-0.1603, -0.2720,  0.1919],
          [ 0.1646,  0.1213,  0.0539]]],


        [[[ 0.2713,  0.1730, -0.1299],
          [ 0.2504,  0.2500, -0.2485],
          [-0.0630,  0.1732,  0.0749]]],


        [[[ 0.3025, -0.0232,  0.0219],
          [-0.0894, -0.0069, -0.1024],
          [ 0.2313,  0.2644,  0.2640]]],


        [[[ 0.0021, -0.2720,  0.1208],
          [ 0.2177, -0.0704, -0.1544],
          [ 0.0426, -0.2196, -0.0513]]],


        [[

In [10]:
for name, param in network_3.named_parameters():
    print('Name: {}      Shape: {}'.format(name, param.shape))

Name: encoder.0.0.weight      Shape: torch.Size([32, 1, 3, 3])
Name: encoder.0.0.bias      Shape: torch.Size([32])
Name: encoder.0.1.weight      Shape: torch.Size([32])
Name: encoder.0.1.bias      Shape: torch.Size([32])
Name: encoder.1.0.weight      Shape: torch.Size([64, 32, 3, 3])
Name: encoder.1.0.bias      Shape: torch.Size([64])
Name: encoder.1.1.weight      Shape: torch.Size([64])
Name: encoder.1.1.bias      Shape: torch.Size([64])
Name: decoder.0.weight      Shape: torch.Size([1024, 25088])
Name: decoder.0.bias      Shape: torch.Size([1024])
Name: decoder.2.weight      Shape: torch.Size([10, 1024])
Name: decoder.2.bias      Shape: torch.Size([10])


In [11]:
print(network_3.encoder[0][0].weight)

Parameter containing:
tensor([[[[ 0.2732, -0.2741, -0.0126],
          [-0.1148,  0.2462, -0.1302],
          [ 0.0691, -0.2845, -0.3233]]],


        [[[-0.2175, -0.1577,  0.0981],
          [ 0.3065,  0.2226,  0.0250],
          [ 0.3090,  0.2913, -0.1763]]],


        [[[ 0.3049,  0.1212,  0.2746],
          [ 0.2395, -0.1232, -0.2852],
          [ 0.0535, -0.2075,  0.2906]]],


        [[[ 0.1554, -0.2954, -0.1115],
          [-0.1020,  0.1374,  0.2249],
          [-0.2360,  0.3282, -0.0927]]],


        [[[-0.0089, -0.1333,  0.1938],
          [-0.0938, -0.2730,  0.1930],
          [-0.2393,  0.0766,  0.1745]]],


        [[[-0.3313,  0.3092, -0.0275],
          [ 0.2731, -0.1712,  0.1513],
          [ 0.0917,  0.2621, -0.1094]]],


        [[[ 0.0158,  0.1030, -0.1795],
          [ 0.2042,  0.0680,  0.0666],
          [-0.1753,  0.0203,  0.3320]]],


        [[[ 0.2696,  0.0389,  0.1725],
          [ 0.0950,  0.1126, -0.1503],
          [-0.0016,  0.2869, -0.0130]]],


        [[

## Accessing weights of a specific block or layer

In [12]:
for name, param in network_3.encoder.named_parameters():
    print('Name: {}      Shape: {}'.format(name, param.shape))

Name: 0.0.weight      Shape: torch.Size([32, 1, 3, 3])
Name: 0.0.bias      Shape: torch.Size([32])
Name: 0.1.weight      Shape: torch.Size([32])
Name: 0.1.bias      Shape: torch.Size([32])
Name: 1.0.weight      Shape: torch.Size([64, 32, 3, 3])
Name: 1.0.bias      Shape: torch.Size([64])
Name: 1.1.weight      Shape: torch.Size([64])
Name: 1.1.bias      Shape: torch.Size([64])


## Adding layers to the model

In [13]:
# adding a layer named 'new_layer' to encoder[0] block
network_3.encoder[0].new_layer = nn.Linear(12,13)

print(network_3)

Network(
  (encoder): Sequential(
    (0): Sequential(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (new_layer): Linear(in_features=12, out_features=13, bias=True)
    )
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (decoder): Sequential(
    (0): Linear(in_features=25088, out_features=1024, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)


## Unfreezing selected layers

In [0]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.fc1 = nn.Linear(1, 16)
        self.fc2 = nn.Linear(16, 8)
        self.fc3 = nn.Linear(8, 4)
        self.fc4 = nn.Linear(4, 2)
        self.fc5 = nn.Linear(2, 1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.fc4(x)
        x = self.fc5(x)
        return x

net = Net()    

### Find different components of the model


In [15]:
for name, child in net.named_children():
    print(name)  

fc1
fc2
fc3
fc4
fc5


### Freeze selected layers

In [16]:
for name, child in net.named_children():
    
    if name in ['fc4', 'fc5']:
        print(name + ' is unfrozen')
        for param in child.parameters():
            param.requires_grad = True
    else:
        print(name + ' is frozen')
        for param in child.parameters():
            param.requires_grad = False

fc1 is frozen
fc2 is frozen
fc3 is frozen
fc4 is unfrozen
fc5 is unfrozen


# Different ways to define Sequential Models

In [0]:
import torch
import torch.nn as nn
from collections import OrderedDict

### Method 1: Without mentioning layer names

In [18]:
model_Seq1 = nn.Sequential(
          nn.Conv2d(1,20,5),
          nn.ReLU(),
          nn.Conv2d(20,64,5),
          nn.ReLU()
        )

print(model_Seq1)

Sequential(
  (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (1): ReLU()
  (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
  (3): ReLU()
)


Layers are numbered sequentially by default
### Method 2: Mention layer names  using OrderedDict

In [19]:
# An OrderedDict is a dictionary subclass that remembers the order that keys were first inserted. 
# The only difference between dict() and OrderedDict() is that:

# OrderedDict preserves the order in which the keys are inserted. 
# A regular dict doesn’t track the insertion order, 
# and iterating it gives the values in an arbitrary order. 
# By contrast, the order the items are inserted is remembered by OrderedDict.

model_Seq2 = nn.Sequential(OrderedDict([
          ('conv1', nn.Conv2d(1,20,5)),
          ('relu1', nn.ReLU()),
          ('conv2', nn.Conv2d(20,64,5)),
          ('relu2', nn.ReLU())
        ]))

print(model_Seq2)

Sequential(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (relu1): ReLU()
  (conv2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
  (relu2): ReLU()
)


### Method 3: Using a list of layers

In [20]:
layers = []
layers.append(nn.Linear(3, 4))
layers.append(nn.Sigmoid())
layers.append(nn.Linear(4, 1))
layers.append(nn.Sigmoid())

model_Seq3 = nn.Sequential(*layers)

print(model_Seq3)

Sequential(
  (0): Linear(in_features=3, out_features=4, bias=True)
  (1): Sigmoid()
  (2): Linear(in_features=4, out_features=1, bias=True)
  (3): Sigmoid()
)


In this case also, layers are numbered by default
### Method 4: Using class

In [21]:
class ConvNet(torch.nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()

        self.conv = torch.nn.Sequential()
        self.conv.add_module("conv_1", torch.nn.Conv2d(1, 10, kernel_size=5))
        self.conv.add_module("maxpool_1", torch.nn.MaxPool2d(kernel_size=2))
        self.conv.add_module("relu_1", torch.nn.ReLU())
        self.conv.add_module("conv_2", torch.nn.Conv2d(10, 20, kernel_size=5))
        self.conv.add_module("dropout_2", torch.nn.Dropout(p=0.2))
        
model_Seq4 = ConvNet()

print(model_Seq4)

ConvNet(
  (conv): Sequential(
    (conv_1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
    (maxpool_1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (relu_1): ReLU()
    (conv_2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
    (dropout_2): Dropout(p=0.2)
  )
)


## Adding, Accessing and Replacing layers in Sequential Models

In [22]:
# Accessing the layers
print(model_Seq1[0])
print(model_Seq2.conv1)

Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))


In [23]:
# replacing the layers
model_Seq1[0] = nn.Conv2d(1,50,7,2)
model_Seq2.conv1 = nn.Conv2d(1,50,7,2)

print(model_Seq1)
print(model_Seq2)

Sequential(
  (0): Conv2d(1, 50, kernel_size=(7, 7), stride=(2, 2))
  (1): ReLU()
  (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
  (3): ReLU()
)
Sequential(
  (conv1): Conv2d(1, 50, kernel_size=(7, 7), stride=(2, 2))
  (relu1): ReLU()
  (conv2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
  (relu2): ReLU()
)


In [24]:
# Adding layers
model_Seq1 = nn.Sequential(model_Seq1, torch.nn.Linear(2048,365))
model_Seq2.add_module('conv3', nn.Conv2d(64, 64, 5))

print(model_Seq1)
print(model_Seq2)

Sequential(
  (0): Sequential(
    (0): Conv2d(1, 50, kernel_size=(7, 7), stride=(2, 2))
    (1): ReLU()
    (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
    (3): ReLU()
  )
  (1): Linear(in_features=2048, out_features=365, bias=True)
)
Sequential(
  (conv1): Conv2d(1, 50, kernel_size=(7, 7), stride=(2, 2))
  (relu1): ReLU()
  (conv2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
  (relu2): ReLU()
  (conv3): Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1))
)


## Adding layers to a pretrained model

In [25]:
# Pretrained Model
vgg = models.vgg16(pretrained = True)
print("Pretrained Model")
print(vgg)

# Adding on top of the model
print("\n Adding on top of the model")
on_top = nn.Sequential(vgg, net)
print(on_top)

# take care of the shapes when adding inbetween
print("\n Adding in between the model")
in_between = nn.Sequential(vgg.features[:5] , net , vgg.features[5:] , vgg.avgpool , vgg.classifier)
print(in_between)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/checkpoints/vgg16-397923af.pth
100%|██████████| 553433881/553433881 [00:39<00:00, 14070257.93it/s]


Pretrained Model
VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)

# Replacing layers of a pretrained network

In [0]:
vgg16 = models.vgg16(pretrained=True)

In [42]:
# select a particular layer to replace
layers = list(vgg16.children())
print('Last Layer',layers[2][-1])

Last Layer Linear(in_features=4096, out_features=1000, bias=True)


In [54]:
# replace the selected layer
layers[2][-1] = nn.Linear(4096, 10, bias = True)

# create a new model using the list
new_model = nn.Sequential(*layers)

# freeze the pretrained model 
# Parameters of newly constructed modules have requires_grad=True by default
for param in new_model.parameters():
    param.requires_grad = False
    
# unfreeze the last layer    
for param in new_model[2][-1].parameters():
    param.requires_grad = True
    
print(new_model)    

# print trainable parameters
print('Trainable Parameters: \n')
for name, param in new_model.named_parameters():
    if param.requires_grad:
        print (name, param.shape)

Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d

In [56]:
# Another method

model = models.vgg16(True)

new_model = torch.nn.Sequential(*(list(model.children())))
new_model[2][-1] = nn.Linear(4096, 10, bias = True)

# freeze the pretrained model 
# Parameters of newly constructed modules have requires_grad=True by default
for param in new_model.parameters():
    param.requires_grad = False
    
# unfreeze the last layer    
for param in new_model[2][-1].parameters():
    param.requires_grad = True
    
print(new_model)    

# print trainable parameters
print('Trainable Parameters: \n')
for name, param in new_model.named_parameters():
    if param.requires_grad:
        print (name, param.shape)

Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d