Today, we are going to see how to use the three main building blocks of PyTorch: Module, Sequential and ModuleList. We are going to start with an example and iteratively we will make it better.

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models

## Simplest version of a classifier model

In [0]:
class Network(nn.Module):
    def __init__(self, in_c, n_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(in_c, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(32)

        self.fc1 = nn.Linear(32 * 28 * 28, 1024)
        self.fc2 = nn.Linear(1024, n_classes)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        
        x = self.conv2(x)
        x = self.bn2(x)
        x = F.relu(x)

        x = x.view(x.size(0), -1) # flat
        
        x = self.fc1(x)
        x = F.sigmoid(x)
        x = self.fc2(x)
        
        return x
    
model = Network(1,10)
print(model)

Network(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=25088, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=10, bias=True)
)


If we want to add a layer we have to again write lots of code in the __init__ and in the forward function. Also, if we have some common block that we want to use in another model, e.g. the 3x3 conv + batchnorm + relu, we have to write it again.
## Sequential: stack and merge layers

In [0]:
class Network(nn.Module):
    
    def __init__(self, channel_in, num_classes):
        super().__init__()
        
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(channel_in, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )
        
        self.conv_block2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        
        self.decoder = nn.Sequential(
            nn.Linear(32 * 28 * 28, 1024),
            nn.Sigmoid(),
            nn.Linear(1024, num_classes)
        )
        
    def forward(self, x):
        x = self.conv_block1(x)
        x = self.conv_block2(x)

        x = x.view(x.size(dim = 0), -1) # flatten
        
        x = self.decoder(x)
        
        return x
    
model = Network(1,10) 
print(model)

Network(
  (conv_block1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (conv_block2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=25088, out_features=1024, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)


Still, conv_block1 and conv_block2 looks almost the same. We could create a function that reteurns a nn.Sequential to even simplify the code!

In [0]:
class Network(nn.Module):
    def __init__(self, in_channel, num_classes):
        super().__init__()
        
        # defining the conv blocks as attributes
        self.conv_block1 = self.conv_block(in_channel, 32, kernel_size=3, padding=1)
        
        self.conv_block2 = self.conv_block(32, 64, kernel_size=3, padding=1)
        
        self.decoder = nn.Sequential(
            nn.Linear(32 * 28 * 28, 1024),
            nn.Sigmoid(),
            nn.Linear(1024, num_classes)
        )
        
    # function that returns conv blocks    
    def conv_block(self,in_filters, out_filters, *args, **kwargs):
        return nn.Sequential(
            nn.Conv2d(in_filters, out_filters, *args, **kwargs),
            nn.BatchNorm2d(out_filters),
            nn.ReLU()
        )    

        
    def forward(self, x):
        x = self.conv_block1(x)
        x = self.conv_block2(x)

        x = x.view(x.size(0), -1) # flat
        
        x = self.decoder(x)
        
        return x
    
network_2 = Network(1,10)
print(network_2)

Network(
  (conv_block1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (conv_block2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=25088, out_features=1024, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)


Still conv_block1 and conv_block2 are almost the same! We can merge them using nn.Sequential

In [0]:
class Network(nn.Module):
    def __init__(self, in_channel, num_classes):
        super().__init__()
        
        # defining the encoder as a Sequential combination of conv blocks
        self.encoder = nn.Sequential(
            self.conv_block(in_channel, 32, kernel_size=3, padding=1),
            self.conv_block(32, 64, kernel_size=3, padding=1)
        )
        
        self.decoder = nn.Sequential(
            nn.Linear(32 * 28 * 28, 1024),
            nn.Sigmoid(),
            nn.Linear(1024, num_classes)
        )
        
    # function that returns conv blocks    
    def conv_block(self,in_filters, out_filters, *args, **kwargs):
        return nn.Sequential(
            nn.Conv2d(in_filters, out_filters, *args, **kwargs),
            nn.BatchNorm2d(out_filters),
            nn.ReLU()
        )    

        
    def forward(self, x):
        x = self.encoder(x)

        x = x.view(x.size(0), -1) # flat
        
        x = self.decoder(x)
        
        return x
    
network_3 = Network(1,10)
print(network_3)

Network(
  (encoder): Sequential(
    (0): Sequential(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (decoder): Sequential(
    (0): Linear(in_features=25088, out_features=1024, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)


## ModuleList : when we need to iterate
ModuleList allows you to store Module as a list. It can be useful when you need to iterate through layer and store/use some information, like in U-net.

The main difference between Sequential is that ModuleList have not a forward method so the inner layers are not connected. Assuming we need each output of each layer in the decoder, we can store it by:

In [0]:
class Network(nn.Module):
    
    def __init__(self, sizes):
        super().__init__()
        # use this form to make a list of layers
        self.layers = nn.ModuleList([nn.Linear(filters_in, filters_out) for filters_in, filters_out in zip(sizes, sizes[1:])])
        self.trace = []
        
    def forward(self,x):
        for layer in self.layers:
            x = layer(x)
            # append the output of each layer to trace
            self.trace.append(x)
        return x
    
model = Network([1,16,32,64])    

print(model)

# feed a random tensor into the model to see the output of each layer
out = model(torch.rand((4,1)))

[print(x.shape) for x in model.trace]

Network(
  (layers): ModuleList(
    (0): Linear(in_features=1, out_features=16, bias=True)
    (1): Linear(in_features=16, out_features=32, bias=True)
    (2): Linear(in_features=32, out_features=64, bias=True)
  )
)
torch.Size([4, 16])
torch.Size([4, 32])
torch.Size([4, 64])


[None, None, None]

## ModuleDict: when we need to choose
What if we want to switch to LearkyRelu in our conv_block? We can use ModuleDict to create a dictionary of Module and dynamically switch Module when we want

In [0]:
def conv_block(in_f, out_f, activation='relu', *args, **kwargs):
    
    activations = nn.ModuleDict([
                ['lrelu', nn.LeakyReLU()],
                ['relu', nn.ReLU()]
    ])
    
    return nn.Sequential(
        nn.Conv2d(in_f, out_f, *args, **kwargs),
        nn.BatchNorm2d(out_f),
        activations[activation]
    )

print(conv_block(1, 32,'lrelu', kernel_size=3, padding=1))

Sequential(
  (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): LeakyReLU(negative_slope=0.01)
)


## Accessing weights of model

In [0]:
for name, param in network_2.named_parameters():
    print('Name: {}      Shape: {}'.format(name, param.shape))

Name: conv_block1.0.weight      Shape: torch.Size([32, 1, 3, 3])
Name: conv_block1.0.bias      Shape: torch.Size([32])
Name: conv_block1.1.weight      Shape: torch.Size([32])
Name: conv_block1.1.bias      Shape: torch.Size([32])
Name: conv_block2.0.weight      Shape: torch.Size([64, 32, 3, 3])
Name: conv_block2.0.bias      Shape: torch.Size([64])
Name: conv_block2.1.weight      Shape: torch.Size([64])
Name: conv_block2.1.bias      Shape: torch.Size([64])
Name: decoder.0.weight      Shape: torch.Size([1024, 25088])
Name: decoder.0.bias      Shape: torch.Size([1024])
Name: decoder.2.weight      Shape: torch.Size([10, 1024])
Name: decoder.2.bias      Shape: torch.Size([10])


In [0]:
print(network_2.conv_block1[0].weight)

Parameter containing:
tensor([[[[-0.0968, -0.0176,  0.2056],
          [ 0.2380, -0.2025, -0.0140],
          [ 0.0016, -0.2793, -0.2413]]],


        [[[-0.0581,  0.1973, -0.1767],
          [ 0.1951, -0.2000, -0.1600],
          [ 0.1384,  0.0581, -0.0950]]],


        [[[ 0.2822, -0.0789,  0.0566],
          [-0.3128,  0.1762, -0.0464],
          [ 0.2763, -0.2105, -0.0805]]],


        [[[-0.0070,  0.2555, -0.1148],
          [ 0.1730, -0.2041,  0.2247],
          [ 0.2175,  0.0277,  0.1433]]],


        [[[ 0.0004,  0.2964, -0.3164],
          [ 0.2352,  0.2817,  0.2959],
          [-0.2600, -0.2384, -0.2324]]],


        [[[-0.1866, -0.0754,  0.2645],
          [-0.3185, -0.0444, -0.2537],
          [-0.0934, -0.1955,  0.1101]]],


        [[[ 0.2920,  0.0449, -0.2449],
          [ 0.2203,  0.1899, -0.2187],
          [ 0.0010, -0.1576,  0.2997]]],


        [[[-0.1905,  0.3149, -0.2065],
          [-0.0053, -0.1043, -0.0436],
          [ 0.2638, -0.2825, -0.2655]]],


        [[

In [0]:
for name, param in network_3.named_parameters():
    print('Name: {}      Shape: {}'.format(name, param.shape))

Name: encoder.0.0.weight      Shape: torch.Size([32, 1, 3, 3])
Name: encoder.0.0.bias      Shape: torch.Size([32])
Name: encoder.0.1.weight      Shape: torch.Size([32])
Name: encoder.0.1.bias      Shape: torch.Size([32])
Name: encoder.1.0.weight      Shape: torch.Size([64, 32, 3, 3])
Name: encoder.1.0.bias      Shape: torch.Size([64])
Name: encoder.1.1.weight      Shape: torch.Size([64])
Name: encoder.1.1.bias      Shape: torch.Size([64])
Name: decoder.0.weight      Shape: torch.Size([1024, 25088])
Name: decoder.0.bias      Shape: torch.Size([1024])
Name: decoder.2.weight      Shape: torch.Size([10, 1024])
Name: decoder.2.bias      Shape: torch.Size([10])


In [0]:
print(network_3.encoder[0][0].weight)

Parameter containing:
tensor([[[[-0.1010, -0.0993,  0.0706],
          [ 0.0888,  0.3237,  0.3193],
          [ 0.3108,  0.0830, -0.3037]]],


        [[[ 0.2009,  0.1916, -0.2025],
          [ 0.3180,  0.2431, -0.0588],
          [-0.3331, -0.2871,  0.2003]]],


        [[[ 0.3035,  0.1326, -0.2049],
          [ 0.1352,  0.1921, -0.0957],
          [ 0.1206, -0.0506,  0.2487]]],


        [[[-0.2404,  0.2763, -0.0284],
          [ 0.2143,  0.2812,  0.1206],
          [-0.1870, -0.2508,  0.2792]]],


        [[[ 0.1795,  0.0486,  0.2426],
          [-0.2224, -0.0175, -0.0583],
          [ 0.1608, -0.2588,  0.2775]]],


        [[[-0.2269, -0.1922,  0.3026],
          [ 0.0338, -0.0343,  0.0176],
          [-0.2817,  0.2951, -0.0739]]],


        [[[-0.1770, -0.0691, -0.3088],
          [-0.0548,  0.1882,  0.0971],
          [ 0.2351,  0.1257, -0.1398]]],


        [[[-0.0038,  0.0858, -0.0782],
          [ 0.2513, -0.2480,  0.2173],
          [ 0.2378, -0.2329, -0.0094]]],


        [[

## Accessing weights of a specific block or layer

In [0]:
for name, param in network_3.encoder.named_parameters():
    print('Name: {}      Shape: {}'.format(name, param.shape))

Name: 0.0.weight      Shape: torch.Size([32, 1, 3, 3])
Name: 0.0.bias      Shape: torch.Size([32])
Name: 0.1.weight      Shape: torch.Size([32])
Name: 0.1.bias      Shape: torch.Size([32])
Name: 1.0.weight      Shape: torch.Size([64, 32, 3, 3])
Name: 1.0.bias      Shape: torch.Size([64])
Name: 1.1.weight      Shape: torch.Size([64])
Name: 1.1.bias      Shape: torch.Size([64])


## Adding layers to the model

In [0]:
# adding a layer named 'new_layer' to encoder[0] block
network_3.encoder[0].new_layer = nn.Linear(12,13)

print(network_3)

Network(
  (encoder): Sequential(
    (0): Sequential(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (new_layer): Linear(in_features=12, out_features=13, bias=True)
    )
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (decoder): Sequential(
    (0): Linear(in_features=25088, out_features=1024, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)


## Unfreezing selected layers

In [0]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.fc1 = nn.Linear(1, 16)
        self.fc2 = nn.Linear(16, 8)
        self.fc3 = nn.Linear(8, 4)
        self.fc4 = nn.Linear(4, 2)
        self.fc5 = nn.Linear(2, 1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.fc4(x)
        x = self.fc5(x)
        return x

net = Net()    

### Find different components of the model


In [0]:
for name, child in net.named_children():
    print(name)  

fc1
fc2
fc3
fc4
fc5


### Freeze selected layers

In [0]:
for name, child in net.named_children():
    
    if name in ['fc4', 'fc5']:
        print(name + ' is unfrozen')
        for param in child.parameters():
            param.requires_grad = True
    else:
        print(name + ' is frozen')
        for param in child.parameters():
            param.requires_grad = False

fc1 is frozen
fc2 is frozen
fc3 is frozen
fc4 is unfrozen
fc5 is unfrozen


# Different ways to define Sequential Models

In [0]:
import torch
import torch.nn as nn
from collections import OrderedDict

### Method 1: Without mentioning layer names

In [0]:
model_Seq1 = nn.Sequential(
          nn.Conv2d(1,20,5),
          nn.ReLU(),
          nn.Conv2d(20,64,5),
          nn.ReLU()
        )

print(model_Seq1)

Sequential(
  (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (1): ReLU()
  (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
  (3): ReLU()
)


Layers are numbered sequentially by default
### Method 2: Mention layer names  using OrderedDict

In [0]:
# An OrderedDict is a dictionary subclass that remembers the order that keys were first inserted. 
# The only difference between dict() and OrderedDict() is that:

# OrderedDict preserves the order in which the keys are inserted. 
# A regular dict doesn’t track the insertion order, 
# and iterating it gives the values in an arbitrary order. 
# By contrast, the order the items are inserted is remembered by OrderedDict.

model_Seq2 = nn.Sequential(OrderedDict([
          ('conv1', nn.Conv2d(1,20,5)),
          ('relu1', nn.ReLU()),
          ('conv2', nn.Conv2d(20,64,5)),
          ('relu2', nn.ReLU())
        ]))

print(model_Seq2)

Sequential(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (relu1): ReLU()
  (conv2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
  (relu2): ReLU()
)


### Method 3: Using a list of layers

In [0]:
layers = []
layers.append(nn.Linear(3, 4))
layers.append(nn.Sigmoid())
layers.append(nn.Linear(4, 1))
layers.append(nn.Sigmoid())

model_Seq3 = nn.Sequential(*layers)

print(model_Seq3)

Sequential(
  (0): Linear(in_features=3, out_features=4, bias=True)
  (1): Sigmoid()
  (2): Linear(in_features=4, out_features=1, bias=True)
  (3): Sigmoid()
)


In this case also, layers are numbered by default
### Method 4: Using class

In [0]:
class ConvNet(torch.nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()

        self.conv = torch.nn.Sequential()
        self.conv.add_module("conv_1", torch.nn.Conv2d(1, 10, kernel_size=5))
        self.conv.add_module("maxpool_1", torch.nn.MaxPool2d(kernel_size=2))
        self.conv.add_module("relu_1", torch.nn.ReLU())
        self.conv.add_module("conv_2", torch.nn.Conv2d(10, 20, kernel_size=5))
        self.conv.add_module("dropout_2", torch.nn.Dropout(p=0.2))
        
model_Seq4 = ConvNet()

print(model_Seq4)

ConvNet(
  (conv): Sequential(
    (conv_1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
    (maxpool_1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (relu_1): ReLU()
    (conv_2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
    (dropout_2): Dropout(p=0.2)
  )
)


## Adding, Accessing and Replacing layers in Sequential Models

In [0]:
# Accessing the layers
print(model_Seq1[0])
print(model_Seq2.conv1)

Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))


In [0]:
# replacing the layers
model_Seq1[0] = nn.Conv2d(1,50,7,2)
model_Seq2.conv1 = nn.Conv2d(1,50,7,2)

print(model_Seq1)
print(model_Seq2)

Sequential(
  (0): Conv2d(1, 50, kernel_size=(7, 7), stride=(2, 2))
  (1): ReLU()
  (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
  (3): ReLU()
)
Sequential(
  (conv1): Conv2d(1, 50, kernel_size=(7, 7), stride=(2, 2))
  (relu1): ReLU()
  (conv2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
  (relu2): ReLU()
)


In [0]:
# Adding layers
model_Seq1 = nn.Sequential(model_Seq1, torch.nn.Linear(2048,365))
model_Seq2.add_module('conv3', nn.Conv2d(64, 64, 5))

print(model_Seq1)
print(model_Seq2)

Sequential(
  (0): Sequential(
    (0): Conv2d(1, 50, kernel_size=(7, 7), stride=(2, 2))
    (1): ReLU()
    (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
    (3): ReLU()
  )
  (1): Linear(in_features=2048, out_features=365, bias=True)
)
Sequential(
  (conv1): Conv2d(1, 50, kernel_size=(7, 7), stride=(2, 2))
  (relu1): ReLU()
  (conv2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
  (relu2): ReLU()
  (conv3): Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1))
)


## Adding layers to a pretrained model

In [0]:
# Pretrained Model
vgg = models.vgg16(pretrained = True)
print("Pretrained Model")
print(vgg)

# Adding on top of the model
print("\n Adding on top of the model")
on_top = nn.Sequential(vgg, net)
print(on_top)

# take care of the shapes when adding inbetween
print("\n Adding in between the model")
in_between = nn.Sequential(vgg.features[:5] , net , vgg.features[5:] , vgg.avgpool , vgg.classifier)
print(in_between)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/checkpoints/vgg16-397923af.pth
100%|██████████| 553433881/553433881 [00:06<00:00, 88062725.12it/s]


Pretrained Model
VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)

# Replacing layers of a pretrained network

In [0]:
vgg16 = models.vgg16(pretrained=True)

In [0]:
# select a particular layer to replace
layers = list(vgg16.children())
print('Last Layer',layers[2][-1])

Last Layer Linear(in_features=4096, out_features=1000, bias=True)


In [0]:
# replace the selected layer
layers[2][-1] = nn.Linear(4096, 10, bias = True)

# create a new model using the list
new_model = nn.Sequential(*layers)

# freeze the pretrained model 
# Parameters of newly constructed modules have requires_grad=True by default
for param in new_model.parameters():
    param.requires_grad = False
    
# unfreeze the last layer    
for param in new_model[2][-1].parameters():
    param.requires_grad = True
    
print(new_model)    

# print trainable parameters
print('Trainable Parameters: \n')
for name, param in new_model.named_parameters():
    if param.requires_grad:
        print (name, param.shape)

Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d

In [0]:
# Another method

model = models.vgg16(True)

new_model = torch.nn.Sequential(*(list(model.children())))
new_model[2][-1] = nn.Linear(4096, 10, bias = True)

# freeze the pretrained model 
# Parameters of newly constructed modules have requires_grad=True by default
for param in new_model.parameters():
    param.requires_grad = False
    
# unfreeze the last layer    
for param in new_model[2][-1].parameters():
    param.requires_grad = True
    
print(new_model)    

# print trainable parameters
print('Trainable Parameters: \n')
for name, param in new_model.named_parameters():
    if param.requires_grad:
        print (name, param.shape)

Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d

## Adding Layers to a pretrained network in a Class

In [0]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        
        x = x.view(-1, 16 * 4 * 4)
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()

print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [0]:
# Expanding the network
class ExpandedNet(nn.Module):
    
    def __init__(self):
        super(ExpandedNet , self).__init__()
        self.net = Net()
        self.fc4 = nn.Linear(10,1)
        
    def forward(self , x):
        # feed the output of last layer of self.net to relu and then to fc4
        x = F.relu(self.net.fc3(x))
        x = self.fc4(x)
        return x
    
new_net = ExpandedNet()

print(new_net)

ExpandedNet(
  (net): Net(
    (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (fc1): Linear(in_features=256, out_features=120, bias=True)
    (fc2): Linear(in_features=120, out_features=84, bias=True)
    (fc3): Linear(in_features=84, out_features=10, bias=True)
  )
  (fc4): Linear(in_features=10, out_features=1, bias=True)
)


## Manipulating a pretrained model
Sometimes it is needed to extract some features from different layers of a pretrained model in a way that forward function can be run one time. In other words, running forward function in pretrained model and stopping it in a layer whose output is our interest is not a good method. Assume you wants to get output of several layers and you must run forward function several times (ie the number of runs is the number of layers whose output is our interest). To achieve this goal it is needed some background information.

 Assume that I want to extract the first layers of VGG16 as features. In this regard, look at the following picture. The blue line shows which outputs I consider to get from layers.
 
 ![alt text](https://user-images.githubusercontent.com/15813546/32988686-5119820c-cd1e-11e7-8213-7a21a3227863.png)

As it can be seen from above picture and python output, our desire part of vgg net is lines in the python output correspond with line from (0) to (15). Also, we need to concatenate output of lines (3), (8) and (15). The outputs of lines (8) and (15) must be enlarged (upsample) to obtain the size of the output in line (3); then they are concatenated to acheive the result.

In [0]:
from torchvision import models
import torch
import torchvision
import torch.nn as nn

vgg = models.vgg16(True)
print(vgg)

class myModel(nn.Module):
    def __init__(self):
        super(myModel,self).__init__()
        vgg_model = models.vgg16(pretrained=True)	
        
        self.Conv1 = nn.Sequential(*list(vgg_model.children())[0][0:4])
        self.Conv2 = nn.Sequential(*list(vgg_model.children())[0][4:9]) 
        self.Conv3 = nn.Sequential(*list(vgg_model.children())[0][9:16])
        
        # upsampling layers
        self.upSample1 = nn.Upsample(scale_factor=2)
        self.upSample2 = nn.Upsample(scale_factor=4)

    def forward(self,x):
        out1 = self.Conv1(x)
        out2 = self.Conv2(out1)
        out3 = self.Conv3(out2)
        
        # upsampling fo skip connections
        out2 = self.upSample1(out2)
        out3 = self.upSample2(out3)
        
        # concatenate
        concat_features = torch.cat([out1, out2, out3], 1)
        return out1, concat_features

In the above implementation,

```
        self.Conv1 = nn.Sequential(*list(vgg_model.children())[0][0:4])
        self.Conv2 = nn.Sequential(*list(vgg_model.children())[0][4:9]) 
        self.Conv3 = nn.Sequential(*list(vgg_model.children())[0][9:16])
```
can be replaced with


```
        self.Conv1 = nn.Sequential(*list(vgg_model.features.children())[0:4])
        self.Conv2 = nn.Sequential(*list(vgg_model.features.children())[4:9]) 
        self.Conv3 = nn.Sequential(*list(vgg_model.features.children())[9:16])
```
model.features.children() gives a flattened version of model.children()


