In [1]:
# In this notebook, we will look at two of the most popular ways of saving weights in PyTorch:
# 1. Saving only the weights of the model using state_dict()
# 2. Saving the whole model (including the architecture as well as the weights)

# Please follow the blog post linked below for a detailed explanation of this notebook.
# https://medium.com/@animesh7pointer/everything-you-need-to-know-about-saving-weights-in-pytorch-572651f3f8de

In [2]:
# Importing the necessary libraries

import torch
import torch.nn as nn

In [3]:
# Defining a CNN based model

class NeuralNet(nn.Module):
    """Small convolutional network used to demonstrate saving/loading weights.

    Layout: a ``Sequential`` of two 5x5 convolutions followed by dropout,
    two further stand-alone 5x5 convolutions, and a final linear layer that
    maps the flattened feature map to 128 outputs.

    The attribute names (``sequential``, ``layer1``, ``layer2``, ``fc``) are
    the prefixes of the parameter names reported by ``named_parameters()``
    and of the keys in ``state_dict()``.
    """

    def __init__(self):
        super().__init__()

        self.sequential = nn.Sequential(
            nn.Conv2d(1, 32, 5),
            nn.Conv2d(32, 64, 5),
            nn.Dropout(0.3),
        )
        self.layer1 = nn.Conv2d(64, 128, 5)
        self.layer2 = nn.Conv2d(128, 256, 5)
        # The flattened conv output must contain 256 * 34 * 34 values per sample.
        # NOTE(review): that presumably corresponds to 1x50x50 inputs
        # (four 5x5 convolutions, each trimming 4 pixels per side length) - confirm.
        self.fc = nn.Linear(256 * 34 * 34, 128)

    def forward(self, x):
        """Run the convolutional stack, flatten per sample, apply ``fc``.

        Args:
            x: input batch; the first dimension is treated as the batch size.

        Returns:
            Tensor with one row of 128 values per sample.
        """
        features = self.layer2(self.layer1(self.sequential(x)))
        # Keep the batch dimension and collapse everything else into one axis.
        flattened = features.view(features.size(0), -1)
        return self.fc(flattened)

In [10]:
# Build a fresh NeuralNet instance and print its module tree
# (submodule names together with each layer's configuration)
model = NeuralNet()
print(model)

NeuralNet(
  (sequential): Sequential(
    (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
    (1): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
    (2): Dropout(p=0.3, inplace=False)
  )
  (layer1): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
  (layer2): Conv2d(128, 256, kernel_size=(5, 5), stride=(1, 1))
  (fc): Linear(in_features=295936, out_features=128, bias=True)
)


In [8]:
# Walk over every registered parameter of the model and report its
# qualified name, Python type, shape and whether it is trainable

for param_name, parameter in model.named_parameters():
    print('name: ', param_name)
    print(type(parameter))
    print('param.shape: ', parameter.shape)
    print('param.requires_grad: ', parameter.requires_grad)
    print('=====')

name:  sequential.0.weight
<class 'torch.nn.parameter.Parameter'>
param.shape:  torch.Size([32, 1, 5, 5])
param.requires_grad:  True
=====
name:  sequential.0.bias
<class 'torch.nn.parameter.Parameter'>
param.shape:  torch.Size([32])
param.requires_grad:  True
=====
name:  sequential.1.weight
<class 'torch.nn.parameter.Parameter'>
param.shape:  torch.Size([64, 32, 5, 5])
param.requires_grad:  True
=====
name:  sequential.1.bias
<class 'torch.nn.parameter.Parameter'>
param.shape:  torch.Size([64])
param.requires_grad:  True
=====
name:  layer1.weight
<class 'torch.nn.parameter.Parameter'>
param.shape:  torch.Size([128, 64, 5, 5])
param.requires_grad:  True
=====
name:  layer1.bias
<class 'torch.nn.parameter.Parameter'>
param.shape:  torch.Size([128])
param.requires_grad:  True
=====
name:  layer2.weight
<class 'torch.nn.parameter.Parameter'>
param.shape:  torch.Size([256, 128, 5, 5])
param.requires_grad:  True
=====
name:  layer2.bias
<class 'torch.nn.parameter.Parameter'>
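param.shape:  torch.Size([256])
param.requires_grad:  True
=====
name:  fc.weight
<class 'torch.nn.parameter.Parameter'>
param.shape:  torch.Size([128, 295936])
param.requires_grad:  True
=====
name:  fc.bias
<class 'torch.nn.parameter.Parameter'>
param.shape:  torch.Size([128])
param.requires_grad:  True
=====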

In [11]:
# Freeze every parameter except those of the final linear layer:
# only 'fc.weight' and 'fc.bias' remain trainable.
for name, param in model.named_parameters():
    param.requires_grad = name in ('fc.weight', 'fc.bias')

In [12]:
# Confirm which parameters are still trainable after the freeze
for param_name, parameter in model.named_parameters():
    print(param_name, ':', parameter.requires_grad)

sequential.0.weight : False
sequential.0.bias : False
sequential.1.weight : False
sequential.1.bias : False
layer1.weight : False
layer1.bias : False
layer2.weight : False
layer2.bias : False
fc.weight : True
fc.bias : True


In [16]:
# Inspect what state_dict() of the fc layer holds. As the output below shows,
# the entries are plain torch.Tensor objects (not nn.Parameter) and report
# requires_grad=False even though fc itself was left trainable.
for key, param in model.fc.state_dict().items():
    print('key: ', key)
    print('param.shape: ', param.shape)
    print('param.requires_grad: ', param.requires_grad)
    print('param.shape, param.requires_grad: ', param.shape, param.requires_grad)
    print('isinstance(param, nn.Module) ', isinstance(param, nn.Module))
    print('isinstance(param, nn.Parameter) ', isinstance(param, nn.Parameter))
    print('isinstance(param, torch.Tensor): ', isinstance(param, torch.Tensor))
    print('=====')

key:  weight
param.shape:  torch.Size([128, 295936])
param.requires_grad:  False
param.shape, param.requires_grad:  torch.Size([128, 295936]) False
isinstance(param, nn.Module)  False
isinstance(param, nn.Parameter)  False
isinstance(param, torch.Tensor):  True
=====
key:  bias
param.shape:  torch.Size([128])
param.requires_grad:  False
param.shape, param.requires_grad:  torch.Size([128]) False
isinstance(param, nn.Module)  False
isinstance(param, nn.Parameter)  False
isinstance(param, torch.Tensor):  True
=====


In [17]:
# Way 1: write only the model's state_dict (parameter names mapped to tensors)
# to 'weights_only.pth'; the NeuralNet object itself is not stored.
torch.save(model.state_dict(), 'weights_only.pth')

In [31]:
# Way 1 (loading): rebuild the architecture from code, then restore the saved tensors.
# weights_only=True restricts unpickling to tensors and plain containers, which is all
# a state_dict contains (the argument is available in PyTorch >= 1.13).
model_new = NeuralNet()
model_new.load_state_dict(torch.load('weights_only.pth', weights_only=True))

# The freshly constructed model keeps requires_grad=True on every parameter:
# loading a state_dict restores values only, not the freeze applied to `model`.
for name, param in model_new.named_parameters():
    print(name, ':', param.requires_grad)

sequential.0.weight : True
sequential.0.bias : True
sequential.1.weight : True
sequential.1.bias : True
layer1.weight : True
layer1.bias : True
layer2.weight : True
layer2.bias : True
fc.weight : True
fc.bias : True


In [32]:
# Way 2: serialize the whole model object (architecture as well as weights)
# to 'entire_model.pth'.
torch.save(model, 'entire_model.pth')

In [33]:
# Way 2 (loading): the file holds a pickled NeuralNet object, so the NeuralNet class
# must be defined/importable in the session that loads it.
# PyTorch >= 2.6 defaults torch.load to weights_only=True, which refuses to unpickle a
# full nn.Module; weights_only=False is therefore passed explicitly (argument available
# in PyTorch >= 1.13).
# NOTE: full unpickling can execute arbitrary code - only load files you created or trust.
model_new = torch.load('entire_model.pth', weights_only=False)

# The requires_grad flags travel with the pickled object: only fc.* is still trainable.
for name, param in model_new.named_parameters():
    print(name, ':', param.requires_grad)

sequential.0.weight : False
sequential.0.bias : False
sequential.1.weight : False
sequential.1.bias : False
layer1.weight : False
layer1.bias : False
layer2.weight : False
layer2.bias : False
fc.weight : True
fc.bias : True
