In [2]:
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim

class Net(nn.Module):
    """Small fully connected network: 2 -> 4 -> 3 -> 1 with a sigmoid output.

    The three ``nn.Linear`` layers are registered as ``fc1``, ``fc2`` and
    ``out``; those attribute names become the prefixes of the parameter names
    (``fc1.weight``, ``fc1.bias``, ...), so they must stay stable for any code
    that selects parameters by name.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(2, 4)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(4, 3)
        # nn.ReLU's only positional argument is `inplace`. Passing a bare `1`
        # here would silently switch this activation to in-place mode, unlike
        # relu1, so construct it with the default (out-of-place) behaviour.
        self.relu2 = nn.ReLU()
        self.out = nn.Linear(3, 1)
        self.out_act = nn.Sigmoid()

    def forward(self, inputs):
        """Run the forward pass.

        Args:
            inputs: tensor whose last dimension is 2 (the input width of fc1).

        Returns:
            Tensor whose last dimension is 1, after the final sigmoid.
        """
        h1 = self.relu1(self.fc1(inputs))
        h2 = self.relu2(self.fc2(h1))
        return self.out_act(self.out(h2))

In [3]:
# Instantiate the model once; the following cells all toggle flags on this
# same instance.
net = Net()
# Dump every parameter tensor, one per print record.
print(*net.parameters(), sep="\n")

Parameter containing:
tensor([[-0.2249,  0.5759],
        [ 0.4148,  0.4126],
        [ 0.3939,  0.3192],
        [-0.0072, -0.3396]], requires_grad=True)
Parameter containing:
tensor([ 0.4758,  0.2861,  0.6777, -0.4127], requires_grad=True)
Parameter containing:
tensor([[-0.1068, -0.0981,  0.1543,  0.3864],
        [ 0.1319, -0.2243, -0.0253,  0.3030],
        [ 0.4892, -0.2366, -0.3212, -0.4371]], requires_grad=True)
Parameter containing:
tensor([ 0.0378, -0.4122,  0.3049], requires_grad=True)
Parameter containing:
tensor([[-0.5143,  0.2256,  0.0253]], requires_grad=True)
Parameter containing:
tensor([-0.4425], requires_grad=True)


In [4]:
# Freeze the whole network: switch off gradient tracking on each parameter
# and show it (the printed repr no longer carries `requires_grad=True`).
for weight in net.parameters():
    weight.requires_grad_(False)
    print(weight)

Parameter containing:
tensor([[-0.2249,  0.5759],
        [ 0.4148,  0.4126],
        [ 0.3939,  0.3192],
        [-0.0072, -0.3396]])
Parameter containing:
tensor([ 0.4758,  0.2861,  0.6777, -0.4127])
Parameter containing:
tensor([[-0.1068, -0.0981,  0.1543,  0.3864],
        [ 0.1319, -0.2243, -0.0253,  0.3030],
        [ 0.4892, -0.2366, -0.3212, -0.4371]])
Parameter containing:
tensor([ 0.0378, -0.4122,  0.3049])
Parameter containing:
tensor([[-0.5143,  0.2256,  0.0253]])
Parameter containing:
tensor([-0.4425])


In [6]:
# Unfreeze the whole network again: re-enable gradient tracking on each
# parameter and show it.
for weight in net.parameters():
    weight.requires_grad_(True)
    print(weight)

Parameter containing:
tensor([[-0.2249,  0.5759],
        [ 0.4148,  0.4126],
        [ 0.3939,  0.3192],
        [-0.0072, -0.3396]], requires_grad=True)
Parameter containing:
tensor([ 0.4758,  0.2861,  0.6777, -0.4127], requires_grad=True)
Parameter containing:
tensor([[-0.1068, -0.0981,  0.1543,  0.3864],
        [ 0.1319, -0.2243, -0.0253,  0.3030],
        [ 0.4892, -0.2366, -0.3212, -0.4371]], requires_grad=True)
Parameter containing:
tensor([ 0.0378, -0.4122,  0.3049], requires_grad=True)
Parameter containing:
tensor([[-0.5143,  0.2256,  0.0253]], requires_grad=True)
Parameter containing:
tensor([-0.4425], requires_grad=True)


### Freeze part of the parameters

In [7]:
# state_dict() returns an ordered mapping keyed by "<attribute>.<weight|bias>";
# displaying the keys shows the names available for selecting parameters.
params = net.state_dict()
params.keys()

odict_keys(['fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'out.weight', 'out.bias'])

### Set the relevant layer's `requires_grad` to False (a naive way)


In [8]:
# Iterating a mapping yields its keys, so unpacking gives the parameter names
# in registration order; show the first one.
keys = [*params]
keys[0]

'fc1.weight'

In [9]:
# Naive freeze: reach into the module and switch off a single tensor by hand.
net.fc1.weight.requires_grad_(False)

# Only fc1.weight should now print without `requires_grad=True`.
print(*net.parameters(), sep="\n")

Parameter containing:
tensor([[-0.2249,  0.5759],
        [ 0.4148,  0.4126],
        [ 0.3939,  0.3192],
        [-0.0072, -0.3396]])
Parameter containing:
tensor([ 0.4758,  0.2861,  0.6777, -0.4127], requires_grad=True)
Parameter containing:
tensor([[-0.1068, -0.0981,  0.1543,  0.3864],
        [ 0.1319, -0.2243, -0.0253,  0.3030],
        [ 0.4892, -0.2366, -0.3212, -0.4371]], requires_grad=True)
Parameter containing:
tensor([ 0.0378, -0.4122,  0.3049], requires_grad=True)
Parameter containing:
tensor([[-0.5143,  0.2256,  0.0253]], requires_grad=True)
Parameter containing:
tensor([-0.4425], requires_grad=True)


### A better way

In [10]:
# Undo the manual freeze so every parameter is trainable again.
net.fc1.weight.requires_grad_(True)

# named_parameters() yields (name, tensor) pairs; print the names of the
# tensors that autograd currently tracks.
for layer_name, tensor in net.named_parameters():
    if not tensor.requires_grad:
        continue
    print(layer_name)

fc1.weight
fc1.bias
fc2.weight
fc2.bias
out.weight
out.bias


We can then control `requires_grad` selectively by filtering on the parameter names.

In [14]:
"""
param.requires_grad is True for parameters whose gradients autograd computes;
setting it to False freezes the parameter (no gradient is computed for it)
"""
# Freeze every parameter that belongs to the `fc1` layer.
# Parameter names are dotted paths such as "fc1.weight"; compare against whole
# path components rather than using a substring test, so that a differently
# named layer (e.g. "fc10" or "fc1_bn") is not frozen by accident.
for name, param in net.named_parameters():
    if 'fc1' in name.split('.'):
        # Assigning False is a no-op for tensors that are already frozen.
        param.requires_grad = False

In [15]:
# Show each parameter next to its name; frozen tensors print without
# `requires_grad=True`.
for layer_name, tensor in net.named_parameters():
    print(layer_name, tensor)

fc1.weight Parameter containing:
tensor([[-0.2249,  0.5759],
        [ 0.4148,  0.4126],
        [ 0.3939,  0.3192],
        [-0.0072, -0.3396]])
fc1.bias Parameter containing:
tensor([ 0.4758,  0.2861,  0.6777, -0.4127])
fc2.weight Parameter containing:
tensor([[-0.1068, -0.0981,  0.1543,  0.3864],
        [ 0.1319, -0.2243, -0.0253,  0.3030],
        [ 0.4892, -0.2366, -0.3212, -0.4371]], requires_grad=True)
fc2.bias Parameter containing:
tensor([ 0.0378, -0.4122,  0.3049], requires_grad=True)
out.weight Parameter containing:
tensor([[-0.5143,  0.2256,  0.0253]], requires_grad=True)
out.bias Parameter containing:
tensor([-0.4425], requires_grad=True)


### One last step

We are not done yet: even with `requires_grad` set to False, the weights can still be modified, as the manual update below shows.


In [16]:
# Hand-written update on the frozen tensor: the in-place subtraction succeeds
# and changes the stored values.
# NOTE: re-running this cell scales fc1.weight again (it is not idempotent).
frozen_weight = net.fc1.weight
frozen_weight -= 0.1 * frozen_weight

for layer_name, tensor in net.named_parameters():
    print(layer_name, tensor)

fc1.weight Parameter containing:
tensor([[-0.2024,  0.5183],
        [ 0.3733,  0.3713],
        [ 0.3545,  0.2873],
        [-0.0064, -0.3056]])
fc1.bias Parameter containing:
tensor([ 0.4758,  0.2861,  0.6777, -0.4127])
fc2.weight Parameter containing:
tensor([[-0.1068, -0.0981,  0.1543,  0.3864],
        [ 0.1319, -0.2243, -0.0253,  0.3030],
        [ 0.4892, -0.2366, -0.3212, -0.4371]], requires_grad=True)
fc2.bias Parameter containing:
tensor([ 0.0378, -0.4122,  0.3049], requires_grad=True)
out.weight Parameter containing:
tensor([[-0.5143,  0.2256,  0.0253]], requires_grad=True)
out.bias Parameter containing:
tensor([-0.4425], requires_grad=True)


So we should pass the optimizer only the parameters whose `requires_grad` is True, using the code below:

In [18]:
# Collect the still-trainable tensors once and reuse the list for both the
# optimizer and the printout.
trainable_params = [p for p in net.parameters() if p.requires_grad]
optimizer = optim.SGD(trainable_params, lr=0.1)

for tensor in trainable_params:
    print(tensor)


Parameter containing:
tensor([[-0.1068, -0.0981,  0.1543,  0.3864],
        [ 0.1319, -0.2243, -0.0253,  0.3030],
        [ 0.4892, -0.2366, -0.3212, -0.4371]], requires_grad=True)
Parameter containing:
tensor([ 0.0378, -0.4122,  0.3049], requires_grad=True)
Parameter containing:
tensor([[-0.5143,  0.2256,  0.0253]], requires_grad=True)
Parameter containing:
tensor([-0.4425], requires_grad=True)
