In [2]:
import torch
from torch import nn
from torch import optim

## .parameters() vs .modules() vs .children() and use of isinstance

In [15]:
class MLP(nn.Module):
    """Small fully connected network: 2 -> 3 -> 4 -> 1 features.

    Every stage is an ``nn.Sequential`` that pairs a linear layer with its
    activation, so the model contains nested sub-modules. That nesting is what
    makes it a useful subject for comparing ``.parameters()``, ``.modules()``
    and ``.children()``.
    """

    def __init__(self):
        super().__init__()

        # Hidden stages: linear projection followed by ReLU.
        self.fc1 = nn.Sequential(nn.Linear(2, 3), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(3, 4), nn.ReLU())

        # Output stage: a single unit passed through a sigmoid.
        self.fc_out = nn.Sequential(nn.Linear(4, 1), nn.Sigmoid())

    def forward(self, x):
        """Feed ``x`` through ``fc1``, ``fc2`` and ``fc_out`` in that order."""
        hidden = self.fc2(self.fc1(x))
        return self.fc_out(hidden)

In [16]:
# Build the network, push a random batch of two 2-feature samples through it,
# then show the output shape followed by the nested module tree.
model = MLP()
batch_out = model(torch.randn(2, 2))
print(batch_out.shape)
print(model)

torch.Size([2, 1])
MLP(
  (fc1): Sequential(
    (0): Linear(in_features=2, out_features=3, bias=True)
    (1): ReLU()
  )
  (fc2): Sequential(
    (0): Linear(in_features=3, out_features=4, bias=True)
    (1): ReLU()
  )
  (fc_out): Sequential(
    (0): Linear(in_features=4, out_features=1, bias=True)
    (1): Sigmoid()
  )
)


In [17]:
# parameters() yields just the learnable tensors, gathered from every nested
# sub-module in order; no names and no module structure are attached.
list(model.parameters())
# [weights of layer0, bias of layer0, weights of layer1, bias of layer1, ...]

[Parameter containing:
 tensor([[-0.3175,  0.5478],
         [-0.5563, -0.1145],
         [-0.0160, -0.4649]], requires_grad=True),
 Parameter containing:
 tensor([ 0.1935, -0.2049,  0.0430], requires_grad=True),
 Parameter containing:
 tensor([[ 0.2744, -0.3202, -0.2248],
         [-0.4927, -0.0760, -0.1611],
         [ 0.5661,  0.1391, -0.3362],
         [ 0.5018,  0.4406,  0.0258]], requires_grad=True),
 Parameter containing:
 tensor([ 0.1454, -0.0329, -0.4958, -0.4972], requires_grad=True),
 Parameter containing:
 tensor([[ 0.2405, -0.3910, -0.2822, -0.3223]], requires_grad=True),
 Parameter containing:
 tensor([-0.0872], requires_grad=True)]

In [22]:
# Transfer-learning recipe: freeze the existing network, attach a fresh head,
# and hand the optimizer only the parameters that are still trainable.
model = MLP()
print([p for p in model.parameters() if p.requires_grad])  # all six tensors are trainable at first

for p in model.parameters(): # freeze all layers
    p.requires_grad = False

# The replacement head is created after the freeze, so its weight and bias
# still have requires_grad=True and are the only trainable tensors left.
# Note that this swaps the Linear+Sigmoid stage for a bare 10-output Linear.
model.fc_out = nn.Linear(4,10)

params = [p for p in model.parameters() if p.requires_grad]
print(params)

# Only the new head's parameters are registered with the optimizer.
optimizer = optim.Adam(params, lr=0.1)

[Parameter containing:
tensor([[-0.0446, -0.5147],
        [-0.6061, -0.6079],
        [-0.3588,  0.3693]], requires_grad=True), Parameter containing:
tensor([0.3321, 0.6777, 0.0884], requires_grad=True), Parameter containing:
tensor([[ 0.0613, -0.4069, -0.0708],
        [-0.0148,  0.2663, -0.2451],
        [-0.5550,  0.4612, -0.2591],
        [ 0.2561,  0.3127, -0.4289]], requires_grad=True), Parameter containing:
tensor([ 0.3894,  0.3136, -0.3341,  0.4260], requires_grad=True), Parameter containing:
tensor([[ 0.4476, -0.1449, -0.1200,  0.4762]], requires_grad=True), Parameter containing:
tensor([0.0802], requires_grad=True)]
[Parameter containing:
tensor([[ 0.1638,  0.3244, -0.0308, -0.3449],
        [-0.1192, -0.4191, -0.4503,  0.4029],
        [ 0.2735,  0.4003, -0.0317, -0.2537],
        [ 0.0182, -0.3743,  0.0797, -0.2049],
        [-0.3190,  0.2987, -0.2477, -0.4538],
        [-0.4307,  0.3057,  0.0184, -0.0692],
        [ 0.1070,  0.1515,  0.0853, -0.4439],
        [ 0.2560, -0

In [23]:
# named_parameters() pairs every parameter with its dotted path inside the model.
list(model.named_parameters())
# [('fc1.0.weight', weight), ('fc1.0.bias', bias), ..., ('fc_out.weight', weight), ('fc_out.bias', bias)]

[('fc1.0.weight',
  Parameter containing:
  tensor([[-0.0446, -0.5147],
          [-0.6061, -0.6079],
          [-0.3588,  0.3693]])),
 ('fc1.0.bias',
  Parameter containing:
  tensor([0.3321, 0.6777, 0.0884])),
 ('fc2.0.weight',
  Parameter containing:
  tensor([[ 0.0613, -0.4069, -0.0708],
          [-0.0148,  0.2663, -0.2451],
          [-0.5550,  0.4612, -0.2591],
          [ 0.2561,  0.3127, -0.4289]])),
 ('fc2.0.bias',
  Parameter containing:
  tensor([ 0.3894,  0.3136, -0.3341,  0.4260])),
 ('fc_out.weight',
  Parameter containing:
  tensor([[ 0.1638,  0.3244, -0.0308, -0.3449],
          [-0.1192, -0.4191, -0.4503,  0.4029],
          [ 0.2735,  0.4003, -0.0317, -0.2537],
          [ 0.0182, -0.3743,  0.0797, -0.2049],
          [-0.3190,  0.2987, -0.2477, -0.4538],
          [-0.4307,  0.3057,  0.0184, -0.0692],
          [ 0.1070,  0.1515,  0.0853, -0.4439],
          [ 0.2560, -0.4953, -0.2089,  0.3581],
          [ 0.1996,  0.1609,  0.0887, -0.3851],
          [-0.1657, -0.

In [24]:
# Print each parameter's dotted name on one line and its tensor right below it.
for name, p in model.named_parameters():
    print(name, p, sep="\n")

fc1.0.weight
Parameter containing:
tensor([[-0.0446, -0.5147],
        [-0.6061, -0.6079],
        [-0.3588,  0.3693]])
fc1.0.bias
Parameter containing:
tensor([0.3321, 0.6777, 0.0884])
fc2.0.weight
Parameter containing:
tensor([[ 0.0613, -0.4069, -0.0708],
        [-0.0148,  0.2663, -0.2451],
        [-0.5550,  0.4612, -0.2591],
        [ 0.2561,  0.3127, -0.4289]])
fc2.0.bias
Parameter containing:
tensor([ 0.3894,  0.3136, -0.3341,  0.4260])
fc_out.weight
Parameter containing:
tensor([[ 0.1638,  0.3244, -0.0308, -0.3449],
        [-0.1192, -0.4191, -0.4503,  0.4029],
        [ 0.2735,  0.4003, -0.0317, -0.2537],
        [ 0.0182, -0.3743,  0.0797, -0.2049],
        [-0.3190,  0.2987, -0.2477, -0.4538],
        [-0.4307,  0.3057,  0.0184, -0.0692],
        [ 0.1070,  0.1515,  0.0853, -0.4439],
        [ 0.2560, -0.4953, -0.2089,  0.3581],
        [ 0.1996,  0.1609,  0.0887, -0.3851],
        [-0.1657, -0.1701,  0.4240, -0.4716]], requires_grad=True)
fc_out.bias
Parameter containing:
t

In [25]:
# modules() returns a generator object; wrap it in list() to see what it yields.
model.modules()

<generator object Module.modules at 0x1252983c0>

In [27]:
# The listing starts with the model itself, then every nested sub-module:
# each Sequential container is followed by the layers it holds.
list(model.modules())

[MLP(
   (fc1): Sequential(
     (0): Linear(in_features=2, out_features=3, bias=True)
     (1): ReLU()
   )
   (fc2): Sequential(
     (0): Linear(in_features=3, out_features=4, bias=True)
     (1): ReLU()
   )
   (fc_out): Linear(in_features=4, out_features=10, bias=True)
 ),
 Sequential(
   (0): Linear(in_features=2, out_features=3, bias=True)
   (1): ReLU()
 ),
 Linear(in_features=2, out_features=3, bias=True),
 ReLU(),
 Sequential(
   (0): Linear(in_features=3, out_features=4, bias=True)
   (1): ReLU()
 ),
 Linear(in_features=3, out_features=4, bias=True),
 ReLU(),
 Linear(in_features=4, out_features=10, bias=True)]

In [33]:
# isinstance() picks the nn.Linear layers out of the full module walk; the
# filtered list is built once and reused for all three printouts.
linear_layers = [m for m in model.modules() if isinstance(m, nn.Linear)]
separator = '=' * 100

print(linear_layers)
print(separator)
print([layer.weight for layer in linear_layers])
print(separator)
# No backward pass has been run yet, so every .grad is still None.
print([layer.weight.grad for layer in linear_layers])

[Linear(in_features=2, out_features=3, bias=True), Linear(in_features=3, out_features=4, bias=True), Linear(in_features=4, out_features=10, bias=True)]
[Parameter containing:
tensor([[-0.0446, -0.5147],
        [-0.6061, -0.6079],
        [-0.3588,  0.3693]]), Parameter containing:
tensor([[ 0.0613, -0.4069, -0.0708],
        [-0.0148,  0.2663, -0.2451],
        [-0.5550,  0.4612, -0.2591],
        [ 0.2561,  0.3127, -0.4289]]), Parameter containing:
tensor([[ 0.1638,  0.3244, -0.0308, -0.3449],
        [-0.1192, -0.4191, -0.4503,  0.4029],
        [ 0.2735,  0.4003, -0.0317, -0.2537],
        [ 0.0182, -0.3743,  0.0797, -0.2049],
        [-0.3190,  0.2987, -0.2477, -0.4538],
        [-0.4307,  0.3057,  0.0184, -0.0692],
        [ 0.1070,  0.1515,  0.0853, -0.4439],
        [ 0.2560, -0.4953, -0.2089,  0.3581],
        [ 0.1996,  0.1609,  0.0887, -0.3851],
        [-0.1657, -0.1701,  0.4240, -0.4716]], requires_grad=True)]
[None, None, None]


In [35]:
# Weight initialization: re-draw the weight matrix of every nn.Linear found
# anywhere in the model. Only .weight is passed to the initializer, so the
# biases are left as they were.
linear_layers = [m for m in model.modules() if isinstance(m, nn.Linear)]
for layer in linear_layers:
    nn.init.kaiming_normal_(layer.weight)
    # nn.init.constant_(layer.weight, 1)

print([layer.weight for layer in linear_layers])

[Parameter containing:
tensor([[ 0.5670, -0.0343],
        [ 1.8016, -0.4220],
        [ 0.3171,  0.3611]]), Parameter containing:
tensor([[-1.4606, -1.9580, -0.4722],
        [ 0.6781,  1.4849,  0.3239],
        [-1.0499, -0.7866, -1.1500],
        [-0.4202, -1.8598, -0.3321]]), Parameter containing:
tensor([[-0.5432,  0.7104, -0.0659, -0.1110],
        [ 0.1235, -0.2710,  0.4636, -0.7608],
        [ 1.0782,  0.4037, -1.1682,  0.1440],
        [-0.2082,  0.2691, -0.0986,  0.2202],
        [-0.9084,  1.0514,  1.1327,  0.1640],
        [-0.5923,  0.0770, -0.7696, -0.6349],
        [ 0.2742, -0.0940,  1.2302, -0.9808],
        [-1.2743,  1.0752, -0.2211,  0.4223],
        [ 0.3793,  0.6303,  0.5048, -1.4283],
        [-0.2779,  0.1493, -0.2957,  0.8790]], requires_grad=True)]


In [36]:
# children() also returns a generator object; wrap it in list() to inspect it.
model.children()

<generator object Module.children at 0x1253a7660>

In [37]:
# Only the direct sub-modules are listed; the Sequential containers appear as
# single entries and the layers inside them are not listed separately.
list(model.children())

[Sequential(
   (0): Linear(in_features=2, out_features=3, bias=True)
   (1): ReLU()
 ),
 Sequential(
   (0): Linear(in_features=3, out_features=4, bias=True)
   (1): ReLU()
 ),
 Linear(in_features=4, out_features=10, bias=True)]

In [38]:
# Each child is a callable module, so the first stage can be run on its own.
x = torch.randn(2, 2)
first_child = list(model.children())[0]
first_child(x)

tensor([[0.3589, 0.7104, 0.1674],
        [0.7278, 1.4922, 0.8476]])

In [41]:
# Chain the first two children into a new Sequential that can be called directly.
first_two = list(model.children())[:2]
print(*first_two)
sub_network = nn.Sequential(*first_two)
print(sub_network)
print(sub_network(x))

Sequential(
  (0): Linear(in_features=2, out_features=3, bias=True)
  (1): ReLU()
) Sequential(
  (0): Linear(in_features=3, out_features=4, bias=True)
  (1): ReLU()
)
Sequential(
  (0): Sequential(
    (0): Linear(in_features=2, out_features=3, bias=True)
    (1): ReLU()
  )
  (1): Sequential(
    (0): Linear(in_features=3, out_features=4, bias=True)
    (1): ReLU()
  )
)
tensor([[0.0000, 1.6661, 0.0000, 0.0000],
        [0.0000, 3.2974, 0.0000, 0.0000]])


## ModuleList vs Sequential

In [48]:
fc = nn.Linear(3, 3)
layer_list = [fc] * 5                 # five references to the same layer object
layers1 = nn.Sequential(*layer_list)  # Sequential takes the layers as separate arguments
layers2 = nn.ModuleList(layer_list)   # ModuleList takes the list itself, no '*'
print(layers1)
print(layers2)

x = torch.randn(1, 3)
print(layers1(x))  # a Sequential can be called directly

# print(layers2(x)) error!
# A ModuleList cannot be called like that, so apply its layers one at a time.
for layer in layers2:
    x = layer(x)
print(x)

Sequential(
  (0): Linear(in_features=3, out_features=3, bias=True)
  (1): Linear(in_features=3, out_features=3, bias=True)
  (2): Linear(in_features=3, out_features=3, bias=True)
  (3): Linear(in_features=3, out_features=3, bias=True)
  (4): Linear(in_features=3, out_features=3, bias=True)
)
ModuleList(
  (0-4): 5 x Linear(in_features=3, out_features=3, bias=True)
)
tensor([[ 0.9100, -0.7000, -0.7797]], grad_fn=<AddmmBackward0>)
tensor([[ 0.9100, -0.7000, -0.7797]], grad_fn=<AddmmBackward0>)


In [None]:
# Why use nn.ModuleList instead of a plain Python list?
class testNet(nn.Module):
    """Two stacked ``nn.Linear(3, 3)`` layers kept in an ``nn.ModuleList``.

    The layers are stored in an ``nn.ModuleList`` rather than a plain Python
    list (see the commented-out line in ``__init__``). A ModuleList registers
    its layers as sub-modules, so they show up in ``print(model)`` and
    ``model.parameters()`` and can be handed to an optimizer. Layers held in a
    plain list are not registered that way.
    """

    def __init__(self):
        super().__init__()

        # self.Module_List = [nn.Linear(3,3), nn.Linear(3,3)]
        self.Module_List = nn.ModuleList([nn.Linear(3,3), nn.Linear(3,3)])

    def forward(self, x):
        """Apply every layer in ``Module_List`` to ``x`` in order.

        Args:
            x: Input tensor whose last dimension has size 3.

        Returns:
            The tensor produced by the last layer.
        """
        # forward must accept (self, x): nn.Module.__call__ passes the input
        # through to it, so a zero-argument signature fails at call time.
        for layer in self.Module_List:
            x = layer(x)
        return x

# Run the network once, then inspect the structure and parameters it exposes.
model = testNet()
print(model(torch.randn(1, 3)))

print(model)

# The optimizer is given the same parameter collection that is printed here.
print(list(model.parameters()))
optimizer = optim.Adam(model.parameters(), lr=0.1)