In [5]:
import torch
from torch import nn
from torch.nn import init

net = nn.Sequential(nn.Linear(4, 3), nn.ReLU(), nn.Linear(3, 1))

print(net)
X = torch.rand(2, 4)
Y = net(X).sum()
print(net(X))

Sequential(
  (0): Linear(in_features=4, out_features=3, bias=True)
  (1): ReLU()
  (2): Linear(in_features=3, out_features=1, bias=True)
)
tensor([[-0.1760],
        [-0.3375]], grad_fn=<AddmmBackward>)


In [6]:
print(type(net.named_parameters()))
for name, param in net.named_parameters():
    print(name, param.size)

<class 'generator'>
0.weight <built-in method size of Parameter object at 0x7f4a97119090>
0.bias <built-in method size of Parameter object at 0x7f4a971193f0>
2.weight <built-in method size of Parameter object at 0x7f4a97119900>
2.bias <built-in method size of Parameter object at 0x7f4a97119990>


In [7]:
for name, param in net[0].named_parameters():
    print(name, param)

weight Parameter containing:
tensor([[ 0.2087, -0.4285,  0.1149,  0.2005],
        [ 0.0689, -0.2978,  0.3647,  0.0465],
        [-0.3124, -0.3844,  0.1328,  0.4543]], requires_grad=True)
bias Parameter containing:
tensor([-0.3527, -0.1940,  0.4125], requires_grad=True)


In [8]:
class MyModel(nn.Module):
    def __init__(self, **kwargs):
        super(MyModel, self).__init__(**kwargs)
        
        self.weight1 = nn.Parameter(torch.rand(20, 20))
        self.wight2 = torch.rand(20, 20)
    def forward(X):
        pass

n = MyModel()
for name, param in n.named_parameters():
    print(name, param.size())

weight1 torch.Size([20, 20])


In [9]:
weight_0 = list(net[0].parameters())[0]
print(weight_0.data)
print(weight_0.grad)
Y.backward()
print(weight_0.grad)

tensor([[ 0.2087, -0.4285,  0.1149,  0.2005],
        [ 0.0689, -0.2978,  0.3647,  0.0465],
        [-0.3124, -0.3844,  0.1328,  0.4543]])
None
tensor([[ 0.0000,  0.0000,  0.0000,  0.0000],
        [-0.2250, -0.2045, -0.3594, -0.3535],
        [-0.7986, -0.5637, -0.6632, -0.8660]])


In [10]:
for name, param in net.named_parameters():
    if 'weight' in name:
        init.normal_(param, mean=0, std=0.01)
        print(name, param.data)

0.weight tensor([[ 0.0073,  0.0119,  0.0147, -0.0060],
        [-0.0030,  0.0109,  0.0037, -0.0024],
        [ 0.0194,  0.0010, -0.0099,  0.0078]])
2.weight tensor([[ 0.0136, -0.0066,  0.0025]])


In [11]:
for name, param in net.named_parameters():
    if 'bias' in name:
        init.constant_(param, val=0)
        print(name, param.data)

0.bias tensor([0., 0., 0.])
2.bias tensor([0.])


In [12]:
def init_weight_(tensor):
    with torch.no_grad():
        tensor.uniform_(-10, 10)
        tensor *= (tensor.abs() > 5).float()

for name, param in net.named_parameters():
    if 'weight' in name:
        init_weight_(param)
        print(name, param.data)

0.weight tensor([[ 0.0000,  5.7990,  0.0000,  0.0000],
        [-0.0000,  9.3435,  0.0000,  5.1931],
        [-7.7816, -0.0000, -0.0000,  0.0000]])
2.weight tensor([[-6.3818,  0.0000,  5.6176]])
