In [2]:
import torch
from torch import nn

In [4]:
# By default, PyTorch initializes weight and bias tensors uniformly, drawing from a
# range determined by the layer's dimensions (for nn.Linear: U(-1/sqrt(in_features), 1/sqrt(in_features))).
# Two lazily-shaped linear layers around a ReLU; the input widths are
# inferred from the first batch that flows through the network.
net = nn.Sequential(
    nn.LazyLinear(8),
    nn.ReLU(),
    nn.LazyLinear(1),
)
# A random batch of 2 samples with 4 features each.
X = torch.rand(2, 4)
# The forward pass materializes the lazy parameters; show the output shape.
net(X).shape




torch.Size([2, 1])

In [6]:
def init_normal(module):
    """Initialize an ``nn.Linear`` layer with small Gaussian weights and zero bias.

    Intended to be passed to ``Module.apply``, which calls it once per
    submodule. Only modules whose type is exactly ``nn.Linear`` are modified;
    every other module is left untouched.

    Args:
        module: The submodule currently being visited.
    """
    if type(module) is not nn.Linear:
        return
    # Weights are redrawn in place from N(mean=0, std=0.01).
    nn.init.normal_(module.weight, mean=0, std=0.01)
    # A layer created with bias=False has `bias is None`; nothing to reset then.
    if module.bias is not None:
        nn.init.zeros_(module.bias)

# Visit every submodule of the network and let init_normal re-initialize it.
net.apply(init_normal)
# Show the first row of the first layer's weights and the first bias entry.
tuple(param.data[0] for param in (net[0].weight, net[0].bias))

(tensor([0.0030, 0.0019, 0.0069, 0.0015]), tensor(0.))

In [9]:
# Weights can also be set to a constant. The initializer gets its own name so
# that it does not shadow the Gaussian `init_normal` defined in an earlier cell.
def init_constant(module):
    """Set the weights of an ``nn.Linear`` layer to 1 and its bias to 0.

    Intended to be passed to ``Module.apply``. Only modules whose type is
    exactly ``nn.Linear`` are modified.

    Args:
        module: The submodule currently being visited.
    """
    if type(module) is not nn.Linear:
        return
    nn.init.constant_(module.weight, 1)
    # A layer created with bias=False has `bias is None`; nothing to reset then.
    if module.bias is not None:
        nn.init.zeros_(module.bias)

net.apply(init_constant)
net[0].weight.data[0], net[0].bias.data[0]

(tensor([1., 1., 1., 1.]), tensor(0.))

In [12]:
# Different layers can be given different initializers
def init_42(module):
    """Fill the weight of an ``nn.Linear`` module with the constant 42.

    Modules whose type is not exactly ``nn.Linear`` are ignored, and the bias
    is never touched.
    """
    if type(module) is not nn.Linear:
        return
    nn.init.constant_(module.weight, val=42)
def init_normal(module):
    """Apply Xavier (Glorot) normal initialization to an ``nn.Linear`` weight.

    Modules whose type is not exactly ``nn.Linear`` are ignored, and the bias
    is never touched.
    """
    if type(module) is not nn.Linear:
        return
    nn.init.xavier_normal_(module.weight)

# First layer gets the constant initializer, last layer the Xavier one.
net[0].apply(init_42)
net[2].apply(init_normal)
# Print the first weight row of each of the two layers, one per line.
print(net[0].weight.data[0], net[2].weight.data[0], sep="\n")


tensor([42., 42., 42., 42.])
tensor([-0.4223, -0.2579, -0.0724, -0.4839,  0.1913, -0.7111,  0.5406, -0.4797])


In [13]:
# A fully custom initialization scheme is also possible
def my_init(module):
    """Custom initializer: uniform weights in [-10, 10] with small values zeroed.

    For a module whose type is exactly ``nn.Linear`` this prints the name and
    shape of its first registered parameter, redraws the weight from
    U(-10, 10), and then zeroes every entry whose magnitude is below 5.
    Other modules are ignored; the bias is not modified.
    """
    if type(module) is not nn.Linear:
        return
    first_name, first_param = next(iter(module.named_parameters()))
    print("Init", first_name, first_param.shape)
    nn.init.uniform_(module.weight, a=-10, b=10)
    # Boolean mask that is True where |w| >= 5; multiplying by it zeroes the rest.
    keep_mask = module.weight.data.abs() >= 5
    module.weight.data *= keep_mask

# Call my_init on every submodule of the network.
net.apply(my_init)
# Display the first two rows of the first layer's weight matrix.
net[0].weight[:2]


Init weight torch.Size([8, 4])
Init weight torch.Size([1, 8])


tensor([[8.3773, 8.3314, 0.0000, -0.0000],
        [0.0000, 0.0000, -0.0000, 0.0000]], grad_fn=<SliceBackward0>)

In [14]:
# Parameters can also be overwritten directly: shift every weight of the
# first layer by 1, then set its top-left entry to 42.
net[0].weight.data.add_(1)
net[0].weight.data[0, 0] = 42
# Display the first row to confirm both edits.
net[0].weight.data[0]

tensor([42.0000,  9.3314,  1.0000,  1.0000])