In [1]:
import torch
from torch import nn 

import utils

### Parameter Management

In [2]:
# Small feed-forward network: 4 inputs -> 8 hidden units (ReLU) -> 1 output.
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    nn.Linear(8, 1),
)

Access

In [3]:
# Show all parameters of the network. Keys have the form
# "<position in the Sequential>.<parameter name>"; position 1 (the ReLU)
# contributes no entries.
net.state_dict()

OrderedDict([('0.weight',
              tensor([[ 0.0659, -0.3145, -0.4595,  0.3309],
                      [-0.3164, -0.1920, -0.1169, -0.1623],
                      [ 0.2913, -0.1785, -0.4760,  0.0714],
                      [ 0.3176, -0.1024,  0.1128, -0.0090],
                      [-0.2623,  0.4340, -0.4340, -0.1177],
                      [ 0.3110,  0.4762,  0.3736,  0.2100],
                      [-0.2461, -0.3886,  0.4134, -0.2700],
                      [-0.4494,  0.2271,  0.0446,  0.3738]])),
             ('0.bias',
              tensor([-0.3204,  0.4508,  0.1258,  0.0233,  0.0802, -0.4210,  0.0733,  0.3756])),
             ('2.weight',
              tensor([[ 0.2249, -0.3044, -0.0576, -0.3405, -0.3418, -0.3461,  0.2867,  0.2373]])),
             ('2.bias', tensor([-0.2111]))])

In [5]:
# Same view restricted to the first layer; its keys are the bare
# parameter names ("weight", "bias") without a position prefix.
net[0].state_dict()

OrderedDict([('weight',
              tensor([[ 0.0659, -0.3145, -0.4595,  0.3309],
                      [-0.3164, -0.1920, -0.1169, -0.1623],
                      [ 0.2913, -0.1785, -0.4760,  0.0714],
                      [ 0.3176, -0.1024,  0.1128, -0.0090],
                      [-0.2623,  0.4340, -0.4340, -0.1177],
                      [ 0.3110,  0.4762,  0.3736,  0.2100],
                      [-0.2461, -0.3886,  0.4134, -0.2700],
                      [-0.4494,  0.2271,  0.0446,  0.3738]])),
             ('bias',
              tensor([-0.3204,  0.4508,  0.1258,  0.0233,  0.0802, -0.4210,  0.0733,  0.3756]))])

In [6]:
# Look at the last layer's bias three ways: its Python type, the Parameter
# itself, and the tensor stored in its .data attribute.
out_bias = net[2].bias
print(type(out_bias))
print(out_bias)
print(out_bias.data)

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([-0.2111], requires_grad=True)
tensor([-0.2111])


In [7]:
# Print (name, shape) pairs first for layer 0 alone, then for the whole network.
for module in (net[0], net):
    shapes = [(name, param.shape) for name, param in module.named_parameters()]
    print(*shapes)

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


Initialization

In [8]:
def init_normal(m):
    """Initialize a plain ``nn.Linear`` layer in place; leave other modules untouched.

    Meant to be passed to ``Module.apply``. Weights are drawn from a normal
    distribution with mean 0 and standard deviation 0.01, and the bias, when
    the layer has one, is reset to zero.

    Args:
        m: Any module. Only objects whose type is exactly ``nn.Linear`` are
            modified.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.normal_(m.weight, mean=0, std=0.01)
    # A layer built with bias=False has ``bias`` set to None; zeros_ cannot
    # operate on None, so skip it instead of crashing.
    if m.bias is not None:
        nn.init.zeros_(m.bias)
        
# Run init_normal over the network through Module.apply, then display the
# first layer's parameters to confirm the new weights and the zeroed bias.
net.apply(init_normal)
net[0].state_dict()

OrderedDict([('weight',
              tensor([[ 0.0008, -0.0214,  0.0012, -0.0100],
                      [ 0.0059,  0.0013,  0.0274, -0.0189],
                      [-0.0037,  0.0055, -0.0052,  0.0020],
                      [-0.0106, -0.0135, -0.0023,  0.0007],
                      [-0.0082,  0.0043, -0.0057, -0.0021],
                      [ 0.0019, -0.0113, -0.0077,  0.0076],
                      [-0.0048,  0.0090, -0.0019, -0.0130],
                      [ 0.0116,  0.0183,  0.0042,  0.0078]])),
             ('bias', tensor([0., 0., 0., 0., 0., 0., 0., 0.]))])

In [9]:
def xavier(m):
    """Apply Xavier-uniform initialization to the weight of a plain ``nn.Linear``.

    Modules of any other type are left unchanged. The bias is not touched.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.xavier_uniform_(m.weight)
def init_42(m):
    """Fill the weight of a plain ``nn.Linear`` with the constant 42.

    Modules of any other type are left unchanged. The bias is not touched.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.constant_(m.weight, 42)

# Use a different initializer per layer: Xavier-uniform for the first linear
# layer, the constant 42 for the last one, then print the results.
first_linear, last_linear = net[0], net[2]
first_linear.apply(xavier)
last_linear.apply(init_42)
print(first_linear.weight.data[0])
print(last_linear.weight.data)

tensor([ 0.1113, -0.0471,  0.0033,  0.3732])
tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])
