# Parameter Management

We start by focusing on an MLP with one hidden layer.

In [1]:
import torch
from torch import nn

# MLP with one hidden layer: 8 hidden units -> ReLU -> 1 output unit.
# LazyLinear leaves the input width unspecified; it is inferred from the
# first batch passed through the network.
net = nn.Sequential(
    nn.LazyLinear(8),
    nn.ReLU(),
    nn.LazyLinear(1),
)
# A batch of 2 examples with 4 features each, sampled uniformly from [0, 1).
X = torch.rand(2, 4)
net(X).shape



torch.Size([2, 1])

## Parameter Access

In [2]:
# Index 2 of the Sequential is the final LazyLinear(1) layer; state_dict()
# returns its parameters as an ordered name -> tensor mapping (weight, bias).
net[2].state_dict()

OrderedDict([('weight',
              tensor([[-0.2067, -0.3326, -0.2806, -0.0525,  0.0306,  0.0665, -0.0704, -0.3219]])),
             ('bias', tensor([0.0025]))])

## Targeted Parameters

In [3]:
# Show two views of the output layer's bias: the Python type of the parameter
# object itself, and the plain tensor of values reached through `.data`.
type(net[2].bias), net[2].bias.data

(torch.nn.parameter.Parameter, tensor([0.0025]))

In [4]:
# No backward pass has been run in the cells above, so the output layer's
# weight carries no gradient yet. Use an identity test: `is None` is the
# idiomatic check for the None singleton and, unlike `==`, is never routed
# through the tensor's overloaded comparison once a gradient exists.
net[2].weight.grad is None

True

## All Parameters at Once

In [5]:
# Walk every parameter registered on the network and pair its qualified name
# ("<module index>.<attribute>") with the shape of the underlying tensor.
[
    (label, tensor.shape)
    for label, tensor in net.named_parameters()
]

[('0.weight', torch.Size([8, 4])),
 ('0.bias', torch.Size([8])),
 ('2.weight', torch.Size([1, 8])),
 ('2.bias', torch.Size([1]))]

## Tied Parameters

In [6]:
# Keep a named reference to the layer so the very same module object can be
# placed at two positions (indices 2 and 4) of the network.
shared = nn.LazyLinear(8)
net = nn.Sequential(nn.LazyLinear(8), nn.ReLU(),
                    shared, nn.ReLU(),
                    shared, nn.ReLU(),
                    nn.LazyLinear(1))
# Run one forward pass before reading `.weight`: the layers are LazyLinear,
# so their input widths are only inferred from the first batch.
net(X)
# Compare the first weight row of position 2 with that of position 4.
print(net[2].weight.data[0] == net[4].weight.data[0])
# Overwrite a single entry through position 2 only ...
net[2].weight.data[0, 0] = 100
# ... and compare again. The rows still match, which shows that positions 2
# and 4 hold the same parameter object rather than two copies that merely
# started with equal values.
print(net[2].weight.data[0] == net[4].weight.data[0])

tensor([True, True, True, True, True, True, True, True])
tensor([True, True, True, True, True, True, True, True])
