In [1]:
import torch
from torch import nn

In [2]:
# Sizes for the toy example, in order:
#   num_samples - rows in one example batch
#   num_inputs  - input features of the first linear layer
#   num_hiddens - width of the hidden layer
#   num_outputs - output features of the last linear layer
num_samples, num_inputs, num_hiddens, num_outputs = 2, 4, 8, 1

In [3]:
# Two-layer MLP: linear -> ReLU -> linear, sized by the config constants.
net = nn.Sequential(
    nn.Linear(num_inputs, num_hiddens),
    nn.ReLU(),
    nn.Linear(num_hiddens, num_outputs),
)

# Build the example batch from the same constants instead of a hard-coded
# (2, 4), so the input always matches the first layer's in_features even if
# the constants are changed.
X = torch.rand((num_samples, num_inputs))
# Sanity check: one output row per sample, i.e. (num_samples, num_outputs).
net(X).shape

torch.Size([2, 1])

In [4]:
# Show the module's repr to inspect the layer structure.
net

Sequential(
  (0): Linear(in_features=4, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=1, bias=True)
)

In [5]:
# Parameters of the last linear layer (index 2 in the Sequential), keyed by name.
net[2].state_dict()

OrderedDict([('weight',
              tensor([[-0.1255, -0.1975, -0.2669, -0.2808,  0.0040, -0.2171,  0.2860, -0.1683]])),
             ('bias', tensor([0.1726]))])

In [6]:
# Check the Python type of the layer's bias attribute.
type(net[2].bias)

torch.nn.parameter.Parameter

In [7]:
# Read the bias values as a plain tensor through the parameter's .data attribute.
net[2].bias.data

tensor([0.1726])

In [8]:
# No backward pass has been run in the cells shown so far, so the gradient is
# still None. print() is used because a bare None as the last expression of a
# cell displays nothing.
print(net[2].weight.grad)

None


In [9]:
# List every parameter of the network together with its shape.
[(key, tensor.shape) for key, tensor in net.named_parameters()]

[('0.weight', torch.Size([8, 4])),
 ('0.bias', torch.Size([8])),
 ('2.weight', torch.Size([1, 8])),
 ('2.bias', torch.Size([1]))]

In [10]:
# Shared (tied) parameters: `shared` is NOT a constant. It is an ordinary layer
# whose parameters get updated; the same module object is simply placed at two
# positions of the network.
# Layer sizes come from the config constants rather than repeated literals so
# this network stays consistent with them.
shared = nn.Linear(num_hiddens, num_hiddens)
net = nn.Sequential(
    nn.Linear(num_inputs, num_hiddens),
    nn.ReLU(),
    shared,
    nn.ReLU(),
    shared,
    nn.ReLU(),
    nn.Linear(num_hiddens, num_outputs),
)

In [11]:
net

Sequential(
  (0): Linear(in_features=4, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=8, bias=True)
  (3): ReLU()
  (4): Linear(in_features=8, out_features=8, bias=True)
  (5): ReLU()
  (6): Linear(in_features=8, out_features=1, bias=True)
)

In [12]:
# Elementwise comparison of the weights at positions 2 and 4. Both positions
# hold the same `shared` module, so every entry compares equal.
# NOTE(review): `==` only shows that the values match; an identity check such as
# `net[2].weight is net[4].weight` would show they are the same object —
# consider adding it.
net[2].weight == net[4].weight

tensor([[True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True]])