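Applies a linear transformation to the incoming data: $$ \mathbf{y} = \mathbf{x} A^T + b $$ where $A$ is the weight matrix of shape `(out_features, in_features)` and $b$ is the bias vector of shape `(out_features,)`.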

In [1]:
import torch
import torch.nn.functional as F
import torch.nn as nn

In [2]:
# Input batch: 2 samples with 6 features each.
enter = torch.tensor(
    [
        [1, 2, 3, 4, 5, 6],
        [2, 3, 4, 5, 6, 7],
    ],
    dtype=torch.float32,
)
# Weight of shape (3, 6) holding 0..17 row by row; one bias value per output row.
weight = torch.arange(18, dtype=torch.float32).view(3, 6)
bias = torch.tensor([1, 2, 1], dtype=torch.float32)

# Functional form: F.linear(input, weight, bias)
linear_f = F.linear(enter, weight, bias)
linear_f

tensor([[ 71., 198., 323.],
        [ 86., 249., 410.]])

In [3]:
# The F.linear call above is equivalent to the following explicit computation:
# (2, 6) @ (3, 6)^T + (3,) -> (2, 3), with the bias broadcast over the rows.
torch.matmul(enter, weight.t()) + bias

tensor([[ 71., 198., 323.],
        [ 86., 249., 410.]])

In [4]:
# Module (class) form
linear_c = nn.Linear(
    6,          # in_features: dimension of the input features
    1,          # out_features: dimension of the output features
    bias=True,  # whether to add a learnable bias; bias=True is the default
)
linear_c(enter)

tensor([[-0.4589],
        [-0.9204]], grad_fn=<AddmmBackward0>)

In [5]:
linear_c.weight # initialised internally by default: init.kaiming_uniform_(self.weight, a=math.sqrt(5))

Parameter containing:
tensor([[-0.2607, -0.3303,  0.2691, -0.1365, -0.0455,  0.0424]],
       requires_grad=True)

In [6]:
# The weight is stored as (out_features, in_features), i.e. (1, 6) for this layer.
linear_c.weight.shape

torch.Size([1, 6])

In [7]:
linear_c.bias  # initialised by default from U(-1/sqrt(in_features), 1/sqrt(in_features)) via init.uniform_ (not kaiming_uniform_, which is applied to the weight only)

Parameter containing:
tensor([0.1742], requires_grad=True)

In [8]:
# Materialise the parameter iterator: the weight first, then the bias.
[*linear_c.parameters()]

[Parameter containing:
 tensor([[-0.2607, -0.3303,  0.2691, -0.1365, -0.0455,  0.0424]],
        requires_grad=True),
 Parameter containing:
 tensor([0.1742], requires_grad=True)]

In [9]:
# The layer acts on the last dimension only; leading dimensions pass through:
# (3, 2, 6) x (6, 1) = (3, 2, 1)
b_enter = torch.rand((3, 2, 6))
linear_c(b_enter).shape

torch.Size([3, 2, 1])