In [1]:
from abc import ABC
import torch
from torch import nn
import torch.utils.data as Data
import torch.optim as optim

In [2]:
true_w = [2, -3.4]
true_b = 4.2
features = torch.normal(0, 1, size=(1000, 2))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += torch.normal(0, 0.0001, size=labels.size())  # 添加噪音

dataset = Data.TensorDataset(features, labels)  # 数据包装

data_iter = Data.DataLoader(
    dataset=dataset,
    batch_size=10,
    shuffle=True,
    num_workers=2
)

In [3]:
class LinearNet(nn.Module, ABC):
    def __init__(self, n_feature):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(n_feature, 1, bias=False)

    def forward(self, x):
        return self.linear(x)

In [4]:
net = LinearNet(2)
loss = nn.MSELoss()  # 损失函数为均方误差
optimizer = optim.SGD(net.parameters(), lr=0.03)  # 优化器

grad_list = list()  # 模型每次梯度更新后的梯度
input_list = list()  # 模型前向传播前的输入
weight_list = list()  # 模型前向传播前的权重
output_list = list()  # 模型前向传播后的输出


def grad_hook(grad):
    grad_list.append(grad.clone())


def forward_pre_hook(module, data_input):
    b = module.weight.clone()
    weight_list.append(b)
    input_list.append(data_input)


def forward_hook(module, data_input, data_output):
    output_list.append(data_output)

In [5]:
# hook注册
net.linear.weight.register_hook(grad_hook)
net.linear.register_forward_pre_hook(forward_pre_hook)
net.linear.register_forward_hook(forward_hook)

for epoch in range(2):
    l = 0
    for X, y in data_iter:
        output = net(X)
        l = loss(output, y.view(-1, 1))
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
    print('epoch %d, loss: %f' % (epoch, l.item()))
    if l < 0.009:
        break

epoch 0, loss: 17.749737
epoch 1, loss: 18.216667


In [6]:
print(len(weight_list))
for i in range(5):
    print(weight_list[i])

200
tensor([[0.4668, 0.1408]], grad_fn=<CloneBackward>)
tensor([[0.4776, 0.0549]], grad_fn=<CloneBackward>)
tensor([[ 0.5221, -0.2043]], grad_fn=<CloneBackward>)
tensor([[ 0.5780, -0.2440]], grad_fn=<CloneBackward>)
tensor([[ 0.7785, -0.8501]], grad_fn=<CloneBackward>)


In [7]:
print(len(grad_list))
for i in range(5):
    print(grad_list[i])

200
tensor([[-0.3616,  2.8623]])
tensor([[-1.4814,  8.6381]])
tensor([[-1.8654,  1.3237]])
tensor([[-6.6819, 20.2030]])
tensor([[-0.1126,  3.4293]])


In [8]:
print(len(input_list))
for j in range(5):
    print(input_list[j])

200
(tensor([[-0.7883,  1.6140],
        [ 0.0248,  1.7689],
        [-0.6858, -0.3681],
        [-0.6987, -0.2464],
        [-0.4242,  0.6962],
        [-0.1833, -0.9785],
        [ 1.0593,  0.1734],
        [ 0.0670,  0.0579],
        [ 1.2751,  1.7562],
        [ 1.3918,  1.4521]]),)
(tensor([[ 0.9305,  0.1268],
        [ 0.0809, -1.4031],
        [-0.2083, -0.6231],
        [-0.4751, -1.3295],
        [ 0.8351, -0.6900],
        [-1.1602,  1.1624],
        [-0.8936,  2.1308],
        [ 0.0517,  1.0283],
        [-0.7746, -0.0962],
        [-1.3538, -0.0091]]),)
(tensor([[-0.9083, -0.3691],
        [-0.3740,  0.2926],
        [-0.5948,  0.6339],
        [ 0.8718, -0.3834],
        [ 1.3070,  0.9047],
        [ 1.2523,  0.2241],
        [-0.6087,  0.6298],
        [-0.5499,  0.7431],
        [-0.7246, -1.1253],
        [ 0.6662, -0.3615]]),)
(tensor([[ 0.4770, -0.4419],
        [ 0.6602,  0.8987],
        [ 0.4041, -2.3535],
        [ 0.8901, -0.7783],
        [ 0.5339, -1.4236],
   

In [9]:
print(len(output_list))
for h in range(5):
    print(output_list[h])

200
tensor([[-0.1408],
        [ 0.2605],
        [-0.3719],
        [-0.3608],
        [-0.1000],
        [-0.2233],
        [ 0.5189],
        [ 0.0394],
        [ 0.8424],
        [ 0.8541]], grad_fn=<MmBackward>)
tensor([[ 0.4514],
        [-0.0384],
        [-0.1337],
        [-0.2999],
        [ 0.3610],
        [-0.4903],
        [-0.3099],
        [ 0.0812],
        [-0.3753],
        [-0.6471]], grad_fn=<MmBackward>)
tensor([[-0.3988],
        [-0.2550],
        [-0.4400],
        [ 0.5334],
        [ 0.4975],
        [ 0.6080],
        [-0.4464],
        [-0.4389],
        [-0.1484],
        [ 0.4216]], grad_fn=<MmBackward>)
tensor([[ 0.3835],
        [ 0.1623],
        [ 0.8078],
        [ 0.7044],
        [ 0.6559],
        [ 0.8724],
        [ 0.7898],
        [-0.6375],
        [ 0.5135],
        [ 0.0272]], grad_fn=<MmBackward>)
tensor([[-0.8045],
        [ 0.1354],
        [ 0.0472],
        [ 0.9661],
        [-0.0939],
        [ 0.9369],
        [ 0.2980],
        [ 0

In [10]:
# 前向传播前的输入 @ 前向传播前的权重.T = 前向传播后的输出
input_list[0][0] @ weight_list[0].T - output_list[0]

tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]], grad_fn=<SubBackward0>)