In [125]:
from abc import ABC
import torch
from torch import nn
import torch.utils.data as Data
import torch.optim as optim

In [126]:
true_w = [2, -3.4]
true_b = 4.2
features = torch.normal(0, 1, size=(1000, 2))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += torch.normal(0, 0.0001, size=labels.size()) # 添加噪音

dataset = Data.TensorDataset(features, labels) # 数据包装

data_iter = Data.DataLoader(
    dataset=dataset,
    batch_size=10,
    shuffle=True,
    num_workers=2
)

In [127]:
class LinearNet(nn.Module, ABC):
    def __init__(self, n_feature):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(n_feature, 1, bias=False)

    def forward(self, x):
        return self.linear(x)

In [128]:
net = LinearNet(2)
loss = nn.MSELoss() # 损失函数为均方误差
optimizer = optim.SGD(net.parameters(), lr=0.03) # 优化器

grad_list = list() # 模型每次梯度更新后的梯度
input_list = list() # 模型前向传播前的输入
weight_list = list() # 模型前向传播前的权重
output_list = list() # 模型前向传播后的输出

def grad_hook(grad):
    grad_list.append(grad.clone())


def forward_pre_hook(module, data_input):
    b = module.weight.clone()
    weight_list.append(b)
    input_list.append(data_input)


def forward_hook(module, data_input, data_output):
    output_list.append(data_output)

In [129]:
# hook注册
net.linear.weight.register_hook(grad_hook)
net.linear.register_forward_pre_hook(forward_pre_hook)
net.linear.register_forward_hook(forward_hook)

for epoch in range(2):
    l = 0
    for X, y in data_iter:
        output = net(X)
        l = loss(output, y.view(-1, 1))
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
    print('epoch %d, loss: %f' % (epoch, l.item()))
    if l < 0.009:
        break

epoch 0, loss: 19.356615
epoch 1, loss: 16.588722


In [130]:
print(len(weight_list))
for i in range(5):
    print(weight_list[i])

200
tensor([[0.5649, 0.2564]], grad_fn=<CloneBackward>)
tensor([[0.8272, 0.0077]], grad_fn=<CloneBackward>)
tensor([[ 0.8027, -0.4295]], grad_fn=<CloneBackward>)
tensor([[ 0.8996, -0.4810]], grad_fn=<CloneBackward>)
tensor([[ 0.8912, -0.5298]], grad_fn=<CloneBackward>)


In [131]:
print(len(grad_list))
for i in range(5):
     print(grad_list[i])

200
tensor([[-8.7409,  8.2897]])
tensor([[ 0.8160, 14.5727]])
tensor([[-3.2319,  1.7161]])
tensor([[0.2816, 1.6280]])
tensor([[-3.5301,  3.7507]])


In [132]:
print(len(input_list))
for j in range(5):
    print(input_list[j])

200
(tensor([[ 0.7113,  0.4814],
        [-0.6823, -0.3889],
        [ 0.6921,  0.7602],
        [ 1.5265, -1.1680],
        [ 1.4280,  2.3250],
        [ 1.5904, -1.7254],
        [ 0.7305,  0.1300],
        [-0.2738, -0.4559],
        [-1.4554,  0.9210],
        [ 0.9534, -0.0612]]),)
(tensor([[-0.6283, -0.1799],
        [ 0.5119, -0.6675],
        [ 0.9803, -1.8322],
        [ 0.6820,  0.5866],
        [-0.5895, -1.8728],
        [ 0.9480,  0.4579],
        [-1.1571, -1.6336],
        [-0.0024, -0.8734],
        [-0.9567, -1.2456],
        [ 0.0862, -0.3613]]),)
(tensor([[ 0.0986, -0.5774],
        [ 1.1459,  0.1607],
        [ 0.0211, -0.6154],
        [ 1.1798, -0.4697],
        [ 0.0955,  2.1872],
        [ 0.0987,  0.6975],
        [ 0.1476,  1.2530],
        [ 0.5516,  0.4058],
        [-0.4137,  0.0755],
        [ 0.3191,  0.8621]]),)
(tensor([[-1.5407, -0.2731],
        [ 0.0529,  0.9327],
        [-0.0367, -0.7526],
        [ 1.2101,  0.2804],
        [-1.7342,  1.2527],
   

In [133]:
print(len(output_list))
for h in range(5):
    print(output_list[h])

200
tensor([[ 0.5253],
        [-0.4852],
        [ 0.5859],
        [ 0.5629],
        [ 1.4028],
        [ 0.4561],
        [ 0.4460],
        [-0.2716],
        [-0.5861],
        [ 0.5230]], grad_fn=<MmBackward>)
tensor([[-0.5211],
        [ 0.4183],
        [ 0.7968],
        [ 0.5686],
        [-0.5021],
        [ 0.7877],
        [-0.9697],
        [-0.0087],
        [-0.8009],
        [ 0.0685]], grad_fn=<MmBackward>)
tensor([[ 0.3271],
        [ 0.8508],
        [ 0.2812],
        [ 1.1488],
        [-0.8627],
        [-0.2203],
        [-0.4197],
        [ 0.2685],
        [-0.3645],
        [-0.1142]], grad_fn=<MmBackward>)
tensor([[-1.2547],
        [-0.4010],
        [ 0.3290],
        [ 0.9539],
        [-2.1627],
        [-0.6168],
        [ 0.1990],
        [-0.4621],
        [-1.6965],
        [-0.5801]], grad_fn=<MmBackward>)
tensor([[-0.4580],
        [-0.0592],
        [-0.7804],
        [ 0.6580],
        [-0.7658],
        [-0.5998],
        [ 1.4658],
        [-0

In [134]:
# 前向传播前的输入 @ 前向传播前的权重.T = 前向传播后的输出
input_list[0][0] @ weight_list[0].T - output_list[0]

tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]], grad_fn=<SubBackward0>)