---
title: "人工智能 - 求导与反向传播"
date: 2025-05-05T21:40:32+08:00
author: "Liu Zheng"
tags: ["笔记", "实验"]
categories: "实验笔记"
---

## 求导与反向传播

假设等式 $ y = w^2 $ ， 当 $ w = 3 $ 时， $ y = 9 $

给定 $ w $ 一个增量 $ \varepsilon = 0.01 $ ，当 $ w = w + \varepsilon $ 时， $ y = (3.01)^2 = 9.0601 = 9 + 6\varepsilon + \varepsilon^2 $

给定 $ w $ 一个增量 $ \varepsilon = 0.02 $ ，当 $ w = w + \varepsilon $ 时， $ y = (3.02)^2 = 9.1204 = 9 + 6\varepsilon + \varepsilon^2 $

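也就是说，增量比 $ \frac{\Delta y}{\varepsilon} = \frac{6\varepsilon + \varepsilon^2}{\varepsilon} = 6 + \varepsilon $ ，当 $ \varepsilon \to 0 $ 时 $ \frac{\partial y}{\partial w} = 6 = 2w $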

在进行梯度下降时，我们需要反复执行更新公式： $ w = w - \alpha \frac{\partial J(\vec{w}, b)}{\partial w} $。只要学习率 $ \alpha $ 足够小，每次更新都会沿着损失下降的方向前进，从而逐步逼近最优解。

In [1]:
import torch
import pandas
import numpy
import matplotlib.pyplot as plt
import time
from sympy import latex, diff, symbols
from IPython.display import display, Math
from torch.utils.tensorboard import SummaryWriter

plt.style.use('../matplotlib.mplstyle')

# Select the compute devices. Fall back gracefully so the notebook still runs
# on machines with a single GPU, or with no CUDA support at all, instead of
# failing later when tensors are moved with `.to(DEVICE0)`.
if torch.cuda.is_available():
    DEVICE0 = torch.device('cuda:0')
    DEVICE1 = torch.device('cuda:1') if torch.cuda.device_count() > 1 else DEVICE0
else:
    DEVICE0 = DEVICE1 = torch.device('cpu')

In [2]:
# Declare the symbolic variables: J (rebound to concrete expressions below)
# and w (the variable we differentiate with respect to).
J, w = symbols('J w')

In [3]:
# Render each sample function next to its symbolic derivative with respect
# to w. J is rebound on every iteration and ends up as the last expression.
for J in (1 / w, w ** 2, w ** 3):
    display(Math(f"函数：{latex(J)}, 导数：{latex(diff(J, w))}"))

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

接下来我们使用 `TensorBoard` 输出一个神经网络的计算图

In [4]:
# Model definition
class SoftmaxRegression(torch.nn.Module):
    """Small fully connected classifier that returns raw class logits.

    The network is ``Linear -> ReLU -> Linear``. No softmax is applied in
    ``forward``; the output is meant to be fed to a loss that works on
    logits (the training code in this file uses ``CrossEntropyLoss``).

    Args:
        in_features: Number of input features per sample (default 4).
        hidden_features: Width of the hidden layer (default 2).
        num_classes: Number of output logits per sample (default 3).
    """

    def __init__(self, in_features=4, hidden_features=2, num_classes=3):
        super().__init__()
        # Keep the creation order fc1 -> fc2 so that seeded parameter
        # initialisation stays reproducible.
        self.fc1 = torch.nn.Linear(in_features, hidden_features)
        self.fc2 = torch.nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """Return the logits for a batch ``x`` whose last dim is ``in_features``."""
        hidden = torch.nn.functional.relu(self.fc1(x))
        return self.fc2(hidden)

# Train the model
def train(writer, model, epochs, learning_rate, x, y):
    """Train ``model`` with full-batch SGD and log the loss to ``writer``.

    Args:
        writer: Object exposing ``add_scalar(tag, value, step)`` (for example
            a TensorBoard ``SummaryWriter``).
        model: Module whose output for ``x`` is a batch of class logits.
        epochs: Number of optimisation steps; every step uses all of ``x``.
        learning_rate: Step size passed to ``torch.optim.SGD``.
        x: Input batch fed to ``model``.
        y: Target class indices, as expected by ``CrossEntropyLoss``.

    Returns:
        A list with the loss value (Python float) of every epoch, in order.
    """
    # CrossEntropyLoss takes raw logits and applies the softmax and the
    # logarithm internally, so the model does not need a softmax layer.
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    history = []
    for epoch in range(epochs):
        pred = model(x)  # forward pass
        loss = criterion(pred, y)  # loss computation
        optimizer.zero_grad()  # clear gradients left over from the last step
        loss.backward()  # back-propagation
        optimizer.step()  # parameter update
        loss_value = loss.item()
        history.append(loss_value)
        # Log every 10th epoch; the step is the zero-based epoch index.
        if (epoch + 1) % 10 == 0:
            writer.add_scalar('training loss', loss_value, epoch)
    return history

In [5]:
# Standardisation (z-score feature scaling) with NumPy
def standardize(data):
    """Scale ``data`` to zero mean and unit variance along axis 0.

    Args:
        data: Array-like that supports ``data - mean`` (the script in this
            file passes pandas Series).

    Returns:
        ``(data - mean) / std`` computed along axis 0. A feature whose
        standard deviation is 0 (a constant feature) is mapped to 0 instead
        of producing NaN from a division by zero.
    """
    mean = numpy.mean(data, axis=0)  # mean per feature
    std = numpy.std(data, axis=0)    # standard deviation per feature
    # Replace a zero spread by 1: the numerator is already 0 for a constant
    # feature, so the result is 0. ones_like keeps the dtype of ``std``.
    safe_std = numpy.where(std == 0, numpy.ones_like(std), std)
    return (data - mean) / safe_std

# Compute the classification accuracy
def accuracy(model, x, y):
    """Return the fraction of samples in ``x`` that ``model`` labels as ``y``.

    The predicted label is the index of the largest value along dim 1 of the
    model output. Gradient tracking is disabled during the evaluation.
    """
    with torch.no_grad():
        logits = model(x)
        predicted = logits.argmax(dim=1)
        hits = predicted.eq(y).sum().item()
        total = y.size(0)
        return hits / total

In [6]:
# Dataset location (Iris data set)
PATH_DATASET = 'https://gairuo.com/file/data/dataset/iris.data'

DATASET = pandas.read_csv(PATH_DATASET)
# Encode the species names as integer class ids.
DATASET['species'] = pandas.factorize(DATASET['species'])[0]
# Standardise each feature column. NOTE: the statistics are computed over the
# full data set, i.e. before the hold-out rows are split off below.
DATASET['sepal_length'], DATASET['sepal_width'] = standardize(DATASET['sepal_length']), standardize(DATASET['sepal_width'])
DATASET['petal_length'], DATASET['petal_width'] = standardize(DATASET['petal_length']), standardize(DATASET['petal_width'])

# Hold out 20 random rows for testing and train on the rest.
DATASET_TEST = DATASET.sample(n=20)
DATASET = DATASET.drop(DATASET_TEST.index)
display(DATASET)

# Labels use numpy.int64 explicitly: numpy.long was removed in NumPy 1.24 and
# is a platform-dependent C long in NumPy 2.x, while the loss needs int64.
X = torch.from_numpy(numpy.array(DATASET[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']], dtype=numpy.float32)).to(DEVICE0)
Y = torch.from_numpy(numpy.array(DATASET['species'].tolist(), dtype=numpy.int64)).long().to(DEVICE0)

MODEL = SoftmaxRegression().to(DEVICE0)
WRITER = SummaryWriter(f"runs/logs-{int(time.time())}")
try:
    # Export the computation graph, then train while logging the loss.
    WRITER.add_graph(MODEL, X)
    HISTORY = train(WRITER, MODEL, 100, 0.5, X, Y)
finally:
    # Flush and close the event file even if tracing or training fails.
    WRITER.close()

X_TEST = torch.from_numpy(numpy.array(DATASET_TEST[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']], dtype=numpy.float32)).to(DEVICE0)
Y_TEST = torch.from_numpy(numpy.array(DATASET_TEST['species'].tolist(), dtype=numpy.int64)).long().to(DEVICE0)

ACCURACY = accuracy(MODEL, X_TEST, Y_TEST)

print(f"Accuracy is: {ACCURACY * 100:.2f}%")

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,-0.900681,1.032057,-1.341272,-1.312977,0
1,-1.143017,-0.124958,-1.341272,-1.312977,0
2,-1.385353,0.337848,-1.398138,-1.312977,0
3,-1.506521,0.106445,-1.284407,-1.312977,0
4,-1.021849,1.263460,-1.341272,-1.312977,0
...,...,...,...,...,...
144,1.038005,0.569251,1.103953,1.710902,2
145,1.038005,-0.124958,0.819624,1.447956,2
146,0.553333,-1.281972,0.705893,0.922064,2
148,0.432165,0.800654,0.933356,1.447956,2


Accuracy is: 95.00%


### 反向传播的原理

这里给出三层神经网络的传播示意图：

![传播原理](./202503141905.png)

In [7]:
# import manim
# 
# class ManimLayer(manim.VGroup):
#     def __init__(self, coordinate=(0, 0, 0), shape=(1, 1), **kwargs):
#         super().__init__(**kwargs)
#         text = manim.Text(str(shape), font="Sarasa Fixed SC", font_size=24)
#         instance = manim.RoundedRectangle(corner_radius=0.1, stroke_width=1).surround(text)
#         self.add(instance, text)
#         self.move_to(coordinate)
# 
# class ManimNode(manim.VGroup):
#     def __init__(self, coordinate=(0, 0, 0), text_latex = "y = x^2", **kwargs):
#         super().__init__(**kwargs)
#         text = manim.MathTex(text_latex)
#         instance = manim.RoundedRectangle().surround(text)
#         self.add(instance, text)
#         self.move_to(coordinate)
# 
# class ManimScene(manim.Scene):
#     def construct(self):
#         layer0 = ManimLayer(coordinate=(2, 0, 0))
#         layer1 = ManimLayer(coordinate=(4, 0, 0))
#         layer2 = ManimLayer(coordinate=(6, 0, 0))
#         self.play(manim.FadeIn(layer0), manim.FadeIn(layer1), manim.FadeIn(layer2))

In [8]:
# %manim -qm -v WARNING ManimScene