# In [None]:
import numpy as np

def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    The value is computed from ``exp(-|z|)`` so that ``np.exp`` never receives
    a large positive argument. The naive formula overflows (RuntimeWarning)
    for strongly negative ``z`` even though the mathematical result is simply
    close to 0.

    Args:
        z: Scalar or array-like of pre-activation values.

    Returns:
        The sigmoid of ``z``: an array shaped like ``z``, or a NumPy scalar
        when ``z`` is a scalar.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # exponent is never positive, so no overflow
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- algebraically equal.
    out = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with an empty tuple unwraps 0-d results to a scalar and leaves
    # arrays of higher rank as they are.
    return out[()]

def sigmoid_derivative(a):
    """Return the sigmoid derivative expressed through the sigmoid output.

    Args:
        a: Value(s) already produced by the sigmoid, not the raw
            pre-activation.

    Returns:
        ``a * (1 - a)``, computed element-wise.
    """
    complement = 1 - a
    return a * complement

def forward_pass(w1, b1, w2, b2, x):
    """Run the two-layer sigmoid network forward.

    Args:
        w1: Hidden-layer weights; ``x`` is right-multiplied by them.
        b1: Hidden-layer bias, added to ``np.dot(x, w1)``.
        w2: Output-layer weights, applied to the hidden activations.
        b2: Output-layer bias.
        x: Network input.

    Returns:
        Dict with the weighted inputs ``'z1'``, ``'z2'`` and the activations
        ``'a1'``, ``'a2'`` of both layers, kept so that backpropagation can
        reuse them.
    """
    cache = {}

    # Hidden layer: affine map of the input followed by the sigmoid.
    cache['z1'] = np.dot(x, w1) + b1
    cache['a1'] = sigmoid(cache['z1'])

    # Output layer: affine map of the hidden activations, then the sigmoid.
    cache['z2'] = np.dot(cache['a1'], w2) + b2
    cache['a2'] = sigmoid(cache['z2'])

    return cache

def loss(w1, b1, w2, b2, x, y):
    """Return half the mean squared error of the network output against ``y``.

    Args:
        w1, b1, w2, b2: Network parameters, passed through to ``forward_pass``.
        x: Network input.
        y: Target values compared against the output activation ``'a2'``.

    Returns:
        ``0.5 * mean((a2 - y) ** 2)``, with the mean taken over every element.
    """
    prediction = forward_pass(w1, b1, w2, b2, x)['a2']
    residual = prediction - y
    return 0.5 * np.mean(np.square(residual))

def backprop(w1, b1, w2, b2, x, y):
    """Compute the analytic gradients of ``loss`` by backpropagation.

    ``loss`` is ``0.5 * np.mean((a2 - y) ** 2)``, so the derivative with
    respect to the output activation is ``(a2 - y) / N``, where ``N`` is the
    number of elements the mean runs over. Leaving out the ``1 / N`` factor
    only gives the right answer when ``N == 1``; with it, the gradients agree
    with a numerical check of ``loss`` for any batch size.

    Args:
        w1, b1, w2, b2: Network parameters, passed through to ``forward_pass``.
        x: Network input (an array; its transpose is used for ``dw1``).
        y: Target values.

    Returns:
        Dict with ``'dw1'``, ``'db1'``, ``'dw2'``, ``'db2'``. The bias
        gradients are summed over axis 0 with ``keepdims=True``.
    """
    # Forward pass; only the activations are needed for the gradients.
    forward = forward_pass(w1, b1, w2, b2, x)
    a1, a2 = forward['a1'], forward['a2']

    # Output-layer error term, scaled by the 1/N that np.mean puts in the loss.
    error = a2 - y
    delta2 = error * sigmoid_derivative(a2) / error.size

    # Hidden-layer error term: push delta2 back through w2, then through the
    # hidden sigmoid.
    delta1 = np.dot(delta2, w2.T) * sigmoid_derivative(a1)

    return {
        'dw1': np.dot(x.T, delta1),
        'db1': np.sum(delta1, axis=0, keepdims=True),
        'dw2': np.dot(a1.T, delta2),
        'db2': np.sum(delta2, axis=0, keepdims=True),
    }

def _central_difference_grad(objective, w, eps):
    """Estimate the gradient of ``objective`` at ``w`` by central differences."""
    # Force float arrays: with an integer-dtype ``w`` the perturbation ``eps``
    # would be truncated to 0 and the estimate itself would be truncated too.
    w = np.asarray(w, dtype=float)
    grad = np.zeros(w.shape, dtype=float)
    for idx in np.ndindex(w.shape):
        bump = np.zeros(w.shape, dtype=float)
        bump[idx] = eps
        grad[idx] = (objective(w + bump) - objective(w - bump)) / (2 * eps)
    return grad


def compute_dw_check(w1, b1, w2, b2, x, y, eps=1e-7):
    """Numerically estimate the weight gradients of ``loss``.

    Each weight is perturbed by ``+eps`` and ``-eps`` in turn and the gradient
    is taken as ``(loss_plus - loss_minus) / (2 * eps)``. The result is meant
    to be compared against the analytic gradients from backpropagation.

    Args:
        w1, b1, w2, b2: Network parameters. ``w1`` and ``w2`` may have an
            integer dtype; they are perturbed as floats.
        x: Network input.
        y: Target values.
        eps: Perturbation size for the central difference.

    Returns:
        Dict with float arrays ``"dw1"`` and ``"dw2"``, shaped like ``w1`` and
        ``w2``.
    """
    dw1 = _central_difference_grad(
        lambda w: loss(w, b1, w2, b2, x, y), w1, eps
    )
    dw2 = _central_difference_grad(
        lambda w: loss(w1, b1, w, b2, x, y), w2, eps
    )
    return {"dw1": dw1, "dw2": dw2}

# Single training example. Float literals are used throughout so that every
# array has a floating dtype: an integer-dtype weight array cannot hold a
# perturbation as small as eps (it truncates to 0), which would make the
# numerical gradient for that weight come out as all zeros.
X_input = np.array([[1.0, 0.0]])  # input sample
y_target = np.array([[1.0]])      # target output

# Initial weights and biases
W1 = np.array([[0.5, -0.5],
               [0.3,  0.8]])
b1 = np.array([[0.0, 0.0]])

W2 = np.array([[1.0],
               [-1.0]])
b2 = np.array([[0.0]])

# Numerical gradient check and analytic backpropagation on the same inputs
gradient_check_result = compute_dw_check(W1, b1, W2, b2, X_input, y_target)
backprop_result = backprop(W1, b1, W2, b2, X_input, y_target)

# Print both results and their absolute difference for each weight matrix
for k in ["dw1", "dw2"]:
    print(f"梯度检查 {k}：\n{gradient_check_result[k]}")
    print(f"反向传播 {k}：\n{backprop_result[k]}")
    print(f"两者差值：\n{np.abs(gradient_check_result[k] - backprop_result[k])}\n")
