In [1]:
import torch
import torch.nn as nn
from torch.nn import init
from collections import OrderedDict
from pathlib import Path
# Make the parent of the current working directory importable so that the
# project-local `utils` module can be found. This depends on where the kernel
# was started (Path.cwd()), not on where this notebook file lives.
pwd=Path.cwd()
ROOT=pwd.parent
import sys
sys.path.append(str(ROOT))
# Star import: names used by later cells but not defined or imported in them
# (e.g. A_simple_net, Loss_cross_entropy) presumably come from here —
# TODO confirm, and consider importing them explicitly.
from utils import *
 

In [2]:
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from torch.nn import init

def build_dataset(batch_size=32, n_samples=1000, seed=None):
    """
    Build a synthetic training set and wrap it in a shuffling DataLoader.

    - x: float tensor of shape (n_samples, 2, 28, 28), drawn from a normal
      distribution with mean 0.5 and standard deviation 0.5.
    - y: class indices of shape (n_samples,), obtained as the argmax over
      dim 1 of the scores produced by a freshly initialised ``A_simple_net``
      (presumably 10 classes, 0-9 — depends on that network's output width).

    Args:
        batch_size: Number of samples per batch.
        n_samples: Total number of synthetic samples to generate.
        seed: Optional integer. When given, ``torch.manual_seed(seed)`` is
            called first so the inputs and the temporary network's random
            initialisation are repeatable. Note that this reseeds PyTorch's
            global RNG. ``None`` (default) leaves the RNG untouched.

    Returns:
        A ``DataLoader`` over ``TensorDataset(x, y)`` with ``shuffle=True``
        and ``drop_last=True``.

    Raises:
        ValueError: If ``n_samples < batch_size``. Because incomplete batches
            are dropped, such a loader would yield no batches at all.
    """
    # Fail fast: with drop_last=True fewer samples than one batch means the
    # loader is silently empty and training later divides by zero.
    if n_samples < batch_size:
        raise ValueError(
            f"n_samples ({n_samples}) must be >= batch_size ({batch_size}); "
            "drop_last=True would otherwise produce zero batches"
        )

    if seed is not None:
        torch.manual_seed(seed)

    # Inputs: standard normal scaled by 0.5 and shifted by 0.5.
    x = torch.randn(n_samples, 2, 28, 28) * 0.5 + 0.5

    # Labels are derived from the inputs via a throw-away network so that they
    # are correlated with x instead of being purely random.
    net_temp = A_simple_net()
    with torch.no_grad():  # label generation only, no gradients needed
        x_flat = net_temp.flatten(x)
        pseudo_score = net_temp.linear.simulate(x_flat)
    y = torch.argmax(pseudo_score, dim=1)

    dataset = TensorDataset(x, y)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    return dataloader

def train(epochs: int, dataloader, model, optimizer, loss_fn, use_simulate_verify=False,
          save_path="best_softmax_model.pth"):
    """
    Train ``model`` for ``epochs`` passes over ``dataloader``.

    After every epoch the mean batch loss is printed, and whenever it improves
    on the best value seen so far the model's ``state_dict`` is written to
    ``save_path``.

    Args:
        epochs: Number of passes over ``dataloader``.
        dataloader: Iterable yielding ``(x_batch, y_batch)`` tensor pairs.
        model: Module called as ``model(x_batch)``; must also expose
            ``simulate(x_batch)`` when ``use_simulate_verify`` is True.
        optimizer: Optimizer driving the parameter updates.
        loss_fn: Callable ``loss_fn(out, y_batch)`` returning a scalar loss;
            must also expose ``simulate(out, y_batch)`` when
            ``use_simulate_verify`` is True.
        use_simulate_verify: Debug aid. On the very first batch of the first
            epoch, print how far the ``simulate`` results are from the regular
            forward results.
        save_path: Destination of the best checkpoint. Defaults to the
            original hard-coded file name.

    Raises:
        ValueError: If ``dataloader`` yields no batches during an epoch
            (previously this surfaced as a ZeroDivisionError).
    """
    model.train()  # switch the module to training mode
    best_loss = float('inf')

    for epoch in range(epochs):
        total_loss = 0.0
        batch_count = 0

        for x_batch, y_batch in dataloader:
            # ---------------------- preprocessing ----------------------
            x_batch = x_batch.float()
            # Scale the batch down by its own maximum when that exceeds 1.
            # This only caps the upper end; negative values are not shifted,
            # so the result is not guaranteed to lie in [0, 1].
            batch_max = x_batch.max()
            if batch_max > 1:
                x_batch = x_batch / batch_max

            # Class-index targets are cast to long for the loss function.
            y_batch = y_batch.long()

            # ---------------------- forward pass ----------------------
            out = model(x_batch)
            loss = loss_fn(out, y_batch)

            # Optional one-off consistency check between forward and simulate.
            if use_simulate_verify and epoch == 0 and batch_count == 0:
                out_sim = model.simulate(x_batch)
                # NOTE(review): the loss check is fed the forward output `out`,
                # not `out_sim`, so it isolates loss_fn.simulate from
                # model.simulate — confirm this is intended.
                loss_sim = loss_fn.simulate(out, y_batch)
                print("=== 训练首轮验证（forward vs simulate）===")
                print(f"输出误差（L2范数）: {torch.norm(out - out_sim).item():.6f}")
                print(f"损失误差: {abs(loss.item() - loss_sim.item()):.6f}")

            # ---------------------- backward pass + update ----------------------
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # ---------------------- bookkeeping ----------------------
            total_loss += loss.item()
            batch_count += 1

        # An empty loader (e.g. drop_last=True with fewer samples than one
        # batch) would otherwise divide by zero below.
        if batch_count == 0:
            raise ValueError(
                "dataloader yielded no batches; check the dataset size, "
                "batch_size and drop_last settings"
            )

        # ---------------------- per-epoch log ----------------------
        avg_loss = total_loss / batch_count
        # Checkpoint on improvement of the *training* loss.
        if avg_loss < best_loss:
            best_loss = avg_loss
            torch.save(model.state_dict(), save_path)

        print(f"Epoch {epoch+1:2d} | Avg Loss: {avg_loss:.6f} | Best Loss: {best_loss:.6f}")

    print(f"\n训练结束！最优模型已保存为 '{save_path}'")



In [3]:
if __name__ == '__main__':
    # --- Hyperparameters -------------------------------------------------
    EPOCHS = 50           # passes over the synthetic dataset
    BATCH_SIZE = 32       # samples per mini-batch
    LEARNING_RATE = 0.5   # step size handed to SGD (used with momentum below)
    N_SAMPLES = 2000      # number of synthetic samples to generate

    # --- Data ------------------------------------------------------------
    train_dataloader = build_dataset(BATCH_SIZE, N_SAMPLES)
    print(f"数据集构建完成：共{N_SAMPLES}个样本，批次大小{BATCH_SIZE}，共{len(train_dataloader)}个批次")

    # --- Model and loss --------------------------------------------------
    # A_simple_net is paired with Loss_cross_entropy here. According to the
    # original author's notes the network has no built-in softmax and the loss
    # applies it internally — both are defined outside this notebook, confirm there.
    model = A_simple_net()
    # Alternative noted by the author: a model with a softmax output, which
    # would need the NLLLoss line below instead of Loss_cross_entropy.
    # model = Softmax_Regression()

    loss_fn = Loss_cross_entropy()
    # loss_fn = nn.NLLLoss()  # for Softmax_Regression; NLLLoss expects log-probabilities

    # --- Optimizer: SGD with momentum and L2 weight decay ------------------
    optimizer = optim.SGD(model.parameters(), LEARNING_RATE, momentum=0.9, weight_decay=1e-4)

    # --- Run ---------------------------------------------------------------
    # use_simulate_verify=True prints a one-off forward-vs-simulate comparison
    # on the first batch of the first epoch.
    train(EPOCHS, train_dataloader, model, optimizer, loss_fn, use_simulate_verify=True)

数据集构建完成：共2000个样本，批次大小32，共62个批次
=== 训练首轮验证（forward vs simulate）===
输出误差（L2范数）: 0.000000
损失误差: 0.000000
Epoch  1 | Avg Loss: 28.275754 | Best Loss: 28.275754
Epoch  2 | Avg Loss: 6.949462 | Best Loss: 6.949462
Epoch  3 | Avg Loss: 1.539084 | Best Loss: 1.539084
Epoch  4 | Avg Loss: 0.640247 | Best Loss: 0.640247
Epoch  5 | Avg Loss: 0.458476 | Best Loss: 0.458476
Epoch  6 | Avg Loss: 0.118787 | Best Loss: 0.118787
Epoch  7 | Avg Loss: 0.042850 | Best Loss: 0.042850
Epoch  8 | Avg Loss: 0.012965 | Best Loss: 0.012965
Epoch  9 | Avg Loss: 0.003017 | Best Loss: 0.003017
Epoch 10 | Avg Loss: 0.001397 | Best Loss: 0.001397
Epoch 11 | Avg Loss: 0.001406 | Best Loss: 0.001397
Epoch 12 | Avg Loss: 0.001340 | Best Loss: 0.001340
Epoch 13 | Avg Loss: 0.001444 | Best Loss: 0.001340
Epoch 14 | Avg Loss: 0.001520 | Best Loss: 0.001340
Epoch 15 | Avg Loss: 0.001721 | Best Loss: 0.001340
Epoch 16 | Avg Loss: 0.001845 | Best Loss: 0.001340
Epoch 17 | Avg Loss: 0.002057 | Best Loss: 0.001340
Epoch 18 | A