In [1]:
# 1. 环境和依赖检查
import torch
import numpy as np
import os
import yaml

# Report the installed PyTorch build and which accelerator (if any) it can see.
print("[INFO] Torch version:", torch.__version__)
cuda_available = torch.cuda.is_available()
print("[INFO] CUDA available:", cuda_available)
if not cuda_available:
    print("[INFO] CUDA device not found, using CPU.")
else:
    # Index 0 is the first CUDA device PyTorch enumerates.
    print("[INFO] CUDA device:", torch.cuda.get_device_name(0))

# Install tqdm/scipy (uncomment if they are missing; %pip targets the running kernel's environment)
# %pip install tqdm scipy

[INFO] Torch version: 2.5.1+cu121
[INFO] CUDA available: True
[INFO] CUDA device: NVIDIA GeForce RTX 4080 Laptop GPU


In [2]:
# 2. 配置、模型和工具模块导入
from data.dataset import get_dataloaders
from models.mlp import MLPRegressor
from utils.metrics import mse_loss
import tqdm

In [3]:
# 3. Load the configuration
cfg_path = "configs/mlp.yaml"
# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# platform locale encoding, which can mis-decode or reject non-ASCII text
# (e.g. comments) in the YAML file on Windows.
with open(cfg_path, encoding="utf-8") as f:
    # safe_load builds only plain Python objects (dict/list/str/number/...).
    cfg = yaml.safe_load(f)
print("配置内容：", cfg)


配置内容： {'seed': 42, 'epochs': 100, 'batch_size': 64, 'lr': 0.001, 'hidden_dims': [150, 150, 150], 'activation': 'relu', 'train_ratio': 0.8, 'early_stop_patience': 10}


In [4]:
# 4. 设置随机种子与训练参数
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch global random number generators.

    Args:
        seed: Integer seed applied to all three generators.

    Returns:
        None. The generators are seeded as a side effect.
    """
    # Imported locally because the notebook's import cells do not bring in
    # the standard-library `random` module.
    import random

    # Python's own generator was previously left unseeded, so any code that
    # draws from `random` would differ between runs.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
# Seed the generators from the config before the data and model cells run.
set_seed(cfg['seed'])

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print("Using device:", device)


Using device: cuda


In [5]:
# 5. Data loading
scaler_path = "checkpoints/mlp_minmaxscaler.pkl"
# The checkpoints/ directory is otherwise only created in a later cell.
# Create it here so a fresh-kernel run does not depend on a previous run
# having made it (presumably get_dataloaders writes the scaler to
# scaler_path -- confirm against data/dataset.py).
os.makedirs(os.path.dirname(scaler_path), exist_ok=True)

# Allow the dataset location to be set via an optional 'mat_path' key in the
# config; the original machine-specific path is kept as the default so
# existing configs behave exactly as before.
mat_path = cfg.get(
    'mat_path',
    'D:/AiProjects/UCLmaster/dlfitting_verdict/VERDICT_training/AS_Z_fixdv/TrainingSet.mat',
)

train_loader, val_loader = get_dataloaders(
    mat_path=mat_path,
    batch_size=cfg['batch_size'],
    train_ratio=cfg['train_ratio'],
    seed=cfg['seed'],
    scaler_path=scaler_path
)
print("[INFO] 训练/验证数据已准备好。")


[INFO] 训练/验证数据已准备好。


In [6]:
# 6. Build the model, optimizer and loss
# Pull a single batch to read the layer sizes off the data itself.
# Assumes batches are laid out as (batch, features) -- TODO confirm.
X_sample, y_sample = next(iter(train_loader))
input_dim, output_dim = X_sample.shape[1], y_sample.shape[1]

model = MLPRegressor(
    input_dim,
    output_dim,
    cfg['hidden_dims'],
    cfg['activation'],
)
model = model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=cfg['lr'])
criterion = torch.nn.MSELoss()
print("[INFO] 模型初始化完成。")


[INFO] 模型初始化完成。


In [7]:
# 7. Prepare the log file and checkpoint locations
log_dir, ckpt_dir = 'logs', 'checkpoints'
log_path = os.path.join(log_dir, 'mlp_log.txt')
ckpt_path = os.path.join(ckpt_dir, 'mlp_best.pt')

# Both output directories must exist before anything is written into them.
for _out_dir in (log_dir, ckpt_dir):
    os.makedirs(_out_dir, exist_ok=True)

# Write the column header only when the log does not exist yet; an existing
# log is left untouched so later appends continue the same file.
if not os.path.exists(log_path):
    with open(log_path, 'w') as log_file:
        log_file.write("Epoch\tTrainLoss\tValLoss\n")


In [8]:
# 8. Main training loop (with tqdm progress bars)
from tqdm.notebook import tqdm

epochs = cfg['epochs']
best_loss = float('inf')
patience = 0  # consecutive epochs without a new best validation loss

for epoch in tqdm(range(epochs), desc="Epoch"):
    # ---- Training pass ----
    model.train()
    total_loss = 0.0
    train_samples = 0
    for X, y in tqdm(train_loader, desc=f"Train {epoch+1}", leave=False):
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        pred = model(X)
        loss = criterion(pred, y)
        loss.backward()
        optimizer.step()
        # The criterion yields a per-batch mean, so weight it by the batch
        # size to keep a smaller final batch from being over-represented.
        total_loss += loss.item() * X.size(0)
        train_samples += X.size(0)
    # Divide by the number of samples actually seen rather than
    # len(loader.dataset): the two differ when the loader drops the last
    # batch or uses a sampler that does not visit every sample.
    avg_train_loss = total_loss / max(train_samples, 1)

    # ---- Validation pass ----
    model.eval()
    total_val_loss = 0.0
    val_samples = 0
    with torch.no_grad():
        for X, y in val_loader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            loss = criterion(pred, y)
            total_val_loss += loss.item() * X.size(0)
            val_samples += X.size(0)
    avg_val_loss = total_val_loss / max(val_samples, 1)

    # tqdm.write prints below the progress bars so the log stays readable
    # in the notebook.
    tqdm.write(f"Epoch {epoch+1}: Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

    # Append this epoch to the on-disk log (reopened every epoch so the
    # entries written so far survive an interrupted run).
    with open(log_path, 'a') as f:
        f.write(f"{epoch+1}\t{avg_train_loss:.6f}\t{avg_val_loss:.6f}\n")

    # Early stopping & best-weights checkpoint
    if avg_val_loss < best_loss:
        best_loss = avg_val_loss
        patience = 0
        torch.save(model.state_dict(), ckpt_path)
        tqdm.write(f"[INFO] Best model saved at: {ckpt_path}")
    else:
        patience += 1
        if patience >= cfg['early_stop_patience']:
            tqdm.write("[INFO] Early stopping triggered.")
            break

# At this point `model` holds the weights of the last epoch run, which after
# early stopping are not the best ones. Reload the best checkpoint so later
# cells that use `model` directly evaluate the best weights. The guard skips
# this when no checkpoint was written during this run (best_loss never
# improved), so a stale file from an earlier run is never loaded.
if best_loss < float('inf'):
    model.load_state_dict(torch.load(ckpt_path, map_location=device, weights_only=True))
    tqdm.write(f"[INFO] Best weights (val loss {best_loss:.4f}) restored from: {ckpt_path}")

print("训练完成！")


Epoch:   0%|          | 0/100 [00:00<?, ?it/s]

Train 1:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 1: Train Loss: 0.0308, Val Loss: 0.0278
[INFO] Best model saved at: checkpoints\mlp_best.pt


Train 2:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 2: Train Loss: 0.0277, Val Loss: 0.0270
[INFO] Best model saved at: checkpoints\mlp_best.pt


Train 3:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 3: Train Loss: 0.0269, Val Loss: 0.0263
[INFO] Best model saved at: checkpoints\mlp_best.pt


Train 4:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 4: Train Loss: 0.0265, Val Loss: 0.0257
[INFO] Best model saved at: checkpoints\mlp_best.pt


Train 5:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 5: Train Loss: 0.0263, Val Loss: 0.0261


Train 6:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 6: Train Loss: 0.0261, Val Loss: 0.0254
[INFO] Best model saved at: checkpoints\mlp_best.pt


Train 7:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 7: Train Loss: 0.0259, Val Loss: 0.0262


Train 8:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 8: Train Loss: 0.0258, Val Loss: 0.0257


Train 9:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 9: Train Loss: 0.0257, Val Loss: 0.0252
[INFO] Best model saved at: checkpoints\mlp_best.pt


Train 10:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 10: Train Loss: 0.0255, Val Loss: 0.0266


Train 11:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 11: Train Loss: 0.0255, Val Loss: 0.0251
[INFO] Best model saved at: checkpoints\mlp_best.pt


Train 12:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 12: Train Loss: 0.0254, Val Loss: 0.0249
[INFO] Best model saved at: checkpoints\mlp_best.pt


Train 13:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 13: Train Loss: 0.0254, Val Loss: 0.0256


Train 14:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 14: Train Loss: 0.0253, Val Loss: 0.0251


Train 15:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 15: Train Loss: 0.0252, Val Loss: 0.0253


Train 16:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 16: Train Loss: 0.0252, Val Loss: 0.0248
[INFO] Best model saved at: checkpoints\mlp_best.pt


Train 17:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 17: Train Loss: 0.0251, Val Loss: 0.0250


Train 18:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 18: Train Loss: 0.0251, Val Loss: 0.0249


Train 19:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 19: Train Loss: 0.0251, Val Loss: 0.0250


Train 20:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 20: Train Loss: 0.0250, Val Loss: 0.0254


Train 21:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 21: Train Loss: 0.0250, Val Loss: 0.0246
[INFO] Best model saved at: checkpoints\mlp_best.pt


Train 22:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 22: Train Loss: 0.0250, Val Loss: 0.0247


Train 23:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 23: Train Loss: 0.0250, Val Loss: 0.0248


Train 24:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 24: Train Loss: 0.0249, Val Loss: 0.0246


Train 25:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 25: Train Loss: 0.0249, Val Loss: 0.0247


Train 26:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 26: Train Loss: 0.0249, Val Loss: 0.0249


Train 27:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 27: Train Loss: 0.0249, Val Loss: 0.0252


Train 28:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 28: Train Loss: 0.0249, Val Loss: 0.0245
[INFO] Best model saved at: checkpoints\mlp_best.pt


Train 29:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 29: Train Loss: 0.0248, Val Loss: 0.0247


Train 30:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 30: Train Loss: 0.0248, Val Loss: 0.0250


Train 31:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 31: Train Loss: 0.0248, Val Loss: 0.0249


Train 32:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 32: Train Loss: 0.0248, Val Loss: 0.0248


Train 33:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 33: Train Loss: 0.0248, Val Loss: 0.0244
[INFO] Best model saved at: checkpoints\mlp_best.pt


Train 34:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 34: Train Loss: 0.0248, Val Loss: 0.0250


Train 35:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 35: Train Loss: 0.0248, Val Loss: 0.0255


Train 36:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 36: Train Loss: 0.0247, Val Loss: 0.0248


Train 37:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 37: Train Loss: 0.0247, Val Loss: 0.0248


Train 38:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 38: Train Loss: 0.0247, Val Loss: 0.0249


Train 39:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 39: Train Loss: 0.0247, Val Loss: 0.0245


Train 40:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 40: Train Loss: 0.0247, Val Loss: 0.0246


Train 41:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 41: Train Loss: 0.0247, Val Loss: 0.0247


Train 42:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 42: Train Loss: 0.0247, Val Loss: 0.0252


Train 43:   0%|          | 0/12500 [00:00<?, ?it/s]

Epoch 43: Train Loss: 0.0247, Val Loss: 0.0248
[INFO] Early stopping triggered.
训练完成！
