# 准备数据

In [1]:
import numpy as np
import pandas as pd
import torch

# 检查是否有可用的 GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [2]:
from sklearn.model_selection import train_test_split

# 读取数据
data = pd.read_csv("../../data/dataset.csv")
data['target_class'] = pd.qcut(data['Cs'], q=10, labels=False)
X = data.drop(['Cs', 'target_class'], axis=1)
y = data['Cs']
stratify_column = data['target_class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=21, stratify=stratify_column)

X_train_categ = X_train[:, 8]  # 第九列为类别特征
X_train_cont = np.delete(X_train, 8, axis=1)  # 删除第九列，其他为连续特征

# 将 NumPy 数组转换为 PyTorch 张量
X_train_categ_tensor = torch.tensor(X_train_categ, dtype=torch.long)  # 类别特征需要使用长整型
X_train_categ_tensor = X_train_categ_tensor.unsqueeze(1).to(device)  # 在最后一个维度添加1
X_train_cont_tensor = torch.tensor(X_train_cont, dtype=torch.float).to(device)  # 连续特征使用浮点型
y_train_tensor = torch.tensor(y_train, dtype=torch.float)  # 对于回归问题，通常使用浮点数
y_train_tensor = y_train_tensor.unsqueeze(1).to(device)

# 计算连续特征的均值和标准差
mean = X_train_cont_tensor.mean(dim=0)
std = X_train_cont_tensor.std(dim=0)
continuous_mean_std = torch.stack([mean, std], dim=1).to(device)

# 处理测试集
X_test_categ = X_test[:, 8]
X_test_cont = np.delete(X_test, 8, axis=1)
X_test_categ_tensor = torch.tensor(X_test_categ, dtype=torch.long)
X_test_categ_tensor = X_test_categ_tensor.unsqueeze(1).to(device)
X_test_cont_tensor = torch.tensor(X_test_cont, dtype=torch.float).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.float)
y_test_tensor = y_test_tensor.unsqueeze(1).to(device)

# 定义模型

In [4]:
import torch.nn as nn
from tab_transformer_pytorch import TabTransformer
from torch_function import MAPE_Loss

# 我们有12个特征，其中有1个类别特征，11个连续值特征
# 类别特征每个有2个唯一值
categories = (2,)
num_continuous = 11

# 初始化 TabTransformer 模型
model = TabTransformer(
    categories=categories,
    num_continuous=num_continuous,
    dim=16,  # 默认维度为32
    dim_out=1,  # 回归问题的输出维度为1
    depth=6,  # 默认深度为6
    heads=8,  # 注意力机制的头数
    attn_dropout=0.01,  # 注意力机制的dropout
    ff_dropout=0.01,  # 前馈网络的的dropout
    mlp_hidden_mults=(1, 2, 4, 1),  # MLP隐藏层的倍数
    mlp_act=nn.ReLU(),  # MLP的激活函数, 默认为ReLU
    continuous_mean_std=continuous_mean_std,  # 连续值的均值和标准差
)

# 将模型移动到 GPU
model.to(device)

# 初始化损失函数
mse_loss = nn.MSELoss()
mape_loss = MAPE_Loss().to(device)

# 定义优化器
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 训练模型

In [5]:
# 训练循环
num_epochs = 3000
patience = 50  # 允许的最大连续未改进 epoch 数
epochs_without_improvement = 0  # 连续未改进的 epoch 数
best_loss = float('inf')
cumulative_loss = 0.0
model.train()

for epoch in range(num_epochs):
    model.zero_grad()
    outputs = model(X_train_categ_tensor, X_train_cont_tensor)
    loss = mape_loss(outputs, y_train_tensor)  # 使用MSE损失函数
    loss.backward()
    optimizer.step()
    cumulative_loss += loss.item()

    if (epoch + 1) % 10 == 0:
        average_loss = cumulative_loss / 10
        print(f'Epoch {epoch+1}, Average Loss: {average_loss}')
        cumulative_loss = 0.0  # 重置累积损失

    # 计算验证损失
    model.eval()
    with torch.no_grad():
        # 在 GPU 上进行预测
        y_val_pred = model(X_train_categ_tensor, X_train_cont_tensor).to(device)
        # 验证损失计算时，确保 y_test_tensor 也在同一个设备上
        y_train_tensor = y_train_tensor.to(device)
        val_loss = mape_loss(y_val_pred, y_train_tensor).item()  # 计算验证损失

    # 判断验证损失是否改善
    if val_loss < best_loss:
        best_loss = val_loss
        epochs_without_improvement = 0  # 重置计数器
        # 保存最佳模型
        torch.save(model.state_dict(), "tab_transformer_best_model_hidden1241.pth")
    else:
        epochs_without_improvement += 1

    # 如果验证损失在一定次数的 epoch 内没有改进，则停止训练
    if epochs_without_improvement >= patience:
        print(f"Early stopping at epoch {epoch + 1}")
        break

Epoch 10, Average Loss: 99.77881927490235
Epoch 20, Average Loss: 97.29062881469727
Epoch 30, Average Loss: 82.08184814453125
Epoch 40, Average Loss: 49.3889663696289
Epoch 50, Average Loss: 39.86666488647461
Epoch 60, Average Loss: 39.443864822387695
Epoch 70, Average Loss: 38.2396900177002
Epoch 80, Average Loss: 37.9215950012207
Epoch 90, Average Loss: 37.78740234375
Epoch 100, Average Loss: 37.697842025756835
Epoch 110, Average Loss: 37.61605987548828
Epoch 120, Average Loss: 37.54196014404297
Epoch 130, Average Loss: 37.46637496948242
Epoch 140, Average Loss: 37.38411674499512
Epoch 150, Average Loss: 37.292860412597655
Epoch 160, Average Loss: 37.19094009399414
Epoch 170, Average Loss: 37.07507057189942
Epoch 180, Average Loss: 36.93958587646485
Epoch 190, Average Loss: 36.77737808227539
Epoch 200, Average Loss: 36.58445358276367
Epoch 210, Average Loss: 36.33125228881836
Epoch 220, Average Loss: 36.015742492675784
Epoch 230, Average Loss: 35.60921516418457
Epoch 240, Average Los

In [6]:
from function import metrics_to_dataframe, calculate_metrics

# 加载最佳模型的状态字典
model.load_state_dict(torch.load("tab_transformer_best_model_hidden1241.pth", weights_only=True))

# 将模型设置为评估模式
model.eval()
model.to(device)

with torch.no_grad():
    # 将分类和连续数据张量转移到正确的设备
    X_train_categ_tensor = X_train_categ_tensor.to(device)
    X_train_cont_tensor = X_train_cont_tensor.to(device)
    y_train_tensor = y_train_tensor.to(device)

    # 对训练集进行预测
    predictions = model(X_train_categ_tensor, X_train_cont_tensor)
    print("训练集预测结果:")
    print(predictions)

    # 计算训练集的指标
    train_metrics = calculate_metrics(y_train_tensor.cpu().numpy(), predictions.cpu().numpy())
    print("训练集指标:", train_metrics)

    # 准备测试数据
    X_test_categ_tensor = X_test_categ_tensor.to(device)
    X_test_cont_tensor = X_test_cont_tensor.to(device)
    y_test_tensor = y_test_tensor.to(device)

    # 对测试集进行预测
    test_predictions = model(X_test_categ_tensor, X_test_cont_tensor)
    test_metrics = calculate_metrics(y_test_tensor.cpu().numpy(), test_predictions.cpu().numpy())
    print("测试集指标:", test_metrics)

    # 将结果转换为DataFrame
    tab_transformer_metrics = metrics_to_dataframe(
        y_train_tensor.cpu().numpy(), predictions.cpu().numpy(),
        y_test_tensor.cpu().numpy(), test_predictions.cpu().numpy(), "TabTransformer").round(3)
    tab_transformer_metrics.to_csv('TabTransformer_metrics.csv', index=False)
    print(tab_transformer_metrics)

训练集预测结果:
tensor([[ 45.5804],
        [161.3616],
        [ 67.4584],
        [ 36.3404],
        [ 89.0337],
        [164.1905],
        [ 91.0588],
        [140.3313],
        [167.8633],
        [ 79.4018],
        [ 53.4141],
        [ 93.6236],
        [136.8783],
        [ 83.1137],
        [ 88.1674],
        [ 48.8588],
        [110.2969],
        [ 45.0965],
        [114.7164],
        [122.4980],
        [ 66.9032],
        [138.4997],
        [ 79.6047],
        [ 41.1877],
        [101.4846],
        [ 81.3487],
        [ 61.2667],
        [ 35.5792],
        [102.2727],
        [ 53.9852],
        [ 95.3738],
        [ 61.1105],
        [112.6848],
        [ 69.1454],
        [101.8436],
        [ 23.8743],
        [ 81.1476],
        [ 84.3423],
        [114.3826],
        [ 93.9023],
        [ 73.4503],
        [ 38.3463],
        [ 42.9218],
        [103.8231],
        [ 52.6197],
        [ 57.5152],
        [ 54.3011],
        [126.7592],
        [ 14.6406],
        [13

In [7]:
# 保存训练集和测试集的预测结果（包含真实值）
tab_transformer_train = pd.DataFrame({'Actual': y_train_tensor.cpu().numpy().squeeze(), 'Predicted': predictions.cpu().numpy().squeeze()})
tab_transformer_test = pd.DataFrame({'Actual': y_test_tensor.cpu().numpy().squeeze(), 'Predicted': test_predictions.cpu().numpy().squeeze()})

tab_transformer_train.to_csv('tab_transformer_train.csv', index=False)
tab_transformer_test.to_csv('tab_transformer_test.csv', index=False)