In [1]:
import torch

# 检查是否有可用的 GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch_geometric.loader import DataLoader
from torch_geometric.data import Data

# 读取数据
data = pd.read_csv("../../data/dataset.csv")
data['target_class'] = pd.qcut(data['Cs'], q=10, labels=False)
X = data.drop(['Cs', 'target_class'], axis=1)
y = data['Cs']
stratify_column = data['target_class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=21, stratify=stratify_column)
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the number of material features and test conditions
num_material_features = 7
num_test_conditions = 5
num_features = num_material_features + num_test_conditions

# Construct edges: connect each node to its immediate neighbors
# edges = []
# for i in range(num_material_features):
#     if i < num_material_features - 1:
#         edges.append([i, i + 1])
#         edges.append([i + 1, i])


edges = [[0,1],[1,0],[1,2],[1,6],[2,1],[2,3],[3,2],[3,4],[4,3],[4,5],[4,6],[5,4],[6,1],[6,4]]
print(edges)

# 转换为Tensor
edges = np.array(edges).T  # 转置以匹配PyTorch Geometric的edge_index格式

# 转换为PyTorch张量

X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
edge_index = torch.tensor(edges, dtype=torch.long)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1) # 确保y是列向量
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)  # 确保y是列向量

train_data = Data(x=X_train_tensor, edge_index=edge_index, y=y_train_tensor).to(device)
test_data = Data(x=X_test_tensor, edge_index=edge_index, y=y_test_tensor).to(device)

train_loader = DataLoader([train_data], batch_size=20, shuffle=True)  # 根据实际数据调整
test_loader = DataLoader([test_data], batch_size=20, shuffle=False)  # 根据实际数据调整
print(edges)
print(edge_index)

[[0, 1], [1, 0], [1, 2], [1, 6], [2, 1], [2, 3], [3, 2], [3, 4], [4, 3], [4, 5], [4, 6], [5, 4], [6, 1], [6, 4]]
[[0 1 1 1 2 2 3 3 4 4 4 5 6 6]
 [1 0 2 6 1 3 2 4 3 5 6 4 1 4]]
tensor([[0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 6, 6],
        [1, 0, 2, 6, 1, 3, 2, 4, 3, 5, 6, 4, 1, 4]])


In [3]:
from torch import nn
import torch.optim as optim
from DL_model.GNN.GNN_torch import GNNModel

model = GNNModel(input_dim=12, hidden_dims=[64,64], output_dim=1)  # 根据实际输入维度调整
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.002)

loss_fn = nn.MSELoss().to(device)

num_epochs = 3000
best_loss = float('inf')
cumulative_loss = 0.0
patience = 20  # 允许的最大连续未改进 epoch 数
epochs_without_improvement = 0  # 连续未改进的 epoch 数
model.train()

# 训练过程
model.train()
for epoch in range(num_epochs):
    model.train()
    cumulative_loss = 0.0  # 每个epoch重新初始化累积损失
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = loss_fn(out, data.y)
        loss.backward()
        optimizer.step()
        cumulative_loss += loss.item()

    # 每10个epoch输出一次平均损失
    if (epoch + 1) % 10 == 0:
        average_loss = cumulative_loss / 10
        print(f'Epoch {epoch+1}, Average Loss: {average_loss:.4f}')
        cumulative_loss = 0.0

    # 计算验证集的损失
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in test_loader:
            data = data.to(device)
            out = model(data)
            loss = loss_fn(out, data.y)
            val_loss += loss.item()

    val_loss /= len(test_loader)
    print(f'Epoch {epoch+1}, Validation Loss: {val_loss:.4f}')

    # 保存最佳模型
    if val_loss < best_loss:
        best_loss = val_loss
        epochs_without_improvement = 0
        torch.save(model.state_dict(), "gnn_best_model.pth")
    else:
        epochs_without_improvement += 1

    # 如果连续`patience`个epoch未改进，进行早停
    if epochs_without_improvement >= patience:
        print(f"Early stopping at epoch {epoch + 1}")
        break

Epoch 1, Validation Loss: 10302.7900
Epoch 2, Validation Loss: 10265.8047
Epoch 3, Validation Loss: 10228.3096
Epoch 4, Validation Loss: 10190.1260
Epoch 5, Validation Loss: 10150.6309
Epoch 6, Validation Loss: 10110.0381
Epoch 7, Validation Loss: 10068.2373
Epoch 8, Validation Loss: 10025.1816
Epoch 9, Validation Loss: 9980.2715
Epoch 10, Average Loss: 995.2084
Epoch 10, Validation Loss: 9933.1514
Epoch 11, Validation Loss: 9883.8975
Epoch 12, Validation Loss: 9832.1045
Epoch 13, Validation Loss: 9777.1006
Epoch 14, Validation Loss: 9719.1182
Epoch 15, Validation Loss: 9658.1221
Epoch 16, Validation Loss: 9593.9482
Epoch 17, Validation Loss: 9526.5898
Epoch 18, Validation Loss: 9455.8086
Epoch 19, Validation Loss: 9381.3711
Epoch 20, Average Loss: 936.7913
Epoch 20, Validation Loss: 9303.1904
Epoch 21, Validation Loss: 9221.0605
Epoch 22, Validation Loss: 9134.8896
Epoch 23, Validation Loss: 9044.6846
Epoch 24, Validation Loss: 8950.4375
Epoch 25, Validation Loss: 8852.0088
Epoch 26, 

In [4]:
from function import calculate_metrics
from function import metrics_to_dataframe

# 加载最佳模型的状态字典
model.load_state_dict(torch.load('gnn_best_model.pth', weights_only=True))

# 将模型设置为评估模式
model.eval()
model.to(device)

with torch.no_grad():
    # 对训练集进行预测
    out = model(train_data)
    print("训练集预测结果:")
    print(out)

    # 计算训练集的指标
    train_metrics = calculate_metrics(train_data.y.cpu().numpy(), out.cpu().numpy())
    print("训练集指标:", train_metrics)

    # 对测试集进行预测
    test_out = model(test_data)
    print("测试集预测结果:")
    print(test_out)

    # 计算测试集的指标
    test_metrics = calculate_metrics(test_data.y.cpu().numpy(), test_out.cpu().numpy())
    print("测试集指标:", test_metrics)

    # 保存指标到CSV文件
    metrics_df = metrics_to_dataframe(train_data.y.cpu().numpy(), out.cpu().numpy(),
                                      test_data.y.cpu().numpy(), test_out.cpu().numpy(), 'GNN').round(3)
    metrics_df.to_csv('gnn_metrics.csv', index=False)

    print(metrics_df)


# 保存训练集和测试集的预测结果（包含真实值）
train_predictions = pd.DataFrame({'Actual': train_data.y.cpu().detach().numpy().flatten(),
                                  'Predicted': model(train_data).cpu().detach().numpy().flatten()})
test_predictions = pd.DataFrame({'Actual': test_data.y.cpu().detach().numpy().flatten(),
                                 'Predicted': model(test_data).cpu().detach().numpy().flatten()})

train_predictions.to_csv('gnn_train_predictions.csv', index=False)
test_predictions.to_csv('gnn_test_predictions.csv', index=False)

训练集预测结果:
tensor([[ 63.7164],
        [ 90.8307],
        [ 64.7803],
        [ 75.1128],
        [105.2536],
        [ 90.7775],
        [ 97.0247],
        [154.9189],
        [153.2601],
        [ 75.4091],
        [ 52.3438],
        [ 85.0862],
        [149.2296],
        [ 73.7520],
        [ 72.9298],
        [ 44.5800],
        [126.3635],
        [ 40.8420],
        [122.2118],
        [134.8255],
        [ 66.3147],
        [134.2082],
        [ 91.8988],
        [ 41.1624],
        [ 99.6010],
        [ 83.2370],
        [ 67.9936],
        [ 52.4740],
        [106.0063],
        [ 49.0375],
        [ 99.5834],
        [ 62.6192],
        [132.3850],
        [ 76.5395],
        [106.6716],
        [ 33.1060],
        [ 88.7143],
        [ 90.6142],
        [115.9885],
        [ 74.1767],
        [ 81.9754],
        [ 54.8536],
        [ 56.2571],
        [114.4176],
        [ 60.4809],
        [ 57.8788],
        [ 63.3248],
        [140.8880],
        [ 30.3013],
        [13