In [1]:
import torch

# 检查是否有可用的 GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch_geometric.loader import DataLoader
from torch_geometric.data import Data

# 读取数据
data = pd.read_csv("../../data/dataset_reduced.csv")
data['target_class'] = pd.qcut(data['Cs'], q=10, labels=False)
X = data.drop(['Cs', 'target_class'], axis=1)
y = data['Cs']
stratify_column = data['target_class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=21, stratify=stratify_column)
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the number of material features and test conditions
num_material_features = 7
num_test_conditions = 2
num_features = num_material_features + num_test_conditions

# Construct edges: connect each node to its immediate neighbors
# edges = []
# for i in range(num_material_features):
#     if i < num_material_features - 1:
#         edges.append([i, i + 1])
#         edges.append([i + 1, i])


edges = [[0,1],[1,0],[1,2],[1,6],[2,1],[2,3],[3,2],[3,4],[4,3],[4,5],[4,6],[5,4],[6,1],[6,4]]
print(edges)

# 转换为Tensor
edges = np.array(edges).T  # 转置以匹配PyTorch Geometric的edge_index格式

# 转换为PyTorch张量

X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
edge_index = torch.tensor(edges, dtype=torch.long)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1) # 确保y是列向量
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)  # 确保y是列向量

train_data = Data(x=X_train_tensor, edge_index=edge_index, y=y_train_tensor).to(device)
test_data = Data(x=X_test_tensor, edge_index=edge_index, y=y_test_tensor).to(device)

train_loader = DataLoader([train_data], batch_size=20, shuffle=True)  # 根据实际数据调整
test_loader = DataLoader([test_data], batch_size=20, shuffle=False)  # 根据实际数据调整
print(edge_index)

[[0, 1], [1, 0], [1, 2], [1, 6], [2, 1], [2, 3], [3, 2], [3, 4], [4, 3], [4, 5], [4, 6], [5, 4], [6, 1], [6, 4]]
tensor([[0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 6, 6],
        [1, 0, 2, 6, 1, 3, 2, 4, 3, 5, 6, 4, 1, 4]])


In [16]:
from torch_function import RMSE_Loss
import torch.optim as optim
from GNN_torch import GNNModel

model = GNNModel(input_dim=9, hidden_dims=[63, 63, 63], output_dim=1)  # 根据实际输入维度调整
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01)

criterion = RMSE_Loss().to(device)

num_epochs = 3000
best_loss = float('inf')
patience = 100  # Maximum allowed consecutive epochs without improvement
epochs_without_improvement = 0  # Consecutive epochs without improvement

# Training loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0  # Initialize cumulative loss for each epoch
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = criterion(out, data.y)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # 打印每10个 epoch 的损失
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")


    # Calculate validation loss
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in test_loader:
            data = data.to(device)
            out = model(data)
            loss = criterion(out, data.y)
            val_loss += loss.item()

    val_loss /= len(test_loader)

    # Save the best model
    if val_loss < best_loss:
        best_loss = val_loss
        epochs_without_improvement = 0
        torch.save(model.state_dict(), "gnn_best_model.pth")
    else:
        epochs_without_improvement += 1

    # Early stopping if no improvement for `patience` epochs
    if epochs_without_improvement >= patience:
        print(f"Early stopping at epoch {epoch + 1}")
        break

Epoch [10/3000], Loss: 60.9912
Epoch [20/3000], Loss: 39.1216
Epoch [30/3000], Loss: 32.2569
Epoch [40/3000], Loss: 29.5425
Epoch [50/3000], Loss: 27.3547
Epoch [60/3000], Loss: 26.0738
Epoch [70/3000], Loss: 25.0740
Epoch [80/3000], Loss: 23.9394
Epoch [90/3000], Loss: 22.5713
Epoch [100/3000], Loss: 21.0837
Epoch [110/3000], Loss: 19.4213
Epoch [120/3000], Loss: 17.5301
Epoch [130/3000], Loss: 15.8254
Epoch [140/3000], Loss: 14.2175
Epoch [150/3000], Loss: 13.0544
Epoch [160/3000], Loss: 12.2105
Epoch [170/3000], Loss: 11.1494
Epoch [180/3000], Loss: 10.9045
Epoch [190/3000], Loss: 9.9106
Epoch [200/3000], Loss: 9.5597
Epoch [210/3000], Loss: 9.1320
Epoch [220/3000], Loss: 9.2624
Epoch [230/3000], Loss: 9.2966
Epoch [240/3000], Loss: 8.6577
Epoch [250/3000], Loss: 8.0139
Epoch [260/3000], Loss: 7.7135
Epoch [270/3000], Loss: 7.5043
Epoch [280/3000], Loss: 7.2639
Epoch [290/3000], Loss: 7.1598
Epoch [300/3000], Loss: 6.9845
Epoch [310/3000], Loss: 6.9814
Epoch [320/3000], Loss: 6.8955

In [17]:
from function import metrics_to_dataframe

# 加载最佳模型的状态字典
model.load_state_dict(torch.load('gnn_best_model.pth', weights_only=True))

# 将模型设置为评估模式
model.eval()
model.to(device)

with torch.no_grad():
    # 对训练集进行预测
    out = model(train_data)
    # 对测试集进行预测
    test_out = model(test_data)

    # 保存指标到CSV文件
    metrics_df = metrics_to_dataframe(train_data.y.cpu().numpy(), out.cpu().numpy(),
                                      test_data.y.cpu().numpy(), test_out.cpu().numpy(), 'GNN').round(3)
    metrics_df.to_csv('gnn_metrics.csv', index=False)

metrics_df

Unnamed: 0,model,R2_train,MAE_train,MAPE_train,RMSE_train,R2_test,MAE_test,MAPE_test,RMSE_test
0,GNN,0.986,2.903,3.365,4.82,0.935,6.431,9.185,10.575


In [5]:
# 保存训练集和测试集的预测结果（包含真实值）
train_predictions = pd.DataFrame({'Actual': train_data.y.cpu().detach().numpy().flatten(),
                                  'Predicted': model(train_data).cpu().detach().numpy().flatten()})
test_predictions = pd.DataFrame({'Actual': test_data.y.cpu().detach().numpy().flatten(),
                                 'Predicted': model(test_data).cpu().detach().numpy().flatten()})

train_predictions.to_csv('gnn_train_predictions.csv', index=False)
test_predictions.to_csv('gnn_test_predictions.csv', index=False)