In [1]:
import torch

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


# 读取数据

In [2]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd

# Load the raw dataset; the `Cs` column is the regression target.
data = pd.read_csv("../data/dataset.csv")

# Bin the continuous target into 10 quantile classes so the train/test split
# can be stratified on it.
data['target_class'] = pd.qcut(data['Cs'], q=10, labels=False)
stratify_column = data['target_class']

# Features are every column except the target and its helper bin label.
X = data.drop(columns=['Cs', 'target_class']).values
y = data['Cs'].values

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=21,
    stratify=stratify_column,
)

# Standardise features: statistics are fitted on the training rows only and
# then reused unchanged for the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## 构建图数据对象：将数据转换为图数据结构

In [3]:
import numpy as np
from torch_geometric.data import Data

# Index groups: the first seven indices are labelled as material properties,
# the last five as test conditions.
material_indices = list(range(7))
test_indices = list(range(7, 12))

# One directed edge for every ordered pair of distinct material indices
# (a complete graph over those indices, without self-loops).
# An alternative hand-picked topology was tried here previously: undirected
# edges 0-1, a clique over nodes 2-5, nodes 0-3 each linked to node 6, and 0-2.
edges = [
    [src, dst]
    for src in material_indices
    for dst in material_indices
    if src != dst
]

# Transpose to the (2, num_edges) layout PyTorch Geometric uses for edge_index.
edges = np.array(edges).T

print(edges)

# NOTE(review): edge_index entries index rows of `x`, and the recorded output
# shows x=[480, 12], i.e. one row per training sample. The indices 0-6 above
# therefore appear to connect the first seven *samples* rather than the seven
# material-property columns -- confirm this is the intended graph.
edge_index = torch.tensor(edges, dtype=torch.long)
X_train_torch = torch.tensor(X_train_scaled, dtype=torch.float)
# Reshape the target into a column vector so it lines up with the model output.
y_train_torch = torch.tensor(y_train, dtype=torch.float).view(-1, 1)

# Bundle features, connectivity and targets into one graph object on `device`.
train_data = Data(x=X_train_torch, edge_index=edge_index, y=y_train_torch).to(device)

# Print the object summary to confirm it was built as expected.
print(train_data)

[[0 0 0 0 0 0 1 1 1 1 1 1 2 2 2 2 2 2 3 3 3 3 3 3 4 4 4 4 4 4 5 5 5 5 5 5
  6 6 6 6 6 6]
 [1 2 3 4 5 6 0 2 3 4 5 6 0 1 3 4 5 6 0 1 2 4 5 6 0 1 2 3 5 6 0 1 2 3 4 6
  0 1 2 3 4 5]]
Data(x=[480, 12], edge_index=[2, 42], y=[480, 1])


## 定义模型

In [4]:
from torch import nn, optim
from torch_geometric.nn import GCNConv

class MAPELoss(torch.nn.Module):
    """Mean absolute percentage error (MAPE), returned in percent.

    Computes ``mean(|targets - predictions| / max(|targets|, epsilon)) * 100``.

    Args:
        epsilon: Lower bound applied to the magnitude of the targets so that
            zero (or near-zero) targets cannot cause a division by zero.
    """

    def __init__(self, epsilon=1e-8):
        super().__init__()
        self.epsilon = epsilon

    def forward(self, predictions, targets):
        """Return the scalar MAPE (in percent) between predictions and targets.

        Args:
            predictions: Tensor of predicted values.
            targets: Tensor of reference values, broadcastable with
                ``predictions``.

        Returns:
            A 0-dim tensor holding the mean percentage error.
        """
        # Bound the *magnitude* of the denominator. Adding epsilon to the
        # signed target does not protect negative targets: the sum cancels
        # toward zero for targets close to -epsilon and the loss blows up.
        denominator = torch.clamp(torch.abs(targets), min=self.epsilon)
        return torch.mean(torch.abs(targets - predictions) / denominator) * 100

class GNN4TDL(nn.Module):
    """Three-layer graph convolutional regressor (input_dim -> 24 -> 48 -> 1).

    Args:
        input_dim: Number of input features per node.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.conv1 = GCNConv(input_dim, 24)
        self.conv2 = GCNConv(24, 48)
        self.conv3 = GCNConv(48, 1)

    def forward(self, data):
        """Run the network on a graph object exposing ``x`` and ``edge_index``.

        Returns:
            The output of the last convolution (one value per node); no
            activation is applied after it.
        """
        hidden = data.x
        connectivity = data.edge_index
        # The two hidden layers are each followed by a ReLU.
        for hidden_conv in (self.conv1, self.conv2):
            hidden = torch.relu(hidden_conv(hidden, connectivity))
        return self.conv3(hidden, connectivity)

# Seed PyTorch before the layers are created so that weight initialisation is
# repeatable across kernel restarts (same seed value as the train/test split).
torch.manual_seed(21)

# Build the network with one input feature per column of the scaled training
# matrix and move it to the selected device.
model = GNN4TDL(input_dim=X_train_scaled.shape[1]).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Loss functions. Only `mape_loss` is used by the training loop shown in this
# notebook; `mse_loss` is kept available as an alternative objective.
mse_loss = nn.MSELoss().to(device)
mape_loss = MAPELoss().to(device)
## 训练模型

In [5]:
num_epochs = 3000
best_loss = float('inf')
cumulative_loss = 0.0
patience = 30  # maximum number of consecutive epochs without improvement
epochs_without_improvement = 0  # consecutive epochs without improvement so far

for epoch in range(num_epochs):
    # Re-enter training mode every epoch: the monitoring pass at the end of
    # the previous iteration leaves the model in eval mode.
    model.train()
    optimizer.zero_grad()
    out = model(train_data)
    loss = mape_loss(out, train_data.y)
    loss.backward()
    optimizer.step()
    cumulative_loss += loss.item()

    # Report the mean training loss over each window of 10 epochs.
    if (epoch + 1) % 10 == 0:
        average_loss = cumulative_loss / 10
        print(f'Epoch {epoch+1}, Average Loss: {average_loss}')
        cumulative_loss = 0.0  # reset the running sum for the next window

    # NOTE(review): there is no held-out validation split here. The monitored
    # "validation" loss is evaluated on the training graph itself (after the
    # optimizer step), so early stopping and checkpointing track training
    # loss -- consider a separate validation set.
    model.eval()
    with torch.no_grad():
        # The model and train_data already live on `device`, so no extra
        # device transfers are needed.
        y_val_pred = model(train_data)
        val_loss = mape_loss(y_val_pred, train_data.y).item()

    if val_loss < best_loss:
        best_loss = val_loss
        epochs_without_improvement = 0
        # Checkpoint the best weights seen so far.
        torch.save(model.state_dict(), "gnn_best_model.pth")
    else:
        epochs_without_improvement += 1

    # Stop once the monitored loss has not improved for `patience` epochs.
    if epochs_without_improvement >= patience:
        print(f"Early stopping at epoch {epoch + 1}")
        break

Epoch 10, Average Loss: 99.74763641357421
Epoch 20, Average Loss: 99.03573226928711
Epoch 30, Average Loss: 98.26322326660156
Epoch 40, Average Loss: 97.36977996826172
Epoch 50, Average Loss: 96.26036148071289
Epoch 60, Average Loss: 94.83712768554688
Epoch 70, Average Loss: 93.00299911499023
Epoch 80, Average Loss: 90.64888992309571
Epoch 90, Average Loss: 87.6869384765625
Epoch 100, Average Loss: 84.31017684936523
Epoch 110, Average Loss: 80.70155563354493
Epoch 120, Average Loss: 76.93786849975587
Epoch 130, Average Loss: 73.09059143066406
Epoch 140, Average Loss: 69.05624771118164
Epoch 150, Average Loss: 64.86262359619141
Epoch 160, Average Loss: 60.77327346801758
Epoch 170, Average Loss: 56.97887649536133
Epoch 180, Average Loss: 53.389325332641604
Epoch 190, Average Loss: 50.094200897216794
Epoch 200, Average Loss: 47.20117073059082
Epoch 210, Average Loss: 44.70979042053223
Epoch 220, Average Loss: 42.61548080444336
Epoch 230, Average Loss: 40.909960174560545
Epoch 240, Average

## 构建测试集图数据对象并评估模型

In [6]:
from function import metrics_to_dataframe, calculate_metrics

# Restore the best checkpoint written during training. `map_location` remaps
# the stored tensors to the current device, so a checkpoint saved on a GPU
# can still be loaded on a CPU-only machine.
model.load_state_dict(
    torch.load('gnn_best_model.pth', map_location=device, weights_only=True)
)

# Inference only from here on.
model.eval()
model.to(device)

# Convert the scaled test split to tensors; the target becomes a column vector.
X_test_torch = torch.tensor(X_test_scaled, dtype=torch.float)
y_test_torch = torch.tensor(y_test, dtype=torch.float).view(-1, 1)

# Build the test graph object; it reuses the same edge_index as the training graph.
test_data = Data(x=X_test_torch, edge_index=edge_index, y=y_test_torch).to(device)

# Forward passes for both splits without tracking gradients.
with torch.no_grad():
    out = model(train_data)
    test_out = model(test_data)

# Move everything to host memory once instead of repeating the conversion
# for every metric call.
y_train_true = train_data.y.cpu().numpy()
y_train_pred = out.cpu().numpy()
y_test_true = test_data.y.cpu().numpy()
y_test_pred = test_out.cpu().numpy()

print("训练集预测结果:")
print(out)

# Training-set metrics.
train_metrics = calculate_metrics(y_train_true, y_train_pred)
print("训练集指标:", train_metrics)

print("测试集预测结果:")
print(test_out)

# Test-set metrics.
test_metrics = calculate_metrics(y_test_true, y_test_pred)
print("测试集指标:", test_metrics)

# Collect the metrics for both splits in one table and save it as CSV.
metrics_df = metrics_to_dataframe(y_train_true, y_train_pred,
                                  y_test_true, y_test_pred, 'GNN').round(3)
metrics_df.to_csv('gnn_metrics.csv', index=False)

print(metrics_df)


训练集预测结果:
tensor([[ 49.6322],
        [ 49.6322],
        [ 49.6322],
        [ 49.6322],
        [ 49.6322],
        [ 49.6322],
        [ 49.6322],
        [134.5268],
        [124.3092],
        [ 78.6683],
        [ 51.5766],
        [ 81.7555],
        [138.5731],
        [ 71.9933],
        [ 70.6052],
        [ 49.0343],
        [113.1268],
        [ 45.1720],
        [120.7163],
        [124.2427],
        [ 66.8808],
        [137.3500],
        [ 80.2200],
        [ 41.2620],
        [104.0936],
        [ 85.9277],
        [ 64.6213],
        [ 40.6692],
        [103.2055],
        [ 41.6549],
        [ 95.8690],
        [ 61.1720],
        [110.1070],
        [ 68.9228],
        [101.7975],
        [ 23.9795],
        [ 81.9069],
        [ 77.3336],
        [117.1922],
        [ 61.1172],
        [ 81.8908],
        [ 43.8438],
        [ 47.4775],
        [106.8591],
        [ 52.7342],
        [ 58.2501],
        [ 50.2414],
        [129.5638],
        [ 14.7522],
        [12

In [7]:
def _predictions_frame(graph):
    """Return a DataFrame pairing the true targets of `graph` with the model's predictions."""
    # Inference only: skip autograd bookkeeping instead of building a graph
    # and detaching the result afterwards.
    with torch.no_grad():
        predicted = model(graph)
    return pd.DataFrame({'Actual': graph.y.detach().cpu().numpy().flatten(),
                         'Predicted': predicted.cpu().numpy().flatten()})


# Save the predictions for the training and test sets alongside the true values.
train_predictions = _predictions_frame(train_data)
test_predictions = _predictions_frame(test_data)

train_predictions.to_csv('gnn_train_predictions.csv', index=False)
test_predictions.to_csv('gnn_test_predictions.csv', index=False)