In [15]:
import numpy as np
import pandas as pd
import torch
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
import torch.nn.functional as F
from torch.nn import Linear
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# 读取Cora数据集
cora_content = pd.read_csv('./data/cora.content', sep='\t', header=None)
cora_cites = pd.read_csv('./data/cora.cites', sep='\t', header=None)

# 准备节点特征和标签
labels = pd.get_dummies(cora_content.iloc[:, -1]).values.argmax(axis=1)
features = cora_content.iloc[:, 1:-1].values
node_features = torch.tensor(features, dtype=torch.float)

# 准备边索引
paper_ids = cora_content.iloc[:, 0].values
id_map = {j: i for i, j in enumerate(paper_ids)}
edges = np.array(list(map(id_map.get, cora_cites.values.flatten())), dtype=np.int64).reshape(cora_cites.shape)
edge_index = torch.tensor(edges.T, dtype=torch.long)

# 创建PyTorch Geometric数据对象
data = Data(x=node_features, edge_index=edge_index, y=torch.tensor(labels, dtype=torch.long))

import torch
from torch_geometric.data import Data

# 假设data是你已经加载好的Cora图数据
num_nodes = data.num_nodes  # 获取节点总数

# 创建掩码
train_mask = torch.zeros(num_nodes, dtype=torch.bool)
val_mask = torch.zeros(num_nodes, dtype=torch.bool)
test_mask = torch.zeros(num_nodes, dtype=torch.bool)

import numpy as np

# 假设`num_nodes`是总节点数，需要事先知道这个值
num_nodes = 2708  # 示例中的节点总数

# 定义训练、验证、测试集的比例
train_ratio = 0.70
val_ratio = 0.15
test_ratio = 0.15  # 通常确保三者之和为1

# 计算实际的索引数量
num_train = int(train_ratio * num_nodes)
num_val = int(val_ratio * num_nodes)
num_test = num_nodes - num_train - num_val  # 确保全部节点被覆盖

# 生成随机排列的节点索引
indices = np.random.permutation(num_nodes)

# 分配训练集、验证集和测试集索引
train_index = indices[:num_train]
val_index = indices[num_train:num_train + num_val]
test_index = indices[num_train + num_val:]

# 输出索引信息
print("Train Index:", train_index)
print("Validation Index:", val_index)
print("Test Index:", test_index)


train_mask[train_index] = True
val_mask[val_index] = True
test_mask[test_index] = True

# 将掩码赋给data对象
data.train_mask = train_mask
data.val_mask = val_mask
data.test_mask = test_mask

# 现在可以安全地使用这些掩码进行模型训练和评估了


Train Index: [ 671 1259 1819 ... 1437 1477  792]
Validation Index: [1902  767 1275  992 2063  120  575  962  900  221 1700 1404  117  449
 1802 1227 1754  615 1272  757 1916  677 1152  764 1963 2189  368  381
 2014 1148  505 1540 1520 1339 2214  836 1064  273  372  771 2145  816
 1295 2012  425   59 2505 2283 1353 1550 2090 1439  995 1658 2436  252
 1552 1075 1384 1617 1848 2198  275 1825 2416  578 1199  276 2134   52
 2697  864 2444 1673  646 1303 1628 1977  983  116 1421 1432  936  947
   40  292  815 1603 2341 1566 2031 2150 1610 1704 1853  909 1001 2651
 1982  255 1187  951 1799  278 1746 2369 2162 2210   72   97 2348 1146
  142 2392  549 2182 2531 1133 2081 1264 2319 1655  535 2609 1423  554
  596 1493  366  643  525 1941  362 1485  103 1753 1533 2597  799  471
  898 1060 1815 1726 2135 2075 1821  944 1308 1415 1331 1857 1276  302
 1858  412 2025 1706 1613  497 2389 2380  318   75  990 1965 1105 1621
 1366  364 2062 1725  486 1029 2082  361 2136 1620  250 2407 1945 2287
  787 2363

In [16]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from sklearn.metrics import roc_auc_score
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [17]:
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(1433, 128)  # Cora特征维度为1433
        self.conv2 = GCNConv(128, 64)
        self.conv3 = GCNConv(64, 32)
        self.conv4 = GCNConv(32, 7)  # Cora类别数为7

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = F.relu(self.conv3(x, edge_index))
        x = self.conv4(x, edge_index)
        return F.log_softmax(x, dim=1)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
scheduler = ReduceLROnPlateau(optimizer, 'min')
criterion = torch.nn.CrossEntropyLoss()  # 使用交叉熵损失

model.train()
for epoch in range(200):  # 运行200个epoch
    optimizer.zero_grad()
    out = model(data)
    loss = criterion(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    scheduler.step(loss)

    if epoch % 10 == 0:
        model.eval()
        _, pred = out[data.test_mask].max(dim=1)
        correct = pred.eq(data.y[data.test_mask]).sum().item()
        acc = correct / data.test_mask.sum().item()
        print(f'Epoch {epoch}: Loss {loss.item():.4f}, Accuracy {acc:.4f}')

Epoch 0: Loss 1.9700, Accuracy 0.1597
Epoch 10: Loss 0.3660, Accuracy 0.8378
Epoch 20: Loss 0.2225, Accuracy 0.8329
Epoch 30: Loss 0.1782, Accuracy 0.8206
Epoch 40: Loss 0.1458, Accuracy 0.8206
Epoch 50: Loss 0.1234, Accuracy 0.8206
Epoch 60: Loss 0.1276, Accuracy 0.8084
Epoch 70: Loss 0.1131, Accuracy 0.8182
Epoch 80: Loss 0.1066, Accuracy 0.8182
Epoch 90: Loss 0.1046, Accuracy 0.8157
Epoch 100: Loss 0.1026, Accuracy 0.8182
Epoch 110: Loss 0.1011, Accuracy 0.8206
Epoch 120: Loss 0.0996, Accuracy 0.8157
Epoch 130: Loss 0.0982, Accuracy 0.8133
Epoch 140: Loss 0.0968, Accuracy 0.8133
Epoch 150: Loss 0.0954, Accuracy 0.8133
Epoch 160: Loss 0.0940, Accuracy 0.8133
Epoch 170: Loss 0.0926, Accuracy 0.8157
Epoch 180: Loss 0.0912, Accuracy 0.8157
Epoch 190: Loss 0.0898, Accuracy 0.8157


In [21]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
# 预测并计算性能指标
model.eval()
_, pred = model(data).max(dim=1)
pred = pred[data.test_mask]
labels = data.y[data.test_mask]

accuracy = accuracy_score(labels.cpu(), pred.cpu())
precision, recall, f1, _ = precision_recall_fscore_support(labels.cpu(), pred.cpu(), average='weighted')

print("Test Accuracy: ", accuracy)
print("Test Precision: ", precision)
print("Test Recall: ", recall)
print("Test F1 Score: ", f1)

Test Accuracy:  0.8181818181818182
Test Precision:  0.8182031028613065
Test Recall:  0.8181818181818182
Test F1 Score:  0.8165957783376938
