In [39]:
import torch
import torch.nn.functional as F
from torch.optim import Adam
from sklearn.metrics import roc_auc_score, average_precision_score
import scipy.sparse as sp
import numpy as np
import os
import time
from preprocessing import *
import args
from GVAE_GIN import DeepVGAE
import pandas as pd
from torch_geometric.utils import train_test_split_edges
from torch_geometric.data import Data
from sklearn.metrics import roc_auc_score, average_precision_score
from Pytorch_VGAE import CustomGraphDataset


In [52]:
A_list=[]
X_list=[]
for i in range(5000):
    A_list.append(pd.read_csv('../VGAE_dataset/raw/A/A_{i}.csv'.format(i=i),header=None).values)
    X_list.append(pd.read_csv('../VGAE_dataset/raw/X/X_{i}.csv'.format(i=i),header=None).values)

In [32]:
# 模型参数设定
class Args:
    input_dim = 1433 
    hidden1_dim = 32
    hidden2_dim = 16
    use_feature = True

    num_epoch = 200
    learning_rate = 0.01

In [35]:
args = Args()

# VGAE 模型实例化
model = DeepVGAE(args).to('cuda' if torch.cuda.is_available() else 'cpu')
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

In [36]:
def train(model, data, optimizer):
    model.train()
    optimizer.zero_grad()
    loss = model.loss(data.x, data.train_pos_edge_index, data.edge_index)
    loss.backward()
    optimizer.step()
    return loss.item()

# 模型测试函数
def test(model, data):
    model.eval()
    with torch.no_grad():
        z = model.encode(data.x, data.train_pos_edge_index)
        roc_auc, ap = model.test(z, data.test_pos_edge_index, data.test_neg_edge_index)
    return roc_auc, ap

In [37]:
def prepare_data(X, A):
    # 转换为 PyG 数据格式
    data_list = []
    for i in range(len(X)):
        x = torch.tensor(X[i], dtype=torch.float)
        edge_index = torch.tensor(A[i], dtype=torch.long)
        data = Data(x=x, edge_index=edge_index)
        
        # 分割边用于训练、验证和测试
        data = train_test_split_edges(data)
        data_list.append(data)

    return data_list

In [53]:
data = prepare_data(A=A_list, X=X_list)



ValueError: too many values to unpack (expected 2)

In [38]:
# 训练主函数
def run_training(X, A, epochs=200):
    # 数据集加载与预处理
    data_list = prepare_data(X, A)

    # 选择设备（GPU或CPU）
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    for epoch in range(epochs):
        total_loss = 0
        for data in data_list:
            data = data.to(device)
            loss = train(model, data, optimizer)
            total_loss += loss

        # 打印每个 epoch 的平均损失
        print(f'Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(data_list):.4f}')

        # 每隔一段时间进行测试评估
        if (epoch + 1) % 20 == 0:
            roc_auc_scores = []
            ap_scores = []
            for data in data_list:
                data = data.to(device)
                roc_auc, ap = test(model, data)
                roc_auc_scores.append(roc_auc)
                ap_scores.append(ap)
            print(f'Epoch {epoch+1}, ROC AUC: {sum(roc_auc_scores)/len(roc_auc_scores):.4f}, AP: {sum(ap_scores)/len(ap_scores):.4f}')