In [3]:
import torch
from torch_geometric.nn import GCNConv, global_mean_pool
import pickle
import csv
import os
import torch
import torch.nn.functional as F
from torch_geometric.data import Dataset, Data
from torch_geometric.loader import DataLoader
import torch.optim.lr_scheduler as lr_scheduler
# Import the summary writer 
from torch.utils.tensorboard import SummaryWriter
class GraphDataset(Dataset):
    def __init__(self, csv_file, root_dir):
        super().__init__(root_dir)
        self.labels = []
        self.file_paths = []
        self.root_dir = root_dir

        # 读取 CSV 文件，获取文件名和标签
        with open(csv_file, mode='r') as file:
            csv_reader = csv.reader(file)
            next(csv_reader)  # 跳过表头
            for row in csv_reader:
                file_name, label = row
                self.file_paths.append(os.path.join(root_dir, file_name))
                self.labels.append(float(label))  # 根据需要调整数据类型
    @property
    def raw_file_names(self):
        return self.file_paths
    
    @property
    def processed_file_names(self):
        return self.file_paths
    
    def len(self):
        return len(self.file_paths)

    def get(self, idx):
        with open(self.file_paths[idx], 'rb') as f:
            graph = pickle.load(f)

        # 创建图数据对象
        node_type = torch.tensor(graph['node_type'], dtype=torch.float)
        num_inverted_predecessors = torch.tensor(graph['num_inverted_predecessors'], dtype=torch.float).unsqueeze(1)
        # 拼接两个特征矩阵
        x = torch.cat([node_type, num_inverted_predecessors], dim=1)
        data = Data(
            x=x,
            edge_index=torch.tensor(graph['edge_index'], dtype=torch.long),
            num_nodes=int(graph['nodes'])
        )       
        # 添加标签
        data.y = torch.tensor([self.labels[idx]], dtype=torch.float)
        return data

class GCN(torch.nn.Module):
    def __init__(self, num_node_features):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(num_node_features, 16)
        self.conv2 = GCNConv(16, 32)
        self.fc = torch.nn.Linear(32, 1)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        
        x = self.conv1(x, edge_index)
        x = torch.relu(x)
        x = self.conv2(x, edge_index)
        x = torch.relu(x)
        x = global_mean_pool(x, batch)
        x = self.fc(x)
        return x

In [4]:
# dataset
csv_train = '/home/jiangzhengqun/ml/dataset1/train_data.csv'  # 替换为实际的 CSV 文件路径
root_train = '/home/jiangzhengqun/ml/dataset1/train_data'  # 替换为实际的文件夹路径
dataset_train = GraphDataset(csv_file=csv_train, root_dir=root_train)
loader_train = DataLoader(dataset_train, batch_size=128, shuffle=True)

csv_test = '/home/jiangzhengqun/ml/dataset1/test_data.csv'  # 替换为实际的 CSV 文件路径
root_test = '/home/jiangzhengqun/ml/dataset1/test_data'  # 替换为实际的文件夹路径
dataset_test = GraphDataset(csv_file=csv_test, root_dir=root_test)
loader_test = DataLoader(dataset_test, batch_size=128, shuffle=True)

In [5]:
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
model = GCN(num_node_features=2)
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = torch.nn.MSELoss().to(device)

In [5]:
# writer = SummaryWriter('./runs')
writer = SummaryWriter('./run_2024_6_4')
num_epochs = 1000
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    n_loss = 0
    for i, data in enumerate(loader_train):
        data.to(device)
        optimizer.zero_grad()
        out = model(data).squeeze(1)
        loss = criterion(out, data.y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        n_loss += 1
    train_loss = epoch_loss / n_loss
    print(f'after epoch {epoch + 1} the train_loss is {train_loss}')
    model.eval()
    epoch_loss = 0
    n_loss = 0
    with torch.no_grad():
        for i, data in enumerate(loader_test):
            data.to(device)
            out = model(data).squeeze(1)
            loss = criterion(out, data.y)
            epoch_loss += loss.item()
            n_loss += 1
    eval_loss = epoch_loss / n_loss
    writer.add_scalars('Loss', {'train': train_loss, 'valid': eval_loss}, epoch + 1)
    print(f'after epoch {epoch + 1} the eval_loss is {eval_loss}')
    # 保存模型参数
    if (epoch + 1) % 100 == 0 or (epoch + 1) == num_epochs:
        torch.save(model.state_dict(), f'./model1/model_epoch_{epoch + 1}.pth')
writer.close()
# 在训练结束时保存最终的模型参数
torch.save(model.state_dict(), './model1/model_final.pth')

  node_type = torch.tensor(graph['node_type'], dtype=torch.float)
  num_inverted_predecessors = torch.tensor(graph['num_inverted_predecessors'], dtype=torch.float).unsqueeze(1)
  edge_index=torch.tensor(graph['edge_index'], dtype=torch.long),


after epoch 1 the train_loss is 0.052365670543770455
after epoch 1 the eval_loss is 0.05322665653445504
after epoch 2 the train_loss is 0.05168247443818769
after epoch 2 the eval_loss is 0.052479999901896175
after epoch 3 the train_loss is 0.05136062547044699
after epoch 3 the eval_loss is 0.05189687470820817
after epoch 4 the train_loss is 0.05118182279862637
after epoch 4 the eval_loss is 0.05181457119231874
after epoch 5 the train_loss is 0.050986840259716955
after epoch 5 the eval_loss is 0.05118036329407583
after epoch 6 the train_loss is 0.05093904996160851
after epoch 6 the eval_loss is 0.0513484367931431
after epoch 7 the train_loss is 0.05077504899439424
after epoch 7 the eval_loss is 0.052272643385963005
after epoch 8 the train_loss is 0.05069114070732233
after epoch 8 the eval_loss is 0.05173139883713289
after epoch 9 the train_loss is 0.0506648498608969
after epoch 9 the eval_loss is 0.05058716025880792
after epoch 10 the train_loss is 0.050618819892406464
after epoch 10 th