In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import networkx as nx
from torch_geometric.data import Data
from torch_geometric.nn import GATConv, GCNConv
from scipy.sparse import coo_matrix
import numpy as np
from torch_geometric.utils import to_networkx
import random
from heapdict import heapdict
from node2vec import Node2Vec
import argparse
import torch.nn.init as init
from utils import *

In [2]:
# Width of the pooled per-graph embedding produced by the graph encoders.
graph_embedding_size = 64
# Number of node pairs toggled per graph (handed to Modify_edge as modified_edge).
edge_sample_number = 100
edge_dict = dict()
# Run on the first CUDA device when one is present, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
# Define the GCN model
class GCN(torch.nn.Module):
    """Two-layer graph convolutional encoder that returns per-node embeddings.

    The output width is taken from the module-level ``graph_embedding_size``.
    """

    def __init__(self,num_features):
        super().__init__()
        hidden_width = 128
        self.conv1 = GCNConv(num_features, hidden_width)
        self.conv2 = GCNConv(hidden_width, graph_embedding_size)

    def forward(self, data):
        """Encode ``data`` (an object exposing ``x`` and ``edge_index``)."""
        node_feats = data.x
        connectivity = data.edge_index
        hidden = torch.relu(self.conv1(node_feats, connectivity))
        # No activation after the second layer: raw embeddings are returned.
        return self.conv2(hidden, connectivity)

# Define the GAT model
class GAT(torch.nn.Module):
    """Two-layer graph attention encoder that returns per-node embeddings.

    Layer 1 uses ``num_heads`` heads of width 512; layer 2 is a single head
    (``concat=False``) of width ``graph_embedding_size``. Dropout with p=0.2
    is applied to the inputs of both layers while the module is training.
    """

    def __init__(self, num_features, num_heads=4, graph_embedding_size=64):
        super().__init__()
        self.gat1 = GATConv(num_features, 512, heads=num_heads, dropout=0.2)
        self.gat2 = GATConv(512 * num_heads, graph_embedding_size, heads=1, concat=False, dropout=0.2)

    def forward(self, data):
        """Encode ``data`` (an object exposing ``x`` and ``edge_index``)."""
        connectivity = data.edge_index
        hidden = F.dropout(data.x, p=0.2, training=self.training)
        hidden = F.elu(self.gat1(hidden, connectivity))
        hidden = F.dropout(hidden, p=0.2, training=self.training)
        return self.gat2(hidden, connectivity)

# MLP that decides whether an edge (node pair) should be modified
class MLP(nn.Module):
    """Three-layer perceptron (input -> 512 -> 256 -> 1) ending in a sigmoid."""

    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 1)

        # Kaiming-normal initialisation of every weight matrix, in layer order.
        for layer in (self.fc1, self.fc2, self.fc3):
            init.kaiming_normal_(layer.weight, nonlinearity='relu')

    def forward(self, x):
        """Return a score in (0, 1) for each input row."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        logits = self.fc3(hidden)
        return torch.sigmoid(logits)
    
class GCN_edge_modify(nn.Module):
    """Two-layer GCN followed by a linear head that emits one sigmoid score per node.

    Args:
        num_features: width of the input node features.
        hidden_channels: width of both graph-convolution layers.
    """

    def __init__(self, num_features, hidden_channels = 512):
        # The original called super(GCN, self).__init__(); GCN is an unrelated
        # class, so constructing this module raised a TypeError.
        super().__init__()
        self.conv1 = GCNConv(num_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        # Final layer that produces the output score.
        self.out = nn.Linear(hidden_channels, 1)

    def forward(self, x, edge_index):
        """Return a tensor with one value in (0, 1) per row of ``x``."""
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        x = self.out(x)
        return torch.sigmoid(x)
    
class MLPClassifier(nn.Module):
    """Final classifier that judges whether two graphs share the same MVC result.

    Three-layer perceptron (input -> 512 -> 128 -> 1) ending in a sigmoid.
    """

    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 512)  # first layer
        self.fc2 = nn.Linear(512, 128)         # second layer
        self.fc3 = nn.Linear(128, 1)           # output layer

        # Kaiming-normal initialisation of every weight matrix, in layer order.
        for layer in (self.fc1, self.fc2, self.fc3):
            init.kaiming_normal_(layer.weight, nonlinearity='relu')

    def forward(self, x):
        """Return a probability-like value per input row."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        # The sigmoid keeps the output between 0 and 1.
        return torch.sigmoid(self.fc3(hidden))

In [3]:

class Modify_edge(nn.Module):
    """Toggle node pairs in graphs and predict whether the MVC size is preserved.

    One ``forward()`` call:
      1. loads the training graphs and attaches Node2Vec node features,
      2. embeds every graph with the GAT and mean-pools to a graph embedding,
      3. scores every candidate node pair with the MLP and toggles the
         ``modified_edge`` best-scoring pairs (add if absent, delete if present),
      4. embeds the modified graphs and compares them with the originals
         (cosine similarity, classifier prediction, exact MVC sizes).
    """

    def __init__(self, num_features, graph_embedding_size, epoch = 100, lr = 0.0001, modified_edge = 30, device = 'cuda:0', GraphNumber = 50, Graphsize = 50, num_heads = 8):
        """
        Args:
            num_features: width of the node features; also used as the Node2Vec
                embedding dimension so features always match the GAT input.
            graph_embedding_size: width of the GAT output / graph embedding.
            epoch, lr: stored on the instance; not read by any method here.
            modified_edge: number of node pairs toggled per graph.
            device: accepted for backward compatibility. Tensors are placed on
                whatever device the module's parameters live on, so
                ``model.to(...)`` decides placement.
            GraphNumber: number of training graphs consumed per forward pass.
            Graphsize: number of nodes assumed when enumerating node pairs.
            num_heads: attention heads of the first GAT layer.
        """
        super(Modify_edge, self).__init__()
        self.gat = GAT(num_features=num_features, num_heads=num_heads, graph_embedding_size = graph_embedding_size)
        self.mlp = MLP(input_size=3 * graph_embedding_size + 1)
        self.classifier = MLPClassifier(input_size=2 * graph_embedding_size)
        self.num_features = num_features
        self.modified_edge = modified_edge
        self.epoch = epoch
        self.lr = lr
        self.GraphNumber = GraphNumber
        self.Graphsize = Graphsize
        # Build the candidate-pair universe up front so validation() also works
        # when it is called before the first forward().
        self.edge_dict = {}
        self.whole_edge_set = set()
        self.create_edge_dict(Graphsize)

    def _model_device(self):
        """Return the device the module's parameters currently live on."""
        return next(self.parameters()).device

    def _graph_to_data(self, G):
        """Convert a NetworkX graph into a PyG ``Data`` object with Node2Vec features.

        ``edge_index`` comes from the adjacency matrix, so an undirected edge
        appears in both directions. Row ``i`` of ``x`` is the Node2Vec vector of
        the i-th node of ``G.nodes()``, which is the same ordering the adjacency
        matrix uses.
        """
        nodes = list(G.nodes())
        adj_matrix = coo_matrix(nx.adjacency_matrix(G))
        row = torch.from_numpy(adj_matrix.row.astype(np.int64))
        col = torch.from_numpy(adj_matrix.col.astype(np.int64))
        edge_index = torch.stack([row, col], dim=0)

        vec = Node2Vec(G, dimensions=self.num_features, walk_length=10, num_walks=10, workers=4, quiet=True)
        wv = vec.fit(window=3, min_count=2, batch_words=4).wv
        # wv.vectors follows the Word2Vec vocabulary order, not the node order,
        # so each node's vector is looked up by its (string) key instead.
        features = np.zeros((len(nodes), self.num_features), dtype=np.float32)
        for position, node in enumerate(nodes):
            key = str(node)
            if key in wv:  # a node dropped from the vocabulary keeps a zero vector
                features[position] = wv[key]
        x = torch.from_numpy(features)
        return Data(x=x, edge_index=edge_index)

    @staticmethod
    def _flip_edges(G, decisions):
        """Toggle every selected node pair in ``G`` in place; return (added, removed)."""
        added = 0
        removed = 0
        for _, (u, v) in decisions:  # decision: (score, (u, v))
            if G.has_edge(u, v):
                G.remove_edge(u, v)
                removed += 1
            else:
                G.add_edge(u, v)
                added += 1
        return added, removed

    def forward(self):
        """Run one pass over the training graphs.

        Returns:
            tuple ``(cosine_similarity, preserve_predict, labels_tensor, mvc_diff)``:
            mean cosine similarity between modified and original graph
            embeddings, the classifier output per graph, the 0/1 label per graph
            (1 when the MVC size is unchanged), and the mean squared MVC-size
            difference.
        """
        self.modified_graphs = []
        self.edge_dict = {}
        self.whole_edge_set = set()
        self.dataset = []
        self.init_graph()  # fills self.dataset
        dev = self._model_device()

        original_embeddings = [self.gat(data.to(dev)) for data in self.dataset]
        self.original_graph_embeddings = self.get_graph_embedding(original_embeddings)

        # NOTE(review): the top-k pair selection is discrete, so none of the
        # returned values depends on self.mlp and it receives no gradient.
        for emb, data, graph_emb in zip(original_embeddings, self.dataset, self.original_graph_embeddings):
            modify_edge = self.generate_edge_embeddings(data, emb, graph_emb)
            G = to_networkx(data, to_undirected=True)
            add_num, delete_num = self._flip_edges(G, modify_edge)
            print(f"add_num: {add_num}, delete_num: {delete_num}")
            self.modified_graphs.append(G)

        # Rebuild PyG data for the modified graphs the same way as the originals
        # (symmetric edge_index, node-aligned Node2Vec features).
        self.modified_dataset = [self._graph_to_data(G) for G in self.modified_graphs]

        modified_embeddings = [self.gat(data.to(dev)) for data in self.modified_dataset]
        self.modified_graph_embeddings = self.get_graph_embedding(modified_embeddings)

        cos = nn.CosineSimilarity(dim=1)
        self.cosine_similarities = cos(self.modified_graph_embeddings, self.original_graph_embeddings).mean()

        labels = []
        MVC_diff = 0  # plain Python number: carries no gradient
        for mod_graph, orig_graph in zip(self.modified_graphs, self.dataset):
            mod_mvc = len(self.calculate_MVC(mod_graph))
            orig_mvc = len(self.calculate_MVC(to_networkx(orig_graph, to_undirected=True)))
            MVC_diff += (mod_mvc - orig_mvc) ** 2
            labels.append(1 if mod_mvc == orig_mvc else 0)
        print(f"label presreved: {labels.count(1)}")

        # One row per graph: [modified graph embedding | original graph embedding].
        combined_embeddings_tensor = torch.cat(
            (self.modified_graph_embeddings, self.original_graph_embeddings), dim=1
        )
        self.labels_tensor = torch.tensor(labels).to(dev)
        # squeeze(-1) keeps the batch dimension even for a single graph.
        self.preserve_predict = self.classifier(combined_embeddings_tensor).squeeze(-1)
        return self.cosine_similarities, self.preserve_predict, self.labels_tensor, MVC_diff/self.GraphNumber

    def init_graph(self):
        """Load the training graphs and build ``self.dataset`` with Node2Vec features."""
        # NOTE(review): pickle_load comes from the star-imported utils module;
        # presumably it wraps pickle.load, so only point it at trusted files.
        self.train_graphs = pickle_load("/workspace/Synthetic_graph/Training_graph_50.pkl")
        self.create_edge_dict(self.Graphsize)
        for i in range(self.GraphNumber):
            self.dataset.append(self._graph_to_data(self.train_graphs[i]))

    def create_edge_dict(self,graph_size):
        """Enumerate every pair (i, j) with i < j < graph_size and map it to an index."""
        index = 0
        for i in range(graph_size - 1):
            for j in range(i + 1, graph_size):
                self.whole_edge_set.add((i, j))
                self.edge_dict[(i, j)] = index
                index += 1

    def calculate_MVC(self,graph, UB=9999999, C=None):
        """Find a minimum vertex cover of ``graph`` by branch and bound.

        Args:
            graph: NetworkX-style graph; it is copied, never modified.
            UB: upper bound on the cover size used to prune branches.
            C: vertices already chosen on the current branch.

        Returns:
            set of vertices forming the smallest cover found.
        """
        # A fresh set per call: a shared mutable default could be returned to
        # (and altered by) the caller.
        if C is None:
            C = set()
        if len(graph.edges()) == 0:
            return C

        v, _ = max(graph.degree(), key=lambda a: a[1])
        # Branch C1: leave v out, so all of its neighbours must be in the cover.
        C1 = C.copy()
        neighbors = set(graph.neighbors(v))
        C1.update(neighbors)
        graph_1 = graph.copy()
        graph_1.remove_nodes_from(neighbors)
        if len(C1) < UB:
            C1 = self.calculate_MVC(graph_1, UB, C1)

        # Branch C2: put v itself into the cover.
        C2 = C.copy()
        C2.add(v)
        graph_2 = graph.copy()
        graph_2.remove_node(v)
        if len(C2) < UB:
            C2 = self.calculate_MVC(graph_2, min(UB, len(C1)), C2)

        return min(C1, C2, key=len)

    def get_graph_embedding(self,embeddings):
        """Mean-pool each node-embedding matrix into one graph embedding and stack them."""
        return torch.stack([embedding.mean(dim=0) for embedding in embeddings])

    def generate_edge_embeddings(self,data, embedding, graph_emb):
        """Score every candidate node pair and return the top ``modified_edge`` of them.

        Each pair is described by ``[emb[u], emb[v], edge-exists flag, graph_emb]``
        and scored by ``self.mlp`` in a single batched call.

        Returns:
            list of ``(score, (u, v))`` tuples sorted by descending score, with
            ``score`` a Python float.
        """
        graph = to_networkx(data, to_undirected=True)
        edge_set = set(graph.edges())
        print(f"len of edge_set: {len(edge_set)}, len of whole_edge_set: {len(self.whole_edge_set)}, len of none edge: {len(self.whole_edge_set) - len(edge_set)}")

        candidates = list(self.whole_edge_set)
        if not candidates:
            return []

        dev = embedding.device
        src = torch.tensor([u for u, _ in candidates], dtype=torch.long, device=dev)
        dst = torch.tensor([v for _, v in candidates], dtype=torch.long, device=dev)
        # has_edge is orientation-agnostic, matching the check used when flipping.
        exists = torch.tensor(
            [[1.0] if graph.has_edge(u, v) else [0.0] for u, v in candidates],
            dtype=embedding.dtype,
            device=dev,
        )
        pair_features = torch.cat(
            [embedding[src], embedding[dst], exists, graph_emb.unsqueeze(0).expand(len(candidates), -1)],
            dim=1,
        )
        # Only the ranking is used downstream, so no autograd graph is needed.
        with torch.no_grad():
            scores = self.mlp(pair_features).squeeze(-1).tolist()

        pro_list = sorted(zip(scores, candidates), key=lambda item: item[0], reverse=True)
        return pro_list[:self.modified_edge]

    def validation(self):
        """Toggle edges on the validation graphs and report how many keep their MVC size."""
        # NOTE(review): pickle_load comes from the star-imported utils module;
        # presumably it wraps pickle.load, so only point it at trusted files.
        validation_data, valid_opt, _ = pickle_load("/workspace/Synthetic_graph/Validation_graph_200_withOPTPRO.pkl")
        dev = self._model_device()
        valid_original_embeddings = []
        self.valid_dataset = []
        self.valid_modified_graphs = []
        # Convert each validation graph to PyG data (Node2Vec features), run the
        # GAT for node embeddings, then mean-pool into graph embeddings.
        for i in range(len(validation_data)):
            data = self._graph_to_data(validation_data[i])
            self.valid_dataset.append(data)
            valid_original_embeddings.append(self.gat(data.to(dev)))
        self.valid_graph_embedding = self.get_graph_embedding(valid_original_embeddings)

        # Select the pairs to toggle for every graph and apply them directly;
        # the PyG data is passed along so the existing edges are known.
        for emb, data, graph_emb in zip(valid_original_embeddings, self.valid_dataset, self.valid_graph_embedding):
            modify_edge = self.generate_edge_embeddings(data, emb, graph_emb)
            G = to_networkx(data, to_undirected=True)
            self._flip_edges(G, modify_edge)
            self.valid_modified_graphs.append(G)

        valid_label_presever = 0
        for val_mod_graph, ori_opt in zip(self.valid_modified_graphs, valid_opt):
            mod_mvc = len(self.calculate_MVC(val_mod_graph))
            if ori_opt == mod_mvc:
                valid_label_presever += 1
        print(f"validation label presreved: {valid_label_presever}")

In [4]:
# Build the edge-modification model and move it to the selected device.
mymodel = Modify_edge(
    num_features=50,
    graph_embedding_size=graph_embedding_size,
    epoch=100,
    lr=0.0001,
    modified_edge=edge_sample_number,
    device=device,
    GraphNumber=50,
    Graphsize=50,
    num_heads=4,
).to(device)
# Binary cross-entropy for the "MVC size preserved" classifier output.
criterion = nn.BCELoss()
# NOTE(review): the optimizer learning rate (0.00005) differs from the lr=0.0001
# handed to Modify_edge above; confirm which value is intended.
optimizer = torch.optim.Adam(mymodel.parameters(), lr=0.00005)

In [5]:
# NOTE(review): diff_weight is defined here but not applied to any loss term below.
diff_weight = 2
for epoch in range(100):
    # ---- optimisation step ----
    mymodel.train()
    optimizer.zero_grad()
    (similarity_loss,
     preserve_predict,
     labels_tensor,
     difference_loss) = mymodel()
    classifier_loss = criterion(preserve_predict, labels_tensor.float())
    # NOTE(review): difference_loss is returned by Modify_edge.forward as a plain
    # Python number, so it shifts the reported loss but contributes no gradient.
    loss = classifier_loss + similarity_loss + difference_loss
    loss.backward()
    optimizer.step()
    print(f"Epoch: {epoch}, Loss: {loss}, similarity_loss: {similarity_loss}, difference_loss: {difference_loss}")

    # ---- validation after every epoch, without tracking gradients ----
    mymodel.eval()
    with torch.no_grad():
        mymodel.validation()

len of edge_set: 853, len of whole_edge_set: 1225, len of none edge: 372
add_num: 100, delete_num: 0
len of edge_set: 750, len of whole_edge_set: 1225, len of none edge: 475
add_num: 100, delete_num: 0
len of edge_set: 818, len of whole_edge_set: 1225, len of none edge: 407
add_num: 100, delete_num: 0
len of edge_set: 722, len of whole_edge_set: 1225, len of none edge: 503
add_num: 100, delete_num: 0
len of edge_set: 331, len of whole_edge_set: 1225, len of none edge: 894
add_num: 100, delete_num: 0
len of edge_set: 913, len of whole_edge_set: 1225, len of none edge: 312
add_num: 100, delete_num: 0
len of edge_set: 534, len of whole_edge_set: 1225, len of none edge: 691
add_num: 100, delete_num: 0
len of edge_set: 432, len of whole_edge_set: 1225, len of none edge: 793
add_num: 100, delete_num: 0
len of edge_set: 576, len of whole_edge_set: 1225, len of none edge: 649
add_num: 100, delete_num: 0
len of edge_set: 443, len of whole_edge_set: 1225, len of none edge: 782
add_num: 100, dele