based of https://github.com/Flawless1202/VGAE_pyG

In [1]:
import numpy as np
from torch import Tensor

import os

import torch
import torch.nn as nn
from torch.optim import Adam



In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch_geometric.nn.models import InnerProductDecoder, VGAE
from torch_geometric.nn.conv import GCNConv
from torch_geometric.utils import negative_sampling, remove_self_loops, add_self_loops


class GCNEncoder(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GCNEncoder, self).__init__()
        self.gcn_shared = GCNConv(in_channels, hidden_channels)
        self.gcn_mu = GCNConv(hidden_channels, out_channels)
        self.gcn_logvar = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        x = F.relu(self.gcn_shared(x, edge_index))
        mu = self.gcn_mu(x, edge_index)
        logvar = self.gcn_logvar(x, edge_index)
        return mu, logvar

#from torch_geometric.utils import contains_self_loops, is_undirected

class DeepVGAE(VGAE):
    def __init__(self, encoder, decoder):
        # https://github.com/Flawless1202/VGAE_pyG/tree/master
        super(DeepVGAE, self).__init__(encoder=encoder,
                                       decoder=decoder)

    def forward(self, x, edge_index):
        z = self.encode(x, edge_index)
        adj_pred = self.decoder.forward_all(z)
        return adj_pred

    def loss(self, x, edge_index):
        z = self.encode(x, edge_index)

        # Do not include self-loops in negative samples
        all_edge_index_tmp, _ = remove_self_loops(edge_index)
        all_edge_index_tmp, _ = add_self_loops(all_edge_index_tmp)

        neg_edge_index = negative_sampling(all_edge_index_tmp, z.size(0), num_neg_samples=edge_index.size(1))

        neg_loss = -torch.log(1 - self.decoder(z, neg_edge_index, sigmoid=True) + 1e-15).mean()
        pos_loss = -torch.log(self.decoder(z, edge_index, sigmoid=True) + 1e-15).mean()
        kl_loss = self.kl_loss() * 1/x.size(0)

        loss = pos_loss + neg_loss + kl_loss
        return loss

    def threshold_sample_new_graph(self, x, edge_index, batch_size, threshold=0.5):
        # This should be changed to sample top k edges with highest prob.
        z = self.encode(x, edge_index)

        num_nodes = z.size(0)
        neg_adj = torch.ones(num_nodes, num_nodes)  # Assume all are negative initially
        neg_adj[edge_index[0], edge_index[1]] = 0  # Remove positive edges
        neg_adj[edge_index[1], edge_index[0]] = 0  # Ensure symmetry for undirected graphs
        neg_adj.fill_diagonal_(0)  # Remove self-loops

        negative_edges = torch.nonzero(neg_adj.to(torch.bool)).int()

        pos_edge_candidates = edge_index.t()
        num_edges_to_sample = edge_index.size(1)

        sampled_edges_set = set()

        while len(sampled_edges_set) < num_edges_to_sample:
            # Sample batch_size random positive and negative edges
            pos_indices = torch.randperm(pos_edge_candidates.size(0))[:batch_size]
            neg_indices = torch.randperm(negative_edges.size(0))[:batch_size]

            sampled_pos_edges = pos_edge_candidates[pos_indices].to(z.device)
            sampled_neg_edges = negative_edges[neg_indices].to(z.device)
            sampled_edges = torch.cat([sampled_pos_edges, sampled_neg_edges], dim=0)

            batch_probs = self.decoder.forward(z, sampled_edges.t(), sigmoid=True).squeeze()

            high_prob_edges = sampled_edges[batch_probs > threshold]
            for edge in high_prob_edges:
                edge_tuple = (int(edge[0].item()), int(edge[1].item()))
                sampled_edges_set.add((edge_tuple[0], edge_tuple[1]))
                sampled_edges_set.add((edge_tuple[1], edge_tuple[0]))  # Add both directions for undirected graphs

                if len(sampled_edges_set) >= num_edges_to_sample:
                    break


        sampled_edge_index = torch.tensor(list(sampled_edges_set), dtype=torch.long).t()
        return sampled_edge_index


In [3]:
%load_ext autoreload
%autoreload 2

import logging

logging.basicConfig(level=logging.WARNING)
logging.getLogger('anonymigraph').setLevel(logging.INFO)

In [4]:
import torch
import networkx as nx

from anonymigraph.metrics.evaluator import Evaluator

from anonymigraph.metrics.utility.structural import (
    DegreeCentralityMetric,
    EigenvectorMetric,
    PageRankMetric,
    ClosenessCentralityMetric,
    LocalClusteringCoefficientMetric,

    WLColorMetric,

    ConnectedComponentsMetric,
    NumberOfEdgesMetric,
    NumberOfNodesMetric,
    NumberOfTrianglesMetric,
    MeanDegreeMetric,
    MaxDegreeMetric,
    MedianDegreeMetric,
    AverageClusteringCoefficientMetric,
    TransitivityMetric
)

def edge_index_to_networkx(edge_index, num_nodes):
    G = nx.Graph()
    G.add_nodes_from(range(num_nodes))
    edge_list = edge_index.t().tolist()
    G.add_edges_from(edge_list)
    return G

def compare_graphs(true_edge_index, sampled_edge_index, num_nodes):

    true_graph = edge_index_to_networkx(true_edge_index, num_nodes)
    sampled_graph = edge_index_to_networkx(sampled_edge_index, num_nodes)

    metrics = {
        "|V|": NumberOfNodesMetric(),
        "|E|": NumberOfEdgesMetric(),
        "|Δ|": NumberOfTrianglesMetric(),
        "|CC|": ConnectedComponentsMetric(),
        "Median Deg.": MedianDegreeMetric(),
        "Avg. Deg.": MeanDegreeMetric(),
        "Max Deg.": MaxDegreeMetric(),
        "Transitivity": TransitivityMetric(),
        "Avg. Clustering": AverageClusteringCoefficientMetric(),
        "Degree Centrality": DegreeCentralityMetric(),
        "Eigenvector Centrality": EigenvectorMetric(),
        "TVD WL Colors d=2": WLColorMetric(depth=2),
        "PageRank":	PageRankMetric(),
        "Local Clustering Coefficient": LocalClusteringCoefficientMetric()
    }


    evaluator = Evaluator(metrics, use_graphblas=True)
    print(evaluator.evaluate(true_graph, sampled_graph))
    return true_graph, sampled_graph

def compute_edge_overlap(edge_index_1, edge_index_2):
    # Convert to numpy arrays and ensure they are on CPU
    edge_index_1 = edge_index_1.cpu().numpy()
    edge_index_2 = edge_index_2.cpu().numpy()

    # Convert edge arrays to sets of tuples for intersection and union
    set_edges_1 = set([tuple(edge) for edge in edge_index_1.T])
    set_edges_2 = set([tuple(edge) for edge in edge_index_2.T])

    # Compute the intersection and union
    edge_intersection = set_edges_1.intersection(set_edges_2)
    edge_union = set_edges_1.union(set_edges_2)

    # Compute the edge overlap metrics
    edge_overlap_count = len(edge_intersection)
    edge_overlap_ratio = edge_overlap_count / len(edge_union) if len(edge_union) > 0 else 0

    return edge_overlap_count, edge_overlap_ratio


In [5]:
import numpy as np
import networkx as nx
import scipy.sparse as sp
from scipy.linalg import eigh
from scipy.sparse.linalg import eigsh

def compute_normalized_laplacian_eigenvectors(G, k=3):
    if k >= G.number_of_nodes():
        raise ValueError("k must be smaller than the number of nodes in the graph.")

    A = nx.adjacency_matrix(G)
    degrees = np.array([d for n, d in G.degree()])
    D_inv_sqrt = sp.diags(1.0 / np.sqrt(degrees))
    I = sp.eye(G.number_of_nodes())
    L_normalized = I - D_inv_sqrt @ A @ D_inv_sqrt

    _, eigenvectors = eigsh(L_normalized, k=k+1, which='SM', sigma=0)

    return eigenvectors[:, 1:k+1]



In [6]:
G = nx.LFR_benchmark_graph(
    n=5000, tau1=3, tau2=1.5, mu=0.1, average_degree=10, min_community=400, seed=10
)
G.remove_edges_from(nx.selfloop_edges(G))
assert len(list(nx.selfloop_edges(G))) == 0

# Assuming G is your graph
k = 256
eigenvectors = compute_normalized_laplacian_eigenvectors(G, k=k)

In [7]:
import json
import os

import torch
import torch.nn as nn
from torch.optim import Adam

from torch_geometric.utils import from_networkx
import networkx as nx

def write_graph(G, fname):
   with open(fname, "w") as file:
      json.dump({'num_nodes': G.number_of_nodes(), 'edges': list(G.edges())}, file)



torch.manual_seed(12345)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

enc_in_channels = k
enc_hidden_channels = 256
enc_out_channels = 256
dec_hidden_channels = 256

model = DeepVGAE(encoder=GCNEncoder(enc_in_channels, enc_hidden_channels, enc_out_channels),
                 decoder=InnerProductDecoder(),
                 #decoder=SymmetricMLPDecoder(in_features=enc_out_channels, hidden_size=dec_hidden_channels)
).to(device)

optimizer = Adam(model.parameters(), lr=0.005)

node_to_index = {node: i for i, node in enumerate(G.nodes())}
for node in G.nodes():
    G.nodes[node]['x'] = torch.tensor(eigenvectors[node_to_index[node], :]).float()

data = from_networkx(G).to(device)
print(G)
print(data)

output_dir = './anon_vgae_attp2'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

crossed_thresholds = set()

last_edge_overlap_ratio = 0
dump_Ga_when_edge_overlap_is_greater_than = [0, 0.5, 0.1, 0.15, 0.2, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.60, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1]

for epoch in range(1000000):
    model.train()
    optimizer.zero_grad()
    loss = model.loss(data.x, data.edge_index)
    loss.backward()
    optimizer.step()
    if epoch % 50 == 0:
        print(epoch, "loss", loss.item())
        pass
    #if epoch % 50 == 0:
    #    with torch.no_grad():
    #        sampled_edge_index = model.threshold_sample_new_graph(data.x, data.edge_index, batch_size=1+int(G.number_of_edges()/20))
    #        print(epoch, "loss", loss.item(), "true 2*edges", data.edge_index.size(1), "sampled 2*edges", sampled_edge_index.size(1), "Overlap to Union", compute_edge_overlap(data.edge_index, sampled_edge_index))
    if epoch > 1000 and epoch % 800 == 0:
        print("Sampling new graph")
        model.eval()
        with torch.no_grad():
            sampled_edge_index = model.threshold_sample_new_graph(data.x, data.edge_index, batch_size=1+int(G.number_of_edges()/20))
            edge_overlap_count, edge_overlap_ratio = compute_edge_overlap(data.edge_index, sampled_edge_index)
            print(epoch, "loss", loss.item(), "true 2*edges", data.edge_index.size(1), "sampled 2*edges", sampled_edge_index.size(1), "Overlap to Union", edge_overlap_count, edge_overlap_ratio)
            G, G_a = compare_graphs(data.edge_index, sampled_edge_index, data.x.size(0))
            for threshold in dump_Ga_when_edge_overlap_is_greater_than:
                if edge_overlap_ratio > threshold and threshold not in crossed_thresholds:
                    # Dump the graph and mark the threshold as crossed
                    graph_path = os.path.join(output_dir, f'Ga_threshold_{int(threshold*100)}.json')
                    write_graph(G_a, graph_path)
                    crossed_thresholds.add(threshold)
                    print(f"Graph dumped for threshold {threshold} at epoch {epoch}.")
            print("Done Comparing!")

cuda
Graph with 5000 nodes and 29173 edges
Data(x=[5000, 256], edge_index=[2, 58346], community=[5000])
0 loss 14.053125381469727
50 loss 1.452804446220398
100 loss 1.4308600425720215
150 loss 1.4238165616989136
200 loss 1.3878412246704102
250 loss 1.2881783246994019
300 loss 1.257591724395752
350 loss 1.2105995416641235
400 loss 1.1914589405059814
450 loss 1.1836938858032227
500 loss 1.1633061170578003
550 loss 1.1293851137161255
600 loss 1.1178311109542847
650 loss 1.105799913406372
700 loss 1.1057363748550415
750 loss 1.108140468597412
800 loss 1.1053295135498047
850 loss 1.1018465757369995
900 loss 1.0992445945739746
950 loss 1.0979387760162354
1000 loss 1.09565007686615
1050 loss 1.084826946258545
1100 loss 1.08360755443573
1150 loss 1.0786463022232056
1200 loss 1.0816895961761475
1250 loss 1.0762934684753418
1300 loss 1.0725384950637817
1350 loss 1.0758756399154663
1400 loss 1.0739439725875854
1450 loss 1.0734245777130127
1500 loss 1.064861536026001
1550 loss 1.0607192516326904
1

INFO:anonymigraph.metrics.evaluator:Converting graphs to graphblas
INFO:anonymigraph.metrics.evaluator:Evaluating Metric |V|
INFO:anonymigraph.metrics.evaluator:Evaluating Metric |E|
INFO:anonymigraph.metrics.evaluator:Evaluating Metric |Δ|
INFO:anonymigraph.metrics.evaluator:Evaluating Metric |CC|
INFO:anonymigraph.metrics.evaluator:Evaluating Metric Median Deg.
INFO:anonymigraph.metrics.evaluator:Evaluating Metric Avg. Deg.
INFO:anonymigraph.metrics.evaluator:Evaluating Metric Max Deg.
INFO:anonymigraph.metrics.evaluator:Evaluating Metric Transitivity
INFO:anonymigraph.metrics.evaluator:Evaluating Metric Avg. Clustering
INFO:anonymigraph.metrics.evaluator:Evaluating Metric Degree Centrality
INFO:anonymigraph.metrics.evaluator:Evaluating Metric Eigenvector Centrality
INFO:anonymigraph.metrics.evaluator:Evaluating Metric TVD WL Colors d=2
INFO:anonymigraph.metrics.utility.structural.node_property_wl_colors:Calculating WL colors.
INFO:anonymigraph.metrics.utility.structural.node_propert

{'|V|': {'G': 5000, 'Ga': 5000}, '|E|': {'G': 29173, 'Ga': 29173}, '|Δ|': {'G': 8729.0, 'Ga': 2835.0}, '|CC|': {'G': 1, 'Ga': 1}, 'Median Deg.': {'G': 9.0, 'Ga': 11.0}, 'Avg. Deg.': {'G': 11.6692, 'Ga': 11.6692}, 'Max Deg.': {'G': 582, 'Ga': 373}, 'Transitivity': {'G': 0.03025370271956376, 'Ga': 0.016031682600959073}, 'Avg. Clustering': {'G': 0.06567556457456328, 'Ga': 0.02017125792027448}, 'Degree Centrality': 0.00028773754750950094, 'Eigenvector Centrality': 0.002406540402570709, 'TVD WL Colors d=2': 0.9992000000000003, 'PageRank': 2.1343626290400504e-05, 'Local Clustering Coefficient': 0.045504306654289506}
Graph dumped for threshold 0 at epoch 1600.
Graph dumped for threshold 0.1 at epoch 1600.
Graph dumped for threshold 0.15 at epoch 1600.
Graph dumped for threshold 0.2 at epoch 1600.
Graph dumped for threshold 0.3 at epoch 1600.
Graph dumped for threshold 0.35 at epoch 1600.
Graph dumped for threshold 0.4 at epoch 1600.
Done Comparing!
1650 loss 1.0509158372879028
1700 loss 1.052

KeyboardInterrupt: 

In [None]:
def edge_index_to_networkx(edge_index, num_nodes):
    G = nx.Graph()
    G.add_nodes_from(range(num_nodes))
    edge_list = edge_index.t().tolist()
    G.add_edges_from(edge_list)
    return G

true_edge_index, sampled_edge_index, num_nodes = data.edge_index, sampled_edge_index, data.x.size(0)

true_graph = edge_index_to_networkx(true_edge_index, num_nodes)
sampled_graph = edge_index_to_networkx(sampled_edge_index, num_nodes)

metrics = {
    "|V|": NumberOfNodesMetric(),
    "|E|": NumberOfEdgesMetric(),
    "|Δ|": NumberOfTrianglesMetric(),
    "|CC|": ConnectedComponentsMetric(),
    "Median Deg.": MedianDegreeMetric(),
    "Avg. Deg.": MeanDegreeMetric(),
    "Max Deg.": MaxDegreeMetric(),
    "Transitivity": TransitivityMetric(),
    "Avg. Clustering": AverageClusteringCoefficientMetric(),
    "Degree Centrality": DegreeCentralityMetric(),
    "Eigenvector Centrality": EigenvectorMetric(),
    "TVD WL Colors d=2": WLColorMetric(depth=2),
    "PageRank":	PageRankMetric(),
    "Local Clustering Coefficient": LocalClusteringCoefficientMetric()
}

evaluator = Evaluator(metrics, use_graphblas=False)
print(evaluator.evaluate(true_graph, sampled_graph))