In [1]:
import networkx as nx
import optimal1dclustering
import numpy as np
from scipy.sparse.linalg import eigsh

# LFR graph

In [4]:
from anonymigraph.anonymization._external.nest_model._rewire import _rewire

# Build the reference graph: an LFR benchmark graph (seeded, so reruns give the same graph).
# tau1, tau2 and mu are passed positionally to nx.LFR_benchmark_graph; per the networkx
# docs these are the degree exponent, the community-size exponent and the mixing parameter.
n = 400
tau1 = 3
tau2 = 1.5
mu = 0.1
G = nx.LFR_benchmark_graph(
    n, tau1, tau2, mu, average_degree=10, min_community=80, seed=10
)
G.remove_edges_from(nx.selfloop_edges(G))  # drop self-loops before building the adjacency matrix


# Get the dominant eigenvector and the adjacency matrix of the original graph.
A_G = nx.adjacency_matrix(G).astype(np.float64)  # sparse adjacency matrix, cast to float64
lam_G, x = largest_eigen(A_G)

print("Dominant eigenvalue:", lam_G)

# Loop-invariant: the same clustering mode is used for every (min_cluster_size, k) pair.
# presumably mode=2 selects a squared-error (k-means style) objective - TODO confirm
mode = 2

# data_dict[min_cluster_size][k] -> dict of metrics for that clustering / rewiring run.
data_dict = {}
for min_cluster_size in [1, 2, 4, 8, 16]:
    print(f"Starting gathering metric for min_cluster_size: {min_cluster_size}")
    data = {}
    for k in range(1, n//min_cluster_size):
        # Cluster the entries of the dominant eigenvector into k groups; the cluster
        # labels are then used as the node colors for the rewiring step.
        colors, centroids = optimal1dclustering.cluster(
            x, k, mode=mode, min_cluster_size=min_cluster_size
        )
        colors = np.array(colors)
        clusterLoss = calculate_kmeans_cluster_loss(x, colors, centroids, mode=mode)

        # Get new synthetic graph.
        # The edge array is rebuilt every iteration in case _rewire mutates its
        # input in place - TODO confirm; if it does not, this can be hoisted.
        edges = np.array(G.edges(), dtype=np.uint32)
        edges_rewired = _rewire(edges, colors.reshape(1, -1), r=10, parallel=False, random_seed=44)

        # Same node set (and node order) as G, but with the rewired edges.
        Ga = nx.Graph()
        Ga.add_nodes_from(G.nodes(data=True))
        Ga.add_edges_from(edges_rewired)

        A_Ga = nx.adjacency_matrix(Ga).astype(np.float64)
        delta = get_delta(A_Ga, lam_G, x)
        delta_norm = np.linalg.norm(delta)
        lam_Ga, x_Ga = largest_eigen(A_Ga)

        # NOTE(review): an eigenvector is only defined up to sign. largest_eigen is
        # defined outside this cell; if it does not normalise the sign, x and x_Ga
        # could point in opposite directions and the norm below would be ~2 instead
        # of ~0. Align the sign defensively (no-op when the signs already agree).
        if np.vdot(x, x_Ga) < 0:
            x_Ga = -x_Ga
        eigenvec_diff_norm = np.linalg.norm(x - x_Ga)

        results = {
            'Clustering Loss': clusterLoss,
            'Delta L2 Magnitude': delta_norm,
            'Delta L2 Magnitude / Lambda_G': delta_norm/lam_G,
            'Dominant Lambda Diff': lam_Ga - lam_G,
            'Eigenvec L2 Norm Diff': eigenvec_diff_norm,
        }
        data[k] = results

    data_dict[min_cluster_size] = data

Dominant eigenvalue: 22.100950910040144
Starting gathering metric for min_cluster_size: 1
Starting gathering metric for min_cluster_size: 2
Starting gathering metric for min_cluster_size: 4
Starting gathering metric for min_cluster_size: 8
Starting gathering metric for min_cluster_size: 16


In [9]:
from anonymigraph.anonymization import NestModelAnonymizer
from anonymigraph.anonymization._external.nest_model.fast_wl import WL_fast

# Baseline: rewire G using Weisfeiler-Lehman colors of increasing depth and record
# how far the dominant eigenvector of the rewired graph is from that of G.
# original_nest[number of distinct colors] -> dict of metrics.
original_nest = {}
for depth in [1,2,3,4,5,6]:
    # same as following call but we also get number of classes
    # NestModelAnonymizer(depth=depth, r=10).anonymize(G, random_seed=42)

    edges = np.array(G.edges(), dtype=np.uint32)
    # Append every edge in reversed orientation as well, so WL_fast receives both
    # directions. np.vstack replaces np.row_stack, which is deprecated in NumPy 2.0.
    bidirectional_edges = np.vstack((edges, edges[:, [1, 0]]))

    all_depth_colors = WL_fast(bidirectional_edges, labels=None, max_iter=depth)
    # Use the last (deepest) refinement returned, as a single-row 2D array.
    colors = all_depth_colors[-1].reshape(1, -1)
    edges_rewired = _rewire(
        edges, colors, r=10, parallel=False, random_seed=42
    )
    # Number of color classes; used as the x-coordinate ("k") in the comparison plot.
    # If two depths yield the same count, the later depth overwrites the earlier entry.
    k_approx = len(np.unique(colors))

    # Same node set (and node order) as G, but with the rewired edges.
    Ga = nx.Graph()
    Ga.add_nodes_from(G.nodes(data=True))
    Ga.add_edges_from(edges_rewired)

    A_Ga = nx.adjacency_matrix(Ga).astype(np.float64)
    lam_Ga, x_Ga = largest_eigen(A_Ga)

    # NOTE(review): an eigenvector is only defined up to sign. largest_eigen is
    # defined outside this cell; if it does not normalise the sign, align x_Ga with
    # x before taking the difference (no-op when the signs already agree).
    if np.vdot(x, x_Ga) < 0:
        x_Ga = -x_Ga
    eigenvec_diff_norm = np.linalg.norm(x - x_Ga)

    results = {}
    results['Eigenvec L2 Norm Diff'] = eigenvec_diff_norm
    original_nest[k_approx] = results

In [24]:
import plotly.graph_objects as go

# One line trace per min_cluster_size setting: x = k, y = eigenvector L2 difference.
traces = []
for min_cluster_size, data in data_dict.items():
    trace = go.Scatter(
        x=list(data),
        y=[metrics['Eigenvec L2 Norm Diff'] for metrics in data.values()],
        mode='lines',
        name=f"min_cluster_size={min_cluster_size}",
    )
    traces.append(trace)

# Marker trace for the WL-based baseline, positioned at its number of color classes.
trace = go.Scatter(
    x=list(original_nest),
    y=[metrics['Eigenvec L2 Norm Diff'] for metrics in original_nest.values()],
    mode='markers',
    name="Original Nest (for depth = 1,2,3,... with k = number of unique colors)",
    marker={'symbol': 'x', 'size': 10},
)
traces.append(trace)

# Only the y-axis is logarithmic; the x-axis is left on its default scale.
layout_loglog = go.Layout(
    title='L2 norm of difference between Perron eigenvector of G and Ga (randomized)',
    xaxis_title='k',
    yaxis_title='Metric Values (Log Scale)',
    yaxis_type='log',
    hovermode='closest',
    height=900,
    # Horizontal legend centred underneath the plot area.
    legend={
        'orientation': "h",
        'x': 0.5,
        'y': -0.1,
        'xanchor': "center",
        'yanchor': "top",
    },
)

fig_loglog = go.Figure(data=traces, layout=layout_loglog)
fig_loglog.show()


# erdos_renyi_graph n=500, avg_degree=10

In [25]:
from anonymigraph.anonymization._external.nest_model._rewire import _rewire

# Build the reference graph: Erdos-Renyi G(n, p) with expected average degree ~10.
n = 500
p = 10/n
# Fixed seed so that Restart & Run All reproduces the same graph and metrics
# (the unseeded call produced a different graph on every run).
G = nx.erdos_renyi_graph(n, p, seed=10)
G.remove_edges_from(nx.selfloop_edges(G))  # drop self-loops before building the adjacency matrix


# Get the dominant eigenvector and the adjacency matrix of the original graph.
A_G = nx.adjacency_matrix(G).astype(np.float64)  # sparse adjacency matrix, cast to float64
lam_G, x = largest_eigen(A_G)

print("Dominant eigenvalue:", lam_G)

# Loop-invariant: the same clustering mode is used for every (min_cluster_size, k) pair.
# presumably mode=2 selects a squared-error (k-means style) objective - TODO confirm
mode = 2

# data_dict[min_cluster_size][k] -> dict of metrics for that clustering / rewiring run.
data_dict = {}
for min_cluster_size in [1, 2, 4, 8, 16]:
    print(f"Starting gathering metric for min_cluster_size: {min_cluster_size}")
    data = {}
    for k in range(1, n//min_cluster_size):
        # Cluster the entries of the dominant eigenvector into k groups; the cluster
        # labels are then used as the node colors for the rewiring step.
        colors, centroids = optimal1dclustering.cluster(
            x, k, mode=mode, min_cluster_size=min_cluster_size
        )
        colors = np.array(colors)
        clusterLoss = calculate_kmeans_cluster_loss(x, colors, centroids, mode=mode)

        # Get new synthetic graph.
        # The edge array is rebuilt every iteration in case _rewire mutates its
        # input in place - TODO confirm; if it does not, this can be hoisted.
        edges = np.array(G.edges(), dtype=np.uint32)
        edges_rewired = _rewire(edges, colors.reshape(1, -1), r=10, parallel=False, random_seed=44)

        # Same node set (and node order) as G, but with the rewired edges.
        Ga = nx.Graph()
        Ga.add_nodes_from(G.nodes(data=True))
        Ga.add_edges_from(edges_rewired)

        A_Ga = nx.adjacency_matrix(Ga).astype(np.float64)
        delta = get_delta(A_Ga, lam_G, x)
        delta_norm = np.linalg.norm(delta)
        lam_Ga, x_Ga = largest_eigen(A_Ga)

        # NOTE(review): an eigenvector is only defined up to sign. largest_eigen is
        # defined outside this cell; if it does not normalise the sign, x and x_Ga
        # could point in opposite directions and the norm below would be ~2 instead
        # of ~0. Align the sign defensively (no-op when the signs already agree).
        if np.vdot(x, x_Ga) < 0:
            x_Ga = -x_Ga
        eigenvec_diff_norm = np.linalg.norm(x - x_Ga)

        results = {
            'Clustering Loss': clusterLoss,
            'Delta L2 Magnitude': delta_norm,
            'Delta L2 Magnitude / Lambda_G': delta_norm/lam_G,
            'Dominant Lambda Diff': lam_Ga - lam_G,
            'Eigenvec L2 Norm Diff': eigenvec_diff_norm,
        }
        data[k] = results

    data_dict[min_cluster_size] = data

Dominant eigenvalue: 11.004502948639608
Starting gathering metric for min_cluster_size: 1
Starting gathering metric for min_cluster_size: 2
Starting gathering metric for min_cluster_size: 4
Starting gathering metric for min_cluster_size: 8
Starting gathering metric for min_cluster_size: 16


In [26]:
from anonymigraph.anonymization import NestModelAnonymizer
from anonymigraph.anonymization._external.nest_model.fast_wl import WL_fast

# Baseline: rewire G using Weisfeiler-Lehman colors of increasing depth and record
# how far the dominant eigenvector of the rewired graph is from that of G.
# original_nest[number of distinct colors] -> dict of metrics.
original_nest = {}
for depth in [1,2,3,4,5,6]:
    # same as following call but we also get number of classes
    # NestModelAnonymizer(depth=depth, r=10).anonymize(G, random_seed=42)

    edges = np.array(G.edges(), dtype=np.uint32)
    # Append every edge in reversed orientation as well, so WL_fast receives both
    # directions. np.vstack replaces np.row_stack, which is deprecated in NumPy 2.0.
    bidirectional_edges = np.vstack((edges, edges[:, [1, 0]]))

    all_depth_colors = WL_fast(bidirectional_edges, labels=None, max_iter=depth)
    # Use the last (deepest) refinement returned, as a single-row 2D array.
    colors = all_depth_colors[-1].reshape(1, -1)
    edges_rewired = _rewire(
        edges, colors, r=10, parallel=False, random_seed=42
    )
    # Number of color classes; used as the x-coordinate ("k") in the comparison plot.
    # If two depths yield the same count, the later depth overwrites the earlier entry.
    k_approx = len(np.unique(colors))

    # Same node set (and node order) as G, but with the rewired edges.
    Ga = nx.Graph()
    Ga.add_nodes_from(G.nodes(data=True))
    Ga.add_edges_from(edges_rewired)

    A_Ga = nx.adjacency_matrix(Ga).astype(np.float64)
    lam_Ga, x_Ga = largest_eigen(A_Ga)

    # NOTE(review): an eigenvector is only defined up to sign. largest_eigen is
    # defined outside this cell; if it does not normalise the sign, align x_Ga with
    # x before taking the difference (no-op when the signs already agree).
    if np.vdot(x, x_Ga) < 0:
        x_Ga = -x_Ga
    eigenvec_diff_norm = np.linalg.norm(x - x_Ga)

    results = {}
    results['Eigenvec L2 Norm Diff'] = eigenvec_diff_norm
    original_nest[k_approx] = results

In [27]:
import plotly.graph_objects as go

# One line trace per min_cluster_size setting: x = k, y = eigenvector L2 difference.
traces = []
for min_cluster_size, data in data_dict.items():
    trace = go.Scatter(
        x=list(data),
        y=[metrics['Eigenvec L2 Norm Diff'] for metrics in data.values()],
        mode='lines',
        name=f"min_cluster_size={min_cluster_size}",
    )
    traces.append(trace)

# Marker trace for the WL-based baseline, positioned at its number of color classes.
trace = go.Scatter(
    x=list(original_nest),
    y=[metrics['Eigenvec L2 Norm Diff'] for metrics in original_nest.values()],
    mode='markers',
    name="Original Nest (for depth = 1,2,3,... with k = number of unique colors)",
    marker={'symbol': 'x', 'size': 10},
)
traces.append(trace)

# Only the y-axis is logarithmic; the x-axis is left on its default scale.
layout_loglog = go.Layout(
    title='L2 norm of difference between Perron eigenvector of G and Ga (randomized)',
    xaxis_title='k',
    yaxis_title='Metric Values (Log Scale)',
    yaxis_type='log',
    hovermode='closest',
    height=900,
    # Horizontal legend centred underneath the plot area.
    legend={
        'orientation': "h",
        'x': 0.5,
        'y': -0.1,
        'xanchor': "center",
        'yanchor': "top",
    },
)

fig_loglog = go.Figure(data=traces, layout=layout_loglog)
fig_loglog.show()


# erdos_renyi_graph n=500, avg_degree=3

In [29]:
from anonymigraph.anonymization._external.nest_model._rewire import _rewire

# Build the reference graph: Erdos-Renyi G(n, p) with expected average degree ~3.
n = 500
p = 3/n
# Fixed seed so that Restart & Run All reproduces the same graph and metrics
# (the unseeded call produced a different graph on every run).
G = nx.erdos_renyi_graph(n, p, seed=10)
G.remove_edges_from(nx.selfloop_edges(G))  # drop self-loops before building the adjacency matrix


# Get the dominant eigenvector and the adjacency matrix of the original graph.
A_G = nx.adjacency_matrix(G).astype(np.float64)  # sparse adjacency matrix, cast to float64
lam_G, x = largest_eigen(A_G)

print("Dominant eigenvalue:", lam_G)

# Loop-invariant: the same clustering mode is used for every (min_cluster_size, k) pair.
# presumably mode=2 selects a squared-error (k-means style) objective - TODO confirm
mode = 2

# data_dict[min_cluster_size][k] -> dict of metrics for that clustering / rewiring run.
data_dict = {}
for min_cluster_size in [1, 2, 4, 8, 16]:
    print(f"Starting gathering metric for min_cluster_size: {min_cluster_size}")
    data = {}
    for k in range(1, n//min_cluster_size):
        # Cluster the entries of the dominant eigenvector into k groups; the cluster
        # labels are then used as the node colors for the rewiring step.
        colors, centroids = optimal1dclustering.cluster(
            x, k, mode=mode, min_cluster_size=min_cluster_size
        )
        colors = np.array(colors)
        clusterLoss = calculate_kmeans_cluster_loss(x, colors, centroids, mode=mode)

        # Get new synthetic graph.
        # The edge array is rebuilt every iteration in case _rewire mutates its
        # input in place - TODO confirm; if it does not, this can be hoisted.
        edges = np.array(G.edges(), dtype=np.uint32)
        edges_rewired = _rewire(edges, colors.reshape(1, -1), r=10, parallel=False, random_seed=44)

        # Same node set (and node order) as G, but with the rewired edges.
        Ga = nx.Graph()
        Ga.add_nodes_from(G.nodes(data=True))
        Ga.add_edges_from(edges_rewired)

        A_Ga = nx.adjacency_matrix(Ga).astype(np.float64)
        delta = get_delta(A_Ga, lam_G, x)
        delta_norm = np.linalg.norm(delta)
        lam_Ga, x_Ga = largest_eigen(A_Ga)

        # NOTE(review): an eigenvector is only defined up to sign. largest_eigen is
        # defined outside this cell; if it does not normalise the sign, x and x_Ga
        # could point in opposite directions and the norm below would be ~2 instead
        # of ~0. Align the sign defensively (no-op when the signs already agree).
        if np.vdot(x, x_Ga) < 0:
            x_Ga = -x_Ga
        eigenvec_diff_norm = np.linalg.norm(x - x_Ga)

        results = {
            'Clustering Loss': clusterLoss,
            'Delta L2 Magnitude': delta_norm,
            'Delta L2 Magnitude / Lambda_G': delta_norm/lam_G,
            'Dominant Lambda Diff': lam_Ga - lam_G,
            'Eigenvec L2 Norm Diff': eigenvec_diff_norm,
        }
        data[k] = results

    data_dict[min_cluster_size] = data

Dominant eigenvalue: 4.3104652660821
Starting gathering metric for min_cluster_size: 1
Starting gathering metric for min_cluster_size: 2
Starting gathering metric for min_cluster_size: 4
Starting gathering metric for min_cluster_size: 8
Starting gathering metric for min_cluster_size: 16


In [30]:
from anonymigraph.anonymization import NestModelAnonymizer
from anonymigraph.anonymization._external.nest_model.fast_wl import WL_fast

# Baseline: rewire G using Weisfeiler-Lehman colors of increasing depth and record
# how far the dominant eigenvector of the rewired graph is from that of G.
# original_nest[number of distinct colors] -> dict of metrics.
original_nest = {}
for depth in [1,2,3,4,5,6]:
    # same as following call but we also get number of classes
    # NestModelAnonymizer(depth=depth, r=10).anonymize(G, random_seed=42)

    edges = np.array(G.edges(), dtype=np.uint32)
    # Append every edge in reversed orientation as well, so WL_fast receives both
    # directions. np.vstack replaces np.row_stack, which is deprecated in NumPy 2.0.
    bidirectional_edges = np.vstack((edges, edges[:, [1, 0]]))

    all_depth_colors = WL_fast(bidirectional_edges, labels=None, max_iter=depth)
    # Use the last (deepest) refinement returned, as a single-row 2D array.
    colors = all_depth_colors[-1].reshape(1, -1)
    edges_rewired = _rewire(
        edges, colors, r=10, parallel=False, random_seed=42
    )
    # Number of color classes; used as the x-coordinate ("k") in the comparison plot.
    # If two depths yield the same count, the later depth overwrites the earlier entry.
    k_approx = len(np.unique(colors))

    # Same node set (and node order) as G, but with the rewired edges.
    Ga = nx.Graph()
    Ga.add_nodes_from(G.nodes(data=True))
    Ga.add_edges_from(edges_rewired)

    A_Ga = nx.adjacency_matrix(Ga).astype(np.float64)
    lam_Ga, x_Ga = largest_eigen(A_Ga)

    # NOTE(review): an eigenvector is only defined up to sign. largest_eigen is
    # defined outside this cell; if it does not normalise the sign, align x_Ga with
    # x before taking the difference (no-op when the signs already agree).
    if np.vdot(x, x_Ga) < 0:
        x_Ga = -x_Ga
    eigenvec_diff_norm = np.linalg.norm(x - x_Ga)

    results = {}
    results['Eigenvec L2 Norm Diff'] = eigenvec_diff_norm
    original_nest[k_approx] = results

In [31]:
import plotly.graph_objects as go

# One line trace per min_cluster_size setting: x = k, y = eigenvector L2 difference.
traces = []
for min_cluster_size, data in data_dict.items():
    trace = go.Scatter(
        x=list(data),
        y=[metrics['Eigenvec L2 Norm Diff'] for metrics in data.values()],
        mode='lines',
        name=f"min_cluster_size={min_cluster_size}",
    )
    traces.append(trace)

# Marker trace for the WL-based baseline, positioned at its number of color classes.
trace = go.Scatter(
    x=list(original_nest),
    y=[metrics['Eigenvec L2 Norm Diff'] for metrics in original_nest.values()],
    mode='markers',
    name="Original Nest (for depth = 1,2,3,... with k = number of unique colors)",
    marker={'symbol': 'x', 'size': 10},
)
traces.append(trace)

# Only the y-axis is logarithmic; the x-axis is left on its default scale.
layout_loglog = go.Layout(
    title='L2 norm of difference between Perron eigenvector of G and Ga (randomized)',
    xaxis_title='k',
    yaxis_title='Metric Values (Log Scale)',
    yaxis_type='log',
    hovermode='closest',
    height=900,
    # Horizontal legend centred underneath the plot area.
    legend={
        'orientation': "h",
        'x': 0.5,
        'y': -0.1,
        'xanchor': "center",
        'yanchor': "top",
    },
)

fig_loglog = go.Figure(data=traces, layout=layout_loglog)
fig_loglog.show()
