In [1]:
%load_ext autoreload
%autoreload 2

import logging

# Root logger defaults to WARNING; the loggers named 'anonymigraph' and
# 'anonymigraph.metrics' are explicitly lowered to INFO.
logging.basicConfig(level=logging.WARNING)
for _logger_name in ('anonymigraph', 'anonymigraph.metrics'):
    logging.getLogger(_logger_name).setLevel(logging.INFO)


In [2]:
from anonymigraph.metrics.evaluator import Evaluator
from anonymigraph.metrics.utility.structural.privacy_metrics import PercentageKDegreeAnonMetric

from anonymigraph.metrics.utility.structural import (
    DegreeCentralityMetric,
    EigenvectorMetric,
    PageRankMetric,
    ClosenessCentralityMetric,
    LocalClusteringCoefficientMetric,
    WLColorMetric,

    ConnectedComponentsMetric,
    NumberOfEdgesMetric,
    NumberOfNodesMetric,
    NumberOfTrianglesMetric,
    MeanDegreeMetric,
    MaxDegreeMetric,
    MedianDegreeMetric,
    AverageClusteringCoefficientMetric,
    TransitivityMetric,

    EdgeJaccardMetric,
    KatzCentralityMetric,

)

from anonymigraph.anonymization import (
    KDegreeAnonymizer,
    RandomEdgeAddDelAnonymizer,
    ConfigurationModelAnonymizer,
    NestModelAnonymizer,
    PygmalionModelAnonymizer,
    PrivateColorAnonymizer,
)


# CA-GrQc

In [None]:
import gzip
import networkx as nx
import urllib.request
import os
from scipy.sparse.linalg import eigs
import numpy as np


# Remote location of the ca-GrQc edge list and the local file it is cached in.
CA_GRQC_URL = 'https://snap.stanford.edu/data/ca-GrQc.txt.gz'
CA_GRQC_PATH = 'ca-GrQc.txt.gz'

if not os.path.exists(CA_GRQC_PATH):
    # Download under a temporary name and rename only after the transfer finished.
    # Writing directly to CA_GRQC_PATH would leave a truncated archive behind if the
    # download is interrupted, and the existence check above would then accept that
    # broken file as a valid cache on every later run.
    partial_path = CA_GRQC_PATH + '.part'
    try:
        urllib.request.urlretrieve(CA_GRQC_URL, partial_path)
        os.replace(partial_path, CA_GRQC_PATH)
    finally:
        # Only present if the download failed part-way; after a successful
        # os.replace the temporary name no longer exists.
        if os.path.exists(partial_path):
            os.remove(partial_path)

with gzip.open(CA_GRQC_PATH, 'rt') as f:
    G = nx.read_edgelist(f)

# relabel and remove self loops
G = nx.convert_node_labels_to_integers(G)
G.remove_edges_from(nx.selfloop_edges(G)) # There are 12 self loops in the original graph

# Largest-magnitude eigenvalue of the adjacency matrix (k=1, which='LM').
# eigs may return complex values, hence the np.abs before taking the reciprocal.
eigenvalues, _ = eigs(nx.adjacency_matrix(G).astype(np.float64), k=1, which='LM')
max_alpha = 1 / np.abs(eigenvalues).max()

# alpha is fixed at half of 1 / |lambda_max|.
# NOTE(review): presumably 1 / |lambda_max| is the convergence bound for the Katz
# attenuation factor — confirm against the metric that consumes `alpha`.
alpha=0.5*max_alpha
beta=1
print(G)
print("Alpha:", alpha)

In [4]:
from anonymigraph.anonymization.method_private_colors import (
    SamplingFreeEvaluator,
    LocalSearchColorOptimizer,
    Optimal1dColorOptimizer,
    RandomColorSampler,
)

from anonymigraph.anonymization.method_private_colors_soft_assignment import SoftColorOptimizer

class SoftColorAnonymizer:
    """Anonymize a graph by fitting a ``SoftColorOptimizer`` and sampling from its colors.

    The constructor only stores its arguments; they are forwarded unchanged to
    ``SoftColorOptimizer`` each time :meth:`anonymize` is called.

    Args:
        w: Passed to ``SoftColorOptimizer`` as ``w``.
        k_max: Passed to ``SoftColorOptimizer`` as ``k_max``.
        alpha: Passed to ``SoftColorOptimizer`` as ``alpha``.
        use_katz_utility: Passed to ``SoftColorOptimizer`` as ``use_katz_utility``.
        beta: Passed to ``SoftColorOptimizer`` as ``beta`` (default 1).
    """

    def __init__(self, w, k_max, alpha, use_katz_utility, beta=1):
        self.w, self.k_max = w, k_max
        self.alpha, self.beta = alpha, beta
        self.use_katz_utility = use_katz_utility

    def anonymize(self, G: nx.Graph, random_seed=None) -> nx.Graph:
        """Fit soft colors on ``G`` and return a graph sampled from them.

        Args:
            G: Graph to anonymize.
            random_seed: Used both as the optimizer ``seed`` and as the sampler ``seed``.

        Returns:
            A networkx graph built from the sampled adjacency matrix, with node
            ``i`` relabeled to the ``i``-th node of ``G.nodes()``.
        """
        # Stored configuration plus the fixed optimizer hyperparameters of this experiment.
        optimizer_settings = dict(
            k_max=self.k_max,
            w=self.w,
            use_katz_utility=self.use_katz_utility,
            use_entropy_reg=True,
            alpha=self.alpha,
            beta=self.beta,
            eps_utility=1e-9,
            eps_privacy=1e-9,
            lr=0.1,
            patience=20,
            threshold=5e-2,
            initial_lam=1e-7,
            factor=1.1,  # factor 1.1 lr 0.1
            device='cpu',
            seed=random_seed,
        )
        color_optimizer = SoftColorOptimizer(G, **optimizer_settings)
        color_optimizer.fit(max_epochs=int(1e8), epoch_report_frequency=2000)

        # Sample a new graph; only the second element of the returned pair is needed.
        sampler = RandomColorSampler(G, color_optimizer.colors, use_configuration_to_sample=False)
        _, sampled_adjacency = sampler.sample(seed=random_seed)

        # Build the anonymized networkx graph and restore G's node labels
        # (matrix index i corresponds to the i-th node of G.nodes()).
        index_to_node = dict(enumerate(G.nodes()))
        return nx.relabel_nodes(nx.from_scipy_sparse_array(sampled_adjacency), index_to_node)


In [None]:
import pickle

# Everything that does not depend on the seed is built once, before the sampling loop.

# METRICS
# NOTE(review): `metrics` is not read anywhere in this cell; it is still defined here
# in case later cells rely on it — confirm and move it next to its consumer.
metrics = {
    # Important
    # Graph Level
    #"|Δ|": NumberOfTrianglesMetric(),
    #"Transitivity": TransitivityMetric(),

    # Node Level
    "Katz": KatzCentralityMetric(alpha=alpha),
    "Ev.": EigenvectorMetric(),
    "LCC": LocalClusteringCoefficientMetric(),
    #"TVD WL Colors d=2": WLColorMetric(depth=2),

    # Graph Level
    #"|CC|": ConnectedComponentsMetric(),
    #"Median Deg.": MedianDegreeMetric(),
    #"Avg. Deg.": MeanDegreeMetric(),
    #"Max Deg.": MaxDegreeMetric(),
    #"PageRank":	PageRankMetric(),

    "EJacc": EdgeJaccardMetric(),
}

# All configurations run with use_katz_utility=False.
# NOTE(review): presumably `alpha` is ignored in that mode, hence the placeholder — confirm.
alpha_placeholder = 1e-8

# One anonymizer per configuration; the configurations differ only in `w` and `k_max`.
method_configs = [
    # (name, w, k_max)
    ("1e6", 1e+6, 50),
    ("1e4", 1e+4, 50),
    ("1e3", 1e+3, 70),
    ("1e2", 1e+2, 70),
    ("1e1", 1e+1, 70),
    ("1e0", 1e-0, 80),
]
methods = {
    name: SoftColorAnonymizer(w=w, k_max=k_max, alpha=alpha_placeholder, use_katz_utility=False)
    for name, w, k_max in method_configs
}

os.makedirs('cache', exist_ok=True)

# ca_GRQC_samples_data[i] maps method name -> anonymized graph for the i-th seed.
ca_GRQC_samples_data = []
for seed in range(60, 60+4): # 4 samples with different seeds
    Ga_graphs = {}

    for method_name, method in methods.items():
        print(f"Anonymizing with method {method_name}")
        Ga = method.anonymize(G, random_seed=seed)
        # NOTE(review): no evaluation is performed in this cell; the message is kept
        # unchanged so the cell's printed output stays the same.
        print(f"Evaluating method {method_name}")
        Ga_graphs[method_name] = Ga

    ca_GRQC_samples_data.append(Ga_graphs)

    # The cumulative list is re-pickled after every seed, so the file always
    # contains all samples finished so far.
    with open('cache/exp4_ca_GRQC_sampled_graphs.pkl', 'wb') as f:
        pickle.dump(ca_GRQC_samples_data, f)

