In [1]:
import os
os.environ["OMP_NUM_THREADS"] = "1"

import networkx as nx
from pasco.pasco import Pasco
from pasco.data_generation import generate_or_import_SBM
import numpy as np
from sklearn.cluster import SpectralClustering
from sklearn.metrics.cluster import normalized_mutual_info_score as nmi, adjusted_mutual_info_score as ami, \
    adjusted_rand_score as ari
import multiprocessing
from time import time

# Create a big graph with a community structure

In [2]:
# --- Stochastic Block Model (SBM) parameters ---------------------------------
n = 10_000  # total number of nodes
k = 100  # number of communities
d = 1.5  # multiplier applied to log(n) to set the average degree
# Ratio of probabilities pout / pin. Here half the conjectured threshold (see paper).
alpha = 1 / (2 * (k - 1))

# --- Derived quantities ------------------------------------------------------
n_k = n // k  # number of nodes per community
avg_d = np.log(n) * d  # average degree
# Edge probability inside a community.
pin = avg_d / (n_k * (1 + alpha * (k - 1)))
# Edge probability between two different communities.
pout = alpha * pin


# Ground-truth partition: label c is repeated n_k times for c = 0, ..., k-1,
# i.e. the first n_k nodes get label 0, the next n_k get label 1, and so on.
partition_true = np.repeat(np.arange(k), n_k)

# Generate the SBM graph, or import it from `data_folder` (the seed is fixed to 2024).
G = generate_or_import_SBM(
    n,
    k,
    pin,
    pout,
    data_folder="experiments/data/graphs/SBMs/",
    seed=2024,
)

# Adjacency matrix with rows/columns ordered as nodes 0, 1, ..., n-1.
A = nx.adjacency_matrix(G, nodelist=range(n))

Graph currently does not exist. We generate it.
Graph generated


# Compute PASCO with Spectral Clustering

In [3]:
# Number of CPUs reported by the system; useful as a reference when choosing R below.
n_cpu = multiprocessing.cpu_count()

# --- PASCO settings ----------------------------------------------------------
# Solver used to compute the partition of the coarsened graphs: Spectral Clustering.
solver = "SC"
# Reduction factor: the coarsened graph will have a size rho times smaller.
rho = 10
# Number of repetitions of the coarsening. R should be kept below the number of
# CPUs so that all clusterings can be computed in one batch.
R = 5

# --- Run PASCO and time it (construction + fit) --------------------------------
ti = time()
pasco = Pasco(k, rho, R, solver=solver)
partition_pasco = pasco.fit_transform(A)
tf = time()

# --- Report quality (AMI against the true partition) and elapsed time ---------
print("AMI with PASCO+SC : {:5.3f}".format(ami(partition_pasco, partition_true)))
print("Computation time: {:5.3f}sec".format(tf - ti))

KeyboardInterrupt: 

# How to use PASCO with your own clustering algorithm

We show here how to use PASCO with a clustering algorithm that is not implemented in the PASCO package. \
As an example, we pretend that SC is not implemented in PASCO.

To do so, we need to create a proxy function. It should take as input a sparse csr_array and, optionally, a number of clusters $k$, and return a partition as an array-like. We defined such a clustering function, `my_clustering(A,k)`, in the `tutorial_utils.py` file.

In [None]:
from tutorial_utils import my_clustering

Now we can use our function `my_clustering` and pass it to the `solver` argument.

In [None]:
# Pass the callable itself as the solver: the clustering is computed
# using `my_clustering`.
solver = my_clustering

# Run PASCO with the custom solver and time it (construction + fit).
ti = time()
pasco = Pasco(k, rho, R, solver=solver)
partition_pasco = pasco.fit_transform(A)
tf = time()

# Report the AMI against the true partition and the elapsed time.
print("AMI with PASCO+SC : {:5.3f}".format(ami(partition_pasco, partition_true)))
print("Computation time: {:5.3f}sec".format(tf - ti))

### with extra arguments

If arguments other than `A` and `k` need to be passed to the function, they can be given through the `solver_args` argument as a dictionary. \
Here is an example where we want to specify which eigen solver to use in SC. We use the function `my_clustering2(A,k,eigen_solver)`.

In [None]:
from tutorial_utils import my_clustering2

In [None]:
# Custom solver that takes an extra `eigen_solver` argument besides `A` and `k`.
solver = my_clustering2
# Extra keyword arguments for the solver, given as a dictionary.
solver_args = {"eigen_solver": "lobpcg"}

# Run PASCO with the custom solver and its extra arguments, timing construction + fit.
ti = time()
pasco = Pasco(k, rho, R, solver=solver, solver_args=solver_args)
partition_pasco = pasco.fit_transform(A)
tf = time()

# Report the AMI against the true partition and the elapsed time.
print("AMI with PASCO+SC : {:5.3f}".format(ami(partition_pasco, partition_true)))
print("Computation time: {:5.3f}sec".format(tf - ti))