In [1]:
# Enable IPython's autoreload extension in mode 2 so edited modules are reloaded before each cell runs.
%load_ext autoreload
%autoreload 2

In [2]:
import networkx as nx
import numpy as np
from scipy.sparse.linalg import eigs
import scipy.sparse as sp
from numba import njit

from randcolorgraphs.utils.calculate_katz import calculate_katz

@njit
def generate_edge_indices(n, p, seed):
    """Sample distinct directed index pairs (i, j) with 0 <= i, j < n.

    Pairs are drawn uniformly at random and collected in a set until the
    target count is reached, so no pair appears twice. Both endpoints are
    drawn independently, which means self-loops (i == j) can occur.

    Args:
        n: Number of nodes; indices are drawn from ``[0, n)``.
        p: Edge density; the target number of pairs is ``int(n * n * p)``.
        seed: Seed for NumPy's random generator inside the compiled function.

    Returns:
        A ``(row_indices, col_indices)`` pair of equally long lists holding
        the first and second index of every sampled pair. The order follows
        the iteration order of the underlying set.
    """
    np.random.seed(seed)
    row_indices = []
    col_indices = []

    # Target number of edges, capped at the n * n distinct pairs that exist.
    # Without the cap the rejection loop below could never terminate for p > 1.
    E = min(int(n * (n * p)), n * n)

    # Rejection sampling: duplicates are absorbed by the set, so keep drawing
    # until E unique pairs have been collected.
    edge_set = set()
    while len(edge_set) < E:
        i = np.random.randint(0, n)
        j = np.random.randint(0, n)
        edge_set.add((i, j))

    # Separate the set into row and column indices
    for edge in edge_set:
        row_indices.append(edge[0])
        col_indices.append(edge[1])

    return row_indices, col_indices

def get_edge_vector_from_adj_matrix(adj_matrix):
    """Return the non-zero positions of an adjacency matrix as an edge array.

    Args:
        adj_matrix: Anything ``scipy.sparse.csr_matrix`` accepts (dense array
            or sparse matrix).

    Returns:
        An array of shape ``(num_nonzero, 2)`` whose rows are ``[row, col]``
        index pairs of the non-zero entries.
    """
    rows, cols = sp.csr_matrix(adj_matrix).nonzero()
    # Stack as two rows and transpose so that each edge becomes one row.
    return np.vstack([rows, cols]).T

def generate_edge_list_and_katz_vector(n, p, seed=42, alpha=0.1, beta=1.0):
    """Build a random sparse graph whose nodes are numbered by Katz rank.

    Edges are sampled with ``generate_edge_indices``, Katz scores are computed
    with ``calculate_katz``, and the nodes are then relabelled by the rank of
    their score. The scores are recomputed on the relabelled graph and
    asserted to be (numerically) non-decreasing in the node index.

    Args:
        n: Number of nodes.
        p: Edge density forwarded to ``generate_edge_indices``.
        seed: Random seed forwarded to ``generate_edge_indices``.
        alpha: Forwarded to ``calculate_katz``.
        beta: Forwarded to ``calculate_katz``.

    Returns:
        ``(adj_matrix, katz_vector)``: the relabelled ``n x n`` CSR adjacency
        matrix and the sorted Katz scores of the relabelled graph.
    """
    rows, cols = generate_edge_indices(n, p, seed)
    print("Got Edge Indices")

    weights = np.ones(len(rows))

    def build_adjacency(r, c):
        # Every sampled edge gets weight 1.0 in an n x n CSR matrix.
        return sp.csr_matrix((weights, (r, c)), shape=(n, n), dtype=np.float64)

    unsorted_adj = build_adjacency(rows, cols)

    # Largest-magnitude eigenvalue; its reciprocal is printed as alpha_max.
    top_eigenvalue = eigs(unsorted_adj, k=1, which='LM')[0]
    spectral_radius = np.abs(top_eigenvalue).max()
    print(f"Spectral Radius: {spectral_radius}, alpha_max = {1/spectral_radius}")

    katz_unsorted = calculate_katz(unsorted_adj, alpha=alpha, beta=beta)
    # Double argsort yields, for every node, the rank of its Katz score.
    rank_of_node = np.argsort(np.argsort(katz_unsorted))

    adj_matrix = build_adjacency(rank_of_node[rows], rank_of_node[cols])
    new_katz = calculate_katz(adj_matrix, alpha=alpha, beta=beta)
    # After relabelling, the recomputed scores must already be in sorted order.
    assert np.allclose(np.sort(new_katz), new_katz)

    return adj_matrix, np.sort(new_katz)


# Small instance: with p = 6/n the sampler targets int(n * n * p) = 6 * n index pairs.
n = 100
p = 6/n
# In the recorded run alpha=0.01 is well below the printed alpha_max (1 / spectral radius).
adj_matrix, katz_vector = generate_edge_list_and_katz_vector(n=n, p=p, alpha=0.01)
# (num_edges, 2) array of [row, col] pairs; this is what the greedy_search calls below receive.
edges = get_edge_vector_from_adj_matrix(adj_matrix)


Got Edge Indices
Spectral Radius: 6.0895023996065145, alpha_max = 0.16421703028881587
Katz converged after 11 iterations.
Katz converged after 11 iterations.


In [3]:
import logging
# Send root-logger records at INFO and above to one timestamped stream handler.
h = logging.StreamHandler()
h.setFormatter(
    logging.Formatter(
        fmt="%(asctime)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
)
r = logging.getLogger()
r.setLevel(logging.INFO)
# Remove handlers already attached (e.g. from re-running this cell) before adding ours.
if r.hasHandlers():
    r.handlers.clear()
r.addHandler(h)


In [4]:
from greedy_algo_fast import greedy_search

# presumably the edge-overlap weight of the objective (cf. ell_sqr + w * expected_edge_overlap
# in evaluate_cluster_assignment) — confirm against greedy_search
w = np.exp(-9)
# Start from the trivial labelling that puts every node in cluster 0.
clusters_fast, _  = greedy_search(katz_vector, edges, np.array([0]*len(katz_vector)), w=w, max_interaction_dist=2)

Iteration 0 Objective: 0.030361978333363217 move_type split expected_edge_overlap 48.11754611754611
Iteration 1 Objective: 0.022363023728179048 move_type split expected_edge_overlap 53.40873015873016
Iteration 2 Objective: 0.01444448765126113 move_type split expected_edge_overlap 59.46038961038961
Iteration 3 Objective: 0.013398665688751492 move_type split expected_edge_overlap 64.366341991342
Iteration 4 Objective: 0.012530081965618346 move_type split expected_edge_overlap 68.8306277056277
Iteration 5 Objective: 0.011658279069397419 move_type merge-split expected_edge_overlap 70.15876623376623
Iteration 6 Objective: 0.011376216807715209 move_type split expected_edge_overlap 74.9325036075036
Iteration 7 Objective: 0.01135039377710733 move_type merge-split expected_edge_overlap 74.3497655122655
Iteration 8 Objective: 0.011325065875131489 move_type swap expected_edge_overlap 73.65088780328718
Iteration 9 Objective: 0.011307895502941478 move_type swap expected_edge_overlap 73.103932178932

In [13]:
# Large instance: rebinds n, p, adj_matrix, katz_vector and edges, which the cells below read.
n = 100_000
p = 6/n
adj_matrix, katz_vector = generate_edge_list_and_katz_vector(n=n, p=p, alpha=0.1)
edges = get_edge_vector_from_adj_matrix(adj_matrix)
# Quick summary of the generated instance.
print("katz vector mean and max", np.mean(katz_vector), np.max(katz_vector))
print("there are", len(edges), "edges")

Got Edge Indices
Spectral Radius: 5.998332004691925, alpha_max = 0.16671301275384476
Katz converged after 66 iterations.
Katz converged after 66 iterations.
katz vector mean and max 2.4995622989083937 6.244579087025354
there are 600000 edges


# Assert against unoptimized_greedy_search_linear_scalarization

In [6]:
from randcolorgraphs.algorithms.linear_scalarization.unoptimized_greedy_search import unoptimized_greedy_search_linear_scalarization

def are_clusterings_equivalent(cluster1, cluster2):
    """Check whether two label vectors describe the same partition.

    Two clusterings are equivalent when one can be turned into the other by
    renaming labels, i.e. when the label correspondence is one-to-one in both
    directions. A one-directional check would also accept the case where
    ``cluster1`` merely refines ``cluster2`` (e.g. ``[0, 1]`` vs ``[0, 0]``).

    Args:
        cluster1: Sequence or array of cluster labels, one per item.
        cluster2: Sequence or array of cluster labels, one per item.

    Returns:
        ``True`` if both inputs have the same shape and induce the same
        partition, ``False`` otherwise. Two empty inputs are equivalent.
    """
    labels1 = np.asarray(cluster1)
    labels2 = np.asarray(cluster2)
    if labels1.shape != labels2.shape:
        return False

    forward = {}   # label in cluster1 -> label in cluster2
    backward = {}  # label in cluster2 -> label in cluster1
    for a, b in zip(labels1.ravel().tolist(), labels2.ravel().tolist()):
        # setdefault records the first pairing and returns it on later visits,
        # so any conflicting pairing in either direction is detected here.
        if forward.setdefault(a, b) != b or backward.setdefault(b, a) != a:
            return False
    return True

# presumably the edge-overlap weight of the objective; exp(-50) makes it almost zero — confirm against greedy_search
w = np.exp(-50)

# max_iter=3 caps the run (the recorded output shows iterations 0-2); start from a single cluster.
clusters_fast, obj_fast = greedy_search(katz_vector, edges, np.array([0]*len(katz_vector)), w=w, max_interaction_dist=5, max_iter=3)

print("-"*30)
# The reference run and the equivalence assert below are commented out, so nothing is compared here.
#clusters, obj = unoptimized_greedy_search_linear_scalarization(katz_vector, adj_matrix.todense(), np.array([0]*len(katz_vector), dtype=np.int64), w=w, pam_cluster_dist=1)

#assert are_clusterings_equivalent(clusters_fast, clusters)

Iteration 0 Objective: 14389.815899509005 move_type split expected_edge_overlap 48.012427082275536
Iteration 1 Objective: 9364.648727957101 move_type split expected_edge_overlap 54.00164839250773
Iteration 2 Objective: 4584.0619480747555 move_type split expected_edge_overlap 60.04099264239527
------------------------------


## Greedy search initialized with 1-D k-means (kmeans1d)

In [7]:
import kmeans1d
import pandas as pd
from randcolorgraphs.objectives.get_cluster_loss_ell_sqr import get_cluster_loss_ell_sqr
from randcolorgraphs.objectives.get_expected_edgeoverlap import get_expected_edgeoverlap
from randcolorgraphs.algorithms.linear_scalarization.optimal_contiguous.optimal_contiguous_linear_scalarization_algo import optimal_contiguous_linear_scalarization_algo

# Weight on expected_edge_overlap in linear_scalar_loss = ell_sqr + w * expected_edge_overlap (used by the cells below).
w = np.exp(-4)

def evaluate_cluster_assignment(katz_centrality, A, clusters, w):
    """Score a cluster assignment on both objectives and their weighted sum.

    Args:
        katz_centrality: Per-node values passed to ``get_cluster_loss_ell_sqr``.
        A: Adjacency matrix passed to ``get_expected_edgeoverlap``.
        clusters: Cluster label per node.
        w: Weight applied to the expected edge overlap in the combined loss.

    Returns:
        A list ``[ell_sqr, expected_edge_overlap, linear_scalar_loss]`` where
        ``linear_scalar_loss = ell_sqr + w * expected_edge_overlap``.
    """
    squared_loss = get_cluster_loss_ell_sqr(katz_centrality, clusters)
    edge_overlap = get_expected_edgeoverlap(A, clusters)
    return [squared_loss, edge_overlap, squared_loss + w * edge_overlap]

### Evaluate kmeans1d clusterings of the Katz vector for several k and tabulate their metrics
# Collect one record per k and build the frame once instead of growing it row by row.
records = []
for k in range(115, 125, 3):
    colors, _ = kmeans1d.cluster(katz_vector, k)
    clusters = np.array(colors)
    records.append(evaluate_cluster_assignment(katz_vector, adj_matrix, clusters, w) + [f"Kmeans1d (k={k}) on Katz centrality"])
df = pd.DataFrame(records, columns=["ell_sqr", "expected_edge_overlap", "linear_scalar_loss", "method_type"])
display(df)

Unnamed: 0,ell_sqr,expected_edge_overlap,linear_scalar_loss,method_type
0,6.9092,721.115889,20.116899,Kmeans1d (k=115) on Katz centrality
1,6.565892,739.027216,20.101647,Kmeans1d (k=118) on Katz centrality
2,6.234911,757.397144,20.107124,Kmeans1d (k=121) on Katz centrality
3,5.930052,773.954321,20.10552,Kmeans1d (k=124) on Katz centrality


In [8]:
# Warm start from the kmeans1d clustering with k=118, the row with the lowest
# linear_scalar_loss in the table recorded above.
colors, _ = kmeans1d.cluster(katz_vector, k=118)
clusters = np.array(colors)
clusters_fast, obj_fast = greedy_search(katz_vector, edges, clusters, w=w, max_interaction_dist=1)


Iteration 0 Objective: 20.099881347762256 move_type merge-split expected_edge_overlap 738.7350289171253
Iteration 1 Objective: 20.098517701469703 move_type split expected_edge_overlap 743.5683622504586
Iteration 2 Objective: 20.07891273330934 move_type swap expected_edge_overlap 743.279900711997
Iteration 3 Objective: 20.078246734974872 move_type swap expected_edge_overlap 743.2821910573107
Iteration 4 Objective: 20.077694426657267 move_type merge-split expected_edge_overlap 743.2375656131517
Iteration 5 Objective: 20.077428828700636 move_type merge-split expected_edge_overlap 743.2189841521069
Iteration 6 Objective: 20.077164246630176 move_type merge-split expected_edge_overlap 743.1959807822083
Iteration 7 Objective: 20.076903528321747 move_type merge-split expected_edge_overlap 743.1691965819513
Iteration 8 Objective: 20.076651795383704 move_type split expected_edge_overlap 743.1884938279152
Iteration 9 Objective: 20.07643022824496 move_type swap expected_edge_overlap 743.1725532062

In [9]:
# Continue from the previous result, now with max_interaction_dist=2.
clusters_fast, obj_fast = greedy_search(katz_vector, edges, clusters_fast, w=w, max_interaction_dist=2)


Iteration 0 Objective: 20.0432673294519 move_type swap expected_edge_overlap 739.4804283203209
Iteration 1 Objective: 20.043237600727924 move_type swap expected_edge_overlap 739.4784830773992
Iteration 2 Objective: 20.04321097626729 move_type swap expected_edge_overlap 739.4766696565869
Iteration 3 Objective: 20.0431880636958 move_type swap expected_edge_overlap 739.4746119457898
Iteration 4 Objective: 20.043173162897784 move_type swap expected_edge_overlap 739.4725059594211
Iteration 5 Objective: 20.043159926517045 move_type swap expected_edge_overlap 739.4707273079282
Iteration 6 Objective: 20.043150111691894 move_type swap expected_edge_overlap 739.4691581518227
Iteration 7 Objective: 20.04314020517176 move_type swap expected_edge_overlap 739.4672273691197
Iteration 8 Objective: 20.043131956716316 move_type swap expected_edge_overlap 739.4674224811965
Iteration 9 Objective: 20.043128084719655 move_type swap expected_edge_overlap 739.4671619731441
Iteration 10 Objective: 20.043124513

In [10]:
# Continue from the previous result, now with max_interaction_dist=3.
clusters_fast, obj_fast = greedy_search(katz_vector, edges, clusters_fast, w=w, max_interaction_dist=3)

In [11]:
# Continue from the previous result, now with max_interaction_dist=4.
clusters_fast, obj_fast = greedy_search(katz_vector, edges, clusters_fast, w=w, max_interaction_dist=4)

In [12]:
# Continue from the previous result, now with max_interaction_dist=30.
clusters_fast, obj_fast = greedy_search(katz_vector, edges, clusters_fast, w=w, max_interaction_dist=30)