In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import networkx as nx
import numpy as np
from scipy.sparse.linalg import eigs
import scipy.sparse as sp
from numba import njit

from randcolorgraphs.utils.calculate_katz import calculate_katz

@njit
def generate_edge_indices(n, p, seed):
    """Sample a fixed number of distinct (row, col) index pairs on ``n`` nodes.

    Pairs ``(i, j)`` are drawn uniformly from ``[0, n) x [0, n)`` (so ``i == j``
    can occur) and collected in a set until the target count is reached. The
    target is the expected edge count ``int(n * n * p)``, capped at ``n * n``.

    Parameters
    ----------
    n : int
        Number of nodes; indices are drawn from ``[0, n)``.
    p : float
        Edge density used to derive the target count.
    seed : int
        Value passed to ``np.random.seed`` before sampling.

    Returns
    -------
    (row_indices, col_indices) : tuple of two lists of int
        Equal-length lists; position ``k`` of both lists together forms one
        sampled pair. The order follows the iteration order of the set.
    """
    np.random.seed(seed)
    row_indices = []
    col_indices = []

    # Expected number of edges. Only n * n distinct pairs exist, so cap the
    # target there; without the cap the loop below never terminates for p > 1.
    E = min(int(n * (n * p)), n * n)

    # Generate E unique edge indices (duplicates are absorbed by the set).
    edge_set = set()
    while len(edge_set) < E:
        i = np.random.randint(0, n)
        j = np.random.randint(0, n)
        edge_set.add((i, j))

    # Separate the set into row and column indices
    for edge in edge_set:
        row_indices.append(edge[0])
        col_indices.append(edge[1])

    return row_indices, col_indices

def get_edge_vector_from_adj_matrix(adj_matrix):
    """Return the positions of all non-zero entries as an ``(m, 2)`` array.

    The input is converted to a CSR matrix first, so anything accepted by
    ``scipy.sparse.csr_matrix`` works. Row ``k`` of the result is the
    ``(row, col)`` index pair of the ``k``-th non-zero entry.
    """
    rows, cols = sp.csr_matrix(adj_matrix).nonzero()
    # Stack as two rows, then transpose so that each row is one (row, col) pair.
    return np.stack((rows, cols)).T

def generate_edge_list_and_katz_vector(n, p, seed=42, alpha=0.1, beta=1.0):
    """Build a random sparse graph whose node ids are ordered by Katz value.

    Steps:
      1. Sample index pairs with ``generate_edge_indices(n, p, seed)`` and put
         a 1.0 at each pair in an ``n x n`` CSR matrix.
      2. Print the magnitude of the largest-magnitude eigenvalue and its
         reciprocal (labelled ``alpha_max``). This is informational only;
         ``alpha`` is not checked against it.
      3. Compute ``calculate_katz`` on the matrix, relabel every node by the
         rank of its value, and rebuild the matrix with the new labels.
      4. Recompute ``calculate_katz`` and assert it is already in ascending
         order under the new labelling.

    Parameters
    ----------
    n, p, seed
        Forwarded to ``generate_edge_indices``.
    alpha, beta
        Forwarded to ``calculate_katz`` (semantics defined there).

    Returns
    -------
    (adj_matrix, katz) : (scipy.sparse.csr_matrix, np.ndarray)
        The relabelled adjacency matrix and the sorted Katz vector.
    """
    rows, cols = generate_edge_indices(n, p, seed)
    print("Got Edge Indices")

    weights = np.ones(len(rows))

    def _adjacency(r, c):
        # One unit-weight entry per sampled pair.
        return sp.csr_matrix((weights, (r, c)), shape=(n, n), dtype=np.float64)

    original_adj = _adjacency(rows, cols)

    leading_eigenvalues, _ = eigs(original_adj, k=1, which='LM')
    spectral_radius = np.abs(leading_eigenvalues).max()
    print(f"Spectral Radius: {spectral_radius}, alpha_max = {1/spectral_radius}")

    katz_original = calculate_katz(original_adj, alpha=alpha, beta=beta)
    # argsort applied twice yields, for each node, its rank in ascending order.
    rank_of_node = np.argsort(np.argsort(katz_original))

    relabelled_adj = _adjacency(rank_of_node[rows], rank_of_node[cols])
    katz_relabelled = calculate_katz(relabelled_adj, alpha=alpha, beta=beta)
    # After relabelling by rank the vector must already be ascending.
    assert np.allclose(np.sort(katz_relabelled), katz_relabelled)

    return relabelled_adj, np.sort(katz_relabelled)


# Small instance for quick experiments: 100 nodes with p = 6/n, i.e. a target
# of about 6 * n sampled index pairs (int(n * n * p) in generate_edge_indices).
n = 100
p = 6/n
# Returns the rank-relabelled adjacency matrix and the ascending Katz vector.
adj_matrix, katz_vector = generate_edge_list_and_katz_vector(n=n, p=p, alpha=0.01)
# (num_nonzero, 2) array of (row, col) positions of the non-zero entries.
edges = get_edge_vector_from_adj_matrix(adj_matrix)


In [None]:
import logging

# Send root-logger messages at INFO and above to a stream handler with a
# timestamped "<time> - <level> - <message>" layout.
h = logging.StreamHandler()
h.setFormatter(
    logging.Formatter(
        fmt="%(asctime)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
)

r = logging.getLogger()
r.setLevel(logging.INFO)
# Drop handlers left over from earlier runs of this cell so that each message
# is emitted once rather than once per re-run.
if r.hasHandlers():
    r.handlers.clear()
r.addHandler(h)


In [None]:
from greedy_algo_fast import greedy_search

# Weight handed to greedy_search. Presumably it scales the edge-overlap term of
# the scalarized objective — TODO confirm against greedy_algo_fast.
w = np.exp(-9)
# The third argument is an all-zeros label array, i.e. (presumably) every node
# starts in the same cluster. The second return value is discarded here.
clusters_fast, _  = greedy_search(katz_vector, edges, np.array([0]*len(katz_vector)), w=w, max_interaction_dist=2)

In [None]:
# Large instance: 100,000 nodes with the same p = 6/n density (about 600,000
# sampled index pairs). This rebinds n, p, adj_matrix, katz_vector and edges,
# so every cell executed after this one works on the large graph.
n = 100_000
p = 6/n
adj_matrix, katz_vector = generate_edge_list_and_katz_vector(n=n, p=p, alpha=0.1)
# (num_nonzero, 2) array of (row, col) positions of the non-zero entries.
edges = get_edge_vector_from_adj_matrix(adj_matrix)

# Assert against unoptimized_greedy_search_linear_scalarization

In [None]:
from randcolorgraphs.algorithms.linear_scalarization.unoptimized_greedy_search import unoptimized_greedy_search_linear_scalarization

def are_clusterings_equivalent(cluster1, cluster2):
    """Return True when two label vectors describe the same partition.

    Two clusterings are equivalent when they differ only by a renaming of the
    labels, i.e. there is a one-to-one mapping between the labels of
    ``cluster1`` and the labels of ``cluster2`` that turns one vector into the
    other.

    Checking only the forward mapping (cluster1 label -> cluster2 label) is not
    enough: ``[0, 1]`` vs ``[0, 0]`` has a consistent forward mapping although
    the first has two clusters and the second only one. Both directions are
    therefore checked.

    Parameters
    ----------
    cluster1, cluster2 : array-like of hashable labels
        Cluster label per element; flattened before comparison.

    Returns
    -------
    bool
        True if the partitions coincide, False otherwise (including when the
        two inputs have different shapes).
    """
    labels1 = np.asarray(cluster1)
    labels2 = np.asarray(cluster2)
    # Vectors of different shape cannot describe the same partition; without
    # this check zip() would silently ignore the surplus elements.
    if labels1.shape != labels2.shape:
        return False

    forward = {}   # label in cluster1 -> label in cluster2
    backward = {}  # label in cluster2 -> label in cluster1
    for a, b in zip(labels1.ravel().tolist(), labels2.ravel().tolist()):
        # setdefault records the first pairing and returns it on later visits;
        # any disagreement means the mapping is not a bijection.
        if forward.setdefault(a, b) != b or backward.setdefault(b, a) != a:
            return False
    return True

# Weight used for this run and for the cells executed after it (rebinds `w`).
w = np.exp(-4)

# Fast greedy search from the all-zeros label array, capped at max_iter=3.
clusters_fast, obj_fast = greedy_search(katz_vector, edges, np.array([0]*len(katz_vector)), w=w, max_interaction_dist=5, max_iter=3)

print("-"*30)
# Reference run with the unoptimized implementation, currently disabled.
# NOTE(review): adj_matrix.todense() materialises a dense n x n matrix, which is
# presumably why this is switched off for the 100,000-node graph — confirm.
#clusters, obj = unoptimized_greedy_search_linear_scalarization(katz_vector, adj_matrix.todense(), np.array([0]*len(katz_vector), dtype=np.int64), w=w, pam_cluster_dist=1)

# Disabled together with the reference run above, since `clusters` is not computed.
#assert are_clusterings_equivalent(clusters_fast, clusters)

## Greedy Search with 1dkmeans init

In [23]:
import kmeans1d
import pandas as pd
from randcolorgraphs.objectives.get_cluster_loss_ell_sqr import get_cluster_loss_ell_sqr
from randcolorgraphs.objectives.get_expected_edgeoverlap import get_expected_edgeoverlap
from randcolorgraphs.algorithms.linear_scalarization.optimal_contiguous.optimal_contiguous_linear_scalarization_algo import optimal_contiguous_linear_scalarization_algo


def evaluate_cluster_assignment(katz_centrality, A, clusters, w):
    """Score one cluster assignment on both objectives and their weighted sum.

    Parameters
    ----------
    katz_centrality
        Per-node values passed to ``get_cluster_loss_ell_sqr``.
    A
        Adjacency matrix passed to ``get_expected_edgeoverlap``.
    clusters
        Cluster label per node, passed to both objective functions.
    w
        Weight applied to the edge-overlap term in the combined loss.

    Returns
    -------
    list
        ``[ell_sqr, expected_edge_overlap, ell_sqr + w * expected_edge_overlap]``
    """
    squared_loss = get_cluster_loss_ell_sqr(katz_centrality, clusters)
    edge_overlap = get_expected_edgeoverlap(A, clusters)
    return [squared_loss, edge_overlap, squared_loss + w * edge_overlap]

### Evaluate kmeans1d clusterings of the Katz vector for several values of k
### (115, 118, 121, 124) and collect their metrics in one table, one row per k.
df = pd.DataFrame(columns=["ell_sqr", "expected_edge_overlap", "linear_scalar_loss", "method_type"])

for k in range(115, 125, 3):
    # Only the first return value (the per-element labels) is used.
    colors, _ = kmeans1d.cluster(katz_vector, k)
    clusters = np.array(colors)
    # Append the three metrics plus a description as the next row. The weight
    # is the notebook-level `w`, so the table depends on which cell set it last.
    df.loc[len(df)] = evaluate_cluster_assignment(katz_vector, adj_matrix, clusters, w) + [f"Kmeans1d (k={k}) on Katz centrality"]
display(df)

Unnamed: 0,ell_sqr,expected_edge_overlap,linear_scalar_loss,method_type
0,6.9092,721.115889,20.116899,Kmeans1d (k=115) on Katz centrality
1,6.565892,739.027216,20.101647,Kmeans1d (k=118) on Katz centrality
2,6.234911,757.397144,20.107124,Kmeans1d (k=121) on Katz centrality
3,5.930052,773.954321,20.10552,Kmeans1d (k=124) on Katz centrality


In [24]:
# Initialise the greedy search from the k=118 kmeans1d clustering, which has the
# lowest linear_scalar_loss among the k values in the recorded sweep table.
colors, _ = kmeans1d.cluster(katz_vector, k=118)
clusters = np.array(colors)
# First refinement pass with max_interaction_dist=1.
clusters_fast, obj_fast = greedy_search(katz_vector, edges, clusters, w=w, max_interaction_dist=1)


Iteration 0 Objective: 20.099881347762256 move_type merge-split
Iteration 1 Objective: 20.098517701469703 move_type split
Iteration 2 Objective: 20.07891273330934 move_type swap
Iteration 3 Objective: 20.078246734974872 move_type swap
Iteration 4 Objective: 20.077694426657267 move_type merge-split
Iteration 5 Objective: 20.077428828700636 move_type merge-split
Iteration 6 Objective: 20.077164246630176 move_type merge-split
Iteration 7 Objective: 20.076903528321747 move_type merge-split
Iteration 8 Objective: 20.076651795383704 move_type split
Iteration 9 Objective: 20.07643022824496 move_type swap
Iteration 10 Objective: 20.076234826158306 move_type swap
Iteration 11 Objective: 20.07604422243661 move_type merge-split
Iteration 12 Objective: 20.075855976086732 move_type merge-split
Iteration 13 Objective: 20.075669974376233 move_type merge-split
Iteration 14 Objective: 20.075490555159803 move_type merge-split
Iteration 15 Objective: 20.0753118002737 move_type merge-split
Iteration 16 Ob

In [25]:
# Continue from the current clusters_fast with max_interaction_dist=2.
# clusters_fast is both input and output, so re-running this cell resumes from
# its own previous result rather than reproducing it.
clusters_fast, obj_fast = greedy_search(katz_vector, edges, clusters_fast, w=w, max_interaction_dist=2)


Iteration 0 Objective: 20.0432673294519 move_type swap
Iteration 1 Objective: 20.043237600727924 move_type swap
Iteration 2 Objective: 20.04321097626729 move_type swap
Iteration 3 Objective: 20.0431880636958 move_type swap
Iteration 4 Objective: 20.043173162897784 move_type swap
Iteration 5 Objective: 20.043159926517045 move_type swap
Iteration 6 Objective: 20.043150111691894 move_type swap
Iteration 7 Objective: 20.04314020517176 move_type swap
Iteration 8 Objective: 20.043131956716316 move_type swap
Iteration 9 Objective: 20.043128084719655 move_type swap
Iteration 10 Objective: 20.043124513825106 move_type swap
Iteration 11 Objective: 20.043120498310643 move_type swap
Iteration 12 Objective: 20.0431157184247 move_type swap
Iteration 13 Objective: 20.0431115059018 move_type swap
Iteration 14 Objective: 20.0431077451201 move_type swap
Iteration 15 Objective: 20.04310423449524 move_type swap
Iteration 16 Objective: 20.04310007876339 move_type swap
Iteration 17 Objective: 20.04309555428

In [27]:
# Continue from the current clusters_fast with max_interaction_dist=3
# (input and output share the same name, so the cell is not idempotent).
clusters_fast, obj_fast = greedy_search(katz_vector, edges, clusters_fast, w=w, max_interaction_dist=3)

In [28]:
# Continue from the current clusters_fast with max_interaction_dist=4
# (input and output share the same name, so the cell is not idempotent).
clusters_fast, obj_fast = greedy_search(katz_vector, edges, clusters_fast, w=w, max_interaction_dist=4)

In [30]:
# Final pass from the current clusters_fast with a much larger
# max_interaction_dist=30 (input and output share the same name, so the cell
# is not idempotent).
clusters_fast, obj_fast = greedy_search(katz_vector, edges, clusters_fast, w=w, max_interaction_dist=30)