In [15]:
import numpy as np
# Generate random clusters with whole-number (x, y) coordinates.
# This is placeholder test data for checking the matching code below, because
# the real clustered data had not been generated yet.
# It is no longer needed once the real data is available.
def generate_random_clusters_integers(num_clusters, points_per_cluster, x_range, y_range):
    """
    Build placeholder clusters of random integer (x, y) points.

    Arguments:
    - num_clusters: How many clusters to create.
    - points_per_cluster: Number of points drawn for each cluster.
    - x_range: (low, high) bounds for x; both ends are inclusive.
    - y_range: (low, high) bounds for y; both ends are inclusive.
    Returns:
    - Dict mapping "cluster0", "cluster1", ... to an array of shape
      (points_per_cluster, 2) whose columns are x and y.
    """
    def _draw(bounds):
        # randint excludes its upper bound, so add 1 to keep `high` reachable.
        return np.random.randint(bounds[0], bounds[1] + 1, size=points_per_cluster)

    # Tuple elements are evaluated left to right: x is drawn before y.
    return {
        f"cluster{index}": np.column_stack((_draw(x_range), _draw(y_range)))
        for index in range(num_clusters)
    }

# Settings for the placeholder data
num_clusters = 4
points_per_cluster = 10
x_range = (0, 100)
y_range = (0, 100)

# Build the dict of clusters, keyed "cluster0" .. "cluster3"
clusters = generate_random_clusters_integers(
    num_clusters=num_clusters,
    points_per_cluster=points_per_cluster,
    x_range=x_range,
    y_range=y_range,
)

# Expose every cluster as a module-level variable named after its dict key,
# so that cluster0, cluster1, ... can be referenced directly further down.
for name, value in clusters.items():
    globals()[name] = value

# Show the generated variables
print("Cluster0:\n", cluster0)
print("Cluster1:\n", cluster1)
print("Cluster2:\n", cluster2)
print("Cluster3:\n", cluster3)


Cluster0:
 [[19 28]
 [56 65]
 [ 9 60]
 [80 86]
 [92 27]
 [ 1 44]
 [93  6]
 [71 46]
 [44 53]
 [78 11]]
Cluster1:
 [[93  0]
 [46 90]
 [74  0]
 [12 93]
 [24 20]
 [58 52]
 [76 27]
 [36 94]
 [72 61]
 [68 24]]
Cluster2:
 [[ 3 79]
 [13 17]
 [94 21]
 [21 10]
 [61 68]
 [ 3 59]
 [92 11]
 [72 98]
 [74  1]
 [46 54]]
Cluster3:
 [[ 24   0]
 [  4  81]
 [ 47   8]
 [ 76  99]
 [ 34   2]
 [ 93  85]
 [ 99 100]
 [ 27  96]
 [ 15  36]
 [ 74  76]]


In [30]:
import numpy as np
from scipy.optimize import linear_sum_assignment

def calculate_overlap(cluster_a, cluster_b):
    """
    Calculate the overlap (intersection size) between two clusters.

    Arguments:
    - cluster_a, cluster_b: Array-likes describing the cluster members.
      1-D input is treated as a collection of scalar members (e.g. IDs).
      Input with 2 or more dimensions is treated as one member per row
      (e.g. (x, y) points), and rows are compared as whole units.
    Returns:
    - Number of distinct members present in both clusters.
    """
    members_a = np.asarray(cluster_a)
    members_b = np.asarray(cluster_b)

    # An empty cluster shares nothing with any other cluster.
    if members_a.size == 0 or members_b.size == 0:
        return 0

    # Scalar members: np.intersect1d already does the right thing.
    if members_a.ndim < 2 or members_b.ndim < 2:
        return len(np.intersect1d(members_a, members_b))

    # np.intersect1d flattens its inputs, which would count shared coordinate
    # values instead of shared points, so compare complete rows instead.
    rows_a = {tuple(row) for row in members_a.reshape(members_a.shape[0], -1).tolist()}
    rows_b = {tuple(row) for row in members_b.reshape(members_b.shape[0], -1).tolist()}
    return len(rows_a & rows_b)

def compute_overlap_matrix(method_1_clusters, method_2_clusters):
    """
    Compute the overlap matrix between clusters of two methods.
    Arguments:
    - method_1_clusters: List of arrays representing clusters from Method 1.
    - method_2_clusters: List of arrays representing clusters from Method 2.
    Returns:
    - overlap_matrix: Float matrix of shape
      (len(method_1_clusters), len(method_2_clusters)); entry [i, j] is the
      overlap count between Method 1 cluster i and Method 2 cluster j.
    """
    # Size each axis from its own method so the two methods may produce a
    # different number of clusters without indexing out of bounds.
    n_rows = len(method_1_clusters)
    n_cols = len(method_2_clusters)
    overlap_matrix = np.zeros((n_rows, n_cols))

    for i, cluster_1 in enumerate(method_1_clusters):
        for j, cluster_2 in enumerate(method_2_clusters):
            overlap_matrix[i, j] = calculate_overlap(cluster_1, cluster_2)

    return overlap_matrix

def match_and_relabel(base_clusters, target_clusters):
    """
    Match clusters in target_clusters to base_clusters and relabel them.

    Clusters are paired so that the total overlap over all pairs is maximal.

    Arguments:
    - base_clusters: Reference clusters (e.g., Louvain).
    - target_clusters: Clusters to be matched and relabeled (e.g., K-means or Agglomerative).
    Returns:
    - relabeled_clusters: List in which position k holds the target cluster
      that was given label k. A position stays None when no target cluster
      was assigned to that base label.
    - mapping: Dict {old target index: new label} using plain Python ints.
    """
    n_base = len(base_clusters)
    n_target = len(target_clusters)

    # Step 1: Compute overlap matrix (rows: base clusters, columns: target clusters)
    overlap_matrix = compute_overlap_matrix(base_clusters, target_clusters)

    # Step 2: Hungarian algorithm. It minimises cost, so negate the matrix
    # to maximise the total overlap instead.
    row_ind, col_ind = linear_sum_assignment(-overlap_matrix)

    # Step 3: Build the old-index -> new-label mapping. Columns that do not
    # correspond to a real target cluster are ignored, and numpy integers are
    # converted so the mapping contains ordinary ints.
    mapping = {
        int(col): int(row)
        for row, col in zip(row_ind, col_ind)
        if col < n_target
    }

    # Target clusters left without a partner get fresh labels placed after
    # the base labels, so every target cluster is kept in the result.
    next_label = n_base
    for old_label in range(n_target):
        if old_label not in mapping:
            mapping[old_label] = next_label
            next_label += 1

    # Step 4: Place each target cluster at the position of its new label.
    relabeled_clusters = [None] * next_label
    for old_label, cluster in enumerate(target_clusters):
        relabeled_clusters[mapping[old_label]] = cluster

    return relabeled_clusters, mapping

# Example usage:
# Louvain, K-means and Agglomerative each yield the same four clusters (n=4),
# only listed in a different order.
louvain_clusters = [cluster1, cluster2, cluster3, cluster0]
kmeans_clusters = [cluster0, cluster3, cluster2, cluster1]
agglomerative_clusters = [cluster0, cluster2, cluster1, cluster3]

# Step 1: align both alternative methods with the Louvain labelling
relabeled_kmeans, mapping_kmeans = match_and_relabel(
    louvain_clusters, kmeans_clusters
)
relabeled_agglomerative, mapping_agglomerative = match_and_relabel(
    louvain_clusters, agglomerative_clusters
)

# Step 2: report the reference clusters, the relabeled clusters and the mappings
print("Louvain Clusters:", louvain_clusters)
print("Relabeled K-means Clusters:", relabeled_kmeans)
print("Relabeled Agglomerative Clusters:", relabeled_agglomerative)
print("K-means Mapping:", mapping_kmeans)
print("Agglomerative Mapping:", mapping_agglomerative)


Louvain Clusters: [array([[93,  0],
       [46, 90],
       [74,  0],
       [12, 93],
       [24, 20],
       [58, 52],
       [76, 27],
       [36, 94],
       [72, 61],
       [68, 24]]), array([[ 3, 79],
       [13, 17],
       [94, 21],
       [21, 10],
       [61, 68],
       [ 3, 59],
       [92, 11],
       [72, 98],
       [74,  1],
       [46, 54]]), array([[ 24,   0],
       [  4,  81],
       [ 47,   8],
       [ 76,  99],
       [ 34,   2],
       [ 93,  85],
       [ 99, 100],
       [ 27,  96],
       [ 15,  36],
       [ 74,  76]]), array([[19, 28],
       [56, 65],
       [ 9, 60],
       [80, 86],
       [92, 27],
       [ 1, 44],
       [93,  6],
       [71, 46],
       [44, 53],
       [78, 11]])]
Relabeled K-means Clusters: [array([[93,  0],
       [46, 90],
       [74,  0],
       [12, 93],
       [24, 20],
       [58, 52],
       [76, 27],
       [36, 94],
       [72, 61],
       [68, 24]]), array([[ 3, 79],
       [13, 17],
       [94, 21],
       [21, 10],
    