We have obtained the embeddings for 10,000 node networks of params in this folder:

In [3]:
K=5 # k = {5,10,50}
mu = 0.1
run_no = 1

base = f"/nobackup/gogandhi/alt_means_sans_k/data/experiment_n2v_metric_change_10000_{k}_3.0_minc50/Run_{run_no}/" 

net_filename = f"net_LFR_n_10000_tau1_3.0_tau2_1.0_mu_{mu}_k_{k}_mincomm_50.npz"  # A = sp.load_npz(net_path)
comm_filename = f"community_table_LFR_n_10000_tau1_3.0_tau2_1.0_mu_{mu}_k_{k}_mincomm_50.csv" # pd.read_csv()
emb_filename = f"embeddings_LFR_n_10000_tau1_3.0_tau2_1.0_mu_{mu}_k_{k}_mincomm_50.pkl" # embeddings_dict

For instance, we load it to see the embeddings as:

In [4]:
import pickle
with open(base+emb_filename, 'rb') as f:  # open a text file
    emb_dict = pickle.load(f) # deserialize using load()
emb_dict.keys()

dict_keys(['dot', 'euclidean', 'cosine'])

Now we want to take these embeddings, and run k-means clustering with different metrics on them to see which combination comes out on top.
N2V ... K-Means \
Euc ... Dot? \
Euc ... Euc? \
Dot ... Dot? \
I have a strong feeling this is bound to change based on the dimensionality of the embedding vectors, so I will test the cases with embedding dimensions = 8,16,32,128 also. But we're getting ahead of ourselves now.


# The modified K-Means algorithm:

In [5]:
# Define a function that calculates element-centric similarity:
def calc_esim(y, ypred):

    ylab, y = np.unique(y, return_inverse=True)
    ypredlab, ypred = np.unique(ypred, return_inverse=True)
    
    Ka, Kb = len(ylab), len(ypredlab)
    K = np.maximum(Ka, Kb)
    N = len(y)
    
    UA = sparse.csr_matrix((np.ones_like(y), (np.arange(y.size), y)), shape=(N,K))
    UB = sparse.csr_matrix((np.ones_like(ypred), (np.arange(ypred.size), ypred)), shape=(N, K))    
    
    nA = np.array(UA.sum(axis=0)).reshape(-1)
    nB = np.array(UB.sum(axis=0)).reshape(-1)

# nAB[i][j] is read as the number of elements that belong to ith ground truth label and jth predicrted label.
# nAB[1][0] = 1 For ground truth label with index 1 and predicted label 0 we have 1 element. i.e. 0000|1| vs 1110|0|

    nAB = (UA.T @ UB).toarray()
    nAB_rand = np.outer(nA, nB) / N
    
# assuming that each element has an equal probability of being assigned to any label,
# and the expected counts are calculated based on label frequencies.


    # Calc element-centric similarity
    Q = np.maximum(nA[:, None] @ np.ones((1, K)), np.ones((K, 1)) @ nB[None, :]) 
    Q = 1 / np.maximum(Q, 1)
    S = np.sum(np.multiply(Q, (nAB**2))) / N
    
    # Calc the expected element-centric similarity for random partitions
    #Q = np.maximum(nA[:, None] @ np.ones((1, K)), np.ones((K, 1)) @ nB[None, :]) 
    #Q = 1 / np.maximum(Q, 1)
    Srand = np.sum(np.multiply(Q, (nAB_rand**2))) / N
    Scorrected = (S - Srand) / (1 - Srand)
    return Scorrected


In [14]:
import numpy as np
from scipy.spatial.distance import cdist
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.utils import check_random_state
from numba import njit
from sklearn.cluster import KMeans 
from scipy import sparse

import pandas as pd
import pickle


class CustomKMeans:
    def __init__(self, n_clusters, metric='euclidean', max_iter=300, tol=1e-4, random_state=None, n_init=20, init='k-means++', batch_size=None):
        self.n_clusters = n_clusters
        self.metric = metric
        self.max_iter = max_iter
        self.tol = tol
        self.random_state = random_state
        self.n_init = n_init
        self.init = init
        self.batch_size = batch_size  # Mini-batch size, if any

    def calculate_distances(self, X, centroids):
        """Optimized distance calculation for the specified metric."""
        if self.metric == 'euclidean':
            return cdist(X, centroids, metric='euclidean')
        elif self.metric == 'manhattan':
            return cdist(X, centroids, metric='cityblock')
        elif self.metric == 'cosine':
            return 1 - cosine_similarity(X, centroids)
        elif self.metric == 'dot':
            return -np.dot(X, centroids.T)
        elif self.metric == 'geodesic':
            # Calculate geodesic distance as arccos(cosine_similarity) for normalized data
            cos_sim = cosine_similarity(X, centroids)
            # Clip values to avoid out-of-domain errors in arccos
            cos_sim = np.clip(cos_sim, -1.0, 1.0)
            return np.arccos(cos_sim)
        else:
            raise ValueError(f"Unsupported metric: {self.metric}")

    def _initialize_centroids(self, X, rng):
        """Efficient k-means++ initialization."""
        
        centroids = [X[rng.randint(X.shape[0])]]
        closest_dist_sq = self.calculate_distances(X, np.array(centroids))[:, 0] ** 2

        for _ in range(1, self.n_clusters):
            probs = closest_dist_sq / closest_dist_sq.sum()
            cumulative_probs = np.cumsum(probs)
            r = rng.rand()
            idx = np.searchsorted(cumulative_probs, r)
            idx = min(idx, X.shape[0] - 1)
            new_centroid = X[idx]
            
            #new_centroid = X[np.searchsorted(cumulative_probs, r)]
            centroids.append(new_centroid)
            new_dist_sq = self.calculate_distances(X, np.array([new_centroid]))[:, 0] ** 2
            closest_dist_sq = np.minimum(closest_dist_sq, new_dist_sq)
        
        return np.array(centroids)

    @staticmethod
    @njit
    def _update_centroids(X, labels, n_clusters):
        """Compute new centroids using JIT compilation for efficiency."""
        new_centroids = np.zeros((n_clusters, X.shape[1]), dtype=X.dtype)
        counts = np.zeros(n_clusters, dtype=np.int64)
        
        for i in range(X.shape[0]):
            new_centroids[labels[i]] += X[i]
            counts[labels[i]] += 1
        
        for j in range(n_clusters):
            if counts[j] > 0:
                new_centroids[j] /= counts[j]
        
        return new_centroids

    def _run_kmeans(self, X, rng):
        """Run a single instance of K-means clustering with optional mini-batch."""
        centroids = self._initialize_centroids(X, rng)
        n_samples = X.shape[0]

        for i in range(self.max_iter):
            if self.batch_size:
                batch_indices = rng.choice(n_samples, self.batch_size, replace=False)
                X_batch = X[batch_indices]
                distances = self.calculate_distances(X_batch, centroids)
                labels = np.argmin(distances, axis=1)
            else:
                distances = self.calculate_distances(X, centroids)
                labels = np.argmin(distances, axis=1)
            
            new_centroids = self._update_centroids(X, labels, self.n_clusters)
            
            # Convergence check based on relative tolerance
            centroid_shifts = np.linalg.norm(new_centroids - centroids, axis=1)
            if np.all(centroid_shifts < self.tol * np.linalg.norm(centroids, axis=1)):
                break
            
            centroids = new_centroids
        
        # Inertia calculation for this run
        inertia = np.sum(np.min(distances, axis=1) ** 2)
        return centroids, labels, inertia

    def fit(self, X):
        """Run KMeans with multiple initializations to get the best clustering."""
        best_inertia = np.inf
        best_centroids = None
        best_labels = None
        rng = check_random_state(self.random_state)

        for _ in range(self.n_init):
            centroids, labels, inertia = self._run_kmeans(X, rng)
            
            if inertia < best_inertia:
                best_inertia = inertia
                best_centroids = centroids
                best_labels = labels
        
        # Set final results
        self.centroids_ = best_centroids
        self.labels_ = best_labels
        self.inertia_ = best_inertia
        return self

    def predict(self, X):
        """Predict the closest cluster each sample in X belongs to."""
        distances = self.calculate_distances(X, self.centroids_)
        return np.argmin(distances, axis=1)


In [15]:
def clustering_method_values(net, community_table, emb, score_keys):
    # Normalize the vector of each node to have unit length. This normalization improves clustering.
    #X = np.einsum("ij,i->ij", emb, 1 / np.maximum(np.linalg.norm(emb, axis=1), 1e-24))
    #X = emb.copy()

    def method_score(key): 
        if key == "kmeans++":
            kmeans = KMeans(n_clusters=len(set(community_table["community_id"])), init='k-means++').fit(emb)
            return calc_esim(community_table["community_id"], kmeans.labels_)

        elif key.startswith("kmeans_"):  # Parse metric and apply CustomKMeans with modified metrics
            metric = key.split("_", 1)[1]
            custom_kmeans = CustomKMeans(n_clusters=len(set(community_table["community_id"])), metric=metric).fit(emb)
            return calc_esim(community_table["community_id"], custom_kmeans.labels_)

    
    # Calculate and store scores for each clustering method in score_keys
    score_dictionary = {}
    for key in score_keys:
        score_dictionary[key] = method_score(key)

    return score_dictionary

Before we parallelize and get results for all the LFR networks of varying mixing rates and varying network densities. We will start with an example of just one network. We load the network, community information, embeddings, and run the modified K-Means using Dot, Euclidean and Cosine similarities on the embedding vectors generated using Node2Vec using Dot, Euclidean and Cosine similarities.

In [8]:
def load_net_and_embedding(net_filename, comm_filename, emb_filename):
    net = sparse.load_npz(net_filename)
    community_table = pd.read_csv(comm_filename)
    
    with open(emb_filename, 'rb') as f:  # open a text file
        emb_dict = pickle.load(f) # deserialize using load()

    return net, community_table, emb_dict

In [9]:
N=10000
mu_values = np.round(np.arange(0.05, 1.05, 0.05),decimals=2)

params = {
    "N": N,
    "k": 5,
    "maxk":  int(np.sqrt(10 * N)),
    "minc": 50,
    "maxc": int(np.ceil(np.sqrt(N * 10))),
    "tau": 3.0,
    "tau2": 1.0,
    "mu": 0.2,
    }


emb_params = {
    "method": "node2vec",
    "window_length": 10,
    "walk_length": 80,
    "num_walks": 10,
    "dim": 64,
}


k=5 # k = {5,10,50}
mu = 0.1
run_no = 1

path_name = f"/nobackup/gogandhi/alt_means_sans_k/data/experiment_n2v_metric_change_10000_{k}_3.0_minc50/Run_{run_no}/" 

net_filename = path_name + f"net_LFR_n_10000_tau1_3.0_tau2_1.0_mu_{mu}_k_{k}_mincomm_50.npz"  # A = sp.load_npz(net_path)
comm_filename = path_name + f"community_table_LFR_n_10000_tau1_3.0_tau2_1.0_mu_{mu}_k_{k}_mincomm_50.csv" # pd.read_csv()
emb_filename = path_name + f"embeddings_LFR_n_10000_tau1_3.0_tau2_1.0_mu_{mu}_k_{k}_mincomm_50.pkl" # embeddings_dict

#"community_table_LFR_n_10000_tau1_3.0_tau2_1.0_mu_0.1_k_50_mincomm_50.npz"

net, community_table, emb_dict = load_net_and_embedding(net_filename, comm_filename, emb_filename)

In [74]:
score_keys=['kmeans++','kmeans_euclidean','kmeans_dot','kmeans_cosine'] 
for key in ['dot', 'euclidean', 'cosine']:
    emb = emb_dict[key]
    print(clustering_method_values(net, community_table, emb, score_keys))

{'kmeans++': 0.8883255562176959, 'kmeans_euclidean': 0.9190984190926753, 'kmeans_dot': 0.676045991603713, 'kmeans_cosine': 0.8216621800325635}
{'kmeans++': 0.9925325345751824, 'kmeans_euclidean': 0.9925325345751824, 'kmeans_dot': 0.6464478510220129, 'kmeans_cosine': 0.9903730792768748}
{'kmeans++': 0.9478108231369303, 'kmeans_euclidean': 0.9385970563145652, 'kmeans_dot': 0.7824215655820644, 'kmeans_cosine': 0.9515917299553007}


We go through 20 iterations of Kmeans, and use the best clustering of the them. The Kmeans++ (which is the standard optimized implementation) performs comparable to our Kmeans_euclidean which is our baseline for our modified version especially in the euclidean-euclidean case. When embeddings are generated using other methods, it does falter a bit. This is exciting news! Which means there could be more to uncover!

# Parallelization to get clustering

In [21]:
import os
import numpy as np
import pandas as pd
import time
import torch
from tqdm import tqdm

# Assuming these functions are defined elsewhere:
# from your_module import load_net_and_embedding, clustering_method_values

# Parameters
N = 10000
K = 5
mu_values = np.round(np.arange(0.05, 1.05, 0.05), decimals=2)
params_template = {
    "N": N,
    "k": K,
    "maxk": int(np.sqrt(10 * N)),
    "minc": 50,
    "maxc": int(np.ceil(np.sqrt(N * 10))),
    "tau": 3.0,
    "tau2": 1.0,
}
emb_params = {
    "method": "node2vec",
    "window_length": 10,
    "walk_length": 80,
    "num_walks": 10,
    "dim": 64,
}

score_keys = ['kmeans++', 'kmeans_euclidean', 'kmeans_dot', 'kmeans_cosine']

# Output directory – one file per embedding type
output_dir = f"/nobackup/gogandhi/alt_means_sans_k/data/experiment_n2v_metric_change_10000_{K}_3.0_minc50/"
os.makedirs(output_dir, exist_ok=True)

# Function to process a single run and mu value
def process_run(run_no, mu):
    # Build file paths based on run and mu
    path_name = f"/nobackup/gogandhi/alt_means_sans_k/data/experiment_n2v_metric_change_10000_{K}_3.0_minc50/Run_{run_no}/"
    net_filename = path_name + f"net_LFR_n_10000_tau1_3.0_tau2_1.0_mu_{mu}_k_{K}_mincomm_50.npz"
    comm_filename = path_name + f"community_table_LFR_n_10000_tau1_3.0_tau2_1.0_mu_{mu}_k_{K}_mincomm_50.csv"
    emb_filename = path_name + f"embeddings_LFR_n_10000_tau1_3.0_tau2_1.0_mu_{mu}_k_{K}_mincomm_50.pkl"
    
    # Load network, community table, and the embedding dictionary
    net, community_table, emb_dict = load_net_and_embedding(net_filename, comm_filename, emb_filename)
    
    # For each embedding in the dictionary, run clustering and prepare a result string
    results = []
    for emb_key, emb in emb_dict.items():
        result = clustering_method_values(net, community_table, emb, score_keys)
        result_values = [result[key] for key in score_keys]
        # Format: run_no,mu,score1,score2,...
        result_str = f"{run_no},{mu}," + ",".join(map(str, result_values))
        results.append((emb_key, result_str))
        print(f"Completed Run {run_no} with Mu {mu} for embedding '{emb_key}'")
    return results

# Function to process all run/mu combinations sequentially with tqdm and immediate file writing
def process_all_combinations_sequential():
    # Generate all combinations of run numbers and mu values (modify as needed)
    runs_mu_combinations = [(run_no, mu) for run_no in range(8, 11) for mu in mu_values]
    total_combinations = len(runs_mu_combinations)
    start_time = time.time()
    
    # Process each combination sequentially with a tqdm progress bar
    for run_no, mu in tqdm(runs_mu_combinations, total=total_combinations, desc="Processing combinations"):
        run_results = process_run(run_no, mu)
        for emb_key, result_str in run_results:
            output_file = os.path.join(output_dir, f"n2v_{emb_key}_kmeans_clustering.txt")
            # Write header if file doesn't exist
            if not os.path.exists(output_file):
                with open(output_file, "w") as f:
                    header = "run_no,mu," + ",".join(score_keys) + "\n"
                    f.write(header)
            # Append result line and flush immediately
            with open(output_file, "a") as f:
                f.write(result_str + "\n")
    
    total_elapsed_time = time.time() - start_time
    print(f"All combinations processed sequentially. Total elapsed time: {total_elapsed_time:.2f} seconds.")

if __name__ == "__main__":
    process_all_combinations_sequential()


Processing combinations:   0%|                                                                                                         | 0/60 [00:00<?, ?it/s]

Completed Run 8 with Mu 0.05 for embedding 'dot'
Completed Run 8 with Mu 0.05 for embedding 'euclidean'


Processing combinations:   2%|█▌                                                                                             | 1/60 [01:02<1:01:24, 62.45s/it]

Completed Run 8 with Mu 0.05 for embedding 'cosine'
Completed Run 8 with Mu 0.1 for embedding 'dot'
Completed Run 8 with Mu 0.1 for embedding 'euclidean'


Processing combinations:   3%|███▏                                                                                           | 2/60 [02:20<1:09:02, 71.43s/it]

Completed Run 8 with Mu 0.1 for embedding 'cosine'
Completed Run 8 with Mu 0.15 for embedding 'dot'
Completed Run 8 with Mu 0.15 for embedding 'euclidean'


Processing combinations:   5%|████▊                                                                                          | 3/60 [03:35<1:09:20, 72.99s/it]

Completed Run 8 with Mu 0.15 for embedding 'cosine'
Completed Run 8 with Mu 0.2 for embedding 'dot'
Completed Run 8 with Mu 0.2 for embedding 'euclidean'


Processing combinations:   7%|██████▎                                                                                        | 4/60 [04:43<1:06:20, 71.07s/it]

Completed Run 8 with Mu 0.2 for embedding 'cosine'
Completed Run 8 with Mu 0.25 for embedding 'dot'
Completed Run 8 with Mu 0.25 for embedding 'euclidean'


Processing combinations:   8%|███████▉                                                                                       | 5/60 [06:07<1:09:25, 75.73s/it]

Completed Run 8 with Mu 0.25 for embedding 'cosine'
Completed Run 8 with Mu 0.3 for embedding 'dot'
Completed Run 8 with Mu 0.3 for embedding 'euclidean'


Processing combinations:  10%|█████████▌                                                                                     | 6/60 [07:29<1:10:06, 77.90s/it]

Completed Run 8 with Mu 0.3 for embedding 'cosine'
Completed Run 8 with Mu 0.35 for embedding 'dot'
Completed Run 8 with Mu 0.35 for embedding 'euclidean'


Processing combinations:  12%|███████████                                                                                    | 7/60 [09:12<1:16:01, 86.07s/it]

Completed Run 8 with Mu 0.35 for embedding 'cosine'
Completed Run 8 with Mu 0.4 for embedding 'dot'
Completed Run 8 with Mu 0.4 for embedding 'euclidean'


Processing combinations:  13%|████████████▋                                                                                  | 8/60 [10:51<1:18:10, 90.20s/it]

Completed Run 8 with Mu 0.4 for embedding 'cosine'
Completed Run 8 with Mu 0.45 for embedding 'dot'
Completed Run 8 with Mu 0.45 for embedding 'euclidean'


Processing combinations:  15%|██████████████▎                                                                                | 9/60 [12:46<1:23:19, 98.03s/it]

Completed Run 8 with Mu 0.45 for embedding 'cosine'
Completed Run 8 with Mu 0.5 for embedding 'dot'
Completed Run 8 with Mu 0.5 for embedding 'euclidean'


Processing combinations:  17%|███████████████▌                                                                             | 10/60 [15:27<1:37:53, 117.47s/it]

Completed Run 8 with Mu 0.5 for embedding 'cosine'
Completed Run 8 with Mu 0.55 for embedding 'dot'
Completed Run 8 with Mu 0.55 for embedding 'euclidean'


Processing combinations:  18%|█████████████████                                                                            | 11/60 [17:59<1:44:41, 128.19s/it]

Completed Run 8 with Mu 0.55 for embedding 'cosine'
Completed Run 8 with Mu 0.6 for embedding 'dot'
Completed Run 8 with Mu 0.6 for embedding 'euclidean'


Processing combinations:  20%|██████████████████▌                                                                          | 12/60 [20:07<1:42:21, 127.95s/it]

Completed Run 8 with Mu 0.6 for embedding 'cosine'
Completed Run 8 with Mu 0.65 for embedding 'dot'
Completed Run 8 with Mu 0.65 for embedding 'euclidean'


Processing combinations:  22%|████████████████████▏                                                                        | 13/60 [22:07<1:38:16, 125.45s/it]

Completed Run 8 with Mu 0.65 for embedding 'cosine'
Completed Run 8 with Mu 0.7 for embedding 'dot'
Completed Run 8 with Mu 0.7 for embedding 'euclidean'


Processing combinations:  23%|█████████████████████▋                                                                       | 14/60 [24:47<1:44:16, 136.00s/it]

Completed Run 8 with Mu 0.7 for embedding 'cosine'
Completed Run 8 with Mu 0.75 for embedding 'dot'
Completed Run 8 with Mu 0.75 for embedding 'euclidean'


Processing combinations:  25%|███████████████████████▎                                                                     | 15/60 [27:16<1:44:51, 139.82s/it]

Completed Run 8 with Mu 0.75 for embedding 'cosine'
Completed Run 8 with Mu 0.8 for embedding 'dot'
Completed Run 8 with Mu 0.8 for embedding 'euclidean'


Processing combinations:  27%|████████████████████████▊                                                                    | 16/60 [29:39<1:43:18, 140.87s/it]

Completed Run 8 with Mu 0.8 for embedding 'cosine'
Completed Run 8 with Mu 0.85 for embedding 'dot'
Completed Run 8 with Mu 0.85 for embedding 'euclidean'


Processing combinations:  28%|██████████████████████████▎                                                                  | 17/60 [31:48<1:38:29, 137.42s/it]

Completed Run 8 with Mu 0.85 for embedding 'cosine'
Completed Run 8 with Mu 0.9 for embedding 'dot'
Completed Run 8 with Mu 0.9 for embedding 'euclidean'


Processing combinations:  30%|███████████████████████████▉                                                                 | 18/60 [34:12<1:37:36, 139.43s/it]

Completed Run 8 with Mu 0.9 for embedding 'cosine'
Completed Run 8 with Mu 0.95 for embedding 'dot'
Completed Run 8 with Mu 0.95 for embedding 'euclidean'


Processing combinations:  32%|█████████████████████████████▍                                                               | 19/60 [36:24<1:33:43, 137.16s/it]

Completed Run 8 with Mu 0.95 for embedding 'cosine'
Completed Run 8 with Mu 1.0 for embedding 'dot'
Completed Run 8 with Mu 1.0 for embedding 'euclidean'


Processing combinations:  33%|███████████████████████████████                                                              | 20/60 [38:40<1:31:14, 136.87s/it]

Completed Run 8 with Mu 1.0 for embedding 'cosine'
Completed Run 9 with Mu 0.05 for embedding 'dot'
Completed Run 9 with Mu 0.05 for embedding 'euclidean'


Processing combinations:  35%|████████████████████████████████▌                                                            | 21/60 [39:56<1:17:03, 118.54s/it]

Completed Run 9 with Mu 0.05 for embedding 'cosine'
Completed Run 9 with Mu 0.1 for embedding 'dot'
Completed Run 9 with Mu 0.1 for embedding 'euclidean'


Processing combinations:  37%|██████████████████████████████████                                                           | 22/60 [41:07<1:05:57, 104.14s/it]

Completed Run 9 with Mu 0.1 for embedding 'cosine'
Completed Run 9 with Mu 0.15 for embedding 'dot'
Completed Run 9 with Mu 0.15 for embedding 'euclidean'


Processing combinations:  38%|████████████████████████████████████                                                          | 23/60 [42:30<1:00:17, 97.77s/it]

Completed Run 9 with Mu 0.15 for embedding 'cosine'
Completed Run 9 with Mu 0.2 for embedding 'dot'
Completed Run 9 with Mu 0.2 for embedding 'euclidean'


Processing combinations:  40%|██████████████████████████████████████▍                                                         | 24/60 [43:45<54:36, 91.03s/it]

Completed Run 9 with Mu 0.2 for embedding 'cosine'
Completed Run 9 with Mu 0.25 for embedding 'dot'
Completed Run 9 with Mu 0.25 for embedding 'euclidean'


Processing combinations:  42%|████████████████████████████████████████                                                        | 25/60 [44:48<48:15, 82.74s/it]

Completed Run 9 with Mu 0.25 for embedding 'cosine'
Completed Run 9 with Mu 0.3 for embedding 'dot'
Completed Run 9 with Mu 0.3 for embedding 'euclidean'


Processing combinations:  43%|█████████████████████████████████████████▌                                                      | 26/60 [46:24<49:06, 86.67s/it]

Completed Run 9 with Mu 0.3 for embedding 'cosine'
Completed Run 9 with Mu 0.35 for embedding 'dot'
Completed Run 9 with Mu 0.35 for embedding 'euclidean'


Processing combinations:  45%|███████████████████████████████████████████▏                                                    | 27/60 [47:53<48:00, 87.29s/it]

Completed Run 9 with Mu 0.35 for embedding 'cosine'
Completed Run 9 with Mu 0.4 for embedding 'dot'
Completed Run 9 with Mu 0.4 for embedding 'euclidean'


Processing combinations:  47%|████████████████████████████████████████████▊                                                   | 28/60 [49:57<52:29, 98.43s/it]

Completed Run 9 with Mu 0.4 for embedding 'cosine'
Completed Run 9 with Mu 0.45 for embedding 'dot'
Completed Run 9 with Mu 0.45 for embedding 'euclidean'


Processing combinations:  48%|█████████████████████████████████████████████▉                                                 | 29/60 [52:04<55:14, 106.91s/it]

Completed Run 9 with Mu 0.45 for embedding 'cosine'
Completed Run 9 with Mu 0.5 for embedding 'dot'
Completed Run 9 with Mu 0.5 for embedding 'euclidean'


Processing combinations:  50%|███████████████████████████████████████████████▌                                               | 30/60 [54:17<57:23, 114.77s/it]

Completed Run 9 with Mu 0.5 for embedding 'cosine'
Completed Run 9 with Mu 0.55 for embedding 'dot'
Completed Run 9 with Mu 0.55 for embedding 'euclidean'


Processing combinations:  52%|█████████████████████████████████████████████████                                              | 31/60 [56:30<58:02, 120.08s/it]

Completed Run 9 with Mu 0.55 for embedding 'cosine'
Completed Run 9 with Mu 0.6 for embedding 'dot'
Completed Run 9 with Mu 0.6 for embedding 'euclidean'


Processing combinations:  53%|██████████████████████████████████████████████████▋                                            | 32/60 [58:45<58:06, 124.51s/it]

Completed Run 9 with Mu 0.6 for embedding 'cosine'
Completed Run 9 with Mu 0.65 for embedding 'dot'
Completed Run 9 with Mu 0.65 for embedding 'euclidean'


Processing combinations:  55%|███████████████████████████████████████████████████▏                                         | 33/60 [1:01:08<58:34, 130.17s/it]

Completed Run 9 with Mu 0.65 for embedding 'cosine'
Completed Run 9 with Mu 0.7 for embedding 'dot'
Completed Run 9 with Mu 0.7 for embedding 'euclidean'


Processing combinations:  57%|████████████████████████████████████████████████████▋                                        | 34/60 [1:03:25<57:17, 132.22s/it]

Completed Run 9 with Mu 0.7 for embedding 'cosine'
Completed Run 9 with Mu 0.75 for embedding 'dot'
Completed Run 9 with Mu 0.75 for embedding 'euclidean'


Processing combinations:  58%|██████████████████████████████████████████████████████▎                                      | 35/60 [1:05:49<56:36, 135.86s/it]

Completed Run 9 with Mu 0.75 for embedding 'cosine'
Completed Run 9 with Mu 0.8 for embedding 'dot'
Completed Run 9 with Mu 0.8 for embedding 'euclidean'


Processing combinations:  60%|███████████████████████████████████████████████████████▊                                     | 36/60 [1:08:27<56:58, 142.43s/it]

Completed Run 9 with Mu 0.8 for embedding 'cosine'
Completed Run 9 with Mu 0.85 for embedding 'dot'
Completed Run 9 with Mu 0.85 for embedding 'euclidean'


Processing combinations:  62%|█████████████████████████████████████████████████████████▎                                   | 37/60 [1:10:43<53:50, 140.44s/it]

Completed Run 9 with Mu 0.85 for embedding 'cosine'
Completed Run 9 with Mu 0.9 for embedding 'dot'
Completed Run 9 with Mu 0.9 for embedding 'euclidean'


Processing combinations:  63%|██████████████████████████████████████████████████████████▉                                  | 38/60 [1:12:55<50:35, 137.96s/it]

Completed Run 9 with Mu 0.9 for embedding 'cosine'
Completed Run 9 with Mu 0.95 for embedding 'dot'
Completed Run 9 with Mu 0.95 for embedding 'euclidean'


Processing combinations:  65%|████████████████████████████████████████████████████████████▍                                | 39/60 [1:15:26<49:40, 141.94s/it]

Completed Run 9 with Mu 0.95 for embedding 'cosine'
Completed Run 9 with Mu 1.0 for embedding 'dot'
Completed Run 9 with Mu 1.0 for embedding 'euclidean'


Processing combinations:  67%|██████████████████████████████████████████████████████████████                               | 40/60 [1:17:53<47:48, 143.43s/it]

Completed Run 9 with Mu 1.0 for embedding 'cosine'
Completed Run 10 with Mu 0.05 for embedding 'dot'
Completed Run 10 with Mu 0.05 for embedding 'euclidean'


Processing combinations:  68%|███████████████████████████████████████████████████████████████▌                             | 41/60 [1:19:11<39:09, 123.65s/it]

Completed Run 10 with Mu 0.05 for embedding 'cosine'
Completed Run 10 with Mu 0.1 for embedding 'dot'
Completed Run 10 with Mu 0.1 for embedding 'euclidean'


Processing combinations:  70%|█████████████████████████████████████████████████████████████████                            | 42/60 [1:20:25<32:38, 108.82s/it]

Completed Run 10 with Mu 0.1 for embedding 'cosine'
Completed Run 10 with Mu 0.15 for embedding 'dot'
Completed Run 10 with Mu 0.15 for embedding 'euclidean'


Processing combinations:  72%|██████████████████████████████████████████████████████████████████▋                          | 43/60 [1:21:45<28:25, 100.34s/it]

Completed Run 10 with Mu 0.15 for embedding 'cosine'
Completed Run 10 with Mu 0.2 for embedding 'dot'
Completed Run 10 with Mu 0.2 for embedding 'euclidean'


Processing combinations:  73%|████████████████████████████████████████████████████████████████████▉                         | 44/60 [1:22:57<24:25, 91.58s/it]

Completed Run 10 with Mu 0.2 for embedding 'cosine'
Completed Run 10 with Mu 0.25 for embedding 'dot'
Completed Run 10 with Mu 0.25 for embedding 'euclidean'


Processing combinations:  75%|██████████████████████████████████████████████████████████████████████▌                       | 45/60 [1:24:10<21:30, 86.05s/it]

Completed Run 10 with Mu 0.25 for embedding 'cosine'
Completed Run 10 with Mu 0.3 for embedding 'dot'
Completed Run 10 with Mu 0.3 for embedding 'euclidean'


Processing combinations:  77%|████████████████████████████████████████████████████████████████████████                      | 46/60 [1:25:46<20:46, 89.00s/it]

Completed Run 10 with Mu 0.3 for embedding 'cosine'
Completed Run 10 with Mu 0.35 for embedding 'dot'
Completed Run 10 with Mu 0.35 for embedding 'euclidean'


Processing combinations:  78%|█████████████████████████████████████████████████████████████████████████▋                    | 47/60 [1:27:26<20:01, 92.45s/it]

Completed Run 10 with Mu 0.35 for embedding 'cosine'
Completed Run 10 with Mu 0.4 for embedding 'dot'
Completed Run 10 with Mu 0.4 for embedding 'euclidean'


Processing combinations:  80%|███████████████████████████████████████████████████████████████████████████▏                  | 48/60 [1:29:15<19:27, 97.29s/it]

Completed Run 10 with Mu 0.4 for embedding 'cosine'
Completed Run 10 with Mu 0.45 for embedding 'dot'
Completed Run 10 with Mu 0.45 for embedding 'euclidean'


Processing combinations:  82%|███████████████████████████████████████████████████████████████████████████▉                 | 49/60 [1:31:29<19:51, 108.36s/it]

Completed Run 10 with Mu 0.45 for embedding 'cosine'
Completed Run 10 with Mu 0.5 for embedding 'dot'
Completed Run 10 with Mu 0.5 for embedding 'euclidean'


Processing combinations:  83%|█████████████████████████████████████████████████████████████████████████████▌               | 50/60 [1:33:48<19:35, 117.54s/it]

Completed Run 10 with Mu 0.5 for embedding 'cosine'
Completed Run 10 with Mu 0.55 for embedding 'dot'
Completed Run 10 with Mu 0.55 for embedding 'euclidean'


Processing combinations:  85%|███████████████████████████████████████████████████████████████████████████████              | 51/60 [1:36:18<19:05, 127.29s/it]

Completed Run 10 with Mu 0.55 for embedding 'cosine'
Completed Run 10 with Mu 0.6 for embedding 'dot'
Completed Run 10 with Mu 0.6 for embedding 'euclidean'


Processing combinations:  87%|████████████████████████████████████████████████████████████████████████████████▌            | 52/60 [1:38:52<18:02, 135.37s/it]

Completed Run 10 with Mu 0.6 for embedding 'cosine'
Completed Run 10 with Mu 0.65 for embedding 'dot'
Completed Run 10 with Mu 0.65 for embedding 'euclidean'


Processing combinations:  88%|██████████████████████████████████████████████████████████████████████████████████▏          | 53/60 [1:41:00<15:32, 133.24s/it]

Completed Run 10 with Mu 0.65 for embedding 'cosine'
Completed Run 10 with Mu 0.7 for embedding 'dot'
Completed Run 10 with Mu 0.7 for embedding 'euclidean'


Processing combinations:  90%|███████████████████████████████████████████████████████████████████████████████████▋         | 54/60 [1:43:07<13:08, 131.34s/it]

Completed Run 10 with Mu 0.7 for embedding 'cosine'
Completed Run 10 with Mu 0.75 for embedding 'dot'
Completed Run 10 with Mu 0.75 for embedding 'euclidean'


Processing combinations:  92%|█████████████████████████████████████████████████████████████████████████████████████▎       | 55/60 [1:45:18<10:55, 131.05s/it]

Completed Run 10 with Mu 0.75 for embedding 'cosine'
Completed Run 10 with Mu 0.8 for embedding 'dot'
Completed Run 10 with Mu 0.8 for embedding 'euclidean'


Processing combinations:  93%|██████████████████████████████████████████████████████████████████████████████████████▊      | 56/60 [1:47:43<09:01, 135.31s/it]

Completed Run 10 with Mu 0.8 for embedding 'cosine'
Completed Run 10 with Mu 0.85 for embedding 'dot'
Completed Run 10 with Mu 0.85 for embedding 'euclidean'


Processing combinations:  95%|████████████████████████████████████████████████████████████████████████████████████████▎    | 57/60 [1:50:16<07:01, 140.54s/it]

Completed Run 10 with Mu 0.85 for embedding 'cosine'
Completed Run 10 with Mu 0.9 for embedding 'dot'
Completed Run 10 with Mu 0.9 for embedding 'euclidean'


Processing combinations:  97%|█████████████████████████████████████████████████████████████████████████████████████████▉   | 58/60 [1:52:22<04:32, 136.16s/it]

Completed Run 10 with Mu 0.9 for embedding 'cosine'
Completed Run 10 with Mu 0.95 for embedding 'dot'
Completed Run 10 with Mu 0.95 for embedding 'euclidean'


Processing combinations:  98%|███████████████████████████████████████████████████████████████████████████████████████████▍ | 59/60 [1:54:32<02:14, 134.58s/it]

Completed Run 10 with Mu 0.95 for embedding 'cosine'
Completed Run 10 with Mu 1.0 for embedding 'dot'
Completed Run 10 with Mu 1.0 for embedding 'euclidean'


Processing combinations: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 60/60 [1:56:44<00:00, 116.74s/it]

Completed Run 10 with Mu 1.0 for embedding 'cosine'
All combinations processed sequentially. Total elapsed time: 7004.47 seconds.





In [12]:
# Run 7, mu = 0.5 has a problem, hmmmm

array([0.5 , 0.55, 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95, 1.  ])