In [1]:
import os
import glob
import numpy as np
import pandas as pd
import subprocess

# Configure algorithm and parameter sweeps
# 'faiss_gpu_kmeans' case not using GPU. Need to resolve issue.
algorithms = ['cuvs_kmeans', 'cuvs_kmeans_balanced', 'faiss_cpu_kmeans']
dataset_names = [f'miracl-fp32-1024d-{n}M' for n in [1,2,4,8,16]]
k_values = [10, 100, 1000]

# # # Test cases
# algorithms = ['cuvs_kmeans']
# # # algorithms = ['faiss_cpu_kmeans']
# dataset_names = [f'miracl-fp32-1024d-{n}M' for n in [2, 4, 8]]
# k_values = [10] #, 100, 1000]

# Delete all files in ./results if True
clear_previous_results = True

if clear_previous_results:
    subprocess.run(['rm ./results/*'], shell=True)

# Start parameter sweep
for dataset_name in dataset_names:
    for algorithm in algorithms:
        try:
            subprocess.run([f"python3 kmeans_sweep.py -apply_scaler True -algorithm {algorithm} -dataset_name {dataset_name} \
        -k_values {str(k_values).replace(' ', '')}"], 
                       shell=True)
        except:
            print(f'Failed to process {dataset_name} using {algorithm}. Continuing evaluations....')
            pass


Starting cuvs_kmeans sweeps.... 

Loading dataset: miracl-fp32-1024d-1M
Data shape: (999994, 1024) 

{'load_data_avg_cpu_util': 1.02, 'load_data_max_cpu_util': 2.5, 'load_data_max_ram_gb': 16.87535858154297, 'load_data_avg_gpu_util': 0.0, 'load_data_max_gpu_util': 0, 'load_data_max_vram_gb': 0.749267578125, 'load_data_duration_sec': 1.1414644932374358}

Applying scaling to dataset....
{'scaler_avg_cpu_util': 0.8444444444444446, 'scaler_max_cpu_util': 1.0, 'scaler_max_ram_gb': 24.87337875366211, 'scaler_avg_gpu_util': 29.22222222222222, 'scaler_max_gpu_util': 96, 'scaler_max_vram_gb': 9.7598876953125, 'scaler_duration_sec': 2.308595276903361}

Starting cuvs_kmeans parameter sweep....
n_clusters = 10
Training cuvs_kmeans model....
{'kmeans_train_avg_cpu_util': 2.0, 'kmeans_train_max_cpu_util': 2.0, 'kmeans_train_max_ram_gb': 17.35007095336914, 'kmeans_train_avg_gpu_util': 99.0, 'kmeans_train_max_gpu_util': 99, 'kmeans_train_max_vram_gb': 8.8165283203125, 'kmeans_train_duration_sec': 0.69

Error in sys.excepthook:

Original exception was:


Starting cuvs_kmeans_balanced sweeps.... 

Loading dataset: miracl-fp32-1024d-1M
Data shape: (999994, 1024) 

{'load_data_avg_cpu_util': 0.68, 'load_data_max_cpu_util': 0.9, 'load_data_max_ram_gb': 16.884952545166016, 'load_data_avg_gpu_util': 0.0, 'load_data_max_gpu_util': 0, 'load_data_max_vram_gb': 0.749267578125, 'load_data_duration_sec': 1.1358731668442488}

Applying scaling to dataset....
{'scaler_avg_cpu_util': 0.79, 'scaler_max_cpu_util': 1.0, 'scaler_max_ram_gb': 24.944358825683594, 'scaler_avg_gpu_util': 26.4, 'scaler_max_gpu_util': 80, 'scaler_max_vram_gb': 9.7598876953125, 'scaler_duration_sec': 2.31703680427745}

Starting cuvs_kmeans_balanced parameter sweep....
n_clusters = 10
Training cuvs_kmeans_balanced model....
{'kmeans_train_avg_cpu_util': 0.8, 'kmeans_train_max_cpu_util': 0.8, 'kmeans_train_max_ram_gb': 17.328018188476562, 'kmeans_train_avg_gpu_util': 94.0, 'kmeans_train_max_gpu_util': 94, 'kmeans_train_max_vram_gb': 8.9317626953125, 'kmeans_train_duration_sec': 1.

Error in sys.excepthook:

Original exception was:


Sampling a subset of 2560 / 999994 for training
Clustering 2560 points in 1024D to 10 clusters, redo 1 times, 20 iterations
  Preprocessing in 0.55 s
  Iteration 19 (1.70 s, search 0.56 s): objective=2166.19 imbalance=1.062 nsplit=0       
Sampling a subset of 25600 / 999994 for training
Clustering 25600 points in 1024D to 100 clusters, redo 1 times, 20 iterations
  Preprocessing in 0.62 s
  Iteration 19 (10.04 s, search 8.82 s): objective=19928.7 imbalance=1.174 nsplit=0       
Sampling a subset of 256000 / 999994 for training
Clustering 256000 points in 1024D to 1000 clusters, redo 1 times, 20 iterations
  Preprocessing in 1.26 s
Starting faiss_cpu_kmeans sweeps....  s): objective=181960 imbalance=1.178 nsplit=0       

Loading dataset: miracl-fp32-1024d-1M
Data shape: (999994, 1024) 

{'load_data_avg_cpu_util': 0.74, 'load_data_max_cpu_util': 0.9, 'load_data_max_ram_gb': 16.776935577392578, 'load_data_duration_sec': 1.1400736882351339}

Applying scaling to dataset....
{'scaler_avg_c

Error in sys.excepthook:

Original exception was:


Starting cuvs_kmeans_balanced sweeps.... 

Loading dataset: miracl-fp32-1024d-2M
Data shape: (1999994, 1024) 

{'load_data_avg_cpu_util': 0.8111111111111111, 'load_data_max_cpu_util': 1.2, 'load_data_max_ram_gb': 21.025890350341797, 'load_data_avg_gpu_util': 0.0, 'load_data_max_gpu_util': 0, 'load_data_max_vram_gb': 0.749267578125, 'load_data_duration_sec': 2.1418931880034506}

Applying scaling to dataset....
{'scaler_avg_cpu_util': 0.9777777777777779, 'scaler_max_cpu_util': 2.2, 'scaler_max_ram_gb': 36.9516487121582, 'scaler_avg_gpu_util': 29.22222222222222, 'scaler_max_gpu_util': 100, 'scaler_max_vram_gb': 18.3497314453125, 'scaler_duration_sec': 4.342116347979754}

Starting cuvs_kmeans_balanced parameter sweep....
n_clusters = 10
Training cuvs_kmeans_balanced model....
{'kmeans_train_avg_cpu_util': 0.9, 'kmeans_train_max_cpu_util': 0.9, 'kmeans_train_max_ram_gb': 21.694072723388672, 'kmeans_train_avg_gpu_util': 94.0, 'kmeans_train_max_gpu_util': 94, 'kmeans_train_max_vram_gb': 16.56

Error in sys.excepthook:

Original exception was:


Sampling a subset of 2560 / 1999994 for training
Clustering 2560 points in 1024D to 10 clusters, redo 1 times, 20 iterations
  Preprocessing in 1.10 s
  Iteration 19 (1.72 s, search 0.64 s): objective=2169.64 imbalance=1.140 nsplit=0       
Sampling a subset of 25600 / 1999994 for training
Clustering 25600 points in 1024D to 100 clusters, redo 1 times, 20 iterations
  Preprocessing in 1.17 s
  Iteration 19 (11.34 s, search 10.27 s): objective=19913.4 imbalance=1.130 nsplit=0       
Sampling a subset of 256000 / 1999994 for training
Clustering 256000 points in 1024D to 1000 clusters, redo 1 times, 20 iterations
  Preprocessing in 2.11 s
Starting faiss_cpu_kmeans sweeps....  s): objective=181799 imbalance=1.158 nsplit=0       

Loading dataset: miracl-fp32-1024d-2M
Data shape: (1999994, 1024) 

{'load_data_avg_cpu_util': 0.8000000000000002, 'load_data_max_cpu_util': 0.9, 'load_data_max_ram_gb': 20.90549087524414, 'load_data_duration_sec': 2.1450657551176846}

Applying scaling to dataset.

Segmentation fault (core dumped)


Starting cuvs_kmeans_balanced sweeps.... 

Loading dataset: miracl-fp32-1024d-4M
Data shape: (3999994, 1024) 

{'load_data_avg_cpu_util': 0.8722222222222222, 'load_data_max_cpu_util': 1.4, 'load_data_max_ram_gb': 28.746768951416016, 'load_data_avg_gpu_util': 0.0, 'load_data_max_gpu_util': 0, 'load_data_max_vram_gb': 0.749267578125, 'load_data_duration_sec': 4.446702281944454}

Applying scaling to dataset....
{'scaler_avg_cpu_util': 0.8676470588235294, 'scaler_max_cpu_util': 1.5, 'scaler_max_ram_gb': 60.41508483886719, 'scaler_avg_gpu_util': 26.529411764705884, 'scaler_max_gpu_util': 100, 'scaler_max_vram_gb': 35.5958251953125, 'scaler_duration_sec': 8.339000151026994}

Starting cuvs_kmeans_balanced parameter sweep....
n_clusters = 10
Training cuvs_kmeans_balanced model....
{'kmeans_train_avg_cpu_util': 0.9, 'kmeans_train_max_cpu_util': 0.9, 'kmeans_train_max_ram_gb': 29.86129379272461, 'kmeans_train_avg_gpu_util': 96.0, 'kmeans_train_max_gpu_util': 96, 'kmeans_train_max_vram_gb': 31.81

Error in sys.excepthook:

Original exception was:


Sampling a subset of 2560 / 3999994 for training
Clustering 2560 points in 1024D to 10 clusters, redo 1 times, 20 iterations
  Preprocessing in 2.22 s
  Iteration 19 (1.79 s, search 0.66 s): objective=2161.81 imbalance=1.038 nsplit=0       
Sampling a subset of 25600 / 3999994 for training
Clustering 25600 points in 1024D to 100 clusters, redo 1 times, 20 iterations
  Preprocessing in 2.28 s
  Iteration 19 (11.20 s, search 10.29 s): objective=19970 imbalance=1.165 nsplit=0        
Sampling a subset of 256000 / 3999994 for training
Clustering 256000 points in 1024D to 1000 clusters, redo 1 times, 20 iterations
  Preprocessing in 3.11 s
Starting faiss_cpu_kmeans sweeps....  s): objective=181821 imbalance=1.159 nsplit=0       

Loading dataset: miracl-fp32-1024d-4M
Data shape: (3999994, 1024) 

{'load_data_avg_cpu_util': 0.9055555555555556, 'load_data_max_cpu_util': 1.5, 'load_data_max_ram_gb': 28.572315216064453, 'load_data_duration_sec': 4.481788755394518}

Applying scaling to dataset..

Segmentation fault (core dumped)


Starting cuvs_kmeans_balanced sweeps.... 

Loading dataset: miracl-fp32-1024d-8M
Data shape: (7999994, 1024) 

{'load_data_avg_cpu_util': 0.8862068965517241, 'load_data_max_cpu_util': 1.7, 'load_data_max_ram_gb': 43.96116256713867, 'load_data_avg_gpu_util': 0.0, 'load_data_max_gpu_util': 0, 'load_data_max_vram_gb': 0.749267578125, 'load_data_duration_sec': 7.166086952667683}

Applying scaling to dataset....
{'scaler_avg_cpu_util': 1.2589743589743587, 'scaler_max_cpu_util': 3.0, 'scaler_max_ram_gb': 107.1356086730957, 'scaler_avg_gpu_util': 24.653846153846153, 'scaler_max_gpu_util': 100, 'scaler_max_vram_gb': 70.0177001953125, 'scaler_duration_sec': 19.418684131931514}

Starting cuvs_kmeans_balanced parameter sweep....
n_clusters = 10
Training cuvs_kmeans_balanced model....
{'kmeans_train_avg_cpu_util': 0.9, 'kmeans_train_max_cpu_util': 0.9, 'kmeans_train_max_ram_gb': 45.997711181640625, 'kmeans_train_avg_gpu_util': 94.0, 'kmeans_train_max_gpu_util': 94, 'kmeans_train_max_vram_gb': 62.3

Error in sys.excepthook:

Original exception was:


Sampling a subset of 2560 / 7999994 for training
Clustering 2560 points in 1024D to 10 clusters, redo 1 times, 20 iterations
  Preprocessing in 4.47 s
  Iteration 19 (1.76 s, search 0.59 s): objective=2173.74 imbalance=1.084 nsplit=0       
Sampling a subset of 25600 / 7999994 for training
Clustering 25600 points in 1024D to 100 clusters, redo 1 times, 20 iterations
  Preprocessing in 4.65 s
  Iteration 19 (10.13 s, search 8.81 s): objective=19992.5 imbalance=1.118 nsplit=0       
Sampling a subset of 256000 / 7999994 for training
Clustering 256000 points in 1024D to 1000 clusters, redo 1 times, 20 iterations
  Preprocessing in 5.72 s
Starting faiss_cpu_kmeans sweeps.... 74 s): objective=181952 imbalance=1.181 nsplit=0       

Loading dataset: miracl-fp32-1024d-8M
Data shape: (7999994, 1024) 

{'load_data_avg_cpu_util': 0.8793103448275864, 'load_data_max_cpu_util': 1.4, 'load_data_max_ram_gb': 43.76277160644531, 'load_data_duration_sec': 7.189145251177251}

Applying scaling to dataset.

Traceback (most recent call last):
  File "/myworkspace/kmeans_sweep.py", line 403, in <module>
    telem, kmeans_model = train_kmeans(X, n_clusters, algorithm, time_delay)
                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/myworkspace/kmeans_sweep.py", line 142, in wrapper
    result = func(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^
  File "/myworkspace/kmeans_sweep.py", line 227, in train_kmeans
    centroids, inertia, n_iter = cuvs_kmeans.fit(cuvs_kmeans_params, X)
                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "cuvs/common/resources.pyx", line 110, in cuvs.common.resources.auto_sync_resources.wrapper
  File "/opt/conda/lib/python3.11/site-packages/pylibraft/common/outputs.py", line 83, in wrapper
    ret_value = f(*args, **kwargs)
                ^^^^^^^^^^^^^^^^^^
  File "cuvs/cluster/kmeans/kmeans.pyx", line 240, in cuvs.cluster.kmeans.kmeans.fit
  File "cuvs/cluster/kmeans/kmeans.pyx", line 241, in cuvs.

Failed to process miracl-fp32-1024d-16M using cuvs_kmeans. Continuing evaluations....
Starting cuvs_kmeans_balanced sweeps.... 

Loading dataset: miracl-fp32-1024d-16M
Data shape: (15999993, 1024) 

{'load_data_avg_cpu_util': 1.6269841269841268, 'load_data_max_cpu_util': 7.1, 'load_data_max_ram_gb': 102.20945358276367, 'load_data_avg_gpu_util': 0.7936507936507936, 'load_data_max_gpu_util': 38, 'load_data_max_vram_gb': 62.6466064453125, 'load_data_duration_sec': 25.69012861372903}

Applying scaling to dataset....

KMeans parameter sweep completed successfully.
Output written to ./results/cuvs_kmeans_balanced_miracl-fp32-1024d-16M.csv

Starting cuvs_kmeans_balanced parameter sweep....
n_clusters = 10
Training cuvs_kmeans_balanced model....
ERROR: failure encountered during model training.


Traceback (most recent call last):
  File "/myworkspace/kmeans_sweep.py", line 404, in <module>
    telem, kmeans_model = train_kmeans(X, n_clusters, algorithm, time_delay)
                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/myworkspace/kmeans_sweep.py", line 142, in wrapper
    result = func(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^
  File "/myworkspace/kmeans_sweep.py", line 227, in train_kmeans
    centroids, inertia, n_iter = cuvs_kmeans.fit(cuvs_kmeans_params, X)
                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "cuvs/common/resources.pyx", line 110, in cuvs.common.resources.auto_sync_resources.wrapper
  File "/opt/conda/lib/python3.11/site-packages/pylibraft/common/outputs.py", line 83, in wrapper
    ret_value = f(*args, **kwargs)
                ^^^^^^^^^^^^^^^^^^
  File "cuvs/cluster/kmeans/kmeans.pyx", line 240, in cuvs.cluster.kmeans.kmeans.fit
  File "cuvs/cluster/kmeans/kmeans.pyx", line 241, in cuvs.

Failed to process miracl-fp32-1024d-16M using cuvs_kmeans_balanced. Continuing evaluations....
Sampling a subset of 2560 / 15999993 for training
Clustering 2560 points in 1024D to 10 clusters, redo 1 times, 20 iterations
  Preprocessing in 12.69 s
  Iteration 19 (1.71 s, search 0.61 s): objective=2158.57 imbalance=1.077 nsplit=0       
Sampling a subset of 25600 / 15999993 for training
Clustering 25600 points in 1024D to 100 clusters, redo 1 times, 20 iterations
  Preprocessing in 9.27 s
  Iteration 19 (14.81 s, search 14.02 s): objective=19948.8 imbalance=1.151 nsplit=0       
Sampling a subset of 256000 / 15999993 for training
Clustering 256000 points in 1024D to 1000 clusters, redo 1 times, 20 iterations
  Preprocessing in 10.90 s
Starting faiss_cpu_kmeans sweeps.... 65 s): objective=181849 imbalance=1.162 nsplit=0       

Loading dataset: miracl-fp32-1024d-16M
Data shape: (15999993, 1024) 

{'load_data_avg_cpu_util': 1.9182926829268294, 'load_data_max_cpu_util': 3.1, 'load_data_max

In [2]:
# Read all csv files in directory and merge them
results_csv = glob.glob("./results/*.csv")
results_csv.sort()

merged_results = pd.concat([pd.read_csv(fn) for fn in results_csv]).reset_index(drop=True)
merged_results.to_csv('merged_kmeans_results.csv', float_format='%.3f', index=False)