In [None]:
import os
import glob
import numpy as np
import pandas as pd
import subprocess

# Configure algorithm and parameters for grid search
# TODO: issue with 'faiss_gpu_kmeans' algorithm not using GPU in docker.
algorithms = ['cuvs_kmeans', 'cuvs_kmeans_balanced', 'faiss_cpu_kmeans']
dataset_names = [f'miracl-fp32-1024d-{n}M' for n in [1, 2, 4, 8]]
k_values = [10, 100, 1000]

# Delete all files in ./results if True
clear_previous_results = True

## Avoid modifying code below:
# Create results directory
os.makedirs('./results', exist_ok=True)

if clear_previous_results:
    # Delete via os.remove rather than shelling out to `rm`: an empty
    # directory is then a no-op instead of an error, and no shell is needed.
    # Sub-directories are skipped, matching what a plain `rm` would do.
    for stale_path in glob.glob('./results/*'):
        if os.path.isdir(stale_path) and not os.path.islink(stale_path):
            continue
        try:
            os.remove(stale_path)
        except OSError as err:
            # Best effort: a file that cannot be removed must not stop the sweep.
            print(f'Could not remove {stale_path}: {err}')

# k_values is passed to kmeans_sweep.py as a single whitespace-free token,
# e.g. "[10,100,1000]". It is the same for every run, so build it once.
k_values_arg = str(k_values).replace(' ', '')

# Start parameter sweep: one kmeans_sweep.py process per (dataset, algorithm).
for dataset_name in dataset_names:
    for algorithm in algorithms:
        # An argument list (no shell) keeps "[10,100,1000]" from being treated
        # as a glob pattern and avoids any quoting issues in the names.
        command = [
            'python3', 'kmeans_sweep.py',
            '-apply_scaler', 'True',
            '-algorithm', algorithm,
            '-dataset_name', dataset_name,
            '-k_values', k_values_arg,
        ]
        try:
            # check=True turns a non-zero exit status into CalledProcessError;
            # without it a failed run would go unreported.
            subprocess.run(command, check=True)
        except KeyboardInterrupt:
            # The "STOP" button in JupyterLab interrupts only the current run;
            # the sweep carries on with the remaining combinations.
            print(f'Interrupted {dataset_name} using {algorithm}. \n'
                  'Skipping it and continuing with the remaining evaluations. \n')
        except subprocess.CalledProcessError as err:
            print(f'Failed to process {dataset_name} using {algorithm} '
                  f'(exit code {err.returncode}). \n'
                  'Continuing with the remaining evaluations. \n')
        except OSError as err:
            # Raised when the interpreter itself cannot be launched.
            print(f'Failed to launch the sweep for {dataset_name} using {algorithm}: {err} \n')

In [None]:
# Read all csv files in the results directory and merge them into one table.
# Sorting the paths makes the row order of the merged file deterministic.
results_csv = sorted(glob.glob("./results/*.csv"))

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with an actionable message instead (same exception type as before).
if not results_csv:
    raise ValueError('No csv files found in ./results; run the parameter sweep '
                     'before merging the results.')

# ignore_index=True renumbers the rows 0..n-1 across all input files.
merged_results = pd.concat([pd.read_csv(fn) for fn in results_csv], ignore_index=True)
merged_results.to_csv('merged_kmeans_results.csv', float_format='%.3f', index=False)