In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import wrapper as wp
import os
import optuna

In [2]:
os.environ['PARLAY_NUM_THREADS'] = '128'
INDEX_DIR = 'index_cache/'

In [18]:
def parse_framework_output(data):
    lines = [line.strip() for line in data.split("\n") if line.strip()]
    entries = []
    entries = []
    for line in lines:
        if line.startswith("Computing"):
            continue
        match = re.search(r'ParlayIVF\((.*?)\)\s+(\d+\.\d+)\s+(\d+\.\d+)', line)
        params = match.group(1)
        recall = match.group(2)
        qps = match.group(3)

        param_dict = {}
        for param in params.split(","):
            if "=" in param:
                key, val = param.split("=")
                key = key.strip()
                # Convert numbers with commas to integers
                if ',' in val:
                    val = int(val.replace(',', ''))
                # Convert lists to tuples
                elif '[' in val and ']' in val:
                    val = tuple(map(int, re.findall(r'\d+', val)))
                # Convert tuple strings to actual tuples
                elif '(' in val and ')' in val:
                    val = tuple(map(int, re.findall(r'\d+', val)))
                else:
                    val = val.strip()
                param_dict[key] = val
        
        param_dict['recall'] = float(recall)
        param_dict['qps'] = float(qps)
        entries.append(param_dict)

    return pd.DataFrame(entries)



In [19]:
framework_output = """Computing knn metrics
  0: ParlayIVF(metric=Euclidian, dtype=uint8, T=8, cluster_size=2,500, cutoff=20,000, target_points=20,000, tiny_cutoff=0, max_iter=40, weight_classes=(100000, 400000), max_degrees=(8, 10, 12), beam_widths=[128, 128, 128], search_limits=[100000, 400000, 3000000])        0.926    13284.645
Computing knn metrics
  1: ParlayIVF(metric=Euclidian, dtype=uint8, T=8, cluster_size=2,500, cutoff=20,000, target_points=15,000, tiny_cutoff=0, max_iter=40, weight_classes=(100000, 400000), max_degrees=(8, 10, 12), beam_widths=[90, 90, 90], search_limits=[100000, 400000, 3000000])        0.905    16296.918
Computing knn metrics
  2: ParlayIVF(metric=Euclidian, dtype=uint8, T=8, cluster_size=2,500, cutoff=20,000, target_points=20,000, tiny_cutoff=0, max_iter=40, weight_classes=(100000, 400000), max_degrees=(8, 10, 12), beam_widths=[90, 90, 90], search_limits=[100000, 400000, 3000000])        0.914    14097.419
Computing knn metrics
  4: ParlayIVF(metric=Euclidian, dtype=uint8, T=8, cluster_size=2,500, cutoff=20,000, target_points=15,000, tiny_cutoff=1,500, max_iter=40, weight_classes=(100000, 400000), max_degrees=(8, 10, 12), beam_widths=[128, 128, 128], search_limits=[100000, 400000, 3000000])        0.918    15059.208
Computing knn metrics
  5: ParlayIVF(metric=Euclidian, dtype=uint8, T=8, cluster_size=2,500, cutoff=20,000, target_points=20,000, tiny_cutoff=1,000, max_iter=40, weight_classes=(100000, 400000), max_degrees=(8, 10, 12), beam_widths=[90, 90, 90], search_limits=[100000, 400000, 3000000])        0.915    13754.467
Computing knn metrics
  7: ParlayIVF(metric=Euclidian, dtype=uint8, T=8, cluster_size=2,500, cutoff=20,000, target_points=20,000, tiny_cutoff=0, max_iter=40, weight_classes=(100000, 400000), max_degrees=(8, 10, 12), beam_widths=[80, 80, 80], search_limits=[100000, 400000, 3000000])        0.909    14194.552
Computing knn metrics
  8: ParlayIVF(metric=Euclidian, dtype=uint8, T=8, cluster_size=2,500, cutoff=20,000, target_points=20,000, tiny_cutoff=1,000, max_iter=40, weight_classes=(100000, 400000), max_degrees=(8, 10, 12), beam_widths=[100, 100, 100], search_limits=[100000, 400000, 3000000])        0.919    13741.418
Computing knn metrics
  9: ParlayIVF(metric=Euclidian, dtype=uint8, T=8, cluster_size=2,500, cutoff=20,000, target_points=20,000, tiny_cutoff=1,000, max_iter=40, weight_classes=(100000, 400000), max_degrees=(8, 10, 12), beam_widths=[80, 80, 80], search_limits=[100000, 400000, 3000000])        0.910    14084.299
Computing knn metrics
 10: ParlayIVF(metric=Euclidian, dtype=uint8, T=8, cluster_size=2,500, cutoff=20,000, target_points=15,000, tiny_cutoff=0, max_iter=40, weight_classes=(100000, 400000), max_degrees=(8, 10, 12), beam_widths=[100, 100, 100], search_limits=[100000, 400000, 3000000])        0.908    16085.959
Computing knn metrics
 11: ParlayIVF(metric=Euclidian, dtype=uint8, T=8, cluster_size=2,500, cutoff=20,000, target_points=15,000, tiny_cutoff=0, max_iter=40, weight_classes=(100000, 400000), max_degrees=(8, 10, 12), beam_widths=[80, 80, 80], search_limits=[100000, 400000, 3000000])        0.900    16608.533
Computing knn metrics
 12: ParlayIVF(metric=Euclidian, dtype=uint8, T=8, cluster_size=2,500, cutoff=20,000, target_points=20,000, tiny_cutoff=1,000, max_iter=40, weight_classes=(100000, 400000), max_degrees=(8, 10, 12), beam_widths=[128, 128, 128], search_limits=[100000, 400000, 3000000])        0.927    12812.983
Computing knn metrics
 14: ParlayIVF(metric=Euclidian, dtype=uint8, T=8, cluster_size=2,500, cutoff=20,000, target_points=15,000, tiny_cutoff=1,500, max_iter=40, weight_classes=(100000, 400000), max_degrees=(8, 10, 12), beam_widths=[80, 80, 80], search_limits=[100000, 400000, 3000000])        0.902    16298.183
Computing knn metrics
 15: ParlayIVF(metric=Euclidian, dtype=uint8, T=8, cluster_size=2,500, cutoff=20,000, target_points=15,000, tiny_cutoff=1,500, max_iter=40, weight_classes=(100000, 400000), max_degrees=(8, 10, 12), beam_widths=[100, 100, 100], search_limits=[100000, 400000, 3000000])        0.911    15766.889
Computing knn metrics
 16: ParlayIVF(metric=Euclidian, dtype=uint8, T=8, cluster_size=2,500, cutoff=20,000, target_points=15,000, tiny_cutoff=0, max_iter=40, weight_classes=(100000, 400000), max_degrees=(8, 10, 12), beam_widths=[128, 128, 128], search_limits=[100000, 400000, 3000000])        0.916    15482.303
Computing knn metrics
 17: ParlayIVF(metric=Euclidian, dtype=uint8, T=8, cluster_size=2,500, cutoff=20,000, target_points=20,000, tiny_cutoff=0, max_iter=40, weight_classes=(100000, 400000), max_degrees=(8, 10, 12), beam_widths=[100, 100, 100], search_limits=[100000, 400000, 3000000])        0.918    13811.775
Computing knn metrics
 18: ParlayIVF(metric=Euclidian, dtype=uint8, T=8, cluster_size=2,500, cutoff=20,000, target_points=15,000, tiny_cutoff=1,500, max_iter=40, weight_classes=(100000, 400000), max_degrees=(8, 10, 12), beam_widths=[90, 90, 90], search_limits=[100000, 400000, 3000000])        0.907    16175.226"""

In [21]:
framework_df = parse_framework_output(framework_output)
framework_df.sort_values(by=['qps'], inplace=True)

In [22]:
framework_df

Unnamed: 0,metric,dtype,T,cluster_size,cutoff,target_points,tiny_cutoff,max_iter,weight_classes,max_degrees,beam_widths,search_limits,recall,qps
10,Euclidian,uint8,8,2,20,20,1,40,(100000,(8,[128,[100000,0.927,12812.983
0,Euclidian,uint8,8,2,20,20,0,40,(100000,(8,[128,[100000,0.926,13284.645
6,Euclidian,uint8,8,2,20,20,1,40,(100000,(8,[100,[100000,0.919,13741.418
4,Euclidian,uint8,8,2,20,20,1,40,(100000,(8,[90,[100000,0.915,13754.467
14,Euclidian,uint8,8,2,20,20,0,40,(100000,(8,[100,[100000,0.918,13811.775
7,Euclidian,uint8,8,2,20,20,1,40,(100000,(8,[80,[100000,0.91,14084.299
2,Euclidian,uint8,8,2,20,20,0,40,(100000,(8,[90,[100000,0.914,14097.419
5,Euclidian,uint8,8,2,20,20,0,40,(100000,(8,[80,[100000,0.909,14194.552
3,Euclidian,uint8,8,2,20,15,1,40,(100000,(8,[128,[100000,0.918,15059.208
13,Euclidian,uint8,8,2,20,15,0,40,(100000,(8,[128,[100000,0.916,15482.303


In [None]:
def build_with_params(max_degrees, weight_classes, cutoff, cluster_size):
    index = wp.ParlayIVFIndex('Euclidian', 'uint8')
    for i in range(3):
        index.set_build_params(wp.BuildParams(max_degrees[i], 500, 1.175))