In [4]:
import os

import matplotlib
import matplotlib.font_manager as font_manager
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Register every font found in the current user's font directory so that
# matplotlib can resolve 'Droid Serif' by family name below. The directory is
# derived from the home directory instead of a hard-coded user name, so the
# notebook behaves the same for whoever runs it.
# NOTE(review): `~/Library/Fonts/` is the macOS per-user font location; on
# other platforms the directory will not exist, no fonts are registered, and
# 'Droid Serif' must already be known to matplotlib.
user_fonts_dir = os.path.expanduser('~/Library/Fonts/')
for font in font_manager.findSystemFonts(user_fonts_dir):
    font_manager.fontManager.addfont(font)

# Use Droid Serif for all text in the figures produced by this notebook.
matplotlib.rc('font', family='Droid Serif')

In [5]:
# Datasets included in the analysis. Keys are the identifiers matched against
# the `dataset` column of the benchmark results; values are the shorter labels
# associated with them.
# NOTE(review): the values look like "<name>/<dimensionality>" display labels,
# presumably for plot axes — confirm where they are consumed.
DATASETS_MAPPING = dict((
    ("contriever-768", "Contriever/768"),
    ("deep-image-96-angular", "DEEP/96"),
    ("fashion-mnist-784-euclidean", "F-MNIST/784"),
    ("mnist-784-euclidean", "MNIST/784"),
    ("gist-960-euclidean", "GIST/960"),
    ("glove-200-angular", "GloVe/200"),
    ("glove-25-angular", "GloVe/25"),
    ("glove-50-angular", "GloVe/50"),
    ("har-561", "HAR/561"),
    ("msong-420", "MSong/420"),
    ("nytimes-16-angular", "NYTimes/16"),
    ("sift-128-euclidean", "SIFT/128"),
    ("stl-9216", "STL/9216"),
    ("trevi-4096", "Trevi/4096"),
))

# Architecture tag; selects the `../{arch}_NOVEC/` results directory and is
# echoed in the printed summary.
arch = "ZEN4"

In [6]:
# Load the benchmark results of each algorithm for the selected architecture
# and tag every row with a human-readable algorithm name.
linear = pd.read_csv(f'../{arch}_NOVEC/IVF_BRUTEFORCE.csv').assign(algorithm='Linear-Scan')
ads = pd.read_csv(f'../{arch}_NOVEC/IVF_NARY_ADSAMPLING.csv').assign(algorithm='ADSampling')
pdx_ads = pd.read_csv(f'../{arch}_NOVEC/IVF_PDX_ADSAMPLING.csv').assign(algorithm='ADSampling-PDX')
bsa = pd.read_csv(f'../{arch}_NOVEC/IVF_NARY_BSA.csv').assign(algorithm='BSA')
pdx_bsa = pd.read_csv(f'../{arch}_NOVEC/IVF_PDX_BSA.csv').assign(algorithm='BSA-PDX')

In [7]:
# Build one row per (dataset, ivf_nprobe) holding the `avg` measurement of every
# algorithm. Each frame's `avg` column is renamed *before* merging, so the
# resulting column names do not depend on how `merge` applies `suffixes` to
# overlapping names.
merge_keys = ['dataset', 'ivf_nprobe']


def _avg_as(results, column):
    """Return the merge keys and `avg` of `results`, with `avg` renamed to `column`."""
    return results[merge_keys + ['avg']].rename(columns={'avg': column})


combined = linear[['dataset', 'recall', 'ivf_nprobe', 'avg']].rename(
    columns={'recall': 'recall_linear', 'avg': 'avg_linear'}
)
for avg_column, results in [
    ('avg_ads', ads),
    ('avg_pdx_ads', pdx_ads),
    ('avg_bsa', bsa),
    ('avg_pdx_bsa', pdx_bsa),
]:
    combined = combined.merge(_avg_as(results, avg_column), on=merge_keys, how='inner')

# Invert every measurement (1000 / avg) so that larger values are better and
# speedups can be expressed as simple ratios.
# NOTE(review): presumably `avg` is the mean query time in milliseconds, which
# would make these values queries per second — confirm against the benchmark.
avg_columns = ['avg_linear', 'avg_ads', 'avg_pdx_ads', 'avg_bsa', 'avg_pdx_bsa']
combined = combined.assign(**{column: 1000 / combined[column] for column in avg_columns})

# Taking the minimum nprobe to achieve the highest possible recall.
# `.last()` picks the last row of each group, so the rows are first sorted by
# descending nprobe: the last row per (dataset, recall) is then guaranteed to be
# the smallest nprobe regardless of the row order in the CSV files. The second
# groupby keeps, per dataset, the last (i.e. highest) recall, because groups are
# emitted in ascending key order.
combined = (
    combined
    .sort_values('ivf_nprobe', ascending=False, kind='stable')
    .groupby(['dataset', 'recall_linear']).last().reset_index()
    .groupby(['dataset']).last().reset_index()
)

# Keep only the datasets of interest and derive the speedups. `.assign` returns
# a new frame, which avoids writing into a filtered slice
# (SettingWithCopyWarning).
combined = combined[combined['dataset'].isin(list(DATASETS_MAPPING.keys()))].assign(
    # PDX layout vs. the N-ary layout of the same algorithm.
    speedup_adsampling=lambda df: df['avg_pdx_ads'] / df['avg_ads'],
    speedup_bsa=lambda df: df['avg_pdx_bsa'] / df['avg_bsa'],
    # PDX variants vs. the linear-scan baseline.
    speedup_linear_ads=lambda df: df['avg_pdx_ads'] / df['avg_linear'],
    speedup_linear_bsa=lambda df: df['avg_pdx_bsa'] / df['avg_linear'],
)

print('Results for', arch)
print('Average Speedup on BSA:', combined['speedup_bsa'].mean())
print('Average Speedup on ADSampling:', combined['speedup_adsampling'].mean())
print('Average Speedup of ADSampling against Linear:', combined['speedup_linear_ads'].mean())
print('Average Speedup of BSA against Linear:', combined['speedup_linear_bsa'].mean())
combined[['dataset', 'recall_linear', 'ivf_nprobe', 'speedup_adsampling', 'speedup_bsa', 'speedup_linear_ads', 'speedup_linear_bsa']]

Results for ZEN4
Average Speedup on BSA: 1.0726679444972862
Average Speedup on ADSampling: 1.7116518472335855
Average Speedup of ADSampling against Linear: 8.0519197241518
Average Speedup of BSA against Linear: 6.959405123248803


Unnamed: 0,dataset,recall_linear,ivf_nprobe,speedup_adsampling,speedup_bsa,speedup_linear_ads,speedup_linear_bsa
0,contriever-768,0.998,144,1.492215,1.194697,8.086707,5.008654
1,deep-image-96-angular,0.998,96,1.773348,1.980353,3.208062,2.175109
2,fashion-mnist-784-euclidean,1.0,20,1.016203,0.883032,10.638527,6.319484
3,gist-960-euclidean,0.996,160,1.360533,1.22786,11.119883,10.424766
5,glove-200-angular,0.974,160,1.458011,0.976956,3.006629,1.622043
6,glove-25-angular,0.995,160,3.30329,1.155692,1.467587,0.837168
7,glove-50-angular,0.995,160,2.098947,0.978676,1.721602,1.100177
8,har-561,1.0,20,1.048522,0.870833,6.266461,4.999154
10,mnist-784-euclidean,1.0,80,1.302605,0.953037,21.935305,26.445298
11,msong-420,1.0,64,1.619341,1.295303,12.443735,5.32061
