In [31]:
# notebook config - every tunable value lives in this cell; later cells only read these names

# Competition data export to load (path relative to this notebook)
CSV = "data_export_m4-metal-medium.csv"

# Root of the competition tree (relative path); searched for <track>/<algo>/Dockerfile entries
COMPETITION_DIR = ".."

# Minimum recall/ap a run must reach before it is ranked
RECALL_AP_THRESHOLD = 0.9

# Markdown template that receives the generated results table
MARKDOWN = "./_latitude-m4-metal-medium.md"

# Algorithm directories to leave out of the participant list - mostly to deal with
# transient issues like new algorithms not yet merged
IGNORE_ALGOS = ["hanns"]

In [32]:
# package imports

import pandas as pd
import os
import glob
import math
import re
import helper
import importlib
from IPython.core.display import HTML

In [33]:
# global config

# show floats with two decimals in every rendered dataframe
pd.options.display.precision = 2

In [34]:
# read CSV

# load the raw competition export named in the config cell
df = pd.read_csv(filepath_or_buffer=CSV)

In [35]:
# get track/dataset groups

# one group per (track, dataset) pair; the ranking loop iterates over these
grps = df.groupby(by=["track", "dataset"])

In [37]:
# transform columns
#
# For every (track, dataset) group build one ranking frame with the columns
# algorithm, qps (in thousands), recall/ap, status, rank, track, dataset
# and collect the frames in `dfs`.

dfs = []  # accumulate individual track dataframes

# iterate groups; the group key is the (track, dataset) tuple used in the groupby
for (track, dataset), group in grps:

    # keep only the runs that reach the recall/ap threshold
    qualified = group[group["recall/ap"] >= RECALL_AP_THRESHOLD]

    # Produce the track ranking: one row per algorithm, namely its fastest qualifying run.
    # Sorting by qps and keeping the first row per algorithm keeps qps and recall/ap
    # from the SAME run. (A column-wise groupby max would pair the best qps with the
    # best recall/ap even when they come from different runs.)
    ranking_df = (
        qualified
        .sort_values("qps", ascending=False)
        .drop_duplicates(subset="algorithm", keep="first")
        [["algorithm", "qps", "recall/ap"]]
        .assign(status="ok")  # every surviving row met the threshold
        .reset_index(drop=True)
    )

    # retrieve all participating track algorithm names via track algo subdirectory
    track_dir = os.path.join(COMPETITION_DIR, "%s/*/Dockerfile" % track)
    algos_participating = {os.path.basename(os.path.dirname(p)) for p in glob.glob(track_dir)}

    # remove any temp ignores
    algos_participating -= set(IGNORE_ALGOS)

    # Algorithms with a track subdirectory but no qualifying result did not qualify.
    # Sorted so the row order (and therefore the table) is the same on every run;
    # plain set iteration order is not stable across kernels.
    algos_did_not_qualify = sorted(algos_participating - set(ranking_df["algorithm"]))

    # append not-qualified algos in a single concat (qps and recall/ap stay NaN)
    if algos_did_not_qualify:
        error_df = pd.DataFrame({"algorithm": algos_did_not_qualify, "status": "error"})
        ranking_df = pd.concat([ranking_df, error_df], ignore_index=True)

    # create a rank numeric column: 1-based row position (the index is 0..n-1 here)
    ranking_df["rank"] = ranking_df.index + 1
    ranking_df["rank"] = ranking_df["rank"].astype("Int64")  # Int64 supports NaN

    # change 'qps' to K
    ranking_df["qps"] = ranking_df["qps"] / 1000.0

    # return the track and dataset as column
    ranking_df["track"] = track
    ranking_df["dataset"] = dataset

    dfs.append(ranking_df)


In [38]:
# combine all track dataframes
# (each frame keeps its own 0..n-1 index, so index labels repeat across tracks)
master_df = pd.concat(objs=dfs)
master_df

Unnamed: 0,algorithm,qps,recall/ap,status,rank,track,dataset
0,zilliz,213.29,0.93,ok,1,filter,yfcc-10M
1,pinecone,146.72,0.92,ok,2,filter,yfcc-10M
2,puck,62.26,0.94,ok,3,filter,yfcc-10M
3,parlayivf,55.03,0.95,ok,4,filter,yfcc-10M
4,wm_filter,20.87,0.92,ok,5,filter,yfcc-10M
5,pyanns,8.99,0.91,ok,6,filter,yfcc-10M
6,faissplus,8.49,0.91,ok,7,filter,yfcc-10M
7,faiss,7.33,0.99,ok,8,filter,yfcc-10M
8,cufe,6.32,0.94,ok,9,filter,yfcc-10M
9,dhq,,,error,10,filter,yfcc-10M


In [39]:
# do a quick sanity check on rankings

# within each track the qps values should appear in descending order
sanity_df = master_df.sort_values(by=["track", "qps"], ascending=[False, False])
sanity_df

Unnamed: 0,algorithm,qps,recall/ap,status,rank,track,dataset
0,zilliz,34.76,0.93,ok,1,sparse,sparse-full
1,pyanns,26.86,0.92,ok,2,sparse,sparse-full
2,pinecone_smips,12.0,0.9,ok,3,sparse,sparse-full
3,shnsw,8.25,0.94,ok,4,sparse,sparse-full
4,nle,2.95,0.94,ok,5,sparse,sparse-full
5,cufe,0.08,0.99,ok,6,sparse,sparse-full
6,linscan,0.06,0.98,ok,7,sparse,sparse-full
7,spmat,,,error,8,sparse,sparse-full
8,sustech-whu,,,error,9,sparse,sparse-full
0,scann,107.41,0.91,ok,1,ood,text2image-10M


In [40]:
# pivot via 'rank' so that track rankings are parallel across columns

# rows: rank; columns: (value, track) pairs for algorithm / qps / status
pivot_df = master_df.pivot_table(
    values=['algorithm', 'qps', 'status'],
    index=['rank'],
    columns=['track'],
    aggfunc='first',
)

In [41]:
# group specific track columns

# for each track, in display order, take its algorithm column followed by its qps column
track_columns = [
    (field, track_name)
    for track_name in ('filter', 'sparse', 'ood')
    for field in ('algorithm', 'qps')
]
reorder_df = pivot_df[track_columns]

In [42]:
# swap hierarchical index for columns

# column keys become (track, field) instead of (field, track)
swap_df = reorder_df.swaplevel(i=0, j=1, axis=1)

In [43]:
# add per track 'rank' column ensuring no rank to algorithms that did not qualify
#
# A row gets a rank only when the track has a qps value in that row; the rank is the
# 1-based row position. The cell can be re-run safely: an existing rank column is
# overwritten instead of inserted a second time (insert raises on duplicates).

# 1-based row positions as nullable integers, aligned with swap_df's index
row_positions = pd.Series(list(range(1, len(swap_df) + 1)), index=swap_df.index, dtype='Int64')

# iterate over the tracks actually present in swap_df's columns, so a track that exists
# in the data but was not selected for the table cannot cause a KeyError
for track in swap_df.columns.get_level_values(0).unique():
    rank_col = (track, 'rank')

    # blank out the position wherever this track has no qps (did not qualify / no row)
    track_ranking = row_positions.where(swap_df[(track, 'qps')].notna())

    if rank_col in swap_df.columns:
        swap_df[rank_col] = track_ranking
    else:
        # place the rank column directly in front of the track's algorithm column
        col_idx = list(swap_df.columns).index((track, 'algorithm'))
        swap_df.insert(col_idx, rank_col, track_ranking)

In [44]:
# style the dataframe (getting ready for html export)
# `.style` gives the pandas Styler for swap_df; the HTML export cell applies its
# number formatting and row-index hiding to this object. The bare name on the last
# line renders the styled table as the cell output.

display_df_styled = swap_df.style
display_df_styled

track,filter,filter,filter,sparse,sparse,sparse,ood,ood,ood
Unnamed: 0_level_1,rank,algorithm,qps,rank,algorithm,qps,rank,algorithm,qps
rank,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
1,1.0,zilliz,213.285059,1.0,zilliz,34.759976,1.0,scann,107.413009
2,2.0,pinecone,146.721242,2.0,pyanns,26.858519,2.0,pinecone-ood,76.865423
3,3.0,puck,62.257695,3.0,pinecone_smips,12.003182,3.0,zilliz,73.50961
4,4.0,parlayivf,55.033198,4.0,shnsw,8.247277,4.0,pyanns,55.504352
5,5.0,wm_filter,20.874857,5.0,nle,2.945626,5.0,sustech-ood,28.458263
6,6.0,pyanns,8.988022,6.0,cufe,0.084976,6.0,mysteryann-dif,27.946097
7,7.0,faissplus,8.493264,7.0,linscan,0.060756,7.0,mysteryann,26.560867
8,8.0,faiss,7.327855,,spmat,,8.0,vamana,19.965517
9,9.0,cufe,6.318462,,sustech-whu,,9.0,puck,18.960224
10,,dhq,,,,,10.0,ngt,11.921399


In [45]:
# convert styled table to html string

importlib.reload(helper)  # need to reload if editing helper simultaneously

# format every track's qps column as e.g. "213.3K" (blank when missing),
# then hide the row index and export to html
qps_format = "{:20,.1f}K"
styler = display_df_styled
for track in ("filter", "sparse", "ood"):
    styler = styler.format(formatter=qps_format, na_rep="",
                           subset=pd.IndexSlice[:, pd.IndexSlice[track, 'qps']])
html = styler.hide(axis=0).to_html()

# remove style tag (markdown doesn't use it anyway)
# NOTE(review): behavior of helper.remove_style_prefix is not visible here - confirm
new_html = helper.remove_style_prefix(html)

# Blank out "nan" only where it is the entire text between two tags (i.e. an empty
# table cell). A plain str.replace would also eat "nan" inside algorithm names,
# attribute values or link text.
new_html = re.sub(r"(>\s*)nan(\s*<)", r"\1\2", new_html)

# insert links for specific use cases (ie, errors)
new_html = helper.replace_table_with_links(new_html, swap_df, IGNORE_ALGOS)

# display in-line
HTML(new_html)



filter,filter,filter,sparse,sparse,sparse,ood,ood,ood
rank,algorithm,qps,rank,algorithm,qps,rank,algorithm,qps
1.0,zilliz,213.3K,1.0,zilliz,34.8K,1.0,scann,107.4K
2.0,pinecone,146.7K,2.0,pyanns,26.9K,2.0,pinecone-ood,76.9K
3.0,puck,62.3K,3.0,pinecone_smips,12.0K,3.0,zilliz,73.5K
4.0,parlayivf,55.0K,4.0,shnsw,8.2K,4.0,pyanns,55.5K
5.0,wm_filter,20.9K,5.0,nle,2.9K,5.0,sustech-ood,28.5K
6.0,pyanns,9.0K,6.0,cufe,0.1K,6.0,mysteryann-dif,27.9K
7.0,faissplus,8.5K,7.0,linscan,0.1K,7.0,mysteryann,26.6K
8.0,faiss,7.3K,,spmat,,8.0,vamana,20.0K
9.0,cufe,6.3K,,sustech-whu,,9.0,puck,19.0K
,dhq,,,,,10.0,ngt,11.9K


In [46]:
# insert html into the markdown
#
# Reads the template named by MARKDOWN, places the generated table right after the
# first line matching "# Eval...", and writes the result one directory up under the
# template's name without its leading underscore.

with open(MARKDOWN, 'r') as md:
    contents = md.read()

# locate insertion point via regex
regexpr = "(# Eval.*\n)"
matches = re.search(regexpr, contents, re.M)
if matches is None:
    # fail with a clear message instead of an AttributeError on None
    raise ValueError("No '# Eval...' heading line found in %s; cannot place the results table" % MARKDOWN)

# insert table html immediately after the matched heading line
insert_at = matches.end(0)
new_contents = contents[:insert_at] + "\n" + new_html + " \n" + contents[insert_at:]

# Output name: drop only the leading underscore of the template's file name.
# (Removing every underscore would also mangle underscores elsewhere in the path.)
md_dir, md_name = os.path.split(MARKDOWN)
if md_name.startswith("_"):
    md_name = md_name[1:]

# update the markdown
fname = os.path.join("..", md_dir, md_name)
with open(fname, 'w') as md:
    md.write(new_contents)

print("Wrote", fname)

Wrote .././latitude-m4-metal-medium.md
