In [5]:
import pandas as pd

# Index implementations under comparison, split into the two families.
learned_indexes = ['alexolc', 'sali']
traditional_indexes = ['artolc', 'btreeolc']

df = pd.read_csv('./multi_thread_thp.csv')

# Keep only rows with a non-zero read ratio that were measured with 16 threads.
df = df[(df['read_ratio'] != 0) & (df['thread_num'] == 16)]

# Columns that identify one comparison case; they are also the join keys below.
case_keys = ['key_path', 'test_suite', 'init_table_size']

# One row per (index type, case): the mean throughput over all matching rows.
grouped = (
    df.groupby(['index_type'] + case_keys)['throughput']
    .mean()
    .reset_index()
)

# Per-index slices of the aggregated table.
learned_dfs = {
    index: grouped.loc[grouped['index_type'] == index]
    for index in learned_indexes
}
traditional_dfs = {
    index: grouped.loc[grouped['index_type'] == index]
    for index in traditional_indexes
}


def _count_cases(learned_name, traditional_name):
    """Return (worse, better) case counts for one learned/traditional pair.

    The two per-index tables are inner-joined on the case keys, so only
    cases present for both indexes are compared.  "Worse" means the learned
    index has strictly lower mean throughput; "better" means greater than or
    equal, i.e. ties are counted as better.
    """
    paired = learned_dfs[learned_name].merge(
        traditional_dfs[traditional_name],
        on=case_keys,
        suffixes=(f'_{learned_name}', f'_{traditional_name}'),
    )
    learned_thp = paired[f'throughput_{learned_name}']
    traditional_thp = paired[f'throughput_{traditional_name}']
    return (
        (learned_thp < traditional_thp).sum(),
        (learned_thp >= traditional_thp).sum(),
    )


# Nested dicts keyed as counts[learned_index][traditional_index].
worse_cases_counts = {name: {} for name in learned_dfs}
better_cases_counts = {name: {} for name in learned_dfs}
for learned_name in learned_dfs:
    for traditional_name in traditional_dfs:
        worse, better = _count_cases(learned_name, traditional_name)
        worse_cases_counts[learned_name][traditional_name] = worse
        better_cases_counts[learned_name][traditional_name] = better

# Transpose so that rows are learned indexes and columns are traditional ones.
worse_cases_df = pd.DataFrame(worse_cases_counts).T
print("learned indexes worse than traditional indexes cases:")
print(worse_cases_df)

better_cases_df = pd.DataFrame(better_cases_counts).T
print("learned indexes better than traditional indexes cases:")
print(better_cases_df)

learned indexes worse than traditional indexes cases:
         artolc  btreeolc
alexolc      33         0
sali        103        58
learned indexes better than traditional indexes cases:
         artolc  btreeolc
alexolc      23        56
sali          7        52
