# Read Benchmark Plots

In [None]:
%matplotlib widget
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import re
import warnings
import itertools

In [None]:
def generate_result_tex_file(variable_dict, output_path):
    """Write a LaTeX file that exposes every entry of ``variable_dict`` as a macro.

    The generated file first defines two helper commands:
    ``\\DefineResult{<id>}{<value>}`` stores ``<value>`` under the control
    sequence ``rmk-<id>`` and ``\\Result{<id>}`` expands to it again.  After
    that, one ``\\DefineResult`` line is emitted per dictionary entry, in the
    iteration order of the dictionary.

    Parameters
    ----------
    variable_dict : dict
        Maps identifiers to values; both are converted with ``str()``.
    output_path : str or path-like
        Destination file; it is opened in text mode "w", so existing content
        is overwritten.
    """
    preamble = [
        "%!TEX root = ../thesis.tex\n\n% File was automatically generated.\n\n",
        "\\newcommand{\\DefineResult}[2]{%\n",
        "\t\\expandafter\\newcommand\\csname rmk-#1\\endcsname{#2}%\n",
        "}\n",
        "\\newcommand{\\Result}[1]{\\csname rmk-#1\\endcsname}\n\n",
    ]

    # One macro definition per result, e.g. \DefineResult{speedup}{1.5}
    definitions = [
        "\\DefineResult{" + str(identifier) + "}{" + str(value) + "}\n"
        for identifier, value in variable_dict.items()
    ]

    with open(output_path, "w") as output_file:
        output_file.write("".join(preamble + definitions))
        
def cpu_to_prefix(cpu):
    """Map an architecture label to its short prefix.

    Parameters
    ----------
    cpu : str
        One of "A64FX ARM", "Intel x86", "AMD x86" or "Power".

    Returns
    -------
    str
        "A64FX", "Int", "Amd" or "Pwr" respectively.

    Raises
    ------
    ValueError
        If ``cpu`` is none of the known labels.
    """
    known_prefixes = (
        ("A64FX ARM", "A64FX"),
        ("Intel x86", "Int"),
        ("AMD x86", "Amd"),
        ("Power", "Pwr"),
    )

    for label, prefix in known_prefixes:
        if cpu == label:
            return prefix

    raise ValueError(f"unknown cpu {cpu}")
        

In [None]:
# Two four-entry colour lists; each entry of `colors_lighter` is a lighter
# tint of the entry at the same position in `colors`.
colors = ["#E69F00" ,"#009E73" ,"#0072B2" ,"#999999"]
colors_lighter = ["#f0c466" ,"#80cfba" ,"#80b8d9" ,"#cccccc"]

sns.set_style("ticks")

# Switch between SVG-friendly settings (True) and the PGF/LaTeX setup (False).
USE_SVG = False

if not USE_SVG:
    # Route 'pdf' output through the PGF canvas so that figures are typeset by LaTeX.
    from matplotlib.backends.backend_pgf import FigureCanvasPgf
    matplotlib.backend_bases.register_backend('pdf', FigureCanvasPgf)

    plt.rcParams.update({
            'svg.fonttype': 'none',
            "pgf.texsystem":   "pdflatex", # or any other engine you want to use
            "text.usetex":     True,       # use TeX for all texts
            "font.family":     "serif",
            "font.serif":      [],         # empty entries should cause the usage of the document fonts
            "font.sans-serif": [],
            "font.monospace":  [],
            # Use 10pt font in plots, to match 10pt font in document
            "axes.labelsize": 10,
            "font.size": 10,
            # Make the legend/label fonts a little smaller
            "legend.fontsize": 9,
            "xtick.labelsize": 9,
            "ytick.labelsize": 9
    })
else:
    # Keep text as text (not paths) in exported SVG files.
    plt.rcParams.update({
            'svg.fonttype': 'none'
    })
    
    
# Draw grid lines and ticks below the plotted data.
plt.rc('axes', axisbelow=True) 
# Show complete data frames when they are printed in the notebook.
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', None)

HM_SIZE = 134217728 # size of the hashmap we are interested in in the analysis (2**27)

# Filled elsewhere in the notebook; starts out empty here.
result_dict = {}

# Line thickness presets for print and for on-screen ("digital") output.
MAJOR_LINE_THICKNESS_PRINT = 1.2
MINOR_LINE_THICKNESS_PRINT = 0.7

MAJOR_LINE_THICKNESS_DIGITAL = 0.8
MINOR_LINE_THICKNESS_DIGITAL = 0.3

# The digital preset is the one currently in effect.
MAJOR_LINE_THICKNESS = MAJOR_LINE_THICKNESS_DIGITAL
MINOR_LINE_THICKNESS = MINOR_LINE_THICKNESS_DIGITAL

# When True, the "Unknown" name check inside analysis_plot's map_name is skipped.
DISABLE_ASSERTS = False
# Shared styling constants for data lines, markers, labels and legends.
DATA_LINEWIDTH = 0.8
MARKER_EDGEWIDTH = 0.3
MARKER_SIZE = 4
MARKER_EDGECOLOR = "black"
LABEL_ROTATION=0
LEGEND_HANDLELENGTH = 1
LEGEND_HANDLETEXTPAD = 0.1
LEGEND_COLUMNSPACING = 0.2

In [None]:
def _load_read_arch_results(arch, single_thread_files, mt_results_path, zipf_results_path):
    """Load all read-benchmark CSVs of one architecture into a single frame.

    Parameters
    ----------
    arch : str
        Label stored in the new leading "Arch" column.
    single_thread_files : list of str
        CSV files of the single-threaded runs.
    mt_results_path : str
        CSV with the multi-threaded runs; only rows with ThreadCount > 1 are kept.
    zipf_results_path : str
        CSV with the Zipf runs; its "Zipf" column is cast to bool.

    Returns
    -------
    pandas.DataFrame
        Single-threaded, multi-threaded and Zipf rows (in that order) with a
        fresh 0..n-1 index.
    """
    # Read every file once and concatenate in one go instead of growing the
    # frame pairwise, which would copy the accumulated data on each step.
    results = pd.concat(
        [pd.read_csv(path, index_col=False) for path in single_thread_files],
        ignore_index=True,
    )

    # The single-threaded CSVs are given the thread-related columns of the
    # multi-threaded ones so that both can live in one frame.
    results["ThreadCount"] = 1
    results["Successful"] = True
    results["ThreadTableSize"] = results["Size"]
    results["ThreadAvgRuntime"] = results["Runtime"]
    results["ThreadMaxRuntime"] = results["Runtime"]

    # ThreadCount == 1 rows of the MT file are dropped; the single-threaded
    # data comes from the files above.
    mt_results = pd.read_csv(mt_results_path, index_col=False)
    mt_results = mt_results.loc[mt_results["ThreadCount"] > 1]
    results = pd.concat([results, mt_results], ignore_index=True)

    # Everything loaded so far is a non-Zipf run.
    results["Zipf"] = False
    zipf_results = pd.read_csv(zipf_results_path, index_col=False)
    zipf_results["Zipf"] = zipf_results["Zipf"].astype(bool)
    results = pd.concat([results, zipf_results], ignore_index=True)

    results.insert(0, "Arch", arch)
    results.reset_index(drop=True, inplace=True)
    return results


def load_read_benchmark_data(use_mean_aggregation = True):  # if false, median is used
    """Load, merge and aggregate the read (lookup) benchmark results.

    The CSV files of the four architectures ("Intel x86", "AMD x86",
    "A64FX ARM", "Power") are read from ``data/v1/``, unsuccessful runs are
    dropped, throughput columns are derived, repeated measurements of the
    same configuration are aggregated, and the single-thread baseline of every
    configuration is attached.

    Parameters
    ----------
    use_mean_aggregation : bool, optional
        Aggregate repeated measurements with the mean (default) or, if False,
        with the median.

    Returns
    -------
    pandas.DataFrame
        Aggregated results restricted to rows whose "Size" equals ``HM_SIZE``,
        including the derived columns "WorkloadInBytes", "Lookups/s",
        "MLookups/s", "STPerf" and "STSpeedup".

    Raises
    ------
    ValueError
        If a multi-threaded row has no unique single-threaded counterpart
        (rows whose "Hashmap" starts with "ChainedHashTable" get NaN instead
        when there is no counterpart at all).
    """
    # (arch label, single-threaded files, multi-threaded file, zipf file)
    arch_sources = [
        ("Intel x86",
         ["data/v1/basic_reads_intel.csv", "data/v1/uniform_hash_reads_intel.csv", "data/v1/dense_reads_intel.csv",
          "data/v1/string_reads_intel.csv", "data/v1/dummy_reads_intel.csv", "data/v1/dummyptr_reads_intel.csv"],
         "data/v1/mt_reads_intel.csv",
         "data/v1/zipf_reads_intel.csv"),
        ("AMD x86",
         ["data/v1/basic_reads_amd.csv", "data/v1/uniform_hash_reads_amd.csv", "data/v1/dense_reads_amd.csv",
          "data/v1/string_reads_amd.csv", "data/v1/dummy_reads_amd.csv", "data/v1/dummyptr_reads_amd.csv",
          "data/v1/dummytuple128_reads_amd.csv", "data/v1/dummyvalue128ptr_reads_amd.csv"],
         "data/v1/mt_reads_amd.csv",
         "data/v1/zipf_reads_amd.csv"),
        ("A64FX ARM",
         ["data/v1/basic_reads_arm.csv", "data/v1/uniform_hash_reads_arm.csv", "data/v1/dense_reads_arm.csv",
          "data/v1/string_reads_arm.csv", "data/v1/dummy_reads_arm.csv", "data/v1/dummyptr_reads_arm.csv"],
         "data/v1/mt_reads_arm.csv",
         "data/v1/zipf_reads_arm.csv"),
        ("Power",
         ["data/v1/basic_reads_power.csv", "data/v1/uniform_hash_reads_power.csv", "data/v1/dense_reads_power.csv",
          "data/v1/string_reads_power.csv", "data/v1/dummy_reads_power_p1.csv", "data/v1/dummy_reads_power_p2.csv",
          "data/v1/dummyptr_reads_power.csv"],
         "data/v1/mt_reads_power.csv",
         "data/v1/zipf_reads_power.csv"),
    ]

    merged_results = pd.concat(
        [_load_read_arch_results(*source) for source in arch_sources],
        ignore_index=True,
    )

    # Keep successful runs only.  `== True` is deliberate: it also rejects
    # missing values.  The explicit copy lets the column assignments below
    # operate on an independent frame instead of a filtered slice.
    merged_results = merged_results.loc[merged_results["Successful"] == True].copy()

    # calculate throughput
    merged_results["WorkloadInBytes"] = merged_results["Workload"] * 1024**2 # probably not even used but whatever
    # Runtime is divided by 1000 to obtain a per-second rate (presumably it is
    # recorded in milliseconds -- TODO confirm against the benchmark driver).
    runtime_seconds = merged_results["Runtime"].astype(float) / 1000.0
    total_entries = merged_results["EntriesProcessed"].astype(float) * merged_results["ThreadCount"]
    merged_results["Lookups/s"] = total_entries / runtime_seconds
    merged_results["MLookups/s"] = merged_results["Lookups/s"].astype(float) / 1000000.0

    # calculate mean or median over rows that share the same configuration
    group_attributes = ["Arch", "Hashmap", "Compiler", "PageSize", "HugePageSize", "LoadFactor", "SQR", "Size", "Distribution", "Workload", "KeySize", "ValueSize", "EntrySize", "EntriesProcessed", "Zipf", "ThreadCount"]

    grouped = merged_results.groupby(group_attributes)
    if use_mean_aggregation:
        merged_results = grouped.mean().reset_index()
    else:
        merged_results = grouped.median().reset_index()

    # calculate mt speedup
    # Columns that have to agree between a row and its single-threaded baseline.
    baseline_columns = ["Arch", "Hashmap", "Compiler", "Zipf", "PageSize", "HugePageSize", "SQR", "Size", "Distribution", "LoadFactor"]

    def get_st_lookups(row, df1):
        """Return the Runtime of the single-threaded run matching ``row``."""
        if row["ThreadCount"] == 1:
            return row["Runtime"]

        mask = df1["ThreadCount"] == 1
        for column in baseline_columns:
            mask &= df1[column] == row[column]
        tmp = df1[mask]

        if len(tmp) == 1:
            return tmp.iloc[0]["Runtime"]

        # Chained hash tables are allowed to have no single-threaded baseline.
        if len(tmp) == 0 and row["Hashmap"].startswith("ChainedHashTable"):
            return float('nan')

        # No baseline, or an ambiguous one: dump the context and abort.
        print(row)
        print(tmp)
        raise ValueError(row)

    merged_results["STPerf"] = merged_results.apply(lambda row: get_st_lookups(row, merged_results), axis=1)
    # Ratio of single-threaded runtime to this row's runtime.
    merged_results["STSpeedup"] = merged_results["STPerf"] / merged_results["Runtime"].astype(float)

    # Only the hash map size of interest is analysed; copy before assigning
    # columns so that the assignments do not target a filtered slice.
    merged_results = merged_results[merged_results["Size"] == HM_SIZE].copy()
    merged_results["ThreadCount"] = merged_results["ThreadCount"].astype(int)
    merged_results["Zipf"] = merged_results["Zipf"].astype(bool)

    return merged_results


In [None]:
def _load_write_arch_results(arch, single_thread_files, mt_results_path):
    """Load all write-benchmark CSVs of one architecture into a single frame.

    Parameters
    ----------
    arch : str
        Label stored in the new leading "Arch" column.
    single_thread_files : list of str
        CSV files of the single-threaded runs.
    mt_results_path : str
        CSV with the multi-threaded runs; only rows with ThreadCount > 1 are kept.

    Returns
    -------
    pandas.DataFrame or None
        Single-threaded rows followed by the multi-threaded rows with a fresh
        0..n-1 index, or None when ``single_thread_files`` is empty.
    """
    if not single_thread_files:
        return None

    # Read every file once and concatenate in one go instead of growing the
    # frame pairwise, which would copy the accumulated data on each step.
    results = pd.concat(
        [pd.read_csv(path, index_col=False) for path in single_thread_files],
        ignore_index=True,
    )

    # The single-threaded CSVs are given the thread-related columns of the
    # multi-threaded ones so that both can live in one frame.
    results["ThreadCount"] = 1
    results["Successful"] = True
    results["ThreadTableSize"] = results["Size"]
    results["ThreadAvgRuntime"] = results["Runtime"]
    results["ThreadMaxRuntime"] = results["Runtime"]

    # ThreadCount == 1 rows of the MT file are dropped; the single-threaded
    # data comes from the files above.
    mt_results = pd.read_csv(mt_results_path, index_col=False)
    mt_results = mt_results.loc[mt_results["ThreadCount"] > 1]
    results = pd.concat([results, mt_results], ignore_index=True)

    results.insert(0, "Arch", arch)
    results.reset_index(drop=True, inplace=True)
    return results


def load_write_benchmark_data(use_mean_aggregation = True):  # if false, median is used
    """Load, merge and aggregate the write (insert) benchmark results.

    The CSV files of the four architectures ("Intel x86", "AMD x86",
    "A64FX ARM", "Power") are read from ``data/v1/``, throughput columns are
    derived, repeated measurements of the same configuration are aggregated,
    and the single-thread baseline of every configuration is attached.

    Parameters
    ----------
    use_mean_aggregation : bool, optional
        Aggregate repeated measurements with the mean (default) or, if False,
        with the median.

    Returns
    -------
    pandas.DataFrame
        Aggregated results restricted to rows whose "Size" equals ``HM_SIZE``,
        including the derived columns "Inserts/s", "MInserts/s", "STPerf" and
        "STSpeedup".

    Raises
    ------
    ValueError
        If a multi-threaded row has no unique single-threaded counterpart
        (rows whose "Hashmap" starts with "ChainedHashTable" get NaN instead
        when there is no counterpart at all).
    """
    # (arch label, single-threaded files, multi-threaded file)
    arch_sources = [
        ("Intel x86",
         ["data/v1/basic_writes_intel.csv", "data/v1/uniform_hash_writes_intel.csv", "data/v1/dense_writes_intel.csv",
          "data/v1/string_writes_intel.csv", "data/v1/dummy_writes_intel.csv", "data/v1/dummyptr_writes_intel.csv"],
         "data/v1/mt_writes_intel.csv"),
        ("AMD x86",
         ["data/v1/basic_writes_amd.csv", "data/v1/uniform_hash_writes_amd.csv", "data/v1/dense_writes_amd.csv",
          "data/v1/string_writes_amd.csv", "data/v1/dummy_writes_amd.csv", "data/v1/dummyptr_writes_amd.csv",
          "data/v1/dummytuple128_writes_amd.csv", "data/v1/dummyvalue128ptr_writes_amd.csv"],
         "data/v1/mt_writes_amd.csv"),
        ("A64FX ARM",
         ["data/v1/basic_writes_arm.csv", "data/v1/uniform_hash_writes_arm.csv", "data/v1/dense_writes_arm.csv",
          "data/v1/string_writes_arm.csv", "data/v1/dummy_writes_arm.csv", "data/v1/dummyptr_writes_arm.csv"],
         "data/v1/mt_writes_arm.csv"),
        ("Power",
         ["data/v1/basic_writes_power.csv", "data/v1/uniform_hash_writes_power.csv", "data/v1/dense_writes_power.csv",
          "data/v1/string_writes_power.csv", "data/v1/dummy_writes_power.csv", "data/v1/dummyptr_writes_power.csv"],
         "data/v1/mt_writes_power.csv"),
    ]

    # Architectures without any single-threaded file are skipped.
    per_arch_results = [_load_write_arch_results(*source) for source in arch_sources]
    merged_results = pd.concat(
        [frame for frame in per_arch_results if frame is not None],
        ignore_index=True,
    )
    # NOTE(review): unlike the read loader, rows are not filtered on
    # "Successful" here -- confirm that this is intended.

    # calculate throughput
    # Runtime is divided by 1000 to obtain a per-second rate (presumably it is
    # recorded in milliseconds -- TODO confirm against the benchmark driver).
    runtime_seconds = merged_results["Runtime"].astype(float) / 1000.0
    total_entries = merged_results["EntriesProcessed"].astype(float) * merged_results["ThreadCount"]
    merged_results["Inserts/s"] = total_entries / runtime_seconds
    merged_results["MInserts/s"] = merged_results["Inserts/s"].astype(float) / 1000000.0

    # calculate mean or median over rows that share the same configuration
    group_attributes = ["Arch", "Hashmap", "Compiler", "PageSize", "HugePageSize", "LoadFactor", "Size", "Distribution", "KeySize", "ValueSize", "EntrySize", "EntriesProcessed", "ThreadCount"]

    grouped = merged_results.groupby(group_attributes)
    if use_mean_aggregation:
        merged_results = grouped.mean().reset_index()
    else:
        merged_results = grouped.median().reset_index()

    # calculate mt speedup
    # Columns that have to agree between a row and its single-threaded baseline.
    baseline_columns = ["Arch", "Hashmap", "Compiler", "PageSize", "HugePageSize", "Size", "Distribution", "LoadFactor"]

    def get_st_lookups(row, df1):
        """Return the Runtime of the single-threaded run matching ``row``."""
        if row["ThreadCount"] == 1:
            return row["Runtime"]

        mask = df1["ThreadCount"] == 1
        for column in baseline_columns:
            mask &= df1[column] == row[column]
        tmp = df1[mask]

        if len(tmp) == 1:
            return tmp.iloc[0]["Runtime"]

        # Chained hash tables are allowed to have no single-threaded baseline.
        if len(tmp) == 0 and row["Hashmap"].startswith("ChainedHashTable"):
            return float('nan')

        # No baseline, or an ambiguous one: dump the context and abort.
        print(row)
        print(tmp)
        raise ValueError(row)

    merged_results["STPerf"] = merged_results.apply(lambda row: get_st_lookups(row, merged_results), axis=1)
    # Ratio of single-threaded runtime to this row's runtime.
    merged_results["STSpeedup"] = merged_results["STPerf"] / merged_results["Runtime"].astype(float)

    # Only the hash map size of interest is analysed; copy before assigning
    # columns so that the assignment does not target a filtered slice.
    merged_results = merged_results[merged_results["Size"] == HM_SIZE].copy()
    merged_results["ThreadCount"] = merged_results["ThreadCount"].astype(int)
    return merged_results


In [None]:
# Load both data sets once with the default (mean) aggregation; analysis_plot
# works on copies of these module-level frames.
read_benchmark_results = load_read_benchmark_data()
write_benchmark_results = load_write_benchmark_data()

In [None]:
def set_size(width, fraction=1, subplots=(1, 1)):
    """Compute figure dimensions that need no rescaling in LaTeX.

    Parameters
    ----------
    width: float or string
            Document width in points, or the name of a predefined document
            type ('thesis' or 'beamer')
    fraction: float, optional
            Fraction of the width which the figure should occupy
    subplots: array-like, optional
            The number of rows and columns of subplots.
    Returns
    -------
    fig_dim: tuple
            (width, height) of the figure in inches
    """
    # Text widths (in pt) of the known document types.
    preset_widths_pt = {'thesis': 426.79135, 'beamer': 307.28987}
    if isinstance(width, str) and width in preset_widths_pt:
        width_pt = preset_widths_pt[width]
    else:
        width_pt = width

    # 72.27 pt make one inch
    inches_per_pt = 1 / 72.27
    fig_width_in = (width_pt * fraction) * inches_per_pt

    # The golden ratio gives an aesthetic height for a single axes
    # (https://disq.us/p/2940ij3); it is scaled by the rows/columns ratio.
    golden_ratio = (5**.5 - 1) / 2
    n_rows, n_cols = subplots[0], subplots[1]
    fig_height_in = fig_width_in * golden_ratio * (n_rows / n_cols)

    return (fig_width_in, fig_height_in)

In [None]:
from matplotlib.ticker import FuncFormatter
# Ten colours of seaborn's 'deep' palette, kept at module level.
deep_palette = sns.color_palette('deep', n_colors=10)

def analysis_plot(hashmap_filter, palette_dict, ARCHITECTURES, LOAD_FACTORS, width, show_thp = True, show_pref = True, show_unroll = True,
                  show_avx512 = True, show_intel_512 = False,
                  hide_svbcast=False, hide_non_svbcast=False, hide_non_s2n=False, hide_non_avx512 = False, hide_external=True,
                  split_by_simd_size = False, hide_non_thp = False, word_filter=[], simd_sizes = [], exclude_prefixes = [], 
                  file_path=None, force_include=None, power_force_include=None, ncol_legend=8, collect_all_labels=True, 
                  hide_soa = True, hide_clang = True,  hide_msb_fingerprints=True, hide_likely_hints=True, hide_non_likely_hints=False, hide_vec_iterators=True,
                  hide_notestz=False, hide_lsb_fingerprints=False, legend_label_dict = {}, hide_simd_larger_128 = False, legend_height_shift = 0,
                  hide_s2n=False, hide_non_512_on_intel=False, hide_non_256_on_amd = False, hide_sve=False, hide_neon=False, hide_16b_fingerprints=False, hide_avx512_smaller_512=False, additional_include_filter=[],
                  hide_vfp_16b=False, show_nonmultshift64=False, hide_multshift64=False, show_modulo=False, hide_non_modulo=False, show_dense=False, hide_uniform=False,
                  hide_8b_fingerprints=False, hide_xx_hash=False, hide_128b_hasher=False, hide_multaddshift_murmur=False, hide_multaddshift=False, hide_msblsb_multshift=False,
                  show_stringkeys=False, show_inline_dummytuple=False, show_pointer_dummytuple=False,show_intpointer=False, only_show_dummy=False, only_show_dummy_and_intptr=False, filter_load_factor_90=False,
                  additional_prefetching=[], label_order = None, show_fallback = False, hide_256_on_intel = False, hide_msblsb_fingerprints=True, hide_lsbmsb_fingerprints=True, hide_non_kv_budget=True, ylabel_padding=0, xlabel_padding=0,
                  plot_uniform_and_dense = False, multshift_only_lsbmsb=False, hide_insertions=False, disable_legend_border=False, reduce_xticks=False, reduce_yticks=False, reduce_minor=False, other_ylimit=False, power_axis=False, single_row_lf90=False,
                  plot_speedup = False, force_sqr=None, force_write_lf=None, arch_ylimits={}, thread_count=1, analyze_threadcount=False, only_two_sqr=False, markevery=None, hide_zipf = True, hue_order=None, hide_non_zipf=False, extra_legend_columnspacing = 0, extra_legend_handlelength = 0,
                  hide_dummytuple128 = True):
    
    num_rows = len(ARCHITECTURES)
    to_enumerate = ARCHITECTURES
    
    if (plot_speedup or analyze_threadcount) and force_sqr is None:
        raise ValueError("need to supply force_sqr")
    
    if plot_speedup and analyze_threadcount:
        raise ValueError("can only do one of them")
    
    assert not (split_by_simd_size and plot_uniform_and_dense), "Can only have one of split_by_simd_size and plot_uniform_and_dense active"
    
    if show_stringkeys and (hide_xx_hash or not show_nonmultshift64):
        assert 1 == 0, "Stringkeys need xx hash"
    
    if split_by_simd_size:
        assert len(simd_sizes) > 0, "Need to tell me at least 1 simd sizes"
        assert len(ARCHITECTURES) == 1, "Can do SIMD Size Plot only for 1 Architecture ATM"
        num_rows = len(simd_sizes) 
        to_enumerate = simd_sizes
        
    if plot_uniform_and_dense:
        assert len(ARCHITECTURES) == 1, "Can do Dense/Uniform only for 1 Architecture ATM"
        num_rows = 2
        to_enumerate = ["Uniform", "Dense"]
        
    if single_row_lf90:
        num_rows = 1
        LOAD_FACTORS = [90]
        hide_insertions = True
        
    figsize = set_size(width, subplots=(num_rows,len(LOAD_FACTORS) + 1))
    xlabel_fontsize = 7
    ylabel_fontsize = 7
    title_fontsize = 8
    legend_fontsize = 8
    
    if hide_insertions:
        num_cols = len(LOAD_FACTORS)
    else:
        num_cols = len(LOAD_FACTORS) + 1
    
    if single_row_lf90:
        num_cols = len(ARCHITECTURES)
        
    figsize = set_size(width, subplots=(num_rows,num_cols))

    sharex = False if reduce_xticks else "col"
    
    fig, axes = plt.subplots(num_rows, num_cols, sharex=sharex, sharey="row", figsize=figsize)
    
    
    if not single_row_lf90:
        plt.subplots_adjust(hspace = 0.05, wspace = 0.07)
    else:
        plt.subplots_adjust(hspace = 0.05, wspace = 0.1)

    read_data = read_benchmark_results.copy()
    write_data = write_benchmark_results.copy()
    
    def map_name(row, hide_gcc = True):
        name = row["Hashmap"]

        new_name = f"Unknown_{name}"
        simd = False
        splitted = name.split(";")
        
        if "LinearProbingAoS" in name:
            new_name = "LP"
        elif "QuadraticProbingAoS" in name:
            new_name = "QP"
        elif "RecalculatingRobinHoodAoS" in name:
            new_name = "RecalcRH"
        elif "StoringRobinHoodAoS" in name:
            new_name = "StoreRH"
        elif "LinearProbingPackedSoA" in name:
            new_name = "LPPackedSoA"
        elif "LinearProbingSoA" in name:
            new_name = "LPSoA"
        elif "UnchunkedSIMDSoAHashTable" in name:
            new_name = "UnchkdSIMD"
            simd = True
        elif "FingerprintingSIMDSoAHashTable" in name:
            new_name = "FingerPSIMD"
            simd = True
        elif "BucketingSIMDHashTable" in name:
            new_name = "BucketSIMD"
            simd = True
        elif "AbseilFlatHashTable" in name:
            new_name = "AbseilFlat_Ext"
        elif "MartinusRHFlatHashTable" in name:
            new_name = "MRHFlat_Ext"
        elif "F14ValueHashTable" in name:
            new_name = "F14V_Ext"
        elif "ChainedHashTable" in name:
            new_name = "Chain"
        
        if new_name in ["FingerPSIMD", "BucketSIMD"]:
            fingerprint_t = splitted[3]
            
            if "uint8_t" in fingerprint_t:
                new_name += "_8B"
            elif "uint16_t" in fingerprint_t:
                new_name += "_16B"
            else:
                new_name += f"_UnknownFingerprintT{fingerprint_t.strip()}"
                        
            if "LSBLSB" in name:
                new_name += "_LSBLSB"
                
            if "MSBLSB" in name:
                new_name += "_MSBLSB"
                
            if "LSBMSB" in name:
                new_name += "_LSBMSB"
            
            if "FPPB" in name:
                for part in splitted:
                    if "FPPB" in part:
                        new_name += f"_{part.replace('>', '').strip()}"
                        
            if "Bucket" in name:
                for part in splitted:
                        if "KeyValueAoSStoringBucket" in part:
                            new_name += "_KVBu"
                            
            if "UMINV" in name:
                new_name += "_VecIt"
        
        if "AutoPadded" in name:
            new_name += "_AP"
        elif "NaturalAligned" in name:
            new_name += "_NA"
        elif "Unaligned" in name:
            new_name += "_UN"
            
        if "Budget" in name:
            for split in splitted:
                if "Budget" in split and "Additional" not in split:
                    new_name += f"_{split.replace('Budget', 'Bud').strip()}"
                    break

        if (not hide_gcc) or row["Compiler"] != "gcc":
            new_name += f"_{row['Compiler']}"
        
        is_fallback = False
        
        if simd:
            if "NO_TESTZ" in name:
                new_name += "_NOTESTZ"
            elif "TESTZ" in name:
                new_name += "_TESTZ"
            elif "MANUAL_ON_MATCH" in name:
                new_name += "_MOM"
            else:
                new_name += "_UnknownSIMDALGO"
 
            for part in splitted:
                if "Fallback" in part and "NoFallback" not in part:
                    new_name += f"_{re.sub('[^0-9]','', part)}"
                    is_fallback = True

            if row["Arch"] == "Intel x86" or row["Arch"] == "AMD x86":
                if not is_fallback:
                    simd_str = "_UnknownSIMDSize"
                    for part in splitted:
                        if "__" in part and "mmask" not in part:
                            simd_str = f"_{re.sub('[^0-9]','', part)}"

                    new_name += simd_str
                
                if "NoAVX512" not in name:
                    assert("AVX512" in name)
                    new_name += "_AVX512"
            
            elif row["Arch"] == "Power":

                if not is_fallback:
                    new_name += "_128"
                
                if "UnchunkedSIMDSoAHashTable" in name:
                    if "UMINV" in name:
                        new_name += "_Garbage"
                
            elif row["Arch"] == "A64FX ARM":
                if "NoSVE;" not in name:
                    assert("SVE" in name)
                    assert("svuint" in name)
                    assert("svbool" in name)
                    if not is_fallback:
                        new_name += "_512_SVE"
                    else:
                        new_name += "_SVE"

                    if "NoSVEBroadcast" not in name:
                        assert("SVEBroadcast" in name)
                        new_name += "_SVBCast"

                else:
                    assert("svuint" not in name), f"Invalid name {name}"
                    assert("svbool" not in name)

                    if not is_fallback:
                        new_name += "_128_NEON"
                    else:
                        new_name += "_NEON"
                        
                    if "SSE2NEON" in name:
                        new_name += "_S2N"
                    elif "AARCH64" in name:
                        new_name += "_A64MM"
                    elif "UMINV" in name:
                        new_name += "_UMINV"
                    else:
                        new_name += "_UnknownARMALGO"
            else:
                new_name += f"_UnkownSIMDArch{row['Arch']}"
        
        if "NoLikelyHints" not in name:
            if "LikelyHints" in name:
                new_name += "_Hints"
        
        if "NoPref" not in name:
            for part in splitted:
                if "Pref" in part:
                    new_name += f"_{part.strip()}"
                    
                if "AddPrf" in part and "NoAddPrf" not in part:
                    new_name += f"_{part.strip().replace('>','')}"

        if "NoTHP" not in name:
            new_name += "_THP"
            
        if "NoUnroll" not in name:
            for part in splitted:
                if "Unroll" in part:
                    new_name += f"_{part.strip().replace('>','')}"
                    
                    
        if "ModHasher" in name:
            new_name += "_ModFinal"
        else:
            assert("BitHasher") in name, f"Unknown finalizer in {name}"
            
        if "MultShift128B" in name:
            new_name += "_MultShift128"
        elif "MultAddShift64" in name:
            new_name += "_MultAddShift64"
        elif "MultAddShift128" in name:
            new_name += "_MultAddShift128"
        elif "Murmur" in name:
            new_name += "_Murmur"
        elif "XX" in name:
            new_name += "_XX"
        else:
            assert "MultShift64B" in name, f"Unkown hash in {name}"
                    
        assert("Unknown" not in new_name or DISABLE_ASSERTS), f"Found unknown hashmap {new_name} (orig name = {name})"

        if row["Distribution"] == "Dense":
            new_name += "_Dense"
        else:
            assert row["Distribution"] == "Uniform", f"Unhandled Distribution {row['Distribution']}"
        
        if "stringkey" in name:
            new_name += "_StringKey"
            
        if "DummyTuple" in name and "ptr_DummyTuple" not in name and "DummyTuple128" not in name:
            new_name += "_DummyTuple"
            
        if "ptr_DummyTuple" in name and "ptr_DummyTuple128" not in name:
            new_name += "_PtrDummyTuple"
            
        if "DummyTuple128" in name and "ptr_DummyTuple128" not in name:
            new_name += "_128DummyTuple"
        
        if "ptr_DummyTuple128" in name:
            new_name += "_Ptr128DummyTuple"
            
        if "ptr_uint64_t" in name:
            new_name += "_PtrUint"
            
        if is_fallback:
            new_name += "_Fallback"
            
        if "Zipf" in row and row["Zipf"] == True:
            new_name += "_Zipf"
            
        #if row["ThreadCount"] > 1:
        #    new_name += f"_Thread{row['ThreadCount']}"
        
        return new_name

    if force_include is None:
        force_include = []
        
    if power_force_include is None:
        power_force_include = []
        
    full_read_data = read_data.copy()
    full_write_data = write_data.copy()

    def filter_data(data):
        """Tag every row with its plot identifier and apply the plot's row filters.

        A ``PlotIdentifier`` column computed by ``map_name`` is first written
        into ``data`` (i.e. into the frame that was passed in).  Rows whose
        ``Runtime`` is zero are dropped, the remaining rows are selected via
        ``hashmap_filter`` / ``additional_include_filter``, and the result is
        narrowed step by step according to the ``show_*`` / ``hide_*`` /
        ``only_*`` options of the enclosing function.

        Every filter applied through the local ``keep`` helper lets rows whose
        identifier is listed in ``force_include`` pass.  Filters on
        ``ThreadCount``, ``STSpeedup``, ``SQR``, ``LoadFactor``, ``Runtime`` and
        the single-architecture restriction do not honour ``force_include``.

        Patterns are handed to ``Series.str.contains`` and are therefore
        interpreted as regular expressions (several filters rely on ``|``).

        Finally, every entry of ``word_filter`` is removed from the surviving
        identifiers.

        Returns:
            The filtered frame (a new object, distinct from the argument).
        """

        # The helpers below always look at the *current* value of ``data``,
        # so masks are index-aligned with the frame they are applied to.
        def ident_has(pattern):
            return data["PlotIdentifier"].str.contains(pattern)

        def arch_has(pattern):
            return data["Arch"].str.contains(pattern)

        def forced():
            return data["PlotIdentifier"].isin(force_include)

        def keep(mask):
            # Rows selected by ``mask`` plus everything that is force-included.
            return data.loc[mask | forced()]

        def alternation(suffixes):
            # Regex matching any of the given suffixes.
            return '|'.join(suffixes)

        data["PlotIdentifier"] = data.apply(lambda row : map_name(row.copy()), axis=1)
        data = data[data["Runtime"] != 0]

        # Base selection: hashmap name filter(s), force-included identifiers,
        # and identifiers that are force-included on Power only.
        included = data["Hashmap"].str.contains(hashmap_filter)
        if len(additional_include_filter) > 0:
            included = included | data["Hashmap"].str.contains('|'.join(additional_include_filter))
        power_forced = (data["Arch"] == "Power") & data["PlotIdentifier"].isin(power_force_include)
        data = data[included | forced() | power_forced]

        data = keep(~ident_has("_Garbage"))

        if split_by_simd_size or plot_uniform_and_dense:
            data = data[data["Arch"] == ARCHITECTURES[0]]

        if not show_thp:
            data = keep(~ident_has("THP"))

        if not show_pref:
            data = keep(~ident_has("Pref"))

        if not show_unroll:
            data = keep(~ident_has("Unroll"))

        if not show_avx512:
            if (split_by_simd_size and "512" in simd_sizes) or show_intel_512:
                data = keep(~ident_has("AVX512") | ident_has("_512"))
            else:
                data = keep(~ident_has("AVX512") | ~arch_has("x86"))

        if hide_non_avx512:
            data = keep(ident_has("AVX512") | ~arch_has("Intel"))

        if hide_non_thp:
            data = keep(ident_has("THP"))

        if hide_svbcast:
            data = keep(~ident_has("_SVBCast") | ~arch_has("ARM"))

        if hide_non_svbcast:
            data = keep(ident_has("_SVBCast") | ~ident_has("_SVE") | ~arch_has("ARM"))

        if hide_non_s2n:
            data = keep(ident_has("_S2N") | ident_has("_SVE") | ~arch_has("ARM"))

        if hide_external:
            data = keep(~ident_has("_Ext"))

        if hide_soa:
            data = keep(~ident_has("SoA"))

        if hide_clang:
            data = keep(~ident_has("clang"))

        if hide_msb_fingerprints:
            # Both MSB-based variants must go.  The previous ``|`` between the
            # two negated masks only removed identifiers containing *both*
            # tags, which made this flag a no-op for identifiers carrying one.
            data = keep(~ident_has("_MSBLSB") & ~ident_has("_LSBMSB"))

        if hide_likely_hints:
            data = keep(~ident_has("_Hints"))

        if hide_vec_iterators:
            data = keep(~ident_has("_VecIt"))

        if hide_non_likely_hints:
            data = keep(ident_has("_Hints"))

        if hide_notestz:
            data = keep(~ident_has("_NOTESTZ"))

        if hide_lsb_fingerprints:
            data = keep(~ident_has("_LSBLSB"))

        if hide_lsbmsb_fingerprints:
            data = keep(~ident_has("_LSBMSB"))

        if hide_msblsb_fingerprints:
            data = keep(~ident_has("_MSBLSB"))

        if hide_simd_larger_128:
            data = keep(ident_has("_128"))

        if hide_s2n:
            data = keep(~ident_has("_S2N"))

        if hide_non_512_on_intel:
            data = keep(ident_has("_512") | ~arch_has("Intel"))

        if hide_non_256_on_amd:
            data = keep(ident_has("_256") | ~arch_has("AMD"))

        if hide_sve:
            data = keep(ident_has("_128") | ~arch_has("A64FX"))

        if hide_neon:
            data = keep(ident_has("_512") | ~arch_has("A64FX"))

        if hide_16b_fingerprints:
            data = keep(~ident_has("_16B"))

        if hide_8b_fingerprints:
            data = keep(~ident_has("_8B"))

        if hide_avx512_smaller_512:
            data = keep(~ident_has("AVX512") | ident_has("_512") | ~arch_has("x86"))

        if hide_vfp_16b:
            data = keep(~ident_has("FingerPSIMD_16B"))

        # Identifier suffixes that map_name appends for hash functions other
        # than the one that gets no suffix.
        hash_suffixes = ["_MultShift128", "_MultAddShift64", "_MultAddShift128", "_Murmur", "_XX"]
        if not show_nonmultshift64:
            data = keep(~ident_has(alternation(hash_suffixes)))

        if hide_multshift64:
            data = keep(ident_has(alternation(hash_suffixes)))

        mas_suffixes = ["_MultAddShift64", "_MultAddShift128"]

        if hide_multaddshift:
            data = keep(~ident_has(alternation(mas_suffixes)))

        if hide_msblsb_multshift:
            # Drop _MSBLSB identifiers that carry none of the hash suffixes.
            data = keep(~(ident_has("_MSBLSB") & ~ident_has(alternation(hash_suffixes))))

        non_ms_suffixes = ["_Murmur", "_XX"]

        if multshift_only_lsbmsb:
            data = keep(ident_has(alternation(non_ms_suffixes)) | ident_has("_LSBMSB"))

        if not show_modulo:
            data = keep(~ident_has("_Mod"))

        if hide_non_modulo:
            data = keep(ident_has("_Mod"))

        if not show_dense:
            data = keep(~ident_has("_Dense"))

        if hide_uniform:
            data = keep(ident_has("_Dense"))

        # The following column-value filters intentionally ignore force_include,
        # exactly as before.
        if not (plot_speedup or analyze_threadcount):
            data = data.loc[data["ThreadCount"] == thread_count]

        if plot_speedup:
            data = data.loc[~data["STSpeedup"].isna()]
            if "SQR" in data.columns:
                data = data.loc[data["SQR"] == force_sqr]

        if force_write_lf is not None:
            # Only frames without an SQR column are restricted to one load factor.
            if "SQR" not in data.columns:
                data = data.loc[data["LoadFactor"] == force_write_lf]

        if analyze_threadcount and "SQR" in data.columns:
            data = data.loc[data["SQR"] == force_sqr]

        if hide_xx_hash:
            data = keep(~ident_has("_XX"))

        for prefix in exclude_prefixes:
            data = keep(~data["PlotIdentifier"].str.startswith(prefix))

        onehundred28_suffixes = ["_MultAddShift128", "_MultShift128"]

        if hide_128b_hasher:
            data = keep(~ident_has(alternation(onehundred28_suffixes)))

        funny_hash_suffixes = ["_MultAddShift128", "_MultAddShift64", "_Murmur"]

        if hide_multaddshift_murmur:
            data = keep(~ident_has(alternation(funny_hash_suffixes)))

        if not show_stringkeys:
            data = keep(~ident_has("_StringKey"))

        if hide_dummytuple128:
            data = keep(~ident_has("_128DummyTuple"))
            data = keep(~ident_has("_Ptr128DummyTuple"))

        if not show_inline_dummytuple:
            data = keep(~ident_has("_DummyTuple"))

        if not show_pointer_dummytuple:
            data = keep(~ident_has("_PtrDummyTuple"))

        if not show_intpointer:
            data = keep(~ident_has("_PtrUint"))

        if only_show_dummy:
            data = keep(ident_has("DummyTuple"))

        if only_show_dummy_and_intptr:
            data = keep(ident_has("DummyTuple") | ident_has("_PtrUint"))

        if filter_load_factor_90:
            data = data.loc[data["LoadFactor"] < 90]

        if single_row_lf90:
            data = data.loc[data["LoadFactor"] == 90]

        if hide_zipf:
            data = keep(~ident_has("_Zipf"))

        if only_two_sqr:
            if "SQR" in data.columns:
                data = data.loc[(data["SQR"] == 0) | (data["SQR"] == 100)]

        if hide_non_kv_budget:
            # NOTE(review): as written this only drops identifiers containing
            # both "Chain" and "BudKV"; behaviour kept unchanged - confirm that
            # this is what the flag is meant to do.
            data = keep(~ident_has("Chain") | ~ident_has("BudKV"))

        if not show_fallback:
            data = keep(~ident_has("_Fallback"))

        if hide_256_on_intel:
            data = keep(~ident_has("_256") | ~arch_has("Intel"))

        if hide_non_zipf:
            data = keep(ident_has("_Zipf"))

        if show_pref:
            # Of the "_AddPrf" variants, keep only those requested through
            # additional_prefetching; identifiers without "_AddPrf" are unaffected.
            add_pref_suffix = [f"_AddPrf{x}" for x in additional_prefetching]
            if len(add_pref_suffix) > 0:
                data = keep(ident_has(alternation(add_pref_suffix)) | ~ident_has("_AddPrf"))
            else:
                data = keep(~ident_has("_AddPrf"))

        def filter_words(identifier):
            # Remove every occurrence of each filtered word from the identifier.
            for word in word_filter:
                identifier = identifier.replace(word, "")

            return identifier

        if len(word_filter) > 0 and len(data) > 0:
            # assign() builds a new frame instead of writing a column into a
            # frame that was itself derived by boolean filtering.
            data = data.assign(PlotIdentifier=data["PlotIdentifier"].map(filter_words))

        return data
    
    read_data = filter_data(read_data)
    write_data = filter_data(write_data)

    if not single_row_lf90:
        if not hide_insertions:
            if len(to_enumerate) == 1:
                axes[0].set_title(f"Insertions", size=title_fontsize)
            else:
                axes[0,0].set_title(f"Insertions", size=title_fontsize)

        for i, lf in enumerate(LOAD_FACTORS):
            if not hide_insertions:
                idx = i + 1
            else:
                idx = i

            if len(to_enumerate) == 1:
                axes[idx].set_title(f"Lookups@LF {lf}", size=title_fontsize)
            else:
                axes[0, idx].set_title(f"Lookups@LF {lf}", size=title_fontsize)
    else:
        for i, arch in enumerate(to_enumerate):
            axes[i].set_title(f"{arch.replace('A64FX ', '')}", size=title_fontsize)


    return_read_data2 = read_data.copy()
    return_write_data2 = write_data.copy()
    
    markers = ["o", "s", "P", "X", "D", "v", "h", ">", "<", "^", "d"]
    marker_dict = dict()
    for i,key in enumerate(list(palette_dict)):
        marker_dict[key] = markers[i]    
    
    filter_str = "PlotIdentifier"        

    for i, arch in enumerate(to_enumerate):
        hue_str = "PlotIdentifier"

        if plot_uniform_and_dense:
            def distri_name_fix(row):
                """Return the row's label, tagging it ``_Uniform`` unless it already contains ``_Dense``."""
                label = row[hue_str]
                return label if "_Dense" in label else f"{label}_Uniform"

            write_data["DistribPlotIdentifier"] = write_data.apply(lambda row : distri_name_fix(row.copy()), axis=1)
            read_data["DistribPlotIdentifier"] = read_data.apply(lambda row : distri_name_fix(row.copy()), axis=1)
            filter_str = "DistribPlotIdentifier"

        if split_by_simd_size:
            read_arch_data = read_data.loc[read_data[filter_str].str.contains(f"_{arch}") | read_data[filter_str].isin(force_include)]
            write_arch_data = write_data.loc[write_data[filter_str].str.contains(f"_{arch}") | write_data[filter_str].isin(force_include)]
        else:
            if plot_uniform_and_dense:
                read_arch_data = read_data.loc[read_data[filter_str].str.contains(f"_{arch}")]
                write_arch_data = write_data.loc[write_data[filter_str].str.contains(f"_{arch}")]
            else:
                read_arch_data = read_data[read_data["Arch"] == arch]
                write_arch_data = write_data[write_data["Arch"] == arch]   
                    
        if not hide_insertions:
            # Plot Insertions
            if len(to_enumerate) == 1:
                ax = axes[0]
            else:
                ax = axes[i,0]

            insert_data = write_arch_data.copy()

            if plot_uniform_and_dense:
                def distri_name_fix(row):
                    """Return the row's current hue label with every ``_Dense`` removed."""
                    label = row[hue_str]
                    return label.replace("_Dense", "")

                insert_data["DPlotIdentifier"] = insert_data.apply(lambda row : distri_name_fix(row.copy()), axis=1)
                hue_str = "DPlotIdentifier"

            if split_by_simd_size:
                def simd_name_fix(row, simd_size):
                    """Return the row's current hue label without the SIMD-size token and related tags.

                    ``_{simd_size}`` is always removed.  ``_S2N`` / ``_SVBCast`` are
                    removed when ``hide_non_s2n`` / ``hide_non_svbcast`` are set.  For
                    size "512", ``_SVE`` is removed (and ``_AVX512`` too when
                    ``hide_non_avx512`` equals False); for size "128", ``_NEON`` is removed.
                    """
                    label = row[hue_str].replace(f"_{simd_size}", "")

                    if hide_non_s2n:
                        label = label.replace("_S2N", "")
                    if hide_non_svbcast:
                        label = label.replace("_SVBCast", "")

                    if simd_size == "512":
                        if hide_non_avx512 == False:
                            label = label.replace("_AVX512", "")
                        return label.replace("_SVE", "")

                    if simd_size == "128":
                        return label.replace("_NEON", "")

                    return label

                insert_data["SIMDPlotIdentifier"] = insert_data.apply(lambda row : simd_name_fix(row.copy(), arch), axis=1)
                hue_str = "SIMDPlotIdentifier"


            if len(insert_data) > 0:
                
                if plot_speedup:
                    x_str = "ThreadCount"
                    y_str = "STSpeedup"
                elif analyze_threadcount:
                    x_str = "ThreadCount"
                    y_str = "MInserts/s"
                else:
                    x_str = "LoadFactor"
                    y_str = "MInserts/s"
                    
                p = sns.lineplot(ax=ax, data=insert_data, x=x_str, y=y_str,
                                 hue=hue_str,  hue_order=hue_order, style=hue_str, markers=marker_dict, markevery=markevery, dashes=False, 
                                 linewidth = DATA_LINEWIDTH, palette=palette_dict,
                                 markeredgewidth=MARKER_EDGEWIDTH, markersize=MARKER_SIZE, markeredgecolor=MARKER_EDGECOLOR)
                warnings.filterwarnings("ignore") # mathplotlib spams some kind of weird warning here for whatever reason
                p.set_yticklabels(p.get_yticks(), size = ylabel_fontsize)
                warnings.resetwarnings()

                xticks = [25,50,70,90]
                
                if filter_load_factor_90:
                    xticks = [25,50,70]
                    
                if (plot_speedup or analyze_threadcount):
                    xticks = [1, 2, 4, 8, 16, 32, 64]
                    
                if reduce_xticks and i < len(to_enumerate) - 1:
                    xticks = []
                    
                
                
                    
                p.set_xticks(xticks)

                p.set_xticklabels(p.get_xticks(), size = xlabel_fontsize, rotation=LABEL_ROTATION)
                if plot_speedup or analyze_threadcount and len(p.get_xticks()) > 0:
                    p.set_xticklabels([1, None, None, 8, 16, 32, 64], size = xlabel_fontsize, rotation=LABEL_ROTATION)

                if i == len(to_enumerate) - 1:
                    if plot_speedup or analyze_threadcount:
                        p.set_xlabel("Threads", fontsize=xlabel_fontsize, labelpad=xlabel_padding)
                    else:
                        p.set_xlabel("LF", fontsize=xlabel_fontsize, labelpad=xlabel_padding)

                else:
                    p.set_xlabel("", fontsize=xlabel_fontsize, labelpad=xlabel_padding)
                ax.yaxis.set_major_formatter(FuncFormatter(lambda x, _: ('%f' % x).rstrip('0').rstrip('.')))

                ax.get_legend().remove()

                ax.get_yaxis().set_minor_locator(matplotlib.ticker.AutoMinorLocator())
                
                if not reduce_minor:
                    ax.get_yaxis().set_minor_locator(matplotlib.ticker.AutoMinorLocator())
                    ax.grid(axis='y', which='minor', linewidth=MINOR_LINE_THICKNESS)
                else:
                    ax.get_yaxis().set_major_locator(matplotlib.ticker.MaxNLocator(min_n_ticks=3, nbins=4))
                    ax.minorticks_off()
                    
                ax.grid(axis='y', which='major', linewidth=MAJOR_LINE_THICKNESS)

                    
        # Plot Lookups
        for j, lf in enumerate(LOAD_FACTORS):
            if hide_insertions:
                col_idx = j
            else:
                col_idx = j + 1
                              
            if single_row_lf90:
                col_idx = i
            
            lf_data = read_arch_data[read_arch_data["LoadFactor"] == lf].copy()
                        
            if len(to_enumerate) == 1 or single_row_lf90:
                ax = axes[col_idx]
            else:
                ax = axes[i,col_idx]

            if len(lf_data) == 0:
                continue
                
            hue_str = "PlotIdentifier"
            
            if plot_uniform_and_dense:
                def distri_name_fix(row):
                    """Return the row's current hue label with every ``_Dense`` removed."""
                    label = row[hue_str]
                    return label.replace("_Dense", "")

                lf_data["DPlotIdentifier"] = lf_data.apply(lambda row : distri_name_fix(row.copy()), axis=1)
                hue_str = "DPlotIdentifier"

            
            if split_by_simd_size:                
                def simd_name_fix(row, simd_size):
                    """Return the row's current hue label without the SIMD-size token and related tags.

                    ``_{simd_size}`` is always removed.  ``_S2N`` / ``_SVBCast`` are
                    removed when ``hide_non_s2n`` / ``hide_non_svbcast`` are set.  For
                    size "512", ``_SVE`` is removed (and ``_AVX512`` too when
                    ``hide_non_avx512`` equals False); for size "128", ``_NEON`` is removed.
                    """
                    label = row[hue_str].replace(f"_{simd_size}", "")

                    if hide_non_s2n:
                        label = label.replace("_S2N", "")
                    if hide_non_svbcast:
                        label = label.replace("_SVBCast", "")

                    if simd_size == "512":
                        if hide_non_avx512 == False:
                            label = label.replace("_AVX512", "")
                        return label.replace("_SVE", "")

                    if simd_size == "128":
                        return label.replace("_NEON", "")

                    return label
                
                lf_data["SIMDPlotIdentifier"] = lf_data.apply(lambda row : simd_name_fix(row.copy(), arch), axis=1)
                hue_str = "SIMDPlotIdentifier"
                            
            
            if plot_speedup:
                x_str = "ThreadCount"
                y_str = "STSpeedup"
            elif analyze_threadcount:
                x_str = "ThreadCount"
                y_str = "MLookups/s"
            else:
                x_str = "SQR"
                y_str = "MLookups/s"
                            
            p = sns.lineplot(ax=ax, data=lf_data, x=x_str, y=y_str, hue=hue_str, hue_order=hue_order, style=hue_str, markers=marker_dict, markevery=markevery, dashes=False, linewidth = DATA_LINEWIDTH, palette=palette_dict, markeredgewidth=MARKER_EDGEWIDTH, markersize=MARKER_SIZE, markeredgecolor=MARKER_EDGECOLOR)
            warnings.filterwarnings("ignore") # mathplotlib spams some kind of weird warning here for whatever reason

            if reduce_yticks and not single_row_lf90 and (not hide_insertions and (insert_data is not None or len(insert_data) > 0 or j > 0)):
                ax.tick_params(axis='y', which="both", length=0, width=0)

            if single_row_lf90 and i > 0:
                ax.tick_params(axis='y', which="both", length=0, width=0)
            
            p.set_yticklabels(p.get_yticks(), size = ylabel_fontsize)
            warnings.resetwarnings()
            
            if reduce_xticks:
                if i == len(to_enumerate) - 1 or single_row_lf90:
                    if not (plot_speedup or analyze_threadcount):
                        p.set_xticks([0, 50, 100])
                    else:
                        p.set_xticks([1, 2, 4, 8, 16, 32, 64])
                else:
                    p.set_xticks([])
            else:
                if not (plot_speedup or analyze_threadcount):
                    p.set_xticks([0, 50, 100])
                else:
                    p.set_xticks([1, 2, 4, 8, 16, 32, 64])
                
            p.set_xticklabels(p.get_xticks(), size = xlabel_fontsize, rotation=LABEL_ROTATION)
            
            if plot_speedup or analyze_threadcount and len(p.get_xticks()) > 0:
                p.set_xticklabels([1, None, None, 8, 16, 32, 64], size = xlabel_fontsize, rotation=LABEL_ROTATION)
            
            if i == len(to_enumerate) - 1 or single_row_lf90:
                if plot_speedup or analyze_threadcount:
                    p.set_xlabel("Threads", fontsize=xlabel_fontsize, labelpad=xlabel_padding)
                else:
                    p.set_xlabel("SQR", fontsize=xlabel_fontsize, labelpad=xlabel_padding)
            else:
                p.set_xlabel("", fontsize=xlabel_fontsize, labelpad=xlabel_padding)
    
            ax.set_ylim(bottom=0,top=arch_ylimits.get(arch, None))
            
            ax.yaxis.set_major_formatter(FuncFormatter(lambda x, _: ('%f' % x).rstrip('0').rstrip('.')))
            
            
            if not reduce_minor:
                ax.get_yaxis().set_minor_locator(matplotlib.ticker.AutoMinorLocator())
                ax.grid(axis='y', which='minor', linewidth=MINOR_LINE_THICKNESS)      
            else:
                n_bins = 3 if other_ylimit else 4
                min_n_ticks = 2 if other_ylimit else 3
                
                if power_axis and i == len(to_enumerate) - 1:
                    n_bins = 3
                    min_n_ticks = 2
                
                ax.get_yaxis().set_major_locator(matplotlib.ticker.MaxNLocator(min_n_ticks=min_n_ticks, nbins=n_bins))
                ax.minorticks_off()
                
            ax.grid(axis='y', which='major', linewidth=MAJOR_LINE_THICKNESS)

                
            ax.get_legend().remove()

    for i, arch in enumerate(to_enumerate):
        for j, lf in enumerate(LOAD_FACTORS):           
            if split_by_simd_size:
                if len(simd_sizes) == 1:
                    axes[j].set_ylabel(f"{ARCHITECTURES[0].replace('ARM','')} - {arch}\nM Ops/s"  if j == 0 else "", fontsize=ylabel_fontsize, labelpad=ylabel_padding)
                else:
                    axes[i, j].set_ylabel(f"{ARCHITECTURES[0].replace('ARM','')} - {arch}\nM Ops/s"  if j == 0 else "", fontsize=ylabel_fontsize, labelpad=ylabel_padding)
            else:
                if plot_uniform_and_dense:
                    axes[i, j].set_ylabel(f"{arch}\nM Ops/s" if j == 0 else "", fontsize=ylabel_fontsize, labelpad=ylabel_padding)
                else:
                    if single_row_lf90:
                        axes[i].set_ylabel("M Ops/s" if i == 0 else "", fontsize=ylabel_fontsize, labelpad=ylabel_padding)
                    else:
                        if len(ARCHITECTURES) == 1:
                            axes[j].set_ylabel(f"{arch.replace('A64FX ', '') }\nM Ops/s" if j == 0 else "", fontsize=ylabel_fontsize, labelpad=ylabel_padding)
                        else:
                            axes[i, j].set_ylabel(f"{arch.replace('A64FX ', '') }\nM Ops/s"  if j == 0 else "", fontsize=ylabel_fontsize, labelpad=ylabel_padding)
    
    handles, labels = None, None
    
    all_labels = []
    
    if collect_all_labels:
        handles = []
        labels = []
        for ax in axes.flat:
            _handles, _labels = ax.get_legend_handles_labels()
            all_labels += _labels
            
            for idx,label in enumerate(_labels):
                if label not in labels:
                    labels.append(label)
                    handles.append(_handles[idx])
                
        labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: t[0]))
        
    else:
        for ax in axes.flat:
            _handles, _labels = ax.get_legend_handles_labels()
            all_labels += _labels
            if labels is None or len(labels) < len(_labels):
                handles = _handles
                labels = _labels

    if len(LOAD_FACTORS)  > 2:
        bbox = (0,-0.01 + legend_height_shift,1,1)
    else:
        if ncol_legend > 3:
            bbox = (0,-0.03 + legend_height_shift,1,1)
        else:
            bbox = (0,-0.08 + legend_height_shift,1,1)
        
    transformed_labels = ()
    for label in labels:
        if label in legend_label_dict:
            transformed_labels = transformed_labels + (legend_label_dict[label],)
        else:
            label = label.replace("_", "-") # ensure latex can handle this
            transformed_labels = transformed_labels + (label,)
       
    if label_order is not None:
        handles = [handles[idx] for idx in label_order]
        transformed_labels = [transformed_labels[idx] for idx in label_order]
    
    
    legend = fig.legend(handles, transformed_labels, frameon=not disable_legend_border, loc='lower center', handlelength=LEGEND_HANDLELENGTH + extra_legend_handlelength, columnspacing=LEGEND_COLUMNSPACING + extra_legend_columnspacing, handletextpad=LEGEND_HANDLETEXTPAD, ncol = ncol_legend, bbox_to_anchor = bbox, bbox_transform = fig.transFigure, fontsize=legend_fontsize, title_fontsize=legend_fontsize, markerscale=0.5)
    
    # set the linewidth of each legend object
    for legobj in legend.legendHandles:
        legobj.set_linewidth(DATA_LINEWIDTH)
        legobj.set_markersize(MARKER_SIZE)
        legobj.set_markeredgewidth(MARKER_EDGEWIDTH)
        legobj.set_markeredgecolor(MARKER_EDGECOLOR)
        
    if file_path is not None:
        if "pdf" in file_path:
            plt.savefig(file_path, bbox_inches='tight', backend='pgf')
        else:
            plt.savefig(file_path, bbox_inches='tight')

  
    plt.show()
    
    return return_read_data2, return_write_data2
    

## Analysis of Linear Probing

In [None]:
# Colour per linear-probing variant; the key order is kept as-is because
# analysis_plot derives the marker assignment from it.
pal_dict = {
    'LP_AP': deep_palette[0],
    'LP_NA': deep_palette[1],
    'LP_UN_THP': deep_palette[2],
    'LP_UN': deep_palette[3],
}

# Plot with four load factors; the returned frames are reused by later cells.
read_d, write_d = analysis_plot(
    "LinearProbing",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_soa=True,
    hide_clang=True,
    file_path="full_lp_memlayout_thp.pdf",
    legend_label_dict={
        "LP_AP": "AutoPadded",
        "LP_NA": "NaturallyAligned",
        "LP_UN": "Unaligned",
        "LP_UN_THP": "Unaligned+THP",
    },
)

In [None]:
# Reduced variant of the plot above: two load factors and a two-column legend.
# The returned frames are not needed here.
_ = analysis_plot(
    "LinearProbing",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 70],
    240,
    ncol_legend=2,
    legend_height_shift=-0.06,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    file_path="lp_memlayout_thp.pdf",
    legend_label_dict={
        "LP_AP": "AutoPadded",
        "LP_NA": "NaturallyAligned",
        "LP_UN": "Unaligned",
        "LP_UN_THP": "Unaligned+THP",
    },
)

In [None]:
# Numbers: independent working copies of the frames returned by the full plot
# (df1/df3 hold lookup data, df2/df4 hold insert data).
df1, df2, df3, df4 = (frame.copy() for frame in (read_d, write_d, read_d, write_d))

def get_ap_lookups(row):
    """Return the "MLookups/s" of the LP_AP row in ``df1`` matching ``row``.

    A row matches when it has the same Arch, Compiler, PageSize, HugePageSize,
    SQR, Size, Distribution and LoadFactor as ``row`` and its PlotIdentifier
    is "LP_AP".

    Raises:
        AssertionError: if ``df1`` does not contain exactly one such row.
    """
    shared_columns = ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR",
                      "Size", "Distribution", "LoadFactor")
    selector = df1["PlotIdentifier"] == "LP_AP"
    for column in shared_columns:
        selector &= df1[column] == row[column]
    candidates = df1[selector]
    assert len(candidates) == 1
    return candidates.iloc[0]["MLookups/s"]

def get_ap_inserts(row):
    """Return the "MInserts/s" of the LP_AP row in ``df2`` matching ``row``.

    A row matches when it has the same Arch, Compiler, PageSize, HugePageSize,
    Size, Distribution and LoadFactor as ``row`` and its PlotIdentifier is
    "LP_AP". SQR is not part of the key for inserts.

    Raises:
        AssertionError: if ``df2`` does not contain exactly one such row.
    """
    shared_columns = ("Arch", "Compiler", "PageSize", "HugePageSize",
                      "Size", "Distribution", "LoadFactor")
    selector = df2["PlotIdentifier"] == "LP_AP"
    for column in shared_columns:
        selector &= df2[column] == row[column]
    candidates = df2[selector]
    assert len(candidates) == 1
    return candidates.iloc[0]["MInserts/s"]

def get_nonthp_un_lookups(row):
    """Return the "MLookups/s" of the LP_UN row in ``df3`` matching ``row``.

    A row matches when it has the same Arch, Compiler, PageSize, SQR,
    HugePageSize, Size, Distribution and LoadFactor as ``row`` and its
    PlotIdentifier is "LP_UN".

    Raises:
        AssertionError: if ``df3`` does not contain exactly one such row.
    """
    shared_columns = ("Arch", "Compiler", "PageSize", "SQR", "HugePageSize",
                      "Size", "Distribution", "LoadFactor")
    selector = df3["PlotIdentifier"] == "LP_UN"
    for column in shared_columns:
        selector &= df3[column] == row[column]
    candidates = df3[selector]
    assert len(candidates) == 1
    return candidates.iloc[0]["MLookups/s"]

def get_nonthp_un_inserts(row):
    """Return the "MInserts/s" of the LP_UN row in ``df4`` matching ``row``.

    A row matches when it has the same Arch, Compiler, PageSize, HugePageSize,
    Size, Distribution and LoadFactor as ``row`` and its PlotIdentifier is
    "LP_UN". SQR is not part of the key for inserts.

    The search runs on ``df4``, the frame this helper is applied to. It used
    to search ``df2``, which only gave the same answer because both frames
    are copies of ``write_d``.

    Raises:
        AssertionError: if ``df4`` does not contain exactly one such row.
    """
    shared_columns = ("Arch", "Compiler", "PageSize", "HugePageSize",
                      "Size", "Distribution", "LoadFactor")
    selector = df4["PlotIdentifier"] == "LP_UN"
    for column in shared_columns:
        selector &= df4[column] == row[column]
    candidates = df4[selector]
    assert len(candidates) == 1
    return candidates.iloc[0]["MInserts/s"]

# For each working frame, attach the baseline throughput of the matching
# configuration and derive the two ratios from it:
#   Speedup    = own throughput / baseline throughput
#   InvSpeedup = baseline throughput / own throughput
# df1/df2 use the LP_AP rows as baseline ("APPerf"), df3/df4 the LP_UN rows
# ("NoTHPPerf").
for speedup_frame, throughput_column, baseline_column, baseline_lookup in (
    (df1, "MLookups/s", "APPerf", get_ap_lookups),
    (df2, "MInserts/s", "APPerf", get_ap_inserts),
    (df3, "MLookups/s", "NoTHPPerf", get_nonthp_un_lookups),
    (df4, "MInserts/s", "NoTHPPerf", get_nonthp_un_inserts),
):
    speedup_frame[baseline_column] = speedup_frame.apply(baseline_lookup, axis=1)
    speedup_frame["Speedup"] = speedup_frame[throughput_column] / speedup_frame[baseline_column]
    speedup_frame["InvSpeedup"] = speedup_frame[baseline_column] / speedup_frame[throughput_column]

# Unaligned (LP_UN) rows of the lookup (df1) and insert (df2) frames. Their
# "Speedup" is relative to the AutoPadded throughput stored in "APPerf";
# "InvSpeedup" is the reciprocal ratio.
un_reads = df1[df1["PlotIdentifier"] == "LP_UN"]
un_writes = df2[df2["PlotIdentifier"] == "LP_UN"]
amd_un_reads = un_reads[un_reads["Arch"] == "AMD x86"]
a64fx_un_reads = un_reads[un_reads["Arch"] == "A64FX ARM"]
# Unaligned+THP insert rows at load factor 70; df4's "Speedup" is relative to
# the non-THP Unaligned throughput stored in "NoTHPPerf".
thp_writes_lf70 = df4[(df4["PlotIdentifier"] == "LP_UN_THP") & (df4["LoadFactor"] == 70)]

amd_lf25_speedups = amd_un_reads[amd_un_reads["LoadFactor"] == 25]["Speedup"]
avg_unaligned_speedup = amd_lf25_speedups.mean()
max_unaligned_speedup = amd_lf25_speedups.max()
min_unaligned_speedup = amd_lf25_speedups.min()
lf70_sqr50_invspeedup = amd_un_reads[(amd_un_reads["LoadFactor"] == 70) & (amd_un_reads["SQR"] == 50)].iloc[0]["InvSpeedup"]
lf70_sqr100_speedup = amd_un_reads[(amd_un_reads["LoadFactor"] == 70) & (amd_un_reads["SQR"] == 100)].iloc[0]["Speedup"]
lf90_sqr100_speedup = amd_un_reads[(amd_un_reads["LoadFactor"] == 90) & (amd_un_reads["SQR"] == 100)].iloc[0]["Speedup"]
# Mean over all matching rows, as the variable name and the "(14) ... avg
# speedup" label promise; previously only the first matching row was used.
lf70_avg_speedup_a64fx = a64fx_un_reads[a64fx_un_reads["LoadFactor"] == 70]["Speedup"].mean()
avg_unaligned_speedup_arm_lf25 = a64fx_un_reads[a64fx_un_reads["LoadFactor"] == 25]["Speedup"].mean()
avg_unaligned_speedup_arm_lf50 = a64fx_un_reads[a64fx_un_reads["LoadFactor"] == 50]["Speedup"].mean()
avg_unaligned_speedup_arm_lf70 = a64fx_un_reads[a64fx_un_reads["LoadFactor"] == 70]["Speedup"].mean()
avg_unaligned_speedup_arm_lf90 = a64fx_un_reads[a64fx_un_reads["LoadFactor"] == 90]["Speedup"].mean()
# The maximum over all systems and load factors gets its own name. It used to
# be assigned to max_unaligned_speedup, which replaced the AMD @ LF25 maximum
# before item (2) below was printed.
overall_max_unaligned_speedup = un_reads["Speedup"].max()
max_unaligned_write_speedup = un_writes["Speedup"].max()
mean_unaligned_write_speedup_a64fx_lf70 = un_writes[(un_writes["LoadFactor"] == 70) & (un_writes["Arch"] == "A64FX ARM")]["Speedup"].mean()
intel_lf70_thp_speedup = thp_writes_lf70[thp_writes_lf70["Arch"] == "Intel x86"]["Speedup"].mean()
amd_lf70_thp_speedup = thp_writes_lf70[thp_writes_lf70["Arch"] == "AMD x86"]["Speedup"].mean()
a64fx_lf70_thp_speedup = thp_writes_lf70[thp_writes_lf70["Arch"] == "A64FX ARM"]["Speedup"].mean()
power_lf70_thp_speedup = thp_writes_lf70[thp_writes_lf70["Arch"] == "Power"]["Speedup"].mean()
thp_speedup_arm_lf90_sqr100 = df3[(df3["Arch"] == "A64FX ARM") & (df3["PlotIdentifier"] == "LP_UN_THP") & (df3["LoadFactor"] == 90) & (df3["SQR"] == 100)]["Speedup"].mean()

avg_unaligned_speedup_amd_lf70 = amd_un_reads[amd_un_reads["LoadFactor"] == 70]["Speedup"].mean()
mean_unaligned_write_speedup_amd_lf70 = un_writes[(un_writes["LoadFactor"] == 70) & (un_writes["Arch"] == "AMD x86")]["Speedup"].mean()

print(f"(1) average unaligned speedup on AMD of unaligned over autopadded @ LF25: {avg_unaligned_speedup}\n"
      f"(2) max unaligned speedup on AMD of unaligned over autopadded @ LF25: {max_unaligned_speedup}\n"
      f"(3) min unaligned speedup on AMD of unaligned over autopadded @ LF25: {min_unaligned_speedup}\n"
      f"(4) unaligned speedup on AMD of autopadded over unaligned @ LF70, SQR50: {lf70_sqr50_invspeedup}\n"
      f"(5) unaligned speedup on AMD of unaligned over autopadded @ LF70, SQR100: {lf70_sqr100_speedup}\n"
      f"(6) unaligned speedup on AMD of unaligned over autopadded @ LF90, SQR100: {lf90_sqr100_speedup}\n"
      f"(7) unaligned speedup on A64FX of unaligned over autopadded @ LF25: {avg_unaligned_speedup_arm_lf25}\n"
      f"(8) unaligned speedup on A64FX of unaligned over autopadded @ LF50: {avg_unaligned_speedup_arm_lf50}\n"
      f"(9) unaligned speedup on A64FX of unaligned over autopadded @ LF70: {avg_unaligned_speedup_arm_lf70}\n"
      f"(10) unaligned speedup on A64FX of unaligned over autopadded @ LF90: {avg_unaligned_speedup_arm_lf90}\n"
      f"(11) max speedup of unaligned over autopadded: {overall_max_unaligned_speedup}\n"
      f"(12) max speedup of unaligned over autopadded (writes): {max_unaligned_write_speedup}\n"
      f"(13) A64FX LF 70 unaligned speedup (writes): {mean_unaligned_write_speedup_a64fx_lf70}\n"
      f"(14) A64FX LF 70 avg speedup reads: {lf70_avg_speedup_a64fx}\n"
      f"(15) Intel x86 LF70 THP Write Speedup: {intel_lf70_thp_speedup}\n"
      f"(16) AMD x86 LF70 THP Write Speedup: {amd_lf70_thp_speedup}\n"
      f"(17) A64FX LF70 THP Write Speedup: {a64fx_lf70_thp_speedup}\n"
      f"(18) Power LF70 THP Write Speedup: {power_lf70_thp_speedup}\n"
      f"(19) A64FX LF90 SQR100 THP Speedup: {thp_speedup_arm_lf90_sqr100}\n"
      f"(20) AMD LF 70 Unaligned vs AP (Reads) Speedup: {avg_unaligned_speedup_amd_lf70}\n"
      f"(21) AMD LF 70 Unaligned vs AP (Writes) Speedup: {mean_unaligned_write_speedup_amd_lf70}\n"

     )

# Values stored in result_dict, rounded to two decimals.
result_dict["AmdLf25SpeedupUnVsAp"] = round(avg_unaligned_speedup, 2)
result_dict["AmdLf70Sqr50SpeedupApVsUn"] = round(lf70_sqr50_invspeedup, 2)
result_dict["AmdLf70Sqr100SpeedupUnVsAp"] = round(lf70_sqr100_speedup, 2)
result_dict["A64FXLf25SpeedupUnVsAp"] = round(avg_unaligned_speedup_arm_lf25, 2)
result_dict["A64FXLf50SpeedupUnVsAp"] = round(avg_unaligned_speedup_arm_lf50, 2)
result_dict["A64FXLf70SpeedupUnVsAp"] = round(avg_unaligned_speedup_arm_lf70, 2)
result_dict["A64FXLf90SpeedupUnVsAp"] = round(avg_unaligned_speedup_arm_lf90, 2)
result_dict["A64FXLf70UnalignedWriteSpeedup"] = round(mean_unaligned_write_speedup_a64fx_lf70, 2)
result_dict["MaxSpeedupUnVsAp"] = round(overall_max_unaligned_speedup, 2)
result_dict["MaxWriteSpeedupUnVsAp"] = round(max_unaligned_write_speedup, 2)
result_dict["A64FXLf90Sqr100SpeedupTHP"] = round(thp_speedup_arm_lf90_sqr100, 2)
result_dict["IntLf70THPWriteSpeedup"] = round(intel_lf70_thp_speedup, 2)
result_dict["AmdLf70THPWriteSpeedup"] = round(amd_lf70_thp_speedup, 2)
result_dict["A64FXLf70THPWriteSpeedup"] = round(a64fx_lf70_thp_speedup, 2)
result_dict["PwrLf70THPWriteSpeedup"] = round(power_lf70_thp_speedup, 2)



# Running label for the printed lines. The summary print earlier in this cell
# already uses (1)-(21), so numbering continues at 22 (it used to restart at
# 20 and reuse two labels).
id_ = 22

# Extremes over all systems: min/max of the per-(arch, load factor) averages,
# and the largest single-row speedup.
overall_min_speedup = None
overall_max_speedup = None
overall_max_max_speedup = None

# Lookup rows of the Unaligned+THP variant; df3's "Speedup" is relative to the
# non-THP Unaligned throughput stored in "NoTHPPerf".
thp_reads = df3[df3["PlotIdentifier"] == "LP_UN_THP"]

for arch in ["Intel x86", "AMD x86", "A64FX ARM", "Power"]:

    # Min/max of the per-load-factor averages for this system.
    min_speedup = None
    max_speedup = None

    for lf in [25, 50, 70, 90]:
        # Filter once and derive all three statistics from the same rows.
        speedups = thp_reads[(thp_reads["Arch"] == arch) & (thp_reads["LoadFactor"] == lf)]["Speedup"]
        avg_unaligned_speedup = speedups.mean()
        min_unaligned_speedup = speedups.min()
        max_unaligned_speedup = speedups.max()
        print(f"({id_}) avg speedup thp over non-thp on {arch} @ LF {lf}: {avg_unaligned_speedup}\n"
              f"({id_ + 1}) min speedup thp over non-thp on {arch} @ LF {lf}: {min_unaligned_speedup}\n"
              f"({id_ + 2}) max speedup thp over non-thp on {arch} @ LF {lf}: {max_unaligned_speedup}"
             )

        result_dict[f"{cpu_to_prefix(arch)}Lf{lf}AvgTHPSpeedup"] = round(avg_unaligned_speedup, 2)
        id_ += 3

        # The first value seeds each tracker; later values are folded in.
        min_speedup = avg_unaligned_speedup if min_speedup is None else min(min_speedup, avg_unaligned_speedup)
        max_speedup = avg_unaligned_speedup if max_speedup is None else max(max_speedup, avg_unaligned_speedup)
        overall_min_speedup = avg_unaligned_speedup if overall_min_speedup is None else min(overall_min_speedup, avg_unaligned_speedup)
        overall_max_speedup = avg_unaligned_speedup if overall_max_speedup is None else max(overall_max_speedup, avg_unaligned_speedup)
        overall_max_max_speedup = max_unaligned_speedup if overall_max_max_speedup is None else max(overall_max_max_speedup, max_unaligned_speedup)

    result_dict[f"{cpu_to_prefix(arch)}MinTHPSpeedup"] = round(min_speedup, 2)
    result_dict[f"{cpu_to_prefix(arch)}MaxTHPSpeedup"] = round(max_speedup, 2)

result_dict["OverallMinTHPSpeedup"] = round(overall_min_speedup, 2)
result_dict["OverallMaxTHPSpeedup"] = round(overall_max_speedup, 2)
result_dict["OverallMaxMaxTHPSpeedup"] = round(overall_max_max_speedup, 2)

In [None]:
# AoS vs. SoA comparison for linear probing (THP variants only). The returned
# frames are kept for the "Numbers" cell that follows the plots.
pal_dict = {
    identifier: deep_palette[position]
    for position, identifier in enumerate(["LPSoA_THP", "LPPackedSoA_THP", "LP_UN_THP"])
}
read_d, write_d = analysis_plot(
    "LinearProbing",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    hide_soa=False,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    exclude_prefixes=["LP_NA", "LP_AP"],
    file_path="full_lp_aos_vs_soa.pdf",
    legend_label_dict={
        "LP_UN_THP": "AoS(Unaligned)+THP",
        "LPPackedSoA_THP": "SoA(Packed)+THP",
        "LPSoA_THP": "SoA+THP",
    },
)

In [None]:
# AoS vs. SoA at load factor 90 only; legend labels omit the "+THP" suffix.
_ = analysis_plot(
    "LinearProbing",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [90],
    375,
    hide_soa=False,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    exclude_prefixes=["LP_NA", "LP_AP"],
    file_path="lp_aos_vs_soa.pdf",
    legend_label_dict={
        "LP_UN_THP": "AoS(Unaligned)",
        "LPPackedSoA_THP": "SoA(Packed)",
        "LPSoA_THP": "SoA",
    },
)

In [None]:
# Numbers
# Copy read_d so the columns added to df1 in this cell leave read_d untouched.
# Only lookups are analysed here; the insert-side copy stays disabled.
df1 = read_d.copy()
#df2 = write_d.copy()

def get_aos_lookups(row):
    """Return the "MLookups/s" of the LP_UN_THP row in ``df1`` matching ``row``.

    A row matches when it has the same Arch, Compiler, PageSize, HugePageSize,
    SQR, Size, Distribution and LoadFactor as ``row`` and its PlotIdentifier
    is "LP_UN_THP".

    Raises:
        AssertionError: if ``df1`` does not contain exactly one such row.
    """
    shared_columns = ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR",
                      "Size", "Distribution", "LoadFactor")
    selector = df1["PlotIdentifier"] == "LP_UN_THP"
    for column in shared_columns:
        selector &= df1[column] == row[column]
    candidates = df1[selector]
    assert len(candidates) == 1
    return candidates.iloc[0]["MLookups/s"]

#def get_ap_inserts(row):
#    assert len(df2[(df2["Arch"] == row["Arch"]) & (df2["Compiler"] == row["Compiler"]) & (df2["PageSize"] == row["PageSize"]) & (df2["HugePageSize"] == row["HugePageSize"]) & (df2["Size"] == row["Size"]) & (df2["Distribution"] == row["Distribution"]) & (df2["LoadFactor"] == row["LoadFactor"]) & (df2["PlotIdentifier"] == "LP_AP")]) == 1
#    return df2[(df2["Arch"] == row["Arch"]) & (df2["Compiler"] == row["Compiler"]) & (df2["PageSize"] == row["PageSize"]) & (df2["HugePageSize"] == row["HugePageSize"]) &  (df2["Size"] == row["Size"]) & (df2["Distribution"] == row["Distribution"]) & (df2["LoadFactor"] == row["LoadFactor"]) & (df2["PlotIdentifier"] == "LP_AP")].iloc[0]["MInserts/s"]


# NOTE: the baseline column keeps the name "APPerf", but in this cell it holds
# the lookup throughput returned by get_aos_lookups for the same configuration.
df1["APPerf"] = df1.apply(get_aos_lookups, axis=1)
df1["Speedup"], df1["InvSpeedup"] = (
    df1["MLookups/s"] / df1["APPerf"],
    df1["APPerf"] / df1["MLookups/s"],
)

#df2["APPerf"] = df2.apply(lambda row: get_ap_inserts(row), axis=1)
#df2["Speedup"] = df2["MInserts/s"] / df2["APPerf"]
#df2["InvSpeedup"] = df2["APPerf"] / df2["MInserts/s"]

# Lookup rows of the packed SoA layout, pre-filtered by the load factors that
# the statistics below use. "Speedup" / "InvSpeedup" are the ratio columns
# computed on df1 earlier in this cell.
packed_soa = df1[df1["PlotIdentifier"] == "LPPackedSoA_THP"]
packed_soa_lf25 = packed_soa[packed_soa["LoadFactor"] == 25]
packed_soa_lf90 = packed_soa[packed_soa["LoadFactor"] == 90]
packed_soa_lf25_sqr100 = packed_soa_lf25[packed_soa_lf25["SQR"] == 100]
packed_soa_lf25_sqr50 = packed_soa_lf25[packed_soa_lf25["SQR"] == 50]

avg_packedsoa_speedup_lf25_sqr0 = packed_soa_lf25[packed_soa_lf25["SQR"] == 0]["Speedup"].mean()
avg_packedsoa_speedup_lf25_sqr100 = packed_soa_lf25_sqr100["Speedup"].mean()
avg_packedsoa_invspeedup_lf25_sqr100 = packed_soa_lf25_sqr100["InvSpeedup"].mean()
avg_packedsoa_speedup_lf90_sqr0 = packed_soa_lf90[packed_soa_lf90["SQR"] == 0]["Speedup"].mean()
avg_packedsoa_speedup_lf90_sqr100 = packed_soa_lf90[packed_soa_lf90["SQR"] == 100]["Speedup"].mean()
avg_packedsoa_speedup_lf90_intel = packed_soa_lf90[packed_soa_lf90["Arch"] == "Intel x86"]["Speedup"].mean()
avg_packedsoa_speedup_lf90_amd = packed_soa_lf90[packed_soa_lf90["Arch"] == "AMD x86"]["Speedup"].mean()
avg_packedsoa_speedup_lf90_a64fx = packed_soa_lf90[packed_soa_lf90["Arch"] == "A64FX ARM"]["Speedup"].mean()
avg_packedsoa_speedup_lf90_power = packed_soa_lf90[packed_soa_lf90["Arch"] == "Power"]["Speedup"].mean()
min_packedsoa_speedup_lf25_a64fx = packed_soa_lf25[packed_soa_lf25["Arch"] == "A64FX ARM"]["Speedup"].min()
avg_packedsoa_invspeedup_lf25_sqr50_intel = packed_soa_lf25_sqr50[packed_soa_lf25_sqr50["Arch"] == "Intel x86"]["InvSpeedup"].mean()
avg_packedsoa_invspeedup_lf25_sqr50_a64fx = packed_soa_lf25_sqr50[packed_soa_lf25_sqr50["Arch"] == "A64FX ARM"]["InvSpeedup"].mean()

avg_packedsoa_speedup_lf90_allsys = packed_soa_lf90["Speedup"].mean()

# Rounded values stored in result_dict (ratios to two decimals, the A64FX
# minimum as a percentage with one decimal).
result_dict["AllSysLf90PSoAVSAoSSpeedup"] = round(avg_packedsoa_speedup_lf90_allsys, 2)

result_dict["Lf25Sqr0PSoAVSAoSSpeedup"] = round(avg_packedsoa_speedup_lf25_sqr0, 2)
result_dict["Lf25Sqr100AoSvPSoASpeedup"] = round(avg_packedsoa_invspeedup_lf25_sqr100, 2)
result_dict["IntLf90PSoAVSAoSSpeedup"] = round(avg_packedsoa_speedup_lf90_intel, 2)
result_dict["AmdLf90PSoAVSAoSSpeedup"] = round(avg_packedsoa_speedup_lf90_amd, 2)
result_dict["A64FXLf90PSoAVSAoSSpeedup"] = round(avg_packedsoa_speedup_lf90_a64fx, 2)
result_dict["PwrLf90PSoAVSAoSSpeedup"] = round(avg_packedsoa_speedup_lf90_power, 2)
result_dict["IntLf25Sqr50AoSVSPSoASpeedup"] = round(avg_packedsoa_invspeedup_lf25_sqr50_intel, 2)
result_dict["A64FXLf25Sqr50AoSVSPSoASpeedup"] = round(avg_packedsoa_invspeedup_lf25_sqr50_a64fx, 2)
result_dict["A64FXPackedSoAMaxPerfDrop"] = round(min_packedsoa_speedup_lf25_a64fx * 100, 1)

print(
    f"(1) average speedup of PackedSoA over AoS @ LF25, SQR0: {avg_packedsoa_speedup_lf25_sqr0}\n"
    f"(2) average speedup of PackedSoA over AoS @ LF25, SQR100: {avg_packedsoa_speedup_lf25_sqr100}\n"
    f"(2.5) average inv speedup of PackedSoA over AoS @ LF25, SQR100: {avg_packedsoa_invspeedup_lf25_sqr100}\n"
    f"(3) average speedup of PackedSoA over AoS @ LF90, SQR0: {avg_packedsoa_speedup_lf90_sqr0}\n"
    f"(4) average speedup of PackedSoA over AoS @ LF90, SQR100: {avg_packedsoa_speedup_lf90_sqr100}\n"
    f"(5) average speedup of PackedSoA over AoS @ LF90, Intel: {avg_packedsoa_speedup_lf90_intel}\n"
    f"(6) average speedup of PackedSoA over AoS @ LF90, AMD: {avg_packedsoa_speedup_lf90_amd}\n"
    f"(7) average speedup of PackedSoA over AoS @ LF90, A64FX: {avg_packedsoa_speedup_lf90_a64fx}\n"
    f"(8) average speedup of PackedSoA over AoS @ LF90, Power: {avg_packedsoa_speedup_lf90_power}\n"
    f"(ABC) average speedup of PackedSoA over AoS @ LF90, all sys: {avg_packedsoa_speedup_lf90_allsys}\n"
    f"(9) min speedup of PackedSoA over AoS @ LF25, A64FX: {min_packedsoa_speedup_lf25_a64fx}\n"
    f"(9) average speedup of AoS over PackedSoA @ LF25/SQR50, Intel: {avg_packedsoa_invspeedup_lf25_sqr50_intel}\n"
    f"(9) average speedup of AoS over PackedSoA @ LF25/SQR50, A64FX: {avg_packedsoa_invspeedup_lf25_sqr50_a64fx}\n"
)


In [None]:
# Prefetching variants Pref0-Pref3 against the plain Unaligned+THP table.
pal_dict = {
    identifier: deep_palette[position]
    for position, identifier in enumerate(
        ["LP_UN_Pref0_THP", "LP_UN_Pref1_THP", "LP_UN_THP", "LP_UN_Pref2_THP", "LP_UN_Pref3_THP"]
    )
}
_ = analysis_plot(
    "LinearProbing",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    hide_non_thp=True,
    show_pref=True,
    show_unroll=False,
    exclude_prefixes=["LP_NA", "LP_AP"],
    file_path="full_lp_prefetching.pdf",
    legend_label_dict={
        "LP_UN_THP": "AoS(Unaligned)",
        "LP_UN_Pref0_THP": "Pref0",
        "LP_UN_Pref1_THP": "Pref1",
        "LP_UN_Pref2_THP": "Pref2",
        "LP_UN_Pref3_THP": "Pref3",
    },
)

In [None]:
# Pref0 with additional prefetching 1, 5 and 7 against the plain
# Unaligned+THP table; the other Pref variants are excluded.
pal_dict = {
    identifier: deep_palette[position]
    for position, identifier in enumerate(
        [
            "LP_UN_Pref0_THP",
            "LP_UN_Pref0_AddPrf1_THP",
            "LP_UN_THP",
            "LP_UN_Pref0_AddPrf5_THP",
            "LP_UN_Pref0_AddPrf7_THP",
        ]
    )
}
_ = analysis_plot(
    "LinearProbing",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    hide_non_thp=True,
    show_pref=True,
    show_unroll=False,
    additional_prefetching=[1, 5, 7],
    exclude_prefixes=["LP_NA", "LP_AP", "LP_UN_Pref1", "LP_UN_Pref2", "LP_UN_Pref3"],
    file_path="full_lp_prefetching_lengths_pref0.pdf",
    legend_label_dict={
        "LP_UN_THP": "AoS(Unaligned)",
        "LP_UN_Pref0_THP": "Pref0 (3)",
        "LP_UN_Pref0_AddPrf1_THP": "Pref0 (4)",
        "LP_UN_Pref0_AddPrf5_THP": "Pref0 (8)",
        "LP_UN_Pref0_AddPrf7_THP": "Pref0 (10)",
    },
)

In [None]:
# Pref1 with additional prefetching 1, 5 and 7 against the plain
# Unaligned+THP table; the other Pref variants are excluded.
pal_dict = {
    identifier: deep_palette[position]
    for position, identifier in enumerate(
        [
            "LP_UN_Pref1_THP",
            "LP_UN_Pref1_AddPrf1_THP",
            "LP_UN_THP",
            "LP_UN_Pref1_AddPrf5_THP",
            "LP_UN_Pref1_AddPrf7_THP",
        ]
    )
}
_ = analysis_plot(
    "LinearProbing",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    hide_non_thp=True,
    show_pref=True,
    show_unroll=False,
    additional_prefetching=[1, 5, 7],
    exclude_prefixes=["LP_NA", "LP_AP", "LP_UN_Pref0", "LP_UN_Pref2", "LP_UN_Pref3"],
    file_path="full_lp_prefetching_lengths_pref1.pdf",
    legend_label_dict={
        "LP_UN_THP": "AoS(Unaligned)",
        "LP_UN_Pref1_THP": "Pref1 (3)",
        "LP_UN_Pref1_AddPrf1_THP": "Pref1 (4)",
        "LP_UN_Pref1_AddPrf5_THP": "Pref1 (8)",
        "LP_UN_Pref1_AddPrf7_THP": "Pref1 (10)",
    },
)

In [None]:
# Pref2 with additional prefetching 1, 5 and 7 against the plain
# Unaligned+THP table; the other Pref variants are excluded.
pal_dict = {
    identifier: deep_palette[position]
    for position, identifier in enumerate(
        [
            "LP_UN_Pref2_THP",
            "LP_UN_Pref2_AddPrf1_THP",
            "LP_UN_THP",
            "LP_UN_Pref2_AddPrf5_THP",
            "LP_UN_Pref2_AddPrf7_THP",
        ]
    )
}
_ = analysis_plot(
    "LinearProbing",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    hide_non_thp=True,
    show_pref=True,
    show_unroll=False,
    additional_prefetching=[1, 5, 7],
    exclude_prefixes=["LP_NA", "LP_AP", "LP_UN_Pref0", "LP_UN_Pref1", "LP_UN_Pref3"],
    file_path="full_lp_prefetching_lengths_pref2.pdf",
    legend_label_dict={
        "LP_UN_THP": "AoS(Unaligned)",
        "LP_UN_Pref2_THP": "Pref2 (3)",
        "LP_UN_Pref2_AddPrf1_THP": "Pref2 (4)",
        "LP_UN_Pref2_AddPrf5_THP": "Pref2 (8)",
        "LP_UN_Pref2_AddPrf7_THP": "Pref2 (10)",
    },
)

In [None]:
# Pref3 with additional prefetching 1, 5 and 7 against the plain
# Unaligned+THP table; the other Pref variants are excluded.
pal_dict = {
    identifier: deep_palette[position]
    for position, identifier in enumerate(
        [
            "LP_UN_Pref3_THP",
            "LP_UN_Pref3_AddPrf1_THP",
            "LP_UN_THP",
            "LP_UN_Pref3_AddPrf5_THP",
            "LP_UN_Pref3_AddPrf7_THP",
        ]
    )
}
_ = analysis_plot(
    "LinearProbing",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    hide_non_thp=True,
    show_pref=True,
    show_unroll=False,
    additional_prefetching=[1, 5, 7],
    exclude_prefixes=["LP_NA", "LP_AP", "LP_UN_Pref0", "LP_UN_Pref1", "LP_UN_Pref2"],
    file_path="full_lp_prefetching_lengths_pref3.pdf",
    legend_label_dict={
        "LP_UN_THP": "AoS(Unaligned)",
        "LP_UN_Pref3_THP": "Pref3 (3)",
        "LP_UN_Pref3_AddPrf1_THP": "Pref3 (4)",
        "LP_UN_Pref3_AddPrf5_THP": "Pref3 (8)",
        "LP_UN_Pref3_AddPrf7_THP": "Pref3 (10)",
    },
)

In [None]:
# Prefetching variants Pref0-Pref3 at load factors 50 and 90 only.
pal_dict = {
    identifier: deep_palette[position]
    for position, identifier in enumerate(
        ["LP_UN_Pref0_THP", "LP_UN_Pref1_THP", "LP_UN_THP", "LP_UN_Pref2_THP", "LP_UN_Pref3_THP"]
    )
}
_ = analysis_plot(
    "LinearProbing",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [50, 90],
    375,
    show_thp=True,
    hide_non_thp=True,
    show_pref=True,
    show_unroll=False,
    exclude_prefixes=["LP_NA", "LP_AP"],
    file_path="lp_prefetching.pdf",
    legend_label_dict={
        "LP_UN_THP": "AoS(Unaligned)",
        "LP_UN_Pref0_THP": "Pref0",
        "LP_UN_Pref1_THP": "Pref1",
        "LP_UN_Pref2_THP": "Pref2",
        "LP_UN_Pref3_THP": "Pref3",
    },
)

In [None]:
# Loop-unrolling variants (4 and 8) against the plain Unaligned+THP table.
# The returned frames are kept for the "Numbers" cell that follows the plots.
pal_dict = {
    identifier: deep_palette[position]
    for position, identifier in enumerate(["LP_UN_THP_Unroll4", "LP_UN_THP_Unroll8", "LP_UN_THP"])
}
read_d, write_d = analysis_plot(
    "LinearProbing",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    hide_non_thp=True,
    show_pref=False,
    show_unroll=True,
    exclude_prefixes=["LP_NA", "LP_AP"],
    file_path="full_lp_unrolling.pdf",
    legend_label_dict={
        "LP_UN_THP": "AoS(Unaligned)",
        "LP_UN_THP_Unroll4": "Unrolled (4)",
        "LP_UN_THP_Unroll8": "Unrolled (8)",
    },
)

In [None]:
# Unrolling variants at load factors 25 and 70 only.
_ = analysis_plot(
    "LinearProbing",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 70],
    375,
    show_thp=True,
    hide_non_thp=True,
    show_pref=False,
    show_unroll=True,
    exclude_prefixes=["LP_NA", "LP_AP"],
    file_path="lp_unrolling.pdf",
    legend_label_dict={
        "LP_UN_THP": "AoS(Unaligned)",
        "LP_UN_THP_Unroll4": "Unrolled (4)",
        "LP_UN_THP_Unroll8": "Unrolled (8)",
    },
)

In [None]:
# Numbers
# Copy read_d so the columns added to df1 in this cell leave read_d untouched.
# Only lookups are analysed here; the insert-side copy stays disabled.
df1 = read_d.copy()
#df2 = write_d.copy()

def get_aos_lookups(row):
    """Return the "MLookups/s" of the LP_UN_THP row in ``df1`` matching ``row``.

    A row matches when it has the same Arch, Compiler, PageSize, HugePageSize,
    SQR, Size, Distribution and LoadFactor as ``row`` and its PlotIdentifier
    is "LP_UN_THP".

    Raises:
        AssertionError: if ``df1`` does not contain exactly one such row.
    """
    shared_columns = ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR",
                      "Size", "Distribution", "LoadFactor")
    selector = df1["PlotIdentifier"] == "LP_UN_THP"
    for column in shared_columns:
        selector &= df1[column] == row[column]
    candidates = df1[selector]
    assert len(candidates) == 1
    return candidates.iloc[0]["MLookups/s"]

# NOTE: the baseline column keeps the name "APPerf", but in this cell it holds
# the lookup throughput returned by get_aos_lookups for the same configuration.
df1["APPerf"] = df1.apply(get_aos_lookups, axis=1)
df1["Speedup"], df1["InvSpeedup"] = (
    df1["MLookups/s"] / df1["APPerf"],
    df1["APPerf"] / df1["MLookups/s"],
)

# Lookup rows of the two unrolled variants. "Speedup" is the ratio column
# computed on df1 earlier in this cell.
unroll4 = df1[df1["PlotIdentifier"] == "LP_UN_THP_Unroll4"]
unroll8 = df1[df1["PlotIdentifier"] == "LP_UN_THP_Unroll8"]
unroll4_a64fx = unroll4[unroll4["Arch"] == "A64FX ARM"]

# The smallest speedup of Unroll8 is reported as the maximum decrease.
max_perfdecreases_unroll8 = unroll8["Speedup"].min()
avg_speedup_unroll4_intel = unroll4[unroll4["Arch"] == "Intel x86"]["Speedup"].mean()
avg_speedup_unroll4_amd = unroll4[unroll4["Arch"] == "AMD x86"]["Speedup"].mean()
avg_speedup_unroll4_a64fx = unroll4_a64fx["Speedup"].mean()
avg_speedup_unroll4_power = unroll4[unroll4["Arch"] == "Power"]["Speedup"].mean()
max_perfincrease_unroll4 = unroll4["Speedup"].max()
avg_speedup_unroll4_a64fx_lf25 = unroll4_a64fx[unroll4_a64fx["LoadFactor"] == 25]["Speedup"].mean()
avg_speedup_unroll4_a64fx_lf70 = unroll4_a64fx[unroll4_a64fx["LoadFactor"] == 70]["Speedup"].mean()

print(
    f"(1) maximum performance decrease unroll 8: {max_perfdecreases_unroll8}\n"
    f"(1.5) maximum performance increase unroll 4: {max_perfincrease_unroll4}\n"
    f"(2) average performance increase unroll 4, Intel: {avg_speedup_unroll4_intel}\n"
    f"(3) average performance increase unroll 4, AMD: {avg_speedup_unroll4_amd}\n"
    f"(4) average performance increase unroll 4, A64FX: {avg_speedup_unroll4_a64fx}\n"
    f"(5) average performance increase unroll 4, Power: {avg_speedup_unroll4_power}\n"
    f"(6) average performance increase unroll 4, A64FX @ LF25: {avg_speedup_unroll4_a64fx_lf25}\n"
    f"(7) average performance increase unroll 4, A64FX @ LF70: {avg_speedup_unroll4_a64fx_lf70}\n"
)

# Rounded values stored in result_dict; the "* 100" entries are percentages.
result_dict["LPUnrollingMaxDecrease"] = round(max_perfdecreases_unroll8 * 100, 1)
result_dict["A64FXUnrolling4Speedup"] = round(avg_speedup_unroll4_a64fx, 2)
result_dict["PwrUnrolling4Speedup"] = round(avg_speedup_unroll4_power, 2)
result_dict["AmdUnrolling4PerfDec"] = round(avg_speedup_unroll4_amd * 100, 1)
result_dict["A64FXLf25Unrolling4Speedup"] = round(avg_speedup_unroll4_a64fx_lf25, 1)
result_dict["A64FXLf70Unrolling4Speedup"] = round(avg_speedup_unroll4_a64fx_lf70, 2)
result_dict["LPUnrollingMaxIncrease"] = round(max_perfincrease_unroll4, 2)
result_dict["LPUnrollingMaxDecrease2"] = round(max_perfdecreases_unroll8, 2)

## Quadratic Probing

In [None]:
# Memory-layout/THP figure for quadratic probing; no legend relabelling.
pal_dict = {
    identifier: deep_palette[position]
    for position, identifier in enumerate(["QP_UN_THP", "QP_NA", "LP_UN", "QP_AP", "QP_UN"])
}
_ = analysis_plot(
    "QuadraticProbing",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    file_path="full_qp_memlayout_thp.pdf",
)

In [None]:
# Currently not in thesis: no file_path is passed and A64FX ARM is left out.
_ = analysis_plot(
    "QuadraticProbing",
    pal_dict,
    ["Intel x86", "AMD x86", "Power"],
    [25, 70],
    375,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
)

In [None]:
# Quadratic probing with prefetching variants. No file_path is passed, and
# A64FX ARM is not among the plotted systems.
pal_dict = {
    identifier: deep_palette[position]
    for position, identifier in enumerate(
        [
            "QP_UN_THP",
            "QP_NA",
            "LP_UN",
            "QP_AP",
            "QP_UN",
            "QP_UN_Pref3_THP",
            "QP_UN_Pref2_THP",
            "QP_UN_Pref1_THP",
            "QP_UN_Pref0_THP",
        ]
    )
}

_ = analysis_plot(
    "QuadraticProbing",
    pal_dict,
    ["Intel x86", "AMD x86", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=True,
    show_unroll=False,
    exclude_prefixes=["QP_NA", "QP_AP"],
)

## Robin Hood Hashing

In [None]:
# Robin Hood variants (recalculating vs. storing, with and without THP). The
# returned frames are kept for the "Numbers" cell that follows the plots.
pal_dict = {
    identifier: deep_palette[position]
    for position, identifier in enumerate(
        ["RecalcRH_UN_THP", "StoreRH_THP", "RecalcRH_AP", "RecalcRH_NA", "StoreRH", "RecalcRH_UN"]
    )
}

read_d, write_d = analysis_plot(
    "RobinHood",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    file_path="full_rh_memlayout_thp.pdf",
    legend_label_dict={
        "RecalcRH_UN_THP": "Recalculating(Unaligned)+THP",
        "StoreRH_THP": "Storing+THP",
        "RecalcRH_AP": "Recalculating(AP)",
        "RecalcRH_NA": "Recalculating(NA)",
        "StoreRH": "Storing",
        "RecalcRH_UN": "Recalculating(UN)",
    },
)

In [None]:
# Robin Hood THP variants only, at load factors 25 and 70.
_ = analysis_plot(
    "RobinHood",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 70],
    375,
    show_thp=True,
    hide_non_thp=True,
    show_pref=False,
    show_unroll=False,
    file_path="rh_variants.pdf",
    legend_label_dict={
        "RecalcRH_UN_THP": "Recalculating",
        "StoreRH_THP": "Storing",
    },
)

In [None]:
# Numbers
# Copy read_d so the columns added to df1 in this cell leave read_d untouched.
# Only lookups are analysed here; the insert-side copy stays disabled.
df1 = read_d.copy()
#df2 = write_d.copy()

def get_recalc_lookups(row):
    """Return the "MLookups/s" of the RecalcRH_UN_THP row in ``df1`` matching ``row``.

    A row matches when it has the same Arch, Compiler, PageSize, HugePageSize,
    SQR, Size, Distribution and LoadFactor as ``row`` and its PlotIdentifier
    is "RecalcRH_UN_THP".

    Raises:
        AssertionError: if ``df1`` does not contain exactly one such row.
    """
    shared_columns = ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR",
                      "Size", "Distribution", "LoadFactor")
    selector = df1["PlotIdentifier"] == "RecalcRH_UN_THP"
    for column in shared_columns:
        selector &= df1[column] == row[column]
    candidates = df1[selector]
    assert len(candidates) == 1
    return candidates.iloc[0]["MLookups/s"]

#def get_ap_inserts(row):
#    assert len(df2[(df2["Arch"] == row["Arch"]) & (df2["Compiler"] == row["Compiler"]) & (df2["PageSize"] == row["PageSize"]) & (df2["HugePageSize"] == row["HugePageSize"]) & (df2["Size"] == row["Size"]) & (df2["Distribution"] == row["Distribution"]) & (df2["LoadFactor"] == row["LoadFactor"]) & (df2["PlotIdentifier"] == "LP_AP")]) == 1
#    return df2[(df2["Arch"] == row["Arch"]) & (df2["Compiler"] == row["Compiler"]) & (df2["PageSize"] == row["PageSize"]) & (df2["HugePageSize"] == row["HugePageSize"]) &  (df2["Size"] == row["Size"]) & (df2["Distribution"] == row["Distribution"]) & (df2["LoadFactor"] == row["LoadFactor"]) & (df2["PlotIdentifier"] == "LP_AP")].iloc[0]["MInserts/s"]


# Attach the matching RecalcRH_UN_THP lookup throughput to every row and
# express each row's own throughput relative to it.
df1["APPerf"] = df1.apply(get_recalc_lookups, axis=1)
df1["Speedup"] = df1["MLookups/s"] / df1["APPerf"]
df1["InvSpeedup"] = df1["APPerf"] / df1["MLookups/s"]

#df2["APPerf"] = df2.apply(lambda row: get_ap_inserts(row), axis=1)
#df2["Speedup"] = df2["MInserts/s"] / df2["APPerf"]
#df2["InvSpeedup"] = df2["APPerf"] / df2["MInserts/s"]

# Row selections shared by the aggregates below.
_intel = df1["Arch"] == "Intel x86"
_storing = df1["PlotIdentifier"] == "StoreRH_THP"
_recalc = df1["PlotIdentifier"] == "RecalcRH_UN_THP"
_load_factor = df1["LoadFactor"]

avg_storespeedup_lf25_intel = df1.loc[_intel & _storing & (_load_factor == 25), "Speedup"].mean()
avg_storespeedup_lf50_intel = df1.loc[_intel & _storing & (_load_factor == 50), "Speedup"].mean()
avg_storespeedup_lf70_intel = df1.loc[_intel & _storing & (_load_factor == 70), "Speedup"].mean()
avg_storespeedup_lf90_intel = df1.loc[_intel & _storing & (_load_factor == 90), "Speedup"].mean()
avg_storespeedup_alllfs_intel = df1.loc[_intel & _storing, "Speedup"].mean()

max_storespeedup = df1.loc[_storing, "Speedup"].max()

throughput_rh_recalc_lf70 = df1.loc[_intel & _recalc & (_load_factor == 70), "MLookups/s"].mean()
throughput_rh_storing_lf90 = df1.loc[_intel & _storing & (_load_factor == 90), "MLookups/s"].mean()

print(
    f"(1) average speedup storing over unaligned recalc <Intel, LF25>: {avg_storespeedup_lf25_intel}\n"
    f"(2) average speedup storing over unaligned recalc <Intel, LF50>: {avg_storespeedup_lf50_intel}\n"
    f"(3) average speedup storing over unaligned recalc <Intel, LF70>: {avg_storespeedup_lf70_intel}\n"
    f"(4) average speedup storing over unaligned recalc <Intel, LF90>: {avg_storespeedup_lf90_intel}\n"
    f"(5) max speedup storing over unaligned recalc: {max_storespeedup}\n"
    f"(6) average speedup storing over unaligned recalc <Intel, all lfs>: {avg_storespeedup_alllfs_intel}\n"
    f"(7) throughput recalc lf 70 <Intel, LF70>: {throughput_rh_recalc_lf70}\n"
    f"(8) throughput storing lf 90 <Intel, LF90>: {throughput_rh_storing_lf90}\n"
)

# Only the Intel averages are exported; values are rounded to two decimals.
for _key, _value in (
    ("IntLf25RHStoreSpeedup", avg_storespeedup_lf25_intel),
    ("IntLf50RHStoreSpeedup", avg_storespeedup_lf50_intel),
    ("IntLf70RHStoreSpeedup", avg_storespeedup_lf70_intel),
    ("IntLf90RHStoreSpeedup", avg_storespeedup_lf90_intel),
    ("IntAllLFRHStoreSpeedup", avg_storespeedup_alllfs_intel),
):
    result_dict[_key] = round(_value, 2)





## Comparing the Different Schemes (MA)

In [None]:
# Five-colour palette for the scheme comparison, keyed by plot identifier.
deep_palette2 = sns.color_palette("deep", n_colors=5)
compare_pal = {
    "RecalcRH_UN_THP": deep_palette2[0],
    "StoreRH_THP": deep_palette2[1],
    "QP_UN_THP": deep_palette2[2],
    "LP_UN_THP": deep_palette2[3],
    "LPPackedSoA_THP": deep_palette2[4],
}

# Full comparison figure; the two returned frames are kept for the "Numbers" cell.
read_d, write_d = analysis_plot(
    "",
    compare_pal,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    800,
    show_thp=False,
    show_pref=False,
    show_unroll=False,
    exclude_prefixes=[
        "Chain", "StoreRH", "LP_AP", "QP_UN", "QP_NA", "LP_NA", "RecalcRH_AP",
        "LP_UN", "RecalcRH_NA", "RecalcRH_UN", "QP_AP",
        "UnchkdSIMD_NOTESTZ_AVX512", "UnchkdSIMD_TESTZ", "UnchkdSIMD_NOTESTZ",
        "UnchkdSIMD_TESTZ_AVX512", "FingerPSIMD",
    ],
    force_include=[
        "RecalcRH_UN_THP", "StoreRH_THP", "QP_UN_THP", "LP_UN_THP", "LPPackedSoA_THP",
    ],
    legend_label_dict={
        "RecalcRH_UN_THP": "RecalcRH(Unal.)",
        "StoreRH_THP": "StoringRH",
        "QP_UN_THP": "QP(Unal.)",
        "LP_UN_THP": "LP(AoS/Unal.)",
        "LPPackedSoA_THP": "LP(SoA/Packed)",
    },
    file_path="full_linear_baseline.pdf",
)

In [None]:
# Reduced comparison figure; the returned frames are discarded.
_ = analysis_plot(
    "",
    compare_pal,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 70],
    375,
    force_include=[
        "RecalcRH_UN_THP", "StoreRH_THP", "QP_UN_THP", "LP_UN_THP", "LPPackedSoA_THP",
    ],
    exclude_prefixes=[
        "Chain", "UnchkdSIMD", "BucketSIMD", "FingerPSIMD", "StoreRH", "LP_AP",
        "QP_UN", "QP_NA", "LP_NA", "RecalcRH_AP", "LP_UN", "RecalcRH_NA",
        "RecalcRH_UN", "QP_AP", "UnchkdSIMD_NOTESTZ_AVX512", "UnchkdSIMD_TESTZ",
        "UnchkdSIMD_NOTESTZ", "UnchkdSIMD_TESTZ_AVX512",
        "UnchkdSIMD_MOM_128_THP_Unroll8", "UnchkdSIMD_MOM_256_THP_Unroll8",
        "MRHFlat_THP", "F14V_THP", "UnchkdSIMD_MOM_128_THP", "AbseilFlat_THP",
        "UnchkdSIMD_MOM_256_THP", "UnchkdSIMD_MOM_512_AVX512_THP_Unroll8",
        "UnchkdSIMD_MOM_512_AVX512_THP",
    ],
    legend_label_dict={
        "RecalcRH_UN_THP": "RecalcRH(Unal.)",
        "StoreRH_THP": "StoringRH",
        "QP_UN_THP": "QP(Unal.)",
        "LP_UN_THP": "LP(AoS/Unal.)",
        "LPPackedSoA_THP": "LP(SoA/Packed)",
    },
    ncol_legend=3,
    file_path="linear_baseline.pdf",
)

In [None]:
# Numbers: derive the scheme-comparison figures from the frames returned by analysis_plot.
# Work on copies so that the columns added further down do not end up in read_d / write_d.
df1 = read_d.copy()
df2 = write_d.copy()

def get_lp_lookups(row):
    """Return the LP_UN_THP "MLookups/s" value measured under the same setup as *row*.

    Searches the module-level frame ``df1`` for the row whose "PlotIdentifier"
    is "LP_UN_THP" and whose "Arch", "Compiler", "PageSize", "HugePageSize",
    "SQR", "Size", "Distribution" and "LoadFactor" values equal those of *row*.

    Raises:
        AssertionError: if ``df1`` does not contain exactly one such row.
    """
    key_columns = ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR",
                   "Size", "Distribution", "LoadFactor")
    # Build the selection mask once and reuse it for both the uniqueness
    # check and the lookup.
    mask = df1["PlotIdentifier"] == "LP_UN_THP"
    for column in key_columns:
        mask &= df1[column] == row[column]
    baseline = df1.loc[mask, "MLookups/s"]
    assert len(baseline) == 1, (
        f"expected exactly one LP_UN_THP baseline row, found {len(baseline)}"
    )
    return baseline.iloc[0]

def get_rhstoring_inserts(row):
    """Return the StoreRH_THP "MInserts/s" value measured under the same setup as *row*.

    Searches the module-level frame ``df2`` for the row whose "PlotIdentifier"
    is "StoreRH_THP" and whose "Arch", "Compiler", "PageSize", "HugePageSize",
    "Size", "Distribution" and "LoadFactor" values equal those of *row*
    ("SQR" is not part of the match for the insert frame).

    Raises:
        AssertionError: if ``df2`` does not contain exactly one such row.
    """
    key_columns = ("Arch", "Compiler", "PageSize", "HugePageSize",
                   "Size", "Distribution", "LoadFactor")
    # Build the selection mask once and reuse it for both the uniqueness
    # check and the lookup.
    mask = df2["PlotIdentifier"] == "StoreRH_THP"
    for column in key_columns:
        mask &= df2[column] == row[column]
    baseline = df2.loc[mask, "MInserts/s"]
    assert len(baseline) == 1, (
        f"expected exactly one StoreRH_THP baseline row, found {len(baseline)}"
    )
    return baseline.iloc[0]


# Lookups: throughput of every row relative to the matching LP_UN_THP row.
df1["APPerf"] = df1.apply(get_lp_lookups, axis=1)
df1["Speedup"] = df1["MLookups/s"] / df1["APPerf"]
df1["InvSpeedup"] = df1["APPerf"] / df1["MLookups/s"]

# Inserts: throughput of every row relative to the matching StoreRH_THP row.
df2["APPerf"] = df2.apply(get_rhstoring_inserts, axis=1)
df2["Speedup"] = df2["MInserts/s"] / df2["APPerf"]
df2["InvSpeedup"] = df2["APPerf"] / df2["MInserts/s"]

# Lookup speedup of StoreRH_THP at load factor 70, one average per architecture.
_store_lf70 = df1[(df1["PlotIdentifier"] == "StoreRH_THP") & (df1["LoadFactor"] == 70)]
avg_storespeedup_lf70_intel = _store_lf70.loc[_store_lf70["Arch"] == "Intel x86", "Speedup"].mean()
avg_storespeedup_lf70_amd = _store_lf70.loc[_store_lf70["Arch"] == "AMD x86", "Speedup"].mean()
avg_storespeedup_lf70_a64fx = _store_lf70.loc[_store_lf70["Arch"] == "A64FX ARM", "Speedup"].mean()
avg_storespeedup_lf70_power = _store_lf70.loc[_store_lf70["Arch"] == "Power", "Speedup"].mean()

# Insert speedup of LP_UN_THP on Intel, one average per load factor.
_lp_intel_inserts = df2[(df2["Arch"] == "Intel x86") & (df2["PlotIdentifier"] == "LP_UN_THP")]
avg_insertspeedup_lpoverrh_lf50_intel = _lp_intel_inserts.loc[_lp_intel_inserts["LoadFactor"] == 50, "Speedup"].mean()
avg_insertspeedup_lpoverrh_lf70_intel = _lp_intel_inserts.loc[_lp_intel_inserts["LoadFactor"] == 70, "Speedup"].mean()
avg_insertspeedup_lpoverrh_lf90_intel = _lp_intel_inserts.loc[_lp_intel_inserts["LoadFactor"] == 90, "Speedup"].mean()

print(
    f"(1) average speedup storing over lp un thp <Intel, LF70>: {avg_storespeedup_lf70_intel}\n"
    f"(2) average speedup storing over lp un thp <AMD, LF70>: {avg_storespeedup_lf70_amd}\n"
    f"(3) average speedup storing over lp un thp <A64FX, LF70>: {avg_storespeedup_lf70_a64fx}\n"
    f"(4) average speedup storing over lp un thp <Power, LF70>: {avg_storespeedup_lf70_power}\n"
    f"(5) average insert speedup lp over storing <Intel, LF50>: {avg_insertspeedup_lpoverrh_lf50_intel}\n"
    f"(6) average insert speedup lp over storing <Intel, LF70>: {avg_insertspeedup_lpoverrh_lf70_intel}\n"
    f"(7) average insert speedup lp over storing <Intel, LF90>: {avg_insertspeedup_lpoverrh_lf90_intel}\n"
)

# Export the averages, rounded to two decimals.
for _key, _value in (
    ("IntLf70StoreRHVSLp", avg_storespeedup_lf70_intel),
    ("AmdLf70StoreRHVSLp", avg_storespeedup_lf70_amd),
    ("A64FXLf70StoreRHVSLp", avg_storespeedup_lf70_a64fx),
    ("PwrLf70StoreRHVSLp", avg_storespeedup_lf70_power),
    ("IntLf50LpVSStoreRHInsert", avg_insertspeedup_lpoverrh_lf50_intel),
    ("IntLf70LpVSStoreRHInsert", avg_insertspeedup_lpoverrh_lf70_intel),
    ("IntLf90LpVSStoreRHInsert", avg_insertspeedup_lpoverrh_lf90_intel),
):
    result_dict[_key] = round(_value, 2)



## Paper Comparison Plot

In [None]:
# Fixed hex colours for the paper figures, keyed by plot identifier.
compare_pal = {
    "LP_UN_THP": "#a1dab4",
    "RecalcRH_UN_THP": "#2c7fb8",
    "Chain_BudKV_THP": "#253494",
    "LPPackedSoA_THP": "#41b6c4",
}

# Full paper comparison figure; the two returned frames are kept for the "Numbers" cell.
read_d, write_d = analysis_plot(
    "",
    compare_pal,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    800,
    show_thp=False,
    show_pref=False,
    show_unroll=False,
    exclude_prefixes=[
        "StoreRH", "LP_AP", "QP_UN", "QP_NA", "LP_NA", "RecalcRH_AP", "LP_UN",
        "RecalcRH_NA", "RecalcRH_UN", "QP_AP", "UnchkdSIMD_NOTESTZ_AVX512",
        "UnchkdSIMD_TESTZ", "UnchkdSIMD_NOTESTZ", "UnchkdSIMD_TESTZ_AVX512",
        "FingerPSIMD",
    ],
    # "QP_UN_THP" and "StoreRH_THP" are intentionally not forced in here.
    force_include=["RecalcRH_UN_THP", "LP_UN_THP", "LPPackedSoA_THP", "Chain_BudKV_THP"],
    legend_label_dict={
        "RecalcRH_UN_THP": "RecalcRH",
        "StoreRH_THP": "StoringRH",
        "QP_UN_THP": "QP(Unal.)",
        "LP_UN_THP": "LP(AoS)",
        "LPPackedSoA_THP": "LP(SoA)",
        "Chain_BudKV_THP": "Chained",
    },
    file_path="full_linear_baseline_paper.pdf",
)

In [None]:
# Compact paper figure; the returned frames are discarded.
_ = analysis_plot(
    "",
    compare_pal,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [50, 90],
    240,
    ncol_legend=4,
    legend_height_shift=-0.05,
    ylabel_padding=0,
    xlabel_padding=0,
    show_thp=False,
    show_pref=False,
    show_unroll=False,
    extra_legend_columnspacing=0.2,
    extra_legend_handlelength=0.7,
    exclude_prefixes=[
        "StoreRH", "LP_AP", "QP_UN", "QP_NA", "LP_NA", "RecalcRH_AP", "LP_UN",
        "RecalcRH_NA", "RecalcRH_UN", "QP_AP", "UnchkdSIMD_NOTESTZ_AVX512",
        "UnchkdSIMD_TESTZ", "UnchkdSIMD_NOTESTZ", "UnchkdSIMD_TESTZ_AVX512",
        "FingerPSIMD",
    ],
    force_include=["RecalcRH_UN_THP", "LP_UN_THP", "LPPackedSoA_THP", "Chain_BudKV_THP"],
    legend_label_dict={
        "RecalcRH_UN_THP": "RecalcRH",
        "StoreRH_THP": "StoringRH",
        "QP_UN_THP": "QP(Unal.)",
        "LP_UN_THP": "LP(AoS)",
        "LPPackedSoA_THP": "LP(SoA)",
        "Chain_BudKV_THP": "Chained",
    },
    file_path="paper_classic_baseline.pdf",
    label_order=[2, 1, 3, 0],
    disable_legend_border=True,
    reduce_xticks=True,
    reduce_yticks=True,
    reduce_minor=True,
    hue_order=["Chain_BudKV_THP", "RecalcRH_UN_THP", "LP_UN_THP", "LPPackedSoA_THP"],
)

In [None]:

# Numbers: derive the paper-comparison figures from the frames returned by analysis_plot.
# Work on copies so that the columns added further down do not end up in read_d / write_d.
df1 = read_d.copy()
df2 = write_d.copy()

def get_lp_lookups(row):
    """Return the LP_UN_THP "MLookups/s" value measured under the same setup as *row*.

    Searches the module-level frame ``df1`` for the row whose "PlotIdentifier"
    is "LP_UN_THP" and whose "Arch", "Compiler", "PageSize", "HugePageSize",
    "SQR", "Size", "Distribution" and "LoadFactor" values equal those of *row*.

    Raises:
        AssertionError: if ``df1`` does not contain exactly one such row.
    """
    key_columns = ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR",
                   "Size", "Distribution", "LoadFactor")
    # Build the selection mask once and reuse it for both the uniqueness
    # check and the lookup.
    mask = df1["PlotIdentifier"] == "LP_UN_THP"
    for column in key_columns:
        mask &= df1[column] == row[column]
    baseline = df1.loc[mask, "MLookups/s"]
    assert len(baseline) == 1, (
        f"expected exactly one LP_UN_THP baseline row, found {len(baseline)}"
    )
    return baseline.iloc[0]



# Throughput of every row relative to the matching LP_UN_THP row.
df1["APPerf"] = df1.apply(get_lp_lookups, axis=1)
df1["Speedup"] = df1["MLookups/s"] / df1["APPerf"]
df1["InvSpeedup"] = df1["APPerf"] / df1["MLookups/s"]

# Chain_BudKV_THP at load factor 50 and SQR 0, one average per architecture.
_chain_lf50_sqr0 = df1[
    (df1["PlotIdentifier"] == "Chain_BudKV_THP")
    & (df1["LoadFactor"] == 50)
    & (df1["SQR"] == 0)
]
avg_chainspeedup_lf50_sqr0_intel = _chain_lf50_sqr0.loc[_chain_lf50_sqr0["Arch"] == "Intel x86", "Speedup"].mean()
avg_chainspeedup_lf50_sqr0_amd = _chain_lf50_sqr0.loc[_chain_lf50_sqr0["Arch"] == "AMD x86", "Speedup"].mean()
avg_chainspeedup_lf50_sqr0_a64fx = _chain_lf50_sqr0.loc[_chain_lf50_sqr0["Arch"] == "A64FX ARM", "Speedup"].mean()
avg_chainspeedup_lf50_sqr0_power = _chain_lf50_sqr0.loc[_chain_lf50_sqr0["Arch"] == "Power", "Speedup"].mean()

# LPPackedSoA_THP at load factor 90 (all SQR values), one average per architecture.
_packed_soa_lf90 = df1[(df1["PlotIdentifier"] == "LPPackedSoA_THP") & (df1["LoadFactor"] == 90)]
intel_soa_vs_aos = _packed_soa_lf90.loc[_packed_soa_lf90["Arch"] == "Intel x86", "Speedup"].mean()
amd_soa_vs_aos = _packed_soa_lf90.loc[_packed_soa_lf90["Arch"] == "AMD x86", "Speedup"].mean()
arm_soa_vs_aos = _packed_soa_lf90.loc[_packed_soa_lf90["Arch"] == "A64FX ARM", "Speedup"].mean()
power_soa_vs_aos = _packed_soa_lf90.loc[_packed_soa_lf90["Arch"] == "Power", "Speedup"].mean()

print(
    f"(1) average speedup chain over lp un thp <Intel, LF50, SQR0>: {avg_chainspeedup_lf50_sqr0_intel}\n"
    f"(2) average speedup chain over lp un thp <AMD, LF50, SQR0>: {avg_chainspeedup_lf50_sqr0_amd}\n"
    f"(3) average speedup chain over lp un thp <A64FX, LF50, SQR0>: {avg_chainspeedup_lf50_sqr0_a64fx}\n"
    f"(4) average speedup chain over lp un thp <Power, LF50, SQR0>: {avg_chainspeedup_lf50_sqr0_power}\n"
    f"(5) speedup soa vs aos <Intel, LF90>: {intel_soa_vs_aos}\n"
    f"(6) speedup soa vs aos <AMD, LF90>: {amd_soa_vs_aos}\n"
    f"(7) speedup soa vs aos <ARM, LF90>: {arm_soa_vs_aos}\n"
    f"(8) speedup soa vs aos <Power, LF90>: {power_soa_vs_aos}\n"
)

# Only the chaining averages are exported; values are rounded to two decimals.
for _key, _value in (
    ("IntLf50Sqr0ChainVsAoS", avg_chainspeedup_lf50_sqr0_intel),
    ("AmdLf50Sqr0ChainVsAoS", avg_chainspeedup_lf50_sqr0_amd),
    ("A64FXLf50Sqr0ChainVsAoS", avg_chainspeedup_lf50_sqr0_a64fx),
    ("PwrLf50Sqr0ChainVsAoS", avg_chainspeedup_lf50_sqr0_power),
):
    result_dict[_key] = round(_value, 2)


In [None]:
# Write every value collected in result_dict so far as a \DefineResult macro
# into performance_results.tex.
# NOTE(review): the cells below keep adding entries to result_dict; those only
# reach the file if this call is executed again afterwards — confirm intended.
generate_result_tex_file(result_dict, "performance_results.tex")

# Simple SIMD Analysis

## TEST vs NOTEST

In [None]:
# Colours for the two 128-bit VLP variants, keyed by plot identifier.
pal_dict = {
    "UnchkdSIMD_TESTZ_128_THP": deep_palette[0],
    "UnchkdSIMD_NOTESTZ_128_THP": deep_palette[1],
}

# Full TEST vs NOTEST figure; the two returned frames are kept for the "Numbers" cell.
read_d, write_d = analysis_plot(
    "UnchunkedSIMDSoAHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    show_avx512=False,
    hide_non_thp=True,
    hide_simd_larger_128=True,
    exclude_prefixes=["UnchkdSIMD_MOM"],
    hide_svbcast=True,
    hide_non_s2n=True,
    word_filter=["_NEON_S2N"],
    legend_label_dict={
        "UnchkdSIMD_TESTZ_128_THP": "VLP (TEST)",
        "UnchkdSIMD_NOTESTZ_128_THP": "VLP (NOTEST)",
    },
    file_path="full_vlp_test_vs_notest.pdf",
)

In [None]:
# Reduced TEST vs NOTEST figure. The returned frames are deliberately discarded
# (as for every other reduced plot in this notebook): the "Numbers" cell below
# evaluates LoadFactor == 50, which is not in the [25, 70] list passed here, so
# read_d / write_d must keep the results of the full plot above.
# NOTE(review): assumes the [25, 70] argument restricts the load factors in the
# returned frames — if it does not, this change is a no-op for the numbers.
_ = analysis_plot(
    "UnchunkedSIMDSoAHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 70],
    375,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    show_avx512=False,
    hide_non_thp=True,
    hide_simd_larger_128=True,
    exclude_prefixes=["UnchkdSIMD_MOM"],
    hide_svbcast=True,
    hide_non_s2n=True,
    word_filter=["_NEON_S2N"],
    legend_label_dict={
        "UnchkdSIMD_TESTZ_128_THP": "VLP(TEST)",
        "UnchkdSIMD_NOTESTZ_128_THP": "VLP(NOTEST)",
    },
    file_path="vlp_test_vs_notest.pdf",
)

In [None]:
# Numbers: derive the TEST vs NOTEST figures from the frames returned by analysis_plot.
# Work on a copy so that the columns added further down do not end up in read_d.
df1 = read_d.copy()
# The second returned frame (write_d) is not evaluated in this cell.
#df2 = write_d.copy()

def get_notest_lookups(row):
    """Return the UnchkdSIMD_NOTESTZ_128_THP "MLookups/s" value measured under the same setup as *row*.

    Searches the module-level frame ``df1`` for the row whose "PlotIdentifier"
    is "UnchkdSIMD_NOTESTZ_128_THP" and whose "Arch", "Compiler", "PageSize",
    "HugePageSize", "SQR", "Size", "Distribution" and "LoadFactor" values
    equal those of *row*.

    Raises:
        AssertionError: if ``df1`` does not contain exactly one such row.
    """
    key_columns = ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR",
                   "Size", "Distribution", "LoadFactor")
    # Build the selection mask once and reuse it for both the uniqueness
    # check and the lookup.
    mask = df1["PlotIdentifier"] == "UnchkdSIMD_NOTESTZ_128_THP"
    for column in key_columns:
        mask &= df1[column] == row[column]
    baseline = df1.loc[mask, "MLookups/s"]
    assert len(baseline) == 1, (
        f"expected exactly one UnchkdSIMD_NOTESTZ_128_THP baseline row, found {len(baseline)}"
    )
    return baseline.iloc[0]

#def get_ap_inserts(row):
#    assert len(df2[(df2["Arch"] == row["Arch"]) & (df2["Compiler"] == row["Compiler"]) & (df2["PageSize"] == row["PageSize"]) & (df2["HugePageSize"] == row["HugePageSize"]) & (df2["Size"] == row["Size"]) & (df2["Distribution"] == row["Distribution"]) & (df2["LoadFactor"] == row["LoadFactor"]) & (df2["PlotIdentifier"] == "LP_AP")]) == 1
#    return df2[(df2["Arch"] == row["Arch"]) & (df2["Compiler"] == row["Compiler"]) & (df2["PageSize"] == row["PageSize"]) & (df2["HugePageSize"] == row["HugePageSize"]) &  (df2["Size"] == row["Size"]) & (df2["Distribution"] == row["Distribution"]) & (df2["LoadFactor"] == row["LoadFactor"]) & (df2["PlotIdentifier"] == "LP_AP")].iloc[0]["MInserts/s"]


# Throughput of every row relative to the matching UnchkdSIMD_NOTESTZ_128_THP row.
df1["APPerf"] = df1.apply(get_notest_lookups, axis=1)
df1["Speedup"] = df1["MLookups/s"] / df1["APPerf"]
df1["InvSpeedup"] = df1["APPerf"] / df1["MLookups/s"]

#df2["APPerf"] = df2.apply(lambda row: get_ap_inserts(row), axis=1)
#df2["Speedup"] = df2["MInserts/s"] / df2["APPerf"]
#df2["InvSpeedup"] = df2["APPerf"] / df2["MInserts/s"]

# UnchkdSIMD_TESTZ_128_THP at load factor 50 and SQR 0: per architecture and overall.
_test_lf50_sqr0 = df1[
    (df1["PlotIdentifier"] == "UnchkdSIMD_TESTZ_128_THP")
    & (df1["LoadFactor"] == 50)
    & (df1["SQR"] == 0)
]
avg_speedup_lf50_sqr0_intel = _test_lf50_sqr0.loc[_test_lf50_sqr0["Arch"] == "Intel x86", "Speedup"].mean()
avg_speedup_lf50_sqr0_amd = _test_lf50_sqr0.loc[_test_lf50_sqr0["Arch"] == "AMD x86", "Speedup"].mean()
avg_speedup_lf50_sqr0_a64fx = _test_lf50_sqr0.loc[_test_lf50_sqr0["Arch"] == "A64FX ARM", "Speedup"].mean()
avg_speedup_lf50_sqr0_power = _test_lf50_sqr0.loc[_test_lf50_sqr0["Arch"] == "Power", "Speedup"].mean()
avg_speedup_lf50_sqr0_allsrv = _test_lf50_sqr0["Speedup"].mean()

print(
    f"(1) average speedup TEST over NOTEST <Intel, LF50, SQR0>: {avg_speedup_lf50_sqr0_intel}\n"
    f"(2) average speedup TEST over NOTEST <AMD, LF50, SQR0>: {avg_speedup_lf50_sqr0_amd}\n"
    f"(3) average speedup TEST over NOTEST <A64FX, LF50, SQR0>: {avg_speedup_lf50_sqr0_a64fx}\n"
    f"(4) average speedup TEST over NOTEST <Power, LF50, SQR0>: {avg_speedup_lf50_sqr0_power}\n"
    f"(5) average speedup TEST over NOTEST <all servers, LF50, SQR0>: {avg_speedup_lf50_sqr0_allsrv}\n"
)

# Export the averages, rounded to two decimals.
for _key, _value in (
    ("IntLf50Sqr0VLPTestVSNotest", avg_speedup_lf50_sqr0_intel),
    ("AmdLf50Sqr0VLPTestVSNotest", avg_speedup_lf50_sqr0_amd),
    ("A64FXLf50Sqr0VLPTestVSNotest", avg_speedup_lf50_sqr0_a64fx),
    ("PwrLf50Sqr0VLPTestVSNotest", avg_speedup_lf50_sqr0_power),
    ("AllSrvLf50Sqr0VLPTestVSNotest", avg_speedup_lf50_sqr0_allsrv),
):
    result_dict[_key] = round(_value, 2)


## X86 - SSE/AVX2 vs AVX512

In [None]:
# Colours for the VLP variants with and without AVX-512, keyed by plot identifier.
pal_dict = {
    "UnchkdSIMD_TESTZ_THP": deep_palette[0],
    "UnchkdSIMD_TESTZ_AVX512_THP": deep_palette[1],
}

# Intel-only figure; only the first returned frame is kept for the "Numbers" cell.
read_d, _ = analysis_plot(
    "UnchunkedSIMDSoAHashTable",
    pal_dict,
    ["Intel x86"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    hide_non_thp=True,
    show_pref=False,
    show_unroll=False,
    show_avx512=True,
    ncol_legend=4,
    split_by_simd_size=True,
    simd_sizes=["128", "256"],
    exclude_prefixes=["UnchkdSIMD_MOM", "UnchkdSIMD_NOTESTZ"],
    file_path="full_vlp_avx512.pdf",
)

In [None]:
# Show which distinct plot identifiers the returned read frame contains
# (cell output only; nothing is stored).
read_d["PlotIdentifier"].unique()

In [None]:
# Numbers: derive the AVX-512 figures from the read frame returned by analysis_plot.
# Work on a copy so that the columns added further down do not end up in read_d.
df1 = read_d.copy()
# No write frame is evaluated in this cell.
#df2 = write_d.copy()


def get_nonavx128_lookups(row):
    """Return the UnchkdSIMD_TESTZ_128_THP "MLookups/s" value measured under the same setup as *row*.

    Searches the module-level frame ``df1`` for the row whose "PlotIdentifier"
    is "UnchkdSIMD_TESTZ_128_THP" and whose "Arch", "Compiler", "PageSize",
    "HugePageSize", "SQR", "Size", "Distribution" and "LoadFactor" values
    equal those of *row*.

    Raises:
        AssertionError: if ``df1`` does not contain exactly one such row.
    """
    key_columns = ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR",
                   "Size", "Distribution", "LoadFactor")
    # Build the selection mask once and reuse it for both the uniqueness
    # check and the lookup.
    mask = df1["PlotIdentifier"] == "UnchkdSIMD_TESTZ_128_THP"
    for column in key_columns:
        mask &= df1[column] == row[column]
    baseline = df1.loc[mask, "MLookups/s"]
    assert len(baseline) == 1, (
        f"expected exactly one UnchkdSIMD_TESTZ_128_THP baseline row, found {len(baseline)}"
    )
    return baseline.iloc[0]


# Throughput of every row relative to the matching UnchkdSIMD_TESTZ_128_THP row.
df1["APPerf"] = df1.apply(get_nonavx128_lookups, axis=1)
df1["Speedup"] = df1["MLookups/s"] / df1["APPerf"]
df1["InvSpeedup"] = df1["APPerf"] / df1["MLookups/s"]

#df2["APPerf"] = df2.apply(lambda row: get_ap_inserts(row), axis=1)
#df2["Speedup"] = df2["MInserts/s"] / df2["APPerf"]
#df2["InvSpeedup"] = df2["APPerf"] / df2["MInserts/s"]

# 128-bit AVX-512 variant on Intel at load factor 70.
_avx512_128_lf70_intel = (
    (df1["Arch"] == "Intel x86")
    & (df1["PlotIdentifier"] == "UnchkdSIMD_TESTZ_128_AVX512_THP")
    & (df1["LoadFactor"] == 70)
)
avg_avx_lf70_sse = df1.loc[_avx512_128_lf70_intel, "Speedup"].mean()

print(
    f"(1) average speedup AVX-512 over non AVX-512 <128-bit register, lf70>: {avg_avx_lf70_sse}\n"
)

# Export the average, rounded to two decimals.
result_dict["IntLf70AVX512vsSSEVLP"] = round(avg_avx_lf70_sse, 2)


## ARM - SVBCast and UMINV

In [None]:
# Colours for the A64FX VLP variants, keyed by plot identifier.
pal_dict = {
    "UnchkdSIMD_TESTZ_THP": deep_palette[0],
    "UnchkdSIMD_TESTZ_UMINV_THP": deep_palette[1],
    "UnchkdSIMD_TESTZ_SVBCast_THP": deep_palette[2],
}

# Full A64FX figure; the two returned frames are kept for the "Numbers" cell.
read_d, write_d = analysis_plot(
    "UnchunkedSIMDSoAHashTable",
    pal_dict,
    ["A64FX ARM"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_unroll=False,
    hide_non_thp=True,
    ncol_legend=4,
    split_by_simd_size=True,
    hide_svbcast=False,
    hide_non_s2n=False,
    simd_sizes=["128", "512"],
    word_filter=["_NEON_S2N"],
    exclude_prefixes=[
        "UnchkdSIMD_MOM",
        "UnchkdSIMD_NOTESTZ",
        "UnchkdSIMD_TESTZ_128_NEON_A64MM",
    ],
    legend_label_dict={
        "UnchkdSIMD_TESTZ_THP": "VLP (TEST)",
        "UnchkdSIMD_TESTZ_UMINV_THP": "VLP (TEST/UMINV)",
        "UnchkdSIMD_TESTZ_SVBCast_THP": "VLP (TEST/SVBCast)",
    },
    file_path="full_a64fx_uminv_svbcast.pdf",
)

In [None]:
# Reduced A64FX figure; the returned frames are discarded.
_ = analysis_plot(
    "UnchunkedSIMDSoAHashTable",
    pal_dict,
    ["A64FX ARM"],
    [25, 70],
    375,
    show_thp=True,
    show_unroll=False,
    hide_non_thp=True,
    ncol_legend=4,
    split_by_simd_size=True,
    hide_svbcast=False,
    hide_non_s2n=False,
    simd_sizes=["128", "512"],
    word_filter=["_NEON_S2N"],
    exclude_prefixes=[
        "UnchkdSIMD_MOM",
        "UnchkdSIMD_NOTESTZ",
        "UnchkdSIMD_TESTZ_128_NEON_A64MM",
    ],
    legend_label_dict={
        "UnchkdSIMD_TESTZ_THP": "VLP(TEST)",
        "UnchkdSIMD_TESTZ_UMINV_THP": "VLP(TEST/UMINV)",
        "UnchkdSIMD_TESTZ_SVBCast_THP": "VLP(TEST/SVBCast)",
    },
    legend_height_shift=-0.14,
    file_path="a64fx_uminv_svbcast.pdf",
)

In [None]:
# Numbers: derive the A64FX UMINV figures from the frames returned by analysis_plot.
# Work on a copy so that the columns added further down do not end up in read_d.
df1 = read_d.copy()
# The second returned frame (write_d) is not evaluated in this cell.
#df2 = write_d.copy()

def get_s2n_lookups(row):
    """Return the UnchkdSIMD_TESTZ_128_THP "MLookups/s" value measured under the same setup as *row*.

    Searches the module-level frame ``df1`` for the row whose "PlotIdentifier"
    is "UnchkdSIMD_TESTZ_128_THP" and whose "Arch", "Compiler", "PageSize",
    "HugePageSize", "SQR", "Size", "Distribution" and "LoadFactor" values
    equal those of *row*.

    Raises:
        AssertionError: if ``df1`` does not contain exactly one such row.
    """
    key_columns = ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR",
                   "Size", "Distribution", "LoadFactor")
    # Build the selection mask once and reuse it for both the uniqueness
    # check and the lookup.
    mask = df1["PlotIdentifier"] == "UnchkdSIMD_TESTZ_128_THP"
    for column in key_columns:
        mask &= df1[column] == row[column]
    baseline = df1.loc[mask, "MLookups/s"]
    assert len(baseline) == 1, (
        f"expected exactly one UnchkdSIMD_TESTZ_128_THP baseline row, found {len(baseline)}"
    )
    return baseline.iloc[0]

# Throughput of every row relative to the matching UnchkdSIMD_TESTZ_128_THP row.
df1["APPerf"] = df1.apply(get_s2n_lookups, axis=1)
df1["Speedup"] = df1["MLookups/s"] / df1["APPerf"]
df1["InvSpeedup"] = df1["APPerf"] / df1["MLookups/s"]

# UMINV variant on A64FX: one average per load factor plus one over all load factors.
_uminv = df1[
    (df1["Arch"] == "A64FX ARM")
    & (df1["PlotIdentifier"] == "UnchkdSIMD_TESTZ_128_NEON_UMINV_THP")
]
avg_speedup_lf25_uminv = _uminv.loc[_uminv["LoadFactor"] == 25, "Speedup"].mean()
avg_speedup_lf50_uminv = _uminv.loc[_uminv["LoadFactor"] == 50, "Speedup"].mean()
avg_speedup_lf70_uminv = _uminv.loc[_uminv["LoadFactor"] == 70, "Speedup"].mean()
avg_speedup_lf90_uminv = _uminv.loc[_uminv["LoadFactor"] == 90, "Speedup"].mean()

avg_speedup_alllfs_uminv = _uminv["Speedup"].mean()

print(
    f"(1) average speedup UMINV over SS2NEON <A64FX, LF25>: {avg_speedup_lf25_uminv}\n"
    f"(2) average speedup UMINV over SS2NEON <A64FX, LF50>: {avg_speedup_lf50_uminv}\n"
    f"(3) average speedup UMINV over SS2NEON <A64FX, LF70>: {avg_speedup_lf70_uminv}\n"
    f"(4) average speedup UMINV over SS2NEON <A64FX, LF90>: {avg_speedup_lf90_uminv}\n"
    f"(5) average speedup UMINV over SS2NEON <A64FX>: {avg_speedup_alllfs_uminv}\n"
)

# Export the averages, rounded to two decimals.
for _key, _value in (
    ("A64FXLf25UminvVSS2NSpeedup", avg_speedup_lf25_uminv),
    ("A64FXLf50UminvVSS2NSpeedup", avg_speedup_lf50_uminv),
    ("A64FXLf70UminvVSS2NSpeedup", avg_speedup_lf70_uminv),
    ("A64FXLf90UminvVSS2NSpeedup", avg_speedup_lf90_uminv),
    ("A64FXAllLfUminvVSS2NSpeedup", avg_speedup_alllfs_uminv),
):
    result_dict[_key] = round(_value, 2)


## Vector Register Size (MA)

In [None]:
# Colours for the scalar LP baselines and the three VLP register widths.
pal_dict = {
    "LP_UN_THP": deep_palette[0],
    "LPSoA_THP": deep_palette[1],
    "UnchkdSIMD_TESTZ_128_THP": deep_palette[2],
    "UnchkdSIMD_TESTZ_256_THP": deep_palette[3],
    "UnchkdSIMD_TESTZ_512_THP": deep_palette[4],
}

# Full vector-size figure; the two returned frames are kept for the "Numbers" cell.
read_d, write_d = analysis_plot(
    "UnchunkedSIMDSoAHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    800,
    show_thp=True,
    hide_non_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_s2n=False,
    hide_s2n=True,
    hide_svbcast=True,
    hide_non_avx512=True,
    show_avx512=True,
    ncol_legend=5,
    hide_external=True,
    force_include=["LP_UN_THP", "LPSoA_THP"],
    exclude_prefixes=[
        "UnchkdSIMD_NOTESTZ",
        "UnchkdSIMD_MOM",
        "UnchkdSIMD_TESTZ_128_NEON_A64MM",
    ],
    word_filter=["_SVE", "_AVX512", "_NEON_UMINV"],
    legend_label_dict={
        "UnchkdSIMD_TESTZ_128_THP": "VLP(TEST/128)",
        "UnchkdSIMD_TESTZ_256_THP": "VLP(TEST/256)",
        "UnchkdSIMD_TESTZ_512_THP": "VLP(TEST/512)",
        "LP_UN_THP": "LP(AoS/Unal.)",
        "LPSoA_THP": "LP(SoA)",
    },
    file_path="full_vlp_vecsize.pdf",
)

In [None]:
# Reduced vector-size figure; the returned frames are discarded.
_ = analysis_plot(
    "UnchunkedSIMDSoAHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 70],
    375,
    show_thp=True,
    hide_non_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_s2n=False,
    hide_s2n=True,
    hide_svbcast=True,
    hide_non_avx512=True,
    show_avx512=True,
    ncol_legend=3,
    hide_external=True,
    force_include=["LP_UN_THP", "LPSoA_THP"],
    exclude_prefixes=[
        "UnchkdSIMD_NOTESTZ",
        "UnchkdSIMD_MOM",
        "UnchkdSIMD_TESTZ_128_NEON_A64MM",
        "UnchkdSIMD_TESTZ_128_AVX512_THP",
        "UnchkdSIMD_TESTZ_256_AVX512_THP",
    ],
    word_filter=["_SVE", "_NEON_UMINV", "_AVX512"],
    legend_label_dict={
        "UnchkdSIMD_TESTZ_128_THP": "VLP(TEST/128)",
        "UnchkdSIMD_TESTZ_256_THP": "VLP(TEST/256)",
        "UnchkdSIMD_TESTZ_512_THP": "VLP(TEST/512)",
        "LP_UN_THP": "LP(AoS/Unal.)",
        "LPSoA_THP": "LP(SoA)",
    },
    legend_height_shift=0.02,
    file_path="vlp_vecsize.pdf",
)

In [None]:
# Numbers: derive the vector-size figures from the read frame returned by analysis_plot.
# Work on a copy so that the columns added further down do not end up in read_d.
df1 = read_d.copy()

def get_soa_lookups(row):
    """Return the LPSoA_THP "MLookups/s" value measured under the same setup as *row*.

    Searches the module-level frame ``df1`` for the row whose "PlotIdentifier"
    is "LPSoA_THP" and whose "Arch", "Compiler", "PageSize", "HugePageSize",
    "SQR", "Size", "Distribution" and "LoadFactor" values equal those of *row*.

    Raises:
        AssertionError: if ``df1`` does not contain exactly one such row.
    """
    key_columns = ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR",
                   "Size", "Distribution", "LoadFactor")
    # Build the selection mask once and reuse it for both the uniqueness
    # check and the lookup.
    mask = df1["PlotIdentifier"] == "LPSoA_THP"
    for column in key_columns:
        mask &= df1[column] == row[column]
    baseline = df1.loc[mask, "MLookups/s"]
    assert len(baseline) == 1, (
        f"expected exactly one LPSoA_THP baseline row, found {len(baseline)}"
    )
    return baseline.iloc[0]

def get_aos_lookups(row):
    """Return the LP_UN_THP "MLookups/s" value measured under the same setup as *row*.

    Searches the module-level frame ``df1`` for the row whose "PlotIdentifier"
    is "LP_UN_THP" and whose "Arch", "Compiler", "PageSize", "HugePageSize",
    "SQR", "Size", "Distribution" and "LoadFactor" values equal those of *row*.

    Raises:
        AssertionError: if ``df1`` does not contain exactly one such row.
    """
    key_columns = ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR",
                   "Size", "Distribution", "LoadFactor")
    # Build the selection mask once and reuse it for both the uniqueness
    # check and the lookup.
    mask = df1["PlotIdentifier"] == "LP_UN_THP"
    for column in key_columns:
        mask &= df1[column] == row[column]
    baseline = df1.loc[mask, "MLookups/s"]
    assert len(baseline) == 1, (
        f"expected exactly one LP_UN_THP baseline row, found {len(baseline)}"
    )
    return baseline.iloc[0]

def get_128bit_lookups(row):
    assert len(df1[(df1["Arch"] == row["Arch"]) & (df1["Compiler"] == row["Compiler"]) & (df1["PageSize"] == row["PageSize"]) & (df1["HugePageSize"] == row["HugePageSize"]) & (df1["SQR"] == row["SQR"]) & (df1["Size"] == row["Size"]) & (df1["Distribution"] == row["Distribution"]) & (df1["LoadFactor"] == row["LoadFactor"]) & (df1["PlotIdentifier"] == "UnchkdSIMD_TESTZ_128_THP")]) == 1        
    return df1[(df1["Arch"] == row["Arch"]) & (df1["Compiler"] == row["Compiler"]) & (df1["PageSize"] == row["PageSize"]) & (df1["HugePageSize"] == row["HugePageSize"]) & (df1["SQR"] == row["SQR"]) & (df1["Size"] == row["Size"]) & (df1["Distribution"] == row["Distribution"]) & (df1["LoadFactor"] == row["LoadFactor"]) & (df1["PlotIdentifier"] == "UnchkdSIMD_TESTZ_128_THP")].iloc[0]["MLookups/s"]


# Attach the three per-row baseline throughputs. Each lookup function is
# handed straight to apply; no lambda wrapper is needed.
for _perf_column, _lookup in (
    ("SoAPerf", get_soa_lookups),
    ("AoSPerf", get_aos_lookups),
    ("VLP128Perf", get_128bit_lookups),
):
    df1[_perf_column] = df1.apply(_lookup, axis=1)

# Ratios of each row's own throughput to the baselines ("Inv*" is the reciprocal).
_measured = df1["MLookups/s"]
df1["SoASpeedup"] = _measured / df1["SoAPerf"]
df1["AoSSpeedup"] = _measured / df1["AoSPerf"]
df1["InvAoSSpeedup"] = df1["AoSPerf"] / _measured
df1["InvSoASpeedup"] = df1["SoAPerf"] / _measured
df1["VLP128Speedup"] = _measured / df1["VLP128Perf"]


def _avg_speedup(arch, plot_identifier, load_factor, column, sqr=None):
    """Mean of ``column`` over the df1 rows of one configuration.

    Rows are selected by "Arch", "PlotIdentifier" and "LoadFactor"; when
    ``sqr`` is given the "SQR" column must match as well, otherwise the mean
    runs over all SQR values.
    """
    selected = (
        (df1["Arch"] == arch)
        & (df1["PlotIdentifier"] == plot_identifier)
        & (df1["LoadFactor"] == load_factor)
    )
    if sqr is not None:
        selected = selected & (df1["SQR"] == sqr)
    return df1[selected][column].mean()


_VLP128 = "UnchkdSIMD_TESTZ_128_THP"
_VLP256 = "UnchkdSIMD_TESTZ_256_THP"
_VLP512 = "UnchkdSIMD_TESTZ_512_THP"

# A64FX, 512-bit variant, fixed SQR.
avg_speedup_lf90_sqr0_sve_vs_soa = _avg_speedup("A64FX ARM", _VLP512, 90, "SoASpeedup", sqr=0)
avg_speedup_lf90_sqr25_sve_vs_soa = _avg_speedup("A64FX ARM", _VLP512, 90, "SoASpeedup", sqr=25)

# Intel, 256-bit variant, SQR 50, per load factor.
avg_speedup_lf70_sqr50_256bit_vs_soa_intel = _avg_speedup("Intel x86", _VLP256, 70, "SoASpeedup", sqr=50)
avg_speedup_lf70_sqr50_256bit_vs_aos_intel = _avg_speedup("Intel x86", _VLP256, 70, "AoSSpeedup", sqr=50)
avg_speedup_lf50_sqr50_256bit_vs_soa_intel = _avg_speedup("Intel x86", _VLP256, 50, "SoASpeedup", sqr=50)
avg_speedup_lf50_sqr50_256bit_vs_aos_intel = _avg_speedup("Intel x86", _VLP256, 50, "AoSSpeedup", sqr=50)
avg_speedup_lf90_sqr50_256bit_vs_soa_intel = _avg_speedup("Intel x86", _VLP256, 90, "SoASpeedup", sqr=50)
avg_speedup_lf90_sqr50_256bit_vs_aos_intel = _avg_speedup("Intel x86", _VLP256, 90, "AoSSpeedup", sqr=50)

# AMD, 128-bit variant, SQR 50; then Intel at SQR 100 (inverse and direct ratios).
avg_speedup_lf70_sqr50_128bit_vs_soa_amd = _avg_speedup("AMD x86", _VLP128, 70, "SoASpeedup", sqr=50)
avg_speedup_lf70_sqr50_128bit_vs_aos_amd = _avg_speedup("AMD x86", _VLP128, 70, "AoSSpeedup", sqr=50)
avg_speedup_lf70_sqr100_aos_vs_512_intel = _avg_speedup("Intel x86", _VLP512, 70, "InvAoSSpeedup", sqr=100)
avg_speedup_lf70_sqr100_soa_vs_512_intel = _avg_speedup("Intel x86", _VLP512, 70, "InvSoASpeedup", sqr=100)
avg_speedup_lf70_sqr100_soa_vs_256_intel = _avg_speedup("Intel x86", _VLP256, 70, "InvSoASpeedup", sqr=100)
avg_speedup_lf70_sqr100_256_vs_soa_intel = _avg_speedup("Intel x86", _VLP256, 70, "SoASpeedup", sqr=100)
avg_speedup_lf50_sqr100_256_vs_soa_intel = _avg_speedup("Intel x86", _VLP256, 50, "SoASpeedup", sqr=100)
avg_speedup_lf90_sqr100_256_vs_soa_intel = _avg_speedup("Intel x86", _VLP256, 90, "SoASpeedup", sqr=100)

# Load factor 90, averaged over every SQR value.
avg_speedup_lf90_128bit_vs_soa_intel = _avg_speedup("Intel x86", _VLP128, 90, "SoASpeedup")
avg_speedup_lf90_128bit_vs_aos_intel = _avg_speedup("Intel x86", _VLP128, 90, "AoSSpeedup")
avg_speedup_lf90_256bit_vs_soa_intel = _avg_speedup("Intel x86", _VLP256, 90, "SoASpeedup")
avg_speedup_lf90_256bit_vs_aos_intel = _avg_speedup("Intel x86", _VLP256, 90, "AoSSpeedup")
avg_speedup_lf90_512bit_vs_soa_intel = _avg_speedup("Intel x86", _VLP512, 90, "SoASpeedup")
avg_speedup_lf90_512bit_vs_aos_intel = _avg_speedup("Intel x86", _VLP512, 90, "AoSSpeedup")
avg_speedup_lf90_128bit_vs_soa_amd = _avg_speedup("AMD x86", _VLP128, 90, "SoASpeedup")
avg_speedup_lf90_128bit_vs_aos_amd = _avg_speedup("AMD x86", _VLP128, 90, "AoSSpeedup")
avg_speedup_lf90_256bit_vs_soa_amd = _avg_speedup("AMD x86", _VLP256, 90, "SoASpeedup")
avg_speedup_lf90_256bit_vs_aos_amd = _avg_speedup("AMD x86", _VLP256, 90, "AoSSpeedup")

# A64FX: 512-bit variant relative to the 128-bit variant, averaged over every SQR value.
avg_speedup_lf70_sve_vs_neon = _avg_speedup("A64FX ARM", _VLP512, 70, "VLP128Speedup")
avg_speedup_lf90_sve_vs_neon = _avg_speedup("A64FX ARM", _VLP512, 90, "VLP128Speedup")


# Human-readable dump of the averages computed above.
# Fix: line (6) prints the AoS-relative value (..._vs_aos_amd) but was labelled
# "vs SoA", duplicating the label of line (5). The repeated (3)/(4)/(7.555)
# numbering is left exactly as it was.
print(f"(1) average speedup SVE vs SoA LP <SQR0, LF90>: {avg_speedup_lf90_sqr0_sve_vs_soa}\n"
      f"(2) average speedup SVE vs SoA LP <SQR25, LF90>: {avg_speedup_lf90_sqr25_sve_vs_soa}\n"
      f"(3) average speedup 256bit VLP vs SoA <SQR50, LF70, Intel>: {avg_speedup_lf70_sqr50_256bit_vs_soa_intel}\n"
      f"(4) average speedup 256bit VLP vs AoS <SQR50, LF70, Intel>: {avg_speedup_lf70_sqr50_256bit_vs_aos_intel}\n"
      f"(3) average speedup 256bit VLP vs SoA <SQR50, LF50, Intel>: {avg_speedup_lf50_sqr50_256bit_vs_soa_intel}\n"
      f"(4) average speedup 256bit VLP vs AoS <SQR50, LF50, Intel>: {avg_speedup_lf50_sqr50_256bit_vs_aos_intel}\n"
      f"(3) average speedup 256bit VLP vs SoA <SQR50, LF90, Intel>: {avg_speedup_lf90_sqr50_256bit_vs_soa_intel}\n"
      f"(4) average speedup 256bit VLP vs AoS <SQR50, LF90, Intel>: {avg_speedup_lf90_sqr50_256bit_vs_aos_intel}\n"
      f"(5) average speedup 128bit VLP vs SoA <SQR50, LF70, AMD>: {avg_speedup_lf70_sqr50_128bit_vs_soa_amd}\n"
      f"(6) average speedup 128bit VLP vs AoS <SQR50, LF70, AMD>: {avg_speedup_lf70_sqr50_128bit_vs_aos_amd}\n"
      f"(7) average speedup AoS vs 512bit VLP <SQR100, LF70, Intel>: {avg_speedup_lf70_sqr100_aos_vs_512_intel}\n"
      f"(7.5) average speedup SoA vs 512bit VLP <SQR100, LF70, Intel>: {avg_speedup_lf70_sqr100_soa_vs_512_intel}\n"
      f"(7.55) average speedup SoA vs 256bit VLP <SQR100, LF70, Intel>: {avg_speedup_lf70_sqr100_soa_vs_256_intel}\n"
      f"(7.555) average slowdown 256bit VLP vs  SoA <SQR100, LF70, Intel>: {avg_speedup_lf70_sqr100_256_vs_soa_intel}\n"
      f"(7.555) average slowdown 256bit VLP vs  SoA <SQR100, LF50, Intel>: {avg_speedup_lf50_sqr100_256_vs_soa_intel}\n"
      f"(7.555) average slowdown 256bit VLP vs  SoA <SQR100, LF90, Intel>: {avg_speedup_lf90_sqr100_256_vs_soa_intel}\n"
      f"(8) average speedup 128bit VLP vs SoA <all SQR, LF90, Intel>: {avg_speedup_lf90_128bit_vs_soa_intel}\n"
      f"(9) average speedup 128bit VLP vs AoS <all SQR, LF90, Intel>: {avg_speedup_lf90_128bit_vs_aos_intel}\n"
      f"(10) average speedup 256bit VLP vs SoA <all SQR, LF90, Intel>: {avg_speedup_lf90_256bit_vs_soa_intel}\n"
      f"(11) average speedup 256bit VLP vs AoS <all SQR, LF90, Intel>: {avg_speedup_lf90_256bit_vs_aos_intel}\n"
      f"(12) average speedup 512bit VLP vs SoA <all SQR, LF90, Intel>: {avg_speedup_lf90_512bit_vs_soa_intel}\n"
      f"(13) average speedup 512bit VLP vs AoS <all SQR, LF90, Intel>: {avg_speedup_lf90_512bit_vs_aos_intel}\n"
      f"(14) average speedup 128bit VLP vs SoA <all SQR, LF90, AMD>: {avg_speedup_lf90_128bit_vs_soa_amd}\n"
      f"(15) average speedup 128bit VLP vs AoS <all SQR, LF90, AMD>: {avg_speedup_lf90_128bit_vs_aos_amd}\n"
      f"(16) average speedup 256bit VLP vs SoA <all SQR, LF90, AMD>: {avg_speedup_lf90_256bit_vs_soa_amd}\n"
      f"(17) average speedup 256bit VLP vs AoS <all SQR, LF90, AMD>: {avg_speedup_lf90_256bit_vs_aos_amd}\n"
      f"(18) average speedup SVE vs NEON <all SQR, LF70>: {avg_speedup_lf70_sve_vs_neon}\n"
      f"(19) average speedup SVE vs NEON <all SQR, LF90>: {avg_speedup_lf90_sve_vs_neon}\n"
     )

# Store the averages, rounded to two decimals, in result_dict.
# Insertion order is the same as the original one-assignment-per-line form.
_vlp_results = {
    "A64FXLf90Sqr0VLPVsLPSpeedup": avg_speedup_lf90_sqr0_sve_vs_soa,
    "A64FXLf90Sqr25VLPVsLPSpeedup": avg_speedup_lf90_sqr25_sve_vs_soa,
    "IntLf70Sqr50VLP256SoASpeedup": avg_speedup_lf70_sqr50_256bit_vs_soa_intel,
    "IntLf70Sqr50VLP256AoSSpeedup": avg_speedup_lf70_sqr50_256bit_vs_aos_intel,
    "IntLf50Sqr50VLP256SoASpeedup": avg_speedup_lf50_sqr50_256bit_vs_soa_intel,
    "IntLf50Sqr50VLP256AoSSpeedup": avg_speedup_lf50_sqr50_256bit_vs_aos_intel,
    "IntLf90Sqr50VLP256SoASpeedup": avg_speedup_lf90_sqr50_256bit_vs_soa_intel,
    "IntLf90Sqr50VLP256AoSSpeedup": avg_speedup_lf90_sqr50_256bit_vs_aos_intel,
    "AmdLf70Sqr50VLP128SoASpeedup": avg_speedup_lf70_sqr50_128bit_vs_soa_amd,
    "AmdLf70Sqr50VLP128AoSSpeedup": avg_speedup_lf70_sqr50_128bit_vs_aos_amd,
    "IntLf70Sqr100AoSVS512VLPSpeedup": avg_speedup_lf70_sqr100_aos_vs_512_intel,
    # NOTE(review): the next four keys say "AoS" but the stored values are the
    # SoA-based averages (..._soa_vs_256_intel / ..._256_vs_soa_intel), and
    # avg_speedup_lf70_sqr100_soa_vs_512_intel is never exported. The keys are
    # presumably referenced from the generated LaTeX, so they are left
    # unchanged here - confirm which baseline is intended.
    "IntLf70Sqr100AoSVS256VLPSpeedup": avg_speedup_lf70_sqr100_soa_vs_256_intel,
    "IntLf70Sqr100256VLPVsAoSSlowdown": avg_speedup_lf70_sqr100_256_vs_soa_intel,
    "IntLf50Sqr100256VLPVsAoSSlowdown": avg_speedup_lf50_sqr100_256_vs_soa_intel,
    "IntLf90Sqr100256VLPVsAoSSlowdown": avg_speedup_lf90_sqr100_256_vs_soa_intel,
    "IntLf90VLP128SoASpeedup": avg_speedup_lf90_128bit_vs_soa_intel,
    "IntLf90VLP128AoSSpeedup": avg_speedup_lf90_128bit_vs_aos_intel,
    "IntLf90VLP256SoASpeedup": avg_speedup_lf90_256bit_vs_soa_intel,
    "IntLf90VLP256AoSSpeedup": avg_speedup_lf90_256bit_vs_aos_intel,
    "IntLf90VLP512SoASpeedup": avg_speedup_lf90_512bit_vs_soa_intel,
    "IntLf90VLP512AoSSpeedup": avg_speedup_lf90_512bit_vs_aos_intel,
    "AmdLf90VLP128SoASpeedup": avg_speedup_lf90_128bit_vs_soa_amd,
    "AmdLf90VLP128AoSSpeedup": avg_speedup_lf90_128bit_vs_aos_amd,
    "AmdLf90VLP256SoASpeedup": avg_speedup_lf90_256bit_vs_soa_amd,
    "AmdLf90VLP256AoSSpeedup": avg_speedup_lf90_256bit_vs_aos_amd,
    "A64FXLf70SVEvsNEONSpeedup": avg_speedup_lf70_sve_vs_neon,
    "A64FXLf90SVEvsNEONSpeedup": avg_speedup_lf90_sve_vs_neon,
}
for _result_key, _result_value in _vlp_results.items():
    result_dict[_result_key] = round(_result_value, 2)



## Paper Plot

In [None]:
# Colour per plotted series, keyed by plot identifier.
pal_dict = {
    "LP_UN_THP": deep_palette[0],
    "LPPackedSoA_THP": deep_palette[1],
    "UnchkdSIMD_TESTZ_128_THP": deep_palette[2],
    "UnchkdSIMD_TESTZ_256_THP": deep_palette[3],
    "UnchkdSIMD_TESTZ_512_THP": deep_palette[4],
}

# Full-size figure, written to full_paper_vlp.pdf; the two returned frames are
# kept in read_d / write_d.
read_d, write_d = analysis_plot(
    "UnchunkedSIMDSoAHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    800,
    ncol_legend=5,
    show_thp=True,
    hide_non_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_s2n=False,
    hide_s2n=True,
    hide_svbcast=True,
    hide_non_avx512=True,
    show_avx512=True,
    hide_external=True,
    force_include=["LP_UN_THP", "LPPackedSoA_THP"],
    exclude_prefixes=[
        "UnchkdSIMD_NOTESTZ",
        "UnchkdSIMD_MOM",
        "UnchkdSIMD_TESTZ_128_NEON_A64MM",
    ],
    word_filter=["_SVE", "_AVX512", "_NEON_UMINV"],
    legend_label_dict={
        "UnchkdSIMD_TESTZ_128_THP": "VLP(128)",
        "UnchkdSIMD_TESTZ_256_THP": "VLP(256)",
        "UnchkdSIMD_TESTZ_512_THP": "VLP(512)",
        "LP_UN_THP": "LP(AoS)",
        "LPPackedSoA_THP": "LP(SoA)",
    },
    file_path="full_paper_vlp.pdf",
)

In [None]:
#compare_pal = {"LP_UN_THP": "#a1dab4", "RecalcRH_UN_THP": "#2c7fb8", "Chain_BudKV_THP": "#253494", "LPPackedSoA_THP": "#41b6c4"}

# Fixed hex colours for the compact paper figure.
pal_dict = {
    "LPPackedSoA_THP": "#a1dab4",
    "UnchkdSIMD_TESTZ_128_THP": "#2c7fb8",
    "UnchkdSIMD_TESTZ_256_THP": "#253494",
    "UnchkdSIMD_TESTZ_512_THP": "#41b6c4",
}


# Compact variant written to paper_vlp.pdf; the returned frames are discarded.
_ = analysis_plot(
    "UnchunkedSIMDSoAHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM"],
    [50, 90],
    240,
    ncol_legend=5,
    legend_height_shift=-0.1,
    ylabel_padding=0,
    xlabel_padding=0,
    show_thp=True,
    hide_non_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_s2n=False,
    hide_s2n=True,
    hide_svbcast=True,
    hide_external=True,
    hide_non_avx512=True,
    show_avx512=True,
    force_include=["LPPackedSoA_THP"],
    exclude_prefixes=[
        "UnchkdSIMD_NOTESTZ",
        "UnchkdSIMD_MOM",
        "UnchkdSIMD_TESTZ_128_NEON_A64MM",
    ],
    word_filter=["_SVE", "_AVX512", "_NEON_UMINV"],
    legend_label_dict={
        "UnchkdSIMD_TESTZ_128_THP": "128",
        "UnchkdSIMD_TESTZ_256_THP": "256",
        "UnchkdSIMD_TESTZ_512_THP": "512",
        "LP_UN_THP": "LP(AoS)",
        "LPPackedSoA_THP": "LP(SoA)",
    },
    file_path="paper_vlp.pdf",
    disable_legend_border=True,
    reduce_xticks=True,
    reduce_yticks=True,
    reduce_minor=True,
    extra_legend_columnspacing=0.5,
    extra_legend_handlelength=1,
)

# Fingerprinting Analysis

## TEST vs NOTEST

In [None]:
# Two series: the NOTESTZ and the TESTZ 128-bit identifiers.
pal_dict = {
    "FingerPSIMD_8B_NOTESTZ_128_THP": deep_palette[0],
    "FingerPSIMD_8B_TESTZ_128_THP": deep_palette[1],
}

# Figure written to full_vfp_test_vs_notest.pdf.
read_d, write_d = analysis_plot(
    "FingerprintingSIMDSoAHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    show_avx512=False,
    hide_non_thp=True,
    hide_simd_larger_128=True,
    hide_msb_fingerprints=True,
    hide_likely_hints=True,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_non_s2n=True,
    exclude_prefixes=["FingerPSIMD_16B"],
    legend_label_dict={},
    word_filter=["_NEON_S2N", "_LSBLSB"],
    file_path="full_vfp_test_vs_notest.pdf",
)

## SVBcast Validation

In [None]:
# Series with and without the SVBCast suffix, for 8B and 16B identifiers.
pal_dict = {
    "FingerPSIMD_16B_TESTZ_THP": deep_palette[1],
    "FingerPSIMD_16B_TESTZ_SVBCast_THP": deep_palette[2],
    "FingerPSIMD_8B_TESTZ_SVBCast_THP": deep_palette[3],
    "FingerPSIMD_8B_TESTZ_THP": deep_palette[0],
}

# A64FX only; figure written to full_vfp_svbcast.pdf.
read_d, write_d = analysis_plot(
    "FingerprintingSIMDSoAHashTable",
    pal_dict,
    ["A64FX ARM"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    show_avx512=True,
    hide_non_thp=True,
    split_by_simd_size=True,
    # NOTE(review): an int here, while other cells pass strings ("128", "256")
    # for simd_sizes - confirm analysis_plot accepts both.
    simd_sizes=[512],
    hide_msb_fingerprints=True,
    hide_likely_hints=True,
    hide_vec_iterators=True,
    hide_svbcast=False,
    hide_non_s2n=False,
    exclude_prefixes=[
        "FingerPSIMD_8B_LSBLSB_NOTEST",
        "FingerPSIMD_8B_LSBLSB_VecIt_NOTEST",
        "FingerPSIMD_16B_LSBLSB_VecIt_NOTEST",
        "FingerPSIMD_16B_LSBLSB_NOTEST",
    ],
    legend_label_dict={},
    word_filter=["_SVE", "_LSBLSB"],
    file_path="full_vfp_svbcast.pdf",
)

# We see that SVBcast is not faster

## Likely Hints

In [None]:
# 8B series for 128/256/512, each with and without the _Hints suffix.
pal_dict = {
    "FingerPSIMD_8B_TESTZ_256_THP": deep_palette[2],
    "FingerPSIMD_8B_TESTZ_512_THP": deep_palette[4],
    "FingerPSIMD_8B_TESTZ_256_Hints_THP": deep_palette[3],
    "FingerPSIMD_8B_TESTZ_512_Hints_THP": deep_palette[5],
    "FingerPSIMD_8B_TESTZ_128_Hints_THP": deep_palette[1],
    "FingerPSIMD_8B_TESTZ_128_THP": deep_palette[0],
}

# Figure written to full_vfp_likely_8b.pdf.
read_d, write_d = analysis_plot(
    "FingerprintingSIMDSoAHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_msb_fingerprints=True,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_non_s2n=False,
    exclude_prefixes=[
        "FingerPSIMD_8B_LSBLSB_NOTEST",
        "FingerPSIMD_16B",
        "FingerPSIMD_8B_LSBLSB_TESTZ_256_AVX512",
        "FingerPSIMD_8B_LSBLSB_TESTZ_128_AVX512",
    ],
    legend_label_dict={},
    word_filter=["_NEON_S2N", "_AVX512", "_SVE", "_LSBLSB"],
    file_path="full_vfp_likely_8b.pdf",
)

In [None]:
# Numbers
# Only the Intel rows are analysed in this cell; the copy keeps read_d unchanged.
df1 = read_d.loc[read_d["Arch"] == "Intel x86"].copy()
#df2 = write_d.copy()

def get_nonhint_lookups(row):
    """Return the "MLookups/s" of the "FingerPSIMD_8B_TESTZ_512_THP" row matching ``row``.

    A row of the module-level ``df1`` matches when its Arch, Compiler,
    PageSize, HugePageSize, SQR, Size, Distribution and LoadFactor all equal
    the values in ``row``.

    Raises:
        AssertionError: if there is not exactly one matching row. It is raised
            explicitly (not via ``assert``) so the check is not stripped by
            ``python -O``; the exception type matches the previous ``assert``.
    """
    # Build the filter once; it used to be evaluated twice per row.
    mask = df1["PlotIdentifier"] == "FingerPSIMD_8B_TESTZ_512_THP"
    for column in ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR",
                   "Size", "Distribution", "LoadFactor"):
        mask = mask & (df1[column] == row[column])
    matches = df1.loc[mask, "MLookups/s"]
    if len(matches) != 1:
        raise AssertionError(
            f"expected exactly one FingerPSIMD_8B_TESTZ_512_THP baseline row, found {len(matches)}"
        )
    return matches.iloc[0]

# Per-row baseline from get_nonhint_lookups, then the ratio of each row's own
# throughput to it (and the reciprocal).
df1["APPerf"] = df1.apply(get_nonhint_lookups, axis=1)
df1["Speedup"] = df1["MLookups/s"] / df1["APPerf"]
df1["InvSpeedup"] = df1["APPerf"] / df1["MLookups/s"]

#df2["APPerf"] = df2.apply(lambda row: get_ap_inserts(row), axis=1)
#df2["Speedup"] = df2["MInserts/s"] / df2["APPerf"]
#df2["InvSpeedup"] = df2["APPerf"] / df2["MInserts/s"]

# NOTE(review): the variable names say "256" but every filter below selects the
# 512-bit identifier (the printed labels and result keys say 512 as well).
# The names are kept in case later cells reference them.
#avg_amd_256_8_lf70_hintspeedup = df1[(df1["Arch"] == "AMD x86") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_TESTZ_256_Hints_THP") & (df1["LoadFactor"] == 70)]["Speedup"].mean()
#avg_amd_256_8_lf90_hintspeedup = df1[(df1["Arch"] == "AMD x86") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_TESTZ_256_Hints_THP") & (df1["LoadFactor"] == 90)]["Speedup"].mean()
avg_intel_256_8_lf70_hintspeedup = df1[(df1["Arch"] == "Intel x86") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_TESTZ_512_Hints_THP") & (df1["LoadFactor"] == 70)]["Speedup"].mean()
avg_intel_256_8_lf70_sqr100_hintspeedup = df1[(df1["Arch"] == "Intel x86") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_TESTZ_512_Hints_THP") & (df1["LoadFactor"] == 70) & (df1["SQR"] == 100)]["Speedup"].mean()

# This average used to be computed twice in a row with the same filter; once is enough.
avg_intel_256_8_lf90_hintspeedup = df1[(df1["Arch"] == "Intel x86") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_TESTZ_512_Hints_THP") & (df1["LoadFactor"] == 90)]["Speedup"].mean()


print(#f"(1) average speedup hints over no hints <AMD, 256-bit register, 8-bit fingerprints, lf70>: {avg_amd_256_8_lf70_hintspeedup}\n"
      #f"(2) average speedup hints over no hints <AMD, 256-bit register, 8-bit fingerprints, lf90>: {avg_amd_256_8_lf90_hintspeedup}\n"
      f"(3) average speedup hints over no hints <Intel, 512-bit register, 8-bit fingerprints, lf70>: {avg_intel_256_8_lf70_hintspeedup}\n"
     f"(3) average speedup hints over no hints <Intel, 512-bit register, 8-bit fingerprints, lf70, sqr100>: {avg_intel_256_8_lf70_sqr100_hintspeedup}\n"
      f"(4) average speedup hints over no hints <Intel, 512-bit register, 8-bit fingerprints, lf90>: {avg_intel_256_8_lf90_hintspeedup}\n"
     )

# Export the rounded averages.
result_dict["IntLf70VFP5128HintSpeedup"] = round(avg_intel_256_8_lf70_hintspeedup, 2)
result_dict["IntLf70Sqr100VFP5128HintSpeedup"] = round(avg_intel_256_8_lf70_sqr100_hintspeedup, 2)

result_dict["IntLf90VFP5128HintSpeedup"] = round(avg_intel_256_8_lf90_hintspeedup, 2)


In [None]:
# 16B series for 128/256/512, each with and without the _Hints suffix.
pal_dict = {
    "FingerPSIMD_16B_TESTZ_256_THP": deep_palette[2],
    "FingerPSIMD_16B_TESTZ_512_THP": deep_palette[4],
    "FingerPSIMD_16B_TESTZ_256_Hints_THP": deep_palette[3],
    "FingerPSIMD_16B_TESTZ_512_Hints_THP": deep_palette[5],
    "FingerPSIMD_16B_TESTZ_128_Hints_THP": deep_palette[1],
    "FingerPSIMD_16B_TESTZ_128_THP": deep_palette[0],
}

# Figure written to full_vfp_likely_16b.pdf.
read_d, write_d = analysis_plot(
    "FingerprintingSIMDSoAHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_msb_fingerprints=True,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_non_s2n=False,
    exclude_prefixes=[
        "FingerPSIMD_16B_LSBLSB_NOTEST",
        "FingerPSIMD_8",
        "FingerPSIMD_16B_LSBLSB_TESTZ_256_AVX512",
        "FingerPSIMD_16B_LSBLSB_TESTZ_128_AVX512",
    ],
    legend_label_dict={},
    word_filter=["_NEON_S2N", "_AVX512", "_SVE", "_LSBLSB"],
    file_path="full_vfp_likely_16b.pdf",
)

## Iterator Variants (with Likely Hints)

In [None]:
# 128-bit series with and without the VecIt suffix, for 8B and 16B identifiers.
pal_dict = {
    "FingerPSIMD_16B_TESTZ_128_THP": deep_palette[1],
    "FingerPSIMD_16B_VecIt_TESTZ_128_THP": deep_palette[2],
    "FingerPSIMD_8B_VecIt_TESTZ_128_THP": deep_palette[3],
    "FingerPSIMD_8B_TESTZ_128_THP": deep_palette[0],
}

# A64FX and Power only; figure written to full_vfp_iterators.pdf.
read_d, write_d = analysis_plot(
    "FingerprintingSIMDSoAHashTable",
    pal_dict,
    ["A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    show_avx512=True,
    hide_non_thp=True,
    hide_simd_larger_128=True,
    hide_msb_fingerprints=True,
    hide_likely_hints=False,
    hide_non_likely_hints=True,
    hide_vec_iterators=False,
    hide_svbcast=True,
    hide_non_s2n=False,
    exclude_prefixes=[
        "FingerPSIMD_8B_LSBLSB_NOTEST",
        "FingerPSIMD_8B_LSBLSB_VecIt_NOTEST",
        "FingerPSIMD_16B_LSBLSB_VecIt_NOTEST",
        "FingerPSIMD_16B_LSBLSB_NOTEST",
    ],
    legend_label_dict={},
    word_filter=["_NEON_S2N", "_NEON_UMINV", "_LSBLSB", "_Hints"],
    file_path="full_vfp_iterators.pdf",
)

In [None]:
# Numbers
# Keep only the A64FX and Power rows; the copy keeps read_d unchanged.
df1 = read_d[read_d["Arch"].isin(["A64FX ARM", "Power"])].copy()

def get_vecit_lookups(row):
    """Return the "MLookups/s" of the "FingerPSIMD_8B_VecIt_TESTZ_128_THP" row matching ``row``.

    A row of the module-level ``df1`` matches when its Arch, Compiler,
    PageSize, HugePageSize, SQR, Size, Distribution and LoadFactor all equal
    the values in ``row``.

    If the number of matches is not exactly one, ``row`` and the matching rows
    are printed as a diagnostic and the first match is still used. With no
    match at all, the positional access raises ``IndexError``.
    """
    # Build the filter once; it used to be evaluated up to three times per row.
    mask = df1["PlotIdentifier"] == "FingerPSIMD_8B_VecIt_TESTZ_128_THP"
    for column in ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR",
                   "Size", "Distribution", "LoadFactor"):
        mask = mask & (df1[column] == row[column])
    matches = df1[mask]
    if len(matches) != 1:
        print(row)
        print(matches)
    return matches.iloc[0]["MLookups/s"]

# Per-row baseline from get_vecit_lookups, then the ratio of each row's own
# throughput to it (and the reciprocal).
df1["APPerf"] = df1.apply(get_vecit_lookups, axis=1)
_measured = df1["MLookups/s"]
df1["Speedup"] = _measured / df1["APPerf"]
df1["InvSpeedup"] = df1["APPerf"] / _measured

# Mean speedup of the "FingerPSIMD_8B_TESTZ_128_THP" rows, per architecture.
_movemask_rows = df1[df1["PlotIdentifier"] == "FingerPSIMD_8B_TESTZ_128_THP"]
avg_arm_movemask_vs_vecit = _movemask_rows[_movemask_rows["Arch"] == "A64FX ARM"]["Speedup"].mean()
avg_power_movemask_vs_vecit = _movemask_rows[_movemask_rows["Arch"] == "Power"]["Speedup"].mean()


print(
    f"(1) average speedup ARM movemask it vs vec-it <128-bit register, 8-bit fingerprints>: {avg_arm_movemask_vs_vecit}\n",
    f"(2) average speedup Power movemask it vs vec-it <128-bit register, 8-bit fingerprints>: {avg_power_movemask_vs_vecit}\n",
    sep="",
)

# Export the rounded averages.
for _result_key, _result_value in (
    ("ArmVfpMovemaskAvgSpeedup", avg_arm_movemask_vs_vecit),
    ("PwrVfpMovemaskAvgSpeedup", avg_power_movemask_vs_vecit),
):
    result_dict[_result_key] = round(_result_value, 2)



## AVX512 (with Likely Hints)

In [None]:
# Hints series with and without the AVX512 suffix, for 8B and 16B identifiers.
pal_dict = {
    "FingerPSIMD_8B_TESTZ_Hints_THP": deep_palette[0],
    "FingerPSIMD_8B_TESTZ_AVX512_Hints_THP": deep_palette[1],
    "FingerPSIMD_16B_TESTZ_AVX512_Hints_THP": deep_palette[2],
    "FingerPSIMD_16B_TESTZ_Hints_THP": deep_palette[3],
}

# Intel only; figure written to full_vfp_avx512.pdf.
read_d, write_d = analysis_plot(
    "FingerprintingSIMDSoAHashTable",
    pal_dict,
    ["Intel x86"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    show_avx512=True,
    hide_non_thp=True,
    hide_simd_larger_128=False,
    hide_msb_fingerprints=True,
    hide_likely_hints=False,
    hide_non_likely_hints=True,
    hide_vec_iterators=True,
    hide_svbcast=True,
    split_by_simd_size=True,
    simd_sizes=["128", "256"],
    hide_non_s2n=True,
    exclude_prefixes=[
        "FingerPSIMD_8B_LSBLSB_NOTEST",
        "FingerPSIMD_16B_LSBLSB_NOTEST",
        "FingerPSIMD_8B_LSBLSB_TESTZ_512",
        "FingerPSIMD_8B_NOTEST",
        "FingerPSIMD_16B_NOTEST",
        "FingerPSIMD_8B_TESTZ_512",
    ],
    legend_label_dict={},
    word_filter=["_NEON_S2N", "_LSBLSB"],
    file_path="full_vfp_avx512.pdf",
)

In [None]:
# Same four series as the full figure, keyed by their _LSBLSB identifiers
# (this call passes no word_filter).
pal_dict = {
    "FingerPSIMD_8B_LSBLSB_TESTZ_Hints_THP": deep_palette[0],
    "FingerPSIMD_8B_LSBLSB_TESTZ_AVX512_Hints_THP": deep_palette[1],
    "FingerPSIMD_16B_LSBLSB_TESTZ_AVX512_Hints_THP": deep_palette[2],
    "FingerPSIMD_16B_LSBLSB_TESTZ_Hints_THP": deep_palette[3],
}

# Compact variant written to vfp_avx512.pdf; the returned frames are discarded,
# so read_d / write_d keep their previous values.
_ = analysis_plot(
    "FingerprintingSIMDSoAHashTable",
    pal_dict,
    ["Intel x86"],
    [25, 70],
    375,
    ncol_legend=2,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    show_avx512=True,
    hide_non_thp=True,
    hide_simd_larger_128=False,
    hide_msb_fingerprints=True,
    hide_likely_hints=False,
    hide_non_likely_hints=True,
    hide_vec_iterators=True,
    hide_svbcast=True,
    split_by_simd_size=True,
    simd_sizes=["128", "256"],
    hide_non_s2n=True,
    exclude_prefixes=[
        "FingerPSIMD_8B_NOTEST",
        "FingerPSIMD_16B_NOTEST",
        "FingerPSIMD_8B_TESTZ_512",
        "FingerPSIMD_8B_LSBLSB_NOTEST",
        "FingerPSIMD_16B_LSBLSB_NOTEST",
        "FingerPSIMD_8B_LSBLSB_TESTZ_512",
    ],
    legend_label_dict={
        "FingerPSIMD_8B_LSBLSB_TESTZ_Hints_THP": "VFP(8/TEST)",
        "FingerPSIMD_8B_LSBLSB_TESTZ_AVX512_Hints_THP": "VFP(8/TEST)+AVX512",
        "FingerPSIMD_16B_LSBLSB_TESTZ_AVX512_Hints_THP": "VFP(16/TEST)+AVX512",
        "FingerPSIMD_16B_LSBLSB_TESTZ_Hints_THP": "VFP(16/TEST)",
    },
    legend_height_shift=-0.14,
    file_path="vfp_avx512.pdf",
)

In [None]:
# Numbers
# Deep copy, so the columns added to df1 in this cell never show up in read_d.
df1 = read_d.copy(deep=True)
#df2 = write_d.copy()

def _baseline_lookups(row, plot_identifier):
    """Return the "MLookups/s" value of the df1 row that is the baseline for ``row``.

    The baseline is the single row of the module-level ``df1`` whose
    "PlotIdentifier" equals ``plot_identifier`` and whose Arch, Compiler,
    PageSize, HugePageSize, SQR, Size, Distribution and LoadFactor all equal
    the values in ``row``.

    Raises:
        AssertionError: if there is not exactly one such row. It is raised
            explicitly (not via ``assert``) so the check is not stripped by
            ``python -O``; the exception type matches the previous ``assert``.
    """
    key_columns = ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR",
                   "Size", "Distribution", "LoadFactor")
    # Build the filter once; it used to be evaluated twice per row.
    mask = df1["PlotIdentifier"] == plot_identifier
    for column in key_columns:
        mask = mask & (df1[column] == row[column])
    matches = df1.loc[mask, "MLookups/s"]
    if len(matches) != 1:
        raise AssertionError(
            f"expected exactly one {plot_identifier} baseline row, found {len(matches)}"
        )
    return matches.iloc[0]


def get_nonavx16b_lookups(row):
    """Return the "MLookups/s" of the "FingerPSIMD_16B_TESTZ_128_Hints_THP" row with the same configuration as ``row``."""
    return _baseline_lookups(row, "FingerPSIMD_16B_TESTZ_128_Hints_THP")


def get_nonavx8b128_lookups(row):
    """Return the "MLookups/s" of the "FingerPSIMD_8B_TESTZ_128_Hints_THP" row with the same configuration as ``row``."""
    return _baseline_lookups(row, "FingerPSIMD_8B_TESTZ_128_Hints_THP")


def get_nonavx8b256_lookups(row):
    """Return the "MLookups/s" of the "FingerPSIMD_8B_TESTZ_256_Hints_THP" row with the same configuration as ``row``."""
    return _baseline_lookups(row, "FingerPSIMD_8B_TESTZ_256_Hints_THP")

# Baseline columns and ratios. "Speedup" / "InvSpeedup" are relative to the
# baseline returned by get_nonavx16b_lookups; the 8B128* / 8B256* columns use
# the two 8B baselines.
_measured = df1["MLookups/s"]
df1["APPerf"] = df1.apply(get_nonavx16b_lookups, axis=1)
df1["Speedup"] = _measured / df1["APPerf"]
df1["InvSpeedup"] = df1["APPerf"] / _measured

df1["8B128Perf"] = df1.apply(get_nonavx8b128_lookups, axis=1)
df1["8B128Speedup"] = _measured / df1["8B128Perf"]
df1["8B256Perf"] = df1.apply(get_nonavx8b256_lookups, axis=1)
df1["8B256Speedup"] = _measured / df1["8B256Perf"]

#df2["APPerf"] = df2.apply(lambda row: get_ap_inserts(row), axis=1)
#df2["Speedup"] = df2["MInserts/s"] / df2["APPerf"]
#df2["InvSpeedup"] = df2["APPerf"] / df2["MInserts/s"]


def _avx_mean(plot_identifier, column, load_factor=None, sqr=None):
    """Mean of ``column`` over the Intel rows of df1 with the given PlotIdentifier.

    ``load_factor`` and ``sqr`` narrow the selection only when they are given.
    """
    selected = (df1["Arch"] == "Intel x86") & (df1["PlotIdentifier"] == plot_identifier)
    if load_factor is not None:
        selected = selected & (df1["LoadFactor"] == load_factor)
    if sqr is not None:
        selected = selected & (df1["SQR"] == sqr)
    return df1[selected][column].mean()


avg_avx_lf70_sqr100 = _avx_mean("FingerPSIMD_16B_TESTZ_128_AVX512_Hints_THP", "Speedup", load_factor=70, sqr=100)
avg_avx_128_8b = _avx_mean("FingerPSIMD_8B_TESTZ_128_AVX512_Hints_THP", "8B128Speedup")
avg_avx_256_8b = _avx_mean("FingerPSIMD_8B_TESTZ_256_AVX512_Hints_THP", "8B256Speedup")
avg_avx_128_16b = _avx_mean("FingerPSIMD_16B_TESTZ_128_AVX512_Hints_THP", "Speedup")
# NOTE(review): "Speedup" is relative to the 128-bit 16B baseline, so this
# 256-bit average is not measured against a 256-bit non-AVX-512 run - confirm
# that this is intended.
avg_avx_256_16b = _avx_mean("FingerPSIMD_16B_TESTZ_256_AVX512_Hints_THP", "Speedup")

avg_avx_128_8b_lf25 = _avx_mean("FingerPSIMD_8B_TESTZ_128_AVX512_Hints_THP", "8B128Speedup", load_factor=25)
avg_avx_256_8b_lf25 = _avx_mean("FingerPSIMD_8B_TESTZ_256_AVX512_Hints_THP", "8B256Speedup", load_factor=25)

print(f"(1) average speedup AVX-512 over non AVX-512 <128-bit register, 16-bit fingerprints, lf70, sqr100>: {avg_avx_lf70_sqr100}\n"
      f"(2) average speedup AVX-512 over non AVX-512 <128-bit register, 8-bit fingerprints>: {avg_avx_128_8b}\n"
      f"(3) average speedup AVX-512 over non AVX-512 <256-bit register, 8-bit fingerprints>: {avg_avx_256_8b}\n"
      f"(4) average speedup AVX-512 over non AVX-512 <128-bit register, 16-bit fingerprints>: {avg_avx_128_16b}\n"
      f"(5) average speedup AVX-512 over non AVX-512 <128-bit register, 16-bit fingerprints>: {avg_avx_256_16b}\n"
      f"(6) average speedup AVX-512 over non AVX-512 <128-bit register, 8-bit fingerprints, lf25>: {avg_avx_128_8b_lf25}\n"
      f"(7) average speedup AVX-512 over non AVX-512 <256-bit register, 8-bit fingerprints, lf25>: {avg_avx_256_8b_lf25}\n"
     )

result_dict["IntAVX512VsNon2568B"] = round(avg_avx_256_8b, 2)
result_dict["IntAVX512VsNon2568BLF25"] = round(avg_avx_256_8b_lf25, 2)


## LSBLSB vs LSBMSB (mit Likely Hints und AVX512, 8 B Fingerprints)

In [None]:
# Colour per plot identifier for the 8-bit-fingerprint LSBLSB vs. LSBMSB comparison.
pal_dict = { 'FingerPSIMD_8B_LSBLSB_TESTZ_256_THP': deep_palette[2], 'FingerPSIMD_8B_LSBLSB_TESTZ_512_THP': deep_palette[4], 
             'FingerPSIMD_8B_LSBMSB_TESTZ_256_THP': deep_palette[3], 'FingerPSIMD_8B_LSBMSB_TESTZ_512_THP': deep_palette[5],
             'FingerPSIMD_8B_LSBMSB_TESTZ_128_THP': deep_palette[1], 'FingerPSIMD_8B_LSBLSB_TESTZ_128_THP': deep_palette[0] }


# The legend keys for the LSBLSB variants now carry "_LSBLSB", matching pal_dict
# and the identifiers this notebook filters on (word_filter does not strip
# "_LSBLSB"). The keys without it did not correspond to any pal_dict entry.
# read_d / write_d are kept for the number computations in a later cell.
read_d, write_d  = analysis_plot("FingerprintingSIMDSoAHashTable", pal_dict, ["Intel x86", "AMD x86", "A64FX ARM", "Power"], [25, 50, 70, 90], 1000,
                   show_thp=True, show_pref=False, show_unroll=False, hide_non_thp=True,  hide_non_likely_hints=True, hide_lsbmsb_fingerprints=False, hide_msblsb_fingerprints=True,
                   hide_msb_fingerprints=False, hide_likely_hints=False, hide_vec_iterators=True, hide_svbcast=True, hide_non_avx512=True,
                   hide_non_s2n=False, exclude_prefixes=["FingerPSIMD_8B_LSBLSB_NOTEST", "FingerPSIMD_16B"],
                   legend_label_dict={ 'FingerPSIMD_8B_LSBLSB_TESTZ_256_THP': "VFP(8/256)", 'FingerPSIMD_8B_LSBLSB_TESTZ_512_THP': "VFP(8/512)",
                     'FingerPSIMD_8B_LSBMSB_TESTZ_256_THP': "VFP(8/256)+LSBMSB", 'FingerPSIMD_8B_LSBMSB_TESTZ_512_THP': "VFP(8/512)+LSBMSB",
                     'FingerPSIMD_8B_LSBMSB_TESTZ_128_THP': "VFP(8/128)+LSBMSB", 'FingerPSIMD_8B_LSBLSB_TESTZ_128_THP': "VFP(8/128)" }, word_filter=["_NEON_S2N", "_AVX512", "_SVE", "_Hints"],
                   file_path="full_vfp_msb_8b.pdf"  )

In [None]:
# Second LSBLSB-vs-LSBMSB plot with the same palette but only the values
# [25, 70] (presumably load factors) and a three-column legend; the returned
# data is discarded.
_ = analysis_plot(
    "FingerprintingSIMDSoAHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 70],
    375,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_lsbmsb_fingerprints=False,
    hide_msblsb_fingerprints=True,
    hide_msb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_non_avx512=True,
    hide_non_s2n=False,
    exclude_prefixes=["FingerPSIMD_8B_LSBLSB_NOTEST", "FingerPSIMD_16B"],
    ncol_legend=3,
    word_filter=["_NEON_S2N", "_AVX512", "_SVE", "_Hints"],
    legend_label_dict={
        'FingerPSIMD_8B_LSBLSB_TESTZ_256_THP': "VFP(8/256)",
        'FingerPSIMD_8B_LSBLSB_TESTZ_512_THP': "VFP(8/512)",
        'FingerPSIMD_8B_LSBMSB_TESTZ_256_THP': "VFP(8/256)+LSBMSB",
        'FingerPSIMD_8B_LSBMSB_TESTZ_512_THP': "VFP(8/512)+LSBMSB",
        'FingerPSIMD_8B_LSBMSB_TESTZ_128_THP': "VFP(8/128)+LSBMSB",
        'FingerPSIMD_8B_LSBLSB_TESTZ_128_THP': "VFP(8/128)",
    },
    file_path="vfp_lsbmsb_8b.pdf",
)

In [None]:
# Numbers
# Work on a copy of read_d (returned by an earlier analysis_plot call) so the
# columns added below are not written into read_d itself.
df1 = read_d.copy()
#df2 = write_d.copy()

def get_lsb1288b_lookups(row):
    """Return ``MLookups/s`` of the ``FingerPSIMD_8B_LSBLSB_TESTZ_128_THP`` row matching ``row``.

    The match is done in the global ``df1`` on Arch, Compiler, PageSize,
    HugePageSize, SQR, Size, Distribution and LoadFactor. An assertion fails
    unless exactly one row matches.
    """
    config_columns = ("Arch", "Compiler", "PageSize", "HugePageSize",
                      "SQR", "Size", "Distribution", "LoadFactor")
    selector = df1["PlotIdentifier"] == "FingerPSIMD_8B_LSBLSB_TESTZ_128_THP"
    for column in config_columns:
        selector = selector & (df1[column] == row[column])
    matches = df1[selector]
    assert len(matches) == 1
    return matches.iloc[0]["MLookups/s"]

# Per-row throughput of the matching LSBLSB 128-bit/8-bit configuration, and the
# throughput ratio of every row against it (plus its inverse).
df1["APPerf"] = df1.apply(lambda row: get_lsb1288b_lookups(row), axis=1)
df1["Speedup"] = df1["MLookups/s"] / df1["APPerf"]
df1["InvSpeedup"] = df1["APPerf"] / df1["MLookups/s"]

# Mean speedup of the LSBMSB 128-bit/8-bit variant at load factor 70, per architecture.
avg_intel_128_8_lf70_msbspeedup = df1[(df1["Arch"] == "Intel x86") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_LSBMSB_TESTZ_128_THP") & (df1["LoadFactor"] == 70)]["Speedup"].mean()
avg_amd_128_8_lf70_msbspeedup = df1[(df1["Arch"] == "AMD x86") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_LSBMSB_TESTZ_128_THP") & (df1["LoadFactor"] == 70)]["Speedup"].mean()
avg_a64fx_128_8_lf70_msbspeedup = df1[(df1["Arch"] == "A64FX ARM") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_LSBMSB_TESTZ_128_THP") & (df1["LoadFactor"] == 70)]["Speedup"].mean()
avg_power_128_8_lf70_msbspeedup = df1[(df1["Arch"] == "Power") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_LSBMSB_TESTZ_128_THP") & (df1["LoadFactor"] == 70)]["Speedup"].mean()

# The same mean across all architectures.
avg_vfp_128_8_lf70_msbspeedup = df1[(df1["PlotIdentifier"] == "FingerPSIMD_8B_LSBMSB_TESTZ_128_THP") & (df1["LoadFactor"] == 70)]["Speedup"].mean()


# Maximum speedups: per architecture at load factor 70 and SQR 0, then over all
# rows of the variant, then over all architectures at load factor 70 and SQR 0.
max_intel_128_8_lf70_msbspeedup = df1[(df1["Arch"] == "Intel x86") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_LSBMSB_TESTZ_128_THP") & (df1["LoadFactor"] == 70) & (df1["SQR"] == 0)]["Speedup"].max()
max_amd_128_8_lf70_msbspeedup = df1[(df1["Arch"] == "AMD x86") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_LSBMSB_TESTZ_128_THP") & (df1["LoadFactor"] == 70)& (df1["SQR"] == 0)]["Speedup"].max()
max_a64fx_128_8_lf70_msbspeedup = df1[(df1["Arch"] == "A64FX ARM") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_LSBMSB_TESTZ_128_THP") & (df1["LoadFactor"] == 70)& (df1["SQR"] == 0)]["Speedup"].max()
max_power_128_8_lf70_msbspeedup = df1[(df1["Arch"] == "Power") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_LSBMSB_TESTZ_128_THP") & (df1["LoadFactor"] == 70)& (df1["SQR"] == 0)]["Speedup"].max()
max_msb_speedup = df1[df1["PlotIdentifier"] == "FingerPSIMD_8B_LSBMSB_TESTZ_128_THP"]["Speedup"].max()
max_128_8_lf70_msbspeedup = df1[(df1["PlotIdentifier"] == "FingerPSIMD_8B_LSBMSB_TESTZ_128_THP") & (df1["LoadFactor"] == 70)& (df1["SQR"] == 0)]["Speedup"].max()



# Labels (9) and (10) now state the filters actually applied: (9) has no
# architecture, load-factor or SQR filter; (10) is all systems at LF70 / sqr0.
print(f"(1) average speedup msb over lsb <Intel, 128-bit register, 8-bit fingerprints, LF70>: {avg_intel_128_8_lf70_msbspeedup}\n"
      f"(2) average speedup msb over lsb <AMD, 128-bit register, 8-bit fingerprints, LF70>: {avg_amd_128_8_lf70_msbspeedup}\n"
      f"(3) average speedup msb over lsb <A64FX, 128-bit register, 8-bit fingerprints, LF70>: {avg_a64fx_128_8_lf70_msbspeedup}\n"
      f"(4) average speedup msb over lsb <Power, 128-bit register, 8-bit fingerprints, LF70>: {avg_power_128_8_lf70_msbspeedup}\n"
      f"(4.5) average speedup msb over lsb <all sys, 128-bit register, 8-bit fingerprints, LF70>: {avg_vfp_128_8_lf70_msbspeedup}\n"
      f"(5) max speedup msb over lsb <Intel, 128-bit register, 8-bit fingerprints, LF70, sqr0>: {max_intel_128_8_lf70_msbspeedup}\n"
      f"(6) max speedup msb over lsb <AMD, 128-bit register, 8-bit fingerprints, LF70, sqr0>: {max_amd_128_8_lf70_msbspeedup}\n"
      f"(7) max speedup msb over lsb <A64FX, 128-bit register, 8-bit fingerprints, LF70, sqr0>: {max_a64fx_128_8_lf70_msbspeedup}\n"
      f"(8) max speedup msb over lsb <Power, 128-bit register, 8-bit fingerprints, LF70, sqr0>: {max_power_128_8_lf70_msbspeedup}\n"
      f"(9) max speedup msb over lsb <all sys, 128-bit register, 8-bit fingerprints>: {max_msb_speedup}\n"
      f"(10) max speedup msb over lsb <all sys, 128-bit register, 8-bit fingerprints, LF70, sqr0>: {max_128_8_lf70_msbspeedup}\n"
     )

# Store the values rounded to two decimals in result_dict.
result_dict["AllSysLf70VFP1288MSBSpeedup"] = round(avg_vfp_128_8_lf70_msbspeedup, 2)
result_dict["IntLf70VFP1288MSBSpeedup"] = round(avg_intel_128_8_lf70_msbspeedup, 2)
result_dict["AmdLf70VFP1288MSBSpeedup"] = round(avg_amd_128_8_lf70_msbspeedup, 2)
result_dict["A64FXLf70VFP1288MSBSpeedup"] = round(avg_a64fx_128_8_lf70_msbspeedup, 2)
result_dict["PwrLf70VFP1288MSBSpeedup"] = round(avg_power_128_8_lf70_msbspeedup, 2)
result_dict["IntLf70Sqr0VFP1288MSBSpeedupPeak"] = round(max_intel_128_8_lf70_msbspeedup, 2)
result_dict["AmdLf70Sqr0VFP1288MSBSpeedupPeak"] = round(max_amd_128_8_lf70_msbspeedup, 2)
result_dict["A64FXLf70Sqr0VFP1288MSBSpeedupPeak"] = round(max_a64fx_128_8_lf70_msbspeedup, 2)
result_dict["PwrLf70Sqr0VFP1288MSBSpeedupPeak"] = round(max_power_128_8_lf70_msbspeedup, 2)
result_dict["VFP1288MSBSpeedupPeak"] = round(max_msb_speedup, 2)
result_dict["AllSysLf70Sqr0VFP1288MSBSpeedupPeak"] = round(max_128_8_lf70_msbspeedup, 2)


In [None]:
# Colour per plot identifier for the 16-bit-fingerprint LSBLSB vs. LSBMSB comparison.
pal_dict = {
    'FingerPSIMD_16B_LSBLSB_TESTZ_256_THP': deep_palette[2],
    'FingerPSIMD_16B_LSBLSB_TESTZ_512_THP': deep_palette[4],
    'FingerPSIMD_16B_LSBMSB_TESTZ_256_THP': deep_palette[3],
    'FingerPSIMD_16B_LSBMSB_TESTZ_512_THP': deep_palette[5],
    'FingerPSIMD_16B_LSBMSB_TESTZ_128_THP': deep_palette[1],
    'FingerPSIMD_16B_LSBLSB_TESTZ_128_THP': deep_palette[0],
}

# 16-bit counterpart of the LSBLSB/LSBMSB plot: the 8-bit variants are excluded
# by prefix and no custom legend labels are supplied. Rebinds read_d / write_d.
read_d, write_d = analysis_plot(
    "FingerprintingSIMDSoAHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_non_avx512=True,
    hide_msb_fingerprints=False,
    hide_lsbmsb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_non_s2n=False,
    exclude_prefixes=[
        "FingerPSIMD_16B_LSBLSB_NOTEST",
        "FingerPSIMD_16B_LSBMSB_NOTEST",
        "FingerPSIMD_8B",
    ],
    legend_label_dict={},
    word_filter=["_NEON_S2N", "_AVX512", "_SVE", "_Hints"],
    file_path="full_vfp_msb_16b.pdf",
    ncol_legend=3,
)

## LSBMSB vs MSBLSB

In [None]:
# Colour per plot identifier for the 8-bit-fingerprint MSBLSB vs. LSBMSB comparison.
pal_dict = {
    'FingerPSIMD_8B_MSBLSB_TESTZ_256_THP': deep_palette[2],
    'FingerPSIMD_8B_MSBLSB_TESTZ_512_THP': deep_palette[4],
    'FingerPSIMD_8B_LSBMSB_TESTZ_256_THP': deep_palette[3],
    'FingerPSIMD_8B_LSBMSB_TESTZ_512_THP': deep_palette[5],
    'FingerPSIMD_8B_LSBMSB_TESTZ_128_THP': deep_palette[1],
    'FingerPSIMD_8B_MSBLSB_TESTZ_128_THP': deep_palette[0],
}

# Both MSBLSB and LSBMSB variants stay visible (their hide flags are False),
# while hide_lsb_fingerprints is set. Rebinds read_d / write_d.
read_d, write_d = analysis_plot(
    "FingerprintingSIMDSoAHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_lsbmsb_fingerprints=False,
    hide_msblsb_fingerprints=False,
    hide_lsb_fingerprints=True,
    hide_msb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_non_avx512=True,
    hide_non_s2n=False,
    exclude_prefixes=[
        "FingerPSIMD_8B_NOTEST",
        "FingerPSIMD_16B",
        'FingerPSIMD_8B_TESTZ_256_AVX512',
        'FingerPSIMD_8B_TESTZ_128_AVX512',
    ],
    legend_label_dict={},
    word_filter=["_NEON_S2N", "_AVX512", "_SVE", "_Hints"],
    file_path="full_vfp_msblsb_vs_lsbmsb_8b.pdf",
)

## Paper Plot Vector Register Size / Fingerprint Size (AVX512, LSBMSB) 

In [None]:
# Colour per plot identifier for the register-size / fingerprint-size comparison.
pal_dict = {
    'FingerPSIMD_8B_TESTZ_256_THP': deep_palette[2],
    'FingerPSIMD_8B_TESTZ_512_THP': deep_palette[4],
    'FingerPSIMD_16B_TESTZ_256_THP': deep_palette[3],
    'FingerPSIMD_16B_TESTZ_512_THP': deep_palette[5],
    'FingerPSIMD_16B_TESTZ_128_THP': deep_palette[1],
    'FingerPSIMD_8B_TESTZ_128_THP': deep_palette[0],
}

# "_LSBMSB" is part of word_filter here, and the palette / legend keys above
# are written without it. read_d / write_d are kept for the number
# computations in a later cell.
read_d, write_d = analysis_plot(
    "FingerprintingSIMDSoAHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_non_avx512=True,
    hide_lsbmsb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_non_s2n=False,
    exclude_prefixes=[
        "FingerPSIMD_8B_NOTEST",
        "FingerPSIMD_16B_NOTEST",
        'FingerPSIMD_16B_MSBFP_NOTESTZ',
    ],
    legend_label_dict={
        'FingerPSIMD_8B_TESTZ_256_THP': "VFP(8/256)",
        'FingerPSIMD_8B_TESTZ_512_THP': "VFP(8/512)",
        'FingerPSIMD_16B_TESTZ_256_THP': "VFP(16/256)",
        'FingerPSIMD_16B_TESTZ_512_THP': "VFP(16/512)",
        'FingerPSIMD_16B_TESTZ_128_THP': "VFP(16/128)",
        'FingerPSIMD_8B_TESTZ_128_THP': "VFP(8/128)",
    },
    word_filter=["_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_LSBMSB"],
    file_path="full_vfp_vrs.pdf",
    ncol_legend=3,
)

In [None]:
#compare_pal = {"LP_UN_THP": "#a1dab4", "RecalcRH_UN_THP": "#2c7fb8", "Chain_BudKV_THP": "#253494", "LPPackedSoA_THP": "#41b6c4"}

# Fixed hex colours per plot identifier for the paper version of the plot.
pal_dict = {
    'FingerPSIMD_8B_TESTZ_256_THP': "#2c7fb8",
    'FingerPSIMD_8B_TESTZ_512_THP': "#253494",
    'FingerPSIMD_16B_TESTZ_256_THP': "#fd8d3c",
    'FingerPSIMD_16B_TESTZ_512_THP': "#e31a1c",
    'FingerPSIMD_16B_TESTZ_128_THP': "#fecc5c",
    'FingerPSIMD_8B_TESTZ_128_THP': "#41b6c4",
}

# Paper variant: two architectures, the values [50, 90], short legend labels
# and extra layout options; the returned data is discarded.
_ = analysis_plot(
    "FingerprintingSIMDSoAHashTable",
    pal_dict,
    ["Intel x86", "A64FX ARM"],
    [50, 90],
    240,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_non_avx512=True,
    hide_lsbmsb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_non_s2n=False,
    exclude_prefixes=[
        "FingerPSIMD_8B_NOTEST",
        "FingerPSIMD_16B_NOTEST",
        'FingerPSIMD_16B_LSBMSB_NOTESTZ',
    ],
    legend_label_dict={
        'FingerPSIMD_8B_TESTZ_256_THP': "8/256",
        'FingerPSIMD_8B_TESTZ_512_THP': "8/512",
        'FingerPSIMD_16B_TESTZ_256_THP': "16/256",
        'FingerPSIMD_16B_TESTZ_512_THP': "16/512",
        'FingerPSIMD_16B_TESTZ_128_THP': "16/128",
        'FingerPSIMD_8B_TESTZ_128_THP': "8/128",
    },
    word_filter=["_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_LSBMSB"],
    file_path="paper_vfp_vrs.pdf",
    ncol_legend=6,
    label_order=[3, 0, 4, 1, 5, 2],
    legend_height_shift=-0.25,
    ylabel_padding=0.5,
    xlabel_padding=0.5,
    disable_legend_border=True,
    reduce_xticks=True,
    reduce_yticks=True,
    reduce_minor=True,
)

In [None]:
# Numbers
# Work on a copy of read_d (returned by an earlier analysis_plot call) so the
# columns added below are not written into read_d itself.
df1 = read_d.copy()
#df2 = write_d.copy()

def get_128_8_lookups(row):
    """Return ``MLookups/s`` of the ``FingerPSIMD_8B_TESTZ_128_THP`` row matching ``row``.

    The match is done in the global ``df1`` on Arch, Compiler, PageSize,
    HugePageSize, SQR, Size, Distribution and LoadFactor.

    Raises:
        ValueError: if the number of matching rows is not exactly one; the
            matching rows are printed first.
    """
    config_columns = ("Arch", "Compiler", "PageSize", "HugePageSize",
                      "SQR", "Size", "Distribution", "LoadFactor")
    selector = df1["PlotIdentifier"] == "FingerPSIMD_8B_TESTZ_128_THP"
    for column in config_columns:
        selector = selector & (df1[column] == row[column])
    matches = df1[selector]

    if len(matches) != 1:
        print(matches)
        raise ValueError('A very specific bad thing happened.')

    return matches.iloc[0]["MLookups/s"]

def get_128_16_lookups(row):
    """Return ``MLookups/s`` of the ``FingerPSIMD_16B_TESTZ_128_THP`` row matching ``row``.

    The match is done in the global ``df1`` on Arch, Compiler, PageSize,
    HugePageSize, SQR, Size, Distribution and LoadFactor. An assertion fails
    unless exactly one row matches.
    """
    config_columns = ("Arch", "Compiler", "PageSize", "HugePageSize",
                      "SQR", "Size", "Distribution", "LoadFactor")
    selector = df1["PlotIdentifier"] == "FingerPSIMD_16B_TESTZ_128_THP"
    for column in config_columns:
        selector = selector & (df1[column] == row[column])
    matches = df1[selector]
    assert len(matches) == 1
    return matches.iloc[0]["MLookups/s"]

def get_256_8_lookups(row):
    """Return ``MLookups/s`` of the ``FingerPSIMD_8B_TESTZ_256_THP`` row matching ``row``.

    Rows whose Arch is neither "Intel x86" nor "AMD x86" get the constant 1
    without any lookup. Otherwise the match is done in the global ``df1`` on
    Arch, Compiler, PageSize, HugePageSize, SQR, Size, Distribution and
    LoadFactor, and an assertion fails unless exactly one row matches.
    """
    if row["Arch"] not in ("Intel x86", "AMD x86"):
        return 1

    config_columns = ("Arch", "Compiler", "PageSize", "HugePageSize",
                      "SQR", "Size", "Distribution", "LoadFactor")
    selector = df1["PlotIdentifier"] == "FingerPSIMD_8B_TESTZ_256_THP"
    for column in config_columns:
        selector = selector & (df1[column] == row[column])
    matches = df1[selector]
    assert len(matches) == 1
    return matches.iloc[0]["MLookups/s"]

def get_256_16_lookups(row):
    """Return ``MLookups/s`` of the ``FingerPSIMD_16B_TESTZ_256_THP`` row matching ``row``.

    Rows whose Arch is neither "Intel x86" nor "AMD x86" get the constant 1
    without any lookup. Otherwise the match is done in the global ``df1`` on
    Arch, Compiler, PageSize, HugePageSize, SQR, Size, Distribution and
    LoadFactor, and an assertion fails unless exactly one row matches.
    """
    if row["Arch"] not in ("Intel x86", "AMD x86"):
        return 1

    config_columns = ("Arch", "Compiler", "PageSize", "HugePageSize",
                      "SQR", "Size", "Distribution", "LoadFactor")
    selector = df1["PlotIdentifier"] == "FingerPSIMD_16B_TESTZ_256_THP"
    for column in config_columns:
        selector = selector & (df1[column] == row[column])
    matches = df1[selector]
    assert len(matches) == 1
    return matches.iloc[0]["MLookups/s"]

def get_512_8_lookups(row):
    """Return ``MLookups/s`` of the ``FingerPSIMD_8B_TESTZ_512_THP`` row matching ``row``.

    Rows whose Arch is neither "Intel x86" nor "A64FX ARM" get the constant 1
    without any lookup. Otherwise the match is done in the global ``df1`` on
    Arch, Compiler, PageSize, HugePageSize, SQR, Size, Distribution and
    LoadFactor, and an assertion fails unless exactly one row matches.
    """
    if row["Arch"] not in ("Intel x86", "A64FX ARM"):
        return 1

    config_columns = ("Arch", "Compiler", "PageSize", "HugePageSize",
                      "SQR", "Size", "Distribution", "LoadFactor")
    selector = df1["PlotIdentifier"] == "FingerPSIMD_8B_TESTZ_512_THP"
    for column in config_columns:
        selector = selector & (df1[column] == row[column])
    matches = df1[selector]
    assert len(matches) == 1
    return matches.iloc[0]["MLookups/s"]

def get_512_16_lookups(row):
    """Return ``MLookups/s`` of the ``FingerPSIMD_16B_TESTZ_512_THP`` row matching ``row``.

    Rows whose Arch is neither "Intel x86" nor "A64FX ARM" get the constant 1
    without any lookup. Otherwise the match is done in the global ``df1`` on
    Arch, Compiler, PageSize, HugePageSize, SQR, Size, Distribution and
    LoadFactor, and an assertion fails unless exactly one row matches.
    """
    if row["Arch"] not in ("Intel x86", "A64FX ARM"):
        return 1

    config_columns = ("Arch", "Compiler", "PageSize", "HugePageSize",
                      "SQR", "Size", "Distribution", "LoadFactor")
    selector = df1["PlotIdentifier"] == "FingerPSIMD_16B_TESTZ_512_THP"
    for column in config_columns:
        selector = selector & (df1[column] == row[column])
    matches = df1[selector]
    assert len(matches) == 1
    return matches.iloc[0]["MLookups/s"]


# For every (register width, fingerprint size) reference configuration, attach
# the reference throughput of the matching benchmark configuration and the
# throughput ratio of each row against it.
# NOTE: the 256-bit helpers return the constant 1 for non-x86 rows and the
# 512-bit helpers return 1 for rows that are neither Intel nor A64FX, so the
# corresponding *_Speedup values on those rows are just the raw MLookups/s.
df1["128_8_Perf"] = df1.apply(lambda row: get_128_8_lookups(row), axis=1)
df1["128_8_Speedup"] = df1["MLookups/s"] / df1["128_8_Perf"]

df1["256_8_Perf"] = df1.apply(lambda row: get_256_8_lookups(row), axis=1)
df1["256_8_Speedup"] = df1["MLookups/s"] / df1["256_8_Perf"]

df1["512_8_Perf"] = df1.apply(lambda row: get_512_8_lookups(row), axis=1)
df1["512_8_Speedup"] = df1["MLookups/s"] / df1["512_8_Perf"]

df1["128_16_Perf"] = df1.apply(lambda row: get_128_16_lookups(row), axis=1)
df1["128_16_Speedup"] = df1["MLookups/s"] / df1["128_16_Perf"]

df1["256_16_Perf"] = df1.apply(lambda row: get_256_16_lookups(row), axis=1)
df1["256_16_Speedup"] = df1["MLookups/s"] / df1["256_16_Perf"]

df1["512_16_Perf"] = df1.apply(lambda row: get_512_16_lookups(row), axis=1)
df1["512_16_Speedup"] = df1["MLookups/s"] / df1["512_16_Perf"]

# Intel, load factor 25: 128-bit variants relative to the 256-bit and 512-bit ones.
avg_lf25_128_over_256_speedup_8_intel = df1[(df1["Arch"] == "Intel x86") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_TESTZ_128_THP") & (df1["LoadFactor"] == 25)]["256_8_Speedup"].mean()
avg_lf25_128_over_256_speedup_16_intel = df1[(df1["Arch"] == "Intel x86") & (df1["PlotIdentifier"] == "FingerPSIMD_16B_TESTZ_128_THP") & (df1["LoadFactor"] == 25)]["256_16_Speedup"].mean()
avg_lf25_128_over_512_speedup_8_intel = df1[(df1["Arch"] == "Intel x86") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_TESTZ_128_THP") & (df1["LoadFactor"] == 25)]["512_8_Speedup"].mean()
avg_lf25_128_over_512_speedup_16_intel = df1[(df1["Arch"] == "Intel x86") & (df1["PlotIdentifier"] == "FingerPSIMD_16B_TESTZ_128_THP") & (df1["LoadFactor"] == 25)]["512_16_Speedup"].mean()

# Intel, load factor 90: 512-bit variants relative to the 256-bit and 128-bit ones.
avg_lf90_512_over_256_speedup_8_intel = df1[(df1["Arch"] == "Intel x86") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_TESTZ_512_THP") & (df1["LoadFactor"] == 90)]["256_8_Speedup"].mean()
avg_lf90_512_over_256_speedup_16_intel = df1[(df1["Arch"] == "Intel x86") & (df1["PlotIdentifier"] == "FingerPSIMD_16B_TESTZ_512_THP") & (df1["LoadFactor"] == 90)]["256_16_Speedup"].mean()
avg_lf90_512_over_128_speedup_8_intel = df1[(df1["Arch"] == "Intel x86") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_TESTZ_512_THP") & (df1["LoadFactor"] == 90)]["128_8_Speedup"].mean()
avg_lf90_512_over_128_speedup_16_intel = df1[(df1["Arch"] == "Intel x86") & (df1["PlotIdentifier"] == "FingerPSIMD_16B_TESTZ_512_THP") & (df1["LoadFactor"] == 90)]["128_16_Speedup"].mean()

# AMD, load factor 90: 256-bit variants relative to the 128-bit ones.
avg_lf90_256_over_128_speedup_8_amd = df1[(df1["Arch"] == "AMD x86") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_TESTZ_256_THP") & (df1["LoadFactor"] == 90)]["128_8_Speedup"].mean()
avg_lf90_256_over_128_speedup_16_amd = df1[(df1["Arch"] == "AMD x86") & (df1["PlotIdentifier"] == "FingerPSIMD_16B_TESTZ_256_THP") & (df1["LoadFactor"] == 90)]["128_16_Speedup"].mean()

# Load factor 90: 8-bit relative to 16-bit fingerprints at one register width per system.
avg_lf90_8_over_16_speedup_512_intel = df1[(df1["Arch"] == "Intel x86") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_TESTZ_512_THP") & (df1["LoadFactor"] == 90)]["512_16_Speedup"].mean()
avg_lf90_8_over_16_speedup_256_amd = df1[(df1["Arch"] == "AMD x86") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_TESTZ_256_THP") & (df1["LoadFactor"] == 90)]["256_16_Speedup"].mean()
# NOTE(review): this previously selected the 512-bit 8B rows while dividing by
# the 128-bit 16B reference, mixing register widths. The variable name, the
# printed label and the speedup column all say 128-bit, so the selector was
# aligned to 128-bit. If the 512-bit comparison was intended instead, switch
# the identifier back to _512_ and the column to "512_16_Speedup".
avg_lf90_8_over_16_speedup_128_a64fx = df1[(df1["Arch"] == "A64FX ARM") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_TESTZ_128_THP") & (df1["LoadFactor"] == 90)]["128_16_Speedup"].mean()
avg_lf90_8_over_16_speedup_128_power = df1[(df1["Arch"] == "Power") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_TESTZ_128_THP") & (df1["LoadFactor"] == 90)]["128_16_Speedup"].mean()


# Output lines are numbered consecutively (the indices (5) and (6) used to appear twice).
print(f"(1) average speedup 128-bit over 256-bit <Intel, LF25, 8B>: {avg_lf25_128_over_256_speedup_8_intel}\n"
      f"(2) average speedup 128-bit over 256-bit <Intel, LF25, 16B>: {avg_lf25_128_over_256_speedup_16_intel}\n"
      f"(3) average speedup 128-bit over 512-bit <Intel, LF25, 8B>: {avg_lf25_128_over_512_speedup_8_intel}\n"
      f"(4) average speedup 128-bit over 512-bit <Intel, LF25, 16B>: {avg_lf25_128_over_512_speedup_16_intel}\n"
      f"(5) average speedup 512-bit over 256-bit <Intel, LF90, 8B>: {avg_lf90_512_over_256_speedup_8_intel}\n"
      f"(6) average speedup 512-bit over 256-bit <Intel, LF90, 16B>: {avg_lf90_512_over_256_speedup_16_intel}\n"
      f"(7) average speedup 512-bit over 128-bit <Intel, LF90, 8B>: {avg_lf90_512_over_128_speedup_8_intel}\n"
      f"(8) average speedup 512-bit over 128-bit <Intel, LF90, 16B>: {avg_lf90_512_over_128_speedup_16_intel}\n"
      f"(9) average speedup 256-bit over 128-bit <AMD, LF90, 8B>: {avg_lf90_256_over_128_speedup_8_amd}\n"
      f"(10) average speedup 256-bit over 128-bit <AMD, LF90, 16B>: {avg_lf90_256_over_128_speedup_16_amd}\n"
      f"(11) average speedup 8-bit over 16-bit <Intel, LF90, 512-bit>: {avg_lf90_8_over_16_speedup_512_intel}\n"
      f"(12) average speedup 8-bit over 16-bit <AMD, LF90, 256-bit>: {avg_lf90_8_over_16_speedup_256_amd}\n"
      f"(13) average speedup 8-bit over 16-bit <ARM, LF90, 128-bit>: {avg_lf90_8_over_16_speedup_128_a64fx}\n"
      f"(14) average speedup 8-bit over 16-bit <Power, LF90, 128-bit>: {avg_lf90_8_over_16_speedup_128_power}\n"
     )

# Store the values rounded to two decimals in result_dict.
result_dict["IntLf25VFP8Speedup128VS256"] = round(avg_lf25_128_over_256_speedup_8_intel, 2)
result_dict["IntLf25VFP16Speedup128VS256"] = round(avg_lf25_128_over_256_speedup_16_intel, 2)
result_dict["IntLf25VFP8Speedup128VS512"] = round(avg_lf25_128_over_512_speedup_8_intel, 2)
result_dict["IntLf25VFP16Speedup128VS512"] = round(avg_lf25_128_over_512_speedup_16_intel, 2)

result_dict["IntLf90VFP8Speedup512VS256"] = round(avg_lf90_512_over_256_speedup_8_intel, 2)
result_dict["IntLf90VFP16Speedup512VS256"] = round(avg_lf90_512_over_256_speedup_16_intel, 2)
result_dict["IntLf90VFP8Speedup512VS128"] = round(avg_lf90_512_over_128_speedup_8_intel, 2)
result_dict["IntLf90VFP16Speedup512VS128"] = round(avg_lf90_512_over_128_speedup_16_intel, 2)
result_dict["AmdLf90VFP8Speedup256VS128"] = round(avg_lf90_256_over_128_speedup_8_amd, 2)
result_dict["AmdLf90VFP16Speedup256VS128"] = round(avg_lf90_256_over_128_speedup_16_amd, 2)

result_dict["IntLf90VFP8Speedup8VS16"] = round(avg_lf90_8_over_16_speedup_512_intel, 2)
result_dict["AmdLf90VFP8Speedup8VS16"] = round(avg_lf90_8_over_16_speedup_256_amd, 2)
result_dict["A64FXLf90VFP8Speedup8VS16"] = round(avg_lf90_8_over_16_speedup_128_a64fx, 2)
result_dict["PwrLf90VFP8Speedup8VS16"] = round(avg_lf90_8_over_16_speedup_128_power, 2)

## Vergleich zu klassischen Hashmap (MA only)

In [None]:
# Colours for the VFP(8) variant and the three force-included non-VFP tables.
pal_dict = {
    'FingerPSIMD_8B_THP': deep_palette[3],
    'StoreRH_THP': deep_palette[0],
    'LP_UN_THP': deep_palette[1],
    'LPPackedSoA_THP': deep_palette[2],
}

# word_filter additionally lists the TESTZ / register-width / LSBMSB / MSBFP
# parts, and the palette and legend keys use the short 'FingerPSIMD_8B_THP'
# form. read_d / write_d are kept for the number computations in a later cell.
read_d, write_d = analysis_plot(
    "FingerprintingSIMDSoAHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_non_avx512=True,
    hide_lsbmsb_fingerprints=False,
    hide_non_s2n=False,
    exclude_prefixes=[
        "FingerPSIMD_8B_LSBMSB_NOTEST",
        "FingerPSIMD_16B_LSBMSB_NOTEST",
        'FingerPSIMD_16B_MSBLSB_NOTESTZ',
    ],
    word_filter=["_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_MSBFP",
                 "_TESTZ", "_512", "_128", "_256", "_LSBMSB"],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=True,
    force_include=["LP_UN_THP", "LPPackedSoA_THP", "StoreRH_THP"],
    legend_label_dict={
        "LP_UN_THP": "LP(AoS/Unal.)",
        "StoreRH_THP": "StoringRH",
        "FingerPSIMD_8B_THP": "VFP(8)",
        "LPPackedSoA_THP": "LP(SoA/Packed)",
    },
    file_path="full_vfp_classic.pdf",
)

In [None]:
# Second VFP-vs-classic plot with the same palette and filters but only the
# values [50, 90]; the returned data is discarded.
_ = analysis_plot(
    "FingerprintingSIMDSoAHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [50, 90],
    375,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_non_avx512=True,
    hide_lsbmsb_fingerprints=False,
    hide_non_s2n=False,
    exclude_prefixes=[
        "FingerPSIMD_8B_LSBMSB_NOTEST",
        "FingerPSIMD_16B_LSBMSB_NOTEST",
        'FingerPSIMD_16B_MSBLSB_NOTESTZ',
    ],
    word_filter=["_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_MSBFP",
                 "_TESTZ", "_512", "_128", "_256", "_LSBMSB"],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=True,
    force_include=["LP_UN_THP", "LPPackedSoA_THP", "StoreRH_THP"],
    legend_label_dict={
        "LP_UN_THP": "LP(AoS/Unal.)",
        "StoreRH_THP": "StoringRH",
        "FingerPSIMD_8B_THP": "VFP(8)",
        "LPPackedSoA_THP": "LP(SoA/Packed)",
    },
    file_path="vfp_classic.pdf",
)

In [None]:
# Numbers
# Work on a copy of read_d (returned by an earlier analysis_plot call) so the
# columns added below are not written into read_d itself.
df1 = read_d.copy()
#df2 = write_d.copy()

def get_packed_soa_lookups(row):
    """Return the "MLookups/s" of the LPPackedSoA_THP run that matches ``row``.

    The baseline is searched in the module-level ``df1`` frame: it is the row
    whose "PlotIdentifier" is "LPPackedSoA_THP" and whose Arch, Compiler,
    PageSize, HugePageSize, SQR, Size, Distribution and LoadFactor all equal
    the values in ``row``.

    Args:
        row: A row of ``df1`` (or any mapping providing the eight
            configuration keys listed above).

    Returns:
        The "MLookups/s" value of the single matching baseline row.

    Raises:
        AssertionError: If the number of matching baseline rows is not
            exactly one.
    """
    # Build the boolean mask once; it is needed for both the uniqueness check
    # and the actual lookup.
    mask = df1["PlotIdentifier"] == "LPPackedSoA_THP"
    for column in ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR", "Size", "Distribution", "LoadFactor"):
        mask = mask & (df1[column] == row[column])

    baseline = df1[mask]
    if len(baseline) != 1:
        # Raised explicitly instead of via `assert` so the check is not
        # stripped when Python runs with -O.
        raise AssertionError(
            f"expected exactly one LPPackedSoA_THP row for this configuration, found {len(baseline)}"
        )

    return baseline.iloc[0]["MLookups/s"]


# Attach the packed-SoA baseline throughput to every row and express each
# row's own throughput relative to it.
df1["PackedSoAPerf"] = df1.apply(get_packed_soa_lookups, axis=1)
df1["PSoASpeedup"] = df1["MLookups/s"] / df1["PackedSoAPerf"]

# Every average below looks at FingerPSIMD_8B_THP rows at load factor 90, so
# narrow the frame once and slice it per architecture.
vfp_lf90 = df1[(df1["PlotIdentifier"] == "FingerPSIMD_8B_THP") & (df1["LoadFactor"] == 90)]
vfp_lf90_intel = vfp_lf90[vfp_lf90["Arch"] == "Intel x86"]
vfp_lf90_amd = vfp_lf90[vfp_lf90["Arch"] == "AMD x86"]
vfp_lf90_power = vfp_lf90[vfp_lf90["Arch"] == "Power"]
vfp_lf90_a64fx = vfp_lf90[vfp_lf90["Arch"] == "A64FX ARM"]

avg_lf90_vfp_vs_soa_intel = vfp_lf90_intel["PSoASpeedup"].mean()
avg_lf90_vfp_vs_soa_amd = vfp_lf90_amd["PSoASpeedup"].mean()
avg_lf90_vfp_vs_soa_power = vfp_lf90_power["PSoASpeedup"].mean()
avg_lf90_vfp_vs_soa_a64fx = vfp_lf90_a64fx["PSoASpeedup"].mean()

avg_lf90_vfp_mops_intel = vfp_lf90_intel["MLookups/s"].mean()
avg_lf90_vfp_mops_power = vfp_lf90_power["MLookups/s"].mean()
# Same A64FX average, restricted to the rows with SQR == 0.
avg_lf90_sqr0_vfp_vs_soa_a64fx = vfp_lf90_a64fx[vfp_lf90_a64fx["SQR"] == 0]["PSoASpeedup"].mean()

# Unrounded values for manual inspection.
report_lines = [
    f"(1) average speedup VFP over PackedSoa <Intel, LF90>: {avg_lf90_vfp_vs_soa_intel}\n",
    f"(2) average speedup VFP over PackedSoa <AMD, LF90>: {avg_lf90_vfp_vs_soa_amd}\n",
    f"(3) average speedup VFP over PackedSoa <Power, LF90>: {avg_lf90_vfp_vs_soa_power}\n",
    f"(3.5) average speedup VFP over PackedSoa <A64FX, LF90>: {avg_lf90_vfp_vs_soa_a64fx}\n",
    f"(4) average MOps/s <Intel, LF90>: {avg_lf90_vfp_mops_intel}\n",
    f"(5) average MOps/s <Power, LF90>: {avg_lf90_vfp_mops_power}\n",
    f"(6) average speedup VFP over PackedSoa <A64FX, LF90, SQR0>: {avg_lf90_sqr0_vfp_vs_soa_a64fx}\n",
]
print("".join(report_lines))

# Export the values, rounded to two decimals, under their result_dict keys.
for result_key, average in (
    ("IntLf90VFPvsSoA", avg_lf90_vfp_vs_soa_intel),
    ("AmdLf90VFPvsSoA", avg_lf90_vfp_vs_soa_amd),
    ("PwrLf90VFPvsSoA", avg_lf90_vfp_vs_soa_power),
    ("A64FXLf90VFPvsSoA", avg_lf90_vfp_vs_soa_a64fx),
    ("IntLf90VFPMops", avg_lf90_vfp_mops_intel),
    ("PwrLf90VFPMops", avg_lf90_vfp_mops_power),
    ("A64FXLf90Sqr0VFPvsSoA", avg_lf90_sqr0_vfp_vs_soa_a64fx),
):
    result_dict[result_key] = round(average, 2)


# Bucketing Analysis

## TEST vs NOTEST

In [None]:
# Colors for the two plot identifiers compared in this figure: the TESTZ and
# the NOTESTZ flavour of the 8B/128 bucketing table.
pal_dict = {
    "BucketSIMD_8B_TESTZ_128_THP": deep_palette[0],
    "BucketSIMD_8B_NOTESTZ_128_THP": deep_palette[1],
}

# The pair returned by analysis_plot is kept in read_d / write_d.
read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    show_avx512=True,
    hide_non_avx512=False,
    hide_non_thp=True,
    hide_simd_larger_128=True,
    hide_lsb_fingerprints=True,
    hide_msb_fingerprints=False,
    hide_msblsb_fingerprints=False,
    hide_likely_hints=True,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_non_s2n=True,
    exclude_prefixes=["BucketSIMD_16B"],
    legend_label_dict={},
    word_filter=[
        "_NEON_S2N",
        "_KVBu",
        "_64FPPB",
        "_32FPPB",
        "_16FPPB",
        "_8FPPB",
        "_MSBLSB",
        "_AVX512",
    ],
    file_path="full_bbc_test_vs_notest_8b.pdf",
)

## Power/ARM Iterator Validation

In [None]:
# Colors for the 8B/16B tables, each with and without the "VecIt" variant.
# Entry order is kept as written.
pal_dict = {
    "BucketSIMD_16B_TESTZ_128_THP": deep_palette[1],
    "BucketSIMD_16B_VecIt_TESTZ_128_THP": deep_palette[2],
    "BucketSIMD_8B_VecIt_TESTZ_128_THP": deep_palette[3],
    "BucketSIMD_8B_TESTZ_128_THP": deep_palette[0],
}

# Only the A64FX and Power machines are passed for this figure, and
# hide_vec_iterators is switched off.
read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    show_avx512=True,
    hide_non_thp=True,
    hide_simd_larger_128=True,
    hide_lsb_fingerprints=True,
    hide_msb_fingerprints=False,
    hide_lsbmsb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=False,
    hide_svbcast=True,
    hide_notestz=True,
    hide_non_s2n=False,
    exclude_prefixes=[],
    hide_non_likely_hints=True,
    legend_label_dict={},
    word_filter=[
        "_LSBMSB",
        "_NEON_S2N",
        "_NEON_UMINV",
        "_KVBu",
        "_64FPPB",
        "_32FPPB",
        "_16FPPB",
        "_8FPPB",
        "_Hints",
    ],
    file_path="full_bbc_iterators.pdf",
)

## AVX-512 (with likely hints)

In [None]:
# The MSBLSB and LSBMSB identifiers share colors: palette entry 0 for the
# plain variant, entry 1 for the AVX512 variant.
pal_dict = {
    "BucketSIMD_8B_MSBLSB_TESTZ_THP": deep_palette[0],
    "BucketSIMD_8B_MSBLSB_TESTZ_AVX512_THP": deep_palette[1],
    "BucketSIMD_8B_LSBMSB_TESTZ_THP": deep_palette[0],
    "BucketSIMD_8B_LSBMSB_TESTZ_AVX512_THP": deep_palette[1],
}

# Intel-only figure, split by SIMD size with the sizes "128" and "256".
read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    show_avx512=True,
    hide_non_thp=True,
    hide_simd_larger_128=False,
    hide_lsb_fingerprints=True,
    hide_likely_hints=False,
    hide_non_likely_hints=True,
    hide_vec_iterators=True,
    hide_svbcast=True,
    split_by_simd_size=True,
    simd_sizes=["128", "256"],
    hide_notestz=True,
    hide_lsbmsb_fingerprints=False,
    hide_msblsb_fingerprints=True,
    hide_non_s2n=True,
    exclude_prefixes=["BucketSIMD_16B"],
    legend_label_dict={},
    word_filter=[
        "_NEON_S2N",
        "_KVBu",
        "_64FPPB",
        "_32FPPB",
        "_16FPPB",
        "_8FPPB",
        "_Hints",
    ],
    file_path="full_bbc_avx512.pdf",
)

## Bucket size (in the paper)

In [None]:
# One color per (8B or 16B) x (128, 256 or 512) plot identifier. Entry order
# is kept as written.
pal_dict = {
    "BucketSIMD_8B_TESTZ_256_THP": deep_palette[2],
    "BucketSIMD_8B_TESTZ_512_THP": deep_palette[4],
    "BucketSIMD_16B_TESTZ_256_THP": deep_palette[3],
    "BucketSIMD_16B_TESTZ_512_THP": deep_palette[5],
    "BucketSIMD_16B_TESTZ_128_THP": deep_palette[1],
    "BucketSIMD_8B_TESTZ_128_THP": deep_palette[0],
}

# Figure for all four machines. The returned frames stay in read_d / write_d.
read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_lsbmsb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_notestz=True,
    hide_non_avx512=True,
    hide_non_s2n=False,
    exclude_prefixes=[],
    hide_256_on_intel=True,
    legend_label_dict={
        "BucketSIMD_8B_TESTZ_256_THP": "BBC(8/256)",
        "BucketSIMD_8B_TESTZ_512_THP": "BBC(8/512)",
        "BucketSIMD_16B_TESTZ_256_THP": "BBC(16/256)",
        "BucketSIMD_16B_TESTZ_512_THP": "BBC(16/512)",
        "BucketSIMD_16B_TESTZ_128_THP": "BBC(16/128)",
        "BucketSIMD_8B_TESTZ_128_THP": "BBC(8/128)",
    },
    word_filter=[
        "_NEON_S2N",
        "_AVX512",
        "_SVE",
        "_Hints",
        "_LSBMSB",
        "_KVBu",
        "_64FPPB",
        "_32FPPB",
        "_16FPPB",
        "_8FPPB",
    ],
    file_path="full_paper_bbc_vrs.pdf",
    ncol_legend=3,
)

In [None]:
# Disabled alternative palette keyed by FingerPSIMD identifiers:
#pal_dict = { 'FingerPSIMD_8B_TESTZ_256_THP': "#2c7fb8", 'FingerPSIMD_8B_TESTZ_512_THP': "#253494", 
#             'FingerPSIMD_16B_TESTZ_256_THP': "#fd8d3c", 'FingerPSIMD_16B_TESTZ_512_THP': "#e31a1c",
#             'FingerPSIMD_16B_TESTZ_128_THP': "#fecc5c", 'FingerPSIMD_8B_TESTZ_128_THP': "#41b6c4" }

# Fixed hex colors for the four 128/512 identifiers shown in this figure.
pal_dict = {
    "BucketSIMD_8B_TESTZ_512_THP": "#253494",
    "BucketSIMD_16B_TESTZ_512_THP": "#e31a1c",
    "BucketSIMD_16B_TESTZ_128_THP": "#fecc5c",
    "BucketSIMD_8B_TESTZ_128_THP": "#41b6c4",
}

# Figure for Intel and A64FX written to "paper_bbc_vrs.pdf". The return value
# is discarded, so read_d / write_d keep whatever was assigned before.
_ = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "A64FX ARM"],
    [50, 90],
    240,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_lsbmsb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_notestz=True,
    hide_non_avx512=True,
    hide_non_s2n=False,
    exclude_prefixes=[],
    hide_256_on_intel=True,
    legend_label_dict={
        "BucketSIMD_8B_TESTZ_512_THP": "8/512",
        "BucketSIMD_16B_TESTZ_512_THP": "16/512",
        "BucketSIMD_16B_TESTZ_128_THP": "16/128",
        "BucketSIMD_8B_TESTZ_128_THP": "8/128",
    },
    word_filter=[
        "_NEON_S2N",
        "_AVX512",
        "_SVE",
        "_Hints",
        "_LSBMSB",
        "_KVBu",
        "_64FPPB",
        "_32FPPB",
        "_16FPPB",
        "_8FPPB",
    ],
    file_path="paper_bbc_vrs.pdf",
    ncol_legend=4,
    label_order=[2, 3, 0, 1],
    legend_height_shift=-0.25,
    ylabel_padding=0,
    xlabel_padding=0,
    disable_legend_border=True,
    reduce_xticks=True,
    reduce_yticks=True,
    reduce_minor=True,
    extra_legend_columnspacing=0.5,
    extra_legend_handlelength=1,
)

In [None]:
# Numbers
# Derived Perf/Speedup columns are added to this frame further down, so work
# on a copy and leave read_d itself untouched.
df1 = read_d.copy()
#df2 = write_d.copy()

def _rows_matching_config(row, plot_identifier):
    """Return the rows of ``df1`` with ``plot_identifier`` and ``row``'s configuration.

    A row matches when its Arch, Compiler, PageSize, HugePageSize, SQR, Size,
    Distribution and LoadFactor equal those of ``row`` and its
    "PlotIdentifier" equals ``plot_identifier``.
    """
    mask = df1["PlotIdentifier"] == plot_identifier
    for column in ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR", "Size", "Distribution", "LoadFactor"):
        mask = mask & (df1[column] == row[column])
    return df1[mask]


def _single_match_lookups(row, plot_identifier):
    """Return "MLookups/s" of the unique ``plot_identifier`` row matching ``row``.

    Raises:
        AssertionError: If the number of matching rows is not exactly one.
    """
    matches = _rows_matching_config(row, plot_identifier)
    if len(matches) != 1:
        # Raised explicitly instead of via `assert` so the check is not
        # stripped when Python runs with -O.
        raise AssertionError(
            f"expected exactly one {plot_identifier} row for this configuration, found {len(matches)}"
        )
    return matches.iloc[0]["MLookups/s"]


def get_128_8_lookups(row):
    """Return "MLookups/s" of the BucketSIMD_8B_TESTZ_128_THP run matching ``row``.

    Raises:
        ValueError: If the number of matching rows is not exactly one; the
            offending selection is printed first.
    """
    matches = _rows_matching_config(row, "BucketSIMD_8B_TESTZ_128_THP")
    if len(matches) != 1:
        print(matches)
        raise ValueError(
            f"expected exactly one BucketSIMD_8B_TESTZ_128_THP row for this configuration, found {len(matches)}"
        )

    return matches.iloc[0]["MLookups/s"]


def get_128_16_lookups(row):
    """Return "MLookups/s" of the BucketSIMD_16B_TESTZ_128_THP run matching ``row``.

    Raises:
        AssertionError: If the number of matching rows is not exactly one.
    """
    return _single_match_lookups(row, "BucketSIMD_16B_TESTZ_128_THP")


def get_256_8_lookups(row):
    """Return "MLookups/s" of the BucketSIMD_8B_TESTZ_256_THP run matching ``row``.

    For any "Arch" other than "Intel x86" and "AMD x86" no lookup is done and
    the placeholder 1 is returned.

    Raises:
        AssertionError: If a lookup is done and the number of matching rows
            is not exactly one.
    """
    if row["Arch"] not in ["Intel x86", "AMD x86"]:
        return 1

    return _single_match_lookups(row, "BucketSIMD_8B_TESTZ_256_THP")


def get_256_16_lookups(row):
    """Return "MLookups/s" of the BucketSIMD_16B_TESTZ_256_THP run matching ``row``.

    For any "Arch" other than "Intel x86" and "AMD x86" no lookup is done and
    the placeholder 1 is returned.

    Raises:
        AssertionError: If a lookup is done and the number of matching rows
            is not exactly one.
    """
    if row["Arch"] not in ["Intel x86", "AMD x86"]:
        return 1

    return _single_match_lookups(row, "BucketSIMD_16B_TESTZ_256_THP")


def get_512_8_lookups(row):
    """Return "MLookups/s" of the BucketSIMD_8B_TESTZ_512_THP run matching ``row``.

    For any "Arch" other than "Intel x86" and "A64FX ARM" no lookup is done
    and the placeholder 1 is returned.

    Raises:
        AssertionError: If a lookup is done and the number of matching rows
            is not exactly one.
    """
    if row["Arch"] not in ["Intel x86", "A64FX ARM"]:
        return 1

    return _single_match_lookups(row, "BucketSIMD_8B_TESTZ_512_THP")


def get_512_16_lookups(row):
    """Return "MLookups/s" of the BucketSIMD_16B_TESTZ_512_THP run matching ``row``.

    For any "Arch" other than "Intel x86" and "A64FX ARM" no lookup is done
    and the placeholder 1 is returned.

    Raises:
        AssertionError: If a lookup is done and the number of matching rows
            is not exactly one.
    """
    if row["Arch"] not in ["Intel x86", "A64FX ARM"]:
        return 1

    return _single_match_lookups(row, "BucketSIMD_16B_TESTZ_512_THP")


# For every reference variant, add its throughput for the row's configuration
# ("*_Perf") and the row's own throughput relative to it ("*_Speedup").
for perf_column, speedup_column, reference_getter in (
    ("128_8_Perf", "128_8_Speedup", get_128_8_lookups),
    ("256_8_Perf", "256_8_Speedup", get_256_8_lookups),
    ("512_8_Perf", "512_8_Speedup", get_512_8_lookups),
    ("128_16_Perf", "128_16_Speedup", get_128_16_lookups),
    ("256_16_Perf", "256_16_Speedup", get_256_16_lookups),
    ("512_16_Perf", "512_16_Speedup", get_512_16_lookups),
):
    df1[perf_column] = df1.apply(reference_getter, axis=1)
    df1[speedup_column] = df1["MLookups/s"] / df1[perf_column]

def _avg_bbc_speedup(arch, plot_identifier, load_factor, speedup_column):
    """Mean of ``speedup_column`` over the df1 rows of one arch, identifier and load factor."""
    selection = df1[
        (df1["Arch"] == arch)
        & (df1["PlotIdentifier"] == plot_identifier)
        & (df1["LoadFactor"] == load_factor)
    ]
    return selection[speedup_column].mean()


# Intel, load factor 25: 128-bit rows relative to the 256-bit / 512-bit columns.
avg_lf25_128_over_256_speedup_8_intel = _avg_bbc_speedup("Intel x86", "BucketSIMD_8B_TESTZ_128_THP", 25, "256_8_Speedup")
avg_lf25_128_over_256_speedup_16_intel = _avg_bbc_speedup("Intel x86", "BucketSIMD_16B_TESTZ_128_THP", 25, "256_16_Speedup")
avg_lf25_128_over_512_speedup_8_intel = _avg_bbc_speedup("Intel x86", "BucketSIMD_8B_TESTZ_128_THP", 25, "512_8_Speedup")
avg_lf25_128_over_512_speedup_16_intel = _avg_bbc_speedup("Intel x86", "BucketSIMD_16B_TESTZ_128_THP", 25, "512_16_Speedup")

# Intel, load factor 70.
avg_lf70_256_over_128_speedup_8_intel = _avg_bbc_speedup("Intel x86", "BucketSIMD_8B_TESTZ_256_THP", 70, "128_8_Speedup")
avg_lf70_128_over_512_speedup_8_intel = _avg_bbc_speedup("Intel x86", "BucketSIMD_8B_TESTZ_128_THP", 70, "512_8_Speedup")

# Intel, load factor 90: 512-bit rows relative to the 256-bit / 128-bit columns.
avg_lf90_512_over_256_speedup_8_intel = _avg_bbc_speedup("Intel x86", "BucketSIMD_8B_TESTZ_512_THP", 90, "256_8_Speedup")
avg_lf90_512_over_256_speedup_16_intel = _avg_bbc_speedup("Intel x86", "BucketSIMD_16B_TESTZ_512_THP", 90, "256_16_Speedup")
avg_lf90_512_over_128_speedup_8_intel = _avg_bbc_speedup("Intel x86", "BucketSIMD_8B_TESTZ_512_THP", 90, "128_8_Speedup")
avg_lf90_512_over_128_speedup_16_intel = _avg_bbc_speedup("Intel x86", "BucketSIMD_16B_TESTZ_512_THP", 90, "128_16_Speedup")

# AMD, load factor 90: 256-bit rows relative to the 128-bit columns.
avg_lf90_256_over_128_speedup_8_amd = _avg_bbc_speedup("AMD x86", "BucketSIMD_8B_TESTZ_256_THP", 90, "128_8_Speedup")
avg_lf90_256_over_128_speedup_16_amd = _avg_bbc_speedup("AMD x86", "BucketSIMD_16B_TESTZ_256_THP", 90, "128_16_Speedup")

# Load factor 90: 8B rows relative to the 16B column of the same vector size.
avg_lf90_8_over_16_speedup_512_intel = _avg_bbc_speedup("Intel x86", "BucketSIMD_8B_TESTZ_512_THP", 90, "512_16_Speedup")
avg_lf90_8_over_16_speedup_256_intel = _avg_bbc_speedup("Intel x86", "BucketSIMD_8B_TESTZ_256_THP", 90, "256_16_Speedup")
avg_lf90_8_over_16_speedup_128_intel = _avg_bbc_speedup("Intel x86", "BucketSIMD_8B_TESTZ_128_THP", 90, "128_16_Speedup")

avg_lf90_8_over_16_speedup_256_amd = _avg_bbc_speedup("AMD x86", "BucketSIMD_8B_TESTZ_256_THP", 90, "256_16_Speedup")
avg_lf90_8_over_16_speedup_128_amd = _avg_bbc_speedup("AMD x86", "BucketSIMD_8B_TESTZ_128_THP", 90, "128_16_Speedup")

avg_lf90_8_over_16_speedup_128_a64fx = _avg_bbc_speedup("A64FX ARM", "BucketSIMD_8B_TESTZ_128_THP", 90, "128_16_Speedup")
avg_lf90_8_over_16_speedup_128_power = _avg_bbc_speedup("Power", "BucketSIMD_8B_TESTZ_128_THP", 90, "128_16_Speedup")

# A64FX: 128-bit rows vs. the 512-bit column at load factor 25, and the
# reverse direction at load factor 90.
avg_lf25_neon_vs_sve_8bit_a64fx = _avg_bbc_speedup("A64FX ARM", "BucketSIMD_8B_TESTZ_128_THP", 25, "512_8_Speedup")
avg_lf90_sve_vs_neon_8bit_a64fx = _avg_bbc_speedup("A64FX ARM", "BucketSIMD_8B_TESTZ_512_THP", 90, "128_8_Speedup")


# Print every averaged speedup, unrounded, for manual inspection. Each line
# carries its own item label; the two 512-over-128 lines are labeled
# (6.1)/(6.2) so that no label appears twice in the output.
print(f"(1) average speedup 128-bit over 256-bit <Intel, LF25, 8B>: {avg_lf25_128_over_256_speedup_8_intel}\n"
      f"(2) average speedup 128-bit over 256-bit <Intel, LF25, 16B>: {avg_lf25_128_over_256_speedup_16_intel}\n"
      f"(3) average speedup 128-bit over 512-bit <Intel, LF25, 8B>: {avg_lf25_128_over_512_speedup_8_intel}\n"
      f"(4) average speedup 128-bit over 512-bit <Intel, LF25, 16B>: {avg_lf25_128_over_512_speedup_16_intel}\n"
      f"(4.1) average speedup 256-bit over 128-bit <Intel, LF70, 8B>: {avg_lf70_256_over_128_speedup_8_intel}\n"
      f"(4.2) average speedup 128-bit over 512-bit <Intel, LF70, 8B>: {avg_lf70_128_over_512_speedup_8_intel}\n"
      f"(5) average speedup 512-bit over 256-bit <Intel, LF90, 8B>: {avg_lf90_512_over_256_speedup_8_intel}\n"
      f"(6) average speedup 512-bit over 256-bit <Intel, LF90, 16B>: {avg_lf90_512_over_256_speedup_16_intel}\n"
      f"(6.1) average speedup 512-bit over 128-bit <Intel, LF90, 8B>: {avg_lf90_512_over_128_speedup_8_intel}\n"
      f"(6.2) average speedup 512-bit over 128-bit <Intel, LF90, 16B>: {avg_lf90_512_over_128_speedup_16_intel}\n"
      f"(7) average speedup 256-bit over 128-bit <AMD, LF90, 8B>: {avg_lf90_256_over_128_speedup_8_amd}\n"
      f"(8) average speedup 256-bit over 128-bit <AMD, LF90, 16B>: {avg_lf90_256_over_128_speedup_16_amd}\n"
      f"(9) average speedup 8-bit over 16-bit <Intel, LF90, 512-bit>: {avg_lf90_8_over_16_speedup_512_intel}\n"
      f"(9.1) average speedup 8-bit over 16-bit <Intel, LF90, 256-bit>: {avg_lf90_8_over_16_speedup_256_intel}\n"
      f"(9.2) average speedup 8-bit over 16-bit <Intel, LF90, 128-bit>: {avg_lf90_8_over_16_speedup_128_intel}\n"
      f"(10) average speedup 8-bit over 16-bit <AMD, LF90, 256-bit>: {avg_lf90_8_over_16_speedup_256_amd}\n"
      f"(10.1) average speedup 8-bit over 16-bit <AMD, LF90, 128-bit>: {avg_lf90_8_over_16_speedup_128_amd}\n"
      f"(11) average speedup 8-bit over 16-bit <ARM, LF90, 128-bit>: {avg_lf90_8_over_16_speedup_128_a64fx}\n"
      f"(12) average speedup 8-bit over 16-bit <Power, LF90, 128-bit>: {avg_lf90_8_over_16_speedup_128_power}\n"
      f"(13) average speedup NEON vs SVE <A64FX, LF25, 8-bit>: {avg_lf25_neon_vs_sve_8bit_a64fx}\n"
      f"(14) average speedup SVE vs NEON <A64FX, LF90, 8-bit>: {avg_lf90_sve_vs_neon_8bit_a64fx}\n"
     )


# Export the selected averages, rounded to two decimals, under their
# result_dict keys (same keys and order as before).
for result_key, average in (
    ("IntLf25BBC8128vs256Speedup", avg_lf25_128_over_256_speedup_8_intel),
    ("IntLf25BBC8128vs512Speedup", avg_lf25_128_over_512_speedup_8_intel),
    ("IntLf70BBC8256vs128Speedup", avg_lf70_256_over_128_speedup_8_intel),
    ("IntLf70BBC8128vs512Speedup", avg_lf70_128_over_512_speedup_8_intel),
    ("IntLf90BBC1288vs16Speedup", avg_lf90_8_over_16_speedup_128_intel),
    ("IntLf90BBC2568vs16Speedup", avg_lf90_8_over_16_speedup_256_intel),
    ("IntLf90BBC5128vs16Speedup", avg_lf90_8_over_16_speedup_512_intel),
    ("AmdLf90BBC1288vs16Speedup", avg_lf90_8_over_16_speedup_128_amd),
    ("AmdLf90BBC2568vs16Speedup", avg_lf90_8_over_16_speedup_256_amd),
    ("A64FXLf25BBCNeonVsSveSpeedup", avg_lf25_neon_vs_sve_8bit_a64fx),
    ("A64FXLf90BBCSveVsNeonSpeedup", avg_lf90_sve_vs_neon_8bit_a64fx),
):
    result_dict[result_key] = round(average, 2)


## Comparison to Classical and VFP

In [None]:
# Colors for the bucketing tables, the fingerprinting table and the three
# force-included baselines. Entry order is kept as written.
pal_dict = {
    "BucketSIMD_16B_THP": deep_palette[5],
    "BucketSIMD_8B_THP": deep_palette[4],
    "FingerPSIMD_8B_THP": deep_palette[3],
    "StoreRH_THP": deep_palette[0],
    "LP_UN_THP": deep_palette[1],
    "Chain_BudBucket16FP_THP": deep_palette[2],
}

# Figure for all four machines. FingerprintingSIMDSoAHashTable is passed as an
# additional include filter, and the returned frames stay in read_d / write_d.
read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_notestz=True,
    hide_lsbmsb_fingerprints=False,
    hide_msblsb_fingerprints=True,
    show_avx512=True,
    hide_non_avx512=True,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=["FingerprintingSIMDSoAHashTable"],
    word_filter=[
        "_NEON_S2N",
        "_AVX512",
        "_SVE",
        "_Hints",
        "_LSBMSB",
        "_TESTZ",
        "_512",
        "_128",
        "_256",
        "_KVBu",
        "_64FPPB",
        "_32FPPB",
        "_16FPPB",
        "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=False,
    hide_vfp_16b=True,
    force_include=["LP_UN_THP", "StoreRH_THP", "Chain_BudBucket16FP_THP"],
    legend_label_dict={
        "LP_UN_THP": "LP(AoS)",
        "StoreRH_THP": "StoringRH",
        "FingerPSIMD_8B_THP": "VFP(8)",
        "LPPackedSoA_THP": "LP(SoA/Packed)",
        "BucketSIMD_8B_THP": "BBC(8)",
        "BucketSIMD_16B_THP": "BBC(16)",
    },
    file_path="full_bbc_classic.pdf",
)

In [None]:
# Figure written to "bbc_classic.pdf" using the current pal_dict. Only the
# values 50 and 90 are passed in the fourth positional argument, the chained
# table is not force-included, and the return value is discarded.
_ = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [50, 90],
    375,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_notestz=True,
    hide_lsbmsb_fingerprints=False,
    hide_msblsb_fingerprints=True,
    show_avx512=True,
    hide_non_avx512=True,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=["FingerprintingSIMDSoAHashTable"],
    word_filter=[
        "_NEON_S2N",
        "_AVX512",
        "_SVE",
        "_Hints",
        "_LSBMSB",
        "_TESTZ",
        "_512",
        "_128",
        "_256",
        "_KVBu",
        "_64FPPB",
        "_32FPPB",
        "_16FPPB",
        "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=False,
    hide_vfp_16b=True,
    force_include=["LP_UN_THP", "StoreRH_THP"],
    legend_label_dict={
        "LP_UN_THP": "LP(AoS/Unal.)",
        "StoreRH_THP": "StoringRH",
        "FingerPSIMD_8B_THP": "VFP(8)",
        "LPPackedSoA_THP": "LP(SoA/Packed)",
        "BucketSIMD_8B_THP": "BBC(8)",
        "BucketSIMD_16B_THP": "BBC(16)",
    },
    file_path="bbc_classic.pdf",
    ncol_legend=3,
)

In [None]:
# Numbers
# Derived Perf/Speedup columns are added to this frame further down, so work
# on a copy and leave read_d itself untouched.
df1 = read_d.copy()
#df2 = write_d.copy()

def get_aos_lookups(row):
    assert len(df1[(df1["Arch"] == row["Arch"]) & (df1["Compiler"] == row["Compiler"]) & (df1["PageSize"] == row["PageSize"]) & (df1["HugePageSize"] == row["HugePageSize"]) & (df1["SQR"] == row["SQR"]) & (df1["Size"] == row["Size"]) & (df1["Distribution"] == row["Distribution"]) & (df1["LoadFactor"] == row["LoadFactor"]) & (df1["PlotIdentifier"] == "LP_UN_THP")]) == 1
    return df1[(df1["Arch"] == row["Arch"]) & (df1["Compiler"] == row["Compiler"]) & (df1["PageSize"] == row["PageSize"]) & (df1["HugePageSize"] == row["HugePageSize"]) & (df1["SQR"] == row["SQR"]) & (df1["Size"] == row["Size"]) & (df1["Distribution"] == row["Distribution"]) & (df1["LoadFactor"] == row["LoadFactor"]) & (df1["PlotIdentifier"] == "LP_UN_THP")].iloc[0]["MLookups/s"]


def get_vfp_lookups(row):
    assert len(df1[(df1["Arch"] == row["Arch"]) & (df1["Compiler"] == row["Compiler"]) & (df1["PageSize"] == row["PageSize"]) & (df1["HugePageSize"] == row["HugePageSize"]) & (df1["SQR"] == row["SQR"]) & (df1["Size"] == row["Size"]) & (df1["Distribution"] == row["Distribution"]) & (df1["LoadFactor"] == row["LoadFactor"]) & (df1["PlotIdentifier"] == "FingerPSIMD_8B_THP")]) == 1
    return df1[(df1["Arch"] == row["Arch"]) & (df1["Compiler"] == row["Compiler"]) & (df1["PageSize"] == row["PageSize"]) & (df1["HugePageSize"] == row["HugePageSize"]) & (df1["SQR"] == row["SQR"]) & (df1["Size"] == row["Size"]) & (df1["Distribution"] == row["Distribution"]) & (df1["LoadFactor"] == row["LoadFactor"]) & (df1["PlotIdentifier"] == "FingerPSIMD_8B_THP")].iloc[0]["MLookups/s"]

def get_chain_lookups(row):
    """Return the 'MLookups/s' value of the Chain_BudBucket16FP_THP entry that matches `row`.

    The entry is searched in the module-level `df1`. It has to agree with
    `row` on Arch, Compiler, PageSize, HugePageSize, SQR, Size, Distribution
    and LoadFactor. An assertion enforces that exactly one such entry exists.

    For rows with LoadFactor 90 no lookup is performed and NaN is returned.
    """
    if row["LoadFactor"] == 90:
        # NaN rather than 0: a zero reference would turn the derived
        # MLookups/s / ChainPerf ratio into inf, whereas NaN marks the value
        # as missing and is skipped by pandas aggregations such as mean().
        return float("nan")

    shared_columns = ["Arch", "Compiler", "PageSize", "HugePageSize",
                      "SQR", "Size", "Distribution", "LoadFactor"]
    conditions = [df1[column] == row[column] for column in shared_columns]
    conditions.append(df1["PlotIdentifier"] == "Chain_BudBucket16FP_THP")

    # Combine all per-column comparisons into one boolean selector.
    selector = conditions[0]
    for condition in conditions[1:]:
        selector = selector & condition

    reference = df1[selector]
    assert len(reference) == 1
    return reference.iloc[0]["MLookups/s"]

# For every row, attach the throughput of the reference entries looked up by
# get_aos_lookups / get_vfp_lookups / get_chain_lookups and the row's own
# throughput relative to each of them.
df1["AoSPerf"] = df1.apply(get_aos_lookups, axis=1)
df1["AoSSpeedup"] = df1["MLookups/s"] / df1["AoSPerf"]

df1["VFPPerf"] = df1.apply(get_vfp_lookups, axis=1)
df1["VFPSpeedup"] = df1["MLookups/s"] / df1["VFPPerf"]

df1["ChainPerf"] = df1.apply(get_chain_lookups, axis=1)
df1["ChainSpeedup"] = df1["MLookups/s"] / df1["ChainPerf"]


def _mean_bbc8_speedup(frame, speedup_column, load_factor, arch=None, sqr=None):
    """Return the mean of `speedup_column` over the BucketSIMD_8B_THP rows of `frame`.

    Rows are restricted to the given `load_factor`; `arch` and `sqr`, when not
    None, additionally restrict them to one "Arch" / "SQR" value.
    """
    selector = (frame["PlotIdentifier"] == "BucketSIMD_8B_THP") & (frame["LoadFactor"] == load_factor)
    if arch is not None:
        selector = selector & (frame["Arch"] == arch)
    if sqr is not None:
        selector = selector & (frame["SQR"] == sqr)
    return frame[selector][speedup_column].mean()


avg_lf70_bbc_8_vs_vfp_intel = _mean_bbc8_speedup(df1, "VFPSpeedup", 70, arch="Intel x86")
avg_lf70_bbc_8_vs_vfp_amd = _mean_bbc8_speedup(df1, "VFPSpeedup", 70, arch="AMD x86")
avg_lf70_bbc_8_vs_vfp_a64fx = _mean_bbc8_speedup(df1, "VFPSpeedup", 70, arch="A64FX ARM")
avg_lf70_bbc_8_vs_vfp_power = _mean_bbc8_speedup(df1, "VFPSpeedup", 70, arch="Power")
avg_lf90_bbc_8_vs_vfp_intel = _mean_bbc8_speedup(df1, "VFPSpeedup", 90, arch="Intel x86")
avg_lf90_bbc_8_vs_vfp_amd = _mean_bbc8_speedup(df1, "VFPSpeedup", 90, arch="AMD x86")
avg_lf90_bbc_8_vs_vfp_a64fx = _mean_bbc8_speedup(df1, "VFPSpeedup", 90, arch="A64FX ARM")
avg_lf90_bbc_8_vs_vfp_power = _mean_bbc8_speedup(df1, "VFPSpeedup", 90, arch="Power")

avg_lf50_bbc_8_vs_vfp_intel = _mean_bbc8_speedup(df1, "VFPSpeedup", 50, arch="Intel x86")
avg_lf50_bbc_8_vs_vfp_amd = _mean_bbc8_speedup(df1, "VFPSpeedup", 50, arch="AMD x86")
avg_lf50_bbc_8_vs_vfp_a64fx = _mean_bbc8_speedup(df1, "VFPSpeedup", 50, arch="A64FX ARM")
avg_lf50_bbc_8_vs_vfp_power = _mean_bbc8_speedup(df1, "VFPSpeedup", 50, arch="Power")

avg_lf50_sqr0_bbc_8_vs_aos_intel = _mean_bbc8_speedup(df1, "AoSSpeedup", 50, arch="Intel x86", sqr=0)
avg_lf50_sqr0_bbc_8_vs_aos_amd = _mean_bbc8_speedup(df1, "AoSSpeedup", 50, arch="AMD x86", sqr=0)
avg_lf50_sqr0_bbc_8_vs_aos_a64fx = _mean_bbc8_speedup(df1, "AoSSpeedup", 50, arch="A64FX ARM", sqr=0)
avg_lf50_sqr0_bbc_8_vs_aos_power = _mean_bbc8_speedup(df1, "AoSSpeedup", 50, arch="Power", sqr=0)

avg_lf50_bbc_8_vs_chained = _mean_bbc8_speedup(df1, "ChainSpeedup", 50)
avg_lf70_bbc_8_vs_chained = _mean_bbc8_speedup(df1, "ChainSpeedup", 70)

avg_lf50_bbc_8_vs_aos = _mean_bbc8_speedup(df1, "AoSSpeedup", 50)
avg_lf70_bbc_8_vs_aos = _mean_bbc8_speedup(df1, "AoSSpeedup", 70)
avg_lf90_bbc_8_vs_aos = _mean_bbc8_speedup(df1, "AoSSpeedup", 90)

avg_lf70_bbc_8_vs_aos_intel = _mean_bbc8_speedup(df1, "AoSSpeedup", 70, arch="Intel x86")
avg_lf70_bbc_8_vs_aos_amd = _mean_bbc8_speedup(df1, "AoSSpeedup", 70, arch="AMD x86")
avg_lf70_bbc_8_vs_aos_a64fx = _mean_bbc8_speedup(df1, "AoSSpeedup", 70, arch="A64FX ARM")
avg_lf70_bbc_8_vs_aos_power = _mean_bbc8_speedup(df1, "AoSSpeedup", 70, arch="Power")


# Report: every item carries a unique running number.
print(f"(1) average speedup 8-bit BBC over VFP <Intel, LF70>: {avg_lf70_bbc_8_vs_vfp_intel}\n"
      f"(2) average speedup 8-bit BBC over VFP <AMD, LF70>: {avg_lf70_bbc_8_vs_vfp_amd}\n"
      f"(3) average speedup 8-bit BBC over VFP <A64FX, LF70>: {avg_lf70_bbc_8_vs_vfp_a64fx}\n"
      f"(4) average speedup 8-bit BBC over VFP <Power, LF70>: {avg_lf70_bbc_8_vs_vfp_power}\n"
      f"(5) average speedup 8-bit BBC over VFP <Intel, LF90>: {avg_lf90_bbc_8_vs_vfp_intel}\n"
      f"(6) average speedup 8-bit BBC over VFP <AMD, LF90>: {avg_lf90_bbc_8_vs_vfp_amd}\n"
      f"(7) average speedup 8-bit BBC over VFP <A64FX, LF90>: {avg_lf90_bbc_8_vs_vfp_a64fx}\n"
      f"(8) average speedup 8-bit BBC over VFP <Power, LF90>: {avg_lf90_bbc_8_vs_vfp_power}\n"
      f"(9) average speedup 8-bit BBC over VFP <Intel, LF50>: {avg_lf50_bbc_8_vs_vfp_intel}\n"
      f"(10) average speedup 8-bit BBC over VFP <AMD, LF50>: {avg_lf50_bbc_8_vs_vfp_amd}\n"
      f"(11) average speedup 8-bit BBC over VFP <A64FX, LF50>: {avg_lf50_bbc_8_vs_vfp_a64fx}\n"
      f"(12) average speedup 8-bit BBC over VFP <Power, LF50>: {avg_lf50_bbc_8_vs_vfp_power}\n"
      f"(13) average speedup 8-bit BBC over AoS <Intel, LF50, SQR0>: {avg_lf50_sqr0_bbc_8_vs_aos_intel}\n"
      f"(14) average speedup 8-bit BBC over AoS <AMD, LF50, SQR0>: {avg_lf50_sqr0_bbc_8_vs_aos_amd}\n"
      f"(15) average speedup 8-bit BBC over AoS <A64FX, LF50, SQR0>: {avg_lf50_sqr0_bbc_8_vs_aos_a64fx}\n"
      f"(16) average speedup 8-bit BBC over AoS <Power, LF50, SQR0>: {avg_lf50_sqr0_bbc_8_vs_aos_power}\n"
      f"(17) average speedup 8-bit BBC over Chained <LF50>: {avg_lf50_bbc_8_vs_chained}\n"
      f"(18) average speedup 8-bit BBC over Chained <LF70>: {avg_lf70_bbc_8_vs_chained}\n"
      f"(19) average speedup 8-bit BBC over AoS <LF50>: {avg_lf50_bbc_8_vs_aos}\n"
      f"(20) average speedup 8-bit BBC over AoS <LF70>: {avg_lf70_bbc_8_vs_aos}\n"
      f"(21) average speedup 8-bit BBC over AoS <LF90>: {avg_lf90_bbc_8_vs_aos}\n"
      f"(22) average speedup 8-bit BBC over AoS <Intel, LF70>: {avg_lf70_bbc_8_vs_aos_intel}\n"
      f"(23) average speedup 8-bit BBC over AoS <AMD, LF70>: {avg_lf70_bbc_8_vs_aos_amd}\n"
      f"(24) average speedup 8-bit BBC over AoS <A64FX, LF70>: {avg_lf70_bbc_8_vs_aos_a64fx}\n"
      f"(25) average speedup 8-bit BBC over AoS <Power, LF70>: {avg_lf70_bbc_8_vs_aos_power}\n"
     )

# Store the rounded figures in result_dict under the same keys as before.
result_dict["IntLf70BBC8VSVFP"] = round(avg_lf70_bbc_8_vs_vfp_intel, 2)
result_dict["AmdLf70BBC8VSVFP"] = round(avg_lf70_bbc_8_vs_vfp_amd, 2)
result_dict["A64FXLf70BBC8VSVFP"] = round(avg_lf70_bbc_8_vs_vfp_a64fx, 2)
result_dict["PwrLf70BBC8VSVFP"] = round(avg_lf70_bbc_8_vs_vfp_power, 2)
result_dict["IntLf90BBC8VSVFP"] = round(avg_lf90_bbc_8_vs_vfp_intel, 2)
result_dict["AmdLf90BBC8VSVFP"] = round(avg_lf90_bbc_8_vs_vfp_amd, 2)
result_dict["A64FXLf90BBC8VSVFP"] = round(avg_lf90_bbc_8_vs_vfp_a64fx, 2)
result_dict["PwrLf90BBC8VSVFP"] = round(avg_lf90_bbc_8_vs_vfp_power, 2)
result_dict["IntLf50BBC8VSVFP"] = round(avg_lf50_bbc_8_vs_vfp_intel, 2)
result_dict["AmdLf50BBC8VSVFP"] = round(avg_lf50_bbc_8_vs_vfp_amd, 2)
result_dict["A64FXLf50BBC8VSVFP"] = round(avg_lf50_bbc_8_vs_vfp_a64fx, 2)
result_dict["PwrLf50BBC8VSVFP"] = round(avg_lf50_bbc_8_vs_vfp_power, 2)

result_dict["IntLf50Sqr0BBC8VSAoS"] = round(avg_lf50_sqr0_bbc_8_vs_aos_intel, 2)
result_dict["AmdLf50Sqr0BBC8VSAoS"] = round(avg_lf50_sqr0_bbc_8_vs_aos_amd, 2)
result_dict["A64FXLf50Sqr0BBC8VSAoS"] = round(avg_lf50_sqr0_bbc_8_vs_aos_a64fx, 2)
result_dict["PwrLf50Sqr0BBC8VSAoS"] = round(avg_lf50_sqr0_bbc_8_vs_aos_power, 2)

result_dict["AllSysLf50BBC8VSAoS"] = round(avg_lf50_bbc_8_vs_aos, 2)
result_dict["AllSysLf70BBC8VSAoS"] = round(avg_lf70_bbc_8_vs_aos, 2)
result_dict["AllSysLf50BBC8VSChain"] = round(avg_lf50_bbc_8_vs_chained, 2)
result_dict["AllSysLf70BBC8VSChain"] = round(avg_lf70_bbc_8_vs_chained, 2)

### Paper Version

In [None]:
# Colours keyed by the identifiers that are also used as legend_label_dict keys below.
pal_dict = {
    "BucketSIMD_16B_THP": "#e31a1c",
    "BucketSIMD_8B_THP": "#fd8d3c",
    "FingerPSIMD_8B_THP": "#fecc5c",
    "RecalcRH_UN_THP": "#2c7fb8",
    "LP_UN_THP": "#a1dab4",
    "Chain_BudBucket16FP_THP": "#253494",
}

# analysis_plot call with file_path "paper_bbc_vfp_classic.pdf"; the return value is discarded.
_ = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    550,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_notestz=True,
    hide_non_avx512=True,
    hide_lsbmsb_fingerprints=False,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=["FingerprintingSIMDSoAHashTable"],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_LSBMSB", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=False,
    hide_vfp_16b=True,
    force_include=["LP_UN_THP", "Chain_BudBucket16FP_THP", "RecalcRH_UN_THP"],
    legend_label_dict={
        "LP_UN_THP": "LP(AoS)",
        "RecalcRH_UN_THP": "RecalcRH",
        "FingerPSIMD_8B_THP": "VFP(8)",
        "Chain_BudBucket16FP_THP": "Chained(16)",
        "BucketSIMD_8B_THP": "BBC(8)",
        "BucketSIMD_16B_THP": "BBC(16)",
    },
    file_path="paper_bbc_vfp_classic.pdf",
    ncol_legend=6,
    label_order=[4, 5, 2, 3, 1, 0],
    legend_height_shift=-0.05,
    ylabel_padding=0.5,
    xlabel_padding=0.5,
    disable_legend_border=True,
    reduce_xticks=True,
    reduce_yticks=True,
    reduce_minor=True,
    power_axis=True,
)

In [None]:
# Colours keyed by the identifiers that are also used as legend_label_dict keys below.
pal_dict = {
    "BucketSIMD_16B_THP": "#e31a1c",
    "BucketSIMD_8B_THP": "#fd8d3c",
    "FingerPSIMD_8B_THP": "#fecc5c",
    "RecalcRH_UN_THP": "#2c7fb8",
    "LP_UN_THP": "#a1dab4",
    "Chain_BudBucket16FP_THP": "#253494",
}

# analysis_plot call with file_path "paper_bbc_vfp_classic_small.pdf"; the return value is discarded.
_ = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [50, 90],
    240,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_notestz=True,
    hide_non_avx512=True,
    hide_lsbmsb_fingerprints=False,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=["FingerprintingSIMDSoAHashTable"],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_LSBMSB", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=False,
    hide_vfp_16b=True,
    force_include=["LP_UN_THP", "Chain_BudBucket16FP_THP", "RecalcRH_UN_THP"],
    legend_label_dict={
        "LP_UN_THP": "LP",
        "RecalcRH_UN_THP": "RH",
        "FingerPSIMD_8B_THP": "VFP(8)",
        "Chain_BudBucket16FP_THP": "Chain(16)",
        "BucketSIMD_8B_THP": "BBC(8)",
        "BucketSIMD_16B_THP": "BBC(16)",
    },
    file_path="paper_bbc_vfp_classic_small.pdf",
    ncol_legend=6,
    label_order=[4, 5, 2, 3, 1, 0],
    legend_height_shift=-0.05,
    ylabel_padding=0,
    xlabel_padding=0,
    disable_legend_border=True,
    reduce_xticks=True,
    reduce_yticks=True,
    reduce_minor=True,
    power_axis=True,
)

In [None]:
# Colours keyed by the identifiers that are also used as legend_label_dict keys below.
pal_dict = {
    "BucketSIMD_8B_THP": "#fd8d3c",
    "RecalcRH_UN_THP": "#2c7fb8",
}

# analysis_plot call with file_path "paper_outlook.pdf"; only the first returned
# value is kept (as read_d), the second one is discarded.
read_d, _ = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [90],
    250,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_notestz=True,
    hide_non_avx512=True,
    hide_lsbmsb_fingerprints=False,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_LSBMSB", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=True,
    hide_vfp_16b=True,
    force_include=["RecalcRH_UN_THP"],
    legend_label_dict={
        "LP_UN_THP": "LP(AoS)",
        "RecalcRH_UN_THP": "Robin Hood",
        "BucketSIMD_8B_THP": "Bucket-Based Comparison",
    },
    file_path="paper_outlook.pdf",
    ncol_legend=2,
    legend_height_shift=-0.8,
    ylabel_padding=0,
    xlabel_padding=0,
    extra_legend_columnspacing=0.5,
    extra_legend_handlelength=1,
    disable_legend_border=True,
    reduce_xticks=True,
    reduce_yticks=True,
    reduce_minor=True,
    single_row_lf90=True,
)

In [None]:
# Work on a copy of read_d; assuming read_d is a pandas DataFrame, the columns
# added to df1 below then leave read_d itself unchanged.
df1 = read_d.copy()

def get_rh_lookups(row):
    """Return the 'MLookups/s' value of the RecalcRH_UN_THP entry that matches `row`.

    The entry is searched in the module-level `df1`. It has to agree with
    `row` on Arch, Compiler, PageSize, HugePageSize, SQR, Size, Distribution
    and LoadFactor. An assertion enforces that exactly one such entry exists.
    """
    shared_columns = ["Arch", "Compiler", "PageSize", "HugePageSize",
                      "SQR", "Size", "Distribution", "LoadFactor"]
    conditions = [df1[column] == row[column] for column in shared_columns]
    conditions.append(df1["PlotIdentifier"] == "RecalcRH_UN_THP")

    # Combine all per-column comparisons into one boolean selector.
    selector = conditions[0]
    for condition in conditions[1:]:
        selector = selector & condition

    reference = df1[selector]
    assert len(reference) == 1
    return reference.iloc[0]["MLookups/s"]
# Reference throughput from get_rh_lookups and the row's throughput relative to it.
df1["RHPerf"] = df1.apply(get_rh_lookups, axis=1)
df1["RHSpeedup"] = df1["MLookups/s"] / df1["RHPerf"]

# All three statistics are taken over the same selection, so filter only once.
_bbc8_lf90_selector = (df1["PlotIdentifier"] == "BucketSIMD_8B_THP") & (df1["LoadFactor"] == 90)
_bbc8_lf90_rh_speedup = df1[_bbc8_lf90_selector]["RHSpeedup"]

min_lf90_bbc_8_vs_recalc = _bbc8_lf90_rh_speedup.min()
max_lf90_bbc_8_vs_recalc = _bbc8_lf90_rh_speedup.max()
mean_lf90_bbc_8_vs_recalc = _bbc8_lf90_rh_speedup.mean()

print(
    f"(ich will nicht mehr) min speedup 8-bit BBC over RH <LF 90>: {min_lf90_bbc_8_vs_recalc}\n"
    f"(ich will nicht mehr) max speedup 8-bit BBC over RH <LF 90>: {max_lf90_bbc_8_vs_recalc}\n"
    f"(ich will nicht mehr) mean speedup 8-bit BBC over RH <LF 90>: {mean_lf90_bbc_8_vs_recalc}\n"
)

## Hashfunctions - Uniform Keys

In [None]:
# Colours (entries of deep_palette) keyed by the identifiers that are also used
# as legend_label_dict keys below.
pal_dict = {
    "BucketSIMD_8B_THP": deep_palette[5],
    "LP_UN_THP": deep_palette[1],
    "BucketSIMD_8B_THP_MultShift128": deep_palette[2],
    "LP_UN_THP_MultShift128": deep_palette[3],
}

# analysis_plot call with file_path "full_64vs128_multshift.pdf"; both returned
# values are kept as read_d / write_d.
read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_non_avx512=True,
    hide_lsbmsb_fingerprints=False,
    hide_notestz=True,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_LSBMSB", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=True,
    hide_vfp_16b=True,
    force_include=["LP_UN_THP", "LP_UN_THP_MultShift128"],
    show_nonmultshift64=True,
    hide_xx_hash=True,
    hide_multaddshift_murmur=True,
    legend_label_dict={
        "LP_UN_THP": "LP(AoS/Unal.)+MS64",
        "LP_UN_THP_MultShift128": "LP(AoS/Unal.)+MS128",
        "BucketSIMD_8B_THP": "BBC(8)+MS64",
        "BucketSIMD_8B_THP_MultShift128": "BBC(8)+MS128",
    },
    file_path="full_64vs128_multshift.pdf",
)

- Regular 64-bit multiply-shift hashing is a lot faster.
- We continue by comparing multiply-shift vs. multiply-add-shift vs. Murmur (not XX; XX is motivated at the end as a hardware-accelerated hash function).

In [None]:
# Colours (entries of deep_palette) keyed by the identifiers that are also used
# as legend_label_dict keys below.
pal_dict = {
    "BucketSIMD_8B_LSBMSB_THP": deep_palette[0],
    "LP_UN_THP": deep_palette[1],
    "BucketSIMD_8B_LSBMSB_THP_MultAddShift64": deep_palette[2],
    "LP_UN_THP_Murmur": deep_palette[3],
    "LP_UN_THP_MultAddShift64": deep_palette[4],
    "BucketSIMD_8B_MSBLSB_THP_Murmur": deep_palette[5],
}

# analysis_plot call with file_path "full_hashfunctions_ms_mas_murmur.pdf"; both
# returned values are kept as read_d / write_d.
read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_likely_hints=False,
    hide_lsbmsb_fingerprints=False,
    hide_msblsb_fingerprints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    show_avx512=True,
    hide_non_avx512=True,
    hide_notestz=True,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_TESTZ", "_512", "_128",
        "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=True,
    hide_vfp_16b=True,
    force_include=["LP_UN_THP", "LP_UN_THP_MultAddShift64", "LP_UN_THP_Murmur"],
    show_nonmultshift64=True,
    hide_128b_hasher=True,
    hide_xx_hash=True,
    multshift_only_lsbmsb=True,
    legend_label_dict={
        "LP_UN_THP": "LP(AoS/Unal.)+MS64",
        "LP_UN_THP_MultAddShift64": "LP(AoS/Unal.)+MAS64",
        "LP_UN_THP_Murmur": "LP(AoS/Unal.)+Murmur",
        "BucketSIMD_8B_LSBMSB_THP": "BBC(8)+MS64",
        "BucketSIMD_8B_LSBMSB_THP_MultAddShift64": "BBC(8)+MAS64",
        "BucketSIMD_8B_MSBLSB_THP_Murmur": "BBC(8)+Murmur",
    },
    file_path="full_hashfunctions_ms_mas_murmur.pdf",
)

In [None]:
# analysis_plot call with file_path "hashfunctions_ms_mas_murmur.pdf"; the return
# value is discarded. pal_dict is not defined in this cell and is taken from
# whatever an earlier cell assigned.
#
# NOTE(review): the call sets hide_16b_fingerprints=True, yet the legend labels
# originally only covered BucketSIMD_16B_* identifiers. The labels for the 8-bit
# identifiers used by the full-size variant of this plot are added; the 16-bit
# entries are kept unchanged. Presumably analysis_plot ignores label entries
# for identifiers that are not plotted -- confirm against its implementation.
_ = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 70],
    375,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_likely_hints=False,
    hide_lsbmsb_fingerprints=False,
    hide_msblsb_fingerprints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    show_avx512=True,
    hide_non_avx512=True,
    hide_notestz=True,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_TESTZ", "_512", "_128",
        "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=True,
    hide_vfp_16b=True,
    force_include=["LP_UN_THP", "LP_UN_THP_MultAddShift64", "LP_UN_THP_Murmur"],
    show_nonmultshift64=True,
    hide_128b_hasher=True,
    hide_xx_hash=True,
    multshift_only_lsbmsb=True,
    ncol_legend=3,
    legend_label_dict={
        "LP_UN_THP": "LP(AoS/Unal.)+MS64",
        "LP_UN_THP_MultAddShift64": "LP(AoS/Unal.)+MAS64",
        "LP_UN_THP_Murmur": "LP(AoS/Unal.)+Murmur",
        "BucketSIMD_16B_THP": "BBC(16)+MS64",
        "BucketSIMD_16B_THP_MultAddShift64": "BBC(16)+MAS64",
        "BucketSIMD_16B_THP_Murmur": "BBC(16)+Murmur",
        "BucketSIMD_8B_LSBMSB_THP": "BBC(8)+MS64",
        "BucketSIMD_8B_LSBMSB_THP_MultAddShift64": "BBC(8)+MAS64",
        "BucketSIMD_8B_MSBLSB_THP_Murmur": "BBC(8)+Murmur",
    },
    file_path="hashfunctions_ms_mas_murmur.pdf",
)

In [None]:
# Numbers: derive hash-function speedup figures from read_d (expected to be set
# by an earlier cell). Work on a copy; assuming read_d is a pandas DataFrame, the
# columns added to df1 below then leave read_d itself unchanged.
df1 = read_d.copy()

def get_lp_mas_lookups(row):
    """Return the 'MLookups/s' value of the LP_UN_THP_MultAddShift64 entry that matches `row`.

    The entry is searched in the module-level `df1`. It has to agree with
    `row` on Arch, Compiler, PageSize, HugePageSize, SQR, Size, Distribution
    and LoadFactor. An assertion enforces that exactly one such entry exists.
    """
    shared_columns = ["Arch", "Compiler", "PageSize", "HugePageSize",
                      "SQR", "Size", "Distribution", "LoadFactor"]
    conditions = [df1[column] == row[column] for column in shared_columns]
    conditions.append(df1["PlotIdentifier"] == "LP_UN_THP_MultAddShift64")

    # Combine all per-column comparisons into one boolean selector.
    selector = conditions[0]
    for condition in conditions[1:]:
        selector = selector & condition

    reference = df1[selector]
    assert len(reference) == 1
    return reference.iloc[0]["MLookups/s"]

def get_lp_mumur_lookups(row):
    """Return the 'MLookups/s' value of the LP_UN_THP_Murmur entry that matches `row`.

    The entry is searched in the module-level `df1`. It has to agree with
    `row` on Arch, Compiler, PageSize, HugePageSize, SQR, Size, Distribution
    and LoadFactor. An assertion enforces that exactly one such entry exists.
    """
    shared_columns = ["Arch", "Compiler", "PageSize", "HugePageSize",
                      "SQR", "Size", "Distribution", "LoadFactor"]
    conditions = [df1[column] == row[column] for column in shared_columns]
    conditions.append(df1["PlotIdentifier"] == "LP_UN_THP_Murmur")

    # Combine all per-column comparisons into one boolean selector.
    selector = conditions[0]
    for condition in conditions[1:]:
        selector = selector & condition

    reference = df1[selector]
    assert len(reference) == 1
    return reference.iloc[0]["MLookups/s"]

def get_bbc_mas_lookups(row):
    """Return the 'MLookups/s' value of the BucketSIMD_8B_LSBMSB_THP_MultAddShift64 entry matching `row`.

    The entry is searched in the module-level `df1`. It has to agree with
    `row` on Arch, Compiler, PageSize, HugePageSize, SQR, Size, Distribution
    and LoadFactor. An assertion enforces that exactly one such entry exists.
    """
    shared_columns = ["Arch", "Compiler", "PageSize", "HugePageSize",
                      "SQR", "Size", "Distribution", "LoadFactor"]
    conditions = [df1[column] == row[column] for column in shared_columns]
    conditions.append(df1["PlotIdentifier"] == "BucketSIMD_8B_LSBMSB_THP_MultAddShift64")

    # Combine all per-column comparisons into one boolean selector.
    selector = conditions[0]
    for condition in conditions[1:]:
        selector = selector & condition

    reference = df1[selector]
    assert len(reference) == 1
    return reference.iloc[0]["MLookups/s"]

def get_bbc_mumur_lookups(row):
    """Return the 'MLookups/s' value of the BucketSIMD_8B_MSBLSB_THP_Murmur entry matching `row`.

    The entry is searched in the module-level `df1`. It has to agree with
    `row` on Arch, Compiler, PageSize, HugePageSize, SQR, Size, Distribution
    and LoadFactor. An assertion enforces that exactly one such entry exists.
    """
    shared_columns = ["Arch", "Compiler", "PageSize", "HugePageSize",
                      "SQR", "Size", "Distribution", "LoadFactor"]
    conditions = [df1[column] == row[column] for column in shared_columns]
    conditions.append(df1["PlotIdentifier"] == "BucketSIMD_8B_MSBLSB_THP_Murmur")

    # Combine all per-column comparisons into one boolean selector.
    selector = conditions[0]
    for condition in conditions[1:]:
        selector = selector & condition

    reference = df1[selector]
    assert len(reference) == 1
    return reference.iloc[0]["MLookups/s"]

# For every row, attach the throughput of the reference entries looked up by the
# get_*_lookups helpers and the row's own throughput relative to each of them.
df1["LPMASPerf"] = df1.apply(get_lp_mas_lookups, axis=1)
df1["LPMASpeedup"] = df1["MLookups/s"] / df1["LPMASPerf"]

df1["LPMurmurPerf"] = df1.apply(get_lp_mumur_lookups, axis=1)
df1["LPMurmurSpeedup"] = df1["MLookups/s"] / df1["LPMurmurPerf"]

df1["BBCMASPerf"] = df1.apply(get_bbc_mas_lookups, axis=1)
df1["BBCMASpeedup"] = df1["MLookups/s"] / df1["BBCMASPerf"]

df1["BBCMurmurPerf"] = df1.apply(get_bbc_mumur_lookups, axis=1)
df1["BBCMurmurSpeedup"] = df1["MLookups/s"] / df1["BBCMurmurPerf"]


def _mean_intel_speedup(frame, plot_identifier, speedup_column, load_factor):
    """Return the mean of `speedup_column` over the "Intel x86" rows of `frame`
    that have the given `plot_identifier` and `load_factor`."""
    selector = (
        (frame["Arch"] == "Intel x86")
        & (frame["PlotIdentifier"] == plot_identifier)
        & (frame["LoadFactor"] == load_factor)
    )
    return frame[selector][speedup_column].mean()


avg_lf25_intel_lp_ms_vs_mas = _mean_intel_speedup(df1, "LP_UN_THP", "LPMASpeedup", 25)
avg_lf50_intel_lp_ms_vs_mas = _mean_intel_speedup(df1, "LP_UN_THP", "LPMASpeedup", 50)
avg_lf70_intel_lp_ms_vs_mas = _mean_intel_speedup(df1, "LP_UN_THP", "LPMASpeedup", 70)
avg_lf90_intel_lp_ms_vs_mas = _mean_intel_speedup(df1, "LP_UN_THP", "LPMASpeedup", 90)

avg_lf25_intel_bbc_ms_vs_mas = _mean_intel_speedup(df1, "BucketSIMD_8B_LSBMSB_THP", "BBCMASpeedup", 25)
avg_lf50_intel_bbc_ms_vs_mas = _mean_intel_speedup(df1, "BucketSIMD_8B_LSBMSB_THP", "BBCMASpeedup", 50)
avg_lf70_intel_bbc_ms_vs_mas = _mean_intel_speedup(df1, "BucketSIMD_8B_LSBMSB_THP", "BBCMASpeedup", 70)
avg_lf90_intel_bbc_ms_vs_mas = _mean_intel_speedup(df1, "BucketSIMD_8B_LSBMSB_THP", "BBCMASpeedup", 90)
# The original (misspelled) name is kept as an alias for any existing references.
avg_lf90_intel_bnc_ms_vs_mas = avg_lf90_intel_bbc_ms_vs_mas

avg_lf25_intel_bbc_ms_vs_murmur = _mean_intel_speedup(df1, "BucketSIMD_8B_LSBMSB_THP", "BBCMurmurSpeedup", 25)
avg_lf50_intel_bbc_ms_vs_murmur = _mean_intel_speedup(df1, "BucketSIMD_8B_LSBMSB_THP", "BBCMurmurSpeedup", 50)
avg_lf70_intel_bbc_ms_vs_murmur = _mean_intel_speedup(df1, "BucketSIMD_8B_LSBMSB_THP", "BBCMurmurSpeedup", 70)
avg_lf90_intel_bbc_ms_vs_murmur = _mean_intel_speedup(df1, "BucketSIMD_8B_LSBMSB_THP", "BBCMurmurSpeedup", 90)
# The original (misspelled) name is kept as an alias for any existing references.
avg_lf90_intel_bnc_ms_vs_murmur = avg_lf90_intel_bbc_ms_vs_murmur

# Items (3) and (7) report the load-factor-70 averages and are labelled accordingly.
print(f"(1) average speedup MS over MAS <Intel, LF25, LP>: {avg_lf25_intel_lp_ms_vs_mas}\n"
      f"(2) average speedup MS over MAS <Intel, LF50, LP>: {avg_lf50_intel_lp_ms_vs_mas}\n"
      f"(3) average speedup MS over MAS <Intel, LF70, LP>: {avg_lf70_intel_lp_ms_vs_mas}\n"
      f"(4) average speedup MS over MAS <Intel, LF90, LP>: {avg_lf90_intel_lp_ms_vs_mas}\n"
      f"(5) average speedup MS over MAS <Intel, LF25, BBC>: {avg_lf25_intel_bbc_ms_vs_mas}\n"
      f"(6) average speedup MS over MAS <Intel, LF50, BBC>: {avg_lf50_intel_bbc_ms_vs_mas}\n"
      f"(7) average speedup MS over MAS <Intel, LF70, BBC>: {avg_lf70_intel_bbc_ms_vs_mas}\n"
      f"(8) average speedup MS over MAS <Intel, LF90, BBC>: {avg_lf90_intel_bbc_ms_vs_mas}\n"
      f"(9) average speedup MS over Murmur <Intel, LF25, BBC>: {avg_lf25_intel_bbc_ms_vs_murmur}\n"
      f"(10) average speedup MS over Murmur <Intel, LF50, BBC>: {avg_lf50_intel_bbc_ms_vs_murmur}\n"
      f"(11) average speedup MS over Murmur <Intel, LF70, BBC>: {avg_lf70_intel_bbc_ms_vs_murmur}\n"
      f"(12) average speedup MS over Murmur <Intel, LF90, BBC>: {avg_lf90_intel_bbc_ms_vs_murmur}\n"
     )

# Store the rounded figures in result_dict under the same keys as before.
result_dict["IntLf25LpMSvsMAS"] = round(avg_lf25_intel_lp_ms_vs_mas, 2)
result_dict["IntLf25BbcMSvsMAS"] = round(avg_lf25_intel_bbc_ms_vs_mas, 2)
result_dict["IntLf25BbcMSvsMurmur"] = round(avg_lf25_intel_bbc_ms_vs_murmur, 2)
result_dict["IntLf50BbcMSvsMurmur"] = round(avg_lf50_intel_bbc_ms_vs_murmur, 2)
result_dict["IntLf70BbcMSvsMurmur"] = round(avg_lf70_intel_bbc_ms_vs_murmur, 2)
result_dict["IntLf90BbcMSvsMurmur"] = round(avg_lf90_intel_bbc_ms_vs_murmur, 2)


- For lower load factors, Murmur is worse than MS/MAS; for LF 90 => no use case; with BBC the gap is somewhat more pronounced than with LP.
- MAS and MS are virtually identical for both LP and BBC.
- MS remains the favorite. Next: we compare against XX, a hardware-accelerated hash function.

In [None]:
# Colours (entries of deep_palette) keyed by the identifiers that are also used
# as legend_label_dict keys below.
pal_dict = {
    "BucketSIMD_8B_LSBMSB_THP": deep_palette[0],
    "LP_UN_THP": deep_palette[1],
    "BucketSIMD_8B_MSBLSB_THP_XX": deep_palette[2],
    "LP_UN_THP_XX": deep_palette[3],
}

# analysis_plot call with file_path "full_hashfunctions_ms_xx.pdf"; both returned
# values are kept as read_d / write_d.
read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_msb_fingerprints=False,
    hide_lsbmsb_fingerprints=False,
    hide_msblsb_fingerprints=False,
    multshift_only_lsbmsb=True,
    show_avx512=True,
    hide_non_avx512=True,
    hide_notestz=True,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_MSBFP", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=True,
    hide_vfp_16b=True,
    force_include=["LP_UN_THP", "LP_UN_THP_XX"],
    show_nonmultshift64=True,
    hide_128b_hasher=True,
    hide_xx_hash=False,
    hide_multaddshift_murmur=True,
    legend_label_dict={
        "LP_UN_THP": "LP(AoS/Unal.)+MS64",
        "LP_UN_THP_XX": "LP(AoS/Unal.)+XX",
        "BucketSIMD_8B_LSBMSB_THP": "BBC(8)+MS64",
        "BucketSIMD_8B_MSBLSB_THP_XX": "BBC(8)+XX",
    },
    file_path="full_hashfunctions_ms_xx.pdf",
)

In [None]:
# analysis_plot call with file_path "hashfunctions_ms_xx.pdf"; the return value is
# discarded. pal_dict is not defined in this cell and is taken from whatever an
# earlier cell assigned.
#
# NOTE(review): the call sets hide_16b_fingerprints=True, yet the legend labels
# originally only covered BucketSIMD_16B_* identifiers. The labels for the 8-bit
# identifiers used by the full-size variant of this plot are added; the 16-bit
# entries are kept unchanged. Presumably analysis_plot ignores label entries
# for identifiers that are not plotted -- confirm against its implementation.
_ = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 70],
    375,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_msb_fingerprints=False,
    hide_lsbmsb_fingerprints=False,
    hide_msblsb_fingerprints=False,
    multshift_only_lsbmsb=True,
    show_avx512=True,
    hide_non_avx512=True,
    hide_notestz=True,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_MSBFP", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=True,
    hide_vfp_16b=True,
    force_include=["LP_UN_THP", "LP_UN_THP_XX"],
    show_nonmultshift64=True,
    hide_128b_hasher=True,
    hide_xx_hash=False,
    hide_multaddshift_murmur=True,
    ncol_legend=2,
    legend_label_dict={
        "LP_UN_THP": "LP(AoS/Unal.)+MS64",
        "LP_UN_THP_XX": "LP(AoS/Unal.)+XX",
        "BucketSIMD_16B_THP": "BBC(16)+MS64",
        "BucketSIMD_16B_THP_XX": "BBC(16)+XX",
        "BucketSIMD_8B_LSBMSB_THP": "BBC(8)+MS64",
        "BucketSIMD_8B_MSBLSB_THP_XX": "BBC(8)+XX",
    },
    file_path="hashfunctions_ms_xx.pdf",
)

In [None]:
# Numbers: derive MS-vs-XX speedup figures from read_d (expected to be set by an
# earlier cell). Work on a copy; assuming read_d is a pandas DataFrame, the
# columns added to df1 below then leave read_d itself unchanged.
df1 = read_d.copy()

def get_lp_xx_lookups(row):
    """Return the "MLookups/s" value of the LP_UN_THP_XX row that matches ``row``.

    A row of the module-level DataFrame ``df1`` matches when its PlotIdentifier is
    "LP_UN_THP_XX" and it agrees with ``row`` on Arch, Compiler, PageSize, HugePageSize,
    SQR, Size, Distribution and LoadFactor.

    Raises:
        AssertionError: if ``df1`` does not contain exactly one matching row.
    """
    # Build the filter once; it is needed both for the uniqueness check and for the lookup.
    matches = df1["PlotIdentifier"] == "LP_UN_THP_XX"
    for column in ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR", "Size",
                   "Distribution", "LoadFactor"):
        matches &= df1[column] == row[column]

    baseline = df1[matches]
    assert len(baseline) == 1, f"expected exactly one LP_UN_THP_XX row, found {len(baseline)}"
    return baseline.iloc[0]["MLookups/s"]

# For every row, attach the throughput of the matching LP_UN_THP_XX row and the ratio of the
# row's own "MLookups/s" to it.
df1["LPXXPerf"] = df1.apply(get_lp_xx_lookups, axis=1)
df1["LPXXSpeedup"] = df1["MLookups/s"] / df1["LPXXPerf"]

# Mean ratio of the LP_UN_THP rows on Intel, one value per load factor.
_intel_lp_ms = df1[(df1["Arch"] == "Intel x86") & (df1["PlotIdentifier"] == "LP_UN_THP")]
avg_lf25_intel_lp_ms_vs_xx = _intel_lp_ms[_intel_lp_ms["LoadFactor"] == 25]["LPXXSpeedup"].mean()
avg_lf50_intel_lp_ms_vs_xx = _intel_lp_ms[_intel_lp_ms["LoadFactor"] == 50]["LPXXSpeedup"].mean()
avg_lf70_intel_lp_ms_vs_xx = _intel_lp_ms[_intel_lp_ms["LoadFactor"] == 70]["LPXXSpeedup"].mean()
avg_lf90_intel_lp_ms_vs_xx = _intel_lp_ms[_intel_lp_ms["LoadFactor"] == 90]["LPXXSpeedup"].mean()

# Line (3) reports the load-factor-70 value (the label previously said LF75).
print(f"(1) average speedup MS over XX <Intel, LF25, LP>: {avg_lf25_intel_lp_ms_vs_xx}\n"
      f"(2) average speedup MS over XX <Intel, LF50, LP>: {avg_lf50_intel_lp_ms_vs_xx}\n"
      f"(3) average speedup MS over XX <Intel, LF70, LP>: {avg_lf70_intel_lp_ms_vs_xx}\n"
      f"(4) average speedup MS over XX <Intel, LF90, LP>: {avg_lf90_intel_lp_ms_vs_xx}\n"
     )

# Store the rounded values in result_dict (defined in an earlier cell).
result_dict["IntLf25LpMSvsXX"] = round(avg_lf25_intel_lp_ms_vs_xx, 2)
result_dict["IntLf50LpMSvsXX"] = round(avg_lf50_intel_lp_ms_vs_xx, 2)
result_dict["IntLf70LpMSvsXX"] = round(avg_lf70_intel_lp_ms_vs_xx, 2)
result_dict["IntLf90LpMSvsXX"] = round(avg_lf90_intel_lp_ms_vs_xx, 2)


## Hashfunctions - Dense Keys

- wir finden erst die beste hash function und vergleichen dann am Ende nochmal dense keys vs uniform keys

In [None]:
# Dense keys, MS64 vs MS128 (per the legend labels), all four architectures,
# load factors 25/50/70/90. Unlike the neighbouring cells, hide_128b_hasher is not passed.
pal_dict = {
    "BucketSIMD_8B_LSBMSB_THP_Dense": deep_palette[0],
    "LP_UN_THP_Dense": deep_palette[1],
    "BucketSIMD_8B_LSBMSB_THP_MultShift128_Dense": deep_palette[2],
    "LP_UN_THP_MultShift128_Dense": deep_palette[3],
}

read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True, show_pref=False, show_unroll=False,
    hide_non_thp=True, hide_non_likely_hints=True, hide_likely_hints=False,
    hide_vec_iterators=True, hide_svbcast=True, hide_notestz=True, hide_non_s2n=False,
    show_avx512=True, hide_non_avx512=True,
    hide_non_512_on_intel=True, hide_non_256_on_amd=True, hide_neon=True,
    hide_lsb_fingerprints=True, hide_msb_fingerprints=False,
    hide_lsbmsb_fingerprints=False, hide_msblsb_fingerprints=False,
    hide_16b_fingerprints=True, hide_vfp_16b=True,
    multshift_only_lsbmsb=True, show_nonmultshift64=True,
    hide_xx_hash=True, hide_multaddshift_murmur=True,
    show_dense=True, hide_uniform=True,
    exclude_prefixes=[], additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_MSBFP", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    force_include=["LP_UN_THP_Dense", "LP_UN_THP_MultShift128_Dense"],
    # The BBC identifiers are the BucketSIMD_8B_* ones, so they are labelled "BBC(8)" like in
    # the other cells of this notebook (the labels previously said "BBC(16)").
    legend_label_dict={
        "LP_UN_THP_Dense": "LP(AoS/Unal.)+MS64",
        "LP_UN_THP_MultShift128_Dense": "LP(AoS/Unal.)+MS128",
        "BucketSIMD_8B_LSBMSB_THP_Dense": "BBC(8)+MS64",
        "BucketSIMD_8B_LSBMSB_THP_MultShift128_Dense": "BBC(8)+MS128",
    },
    file_path="full_64vs128_multshift_dense.pdf",
)

- 128 bit arithmetik bringt auch wieder nix

In [None]:
# Dense keys, MS64 vs MAS64 vs Murmur (per the legend labels), all four architectures,
# load factors 25/50/70/90. hide_multaddshift_murmur is not passed in this call.
pal_dict = {
    "BucketSIMD_8B_LSBMSB_THP_Dense": deep_palette[0],
    "LP_UN_THP_Dense": deep_palette[1],
    "BucketSIMD_8B_LSBMSB_THP_MultAddShift64_Dense": deep_palette[2],
    "LP_UN_THP_Murmur_Dense": deep_palette[3],
    "LP_UN_THP_MultAddShift64_Dense": deep_palette[4],
    "BucketSIMD_8B_MSBLSB_THP_Murmur_Dense": deep_palette[5],
}

read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True, show_pref=False, show_unroll=False,
    hide_non_thp=True, hide_non_likely_hints=True, hide_likely_hints=False,
    hide_vec_iterators=True, hide_svbcast=True, hide_notestz=True, hide_non_s2n=False,
    show_avx512=True, hide_non_avx512=True,
    hide_non_512_on_intel=True, hide_non_256_on_amd=True, hide_neon=True,
    hide_lsb_fingerprints=True, hide_msb_fingerprints=False,
    hide_lsbmsb_fingerprints=False, hide_msblsb_fingerprints=False,
    hide_16b_fingerprints=True, hide_vfp_16b=True,
    multshift_only_lsbmsb=True, show_nonmultshift64=True,
    hide_128b_hasher=True, hide_xx_hash=True,
    show_dense=True, hide_uniform=True,
    exclude_prefixes=[], additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_MSBFP", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    force_include=["LP_UN_THP_Dense", "LP_UN_THP_MultAddShift64_Dense", "LP_UN_THP_Murmur_Dense"],
    # All three BBC series are BucketSIMD_8B_* identifiers; the MS64 entry previously said
    # "BBC(16)" while its two siblings already said "BBC(8)".
    legend_label_dict={
        "LP_UN_THP_Dense": "LP(AoS/Unal.)+MS64",
        "LP_UN_THP_MultAddShift64_Dense": "LP(AoS/Unal.)+MAS64",
        "LP_UN_THP_Murmur_Dense": "LP(AoS/Unal.)+Murmur",
        "BucketSIMD_8B_LSBMSB_THP_Dense": "BBC(8)+MS64",
        "BucketSIMD_8B_LSBMSB_THP_MultAddShift64_Dense": "BBC(8)+MAS64",
        "BucketSIMD_8B_MSBLSB_THP_Murmur_Dense": "BBC(8)+Murmur",
    },
    file_path="full_hashfunctions_ms_mas_murmur_dense.pdf",
)

In [None]:
# Compact MS64/MAS64/Murmur dense-key figure: load factors 25 and 70 only, three-column
# legend; the return value is discarded. Reuses the pal_dict of the preceding cell.
_ = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 70],
    375,
    show_thp=True, show_pref=False, show_unroll=False,
    hide_non_thp=True, hide_non_likely_hints=True, hide_likely_hints=False,
    hide_vec_iterators=True, hide_svbcast=True, hide_notestz=True, hide_non_s2n=False,
    show_avx512=True, hide_non_avx512=True,
    hide_non_512_on_intel=True, hide_non_256_on_amd=True, hide_neon=True,
    hide_lsb_fingerprints=True, hide_msb_fingerprints=False,
    hide_lsbmsb_fingerprints=False, hide_msblsb_fingerprints=False,
    hide_16b_fingerprints=True, hide_vfp_16b=True,
    multshift_only_lsbmsb=True, show_nonmultshift64=True,
    hide_128b_hasher=True, hide_xx_hash=True,
    show_dense=True, hide_uniform=True,
    exclude_prefixes=[], additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_MSBFP", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    force_include=["LP_UN_THP_Dense", "LP_UN_THP_MultAddShift64_Dense", "LP_UN_THP_Murmur_Dense"],
    ncol_legend=3,
    # The BBC keys now use the BucketSIMD_8B_* identifiers that pal_dict contains. The former
    # BucketSIMD_16B_* keys could not match any series of this call, because 16-bit
    # fingerprints are hidden via hide_16b_fingerprints=True.
    # NOTE(review): assumes analysis_plot looks labels up by plot identifier -- confirm.
    legend_label_dict={
        "LP_UN_THP_Dense": "LP(AoS/Unal.)+MS64",
        "LP_UN_THP_MultAddShift64_Dense": "LP(AoS/Unal.)+MAS64",
        "LP_UN_THP_Murmur_Dense": "LP(AoS/Unal.)+Murmur",
        "BucketSIMD_8B_LSBMSB_THP_Dense": "BBC(8)+MS64",
        "BucketSIMD_8B_LSBMSB_THP_MultAddShift64_Dense": "BBC(8)+MAS64",
        "BucketSIMD_8B_MSBLSB_THP_Murmur_Dense": "BBC(8)+Murmur",
    },
    file_path="hashfunctions_ms_mas_murmur_dense.pdf",
)

In [None]:
# Numbers
# Work on a copy so the helper columns added below do not modify read_d itself.
df1 = read_d.copy()

def get_lp_mas_lookups(row):
    """Return the "MLookups/s" value of the LP_UN_THP_MultAddShift64_Dense row matching ``row``.

    A row of the module-level DataFrame ``df1`` matches when it has that PlotIdentifier and
    agrees with ``row`` on Arch, Compiler, PageSize, HugePageSize, SQR, Size, Distribution
    and LoadFactor.

    Raises:
        AssertionError: if ``df1`` does not contain exactly one matching row.
    """
    # Evaluate the filter once and reuse it for the uniqueness check and the lookup.
    same_run = df1["PlotIdentifier"] == "LP_UN_THP_MultAddShift64_Dense"
    for column in ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR", "Size",
                   "Distribution", "LoadFactor"):
        same_run &= df1[column] == row[column]

    reference = df1[same_run]
    assert len(reference) == 1, (
        f"expected exactly one LP_UN_THP_MultAddShift64_Dense row, found {len(reference)}"
    )
    return reference.iloc[0]["MLookups/s"]

def get_lp_mumur_lookups(row):
    """Return the "MLookups/s" value of the LP_UN_THP_Murmur_Dense row matching ``row``.

    A row of the module-level DataFrame ``df1`` matches when it has that PlotIdentifier and
    agrees with ``row`` on Arch, Compiler, PageSize, HugePageSize, SQR, Size, Distribution
    and LoadFactor.

    Raises:
        AssertionError: if ``df1`` does not contain exactly one matching row.
    """
    # Evaluate the filter once and reuse it for the uniqueness check and the lookup.
    same_run = df1["PlotIdentifier"] == "LP_UN_THP_Murmur_Dense"
    for column in ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR", "Size",
                   "Distribution", "LoadFactor"):
        same_run &= df1[column] == row[column]

    reference = df1[same_run]
    assert len(reference) == 1, (
        f"expected exactly one LP_UN_THP_Murmur_Dense row, found {len(reference)}"
    )
    return reference.iloc[0]["MLookups/s"]

# For every row, attach the throughput of the matching LP MultAddShift64 / LP Murmur row and
# the ratio of the row's own "MLookups/s" to each of them.
df1["LPMASPerf"] = df1.apply(get_lp_mas_lookups, axis=1)
df1["LPMASpeedup"] = df1["MLookups/s"] / df1["LPMASPerf"]

df1["LPMurmurPerf"] = df1.apply(get_lp_mumur_lookups, axis=1)
df1["LPMurmurSpeedup"] = df1["MLookups/s"] / df1["LPMurmurPerf"]

# Every average below is taken over the LP_UN_THP_Dense rows on Intel, one value per load factor.
_intel_lp_ms_dense = df1[(df1["Arch"] == "Intel x86") & (df1["PlotIdentifier"] == "LP_UN_THP_Dense")]

avg_lf25_intel_lp_ms_vs_mas_dense = _intel_lp_ms_dense[_intel_lp_ms_dense["LoadFactor"] == 25]["LPMASpeedup"].mean()
avg_lf50_intel_lp_ms_vs_mas_dense = _intel_lp_ms_dense[_intel_lp_ms_dense["LoadFactor"] == 50]["LPMASpeedup"].mean()
avg_lf70_intel_lp_ms_vs_mas_dense = _intel_lp_ms_dense[_intel_lp_ms_dense["LoadFactor"] == 70]["LPMASpeedup"].mean()
avg_lf90_intel_lp_ms_vs_mas_dense = _intel_lp_ms_dense[_intel_lp_ms_dense["LoadFactor"] == 90]["LPMASpeedup"].mean()

avg_lf25_intel_lp_ms_vs_murmur = _intel_lp_ms_dense[_intel_lp_ms_dense["LoadFactor"] == 25]["LPMurmurSpeedup"].mean()
avg_lf50_intel_lp_ms_vs_murmur = _intel_lp_ms_dense[_intel_lp_ms_dense["LoadFactor"] == 50]["LPMurmurSpeedup"].mean()
avg_lf70_intel_lp_ms_vs_murmur = _intel_lp_ms_dense[_intel_lp_ms_dense["LoadFactor"] == 70]["LPMurmurSpeedup"].mean()
avg_lf90_intel_lp_ms_vs_murmur = _intel_lp_ms_dense[_intel_lp_ms_dense["LoadFactor"] == 90]["LPMurmurSpeedup"].mean()

# Label fixes: the third value of each group is the load-factor-70 average (was printed as
# LF75), and lines (5)-(8) are LP numbers (were printed as BBC).
print(f"(1) average speedup MS over MAS <Intel, LF25, LP>: {avg_lf25_intel_lp_ms_vs_mas_dense}\n"
      f"(2) average speedup MS over MAS <Intel, LF50, LP>: {avg_lf50_intel_lp_ms_vs_mas_dense}\n"
      f"(3) average speedup MS over MAS <Intel, LF70, LP>: {avg_lf70_intel_lp_ms_vs_mas_dense}\n"
      f"(4) average speedup MS over MAS <Intel, LF90, LP>: {avg_lf90_intel_lp_ms_vs_mas_dense}\n"
      f"(5) average speedup MS over Murmur <Intel, LF25, LP>: {avg_lf25_intel_lp_ms_vs_murmur}\n"
      f"(6) average speedup MS over Murmur <Intel, LF50, LP>: {avg_lf50_intel_lp_ms_vs_murmur}\n"
      f"(7) average speedup MS over Murmur <Intel, LF70, LP>: {avg_lf70_intel_lp_ms_vs_murmur}\n"
      f"(8) average speedup MS over Murmur <Intel, LF90, LP>: {avg_lf90_intel_lp_ms_vs_murmur}\n"
     )

# Store the rounded values in result_dict (defined in an earlier cell). For MAS only the
# load-factor-25 value is stored.
result_dict["IntLf25LpMSvsMASDense"] = round(avg_lf25_intel_lp_ms_vs_mas_dense, 2)
result_dict["IntLf25LpMSvsMurmurDense"] = round(avg_lf25_intel_lp_ms_vs_murmur, 2)
result_dict["IntLf50LpMSvsMurmurDense"] = round(avg_lf50_intel_lp_ms_vs_murmur, 2)
result_dict["IntLf70LpMSvsMurmurDense"] = round(avg_lf70_intel_lp_ms_vs_murmur, 2)
result_dict["IntLf90LpMSvsMurmurDense"] = round(avg_lf90_intel_lp_ms_vs_murmur, 2)


- MAS und MS performen identisch
- Murmur wieder sowohl für LP als auch für BBC schlecht

In [None]:
# Dense keys, MS64 vs XX (per the legend labels), all four architectures,
# load factors 25/50/70/90.
pal_dict = {
    "BucketSIMD_8B_LSBMSB_THP_Dense": deep_palette[0],
    "LP_UN_THP_Dense": deep_palette[1],
    "BucketSIMD_8B_MSBLSB_THP_XX_Dense": deep_palette[2],
    "LP_UN_THP_XX_Dense": deep_palette[3],
}

read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True, show_pref=False, show_unroll=False,
    hide_non_thp=True, hide_non_likely_hints=True, hide_likely_hints=False,
    hide_vec_iterators=True, hide_svbcast=True, hide_notestz=True, hide_non_s2n=False,
    show_avx512=True, hide_non_avx512=True,
    hide_non_512_on_intel=True, hide_non_256_on_amd=True, hide_neon=True,
    hide_lsb_fingerprints=True, hide_msb_fingerprints=False,
    hide_lsbmsb_fingerprints=False, hide_msblsb_fingerprints=False,
    hide_16b_fingerprints=True, hide_vfp_16b=True,
    multshift_only_lsbmsb=True, show_nonmultshift64=True,
    hide_128b_hasher=True, hide_xx_hash=False, hide_multaddshift_murmur=True,
    show_dense=True, hide_uniform=True,
    exclude_prefixes=[], additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_MSBFP", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    force_include=["LP_UN_THP_Dense", "LP_UN_THP_XX_Dense"],
    legend_label_dict={
        "LP_UN_THP_Dense": "LP(AoS/Unal.)+MS64",
        "LP_UN_THP_XX_Dense": "LP(AoS/Unal.)+XX",
        "BucketSIMD_8B_LSBMSB_THP_Dense": "BBC(8)+MS64",
        "BucketSIMD_8B_MSBLSB_THP_XX_Dense": "BBC(8)+XX",
    },
    file_path="full_ms_vs_xx_dense.pdf",
)

- xx auch kacke
- also noch einmal dense vs uniform keys just weil wirs können

In [None]:
# Dense vs uniform keys (per the legend labels) for the default-hash LP and BBC identifiers,
# all four architectures, load factors 25/50/70/90. Both show_dense=True and
# hide_uniform=False are passed.
pal_dict = {
    "BucketSIMD_8B_LSBMSB_THP_Dense": deep_palette[0],
    "LP_UN_THP_Dense": deep_palette[1],
    "BucketSIMD_8B_LSBMSB_THP": deep_palette[2],
    "LP_UN_THP": deep_palette[3],
}

read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True, show_pref=False, show_unroll=False,
    hide_non_thp=True, hide_non_likely_hints=True, hide_likely_hints=False,
    hide_vec_iterators=True, hide_svbcast=True, hide_notestz=True, hide_non_s2n=False,
    show_avx512=True, hide_non_avx512=True,
    hide_non_512_on_intel=True, hide_non_256_on_amd=True, hide_neon=True,
    hide_lsb_fingerprints=True, hide_msb_fingerprints=False,
    hide_lsbmsb_fingerprints=False, hide_msblsb_fingerprints=False,
    hide_16b_fingerprints=True, hide_vfp_16b=True,
    multshift_only_lsbmsb=True, show_nonmultshift64=True,
    hide_128b_hasher=True, hide_xx_hash=True, hide_multaddshift_murmur=True,
    show_dense=True, hide_uniform=False,
    exclude_prefixes=[], additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_MSBFP", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    force_include=["LP_UN_THP_Dense", "LP_UN_THP"],
    legend_label_dict={
        "LP_UN_THP_Dense": "LP(AoS/Unal.)+Dense",
        "LP_UN_THP": "LP(AoS/Unal.)+Unif.",
        "BucketSIMD_8B_LSBMSB_THP_Dense": "BBC(8)+Dense",
        "BucketSIMD_8B_LSBMSB_THP": "BBC(8)+Unif.",
    },
    file_path="full_uniform_vs_dense.pdf",
)

### PAPER Plots

1. Plot mit allen Systemen für Uniform Keys to check whether Verhalten ist konsistent 
2. Plot mit allen Systemen für Dense Keys to check whether Verhalten ist konsistent 
3. Plot nur mit Intel, eine Reihe Uniform Keys, eine Reihe Dense Keys (actually im Paper)

In [None]:
# Uniform-keys test plot: MS64, XX and Murmur variants (per the legend labels) on all four
# architectures at load factors 25/50/70/90.
pal_dict = {
    "BucketSIMD_8B_LSBMSB_THP": deep_palette[0],
    "LP_UN_THP": deep_palette[1],
    "BucketSIMD_8B_MSBLSB_THP_XX": deep_palette[2],
    "LP_UN_THP_Murmur": deep_palette[3],
    "LP_UN_THP_XX": deep_palette[4],
    "BucketSIMD_8B_MSBLSB_THP_Murmur": deep_palette[5],
}

read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True, show_pref=False, show_unroll=False,
    hide_non_thp=True, hide_non_likely_hints=True, hide_likely_hints=False,
    hide_vec_iterators=True, hide_svbcast=True, hide_notestz=True, hide_non_s2n=False,
    hide_non_avx512=True,
    hide_non_512_on_intel=True, hide_non_256_on_amd=True, hide_neon=True,
    hide_lsb_fingerprints=True, hide_msb_fingerprints=False,
    hide_lsbmsb_fingerprints=False, hide_msblsb_fingerprints=False,
    hide_8b_fingerprints=False, hide_16b_fingerprints=True, hide_vfp_16b=True,
    multshift_only_lsbmsb=True, show_nonmultshift64=True,
    hide_128b_hasher=True, hide_xx_hash=False,
    hide_multaddshift=True, hide_msblsb_multshift=True,
    exclude_prefixes=[], additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    force_include=["LP_UN_THP", "LP_UN_THP_XX", "LP_UN_THP_Murmur"],
    legend_label_dict={
        "LP_UN_THP": "LP(AoS)+MS64",
        "LP_UN_THP_XX": "LP(AoS)+XX",
        "LP_UN_THP_Murmur": "LP(AoS)+Murmur",
        "BucketSIMD_8B_LSBMSB_THP": "BBC(8)+MS64",
        "BucketSIMD_8B_MSBLSB_THP_XX": "BBC(8)+XX",
        "BucketSIMD_8B_MSBLSB_THP_Murmur": "BBC(8)+Murmur",
    },
    file_path="full_paper_uniform_hashfunctions.pdf",
)

In [None]:
# Dense-keys test plot: MS64, XX and Murmur variants (per the legend labels) on all four
# architectures at load factors 25/50/70/90.
pal_dict = {
    "BucketSIMD_8B_LSBMSB_THP_Dense": deep_palette[0],
    "LP_UN_THP_Dense": deep_palette[1],
    "BucketSIMD_8B_MSBLSB_THP_XX_Dense": deep_palette[2],
    "LP_UN_THP_Murmur_Dense": deep_palette[3],
    "LP_UN_THP_XX_Dense": deep_palette[4],
    "BucketSIMD_8B_MSBLSB_THP_Murmur_Dense": deep_palette[5],
}

read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True, show_pref=False, show_unroll=False,
    hide_non_thp=True, hide_non_likely_hints=True, hide_likely_hints=False,
    hide_vec_iterators=True, hide_svbcast=True, hide_notestz=True, hide_non_s2n=False,
    hide_non_avx512=True,
    hide_non_512_on_intel=True, hide_non_256_on_amd=True, hide_neon=True,
    hide_lsb_fingerprints=True, hide_msb_fingerprints=False,
    hide_lsbmsb_fingerprints=False, hide_msblsb_fingerprints=False,
    hide_8b_fingerprints=False, hide_16b_fingerprints=True, hide_vfp_16b=True,
    multshift_only_lsbmsb=True, show_nonmultshift64=True,
    hide_128b_hasher=True, hide_xx_hash=False,
    hide_multaddshift=True, hide_msblsb_multshift=True,
    show_dense=True, hide_uniform=True,
    exclude_prefixes=[], additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    force_include=["LP_UN_THP_Dense", "LP_UN_THP_XX_Dense", "LP_UN_THP_Murmur_Dense"],
    legend_label_dict={
        "LP_UN_THP_Dense": "LP(AoS)+MS64",
        "LP_UN_THP_XX_Dense": "LP(AoS)+XX",
        "LP_UN_THP_Murmur_Dense": "LP(AoS)+Murmur",
        "BucketSIMD_8B_LSBMSB_THP_Dense": "BBC(8)+MS64",
        "BucketSIMD_8B_MSBLSB_THP_XX_Dense": "BBC(8)+XX",
        "BucketSIMD_8B_MSBLSB_THP_Murmur_Dense": "BBC(8)+Murmur",
    },
    file_path="full_paper_dense_hashfunctions.pdf",
)

In [None]:
# True paper plot: Intel only, load factors 25/50/70/90, with plot_uniform_and_dense=True.
# NOTE(review): pal_dict and legend_label_dict list only the non-dense identifiers while
# force_include also names *_Dense ones -- presumably plot_uniform_and_dense handles the
# mapping inside analysis_plot; confirm.
pal_dict = {
    "BucketSIMD_8B_LSBMSB_THP": deep_palette[0],
    "LP_UN_THP": deep_palette[1],
    "BucketSIMD_8B_MSBLSB_THP_XX": deep_palette[2],
    "LP_UN_THP_Murmur": deep_palette[3],
    "LP_UN_THP_XX": deep_palette[4],
    "BucketSIMD_8B_MSBLSB_THP_Murmur": deep_palette[5],
}

read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86"],
    [25, 50, 70, 90],
    1000,
    show_thp=True, show_pref=False, show_unroll=False,
    hide_non_thp=True, hide_non_likely_hints=True, hide_likely_hints=False,
    hide_vec_iterators=True, hide_svbcast=True, hide_notestz=True, hide_non_s2n=False,
    hide_non_avx512=True,
    hide_non_512_on_intel=True, hide_non_256_on_amd=True, hide_neon=True,
    hide_lsb_fingerprints=True, hide_msb_fingerprints=False,
    hide_lsbmsb_fingerprints=False, hide_msblsb_fingerprints=False,
    hide_8b_fingerprints=False, hide_16b_fingerprints=True, hide_vfp_16b=True,
    multshift_only_lsbmsb=True, show_nonmultshift64=True,
    hide_128b_hasher=True, hide_xx_hash=False,
    hide_multaddshift=True, hide_msblsb_multshift=True,
    show_dense=True, hide_uniform=False, plot_uniform_and_dense=True,
    exclude_prefixes=[], additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    force_include=[
        "LP_UN_THP", "LP_UN_THP_XX", "LP_UN_THP_Murmur",
        "LP_UN_THP_Dense", "LP_UN_THP_XX_Dense", "LP_UN_THP_Murmur_Dense",
    ],
    legend_label_dict={
        "LP_UN_THP": "LP(AoS)+MS64",
        "LP_UN_THP_XX": "LP(AoS)+XX",
        "LP_UN_THP_Murmur": "LP(AoS)+Murmur",
        "BucketSIMD_8B_LSBMSB_THP": "BBC(8)+MS64",
        "BucketSIMD_8B_MSBLSB_THP_XX": "BBC(8)+XX",
        "BucketSIMD_8B_MSBLSB_THP_Murmur": "BBC(8)+Murmur",
    },
    file_path="full_paper_intel_hashfunctions.pdf",
)

In [None]:

# Compact Intel-only variant: load factors 50/70/90, hide_insertions=True, explicit legend
# and axis-label offsets, three-column legend. Reuses the pal_dict of the preceding cell;
# the return value is discarded.
_ = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86"],
    [50, 70, 90],
    240,
    legend_height_shift=-0.35, ylabel_padding=-0.2, xlabel_padding=-0.7,
    ncol_legend=3,
    show_thp=True, show_pref=False, show_unroll=False,
    hide_non_thp=True, hide_non_likely_hints=True, hide_likely_hints=False,
    hide_vec_iterators=True, hide_svbcast=True, hide_notestz=True, hide_non_s2n=False,
    hide_non_avx512=True,
    hide_non_512_on_intel=True, hide_non_256_on_amd=True, hide_neon=True,
    hide_lsb_fingerprints=True, hide_msb_fingerprints=False,
    hide_lsbmsb_fingerprints=False, hide_msblsb_fingerprints=False,
    hide_8b_fingerprints=False, hide_16b_fingerprints=True, hide_vfp_16b=True,
    multshift_only_lsbmsb=True, show_nonmultshift64=True,
    hide_128b_hasher=True, hide_xx_hash=False,
    hide_multaddshift=True, hide_msblsb_multshift=True,
    show_dense=True, hide_uniform=False, plot_uniform_and_dense=True,
    exclude_prefixes=[], additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    force_include=[
        "LP_UN_THP", "LP_UN_THP_XX", "LP_UN_THP_Murmur",
        "LP_UN_THP_Dense", "LP_UN_THP_XX_Dense", "LP_UN_THP_Murmur_Dense",
    ],
    legend_label_dict={
        "LP_UN_THP": "LP(AoS)+MS64",
        "LP_UN_THP_XX": "LP(AoS)+XX",
        "LP_UN_THP_Murmur": "LP(AoS)+Murmur",
        "BucketSIMD_8B_LSBMSB_THP": "BBC(8)+MS64",
        "BucketSIMD_8B_MSBLSB_THP_XX": "BBC(8)+XX",
        "BucketSIMD_8B_MSBLSB_THP_Murmur": "BBC(8)+Murmur",
    },
    hide_insertions=True,
    file_path="paper_intel_hashfunctions.pdf",
)

## String Keys

- compare string keys to integer keys mit XX hasher, damit die computational complexity des hashers nicht den performance unterschied macht

In [None]:
# String keys vs integer keys with the XX hasher (per the legend labels), all four
# architectures, load factors 25/50/70/90.
pal_dict = {
    "BucketSIMD_8B_THP_XX": deep_palette[0],
    "LP_UN_THP_XX": deep_palette[1],
    "BucketSIMD_8B_THP_XX_StringKey": deep_palette[2],
    "LP_UN_THP_XX_StringKey": deep_palette[3],
}

read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True, show_pref=False, show_unroll=False,
    hide_non_thp=True, hide_non_likely_hints=True, hide_likely_hints=False,
    hide_vec_iterators=True, hide_svbcast=True, hide_notestz=True, hide_non_s2n=False,
    show_avx512=True, hide_non_avx512=True, hide_avx512_smaller_512=False,
    hide_non_512_on_intel=True, hide_non_256_on_amd=True, hide_neon=True,
    hide_lsb_fingerprints=True, hide_msb_fingerprints=False, hide_msblsb_fingerprints=False,
    hide_16b_fingerprints=True, hide_vfp_16b=True,
    show_nonmultshift64=True, hide_128b_hasher=True, hide_xx_hash=False,
    hide_multaddshift_murmur=True, hide_multshift64=True,
    show_stringkeys=True,
    exclude_prefixes=[], additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_MSBLSB", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    force_include=["LP_UN_THP_XX", "LP_UN_THP_XX_StringKey"],
    # The BBC series are the BucketSIMD_8B_* identifiers, so they are labelled "BBC(8)" as in
    # the compact string-key plot (the labels previously said "BBC(16)").
    legend_label_dict={
        "LP_UN_THP_XX_StringKey": "LP(AoS/Unal.)+StringKeys",
        "LP_UN_THP_XX": "LP(AoS/Unal.)+IntKeys",
        "BucketSIMD_8B_THP_XX_StringKey": "BBC(8)+StringKeys",
        "BucketSIMD_8B_THP_XX": "BBC(8)+IntKeys",
    },
    file_path="full_stringkeys.pdf",
)

In [None]:
# Compact string-key figure: load factors 25 and 70 only, two-column legend; the return value
# is discarded. Reuses the pal_dict of the preceding cell.
_ = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 70],
    375,
    show_thp=True, show_pref=False, show_unroll=False,
    hide_non_thp=True, hide_non_likely_hints=True, hide_likely_hints=False,
    hide_vec_iterators=True, hide_svbcast=True, hide_notestz=True, hide_non_s2n=False,
    show_avx512=True, hide_non_avx512=True, hide_avx512_smaller_512=False,
    hide_non_512_on_intel=True, hide_non_256_on_amd=True, hide_neon=True,
    hide_lsb_fingerprints=True, hide_msb_fingerprints=False, hide_msblsb_fingerprints=False,
    hide_16b_fingerprints=True, hide_vfp_16b=True,
    show_nonmultshift64=True, hide_128b_hasher=True, hide_xx_hash=False,
    hide_multaddshift_murmur=True, hide_multshift64=True,
    show_stringkeys=True,
    exclude_prefixes=[], additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_MSBLSB", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    force_include=["LP_UN_THP_XX", "LP_UN_THP_XX_StringKey"],
    ncol_legend=2,
    legend_label_dict={
        "LP_UN_THP_XX_StringKey": "LP(AoS/Unal.)+StringKeys",
        "LP_UN_THP_XX": "LP(AoS/Unal.)+IntKeys",
        "BucketSIMD_8B_THP_XX_StringKey": "BBC(8)+StringKeys",
        "BucketSIMD_8B_THP_XX": "BBC(8)+IntKeys",
    },
    file_path="stringkeys.pdf",
)

In [None]:
# Numbers
# Work on a copy so the helper columns added below do not modify read_d itself.
df1 = read_d.copy()

def get_lp_string_lookups(row):
    """Return "MLookups/s" of the first LP_UN_THP_XX_StringKey row of ``df1`` matching ``row``.

    Rows match when they agree with ``row`` on Arch, Compiler, PageSize, HugePageSize, SQR,
    Size, Distribution and LoadFactor.
    """
    # NOTE(review): unlike the sibling helpers, this one does not assert that exactly one row
    # matches; it silently takes the first. Confirm whether that is intentional.
    selector = df1["PlotIdentifier"] == "LP_UN_THP_XX_StringKey"
    for key in ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR", "Size",
                "Distribution", "LoadFactor"):
        selector = selector & (df1[key] == row[key])
    first_match = df1[selector].iloc[0]
    return first_match["MLookups/s"]

def get_bbc_string_lookups(row):
    """Return the "MLookups/s" value of the BucketSIMD_8B_THP_XX_StringKey row matching ``row``.

    A row of the module-level DataFrame ``df1`` matches when it has that PlotIdentifier and
    agrees with ``row`` on Arch, Compiler, PageSize, HugePageSize, SQR, Size, Distribution
    and LoadFactor.

    Raises:
        AssertionError: if ``df1`` does not contain exactly one matching row.
    """
    # Evaluate the filter once and reuse it for the uniqueness check and the lookup.
    same_run = df1["PlotIdentifier"] == "BucketSIMD_8B_THP_XX_StringKey"
    for column in ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR", "Size",
                   "Distribution", "LoadFactor"):
        same_run &= df1[column] == row[column]

    reference = df1[same_run]
    assert len(reference) == 1, (
        f"expected exactly one BucketSIMD_8B_THP_XX_StringKey row, found {len(reference)}"
    )
    return reference.iloc[0]["MLookups/s"]

def get_lp_int_lookups(row):
    """Return the "MLookups/s" of the LP integer-key run that matches *row*.

    The reference run is the row of the module-level ``df1`` whose
    ``PlotIdentifier`` is ``"LP_UN_THP_XX"`` and whose Arch, Compiler,
    PageSize, HugePageSize, SQR, Size, Distribution and LoadFactor are equal
    to those of *row*.

    Args:
        row: A row of ``df1`` (anything indexable by the column names above).

    Returns:
        The "MLookups/s" value of the matching reference run.

    Raises:
        AssertionError: If the configuration does not match exactly one
            reference run.
    """
    # Build the filter once; it is needed for both the sanity check and the result.
    mask = df1["PlotIdentifier"] == "LP_UN_THP_XX"
    for column in ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR", "Size", "Distribution", "LoadFactor"):
        mask = mask & (df1[column] == row[column])

    matches = df1[mask]
    assert len(matches) == 1, f"expected exactly one LP_UN_THP_XX reference row, found {len(matches)}"
    return matches.iloc[0]["MLookups/s"]


# For every row, look up the lookup throughput of each reference series in the
# same configuration and store the row's throughput relative to it.
for perf_column, speedup_column, reference_getter in (
    ("LPStringPerf", "LPStringSpeedup", get_lp_string_lookups),
    ("BBCStringPerf", "BBCStringSpeedup", get_bbc_string_lookups),
    ("LPIntPerf", "LPIntSpeedup", get_lp_int_lookups),
):
    df1[perf_column] = df1.apply(reference_getter, axis=1)
    df1[speedup_column] = df1["MLookups/s"] / df1[perf_column]


def _amd_lf70_mean(plot_identifier, sqr, speedup_column):
    """Mean of *speedup_column* over df1's AMD x86 rows at load factor 70 for one series and SQR."""
    chosen = df1[
        (df1["Arch"] == "AMD x86")
        & (df1["PlotIdentifier"] == plot_identifier)
        & (df1["LoadFactor"] == 70)
        & (df1["SQR"] == sqr)
    ]
    return chosen[speedup_column].mean()


# BBC: integer keys relative to string keys.
avg_lf70_amd_bbc_int_vs_string_0sqr = _amd_lf70_mean("BucketSIMD_8B_THP_XX", 0, "BBCStringSpeedup")
avg_lf70_amd_bbc_int_vs_string_50sqr = _amd_lf70_mean("BucketSIMD_8B_THP_XX", 50, "BBCStringSpeedup")
avg_lf70_amd_bbc_int_vs_string_100sqr = _amd_lf70_mean("BucketSIMD_8B_THP_XX", 100, "BBCStringSpeedup")

# LP: integer keys relative to string keys.
avg_lf70_amd_lp_int_vs_string_0sqr = _amd_lf70_mean("LP_UN_THP_XX", 0, "LPStringSpeedup")
avg_lf70_amd_lp_int_vs_string_50sqr = _amd_lf70_mean("LP_UN_THP_XX", 50, "LPStringSpeedup")
avg_lf70_amd_lp_int_vs_string_100sqr = _amd_lf70_mean("LP_UN_THP_XX", 100, "LPStringSpeedup")

# String keys: BBC relative to LP.
avg_lf70_amd_string_bbc_vs_lp_0sqr = _amd_lf70_mean("BucketSIMD_8B_THP_XX_StringKey", 0, "LPStringSpeedup")
avg_lf70_amd_string_bbc_vs_lp_100sqr = _amd_lf70_mean("BucketSIMD_8B_THP_XX_StringKey", 100, "LPStringSpeedup")

# Integer keys: BBC relative to LP.
avg_lf70_amd_int_bbc_vs_lp_0sqr = _amd_lf70_mean("BucketSIMD_8B_THP_XX", 0, "LPIntSpeedup")
avg_lf70_amd_int_bbc_vs_lp_100sqr = _amd_lf70_mean("BucketSIMD_8B_THP_XX", 100, "LPIntSpeedup")


# Human-readable summary of the averages; the text is terminated by an empty line.
summary_lines = [
    f"(1) average speedup int over string <AMD, LF70, BBC, SQR0>: {avg_lf70_amd_bbc_int_vs_string_0sqr}",
    f"(2) average speedup int over string <AMD, LF70, BBC, SQR50>: {avg_lf70_amd_bbc_int_vs_string_50sqr}",
    f"(3) average speedup int over string <AMD, LF70, BBC, SQR100>: {avg_lf70_amd_bbc_int_vs_string_100sqr}",
    f"(4) average speedup int over string <AMD, LF70, LP, SQR0>: {avg_lf70_amd_lp_int_vs_string_0sqr}",
    f"(5) average speedup int over string <AMD, LF70, LP, SQR50>: {avg_lf70_amd_lp_int_vs_string_50sqr}",
    f"(6) average speedup int over string <AMD, LF70, LP, SQR100>: {avg_lf70_amd_lp_int_vs_string_100sqr}",
    f"(7) average speedup BBC over LP <AMD, LF70, String Keys, SQR0>: {avg_lf70_amd_string_bbc_vs_lp_0sqr}",
    f"(8) average speedup BBC over LP <AMD, LF70, String Keys, SQR100>: {avg_lf70_amd_string_bbc_vs_lp_100sqr}",
    f"(9) average speedup BBC over LP <AMD, LF70, int Keys, SQR0>: {avg_lf70_amd_int_bbc_vs_lp_0sqr}",
    f"(10) average speedup BBC over LP <AMD, LF70, int Keys, SQR100>: {avg_lf70_amd_int_bbc_vs_lp_100sqr}",
]
print("\n".join(summary_lines) + "\n")

# Store the averages, rounded to two decimals, in result_dict.
# NOTE(review): the last two keys read "...StrInt0"/"...StrInt100" although they
# hold the integer-key BBC-vs-LP values for SQR 0/100; they are kept unchanged
# because other material may refer to them by name - confirm before renaming.
for result_key, average in (
    ("AmdLf70IntVsStrBBCSqr0", avg_lf70_amd_bbc_int_vs_string_0sqr),
    ("AmdLf70IntVsStrBBCSqr50", avg_lf70_amd_bbc_int_vs_string_50sqr),
    ("AmdLf70IntVsStrBBCSqr100", avg_lf70_amd_bbc_int_vs_string_100sqr),
    ("AmdLf70IntVsStrLPSqr0", avg_lf70_amd_lp_int_vs_string_0sqr),
    ("AmdLf70IntVsStrLPSqr50", avg_lf70_amd_lp_int_vs_string_50sqr),
    ("AmdLf70IntVsStrLPSqr100", avg_lf70_amd_lp_int_vs_string_100sqr),
    ("AmdLf70BbcVsLpStrSqr0", avg_lf70_amd_string_bbc_vs_lp_0sqr),
    ("AmdLf70BbcVsLpStrSqr100", avg_lf70_amd_string_bbc_vs_lp_100sqr),
    ("AmdLf70BbcVsLpStrInt0", avg_lf70_amd_int_bbc_vs_lp_0sqr),
    ("AmdLf70BbcVsLpStrInt100", avg_lf70_amd_int_bbc_vs_lp_100sqr),
):
    result_dict[result_key] = round(average, 2)


- achtung: vergleich ist int keys mit xx hasher für fairness
- für 0% SQR machen wir gar kein strcmp bei BBC (kein Match), deshalb Performance beinahe wie bei int keys
- bei 100% SQR machen wir mehr oder weniger genauso viel strcmp wie bei LP (wenn wir davon ausgehen, dass wir den Hit früh treffen) und deshalb zumindest ähnliche Performance
- also insbesondere für SQR < 100% enormes improvement über LP, allerdings trotzdem performance drop im vergleich zu int keys

### Paper

In [None]:
# Colours for the four series of the full-size paper figure on string keys.
pal_dict = {
    "BucketSIMD_8B_MSBLSB_THP_XX": deep_palette[0],
    "LP_UN_THP_XX": deep_palette[1],
    "BucketSIMD_8B_MSBLSB_THP_XX_StringKey": deep_palette[2],
    "LP_UN_THP_XX_StringKey": deep_palette[3],
}

# Written to full_paper_stringkeys.pdf; both returned values are kept.
read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_msblsb_fingerprints=False,
    hide_lsbmsb_fingerprints=True,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_notestz=True,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_MSBFP", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_8b_fingerprints=False,
    hide_16b_fingerprints=True,
    force_include=["LP_UN_THP_XX", "LP_UN_THP_XX_StringKey"],
    show_nonmultshift64=True,
    hide_128b_hasher=True,
    hide_xx_hash=False,
    hide_multaddshift_murmur=True,
    hide_multshift64=True,
    show_stringkeys=True,
    legend_label_dict={
        "LP_UN_THP_XX_StringKey": "LP(AoS/Unal.)+StringKeys",
        "LP_UN_THP_XX": "LP(AoS/Unal.)+IntKeys",
        "BucketSIMD_8B_MSBLSB_THP_XX_StringKey": "BBC(8)+StringKeys",
        "BucketSIMD_8B_MSBLSB_THP_XX": "BBC(8)+IntKeys",
    },
    file_path="full_paper_stringkeys.pdf",
)

In [None]:
# Fixed hex colours for the four series of the compact paper figure on string keys.
pal_dict = {
    "BucketSIMD_8B_MSBLSB_THP_XX": "#fd8d3c",
    "LP_UN_THP_XX": "#a1dab4",
    "BucketSIMD_8B_MSBLSB_THP_XX_StringKey": "#e31a1c",
    "LP_UN_THP_XX_StringKey": "#2c7fb8",
}

# Written to paper_stringkeys.pdf; the return value is discarded.
_ = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["AMD x86", "Power"],
    [50, 90],
    240,
    ncol_legend=4,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_msblsb_fingerprints=False,
    hide_lsbmsb_fingerprints=True,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_notestz=True,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_MSBFP", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_8b_fingerprints=False,
    hide_16b_fingerprints=True,
    force_include=["LP_UN_THP_XX", "LP_UN_THP_XX_StringKey"],
    show_nonmultshift64=True,
    hide_128b_hasher=True,
    hide_xx_hash=False,
    hide_multaddshift_murmur=True,
    hide_multshift64=True,
    show_stringkeys=True,
    legend_label_dict={
        "LP_UN_THP_XX_StringKey": "LP+Str",
        "LP_UN_THP_XX": "LP+Int",
        "BucketSIMD_8B_MSBLSB_THP_XX_StringKey": "BBC(8)+Str",
        "BucketSIMD_8B_MSBLSB_THP_XX": "BBC(8)+Int",
    },
    file_path="paper_stringkeys.pdf",
    legend_height_shift=-0.22,
    ylabel_padding=0,
    xlabel_padding=0,
    disable_legend_border=True,
    reduce_xticks=True,
    reduce_yticks=True,
    reduce_minor=True,
    other_ylimit=True,
    only_two_sqr=False,
    extra_legend_columnspacing=0.1,
    extra_legend_handlelength=0.3,
)

## Large Inline Values

In [None]:
# Colours for the four series of the inline-value comparison (int vs. tuple values).
pal_dict = {
    "BucketSIMD_8B_THP": deep_palette[0],
    "LP_UN_THP": deep_palette[1],
    "BucketSIMD_8B_THP_DummyTuple": deep_palette[2],
    "LP_UN_THP_DummyTuple": deep_palette[3],
}

# Written to full_inline_dummytuple.pdf; both returned values are kept.
read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_lsbmsb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_avx512_smaller_512=True,
    hide_notestz=True,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=[],
    word_filter=[
        "_LSBMSB", "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_MSBFP", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=True,
    hide_vfp_16b=True,
    force_include=["LP_UN_THP", "LP_UN_THP_DummyTuple"],
    show_inline_dummytuple=True,
    legend_label_dict={
        "LP_UN_THP_DummyTuple": "LP(AoS/Unal.)+TupleValues",
        "LP_UN_THP": "LP(AoS/Unal.)+IntValues",
        "BucketSIMD_8B_THP_DummyTuple": "BBC(8)+TupleValues",
        "BucketSIMD_8B_THP": "BBC(8)+IntValues",
    },
    file_path="full_inline_dummytuple.pdf",
)

In [None]:
# Same comparison with a two-column legend, written to inline_dummytuple.pdf;
# pal_dict is the palette already present in the namespace and the return
# value is discarded.
_ = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 70],
    375,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_lsbmsb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_avx512_smaller_512=True,
    hide_notestz=True,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=[],
    word_filter=[
        "_LSBMSB", "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_MSBFP", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=True,
    hide_vfp_16b=True,
    force_include=["LP_UN_THP", "LP_UN_THP_DummyTuple"],
    show_inline_dummytuple=True,
    ncol_legend=2,
    legend_label_dict={
        "LP_UN_THP_DummyTuple": "LP(AoS/Unal.)+TupleValues",
        "LP_UN_THP": "LP(AoS/Unal.)+IntValues",
        "BucketSIMD_8B_THP_DummyTuple": "BBC(8)+TupleValues",
        "BucketSIMD_8B_THP": "BBC(8)+IntValues",
    },
    file_path="inline_dummytuple.pdf",
)

In [None]:
# Numbers: work on copies of read_d and write_d so that the helper columns
# added further down in this cell do not end up in the originals.
df1 = read_d.copy()
df2 = write_d.copy()

def get_lp_largeval_lookups(row):
    """Return the "MLookups/s" of the LP tuple-value run that matches *row*.

    The reference run is the row of the module-level ``df1`` whose
    ``PlotIdentifier`` is ``"LP_UN_THP_DummyTuple"`` and whose Arch, Compiler,
    PageSize, HugePageSize, SQR, Size, Distribution and LoadFactor are equal
    to those of *row*.

    Args:
        row: A row of ``df1`` (anything indexable by the column names above).

    Returns:
        The "MLookups/s" value of the matching reference run.

    Raises:
        AssertionError: If the configuration does not match exactly one
            reference run.
    """
    # Build the filter once; it is needed for both the sanity check and the result.
    mask = df1["PlotIdentifier"] == "LP_UN_THP_DummyTuple"
    for column in ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR", "Size", "Distribution", "LoadFactor"):
        mask = mask & (df1[column] == row[column])

    matches = df1[mask]
    assert len(matches) == 1, f"expected exactly one LP_UN_THP_DummyTuple reference row, found {len(matches)}"
    return matches.iloc[0]["MLookups/s"]

def get_bbc_intval_inserts(row):
    """Return the "MInserts/s" of the BBC integer-value run that matches *row*.

    The reference run is the row of the module-level ``df2`` whose
    ``PlotIdentifier`` is ``"BucketSIMD_8B_THP"`` and whose Arch, Compiler,
    PageSize, HugePageSize, Size, Distribution and LoadFactor are equal to
    those of *row*. SQR is not part of the match.

    Args:
        row: A row of ``df2`` (anything indexable by the column names above).

    Returns:
        The "MInserts/s" value of the matching reference run.

    Raises:
        AssertionError: If the configuration does not match exactly one
            reference run.
    """
    # Build the filter once; it is needed for both the sanity check and the result.
    mask = df2["PlotIdentifier"] == "BucketSIMD_8B_THP"
    for column in ("Arch", "Compiler", "PageSize", "HugePageSize", "Size", "Distribution", "LoadFactor"):
        mask = mask & (df2[column] == row[column])

    matches = df2[mask]
    assert len(matches) == 1, f"expected exactly one BucketSIMD_8B_THP reference row, found {len(matches)}"
    return matches.iloc[0]["MInserts/s"]

def get_lp_intval_inserts(row):
    """Return the "MInserts/s" of the LP integer-value run that matches *row*.

    The reference run is the row of the module-level ``df2`` whose
    ``PlotIdentifier`` is ``"LP_UN_THP"`` and whose Arch, Compiler, PageSize,
    HugePageSize, Size, Distribution and LoadFactor are equal to those of
    *row*. SQR is not part of the match.

    Args:
        row: A row of ``df2`` (anything indexable by the column names above).

    Returns:
        The "MInserts/s" value of the matching reference run.

    Raises:
        AssertionError: If the configuration does not match exactly one
            reference run.
    """
    # Build the filter once; it is needed for both the sanity check and the result.
    mask = df2["PlotIdentifier"] == "LP_UN_THP"
    for column in ("Arch", "Compiler", "PageSize", "HugePageSize", "Size", "Distribution", "LoadFactor"):
        mask = mask & (df2[column] == row[column])

    matches = df2[mask]
    assert len(matches) == 1, f"expected exactly one LP_UN_THP reference row, found {len(matches)}"
    return matches.iloc[0]["MInserts/s"]


# For every row, look up the throughput of each reference series in the same
# configuration and store the row's throughput relative to it (lookups in
# df1, inserts in df2).
for frame, metric, perf_column, speedup_column, reference_getter in (
    (df1, "MLookups/s", "LPTuplePerf", "LPTupleSpeedup", get_lp_largeval_lookups),
    (df2, "MInserts/s", "BBCIntInsertPerf", "BBCIntInsertSpeedup", get_bbc_intval_inserts),
    (df2, "MInserts/s", "LPIntInsertPerf", "LPIntInsertSpeedup", get_lp_intval_inserts),
):
    frame[perf_column] = frame.apply(reference_getter, axis=1)
    frame[speedup_column] = frame[metric] / frame[perf_column]


def _column_mean(frame, arch, plot_identifier, load_factor, column):
    """Mean of *column* over the rows of *frame* for one architecture, series and load factor."""
    chosen = frame[
        (frame["Arch"] == arch)
        & (frame["PlotIdentifier"] == plot_identifier)
        & (frame["LoadFactor"] == load_factor)
    ]
    return chosen[column].mean()


# Lookups on Intel: LP with integer values relative to LP with tuple values.
avg_lf25_intel_int_vs_tuple_speedup = _column_mean(df1, "Intel x86", "LP_UN_THP", 25, "LPTupleSpeedup")
avg_lf90_intel_int_vs_tuple_speedup = _column_mean(df1, "Intel x86", "LP_UN_THP", 90, "LPTupleSpeedup")
avg_lf25_intel_int_vs_tuple_abs_diff = (
    _column_mean(df1, "Intel x86", "LP_UN_THP", 25, "MLookups/s")
    - _column_mean(df1, "Intel x86", "LP_UN_THP_DummyTuple", 25, "MLookups/s")
)
avg_lf90_intel_int_vs_tuple_abs_diff = (
    _column_mean(df1, "Intel x86", "LP_UN_THP", 90, "MLookups/s")
    - _column_mean(df1, "Intel x86", "LP_UN_THP_DummyTuple", 90, "MLookups/s")
)

# Inserts at load factor 90: LP with tuple values relative to LP with integer values.
avg_lf90_lp_insert_tuple_slowdown_intel = _column_mean(df2, "Intel x86", "LP_UN_THP_DummyTuple", 90, "LPIntInsertSpeedup")
avg_lf90_lp_insert_tuple_slowdown_amd = _column_mean(df2, "AMD x86", "LP_UN_THP_DummyTuple", 90, "LPIntInsertSpeedup")
avg_lf90_lp_insert_tuple_slowdown_a64fx = _column_mean(df2, "A64FX ARM", "LP_UN_THP_DummyTuple", 90, "LPIntInsertSpeedup")
avg_lf90_lp_insert_tuple_slowdown_power = _column_mean(df2, "Power", "LP_UN_THP_DummyTuple", 90, "LPIntInsertSpeedup")

# Inserts at load factor 90: BBC with tuple values relative to BBC with integer values.
avg_lf90_bbc_insert_tuple_slowdown_intel = _column_mean(df2, "Intel x86", "BucketSIMD_8B_THP_DummyTuple", 90, "BBCIntInsertSpeedup")
avg_lf90_bbc_insert_tuple_slowdown_amd = _column_mean(df2, "AMD x86", "BucketSIMD_8B_THP_DummyTuple", 90, "BBCIntInsertSpeedup")
avg_lf90_bbc_insert_tuple_slowdown_a64fx = _column_mean(df2, "A64FX ARM", "BucketSIMD_8B_THP_DummyTuple", 90, "BBCIntInsertSpeedup")
avg_lf90_bbc_insert_tuple_slowdown_power = _column_mean(df2, "Power", "BucketSIMD_8B_THP_DummyTuple", 90, "BBCIntInsertSpeedup")



# Human-readable summary of the averages. Each <...> tag lists architecture,
# load factor (LF) and hash table. Line (2.5) used to be labelled LF25 although
# it prints the LF90 value, and lines (3)-(10) used to say "LP90" where the
# load-factor tag LF90 is meant.
print(f"(1) average speedup int over tuple <Intel, LF25, LP>: {avg_lf25_intel_int_vs_tuple_speedup}\n"
      f"(1.5) abs. perf diff <Intel, LF25, LP>: {avg_lf25_intel_int_vs_tuple_abs_diff}\n"
      f"(2) average speedup int over tuple <Intel, LF90, LP>: {avg_lf90_intel_int_vs_tuple_speedup}\n"
      f"(2.5) abs. perf diff <Intel, LF90, LP>: {avg_lf90_intel_int_vs_tuple_abs_diff}\n"
      f"(3) average slowdown inserts tuple vs int <Intel, LF90, LP>: {avg_lf90_lp_insert_tuple_slowdown_intel}\n"
      f"(4) average slowdown inserts tuple vs int <AMD, LF90, LP>: {avg_lf90_lp_insert_tuple_slowdown_amd}\n"
      f"(5) average slowdown inserts tuple vs int <A64FX, LF90, LP>: {avg_lf90_lp_insert_tuple_slowdown_a64fx}\n"
      f"(6) average slowdown inserts tuple vs int <Power, LF90, LP>: {avg_lf90_lp_insert_tuple_slowdown_power}\n"
      f"(7) average slowdown inserts tuple vs int <Intel, LF90, BBC>: {avg_lf90_bbc_insert_tuple_slowdown_intel}\n"
      f"(8) average slowdown inserts tuple vs int <AMD, LF90, BBC>: {avg_lf90_bbc_insert_tuple_slowdown_amd}\n"
      f"(9) average slowdown inserts tuple vs int <A64FX, LF90, BBC>: {avg_lf90_bbc_insert_tuple_slowdown_a64fx}\n"
      f"(10) average slowdown inserts tuple vs int <Power, LF90, BBC>: {avg_lf90_bbc_insert_tuple_slowdown_power}\n"
     )

# Store the averages in result_dict: ratios rounded to two decimals, absolute
# differences to one decimal.
result_dict["IntLf25LpIntVsTupValRel"] = round(avg_lf25_intel_int_vs_tuple_speedup, 2)
result_dict["IntLf25LpIntVsTupValAbs"] = round(avg_lf25_intel_int_vs_tuple_abs_diff, 1)
result_dict["IntLf90LpIntVsTupValRel"] = round(avg_lf90_intel_int_vs_tuple_speedup, 2)
result_dict["IntLf90LpIntVsTupValAbs"] = round(avg_lf90_intel_int_vs_tuple_abs_diff, 1)

# LP: inserts with tuple values relative to integer values, per architecture.
result_dict["IntLf90LpTupleVsIntValInsertSlowdown"] = round(avg_lf90_lp_insert_tuple_slowdown_intel, 2)
result_dict["AmdLf90LpTupleVsIntValInsertSlowdown"] = round(avg_lf90_lp_insert_tuple_slowdown_amd, 2)
result_dict["A64FXLf90LpTupleVsIntValInsertSlowdown"] = round(avg_lf90_lp_insert_tuple_slowdown_a64fx, 2)
result_dict["PwrLf90LpTupleVsIntValInsertSlowdown"] = round(avg_lf90_lp_insert_tuple_slowdown_power, 2)

# BBC: inserts with tuple values relative to integer values, per architecture.
# The Intel entry previously stored the LP value (copy-paste slip); like the
# other three it has to use the BBC average.
result_dict["IntBbc90LpTupleVsIntValInsertSlowdown"] = round(avg_lf90_bbc_insert_tuple_slowdown_intel, 2)
result_dict["AmdBbc90LpTupleVsIntValInsertSlowdown"] = round(avg_lf90_bbc_insert_tuple_slowdown_amd, 2)
result_dict["A64FXBbc90LpTupleVsIntValInsertSlowdown"] = round(avg_lf90_bbc_insert_tuple_slowdown_a64fx, 2)
result_dict["PwrBbc90LpTupleVsIntValInsertSlowdown"] = round(avg_lf90_bbc_insert_tuple_slowdown_power, 2)

## Pointer Values

In [None]:
# Colours for the four series of the inline-vs-pointer tuple comparison.
pal_dict = {
    "BucketSIMD_8B_THP_DummyTuple": deep_palette[0],
    "LP_UN_THP_DummyTuple": deep_palette[1],
    "BucketSIMD_8B_THP_PtrDummyTuple": deep_palette[2],
    "LP_UN_THP_PtrDummyTuple": deep_palette[3],
}

# Written to full_dummytuple_inline_vs_ptr.pdf; both returned values are kept.
read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    1000,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_lsbmsb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_avx512_smaller_512=True,
    hide_notestz=True,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_LSBMSB", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=True,
    hide_vfp_16b=True,
    force_include=["LP_UN_THP_PtrDummyTuple", "LP_UN_THP_DummyTuple"],
    show_inline_dummytuple=True,
    show_pointer_dummytuple=True,
    only_show_dummy=True,
    legend_label_dict={
        "LP_UN_THP_DummyTuple": "LP(AoS/Unal.)+Inline",
        "LP_UN_THP_PtrDummyTuple": "LP(AoS/Unal.)+Pointer",
        "BucketSIMD_8B_THP_DummyTuple": "BBC(8)+Inline",
        "BucketSIMD_8B_THP_PtrDummyTuple": "BBC(8)+Pointer",
    },
    file_path="full_dummytuple_inline_vs_ptr.pdf",
)

In [None]:
# Same comparison written to dummytuple_inline_vs_ptr.pdf; pal_dict is the
# palette already present in the namespace and the return value is discarded.
_ = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [50, 90],
    375,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_lsbmsb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_avx512_smaller_512=True,
    hide_notestz=True,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_LSBMSB", "_TESTZ", "_512",
        "_128", "_256", "_KVBu", "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=True,
    hide_vfp_16b=True,
    force_include=["LP_UN_THP_PtrDummyTuple", "LP_UN_THP_DummyTuple"],
    show_inline_dummytuple=True,
    show_pointer_dummytuple=True,
    only_show_dummy=True,
    legend_label_dict={
        "LP_UN_THP_DummyTuple": "LP(AoS/Unal.)+Inline",
        "LP_UN_THP_PtrDummyTuple": "LP(AoS/Unal.)+Pointer",
        "BucketSIMD_8B_THP_DummyTuple": "BBC(8)+Inline",
        "BucketSIMD_8B_THP_PtrDummyTuple": "BBC(8)+Pointer",
    },
    file_path="dummytuple_inline_vs_ptr.pdf",
)

In [None]:
# Numbers: work on copies of read_d and write_d so that the helper columns
# added further down in this cell do not end up in the originals.
df1 = read_d.copy()
df2 = write_d.copy()

def get_lp_pointer_lookups(row):
    """Return the "MLookups/s" of the LP pointer-tuple run that matches *row*.

    The reference run is the row of the module-level ``df1`` whose
    ``PlotIdentifier`` is ``"LP_UN_THP_PtrDummyTuple"`` and whose Arch,
    Compiler, PageSize, HugePageSize, SQR, Size, Distribution and LoadFactor
    are equal to those of *row*.

    Args:
        row: A row of ``df1`` (anything indexable by the column names above).

    Returns:
        The "MLookups/s" value of the matching reference run.

    Raises:
        AssertionError: If the configuration does not match exactly one
            reference run.
    """
    # Build the filter once; it is needed for both the sanity check and the result.
    mask = df1["PlotIdentifier"] == "LP_UN_THP_PtrDummyTuple"
    for column in ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR", "Size", "Distribution", "LoadFactor"):
        mask = mask & (df1[column] == row[column])

    matches = df1[mask]
    assert len(matches) == 1, f"expected exactly one LP_UN_THP_PtrDummyTuple reference row, found {len(matches)}"
    return matches.iloc[0]["MLookups/s"]

def get_lp_inline_lookups(row):
    """Return the "MLookups/s" of the LP inline-tuple run that matches *row*.

    The reference run is the row of the module-level ``df1`` whose
    ``PlotIdentifier`` is ``"LP_UN_THP_DummyTuple"`` and whose Arch, Compiler,
    PageSize, HugePageSize, SQR, Size, Distribution and LoadFactor are equal
    to those of *row*.

    Args:
        row: A row of ``df1`` (anything indexable by the column names above).

    Returns:
        The "MLookups/s" value of the matching reference run.

    Raises:
        AssertionError: If the configuration does not match exactly one
            reference run.
    """
    # Build the filter once; it is needed for both the sanity check and the result.
    mask = df1["PlotIdentifier"] == "LP_UN_THP_DummyTuple"
    for column in ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR", "Size", "Distribution", "LoadFactor"):
        mask = mask & (df1[column] == row[column])

    matches = df1[mask]
    assert len(matches) == 1, f"expected exactly one LP_UN_THP_DummyTuple reference row, found {len(matches)}"
    return matches.iloc[0]["MLookups/s"]

def get_bbc_pointer_lookups(row):
    """Return the "MLookups/s" of the BBC pointer-tuple run that matches *row*.

    The reference run is the row of the module-level ``df1`` whose
    ``PlotIdentifier`` is ``"BucketSIMD_8B_THP_PtrDummyTuple"`` and whose
    Arch, Compiler, PageSize, HugePageSize, SQR, Size, Distribution and
    LoadFactor are equal to those of *row*.

    Args:
        row: A row of ``df1`` (anything indexable by the column names above).

    Returns:
        The "MLookups/s" value of the matching reference run.

    Raises:
        AssertionError: If the configuration does not match exactly one
            reference run.
    """
    # Build the filter once; it is needed for both the sanity check and the result.
    mask = df1["PlotIdentifier"] == "BucketSIMD_8B_THP_PtrDummyTuple"
    for column in ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR", "Size", "Distribution", "LoadFactor"):
        mask = mask & (df1[column] == row[column])

    matches = df1[mask]
    assert len(matches) == 1, f"expected exactly one BucketSIMD_8B_THP_PtrDummyTuple reference row, found {len(matches)}"
    return matches.iloc[0]["MLookups/s"]

def get_bbc_inline_lookups(row):
    """Return the "MLookups/s" of the BBC inline-tuple run that matches *row*.

    The reference run is the row of the module-level ``df1`` whose
    ``PlotIdentifier`` is ``"BucketSIMD_8B_THP_DummyTuple"`` and whose Arch,
    Compiler, PageSize, HugePageSize, SQR, Size, Distribution and LoadFactor
    are equal to those of *row*.

    Args:
        row: A row of ``df1`` (anything indexable by the column names above).

    Returns:
        The "MLookups/s" value of the matching reference run.

    Raises:
        AssertionError: If the configuration does not match exactly one
            reference run.
    """
    # Build the filter once; it is needed for both the sanity check and the result.
    mask = df1["PlotIdentifier"] == "BucketSIMD_8B_THP_DummyTuple"
    for column in ("Arch", "Compiler", "PageSize", "HugePageSize", "SQR", "Size", "Distribution", "LoadFactor"):
        mask = mask & (df1[column] == row[column])

    matches = df1[mask]
    assert len(matches) == 1, f"expected exactly one BucketSIMD_8B_THP_DummyTuple reference row, found {len(matches)}"
    return matches.iloc[0]["MLookups/s"]

def get_lp_inline_inserts_inserts(row):
    """Return the "MInserts/s" of the LP inline-tuple run that matches *row*.

    The reference run is the row of the module-level ``df2`` whose
    ``PlotIdentifier`` is ``"LP_UN_THP_DummyTuple"`` and whose Arch, Compiler,
    PageSize, HugePageSize, Size, Distribution and LoadFactor are equal to
    those of *row*. SQR is not part of the match.

    Args:
        row: A row of ``df2`` (anything indexable by the column names above).

    Returns:
        The "MInserts/s" value of the matching reference run.

    Raises:
        AssertionError: If the configuration does not match exactly one
            reference run.
    """
    # Build the filter once; it is needed for both the sanity check and the result.
    mask = df2["PlotIdentifier"] == "LP_UN_THP_DummyTuple"
    for column in ("Arch", "Compiler", "PageSize", "HugePageSize", "Size", "Distribution", "LoadFactor"):
        mask = mask & (df2[column] == row[column])

    matches = df2[mask]
    assert len(matches) == 1, f"expected exactly one LP_UN_THP_DummyTuple reference row, found {len(matches)}"
    return matches.iloc[0]["MInserts/s"]

def get_bbc_inline_inserts_inserts(row):
    """Return the insert throughput of the inline BBC baseline matching ``row``.

    Searches the module-level ``df2`` for the single row whose
    ``PlotIdentifier`` is ``"BucketSIMD_8B_THP_DummyTuple"`` and whose
    ``Arch``, ``Compiler``, ``PageSize``, ``HugePageSize``, ``Size``,
    ``Distribution`` and ``LoadFactor`` equal the values in ``row``.

    Args:
        row: Record indexable by column name (e.g. a row of ``df2``) that
            provides the seven configuration columns listed above.

    Returns:
        The ``"MInserts/s"`` value of the matching baseline row.

    Raises:
        ValueError: If ``df2`` does not contain exactly one matching row.
    """
    config_columns = (
        "Arch", "Compiler", "PageSize", "HugePageSize",
        "Size", "Distribution", "LoadFactor",
    )
    # Build the filter once and reuse it for the uniqueness check and the lookup.
    mask = df2["PlotIdentifier"] == "BucketSIMD_8B_THP_DummyTuple"
    for column in config_columns:
        mask = mask & (df2[column] == row[column])
    matches = df2[mask]

    # An explicit raise survives ``python -O`` (unlike ``assert``) and reports the count.
    if len(matches) != 1:
        raise ValueError(
            "expected exactly one 'BucketSIMD_8B_THP_DummyTuple' row in df2 "
            f"for this configuration, found {len(matches)}"
        )
    return matches.iloc[0]["MInserts/s"]


def _add_baseline_speedup(frame, metric, perf_column, speedup_column, baseline_lookup):
    """Add a baseline-throughput column and the matching speedup column to ``frame``.

    ``perf_column`` receives ``baseline_lookup(row)`` for every row and
    ``speedup_column`` receives ``frame[metric]`` divided by that baseline.
    ``frame`` is modified in place.
    """
    frame[perf_column] = frame.apply(baseline_lookup, axis=1)
    frame[speedup_column] = frame[metric] / frame[perf_column]


# Lookup throughput relative to each of the four baselines.
_add_baseline_speedup(df1, "MLookups/s", "LPPointerPerf", "LPPointerSpeedup", get_lp_pointer_lookups)
_add_baseline_speedup(df1, "MLookups/s", "LPInlinePerf", "LPInlineSpeedup", get_lp_inline_lookups)
_add_baseline_speedup(df1, "MLookups/s", "BBCPointerPerf", "BBCPointerSpeedup", get_bbc_pointer_lookups)
_add_baseline_speedup(df1, "MLookups/s", "BBCInlinePerf", "BBCInlineSpeedup", get_bbc_inline_lookups)

# Insert throughput relative to the two inline baselines.
_add_baseline_speedup(df2, "MInserts/s", "LPInlineInsertPerf", "LPInlineInsertSpeedup", get_lp_inline_inserts_inserts)
_add_baseline_speedup(df2, "MInserts/s", "BBCInlineInsertPerf", "BBCInlineInsertSpeedup", get_bbc_inline_inserts_inserts)

def _mean_speedup(frame, arch, plot_identifier, load_factor, speedup_column):
    """Return the mean of ``speedup_column`` over the rows of ``frame`` that
    match the given ``Arch``, ``PlotIdentifier`` and ``LoadFactor``."""
    selected = frame[
        (frame["Arch"] == arch)
        & (frame["PlotIdentifier"] == plot_identifier)
        & (frame["LoadFactor"] == load_factor)
    ]
    return selected[speedup_column].mean()


# --- Linear probing (LP), Intel ---
avg_lf70_intel_lp_ptr_vs_inline_readspeedup = _mean_speedup(df1, "Intel x86", "LP_UN_THP_PtrDummyTuple", 70, "LPInlineSpeedup")
avg_lf90_intel_lp_ptr_vs_inline_readspeedup = _mean_speedup(df1, "Intel x86", "LP_UN_THP_PtrDummyTuple", 90, "LPInlineSpeedup")
avg_lf70_intel_lp_ptr_vs_inline_writespeedup = _mean_speedup(df2, "Intel x86", "LP_UN_THP_PtrDummyTuple", 70, "LPInlineInsertSpeedup")
avg_lf90_intel_lp_ptr_vs_inline_writespeedup = _mean_speedup(df2, "Intel x86", "LP_UN_THP_PtrDummyTuple", 90, "LPInlineInsertSpeedup")

# --- LP, AMD (the read comparison is inline relative to pointer here) ---
avg_lf90_amd_lp_inline_vs_ptr_readspeedup = _mean_speedup(df1, "AMD x86", "LP_UN_THP_DummyTuple", 90, "LPPointerSpeedup")
avg_lf70_amd_lp_ptr_vs_inline_writespeedup = _mean_speedup(df2, "AMD x86", "LP_UN_THP_PtrDummyTuple", 70, "LPInlineInsertSpeedup")
avg_lf90_amd_lp_ptr_vs_inline_writespeedup = _mean_speedup(df2, "AMD x86", "LP_UN_THP_PtrDummyTuple", 90, "LPInlineInsertSpeedup")

# --- LP, Power ---
avg_lf90_power_lp_ptr_vs_inline_readspeedup = _mean_speedup(df1, "Power", "LP_UN_THP_PtrDummyTuple", 90, "LPInlineSpeedup")
avg_lf70_power_lp_ptr_vs_inline_writespeedup = _mean_speedup(df2, "Power", "LP_UN_THP_PtrDummyTuple", 70, "LPInlineInsertSpeedup")
avg_lf90_power_lp_ptr_vs_inline_writespeedup = _mean_speedup(df2, "Power", "LP_UN_THP_PtrDummyTuple", 90, "LPInlineInsertSpeedup")

# --- LP, A64FX (the read comparison is inline relative to pointer here) ---
avg_lf90_a64fx_lp_inline_vs_ptr_readspeedup = _mean_speedup(df1, "A64FX ARM", "LP_UN_THP_DummyTuple", 90, "LPPointerSpeedup")
avg_lf70_a64fx_lp_ptr_vs_inline_writespeedup = _mean_speedup(df2, "A64FX ARM", "LP_UN_THP_PtrDummyTuple", 70, "LPInlineInsertSpeedup")
avg_lf90_a64fx_lp_ptr_vs_inline_writespeedup = _mean_speedup(df2, "A64FX ARM", "LP_UN_THP_PtrDummyTuple", 90, "LPInlineInsertSpeedup")

# --- BBC reads at load factor 70: inline relative to pointer ---
avg_lf70_intel_bbc_inline_vs_ptr_readspeedup = _mean_speedup(df1, "Intel x86", "BucketSIMD_8B_THP_DummyTuple", 70, "BBCPointerSpeedup")
avg_lf70_amd_bbc_inline_vs_ptr_readspeedup = _mean_speedup(df1, "AMD x86", "BucketSIMD_8B_THP_DummyTuple", 70, "BBCPointerSpeedup")
avg_lf70_a64fx_bbc_inline_vs_ptr_readspeedup = _mean_speedup(df1, "A64FX ARM", "BucketSIMD_8B_THP_DummyTuple", 70, "BBCPointerSpeedup")
avg_lf70_power_bbc_inline_vs_ptr_readspeedup = _mean_speedup(df1, "Power", "BucketSIMD_8B_THP_DummyTuple", 70, "BBCPointerSpeedup")

# --- BBC inserts at load factor 90: pointer relative to inline ---
avg_lf90_intel_bbc_ptr_vs_inline_writespeedup = _mean_speedup(df2, "Intel x86", "BucketSIMD_8B_THP_PtrDummyTuple", 90, "BBCInlineInsertSpeedup")
avg_lf90_amd_bbc_ptr_vs_inline_writespeedup = _mean_speedup(df2, "AMD x86", "BucketSIMD_8B_THP_PtrDummyTuple", 90, "BBCInlineInsertSpeedup")
avg_lf90_a64fx_bbc_ptr_vs_inline_writespeedup = _mean_speedup(df2, "A64FX ARM", "BucketSIMD_8B_THP_PtrDummyTuple", 90, "BBCInlineInsertSpeedup")
avg_lf90_power_bbc_ptr_vs_inline_writespeedup = _mean_speedup(df2, "Power", "BucketSIMD_8B_THP_PtrDummyTuple", 90, "BBCInlineInsertSpeedup")


# Human-readable summary of the mean speedups computed above. Every entry ends
# in "\n", so print() leaves one blank line after the list.
print(f"(1) average read speedup ptr over inline <Intel, LF70, LP>: {avg_lf70_intel_lp_ptr_vs_inline_readspeedup}\n"
      f"(2) average read speedup ptr over inline <Intel, LF90, LP>: {avg_lf90_intel_lp_ptr_vs_inline_readspeedup}\n"
      f"(3) average insert speedup ptr over inline <Intel, LF70, LP>: {avg_lf70_intel_lp_ptr_vs_inline_writespeedup}\n"
      f"(4) average insert speedup ptr over inline <Intel, LF90, LP>: {avg_lf90_intel_lp_ptr_vs_inline_writespeedup}\n"
      f"(5) average read speedup inline over ptr <AMD, LF90, LP>: {avg_lf90_amd_lp_inline_vs_ptr_readspeedup}\n"
      f"(6) average insert speedup ptr over inline <AMD, LF70, LP>: {avg_lf70_amd_lp_ptr_vs_inline_writespeedup}\n"
      f"(7) average insert speedup ptr over inline <AMD, LF90, LP>: {avg_lf90_amd_lp_ptr_vs_inline_writespeedup}\n"
      f"(8) average read speedup ptr over inline <Power, LF90, LP>: {avg_lf90_power_lp_ptr_vs_inline_readspeedup}\n"
      f"(9) average insert speedup ptr over inline <Power, LF70, LP>: {avg_lf70_power_lp_ptr_vs_inline_writespeedup}\n"
      f"(10) average insert speedup ptr over inline <Power, LF90, LP>: {avg_lf90_power_lp_ptr_vs_inline_writespeedup}\n"
      f"(A) average insert speedup ptr over inline <A64FX, LF70, LP>: {avg_lf70_a64fx_lp_ptr_vs_inline_writespeedup}\n"
      f"(B) average insert speedup ptr over inline <A64FX, LF90, LP>: {avg_lf90_a64fx_lp_ptr_vs_inline_writespeedup}\n"
      f"(C) average read speedup inline over ptr <A64FX, LF90, LP>: {avg_lf90_a64fx_lp_inline_vs_ptr_readspeedup}\n"
      f"(11) average read speedup inline over ptr <Intel, LF70, BBC>: {avg_lf70_intel_bbc_inline_vs_ptr_readspeedup}\n"
      f"(12) average read speedup inline over ptr <AMD, LF70, BBC>: {avg_lf70_amd_bbc_inline_vs_ptr_readspeedup}\n"
      f"(13) average read speedup inline over ptr <A64FX, LF70, BBC>: {avg_lf70_a64fx_bbc_inline_vs_ptr_readspeedup}\n"
      f"(14) average read speedup inline over ptr <Power, LF70, BBC>: {avg_lf70_power_bbc_inline_vs_ptr_readspeedup}\n"
      f"(15) average write speedup ptr over inline <Intel, LF90, BBC>: {avg_lf90_intel_bbc_ptr_vs_inline_writespeedup}\n"
      f"(16) average write speedup ptr over inline <AMD, LF90, BBC>: {avg_lf90_amd_bbc_ptr_vs_inline_writespeedup}\n"
      f"(17) average write speedup ptr over inline <A64FX, LF90, BBC>: {avg_lf90_a64fx_bbc_ptr_vs_inline_writespeedup}\n"
      f"(18) average write speedup ptr over inline <Power, LF90, BBC>: {avg_lf90_power_bbc_ptr_vs_inline_writespeedup}\n"
     )


# Record the mean speedups, rounded to two decimals, in result_dict
# (written to a TeX file by generate_result_tex_file further down).
result_dict.update({
    result_name: round(mean_speedup, 2)
    for result_name, mean_speedup in {
        # Linear probing
        "IntLf70LargeValLPReadSpeedupPtrInline": avg_lf70_intel_lp_ptr_vs_inline_readspeedup,
        "IntLf90LargeValLPReadSpeedupPtrInline": avg_lf90_intel_lp_ptr_vs_inline_readspeedup,
        "IntLf70LargeValLPInsertSpeedupPtrInline": avg_lf70_intel_lp_ptr_vs_inline_writespeedup,
        "IntLf90LargeValLPInsertSpeedupPtrInline": avg_lf90_intel_lp_ptr_vs_inline_writespeedup,
        "AmdLf70LargeValLPInsertSpeedupPtrInline": avg_lf70_amd_lp_ptr_vs_inline_writespeedup,
        "AmdLf90LargeValLPReadSpeedupInlinePtr": avg_lf90_amd_lp_inline_vs_ptr_readspeedup,
        "PwrLf70LargeValLPInsertSpeedupPtrInline": avg_lf70_power_lp_ptr_vs_inline_writespeedup,
        "PwrLf90LargeValLPInsertSpeedupPtrInline": avg_lf90_power_lp_ptr_vs_inline_writespeedup,
        "PwrLf90LargeValLPReadSpeedupPtrInline": avg_lf90_power_lp_ptr_vs_inline_readspeedup,
        "A64FXLf70LargeValLPInsertSpeedupPtrInline": avg_lf70_a64fx_lp_ptr_vs_inline_writespeedup,
        "A64FXLf90LargeValLPInsertSpeedupPtrInline": avg_lf90_a64fx_lp_ptr_vs_inline_writespeedup,
        "A64FXLf90LargeValLPReadSpeedupInlinePtr": avg_lf90_a64fx_lp_inline_vs_ptr_readspeedup,
        # BBC reads
        "IntLf70LargeValBBCReadSpeedupInlinePtr": avg_lf70_intel_bbc_inline_vs_ptr_readspeedup,
        "AmdLf70LargeValBBCReadSpeedupInlinePtr": avg_lf70_amd_bbc_inline_vs_ptr_readspeedup,
        "A64FXLf70LargeValBBCReadSpeedupInlinePtr": avg_lf70_a64fx_bbc_inline_vs_ptr_readspeedup,
        "PwrLf70LargeValBBCReadSpeedupInlinePtr": avg_lf70_power_bbc_inline_vs_ptr_readspeedup,
        # BBC inserts
        "IntLf90LargeValBBCInsertSpeedupPtrInline": avg_lf90_intel_bbc_ptr_vs_inline_writespeedup,
        "AmdLf90LargeValBBCInsertSpeedupPtrInline": avg_lf90_amd_bbc_ptr_vs_inline_writespeedup,
        "A64FXLf90LargeValBBCInsertSpeedupPtrInline": avg_lf90_a64fx_bbc_ptr_vs_inline_writespeedup,
        "PwrLf90LargeValBBCInsertSpeedupPtrInline": avg_lf90_power_bbc_ptr_vs_inline_writespeedup,
    }.items()
})


## Paper Large Inline/Pointer Combination

In [None]:
# Hex colour for every plot identifier drawn in this figure.
pal_dict = {
    "BucketSIMD_8B_THP_DummyTuple": "#fd8d3c",
    "LP_UN_THP_DummyTuple": "#a1dab4",
    "Chain_BudBucket8FP_THP_DummyTuple": "#d7b5d8",
    "BucketSIMD_8B_THP_PtrDummyTuple": "#e31a1c",
    "LP_UN_THP_PtrDummyTuple": "#41b6c4",
    "Chain_BudBucket8FP_THP_PtrDummyTuple": "#df65b0",
}

# Inline vs. pointer DummyTuple figure for Intel and A64FX; the two returned
# frames are analysed in the "Numbers" cell that follows.
read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "A64FX ARM"],
    [50, 90],
    240,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_msblsb_fingerprints=True,
    hide_lsbmsb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_notestz=True,
    hide_non_avx512=True,
    ncol_legend=6,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_LSBMSB", "_TESTZ",
        "_512", "_128", "_256", "_KVBu",
        "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_8b_fingerprints=False,
    hide_16b_fingerprints=True,
    hide_vfp_16b=True,
    force_include=[
        "LP_UN_THP_PtrDummyTuple",
        "LP_UN_THP_DummyTuple",
        "Chain_BudBucket8FP_THP_DummyTuple",
        "Chain_BudBucket8FP_THP_PtrDummyTuple",
    ],
    show_inline_dummytuple=True,
    show_pointer_dummytuple=True,
    only_show_dummy=True,
    legend_label_dict={
        "BucketSIMD_8B_THP_DummyTuple": "BBC+Inl",
        "LP_UN_THP_DummyTuple": "LP+Inl",
        "Chain_BudBucket8FP_THP_DummyTuple": "CH+Inl",
        "BucketSIMD_8B_THP_PtrDummyTuple": "BBC+Ptr",
        "LP_UN_THP_PtrDummyTuple": "LP+Ptr",
        "Chain_BudBucket8FP_THP_PtrDummyTuple": "CH+Ptr",
    },
    file_path="paper_dummytuple.pdf",
    legend_height_shift=-0.23,
    ylabel_padding=0,
    xlabel_padding=0,
    disable_legend_border=True,
    reduce_xticks=True,
    reduce_yticks=True,
    reduce_minor=True,
)

In [None]:
# Numbers: work on private copies so the derived columns added below do not
# modify the frames returned by analysis_plot.
df1, df2 = read_d.copy(), write_d.copy()

def get_lp_largeval_lookups(row):
    """Return the lookup throughput of the inline LP baseline matching ``row``.

    Searches the module-level ``df1`` for the single row whose
    ``PlotIdentifier`` is ``"LP_UN_THP_DummyTuple"`` and whose ``Arch``,
    ``Compiler``, ``PageSize``, ``HugePageSize``, ``SQR``, ``Size``,
    ``Distribution`` and ``LoadFactor`` equal the values in ``row``.

    Args:
        row: Record indexable by column name (e.g. a row of ``df1``) that
            provides the eight configuration columns listed above.

    Returns:
        The ``"MLookups/s"`` value of the matching baseline row.

    Raises:
        ValueError: If ``df1`` does not contain exactly one matching row.
    """
    config_columns = (
        "Arch", "Compiler", "PageSize", "HugePageSize",
        "SQR", "Size", "Distribution", "LoadFactor",
    )
    # Build the filter once and reuse it for the uniqueness check and the lookup.
    mask = df1["PlotIdentifier"] == "LP_UN_THP_DummyTuple"
    for column in config_columns:
        mask = mask & (df1[column] == row[column])
    matches = df1[mask]

    # An explicit raise survives ``python -O`` (unlike ``assert``) and reports the count.
    if len(matches) != 1:
        raise ValueError(
            "expected exactly one 'LP_UN_THP_DummyTuple' row in df1 "
            f"for this configuration, found {len(matches)}"
        )
    return matches.iloc[0]["MLookups/s"]

def get_bbc_largeval_lookups(row):
    """Return the lookup throughput of the inline BBC baseline matching ``row``.

    Searches the module-level ``df1`` for the single row whose
    ``PlotIdentifier`` is ``"BucketSIMD_8B_THP_DummyTuple"`` and whose
    ``Arch``, ``Compiler``, ``PageSize``, ``HugePageSize``, ``SQR``, ``Size``,
    ``Distribution`` and ``LoadFactor`` equal the values in ``row``.

    Args:
        row: Record indexable by column name (e.g. a row of ``df1``) that
            provides the eight configuration columns listed above.

    Returns:
        The ``"MLookups/s"`` value of the matching baseline row.

    Raises:
        ValueError: If ``df1`` does not contain exactly one matching row.
    """
    config_columns = (
        "Arch", "Compiler", "PageSize", "HugePageSize",
        "SQR", "Size", "Distribution", "LoadFactor",
    )
    # Build the filter once and reuse it for the uniqueness check and the lookup.
    mask = df1["PlotIdentifier"] == "BucketSIMD_8B_THP_DummyTuple"
    for column in config_columns:
        mask = mask & (df1[column] == row[column])
    matches = df1[mask]

    # An explicit raise survives ``python -O`` (unlike ``assert``) and reports the count.
    if len(matches) != 1:
        raise ValueError(
            "expected exactly one 'BucketSIMD_8B_THP_DummyTuple' row in df1 "
            f"for this configuration, found {len(matches)}"
        )
    return matches.iloc[0]["MLookups/s"]

def get_chained_pointer_lookups(row):
    """Return the lookup throughput of the pointer-based chained baseline matching ``row``.

    For rows with ``LoadFactor`` 90 no lookup is attempted and ``0`` is
    returned. Otherwise the module-level ``df1`` is searched for the single
    row whose ``PlotIdentifier`` is ``"Chain_BudBucket8FP_THP_PtrDummyTuple"``
    and whose ``Arch``, ``Compiler``, ``PageSize``, ``HugePageSize``, ``SQR``,
    ``Size``, ``Distribution`` and ``LoadFactor`` equal the values in ``row``.

    Args:
        row: Record indexable by column name (e.g. a row of ``df1``) that
            provides the eight configuration columns listed above.

    Returns:
        ``0`` when ``row["LoadFactor"] == 90``; otherwise the ``"MLookups/s"``
        value of the matching baseline row.

    Raises:
        ValueError: If the load factor is not 90 and ``df1`` does not contain
            exactly one matching row.
    """
    # NOTE(review): presumably there is no chained pointer measurement at load
    # factor 90 — confirm. The 0 placeholder makes ChainPtrSpeedup (a division
    # by this value) infinite for those rows.
    if row["LoadFactor"] == 90:
        return 0

    config_columns = (
        "Arch", "Compiler", "PageSize", "HugePageSize",
        "SQR", "Size", "Distribution", "LoadFactor",
    )
    # Build the filter once and reuse it for the uniqueness check and the lookup.
    mask = df1["PlotIdentifier"] == "Chain_BudBucket8FP_THP_PtrDummyTuple"
    for column in config_columns:
        mask = mask & (df1[column] == row[column])
    matches = df1[mask]

    # An explicit raise survives ``python -O`` (unlike ``assert``) and reports the count.
    if len(matches) != 1:
        raise ValueError(
            "expected exactly one 'Chain_BudBucket8FP_THP_PtrDummyTuple' row in df1 "
            f"for this configuration, found {len(matches)}"
        )
    return matches.iloc[0]["MLookups/s"]


def _add_baseline_speedup(frame, metric, perf_column, speedup_column, baseline_lookup):
    """Add a baseline-throughput column and the matching speedup column to ``frame``.

    ``perf_column`` receives ``baseline_lookup(row)`` for every row and
    ``speedup_column`` receives ``frame[metric]`` divided by that baseline.
    ``frame`` is modified in place.
    """
    frame[perf_column] = frame.apply(baseline_lookup, axis=1)
    frame[speedup_column] = frame[metric] / frame[perf_column]


# Lookup throughput relative to inline LP, inline BBC and pointer chaining.
_add_baseline_speedup(df1, "MLookups/s", "LPTuplePerf", "LPTupleSpeedup", get_lp_largeval_lookups)
_add_baseline_speedup(df1, "MLookups/s", "BBCTuplePerf", "BBCTupleSpeedup", get_bbc_largeval_lookups)
_add_baseline_speedup(df1, "MLookups/s", "ChainPtrPerf", "ChainPtrSpeedup", get_chained_pointer_lookups)


def _mean_speedup_at(frame, plot_identifier, load_factor, speedup_column, arch=None):
    """Return the mean of ``speedup_column`` over the rows of ``frame`` with the
    given ``PlotIdentifier`` and ``LoadFactor``, optionally restricted to one ``Arch``."""
    keep = (frame["PlotIdentifier"] == plot_identifier) & (frame["LoadFactor"] == load_factor)
    if arch is not None:
        keep = keep & (frame["Arch"] == arch)
    return frame[keep][speedup_column].mean()


# Inline chaining relative to inline BBC, Intel only.
avg_lf25_intel_chain_vs_bbc_speedup = _mean_speedup_at(df1, "Chain_BudBucket8FP_THP_DummyTuple", 25, "BBCTupleSpeedup", arch="Intel x86")
avg_lf50_intel_chain_vs_bbc_speedup = _mean_speedup_at(df1, "Chain_BudBucket8FP_THP_DummyTuple", 50, "BBCTupleSpeedup", arch="Intel x86")
avg_lf70_intel_chain_vs_bbc_speedup = _mean_speedup_at(df1, "Chain_BudBucket8FP_THP_DummyTuple", 70, "BBCTupleSpeedup", arch="Intel x86")
avg_lf90_intel_chain_vs_bbc_speedup = _mean_speedup_at(df1, "Chain_BudBucket8FP_THP_DummyTuple", 90, "BBCTupleSpeedup", arch="Intel x86")

# Inline chaining relative to inline BBC, every architecture present in df1.
avg_lf25_allsys_chain_vs_bbc_speedup = _mean_speedup_at(df1, "Chain_BudBucket8FP_THP_DummyTuple", 25, "BBCTupleSpeedup")
avg_lf50_allsys_chain_vs_bbc_speedup = _mean_speedup_at(df1, "Chain_BudBucket8FP_THP_DummyTuple", 50, "BBCTupleSpeedup")
avg_lf70_allsys_chain_vs_bbc_speedup = _mean_speedup_at(df1, "Chain_BudBucket8FP_THP_DummyTuple", 70, "BBCTupleSpeedup")
avg_lf90_allsys_chain_vs_bbc_speedup = _mean_speedup_at(df1, "Chain_BudBucket8FP_THP_DummyTuple", 90, "BBCTupleSpeedup")

# Pointer BBC relative to pointer chaining, every architecture present in df1.
avg_lf25_allsys_bbc_vs_chain_ptr_speedup = _mean_speedup_at(df1, "BucketSIMD_8B_THP_PtrDummyTuple", 25, "ChainPtrSpeedup")
avg_lf50_allsys_bbc_vs_chain_ptr_speedup = _mean_speedup_at(df1, "BucketSIMD_8B_THP_PtrDummyTuple", 50, "ChainPtrSpeedup")
avg_lf70_allsys_bbc_vs_chain_ptr_speedup = _mean_speedup_at(df1, "BucketSIMD_8B_THP_PtrDummyTuple", 70, "ChainPtrSpeedup")
avg_lf90_allsys_bbc_vs_chain_ptr_speedup = _mean_speedup_at(df1, "BucketSIMD_8B_THP_PtrDummyTuple", 90, "ChainPtrSpeedup")


# Human-readable summary of the mean speedups computed above. The entries are
# numbered consecutively so each line of the output can be referred to
# unambiguously (previously every entry was labelled "(1)").
print(f"(1) average speedup chain over bbc <Intel, LF25>: {avg_lf25_intel_chain_vs_bbc_speedup}\n"
      f"(2) average speedup chain over bbc <Intel, LF50>: {avg_lf50_intel_chain_vs_bbc_speedup}\n"
      f"(3) average speedup chain over bbc <Intel, LF70>: {avg_lf70_intel_chain_vs_bbc_speedup}\n"
      f"(4) average speedup chain over bbc <Intel, LF90>: {avg_lf90_intel_chain_vs_bbc_speedup}\n"
      f"(5) average speedup chain over bbc <allsys, LF25>: {avg_lf25_allsys_chain_vs_bbc_speedup}\n"
      f"(6) average speedup chain over bbc <allsys, LF50>: {avg_lf50_allsys_chain_vs_bbc_speedup}\n"
      f"(7) average speedup chain over bbc <allsys, LF70>: {avg_lf70_allsys_chain_vs_bbc_speedup}\n"
      f"(8) average speedup chain over bbc <allsys, LF90>: {avg_lf90_allsys_chain_vs_bbc_speedup}\n"
      f"(9) average speedup bbc over chain ptr <allsys, LF25>: {avg_lf25_allsys_bbc_vs_chain_ptr_speedup}\n"
      f"(10) average speedup bbc over chain ptr <allsys, LF50>: {avg_lf50_allsys_bbc_vs_chain_ptr_speedup}\n"
      f"(11) average speedup bbc over chain ptr <allsys, LF70>: {avg_lf70_allsys_bbc_vs_chain_ptr_speedup}\n"
      f"(12) average speedup bbc over chain ptr <allsys, LF90>: {avg_lf90_allsys_bbc_vs_chain_ptr_speedup}\n")

# Record the all-architecture mean speedups, rounded to two decimals, in
# result_dict. Only load factors 50 and 70 are stored for the BBC-vs-chained
# pointer comparison.
result_dict.update({
    result_name: round(mean_speedup, 2)
    for result_name, mean_speedup in {
        "AllSysChainVsBBCSpeedupLf25": avg_lf25_allsys_chain_vs_bbc_speedup,
        "AllSysChainVsBBCSpeedupLf50": avg_lf50_allsys_chain_vs_bbc_speedup,
        "AllSysChainVsBBCSpeedupLf70": avg_lf70_allsys_chain_vs_bbc_speedup,
        "AllSysChainVsBBCSpeedupLf90": avg_lf90_allsys_chain_vs_bbc_speedup,
        "AllSysBBCvsChainPtrSpeedupLf50": avg_lf50_allsys_bbc_vs_chain_ptr_speedup,
        "AllSysBBCvsChainPtrSpeedupLf70": avg_lf70_allsys_bbc_vs_chain_ptr_speedup,
    }.items()
})


In [None]:
# Hex colour for every plot identifier drawn in this figure.
pal_dict = {
    "BucketSIMD_8B_THP_DummyTuple": "#fd8d3c",
    "LP_UN_THP_DummyTuple": "#a1dab4",
    "Chain_BudBucket8FP_THP_DummyTuple": "#d7b5d8",
    "BucketSIMD_8B_THP_PtrDummyTuple": "#e31a1c",
    "LP_UN_THP_PtrDummyTuple": "#41b6c4",
    "Chain_BudBucket8FP_THP_PtrDummyTuple": "#df65b0",
}

# Inline vs. pointer DummyTuple figure restricted to AMD, with the y-axis
# limit for that architecture set to 60.
read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["AMD x86"],
    [50, 90],
    240,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_msblsb_fingerprints=True,
    hide_lsbmsb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_notestz=True,
    hide_non_avx512=True,
    ncol_legend=6,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_LSBMSB", "_TESTZ",
        "_512", "_128", "_256", "_KVBu",
        "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_8b_fingerprints=False,
    hide_16b_fingerprints=True,
    hide_vfp_16b=True,
    force_include=[
        "LP_UN_THP_PtrDummyTuple",
        "LP_UN_THP_DummyTuple",
        "Chain_BudBucket8FP_THP_DummyTuple",
        "Chain_BudBucket8FP_THP_PtrDummyTuple",
    ],
    show_inline_dummytuple=True,
    show_pointer_dummytuple=True,
    only_show_dummy=True,
    legend_label_dict={
        "BucketSIMD_8B_THP_DummyTuple": "BBC+Inl",
        "LP_UN_THP_DummyTuple": "LP+Inl",
        "Chain_BudBucket8FP_THP_DummyTuple": "CH+Inl",
        "BucketSIMD_8B_THP_PtrDummyTuple": "BBC+Ptr",
        "LP_UN_THP_PtrDummyTuple": "LP+Ptr",
        "Chain_BudBucket8FP_THP_PtrDummyTuple": "CH+Ptr",
    },
    file_path="revision_dummytuple_32.pdf",
    legend_height_shift=-0.6,
    ylabel_padding=0,
    xlabel_padding=0,
    disable_legend_border=True,
    reduce_xticks=True,
    reduce_yticks=True,
    reduce_minor=True,
    arch_ylimits={"AMD x86": 60},
)

In [None]:
# Hex colour for every plot identifier drawn in this figure (128 DummyTuple variants).
pal_dict = {
    "BucketSIMD_8B_THP_128DummyTuple": "#fd8d3c",
    "LP_UN_THP_128DummyTuple": "#a1dab4",
    "Chain_BudBucket8FP_THP_128DummyTuple": "#d7b5d8",
    "BucketSIMD_8B_THP_Ptr128DummyTuple": "#e31a1c",
    "LP_UN_THP_Ptr128DummyTuple": "#41b6c4",
    "Chain_BudBucket8FP_THP_Ptr128DummyTuple": "#df65b0",
}

# Same AMD-only figure for the 128 DummyTuple variants. Unlike the other
# DummyTuple cells, "_128" is not in word_filter here and hide_dummytuple128
# is switched off explicitly.
read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["AMD x86"],
    [50, 90],
    240,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_msblsb_fingerprints=True,
    hide_lsbmsb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_notestz=True,
    hide_non_avx512=True,
    ncol_legend=6,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=[],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_LSBMSB", "_TESTZ",
        "_512", "_256", "_KVBu",
        "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_8b_fingerprints=False,
    hide_16b_fingerprints=True,
    hide_vfp_16b=True,
    force_include=[
        "LP_UN_THP_128DummyTuple",
        "LP_UN_THP_Ptr128DummyTuple",
        "Chain_BudBucket8FP_THP_128DummyTuple",
        "Chain_BudBucket8FP_THP_Ptr128DummyTuple",
    ],
    only_show_dummy=True,
    legend_label_dict={
        "BucketSIMD_8B_THP_128DummyTuple": "BBC+Inl",
        "LP_UN_THP_128DummyTuple": "LP+Inl",
        "Chain_BudBucket8FP_THP_128DummyTuple": "CH+Inl",
        "BucketSIMD_8B_THP_Ptr128DummyTuple": "BBC+Ptr",
        "LP_UN_THP_Ptr128DummyTuple": "LP+Ptr",
        "Chain_BudBucket8FP_THP_Ptr128DummyTuple": "CH+Ptr",
    },
    file_path="revision_dummytuple_128.pdf",
    legend_height_shift=-0.6,
    ylabel_padding=0,
    xlabel_padding=0,
    disable_legend_border=True,
    reduce_xticks=True,
    reduce_yticks=True,
    reduce_minor=True,
    hide_dummytuple128=False,
    arch_ylimits={"AMD x86": 60},
)

In [None]:
# Write every collected result as a \DefineResult macro to the TeX file.
generate_result_tex_file(
    variable_dict=result_dict,
    output_path="performance_results.tex",
)

# Thread Benchmark

In [None]:
# Hex colour for every plot identifier drawn in the thread-count figure.
pal_dict = {
    "BucketSIMD_16B_THP": "#e31a1c",
    "BucketSIMD_8B_THP": "#fd8d3c",
    "FingerPSIMD_8B_THP": "#fecc5c",
    "RecalcRH_UN_THP": "#2c7fb8",
    "LP_UN_THP": "#a1dab4",
    "Chain_BudBucket16FP_THP": "#253494",
}

# Thread-count analysis over all four architectures; the returned frames feed
# the "Numbers" cell further down.
read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    550,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_notestz=True,
    hide_non_avx512=True,
    hide_lsbmsb_fingerprints=False,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=["FingerprintingSIMDSoAHashTable"],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_LSBMSB", "_TESTZ",
        "_512", "_128", "_256", "_KVBu",
        "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=True,
    hide_vfp_16b=True,
    force_include=["LP_UN_THP", "RecalcRH_UN_THP"],
    legend_label_dict={
        "LP_UN_THP": "LP(AoS)",
        "RecalcRH_UN_THP": "RecalcRH",
        "FingerPSIMD_8B_THP": "VFP(8)",
        "Chain_BudBucket16FP_THP": "Chained(16)",
        "BucketSIMD_8B_THP": "BBC(8)",
        "BucketSIMD_16B_THP": "BBC(16)",
    },
    file_path="mt_perf_sqr50.pdf",
    ncol_legend=6,
    label_order=[2, 3, 1, 0],
    legend_height_shift=-0.05,
    ylabel_padding=0.5,
    xlabel_padding=0.5,
    disable_legend_border=True,
    reduce_xticks=True,
    reduce_yticks=True,
    reduce_minor=True,
    analyze_threadcount=True,
    force_write_lf=90,
    force_sqr=50,
    hide_insertions=False,
    markevery=[0, 3, 4, 5, 6],
)

In [None]:
# Hex colour for every plot identifier drawn in the thread-count figure.
pal_dict = {
    "BucketSIMD_16B_THP": "#e31a1c",
    "BucketSIMD_8B_THP": "#fd8d3c",
    "FingerPSIMD_8B_THP": "#fecc5c",
    "RecalcRH_UN_THP": "#2c7fb8",
    "LP_UN_THP": "#a1dab4",
    "Chain_BudBucket16FP_THP": "#253494",
}

# Paper variant of the thread-count figure. The return value is discarded, so
# read_d / write_d keep the frames from the previous analysis_plot call.
_ = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [50, 90],
    240,
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_notestz=True,
    hide_non_avx512=True,
    hide_lsbmsb_fingerprints=False,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=["FingerprintingSIMDSoAHashTable"],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_LSBMSB", "_TESTZ",
        "_512", "_128", "_256", "_KVBu",
        "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=True,
    hide_vfp_16b=True,
    force_include=["LP_UN_THP", "RecalcRH_UN_THP"],
    legend_label_dict={
        "LP_UN_THP": "LP(AoS)",
        "RecalcRH_UN_THP": "RecalcRH",
        "FingerPSIMD_8B_THP": "VFP(8)",
        "Chain_BudBucket16FP_THP": "Chained(16)",
        "BucketSIMD_8B_THP": "BBC(8)",
        "BucketSIMD_16B_THP": "BBC(16)",
    },
    file_path="paper_mt_perf_sqr50.pdf",
    ncol_legend=4,
    label_order=[2, 3, 1, 0],
    legend_height_shift=-0.045,
    ylabel_padding=0,
    xlabel_padding=0,
    disable_legend_border=True,
    reduce_xticks=True,
    reduce_yticks=True,
    reduce_minor=True,
    analyze_threadcount=True,
    force_write_lf=90,
    force_sqr=50,
    hide_insertions=False,
    arch_ylimits={"Power": 500},
    markevery=[0, 3, 4, 5, 6],
    extra_legend_columnspacing=0.3,
    extra_legend_handlelength=0.7,
)

In [None]:
# Numbers: work on private copies so the derived columns added below do not
# modify the frames returned by analysis_plot.
df1, df2 = read_d.copy(), write_d.copy()

def get_lp_lookups(row):
    """Return the lookup throughput of the LP baseline matching ``row``.

    Searches the module-level ``df1`` for the single row whose
    ``PlotIdentifier`` is ``"LP_UN_THP"`` and whose ``Arch``, ``Compiler``,
    ``PageSize``, ``ThreadCount``, ``HugePageSize``, ``SQR``, ``Size``,
    ``Distribution`` and ``LoadFactor`` equal the values in ``row``.

    Args:
        row: Record indexable by column name (e.g. a row of ``df1``) that
            provides the nine configuration columns listed above.

    Returns:
        The ``"MLookups/s"`` value of the matching baseline row.

    Raises:
        ValueError: If ``df1`` does not contain exactly one matching row; the
            candidate rows and their count are printed first.
    """
    config_columns = (
        "Arch", "Compiler", "PageSize", "ThreadCount", "HugePageSize",
        "SQR", "Size", "Distribution", "LoadFactor",
    )
    mask = df1["PlotIdentifier"] == "LP_UN_THP"
    for column in config_columns:
        mask = mask & (df1[column] == row[column])
    tmp = df1[mask]

    if len(tmp) != 1:
        # Show the candidate rows so a missing or duplicated baseline is easy to spot.
        print(tmp)
        print(len(tmp))
        raise ValueError(
            "expected exactly one 'LP_UN_THP' row in df1 "
            f"for this configuration, found {len(tmp)}"
        )
    # Reuse the already-filtered frame instead of evaluating the filter again.
    return tmp.iloc[0]["MLookups/s"]

def get_vfp_lookups(row):
    """Return the lookup throughput of the VFP(8) baseline matching ``row``.

    Searches the module-level ``df1`` for the single row whose
    ``PlotIdentifier`` is ``"FingerPSIMD_8B_THP"`` and whose ``Arch``,
    ``Compiler``, ``PageSize``, ``ThreadCount``, ``HugePageSize``, ``SQR``,
    ``Size``, ``Distribution`` and ``LoadFactor`` equal the values in ``row``.

    Args:
        row: Record indexable by column name (e.g. a row of ``df1``) that
            provides the nine configuration columns listed above.

    Returns:
        The ``"MLookups/s"`` value of the matching baseline row.

    Raises:
        ValueError: If ``df1`` does not contain exactly one matching row.
    """
    config_columns = (
        "Arch", "Compiler", "PageSize", "ThreadCount", "HugePageSize",
        "SQR", "Size", "Distribution", "LoadFactor",
    )
    # Build the filter once and reuse it for the uniqueness check and the lookup.
    mask = df1["PlotIdentifier"] == "FingerPSIMD_8B_THP"
    for column in config_columns:
        mask = mask & (df1[column] == row[column])
    matches = df1[mask]

    # An explicit raise survives ``python -O`` (unlike ``assert``) and reports the count.
    if len(matches) != 1:
        raise ValueError(
            "expected exactly one 'FingerPSIMD_8B_THP' row in df1 "
            f"for this configuration, found {len(matches)}"
        )
    return matches.iloc[0]["MLookups/s"]

def get_bbc_lookups(row):
    """Return the BBC(8) baseline lookup throughput for ``row``'s configuration.

    Searches the module-level ``df1`` for the single ``"BucketSIMD_8B_THP"``
    row that agrees with ``row`` on Arch, Compiler, PageSize, ThreadCount,
    HugePageSize, SQR, Size, Distribution and LoadFactor, and returns its
    ``"MLookups/s"``.

    Raises:
        ValueError: if zero or more than one baseline row matches. This check
            used to be an ``assert``, which is skipped when Python runs with -O.
    """
    baseline_id = "BucketSIMD_8B_THP"
    selector = df1["PlotIdentifier"] == baseline_id
    for column in ("Arch", "Compiler", "PageSize", "ThreadCount", "HugePageSize",
                   "SQR", "Size", "Distribution", "LoadFactor"):
        selector = selector & (df1[column] == row[column])
    # Filter once and reuse the result for both the uniqueness check and the return value.
    candidates = df1[selector]
    if len(candidates) != 1:
        raise ValueError(
            f"expected exactly one {baseline_id} row for this configuration, "
            f"found {len(candidates)}:\n{candidates}"
        )
    return candidates.iloc[0]["MLookups/s"]


# For every row, attach the throughput each baseline scheme achieved under the
# same configuration, plus the row's speedup relative to that baseline.
df1["LPPerf"] = df1.apply(get_lp_lookups, axis=1)
df1["LPSpeedup"] = df1["MLookups/s"] / df1["LPPerf"]

df1["VFPPerf"] = df1.apply(get_vfp_lookups, axis=1)
df1["VFPSpeedup"] = df1["MLookups/s"] / df1["VFPPerf"]

df1["BBCPerf"] = df1.apply(get_bbc_lookups, axis=1)
df1["BBCSpeedup"] = df1["MLookups/s"] / df1["BBCPerf"]

# BBC(8) rows measured on Intel, split by load factor; "_st" keeps ThreadCount == 1 only.
intel_bbc_rows = df1[(df1["Arch"] == "Intel x86") & (df1["PlotIdentifier"] == "BucketSIMD_8B_THP")]
intel_bbc_lf50 = intel_bbc_rows[intel_bbc_rows["LoadFactor"] == 50]
intel_bbc_lf90 = intel_bbc_rows[intel_bbc_rows["LoadFactor"] == 90]
intel_bbc_lf50_st = intel_bbc_lf50[intel_bbc_lf50["ThreadCount"] == 1]
intel_bbc_lf90_st = intel_bbc_lf90[intel_bbc_lf90["ThreadCount"] == 1]

avg_lf50_intel_bbc_vs_vfp_avg_speedup = intel_bbc_lf50["VFPSpeedup"].mean()
avg_lf50_intel_bbc_vs_vfp_min_speedup = intel_bbc_lf50["VFPSpeedup"].min()
avg_lf90_intel_bbc_vs_vfp_avg_speedup = intel_bbc_lf90["VFPSpeedup"].mean()
avg_lf90_intel_bbc_vs_vfp_min_speedup = intel_bbc_lf90["VFPSpeedup"].min()
avg_lf50_st_intel_bbc_vs_vfp_avg_speedup = intel_bbc_lf50_st["VFPSpeedup"].mean()
avg_lf90_st_intel_bbc_vs_vfp_avg_speedup = intel_bbc_lf90_st["VFPSpeedup"].mean()

avg_lf50_intel_bbc_vs_lp_avg_speedup = intel_bbc_lf50["LPSpeedup"].mean()
avg_lf50_intel_bbc_vs_lp_min_speedup = intel_bbc_lf50["LPSpeedup"].min()
avg_lf90_intel_bbc_vs_lp_avg_speedup = intel_bbc_lf90["LPSpeedup"].mean()
avg_lf90_intel_bbc_vs_lp_min_speedup = intel_bbc_lf90["LPSpeedup"].min()
avg_lf50_st_intel_bbc_vs_lp_avg_speedup = intel_bbc_lf50_st["LPSpeedup"].mean()
avg_lf90_st_intel_bbc_vs_lp_avg_speedup = intel_bbc_lf90_st["LPSpeedup"].mean()

# VFP(8) rows measured on Power at load factor 90, compared against BBC(8).
power_vfp_lf90 = df1[(df1["Arch"] == "Power") & (df1["PlotIdentifier"] == "FingerPSIMD_8B_THP") & (df1["LoadFactor"] == 90)]
avg_lf90_pwr_vfp_vs_bbc_avg_speedup = power_vfp_lf90["BBCSpeedup"].mean()
avg_lf90_st_pwr_vfp_vs_bbc_avg_speedup = power_vfp_lf90[power_vfp_lf90["ThreadCount"] == 1]["BBCSpeedup"].mean()


print(
    f"(1) avg speedup bbc vs vfp <Intel, LF50>: {avg_lf50_intel_bbc_vs_vfp_avg_speedup}\n"
    f"(2) min speedup bbc vs vfp <Intel, LF50>: {avg_lf50_intel_bbc_vs_vfp_min_speedup}\n"
    f"(3) avg speedup bbc vs vfp <Intel, LF90>: {avg_lf90_intel_bbc_vs_vfp_avg_speedup}\n"
    f"(4) min speedup bbc vs vfp <Intel, LF90>: {avg_lf90_intel_bbc_vs_vfp_min_speedup}\n"
    f"(5) avg speedup bbc vs vfp <Intel, LF50, SINGLETHREAD>: {avg_lf50_st_intel_bbc_vs_vfp_avg_speedup}\n"
    f"(6) avg speedup bbc vs vfp <Intel, LF90, SINGLETHREAD>: {avg_lf90_st_intel_bbc_vs_vfp_avg_speedup}\n"
    f"(7) avg speedup bbc vs lp <Intel, LF50>: {avg_lf50_intel_bbc_vs_lp_avg_speedup}\n"
    f"(8) min speedup bbc vs lp <Intel, LF50>: {avg_lf50_intel_bbc_vs_lp_min_speedup}\n"
    f"(9) avg speedup bbc vs lp <Intel, LF90>: {avg_lf90_intel_bbc_vs_lp_avg_speedup}\n"
    f"(10) min speedup bbc vs lp <Intel, LF90>: {avg_lf90_intel_bbc_vs_lp_min_speedup}\n"
    f"(11) avg speedup bbc vs lp <Intel, LF50, SINGLETHREAD>: {avg_lf50_st_intel_bbc_vs_lp_avg_speedup}\n"
    f"(12) avg speedup bbc vs lp <Intel, LF90, SINGLETHREAD>: {avg_lf90_st_intel_bbc_vs_lp_avg_speedup}\n"
    f"(13) avg speedup vfp vs bbc <Power, LF90>: {avg_lf90_pwr_vfp_vs_bbc_avg_speedup}\n"
    f"(14) avg speedup vfp vs bbc <Power, LF90, SINGLETHREAD>: {avg_lf90_st_pwr_vfp_vs_bbc_avg_speedup}\n"
)


# Values exported for the thesis. The "SingleThread" keys must use the "_st_"
# (ThreadCount == 1) averages; the Intel ones previously reused the all-thread
# averages, so both keys of each pair carried the same number.
result_dict["IntelBBCvsLPLF90SingleThreadSpeedup"] = round(avg_lf90_st_intel_bbc_vs_lp_avg_speedup, 2)
result_dict["IntelBBCvsLPLF90AllThreadSpeedup"] = round(avg_lf90_intel_bbc_vs_lp_avg_speedup, 2)
result_dict["IntelBBCvsVFPLF90SingleThreadSpeedup"] = round(avg_lf90_st_intel_bbc_vs_vfp_avg_speedup, 2)
result_dict["IntelBBCvsVFPLF90AllThreadSpeedup"] = round(avg_lf90_intel_bbc_vs_vfp_avg_speedup, 2)
result_dict["PowerVFPvsBBCLF90SingleThreadSpeedup"] = round(avg_lf90_st_pwr_vfp_vs_bbc_avg_speedup, 2)
result_dict["PowerVFPvsBBCLF90AllThreadSpeedup"] = round(avg_lf90_pwr_vfp_vs_bbc_avg_speedup, 2)


In [None]:
# Colour per plotted variant; the keys match the PlotIdentifier values used in the analysis cells.
pal_dict = {
    "BucketSIMD_16B_THP": "#e31a1c",
    "BucketSIMD_8B_THP": "#fd8d3c",
    "FingerPSIMD_8B_THP": "#fecc5c",
    "RecalcRH_UN_THP": "#2c7fb8",
    "LP_UN_THP": "#a1dab4",
    "Chain_BudBucket16FP_THP": "#253494",
}

# Keyword options for the thread-count plot written to mt_perf_sqr100.pdf.
sqr100_plot_options = dict(
    # show_* / hide_* switches and name filters
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_notestz=True,
    hide_non_avx512=True,
    hide_lsbmsb_fingerprints=False,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=["FingerprintingSIMDSoAHashTable"],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_LSBMSB", "_TESTZ",
        "_512", "_128", "_256", "_KVBu",
        "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=True,
    hide_vfp_16b=True,
    force_include=["LP_UN_THP", "RecalcRH_UN_THP"],
    # legend, output file and axis cosmetics
    legend_label_dict={
        "LP_UN_THP": "LP(AoS)",
        "RecalcRH_UN_THP": "RecalcRH",
        "FingerPSIMD_8B_THP": "VFP(8)",
        "Chain_BudBucket16FP_THP": "Chained(16)",
        "BucketSIMD_8B_THP": "BBC(8)",
        "BucketSIMD_16B_THP": "BBC(16)",
    },
    file_path="mt_perf_sqr100.pdf",
    ncol_legend=6,
    label_order=[2, 3, 1, 0],
    legend_height_shift=-0.05,
    ylabel_padding=0.5,
    xlabel_padding=0.5,
    disable_legend_border=True,
    reduce_xticks=True,
    reduce_yticks=True,
    reduce_minor=True,
    # workload selection
    analyze_threadcount=True,
    force_write_lf=90,
    force_sqr=100,
    hide_insertions=False,
    markevery=[0, 3, 4, 5, 6],
)

read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    550,
    **sqr100_plot_options,
)

In [None]:
# Numbers
# Work on copies so the columns added to df1 further down never touch read_d / write_d.
df1, df2 = read_d.copy(), write_d.copy()

def get_lp_lookups(row):
    """Return the LP(AoS) baseline lookup throughput for ``row``'s configuration.

    Searches the module-level ``df1`` for the single ``"LP_UN_THP"`` row that
    agrees with ``row`` on Arch, Compiler, PageSize, ThreadCount, HugePageSize,
    SQR, Size, Distribution and LoadFactor, and returns its ``"MLookups/s"``.

    Raises:
        ValueError: if zero or more than one baseline row matches; the message
            contains the match count and the matching rows.
    """
    baseline_id = "LP_UN_THP"
    selector = df1["PlotIdentifier"] == baseline_id
    for column in ("Arch", "Compiler", "PageSize", "ThreadCount", "HugePageSize",
                   "SQR", "Size", "Distribution", "LoadFactor"):
        selector = selector & (df1[column] == row[column])
    # Filter once and reuse the result for both the uniqueness check and the return value.
    candidates = df1[selector]
    if len(candidates) != 1:
        raise ValueError(
            f"expected exactly one {baseline_id} row for this configuration, "
            f"found {len(candidates)}:\n{candidates}"
        )
    return candidates.iloc[0]["MLookups/s"]

def get_vfp_lookups(row):
    """Return the VFP(8) baseline lookup throughput for ``row``'s configuration.

    Searches the module-level ``df1`` for the single ``"FingerPSIMD_8B_THP"``
    row that agrees with ``row`` on Arch, Compiler, PageSize, ThreadCount,
    HugePageSize, SQR, Size, Distribution and LoadFactor, and returns its
    ``"MLookups/s"``.

    Raises:
        ValueError: if zero or more than one baseline row matches. This check
            used to be an ``assert``, which is skipped when Python runs with -O.
    """
    baseline_id = "FingerPSIMD_8B_THP"
    selector = df1["PlotIdentifier"] == baseline_id
    for column in ("Arch", "Compiler", "PageSize", "ThreadCount", "HugePageSize",
                   "SQR", "Size", "Distribution", "LoadFactor"):
        selector = selector & (df1[column] == row[column])
    # Filter once and reuse the result for both the uniqueness check and the return value.
    candidates = df1[selector]
    if len(candidates) != 1:
        raise ValueError(
            f"expected exactly one {baseline_id} row for this configuration, "
            f"found {len(candidates)}:\n{candidates}"
        )
    return candidates.iloc[0]["MLookups/s"]

def get_bbc_lookups(row):
    """Return the BBC(8) baseline lookup throughput for ``row``'s configuration.

    Searches the module-level ``df1`` for the single ``"BucketSIMD_8B_THP"``
    row that agrees with ``row`` on Arch, Compiler, PageSize, ThreadCount,
    HugePageSize, SQR, Size, Distribution and LoadFactor, and returns its
    ``"MLookups/s"``.

    Raises:
        ValueError: if zero or more than one baseline row matches. This check
            used to be an ``assert``, which is skipped when Python runs with -O.
    """
    baseline_id = "BucketSIMD_8B_THP"
    selector = df1["PlotIdentifier"] == baseline_id
    for column in ("Arch", "Compiler", "PageSize", "ThreadCount", "HugePageSize",
                   "SQR", "Size", "Distribution", "LoadFactor"):
        selector = selector & (df1[column] == row[column])
    # Filter once and reuse the result for both the uniqueness check and the return value.
    candidates = df1[selector]
    if len(candidates) != 1:
        raise ValueError(
            f"expected exactly one {baseline_id} row for this configuration, "
            f"found {len(candidates)}:\n{candidates}"
        )
    return candidates.iloc[0]["MLookups/s"]


# For each baseline scheme: its throughput under the row's configuration, then
# the row's speedup relative to it. Column order is LP, VFP, BBC.
for perf_col, speedup_col, baseline_fn in (
    ("LPPerf", "LPSpeedup", get_lp_lookups),
    ("VFPPerf", "VFPSpeedup", get_vfp_lookups),
    ("BBCPerf", "BBCSpeedup", get_bbc_lookups),
):
    df1[perf_col] = df1.apply(baseline_fn, axis=1)
    df1[speedup_col] = df1["MLookups/s"] / df1[perf_col]

# Speedup of BBC(8) over LP on Intel, per load factor.
is_intel_bbc = (df1["Arch"] == "Intel x86") & (df1["PlotIdentifier"] == "BucketSIMD_8B_THP")
lf50_lp_speedups = df1.loc[is_intel_bbc & (df1["LoadFactor"] == 50), "LPSpeedup"]
lf90_lp_speedups = df1.loc[is_intel_bbc & (df1["LoadFactor"] == 90), "LPSpeedup"]

avg_lf50_sqr100_intel_bbc_vs_lp_avg_speedup = lf50_lp_speedups.mean()
avg_lf50_sqr100_intel_bbc_vs_lp_min_speedup = lf50_lp_speedups.min()
avg_lf90_sqr100_intel_bbc_vs_lp_avg_speedup = lf90_lp_speedups.mean()
avg_lf90_sqr100_intel_bbc_vs_lp_min_speedup = lf90_lp_speedups.min()


print(
    f"(1) avg speedup bbc vs lp <Intel, LF50>: {avg_lf50_sqr100_intel_bbc_vs_lp_avg_speedup}\n"
    f"(2) min speedup bbc vs lp <Intel, LF50>: {avg_lf50_sqr100_intel_bbc_vs_lp_min_speedup}\n"
    f"(3) avg speedup bbc vs lp <Intel, LF90>: {avg_lf90_sqr100_intel_bbc_vs_lp_avg_speedup}\n"
    f"(4) min speedup bbc vs lp <Intel, LF90>: {avg_lf90_sqr100_intel_bbc_vs_lp_min_speedup}\n"
)

result_dict["IntelBBCvsLPLF90SQR100AllThreadSpeedup"] = round(avg_lf90_sqr100_intel_bbc_vs_lp_avg_speedup, 2)


In [None]:
# Colour per plotted variant; the keys match the PlotIdentifier values used in the analysis cells.
pal_dict = {
    "BucketSIMD_16B_THP": "#e31a1c",
    "BucketSIMD_8B_THP": "#fd8d3c",
    "FingerPSIMD_8B_THP": "#fecc5c",
    "RecalcRH_UN_THP": "#2c7fb8",
    "LP_UN_THP": "#a1dab4",
    "Chain_BudBucket16FP_THP": "#253494",
}

# Keyword options for the thread-count plot written to mt_perf_sqr0.pdf.
sqr0_plot_options = dict(
    # show_* / hide_* switches and name filters
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_notestz=True,
    hide_non_avx512=True,
    hide_lsbmsb_fingerprints=False,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=["FingerprintingSIMDSoAHashTable"],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_LSBMSB", "_TESTZ",
        "_512", "_128", "_256", "_KVBu",
        "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=True,
    hide_vfp_16b=True,
    force_include=["LP_UN_THP", "RecalcRH_UN_THP"],
    # legend, output file and axis cosmetics
    legend_label_dict={
        "LP_UN_THP": "LP(AoS)",
        "RecalcRH_UN_THP": "RecalcRH",
        "FingerPSIMD_8B_THP": "VFP(8)",
        "Chain_BudBucket16FP_THP": "Chained(16)",
        "BucketSIMD_8B_THP": "BBC(8)",
        "BucketSIMD_16B_THP": "BBC(16)",
    },
    file_path="mt_perf_sqr0.pdf",
    ncol_legend=6,
    label_order=[2, 3, 1, 0],
    legend_height_shift=-0.05,
    ylabel_padding=0.5,
    xlabel_padding=0.5,
    disable_legend_border=True,
    reduce_xticks=True,
    reduce_yticks=True,
    reduce_minor=True,
    # workload selection
    analyze_threadcount=True,
    force_write_lf=90,
    force_sqr=0,
    hide_insertions=False,
    markevery=[0, 3, 4, 5, 6],
)

read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    550,
    **sqr0_plot_options,
)

In [None]:
# Numbers
# Work on copies so the columns added to df1 further down never touch read_d / write_d.
df1, df2 = read_d.copy(), write_d.copy()

def get_lp_lookups(row):
    """Return the LP(AoS) baseline lookup throughput for ``row``'s configuration.

    Searches the module-level ``df1`` for the single ``"LP_UN_THP"`` row that
    agrees with ``row`` on Arch, Compiler, PageSize, ThreadCount, HugePageSize,
    SQR, Size, Distribution and LoadFactor, and returns its ``"MLookups/s"``.

    Raises:
        ValueError: if zero or more than one baseline row matches; the message
            contains the match count and the matching rows.
    """
    baseline_id = "LP_UN_THP"
    selector = df1["PlotIdentifier"] == baseline_id
    for column in ("Arch", "Compiler", "PageSize", "ThreadCount", "HugePageSize",
                   "SQR", "Size", "Distribution", "LoadFactor"):
        selector = selector & (df1[column] == row[column])
    # Filter once and reuse the result for both the uniqueness check and the return value.
    candidates = df1[selector]
    if len(candidates) != 1:
        raise ValueError(
            f"expected exactly one {baseline_id} row for this configuration, "
            f"found {len(candidates)}:\n{candidates}"
        )
    return candidates.iloc[0]["MLookups/s"]

def get_vfp_lookups(row):
    """Return the VFP(8) baseline lookup throughput for ``row``'s configuration.

    Searches the module-level ``df1`` for the single ``"FingerPSIMD_8B_THP"``
    row that agrees with ``row`` on Arch, Compiler, PageSize, ThreadCount,
    HugePageSize, SQR, Size, Distribution and LoadFactor, and returns its
    ``"MLookups/s"``.

    Raises:
        ValueError: if zero or more than one baseline row matches. This check
            used to be an ``assert``, which is skipped when Python runs with -O.
    """
    baseline_id = "FingerPSIMD_8B_THP"
    selector = df1["PlotIdentifier"] == baseline_id
    for column in ("Arch", "Compiler", "PageSize", "ThreadCount", "HugePageSize",
                   "SQR", "Size", "Distribution", "LoadFactor"):
        selector = selector & (df1[column] == row[column])
    # Filter once and reuse the result for both the uniqueness check and the return value.
    candidates = df1[selector]
    if len(candidates) != 1:
        raise ValueError(
            f"expected exactly one {baseline_id} row for this configuration, "
            f"found {len(candidates)}:\n{candidates}"
        )
    return candidates.iloc[0]["MLookups/s"]

def get_bbc_lookups(row):
    """Return the BBC(8) baseline lookup throughput for ``row``'s configuration.

    Searches the module-level ``df1`` for the single ``"BucketSIMD_8B_THP"``
    row that agrees with ``row`` on Arch, Compiler, PageSize, ThreadCount,
    HugePageSize, SQR, Size, Distribution and LoadFactor, and returns its
    ``"MLookups/s"``.

    Raises:
        ValueError: if zero or more than one baseline row matches. This check
            used to be an ``assert``, which is skipped when Python runs with -O.
    """
    baseline_id = "BucketSIMD_8B_THP"
    selector = df1["PlotIdentifier"] == baseline_id
    for column in ("Arch", "Compiler", "PageSize", "ThreadCount", "HugePageSize",
                   "SQR", "Size", "Distribution", "LoadFactor"):
        selector = selector & (df1[column] == row[column])
    # Filter once and reuse the result for both the uniqueness check and the return value.
    candidates = df1[selector]
    if len(candidates) != 1:
        raise ValueError(
            f"expected exactly one {baseline_id} row for this configuration, "
            f"found {len(candidates)}:\n{candidates}"
        )
    return candidates.iloc[0]["MLookups/s"]


# For each baseline scheme: its throughput under the row's configuration, then
# the row's speedup relative to it. Column order is LP, VFP, BBC.
for perf_col, speedup_col, baseline_fn in (
    ("LPPerf", "LPSpeedup", get_lp_lookups),
    ("VFPPerf", "VFPSpeedup", get_vfp_lookups),
    ("BBCPerf", "BBCSpeedup", get_bbc_lookups),
):
    df1[perf_col] = df1.apply(baseline_fn, axis=1)
    df1[speedup_col] = df1["MLookups/s"] / df1[perf_col]

# Speedup of BBC(8) over LP on Intel, per load factor.
is_intel_bbc = (df1["Arch"] == "Intel x86") & (df1["PlotIdentifier"] == "BucketSIMD_8B_THP")
lf50_lp_speedups = df1.loc[is_intel_bbc & (df1["LoadFactor"] == 50), "LPSpeedup"]
lf90_lp_speedups = df1.loc[is_intel_bbc & (df1["LoadFactor"] == 90), "LPSpeedup"]

avg_lf50_sqr0_intel_bbc_vs_lp_avg_speedup = lf50_lp_speedups.mean()
avg_lf50_sqr0_intel_bbc_vs_lp_min_speedup = lf50_lp_speedups.min()
avg_lf90_sqr0_intel_bbc_vs_lp_avg_speedup = lf90_lp_speedups.mean()
avg_lf90_sqr0_intel_bbc_vs_lp_min_speedup = lf90_lp_speedups.min()


print(
    f"(1) avg speedup bbc vs lp <Intel, LF50>: {avg_lf50_sqr0_intel_bbc_vs_lp_avg_speedup}\n"
    f"(2) min speedup bbc vs lp <Intel, LF50>: {avg_lf50_sqr0_intel_bbc_vs_lp_min_speedup}\n"
    f"(3) avg speedup bbc vs lp <Intel, LF90>: {avg_lf90_sqr0_intel_bbc_vs_lp_avg_speedup}\n"
    f"(4) min speedup bbc vs lp <Intel, LF90>: {avg_lf90_sqr0_intel_bbc_vs_lp_min_speedup}\n"
)

result_dict["IntelBBCvsLPLF90SQR0AllThreadSpeedup"] = round(avg_lf90_sqr0_intel_bbc_vs_lp_avg_speedup, 2)


# Zipf

In [None]:

# Colour per plotted variant; each scheme appears with and without the "_Zipf" suffix.
pal_dict = {
    "BucketSIMD_8B_THP_Zipf": "#e31a1c",
    "BucketSIMD_8B_THP": "#fd8d3c",
    "FingerPSIMD_8B_THP": "#d7b5d8",
    "FingerPSIMD_8B_THP_Zipf": "#41b6c4",
    "LP_UN_THP": "#a1dab4",
    "LP_UN_THP_Zipf": "#253494",
}

# Keyword options for the plot written to zipf.pdf.
zipf_plot_options = dict(
    # show_* / hide_* switches and name filters
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_notestz=True,
    hide_non_avx512=True,
    hide_lsbmsb_fingerprints=False,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=["FingerprintingSIMDSoAHashTable"],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_LSBMSB", "_TESTZ",
        "_512", "_128", "_256", "_KVBu",
        "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=True,
    hide_vfp_16b=True,
    force_include=["LP_UN_THP", "LP_UN_THP_Zipf"],
    # legend, output file and axis cosmetics
    legend_label_dict={
        "LP_UN_THP": "LP(AoS)",
        "FingerPSIMD_8B_THP": "VFP(8)",
        "BucketSIMD_8B_THP": "BBC(8)",
        "LP_UN_THP_Zipf": "LP(AoS)+Zipf",
        "FingerPSIMD_8B_THP_Zipf": "VFP(8)+Zipf",
        "BucketSIMD_8B_THP_Zipf": "BBC(8)+Zipf",
    },
    file_path="zipf.pdf",
    ncol_legend=6,
    label_order=[4, 5, 2, 3, 0, 1],
    legend_height_shift=-0.05,
    ylabel_padding=0.5,
    xlabel_padding=0.5,
    disable_legend_border=True,
    reduce_xticks=True,
    reduce_yticks=True,
    reduce_minor=True,
    # workload selection
    hide_zipf=False,
    hide_insertions=True,
)

read_d, write_d = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [25, 50, 70, 90],
    550,
    **zipf_plot_options,
)

In [None]:
# Keyword options for the Intel-only Zipf plot written to paper_zipf.pdf.
paper_zipf_plot_options = dict(
    # show_* / hide_* switches and name filters
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_notestz=True,
    hide_non_avx512=True,
    hide_lsbmsb_fingerprints=False,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=["FingerprintingSIMDSoAHashTable"],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_LSBMSB", "_TESTZ",
        "_512", "_128", "_256", "_KVBu",
        "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=True,
    hide_vfp_16b=True,
    force_include=["LP_UN_THP_Zipf"],
    # legend, output file and axis cosmetics
    legend_label_dict={
        "LP_UN_THP_Zipf": "LP(AoS)",
        "FingerPSIMD_8B_THP_Zipf": "VFP(8)",
        "BucketSIMD_8B_THP_Zipf": "BBC(8)",
    },
    file_path="paper_zipf.pdf",
    ncol_legend=3,
    legend_height_shift=-0.6,
    ylabel_padding=0,
    xlabel_padding=0,
    disable_legend_border=True,
    reduce_xticks=True,
    reduce_yticks=True,
    reduce_minor=True,
    # workload selection
    hide_zipf=False,
    hide_non_zipf=True,
    hide_insertions=True,
    label_order=[2, 1, 0],
    extra_legend_columnspacing=0.5,
    extra_legend_handlelength=1,
)

# The returned frames are not needed for this figure.
_ = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86"],
    [50, 70, 90],
    240,
    **paper_zipf_plot_options,
)

In [None]:
# Keyword options for the all-architecture Zipf plot written to revision_zipf.pdf.
revision_zipf_plot_options = dict(
    # show_* / hide_* switches and name filters
    show_thp=True,
    show_pref=False,
    show_unroll=False,
    hide_non_thp=True,
    hide_non_likely_hints=True,
    hide_msb_fingerprints=False,
    hide_likely_hints=False,
    hide_vec_iterators=True,
    hide_svbcast=True,
    hide_lsb_fingerprints=True,
    hide_notestz=True,
    hide_non_avx512=True,
    hide_lsbmsb_fingerprints=False,
    hide_non_s2n=False,
    exclude_prefixes=[],
    additional_include_filter=["FingerprintingSIMDSoAHashTable"],
    word_filter=[
        "_NEON_S2N", "_AVX512", "_SVE", "_Hints", "_LSBMSB", "_TESTZ",
        "_512", "_128", "_256", "_KVBu",
        "_64FPPB", "_32FPPB", "_16FPPB", "_8FPPB",
    ],
    hide_non_512_on_intel=True,
    hide_non_256_on_amd=True,
    hide_neon=True,
    hide_16b_fingerprints=True,
    hide_vfp_16b=True,
    force_include=["LP_UN_THP_Zipf"],
    # legend, output file and axis cosmetics
    legend_label_dict={
        "LP_UN_THP_Zipf": "LP(AoS)",
        "FingerPSIMD_8B_THP_Zipf": "VFP(8)",
        "BucketSIMD_8B_THP_Zipf": "BBC(8)",
    },
    file_path="revision_zipf.pdf",
    ncol_legend=3,
    legend_height_shift=-0.06,
    ylabel_padding=0.5,
    xlabel_padding=0.5,
    disable_legend_border=True,
    reduce_xticks=True,
    reduce_yticks=True,
    reduce_minor=True,
    # workload selection
    hide_zipf=False,
    hide_non_zipf=True,
    hide_insertions=True,
    label_order=[2, 1, 0],
)

# The returned frames are not needed for this figure.
_ = analysis_plot(
    "BucketingSIMDHashTable",
    pal_dict,
    ["Intel x86", "AMD x86", "A64FX ARM", "Power"],
    [50, 70, 90],
    240,
    **revision_zipf_plot_options,
)

In [None]:
# Numbers
# Work on copies so the columns added to df1 further down never touch read_d / write_d.
df1, df2 = read_d.copy(), write_d.copy()

def get_lp_nozipf_lookups(row):
    """Return the non-Zipf LP(AoS) lookup throughput for ``row``'s configuration.

    Searches the module-level ``df1`` for the single ``"LP_UN_THP"`` row (the
    identifier without the ``_Zipf`` suffix) that agrees with ``row`` on Arch,
    Compiler, PageSize, ThreadCount, HugePageSize, SQR, Size, Distribution and
    LoadFactor, and returns its ``"MLookups/s"``.

    Raises:
        ValueError: if zero or more than one baseline row matches; the message
            contains the match count, the matching rows and the requested row.
    """
    baseline_id = "LP_UN_THP"
    selector = df1["PlotIdentifier"] == baseline_id
    for column in ("Arch", "Compiler", "PageSize", "ThreadCount", "HugePageSize",
                   "SQR", "Size", "Distribution", "LoadFactor"):
        selector = selector & (df1[column] == row[column])
    # Filter once and reuse the result for both the uniqueness check and the return value.
    candidates = df1[selector]
    if len(candidates) != 1:
        raise ValueError(
            f"expected exactly one {baseline_id} row for this configuration, "
            f"found {len(candidates)}:\n{candidates}\nrequested row:\n{row}"
        )
    return candidates.iloc[0]["MLookups/s"]

def get_vfp_nozipf_lookups(row):
    """Return the non-Zipf VFP(8) lookup throughput for ``row``'s configuration.

    Searches the module-level ``df1`` for the single ``"FingerPSIMD_8B_THP"``
    row (the identifier without the ``_Zipf`` suffix) that agrees with ``row``
    on Arch, Compiler, PageSize, ThreadCount, HugePageSize, SQR, Size,
    Distribution and LoadFactor, and returns its ``"MLookups/s"``.

    Raises:
        ValueError: if zero or more than one baseline row matches. This check
            used to be an ``assert``, which is skipped when Python runs with -O.
    """
    baseline_id = "FingerPSIMD_8B_THP"
    selector = df1["PlotIdentifier"] == baseline_id
    for column in ("Arch", "Compiler", "PageSize", "ThreadCount", "HugePageSize",
                   "SQR", "Size", "Distribution", "LoadFactor"):
        selector = selector & (df1[column] == row[column])
    # Filter once and reuse the result for both the uniqueness check and the return value.
    candidates = df1[selector]
    if len(candidates) != 1:
        raise ValueError(
            f"expected exactly one {baseline_id} row for this configuration, "
            f"found {len(candidates)}:\n{candidates}"
        )
    return candidates.iloc[0]["MLookups/s"]

def get_bbc_nozipf_lookups(row):
    """Return the non-Zipf BBC(8) lookup throughput for ``row``'s configuration.

    Searches the module-level ``df1`` for the single ``"BucketSIMD_8B_THP"``
    row (the identifier without the ``_Zipf`` suffix) that agrees with ``row``
    on Arch, Compiler, PageSize, ThreadCount, HugePageSize, SQR, Size,
    Distribution and LoadFactor, and returns its ``"MLookups/s"``.

    Raises:
        ValueError: if zero or more than one baseline row matches. This check
            used to be an ``assert``, which is skipped when Python runs with -O.
    """
    baseline_id = "BucketSIMD_8B_THP"
    selector = df1["PlotIdentifier"] == baseline_id
    for column in ("Arch", "Compiler", "PageSize", "ThreadCount", "HugePageSize",
                   "SQR", "Size", "Distribution", "LoadFactor"):
        selector = selector & (df1[column] == row[column])
    # Filter once and reuse the result for both the uniqueness check and the return value.
    candidates = df1[selector]
    if len(candidates) != 1:
        raise ValueError(
            f"expected exactly one {baseline_id} row for this configuration, "
            f"found {len(candidates)}:\n{candidates}"
        )
    return candidates.iloc[0]["MLookups/s"]


# For each non-Zipf baseline scheme: its throughput under the row's
# configuration, then the row's speedup relative to it (LP, VFP, BBC order).
for perf_col, speedup_col, baseline_fn in (
    ("LPPerf", "LPSpeedup", get_lp_nozipf_lookups),
    ("VFPPerf", "VFPSpeedup", get_vfp_nozipf_lookups),
    ("BBCPerf", "BBCSpeedup", get_bbc_nozipf_lookups),
):
    df1[perf_col] = df1.apply(baseline_fn, axis=1)
    df1[speedup_col] = df1["MLookups/s"] / df1[perf_col]

# Zipf BBC(8) rows at load factor 90 across all architectures, relative to non-Zipf BBC(8).
zipf_bbc_lf90_speedups = df1.loc[
    (df1["PlotIdentifier"] == "BucketSIMD_8B_THP_Zipf") & (df1["LoadFactor"] == 90),
    "BBCSpeedup",
]
avg_allsys_lf90_zipf_speedup_bbc = zipf_bbc_lf90_speedups.mean()

print(f"(1) avg speedup zipf <AllSys, LF90, BBC>: {avg_allsys_lf90_zipf_speedup_bbc}\n")

result_dict["AllSysZipfVsNoSysBBCSpeedup"] = round(avg_allsys_lf90_zipf_speedup_bbc, 2)
