In [1]:
import plotly.express as px 
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd 
import os 
import plotly.io as pio
pio.kaleido.scope.mathjax = None 
from math import log

# <b>Functions for visualizing benchmarking results</b>

## Speedup Visualization

In [None]:
def find_color(line_color_map, imp):
    """Look up the colour assigned to implementation ``imp``.

    ``line_color_map`` is iterated as ``(name, colour)`` pairs. The colour of
    the first pair whose name equals ``imp`` is returned; ``None`` is returned
    when no pair matches.
    """
    return next((colour for name, colour in line_color_map if name == imp), None)

def vis_bench_speedup(primary: str, secondary: str, date: str, time: str, save_pdf: bool = False):
    """Plot each implementation's speedup over PstClassifierSeqan for one run.

    Reads ``csv_results/<date>/<primary>_<secondary>_<date>_<time>.csv`` and
    draws three stacked subplots (small, medium, large VLMCs). Every trace is
    the PstClassifierSeqan elapsed time divided element-wise by the elapsed
    time of one other implementation, with rows ordered by ``set_size``.

    Args:
        primary: First data set name used in the CSV file name.
        secondary: Second data set name used in the CSV file name.
        date: Run date as it appears in the directory and file name.
        time: Run time as it appears in the file name.
        save_pdf: When True, also write the figure to
            ``images/<primary>_to_<secondary>_elapsed_time.pdf``.
    """
    df = pd.read_csv("csv_results/" + date + "/" + primary + "_" + secondary + "_" + date + "_" + time + ".csv")
    df = df.sort_values('set_size')

    # One tick label per distinct set size, labelled 2^5, 2^6, ... by position.
    # NOTE(review): assumes the smallest set size is 2^5 and sizes double - confirm.
    first_exponent = 5
    exponentials = ["2<sup>" + str(first_exponent + k) + "</sup>" for k in range(len(df.set_size.unique()))]

    fig = make_subplots(rows=3,
                        cols=1, shared_yaxes=False, shared_xaxes=True,
                        x_title="Number of VLMCs",
                        y_title="Speedup",
                        subplot_titles=("Small", "Medium", "Large"),
                        horizontal_spacing=0.02, vertical_spacing=0.05)

    # PstClassifierSeqan is the baseline; every other implementation is compared to it.
    df_pst = df[df.implementation == "PstClassifierSeqan"]
    df = df[df.implementation != "PstClassifierSeqan"]

    marker_symbols = ['circle', 'square', 'diamond', 'cross', 'x', 'pentagon', 'star', 'diamond-wide']
    line_dashes = ['solid', 'dot', 'dash', 'longdash', 'dashdot', 'longdashdot']
    implementations = ["PstClassifierSeqan", "sorted-vector", "sorted-search", "hashmap", "veb", "ey", "alt-btree", "sorted-search-ey"]
    line_color_map = list(zip(implementations, px.colors.qualitative.Plotly))

    # Cycle through the style lists instead of zipping them with the
    # implementations: zip() stops at the shortest list, which silently dropped
    # every implementation beyond the number of available dash styles.
    for idx, imp in enumerate(df.implementation.unique()):
        marker_symbol = marker_symbols[idx % len(marker_symbols)]
        line_dash = line_dashes[idx % len(line_dashes)]
        df_imp = df[df.implementation == imp]
        line_color = find_color(line_color_map, imp)
        for row, size in enumerate(("small", "medium", "large"), start=1):
            baseline_times = list(df_pst[df_pst.vlmc_size == size]['elapsed_time'])
            speedup = baseline_times / df_imp[df_imp.vlmc_size == size]['elapsed_time']
            # Only the first subplot contributes legend entries.
            fig.add_trace(go.Scatter(x=exponentials, y=speedup, line_color=line_color, name=imp,
                                     marker_symbol=marker_symbol, line_dash=line_dash,
                                     showlegend=(row == 1)), row, 1)

    fig.update_layout(legend=dict(
        orientation="h",
        yanchor="bottom",
        font_size=24,
        y=1.08),
        margin=dict(l=100, r=60, t=60, b=80),
        height=900,
        width=900,
        plot_bgcolor="white")
    # Categorical axis: end half a step before the category count.
    fig.update_layout(xaxis_range=[0, len(exponentials) - 0.5])

    fig.update_xaxes(gridcolor='LightGrey', tickfont_size=20, showline=True, linewidth=1, linecolor='LightGrey')
    fig.update_yaxes(gridcolor='LightGrey', tickfont_size=20, showline=True, linewidth=1, linecolor='LightGrey')

    # Subplot and axis titles are stored as layout annotations; enlarge them all.
    for annotation_idx in range(len(fig.layout.annotations)):
        fig.layout.annotations[annotation_idx]["font"] = {'size': 24}

    if save_pdf:
        fig.write_image("images/" + primary + "_to_" + secondary + "_elapsed_time.pdf")
    fig.show()

## Cache-misses Visualization

In [None]:
def find_color(line_color_map, imp):
    """Return the colour stored for ``imp`` in a sequence of (name, colour) pairs.

    The pairs are scanned in order and the first match wins. ``None`` is
    returned when ``imp`` is not present.
    """
    for candidate, colour in line_color_map:
        if candidate != imp:
            continue
        return colour
    return None

def vis_bench_cache_misses(primary: str, secondary: str, date: str, time: str, save_pdf: bool = False):
    """Plot the ``cache_misses`` column of every implementation for one run.

    Reads ``csv_results/<date>/<primary>_<secondary>_<date>_<time>.csv`` and
    draws three stacked subplots (small, medium, large VLMCs) with one trace
    per implementation found in the file, rows ordered by ``set_size``.

    Args:
        primary: First data set name used in the CSV file name.
        secondary: Second data set name used in the CSV file name.
        date: Run date as it appears in the directory and file name.
        time: Run time as it appears in the file name.
        save_pdf: When True, also write the figure to
            ``images/<primary>_to_<secondary>_cache_misses.pdf``.
    """
    df = pd.read_csv("csv_results/" + date + "/" + primary + "_" + secondary + "_" + date + "_" + time + ".csv")
    df = df.sort_values('set_size')

    # One tick label per distinct set size, labelled 2^5, 2^6, ... by position.
    # NOTE(review): assumes the smallest set size is 2^5 and sizes double - confirm.
    first_exponent = 5
    exponentials = ["2<sup>" + str(first_exponent + k) + "</sup>" for k in range(len(df.set_size.unique()))]

    fig = make_subplots(rows=3,
                        cols=1, shared_yaxes=False, shared_xaxes=True,
                        x_title="Number of VLMCs",
                        y_title="Cache misses (%)",
                        subplot_titles=("Small", "Medium", "Large"),
                        horizontal_spacing=0.02, vertical_spacing=0.05)

    marker_symbols = ['circle', 'square', 'diamond', 'cross', 'x', 'pentagon', 'star']
    line_dashes = ['solid', 'dot', 'dash', 'longdash', 'dashdot', 'longdashdot', 'dot']
    implementations = ["PstClassifierSeqan", "sorted-vector", "sorted-search", "hashmap", "veb", "ey", "alt-btree"]
    line_color_map = list(zip(implementations, px.colors.qualitative.Plotly))

    # Cycle through the style lists instead of zipping them with the
    # implementations: zip() stops at the shortest list, which silently dropped
    # any implementation beyond the seven available styles.
    for idx, imp in enumerate(df.implementation.unique()):
        marker_symbol = marker_symbols[idx % len(marker_symbols)]
        line_dash = line_dashes[idx % len(line_dashes)]
        df_imp = df[df.implementation == imp]
        # Implementations missing from the colour map get None (plotly's default colour).
        line_color = find_color(line_color_map, imp)
        for row, size in enumerate(("small", "medium", "large"), start=1):
            # Only the first subplot contributes legend entries.
            fig.add_trace(go.Scatter(x=exponentials, y=df_imp[df_imp.vlmc_size == size]['cache_misses'],
                                     line_color=line_color, name=imp, marker_symbol=marker_symbol,
                                     line_dash=line_dash, showlegend=(row == 1)), row, 1)

    fig.update_layout(legend=dict(
        orientation="h",
        yanchor="bottom",
        font_size=24,
        y=1.08),
        margin=dict(l=100, r=60, t=60, b=80),
        height=900,
        width=900,
        plot_bgcolor="white")
    # Categorical axis: end half a step before the category count.
    fig.update_layout(xaxis_range=[0, len(exponentials) - 0.5])

    fig.update_xaxes(gridcolor='LightGrey', tickfont_size=20, showline=True, linewidth=1, linecolor='LightGrey')
    fig.update_yaxes(gridcolor='LightGrey', tickfont_size=20, showline=True, linewidth=1, linecolor='LightGrey')

    # Subplot and axis titles are stored as layout annotations; enlarge them all.
    for annotation_idx in range(len(fig.layout.annotations)):
        fig.layout.annotations[annotation_idx]["font"] = {'size': 24}

    if save_pdf:
        fig.write_image("images/" + primary + "_to_" + secondary + "_cache_misses.pdf")
    fig.show()

# Single Run Visualization

In [2]:
def find_color(line_color_map, imp):
    """Find the colour paired with ``imp``.

    Iterates ``line_color_map`` as ``(name, colour)`` pairs and returns the
    colour of the first pair named ``imp``, or ``None`` if there is none.
    """
    matches = (colour for name, colour in line_color_map if name == imp)
    return next(matches, None)

def vis_bench_speedup_single_run(datetime, save_pdf: bool = False):
    """Plot per-data-set speedups over PstClassifierSeqan, averaged over runs.

    For every (primary, secondary) data set pair and every ``(date, time)`` run
    the file ``csv_results/<date>/<primary>_<secondary>_<date>_<time>.csv`` is
    read; files that cannot be read are reported and skipped. Elapsed times are
    averaged per (data set, implementation, VLMC size) and divided into the
    PstClassifierSeqan mean to obtain the speedup.

    The figure has one row per VLMC size and three columns: all non-virus data
    sets, the virus ("ecoli") data set, and the per-implementation mean over
    all data sets.

    Args:
        datetime: Iterable of ``(date, time)`` string pairs identifying runs.
        save_pdf: When True, also write the figure to
            ``images/Single_run_<date>_<time>_elapsed_time.pdf`` using the last
            pair of ``datetime``.

    Raises:
        ValueError: If none of the requested CSV files could be read.
    """
    datasets = [("human", "human"), ("human", "turkey"), ("human", "corn"), ("turkey", "turkey"), ("turkey", "corn"), ("corn", "corn"), ("ecoli", "ecoli")]
    sizes = ("small", "medium", "large")
    runs = list(datetime)

    df_list = []
    for primary, secondary in datasets:
        for date, time in runs:
            try:
                df_tmp = pd.read_csv("csv_results/" + date + "/" + primary + "_" + secondary + "_" + date + "_" + time + ".csv")
            except (OSError, ValueError):
                # Best effort: missing/unreadable files (OSError) and empty or
                # malformed CSVs (pandas raises ValueError subclasses) are
                # skipped, without also swallowing KeyboardInterrupt.
                print("Couldn't read " + primary + " to " + secondary + " for " + date + " " + time)
                continue
            df_tmp['datasets'] = primary.title() + " to " + secondary.title()
            df_list.append(df_tmp)

    if not df_list:
        raise ValueError("No benchmark CSV files could be read for the requested runs")

    df = pd.concat(df_list)

    df['elapsed_time_mean'] = df.groupby(['datasets', 'implementation', 'vlmc_size'])['elapsed_time'].transform('mean')

    # Speedup = baseline (PstClassifierSeqan) mean time / implementation mean time,
    # computed separately for each data set and VLMC size.
    for dataset in df.datasets.unique():
        for s in sizes:
            in_group = (df.datasets == dataset) & (df.vlmc_size == s)
            df_pst_time = df[in_group & (df.implementation == "PstClassifierSeqan")]['elapsed_time_mean'].iloc[0]
            df.loc[in_group, 'speedup'] = df_pst_time / df[in_group].elapsed_time_mean

    df['implementation_mean'] = df.groupby(['implementation', 'vlmc_size'])['speedup'].transform('mean')

    # Display names; substring replacements applied in this exact order.
    # NOTE(review): other cells of this notebook map "sorted-search", "ey" and
    # "alt-btree" instead of "sbs", "eytzinger" and "b-tree" - confirm which raw
    # names the CSV files passed to this function use.
    df['datasets'] = df.datasets.apply(lambda x: x.replace("Ecoli", "Virus"))
    for raw, label in (("sbs", "SBS"), ("eytzinger", "Eytzinger"), ("b-tree", "B-tree"),
                       ("sorted-vector", "Sorted vector"), ("hashmap", "Hash map"), ("veb", "vEB")):
        df['implementation'] = df.implementation.apply(lambda x: x.replace(raw, label))

    fig = make_subplots(rows=3,
                        cols=3, shared_yaxes=False, shared_xaxes=True,
                        column_widths=[0.7, 0.15, 0.15],
                        x_title="",
                        y_title="Speedup",
                        subplot_titles=("Small", "Small", "Small", "Medium", "Medium", "Medium", "Large", "Large", "Large"),
                        horizontal_spacing=0.08, vertical_spacing=0.07)

    marker_size = 12

    df = df[df.implementation != "PstClassifierSeqan"]
    marker_symbols = ['circle', 'square', 'diamond', 'cross', 'x', 'pentagon', 'star', 'diamond-wide']
    implementations = ["Sorted vector", "SBS", "Hash map", "vEB", "Eytzinger", "B-tree", "PstClassifierSeqan"]
    line_color_map = list(zip(implementations, px.colors.qualitative.Plotly))

    for imp, marker_symbol in zip(implementations, marker_symbols):
        df_imp = df[df.implementation == imp]
        if df_imp.empty:
            # Nothing to draw (e.g. the baseline, filtered out above); skipping
            # avoids a legend entry that has no points.
            continue
        df_eco = df_imp[df_imp.datasets == "Virus to Virus"]
        df_imp = df_imp[df_imp.datasets != "Virus to Virus"]
        style = dict(line_color=find_color(line_color_map, imp), name=imp, marker_symbol=marker_symbol,
                     mode='markers', marker_size=marker_size)

        # Column 1: all non-virus data sets; only row 1 feeds the legend.
        for row, size in enumerate(sizes, start=1):
            rows = df_imp[df_imp.vlmc_size == size]
            fig.add_trace(go.Scatter(x=rows['datasets'], y=rows['speedup'], showlegend=(row == 1), **style), row, 1)

        # Column 2: the virus data set.
        for row, size in enumerate(sizes, start=1):
            rows = df_eco[df_eco.vlmc_size == size]
            fig.add_trace(go.Scatter(x=rows['datasets'], y=rows['speedup'], showlegend=False, **style), row, 2)

        # Column 3: mean speedup of the implementation over all data sets.
        for row, size in enumerate(sizes, start=1):
            rows = df_imp[df_imp.vlmc_size == size]
            fig.add_trace(go.Scatter(x=["All Data Sets"], y=rows['implementation_mean'], showlegend=False, **style), row, 3)

    fig.update_layout(legend=dict(
        orientation="h",
        yanchor="bottom",
        font_size=24,
        y=1.08),
        margin=dict(l=100, r=60, t=60, b=80),
        height=900,
        width=900,
        plot_bgcolor="white")
    fig.update_xaxes(gridcolor='LightGrey', tickfont_size=20, showline=True, linewidth=1, linecolor='LightGrey')
    fig.update_yaxes(gridcolor='LightGrey', tickfont_size=20, showline=True, linewidth=1, linecolor='LightGrey')
    # Subplot and axis titles are stored as layout annotations; enlarge them all.
    for annotation_idx in range(len(fig.layout.annotations)):
        fig.layout.annotations[annotation_idx]["font"] = {'size': 24}

    if save_pdf:
        # The file is named after the last requested run (made explicit here
        # instead of relying on variables leaked from the reading loop).
        last_date, last_time = runs[-1]
        fig.write_image("images/Single_run_" + last_date + "_" + last_time + "_elapsed_time.pdf")
    fig.show()

In [None]:
def find_color(line_color_map, imp):
    """Return the colour registered for ``imp``, or ``None`` if it is unknown.

    ``line_color_map`` is walked as ``(name, colour)`` pairs; the first pair
    whose name equals ``imp`` decides the result.
    """
    for entry_name, entry_colour in line_color_map:
        if entry_name == imp:
            return entry_colour
    return None

def vis_bench_cache_misses_single_run(datetime, save_pdf: bool = False):
    """Plot per-data-set cache misses of every implementation, averaged over runs.

    For every (primary, secondary) data set pair and every ``(date, time)`` run
    the file ``csv_results/<date>/<primary>_<secondary>_<date>_<time>.csv`` is
    read; files that cannot be read are reported and skipped. The
    ``cache_misses`` column is replaced by its mean per (data set,
    implementation, VLMC size).

    The figure has one row per VLMC size and two columns: all non-virus data
    sets and the virus ("ecoli") data set.

    Args:
        datetime: Iterable of ``(date, time)`` string pairs identifying runs.
        save_pdf: When True, also write the figure to
            ``images/Single_run_<date>_<time>_cache_misses.pdf`` using the last
            pair of ``datetime``.

    Raises:
        ValueError: If none of the requested CSV files could be read.
    """
    datasets = [("human", "human"), ("human", "turkey"), ("human", "corn"), ("turkey", "turkey"), ("turkey", "corn"), ("corn", "corn"), ("ecoli", "ecoli")]
    sizes = ("small", "medium", "large")
    runs = list(datetime)

    df_list = []
    for primary, secondary in datasets:
        for date, time in runs:
            try:
                df_tmp = pd.read_csv("csv_results/" + date + "/" + primary + "_" + secondary + "_" + date + "_" + time + ".csv")
            except (OSError, ValueError):
                # Best effort: missing/unreadable files (OSError) and empty or
                # malformed CSVs (pandas raises ValueError subclasses) are
                # skipped, without also swallowing KeyboardInterrupt.
                print("Couldn't read " + primary + " to " + secondary + " for " + date + " " + time)
                continue
            df_tmp['datasets'] = primary.title() + " to " + secondary.title()
            df_list.append(df_tmp)

    if not df_list:
        raise ValueError("No benchmark CSV files could be read for the requested runs")

    df = pd.concat(df_list)

    df['cache_misses'] = df.groupby(['datasets', 'implementation', 'vlmc_size'])['cache_misses'].transform('mean')

    # Display names; substring replacements applied in this exact order.
    df['datasets'] = df.datasets.apply(lambda x: x.replace("Ecoli", "Virus"))
    for raw, label in (("sorted-search", "SBS"), ("ey", "Eytzinger"), ("alt-btree", "B-tree"),
                       ("sorted-vector", "Sorted vector"), ("hashmap", "Hash map"), ("veb", "vEB")):
        df['implementation'] = df.implementation.apply(lambda x: x.replace(raw, label))

    fig = make_subplots(rows=3,
                        cols=2, shared_yaxes=False, shared_xaxes=True,
                        column_widths=[0.8, 0.2],
                        x_title="",
                        # The traces show cache misses, so the axis is no longer
                        # mislabelled "Speedup"; the title matches vis_bench_cache_misses.
                        y_title="Cache misses (%)",
                        subplot_titles=("Small", "Small", "Medium", "Medium", "Large", "Large"),
                        horizontal_spacing=0.08, vertical_spacing=0.07)

    marker_size = 12

    marker_symbols = ['circle', 'square', 'diamond', 'cross', 'x', 'pentagon', 'star', 'diamond-wide']
    implementations = ["Sorted vector", "SBS", "Hash map", "vEB", "Eytzinger", "B-tree", "PstClassifierSeqan"]
    line_color_map = list(zip(implementations, px.colors.qualitative.Plotly))

    for imp, marker_symbol in zip(implementations, marker_symbols):
        df_imp = df[df.implementation == imp]
        if df_imp.empty:
            # No rows for this implementation: skip it so the legend does not
            # list a series without points.
            continue
        df_eco = df_imp[df_imp.datasets == "Virus to Virus"]
        df_imp = df_imp[df_imp.datasets != "Virus to Virus"]
        style = dict(line_color=find_color(line_color_map, imp), name=imp, marker_symbol=marker_symbol,
                     mode='markers', marker_size=marker_size)

        # Column 1: all non-virus data sets; only row 1 feeds the legend.
        for row, size in enumerate(sizes, start=1):
            rows = df_imp[df_imp.vlmc_size == size]
            fig.add_trace(go.Scatter(x=rows['datasets'], y=rows['cache_misses'], showlegend=(row == 1), **style), row, 1)

        # Column 2: the virus data set.
        for row, size in enumerate(sizes, start=1):
            rows = df_eco[df_eco.vlmc_size == size]
            fig.add_trace(go.Scatter(x=rows['datasets'], y=rows['cache_misses'], showlegend=False, **style), row, 2)

    fig.update_layout(legend=dict(
        orientation="h",
        yanchor="bottom",
        font_size=24,
        y=1.08),
        margin=dict(l=100, r=60, t=60, b=80),
        height=900,
        width=900,
        plot_bgcolor="white")
    fig.update_xaxes(gridcolor='LightGrey', tickfont_size=20, showline=True, linewidth=1, linecolor='LightGrey')
    fig.update_yaxes(gridcolor='LightGrey', tickfont_size=20, showline=True, linewidth=1, linecolor='LightGrey')
    # Subplot and axis titles are stored as layout annotations; enlarge them all.
    for annotation_idx in range(len(fig.layout.annotations)):
        fig.layout.annotations[annotation_idx]["font"] = {'size': 24}

    if save_pdf:
        # The file is named after the last requested run (made explicit here
        # instead of relying on variables leaked from the reading loop).
        last_date, last_time = runs[-1]
        fig.write_image("images/Single_run_" + last_date + "_" + last_time + "_cache_misses.pdf")
    fig.show()

In [None]:
def find_color(line_color_map, imp):
    """Return the colour mapped to ``imp`` in ``line_color_map``.

    ``line_color_map`` is consumed as ``(name, colour)`` pairs; the first pair
    with a matching name supplies the result and ``None`` signals no match.
    """
    return next((shade for label, shade in line_color_map if label == imp), None)
        
def ahmdals(date, time, species, to_pdf: bool = False):
    """Plot measured parallel speedup against the Amdahl's-law prediction.

    Reads ``csv_results/<date>/<species>_parallelization_<date>_<time>.csv``
    and, for the large VLMCs only, draws one line per container showing the
    elapsed time at the lowest core count divided by the elapsed time at each
    core count. A theoretical curve ``1 / ((1 - p) + p / s)`` with ``p = 0.99``
    is added for comparison.

    Args:
        date: Run date as it appears in the directory and file name.
        time: Run time as it appears in the file name.
        species: Data set prefix of the CSV file name.
        to_pdf: When True, also write the figure to
            ``images/virus-parallel-to-ahmdals.pdf``.
    """
    df = pd.read_csv(f'csv_results/{date}/{species}_parallelization_{date}_{time}.csv')
    # Display names; substring replacements applied in this exact order.
    for raw, label in (("sorted-search", "SBS"), ("ey", "Eytzinger"), ("alt-btree", "B-tree"),
                       ("sorted-vector", "Sorted vector"), ("hashmap", "Hashmap"), ("veb", "vEB")):
        df['implementation'] = df.implementation.apply(lambda x: x.replace(raw, label))

    size_vlmc = "large"
    containers = ["Sorted vector", "SBS", "Hashmap"]
    p = 0.99  # parallel fraction used in Amdahl's law

    line_color_map = list(zip(containers, px.colors.qualitative.Plotly))
    line_dashes = ['solid', 'dot', 'dash', 'longdash', 'dashdot', 'longdashdot']

    df_size = df[df.vlmc_size == size_vlmc]

    fig = go.Figure()
    for container, line_dash in zip(containers, line_dashes):
        # Order by core count so the first row really is the lowest-core
        # baseline, whatever the row order in the CSV.
        runs = df_size[df_size.implementation == container].sort_values('nr_cores_used', kind='stable')
        if runs.empty:
            # No measurements for this container: nothing to plot.
            continue
        elapsed = runs['elapsed_time']
        speedup = elapsed.iloc[0] / elapsed
        core_labels = [str(x) for x in runs['nr_cores_used']]
        line_color = find_color(line_color_map, container)
        fig.add_trace(go.Scatter(mode='lines', x=core_labels, y=speedup, name=container, line_color=line_color, line_dash=line_dash))

    # Theoretical curve over every core count measured for the plotted
    # containers (previously taken from whichever container the loop ended on).
    core_counts = sorted(df_size[df_size.implementation.isin(containers)]['nr_cores_used'].unique())
    th_speedups = [1 / ((1 - p) + (p / s)) for s in core_counts]
    cores = [str(x) for x in core_counts]
    # Black keeps the reference curve apart from the container lines; the
    # previous '#636EFA' is also the first palette colour ("Sorted vector").
    fig.add_trace(go.Scatter(mode='lines', x=cores, y=th_speedups, line_color='black', name="Theoretical"))
    fig.update_layout(yaxis_tick0=0, yaxis_dtick=2, yaxis_title="Parallel speedup", xaxis_title="Number of cores used",
                      yaxis_range=[0, 25], xaxis_range=[0, len(cores)], font={'size': 20}, plot_bgcolor="white",
                      yaxis_gridcolor='LightGrey', xaxis_gridcolor='LightGrey')
    fig.update_layout(
        autosize=False,
        width=1000,
        height=600)
    fig.show()
    if to_pdf:
        # NOTE(review): the output name is fixed and does not include `species`.
        fig.write_image("images/virus-parallel-to-ahmdals.pdf")

# Speedup to cache misses

In [None]:
def find_color(line_color_map, imp):
    """Pick the colour belonging to ``imp`` from (name, colour) pairs.

    Pairs are checked in order; the first one named ``imp`` wins. When nothing
    matches the function returns ``None``.
    """
    for key, value in line_color_map:
        if key != imp:
            continue
        return value
    return None

def vis_bench_speedup_to_cache_miss_single_run(datetime, save_pdf: bool = False):
    """Scatter speedup over PstClassifierSeqan against cache misses.

    For every (primary, secondary) data set pair and every ``(date, time)`` run
    the file ``csv_results/<date>/<primary>_<secondary>_<date>_<time>.csv`` is
    read (a missing file raises). Elapsed times are averaged per (data set,
    implementation, VLMC size) and divided into the PstClassifierSeqan mean to
    obtain the speedup, which is plotted against the ``cache_misses`` column
    for the "Sorted vector", "SBS" and "Hashmap" implementations.

    Args:
        datetime: Iterable of ``(date, time)`` string pairs identifying runs.
        save_pdf: When True, also write the figure to
            ``images/Single_run_<date>_<time>_speedup_to_cache_misses.pdf``
            using the last pair of ``datetime``.
    """
    datasets = [("human", "human"), ("human", "turkey"), ("human", "corn"), ("turkey", "turkey"), ("turkey", "corn"), ("corn", "corn"), ("ecoli", "ecoli")]
    runs = list(datetime)

    df_list = []
    for primary, secondary in datasets:
        for date, time in runs:
            df_tmp = pd.read_csv("csv_results/" + date + "/" + primary + "_" + secondary + "_" + date + "_" + time + ".csv")
            df_tmp['datasets'] = primary + " to " + secondary
            df_list.append(df_tmp)

    df = pd.concat(df_list)

    df['elapsed_time_mean'] = df.groupby(['datasets', 'implementation', 'vlmc_size'])['elapsed_time'].transform('mean')
    # Speedup = baseline (PstClassifierSeqan) mean time / implementation mean time,
    # computed separately for each data set and VLMC size.
    for dataset in df.datasets.unique():
        for s in ["small", "medium", "large"]:
            in_group = (df.datasets == dataset) & (df.vlmc_size == s)
            df_pst_time = df[in_group & (df.implementation == "PstClassifierSeqan")]['elapsed_time_mean'].iloc[0]
            df.loc[in_group, 'speedup'] = df_pst_time / df[in_group].elapsed_time_mean

    # Display names; substring replacements applied in this exact order.
    df['datasets'] = df.datasets.apply(lambda x: x.replace("ecoli", "virus"))
    for raw, label in (("sorted-search", "SBS"), ("ey", "Eytzinger"), ("alt-btree", "B-tree"),
                       ("sorted-vector", "Sorted vector"), ("hashmap", "Hashmap"), ("veb", "vEB")):
        df['implementation'] = df.implementation.apply(lambda x: x.replace(raw, label))

    # The "max_depth" column that used to be assigned here was never read and
    # has been dropped.

    fig = make_subplots(rows=1,
                        cols=1, shared_yaxes=False, shared_xaxes=True,
                        x_title="Cache misses (%)",
                        y_title="Speedup",
                        horizontal_spacing=0.08, vertical_spacing=0.07)

    marker_size = 12

    df = df[df.implementation != "PstClassifierSeqan"]
    marker_symbols = ['circle', 'square', 'diamond', 'cross', 'x', 'pentagon', 'star', 'diamond-wide']
    implementations = ["Sorted vector", "SBS", "Hashmap"]
    line_color_map = list(zip(implementations, px.colors.qualitative.Plotly))

    for imp, marker_symbol in zip(implementations, marker_symbols):
        df_imp = df[df.implementation == imp]
        line_color = find_color(line_color_map, imp)
        fig.add_trace(go.Scatter(x=df_imp['cache_misses'], y=df_imp['speedup'], line_color=line_color, name=imp,
                                 marker_symbol=marker_symbol, mode='markers', marker_size=marker_size), 1, 1)

    fig.update_layout(legend=dict(
        orientation="h",
        yanchor="bottom",
        font_size=24,
        y=1.08),
        margin=dict(l=100, r=60, t=60, b=80),
        height=600,
        width=900,
        plot_bgcolor="white")
    fig.update_xaxes(gridcolor='LightGrey', tickfont_size=20, showline=True, linewidth=1, linecolor='LightGrey')
    fig.update_yaxes(gridcolor='LightGrey', tickfont_size=20, showline=True, linewidth=1, linecolor='LightGrey')
    # Axis titles are stored as layout annotations; enlarge them all.
    for annotation_idx in range(len(fig.layout.annotations)):
        fig.layout.annotations[annotation_idx]["font"] = {'size': 24}

    if save_pdf:
        # Dedicated file name: the previous "_elapsed_time.pdf" suffix is the
        # one vis_bench_speedup_single_run writes, so the two figures
        # overwrote each other. Named after the last requested run.
        last_date, last_time = runs[-1]
        fig.write_image("images/Single_run_" + last_date + "_" + last_time + "_speedup_to_cache_misses.pdf")
    fig.show()

# <b>PLOTS</b>

In [None]:
# Speedup-versus-cache-miss scatter over three 05_11 runs, given as (date, time) pairs.
# save_pdf=False: the figure is only displayed.
vis_bench_speedup_to_cache_miss_single_run([("05_11", "11_02"), ("05_11", "13_38"), ("05_11", "20_45")], False)

In [None]:
# Speedup and cache-miss figures averaged over four runs, given as (date, time) pairs.
# save_pdf=True: both figures are also written under images/.
vis_bench_speedup_single_run([("05_11", "11_02"), ("05_11", "13_38"), ("05_11", "20_45"), ("05_21", "14_16")], True)
vis_bench_cache_misses_single_run([("05_11", "11_02"), ("05_11", "13_38"), ("05_11", "20_45"), ("05_21", "14_16")], True)

In [None]:
# Same two figures restricted to the first two 05_11 runs.
# save_pdf=True: both figures are also written under images/.
vis_bench_speedup_single_run([("05_11", "11_02"), ("05_11", "13_38")], True)
vis_bench_cache_misses_single_run([("05_11", "11_02"), ("05_11", "13_38")], True)

In [None]:
# Parallel-speedup plot for the "ecoli" parallelization run of 05_15 at 21_28.
# to_pdf=False: the figure is only displayed.
ahmdals("05_15", "21_28", "ecoli", False)

In [6]:
# Results in paper:
# Speedup figures for the two 06_01 runs; only the second call (save_pdf=True)
# also writes a PDF. The cache-miss calls are disabled.
vis_bench_speedup_single_run([("06_01", "13_53")], False)
#vis_bench_cache_misses_single_run([("06_01", "11_02")], False)
vis_bench_speedup_single_run([("06_01", "17_20")], True)
#vis_bench_cache_misses_single_run([("06_01", "11_02")], False)

In [None]:
def find_color(line_color_map, imp):
    """Resolve the colour for implementation ``imp``.

    Walks ``line_color_map`` as ``(name, colour)`` pairs and returns the colour
    of the first pair named ``imp``; returns ``None`` when there is no match.
    """
    hits = (tone for tag, tone in line_color_map if tag == imp)
    return next(hits, None)

def vis_bench_cache_misses_single_run(datetime, save_pdf: bool = False):
    """Plot per-data-set cache reference counts, averaged over runs.

    NOTE: this definition shares its name with an earlier function in this
    notebook that plots the ``cache_misses`` column; running this cell
    replaces that function. This version plots ``cache_references_count``.

    For every (primary, secondary) data set pair and every ``(date, time)`` run
    the file ``csv_results/<date>/<primary>_<secondary>_<date>_<time>.csv`` is
    read; files that cannot be read are reported and skipped. The
    ``cache_references_count`` column is replaced by its mean per (data set,
    implementation, VLMC size).

    The figure has one row per VLMC size and two columns: all non-virus data
    sets and the virus ("ecoli") data set.

    Args:
        datetime: Iterable of ``(date, time)`` string pairs identifying runs.
        save_pdf: When True, also write the figure to
            ``images/Single_run_<date>_<time>_cache_references.pdf`` using the
            last pair of ``datetime``.

    Raises:
        ValueError: If none of the requested CSV files could be read.
    """
    datasets = [("human", "human"), ("human", "turkey"), ("human", "corn"), ("turkey", "turkey"), ("turkey", "corn"), ("corn", "corn"), ("ecoli", "ecoli")]
    sizes = ("small", "medium", "large")
    runs = list(datetime)

    df_list = []
    for primary, secondary in datasets:
        for date, time in runs:
            try:
                df_tmp = pd.read_csv("csv_results/" + date + "/" + primary + "_" + secondary + "_" + date + "_" + time + ".csv")
            except (OSError, ValueError):
                # Best effort: missing/unreadable files (OSError) and empty or
                # malformed CSVs (pandas raises ValueError subclasses) are
                # skipped, without also swallowing KeyboardInterrupt.
                print("Couldn't read " + primary + " to " + secondary + " for " + date + " " + time)
                continue
            df_tmp['datasets'] = primary.title() + " to " + secondary.title()
            df_list.append(df_tmp)

    if not df_list:
        raise ValueError("No benchmark CSV files could be read for the requested runs")

    df = pd.concat(df_list)

    df['cache_references_count'] = df.groupby(['datasets', 'implementation', 'vlmc_size'])['cache_references_count'].transform('mean')

    # Display names; substring replacements applied in this exact order.
    df['datasets'] = df.datasets.apply(lambda x: x.replace("Ecoli", "Virus"))
    for raw, label in (("sorted-search", "SBS"), ("ey", "Eytzinger"), ("alt-btree", "B-tree"),
                       ("sorted-vector", "Sorted vector"), ("hashmap", "Hash map"), ("veb", "vEB")):
        df['implementation'] = df.implementation.apply(lambda x: x.replace(raw, label))

    fig = make_subplots(rows=3,
                        cols=2, shared_yaxes=False, shared_xaxes=True,
                        column_widths=[0.8, 0.2],
                        x_title="",
                        # The traces show cache reference counts, so the axis is
                        # no longer mislabelled "Speedup".
                        y_title="Cache references",
                        subplot_titles=("Small", "Small", "Medium", "Medium", "Large", "Large"),
                        horizontal_spacing=0.08, vertical_spacing=0.07)

    marker_size = 12

    marker_symbols = ['circle', 'square', 'diamond', 'cross', 'x', 'pentagon', 'star', 'diamond-wide']
    implementations = ["Sorted vector", "SBS", "Hash map", "vEB", "Eytzinger", "B-tree", "PstClassifierSeqan"]
    line_color_map = list(zip(implementations, px.colors.qualitative.Plotly))

    for imp, marker_symbol in zip(implementations, marker_symbols):
        df_imp = df[df.implementation == imp]
        if df_imp.empty:
            # No rows for this implementation: skip it so the legend does not
            # list a series without points.
            continue
        df_eco = df_imp[df_imp.datasets == "Virus to Virus"]
        df_imp = df_imp[df_imp.datasets != "Virus to Virus"]
        style = dict(line_color=find_color(line_color_map, imp), name=imp, marker_symbol=marker_symbol,
                     mode='markers', marker_size=marker_size)

        # Column 1: all non-virus data sets; only row 1 feeds the legend.
        for row, size in enumerate(sizes, start=1):
            rows = df_imp[df_imp.vlmc_size == size]
            fig.add_trace(go.Scatter(x=rows['datasets'], y=rows['cache_references_count'], showlegend=(row == 1), **style), row, 1)

        # Column 2: the virus data set.
        for row, size in enumerate(sizes, start=1):
            rows = df_eco[df_eco.vlmc_size == size]
            fig.add_trace(go.Scatter(x=rows['datasets'], y=rows['cache_references_count'], showlegend=False, **style), row, 2)

    fig.update_layout(legend=dict(
        orientation="h",
        yanchor="bottom",
        font_size=24,
        y=1.08),
        margin=dict(l=100, r=60, t=60, b=80),
        height=900,
        width=900,
        plot_bgcolor="white")
    fig.update_xaxes(gridcolor='LightGrey', tickfont_size=20, showline=True, linewidth=1, linecolor='LightGrey')
    fig.update_yaxes(gridcolor='LightGrey', tickfont_size=20, showline=True, linewidth=1, linecolor='LightGrey')
    # Subplot and axis titles are stored as layout annotations; enlarge them all.
    for annotation_idx in range(len(fig.layout.annotations)):
        fig.layout.annotations[annotation_idx]["font"] = {'size': 24}

    if save_pdf:
        # Dedicated file name so this figure does not overwrite the cache-miss
        # figure ("_cache_misses.pdf"). Named after the last requested run.
        last_date, last_time = runs[-1]
        fig.write_image("images/Single_run_" + last_date + "_" + last_time + "_cache_references.pdf")
    fig.show()

# Runs the definition directly above (cache_references_count) over four runs,
# given as (date, time) pairs. save_pdf=False: the figure is only displayed.
vis_bench_cache_misses_single_run([("05_11", "11_02"), ("05_11", "13_38"), ("05_11", "20_45"), ("05_21", "14_16")], False)