# Scheduling benchmarks analysis

## Library

In [None]:
import matplotlib.pyplot as plt
from typing import Optional, List, Tuple
import pandas as pd
import os
import re
import json
import subprocess

#### Global variables

In [None]:
# Base name (without the ".csv" extension) of the per-directory cache file
# read and written by get_runs_dataframe.
TRACE_CMD_CACHE_FILENAME = "trace_cmd_runtimes"
# Directory against which get_result_dir_path resolves result directory names.
RESULTS_DIR_PATH = "../results"

# Default value of get_runs_dataframe's `use_cache` parameter.
USE_TRACE_CMD_CACHE = True

def get_result_dir_path(result_dir_name):
    """Return the path of `result_dir_name` inside RESULTS_DIR_PATH."""
    result_dir_path = os.path.join(RESULTS_DIR_PATH, result_dir_name)
    return result_dir_path

#### Visualization params

In [None]:
# Figure height (in inches) used unless a cell overrides `figure_height`.
DEFAULT_FIGURE_HEIGHT = 12

# Line colors for the "with NUMA balancing" / "without NUMA balancing" series.
nb_enabled_color = "tab:orange"
nb_disabled_color = "tab:blue"

# Figure sizing: the width is max(int(number_of_points * figure_width_coeff), min_figure_width)
# inches and a figure is only ever widened, never shrunk.
figure_height = DEFAULT_FIGURE_HEIGHT
min_figure_width = 40
figure_width_coeff = 0.3

# Width of every plotted line.
line_width = 1.0

#### Trace-cmd results processing functions

In [None]:
def get_run_index(filename) -> Optional[int] :
    """Extract the run number from a trace file name.

    The run number is the digit sequence located between an underscore and
    the ``.dat`` extension, e.g. ``"bt_12.dat"`` -> ``12``.

    Returns:
        The run number, or None when `filename` does not contain the
        ``_<digits>.dat`` pattern.
    """
    match = re.search(r'_(\d+)\.dat', filename)
    if match is None:
        return None
    return int(match.group(1))


def get_trace_duration(dat_file_path: str) :
    """Return the time span covered by a trace-cmd ``.dat`` file.

    Runs ``trace-cmd report --first-event --last-event`` on the file and parses
    every "First event / Last event" line of its output (presumably one line
    per CPU -- TODO confirm). The duration is the latest last-event timestamp
    minus the earliest first-event timestamp, in the unit printed by trace-cmd.

    Raises:
        ValueError: when the output contains no first/last event line (for
            example because the file is not a valid trace).
    """
    result = subprocess.run(
        ["trace-cmd", "report", "--first-event", "--last-event", dat_file_path],
        stdout = subprocess.PIPE,
        universal_newlines = True
    )

    event_span_re = re.compile(r'\s+\d+\tFirst event:\s*(\d+\.\d+)\tLast event:\s*(\d+\.\d+)')
    firsts = []
    lasts = []
    for line in result.stdout.split("\n"):
        match = event_span_re.search(line)
        if match:
            firsts.append(float(match.group(1)))
            lasts.append(float(match.group(2)))

    # Without this guard, max()/min() fail with an opaque "empty sequence" error.
    if not firsts:
        raise ValueError(f"get_trace_duration : no first/last event timestamps in trace-cmd output for {dat_file_path}")
    return max(lasts) - min(firsts)


def get_trace_broken_work_conservation_time(dat_file_path: str, tool_path: str = "/home/cgachod/repos/ocaml-scripts/running_waiting") :
    """Return the "WC time" reported by the running_waiting tool for a trace.

    Runs ``<tool_path> --wc <dat_file_path>`` and parses the first number that
    follows the text "WC time" in its standard output.

    Args:
        dat_file_path: path of the trace-cmd ``.dat`` file to analyse.
        tool_path: executable to run; defaults to the original author's
            local build of the tool.

    Returns:
        The reported value as a float (integer and decimal notations are both
        accepted), or None when the output contains no "WC time" value.
    """
    result = subprocess.run(
        [tool_path, "--wc", dat_file_path],
        stdout = subprocess.PIPE,
        universal_newlines = True
    )

    # The decimal point is escaped and optional: the previous pattern used a
    # bare "." which matched any character and required at least 3 characters.
    match = re.search(r'WC time.*?(\d+(?:\.\d+)?)', result.stdout)
    if match is None :
        return None
    return float(match.group(1))
    

def get_runs_dataframe(dir_path, use_cache = USE_TRACE_CMD_CACHE, sort_by_duration = True) :
    """Build the table of runs for one directory of trace-cmd ``.dat`` files.

    When `use_cache` is true and the CSV cache exists in `dir_path`, the table
    is loaded from it; otherwise every ``.dat`` file is analysed (duration and
    broken work-conservation time) and the result is written to the cache.

    Returns:
        A DataFrame with the columns ``filename``, ``duration``,
        ``broken_wc_time`` (as stored in / computed for the cache), ``exists``
        (whether the trace file is present on disk), ``run_index`` and
        ``variation`` (percent above the shortest duration), or None when
        `dir_path` does not exist or contains no trace file to analyse.
    """
    if not os.path.exists(dir_path) :
        return None

    def in_dir(name) :
        return os.path.join(dir_path, name)

    cache_path = in_dir(TRACE_CMD_CACHE_FILENAME + ".csv")
    if use_cache and os.path.isfile(cache_path) :
        runs = pd.read_csv(cache_path)
        # Cached rows may refer to trace files that have since been deleted.
        runs['exists'] = runs['filename'].map(lambda name: os.path.isfile(in_dir(name)))
    else :
        trace_names = [entry for entry in os.listdir(dir_path) if entry.endswith('.dat')]
        if len(trace_names) == 0 :
            print(f"get_runs_dataframe : no trace files found in {dir_path}, skipping")
            return None

        runs = pd.DataFrame({'filename': trace_names})
        runs['duration'] = runs['filename'].map(lambda name: get_trace_duration(in_dir(name)))
        runs['broken_wc_time'] = runs['filename'].map(lambda name: get_trace_broken_work_conservation_time(in_dir(name)))
        # The cache is always stored sorted by duration.
        runs.sort_values('duration', ignore_index=True, inplace=True)
        runs.to_csv(cache_path, encoding='utf-8', index=False)
        runs['exists'] = True

    if sort_by_duration :
        runs.sort_values('duration', ignore_index=True, inplace=True)

    runs['run_index'] = runs['filename'].map(get_run_index)
    shortest = runs['duration'].min()
    runs['variation'] = (runs['duration'] - shortest) * 100 / shortest
    return runs

In [None]:
# Naming convention :
# - run : 1 set of files, either nb or no_nb
# - benchmark / result : 1 series of nb and 1 series of no_nb


def init_runs_plot():
    """Set the axis labels and the grid of the current matplotlib axes."""
    ax = plt.gca()
    ax.set_xlabel("Nth run")
    ax.set_ylabel("Run time (seconds)")
    # Horizontal grid lines on major and minor ticks, vertical ones on major ticks only.
    ax.grid(axis="y", which="both")
    ax.grid(axis="x", which="major")
    
def plot_trace_cmd_run(df, data = "duration", label = None, color = None, init_plot = True, show_run_index = True) :
    """Plot one column of a runs DataFrame on the current figure.

    Args:
        df: runs table; must contain the `data` column and, when
            `show_run_index` is true, the ``exists`` and ``run_index`` columns.
        data: name of the column plotted on the y axis.
        label: legend entry; the legend is only drawn when it is truthy.
        color: matplotlib color of the line (None lets matplotlib choose).
        init_plot: call init_runs_plot() before plotting.
        show_run_index: annotate every point whose trace file still exists
            with its run index.
    """
    if init_plot:
        init_runs_plot()

    run_count = len(df)
    series = df[data]
    plt.plot(df.index, series, label=label, color=color, linewidth=line_width)

    # Ticks and x range only ever grow, so the longest series plotted so far wins.
    if run_count > len(plt.xticks()[0]) :
        plt.xticks(range(run_count), rotation=45)
        plt.xlim(-1, run_count)

    # Durations are drawn from 0 with 20% headroom above the largest value.
    if data == "duration" :
        wanted_top = series.max() * 1.2
        if wanted_top > plt.ylim()[1] :
            plt.ylim(0, wanted_top)

    fig = plt.gcf()
    wanted_width = max(int(run_count * figure_width_coeff), min_figure_width)
    if wanted_width > fig.get_size_inches()[0] :
        fig.set_size_inches(wanted_width, figure_height)

    if label :
        plt.legend(loc="upper left")

    if not show_run_index :
        return

    ax = plt.gca()
    for idx in df.index:
        if df['exists'][idx]:
            ax.text(idx, series[idx], df['run_index'][idx], fontsize=8)
    


In [None]:
# Line color per run-folder name.
# NOTE(review): in the visible code this mapping is only referenced from a
# commented-out line; the plotting helpers currently pass color=None.
trace_cmd_folders_color = {
    "nb": "tab:orange",
    "nb_memory": "tab:purple",
    "nb_tasks": "tab:green",
    "no_nb": "tab:blue"
}

def plot_trace_cmd_runs(dir_path, label, data = "duration", show_run_index = True, sort_by = None, color = None) :
    """Plot the runs of a single run directory on the current figure.

    Does nothing when no runs table can be built for `dir_path`. When `sort_by`
    is given, the runs are re-sorted by that column before plotting.
    """
    runs = get_runs_dataframe(dir_path)
    if runs is None :
        return
    if sort_by is not None :
        runs.sort_values(sort_by, ignore_index=True, inplace=True)
    plot_trace_cmd_run(
        runs,
        data=data,
        label=label,
        color=color,
        init_plot=False,
        show_run_index=show_run_index,
    )
    
def plot_trace_cmd_benchmark_dir(benchmark_dir_name, label_prefix = "", data = "duration", show_run_index = True, sort_by = None, folder_name_filters = None) :
    """Plot every run directory of a benchmark directory on the current figure.

    Args:
        benchmark_dir_name: directory name, resolved with get_result_dir_path.
        label_prefix: text prepended (followed by a space) to each run
            directory name to build its legend entry. Optional, like the
            equivalent parameter of Plotter.add_trace_cmd_directory.
        data: column of the runs table plotted on the y axis.
        show_run_index: annotate the points with their run index.
        sort_by: column used to order the runs; a falsy value means 'duration'.
        folder_name_filters: when non-empty, only run directories whose name
            contains at least one of these substrings are plotted.
    """
    if not sort_by :
        sort_by = 'duration'

    def is_dir_name_in_filters(dir_name) :
        if not folder_name_filters :
            return True
        return any(filter_substr in dir_name for filter_substr in folder_name_filters)

    benchmark_dir_path = get_result_dir_path(benchmark_dir_name)
    # os.listdir returns entries in arbitrary order; sorting keeps the automatic
    # colors and the legend order identical from one execution to the next.
    for c in sorted(os.listdir(benchmark_dir_path)) :
        dir_path = os.path.join(benchmark_dir_path, c)
        if not os.path.isdir(dir_path) :
            continue
        if not is_dir_name_in_filters(c) :
            continue

        df = get_runs_dataframe(dir_path)
        if df is None :
            continue
        df.sort_values(sort_by, ignore_index=True, inplace=True)
        plot_trace_cmd_run(df, data=data, label=label_prefix + " " + c, color=None, init_plot=False, show_run_index=show_run_index)


In [None]:
class Plotter:
    """Accumulate named time series and plot them with matplotlib.

    Series can be displayed as raw values (`show_values`) or as percent
    variations relative to a reference value (`show_variations`).
    """

    class TimeSeries :
        """One line of the plot together with its display metadata."""

        def __init__(self, index, values, name, label = None, color = None, individual_labels = None) -> None:
            self.index = index                          # x positions
            self.values = values                        # y values
            self.name = name                            # matched against name filters
            self.label = label                          # legend entry (None: no legend)
            self.color = color                          # matplotlib color (None: automatic)
            self.individual_labels = individual_labels  # optional per-point annotations

    def __init__(self) -> None:
        self.time_series: List["Plotter.TimeSeries"] = []
        # Smallest value registered so far (see update_reference_value).
        # show_variations computes its own reference and does not read it.
        self.reference_value = None

    def add_time_series(self, name, values, index = None, label = None, color = None, update_reference_value = True) :
        """Register a series; `index` defaults to 0..len(values)-1."""
        # Identity test: `index == None` is evaluated element-wise (and is
        # therefore ambiguous) for numpy arrays and pandas indexes.
        if index is None :
            index = list(range(len(values)))
        self.time_series.append(self.TimeSeries(index, values, name = name, label = label, color = color))
        if update_reference_value:
            self.update_reference_value(min(values))

    def add_trace_cmd_run(self, name, df, data = "duration", label = None, color = None, update_reference_value = True) :
        """Register column `data` of a runs DataFrame, labelled point by point with its ``run_index`` column."""
        self.time_series.append(self.TimeSeries(
            df.index, df[data], name = name, label = label, color = color, individual_labels = df['run_index']
        ))
        if update_reference_value:
            self.update_reference_value(min(df[data]))

    def add_trace_cmd_directory(self, benchmark_dir_name, label_prefix = "", data = "duration", sort_by = None, update_reference_value = True) :
        """Register one series per run directory found in a benchmark directory.

        `benchmark_dir_name` is resolved with get_result_dir_path. Each series is
        named after its run directory and labelled ``label_prefix + " " + name``.
        Directories for which no runs table can be built are skipped.
        """
        benchmark_dir_path = get_result_dir_path(benchmark_dir_name)
        # Sorted so that automatic colors and legend order are reproducible.
        for c in sorted(os.listdir(benchmark_dir_path)) :
            dir_path = os.path.join(benchmark_dir_path, c)
            if not os.path.isdir(dir_path) :
                continue

            df = get_runs_dataframe(dir_path)
            if df is None :
                continue
            if sort_by is not None :
                df.sort_values(sort_by, ignore_index=True, inplace=True)
            self.add_trace_cmd_run(c, df, data, label_prefix + " " + c, None, update_reference_value)

    def update_reference_value(self, candidate) :
        """Lower the stored reference value to `candidate` if it is smaller."""
        if self.reference_value is None or candidate < self.reference_value :
            self.reference_value = candidate

    def set_reference_value(self, new_val) :
        """Overwrite the stored reference value."""
        self.reference_value = new_val

    def set_reference_index(self, idx: int) :
        """Store, as reference value, the smallest value found at `idx` across all series."""
        self.reference_value = min(ts.values[idx] for ts in self.time_series)
        print("New reference value : ", self.reference_value)

    def get_filter_names(self) :
        """Print and return the names usable in the `name_filters` arguments."""
        names = [ts.name for ts in self.time_series]
        print("Available time series names :", names)
        return names

    def __plot(self, index: List[float], values: List[float], line_width, label = None, color = None, individual_labels: Optional[list] = None) :
        """Draw one line on the current figure, growing ticks and figure width as needed."""
        n = len(values)
        plt.plot(index, values, label=label, color=color, linewidth=line_width)

        # Ticks and x range only ever grow, so the longest series wins.
        current_xticks_count = len(plt.xticks()[0])
        if n > current_xticks_count :
            plt.xticks(range(n), rotation=45)
            plt.xlim(-1, n)

        fig = plt.gcf()
        candidate_width = max(int(n * figure_width_coeff), min_figure_width)
        curr_width = fig.get_size_inches()[0]
        if candidate_width > curr_width :
            fig.set_size_inches(candidate_width, figure_height)

        if label is not None :
            plt.legend(loc="upper left")

        if individual_labels is None:
            return

        if len(individual_labels) != n :
            print(f"ERROR : time series has {n} values but only {len(individual_labels)} individual labels")
            return

        ax = plt.gca()
        # zip pairs the points positionally, so this also works when `values` or
        # `individual_labels` are pandas Series whose index is not 0..n-1.
        for x, y, point_label in zip(index, values, individual_labels) :
            il = str(point_label)
            if not il :
                continue
            ax.text(x, y, il, fontsize=8)

    def __plot_variations(self, time_series: TimeSeries, reference_value: float, show_labels: bool) :
        """Plot a series as percent variations relative to `reference_value`."""
        variations = [(v * 100) / reference_value - 100 for v in time_series.values]
        self.__plot(time_series.index, variations, line_width=line_width, label=time_series.label,
                    color=time_series.color, individual_labels = time_series.individual_labels if show_labels else None)

    def __plot_values(self, time_series: TimeSeries, show_labels: bool) :
        """Plot the raw values of a series."""
        self.__plot(time_series.index, time_series.values, line_width=line_width, label=time_series.label,
                    color=time_series.color, individual_labels = time_series.individual_labels if show_labels else None)

    def __plot_time_series(self, time_series: TimeSeries, reference_value: float, show_labels: bool) :
        """Older entry point kept for compatibility; same drawing as __plot_variations."""
        self.__plot_variations(time_series, reference_value, show_labels)

    def __init_plot(self) :
        """Set the x label and the grid of the current axes."""
        plt.xlabel("Nth run")
        plt.grid(axis="y", which="both")
        plt.grid(axis="x", which="major")

    def __filter_time_series(self, name_filters) :
        """Return the series matching `name_filters` (all of them when it is empty or None)."""
        if not name_filters :
            return self.time_series
        return [ts for ts in self.time_series if self.matches_filters(ts.name, name_filters)]

    def matches_filters(self, value, filters) :
        """Return True when at least one element of `filters` is a substring of `value`."""
        return any(f in value for f in filters)

    def show_variations(self, name_filters = None, show_labels = False, reference_idx = None) :
        """Show the selected series as percent variations.

        The reference (0%) is the smallest value among the selected series, or,
        when `reference_idx` is given, the smallest value found at that position.
        Prints a message and returns without plotting when no series is selected.
        """
        filtered_ts = self.__filter_time_series(name_filters)
        if not filtered_ts :
            # min() of an empty sequence would raise a ValueError below.
            print("No time series to show")
            return

        if reference_idx is None :
            reference_value = min(min(ts.values) for ts in filtered_ts)
        else :
            reference_value = min(ts.values[reference_idx] for ts in filtered_ts)
        print("Reference value : ", reference_value)

        plt.ylabel("Variation (percent)")
        self.__init_plot()
        for ts in filtered_ts :
            self.__plot_variations(ts, reference_value, show_labels=show_labels)
        plt.show()

    def show_values(self, name_filters = None, show_labels = False) :
        """Show the raw values of the selected series."""
        plt.ylabel("Time (seconds)")
        filtered_ts = self.__filter_time_series(name_filters)

        self.__init_plot()
        for ts in filtered_ts :
            self.__plot_values(ts, show_labels=show_labels)
        plt.show()

    def reset(self) :
        """Forget every registered series and the stored reference value."""
        self.time_series = []
        # A reference computed from discarded series must not leak into the next plot.
        self.reference_value = None

# Thread placement comparison

### BT

In [None]:
# BT, thread placement comparison: one variation plot per (benchmark, machine).
# NOTE: sort_by and show_run_index are assigned here but not passed to the
# Plotter calls of this cell.
sort_by='duration'
# sort_by='run_index'
show_run_index = True
# show_run_index = False

print("bt.B on dahu 15")
vp = Plotter()
vp.add_trace_cmd_directory("_trace-cmd/bt.B.x__dahu-15__v6.8.0-rc3__performance__2024-03-08")
# Prints the run directory names that name_filters is matched against.
vp.get_filter_names()
vp.show_variations(name_filters = ["none", "sockorder"])
# init_runs_plot()
# plot_trace_cmd_benchmark_dir("bt.B.x__dahu-15__v6.8.0-rc3__performance__2024-03-08", "Dahu 15 - v6.8-rc3 OMP -", show_run_index=show_run_index, sort_by=sort_by)
# plt.show()

print("bt.C on dahu 15")
vp = Plotter()
vp.add_trace_cmd_directory("_trace-cmd/bt.C.x__dahu-15__v6.8.0-rc3__performance__2024-03-08")
vp.show_variations(name_filters = ["none", "sockorder"])


print("bt.C on yeti 3")
vp = Plotter()
vp.add_trace_cmd_directory("_trace-cmd/bt.C.x__yeti-3__v6.8.0-rc3__performance__2024-03-11")
# vp.get_filter_names()
vp.show_variations(name_filters = ["none", "sockorder"])

### CG

In [None]:
# CG, thread placement comparison. The reference (0%) is taken at a fixed
# position of the sorted runs instead of the overall minimum.
sort_by='duration'
# sort_by='run_index'
# show_run_index = True
show_run_index = False

print("cg.C on dahu 28")
vp = Plotter()
# The label names the machine the results directory was produced on (dahu-28).
vp.add_trace_cmd_directory("_trace-cmd/cg.C.x__dahu-28__v6.8.0-rc3__performance__2024-03-28", "Dahu 28 - v6.8-rc3 OMP -", sort_by=sort_by)
vp.get_filter_names()
vp.show_variations(name_filters = ["none"], show_labels=show_run_index, reference_idx=5)

# The results directory below holds cg.D (not cg.C) runs.
print("cg.D on troll 1") # TODO Have automated mechanism to print graph info before the graph
vp = Plotter()
vp.add_trace_cmd_directory("_trace-cmd/cg.D.x__troll-1__v6.8.0-rc3__performance__2024-03-30", "Troll 1 - v6.8-rc3 OMP -", sort_by=sort_by)
vp.set_reference_index(10)
vp.show_variations(name_filters = ["none"], show_labels=show_run_index, reference_idx=10)


# init_runs_plot()
# print("cg.C on yeti 3")
# plot_trace_cmd_benchmark_dir("cg.C.x__yeti-3__v6.8.0-rc3__performance__2024-03-11", "Yeti 3 - v6.8-rc3 OMP -", show_run_index=show_run_index, sort_by=sort_by)
# plt.show()

### DC

In [None]:
# Plot settings for the DC cell; the plotting code itself is commented out.
sort_by='duration'
# sort_by='run_index'
# show_run_index = True
show_run_index = False

# We should not look at DC

# init_runs_plot()
# print("dc.B on dahu 13")
# plot_trace_cmd_benchmark_dir("dc.B.x__dahu-13__v6.8.0-rc3__performance__2024-03-09", "Dahu 15 - v6.8-rc3 OMP -", show_run_index=show_run_index, sort_by=sort_by)
# plt.show()

### FT

In [None]:
# FT, thread placement comparison: variations on dahu 15, raw values on yeti 3.
sort_by='duration'
# sort_by='run_index'
# show_run_index = True
show_run_index = False

print("ft.C on dahu 15")
vp = Plotter()
vp.add_trace_cmd_directory("_trace-cmd/ft.C.x__dahu-15__v6.8.0-rc3__performance__2024-03-09", "Dahu 15 - v6.8-rc3 OMP -", sort_by=sort_by)
vp.show_variations(name_filters = ["none", "sockorder"], show_labels=show_run_index)

print("ft.C on yeti 3")
vp = Plotter()
vp.add_trace_cmd_directory("_trace-cmd/ft.C.x__yeti-3__v6.8.0-rc3__performance__2024-03-11", "Yeti 3 - v6.8-rc3 OMP -", sort_by=sort_by)
# Raw values (not percent variations) for this machine.
vp.show_values(name_filters = ["none", "sockorder"], show_labels=show_run_index)

### LU

In [None]:
# Plot settings for the LU cell.
sort_by='duration'
# sort_by='run_index'
# show_run_index = True
show_run_index = False
# Restore the default figure height (module-level value read by the plotting helpers).
figure_height = DEFAULT_FIGURE_HEIGHT

### LU.B on dahu doesn't show a significant difference between enabled none and disabled none
# init_runs_plot()
# print("lu.B on dahu 15")
# plot_trace_cmd_benchmark_dir("lu.B.x__dahu-15__v6.8.0-rc3__performance__2024-03-09", "Dahu 15 - v6.8-rc3 OMP -", show_run_index=show_run_index, sort_by=sort_by, folder_name_filters=["none", "sockorder"])
# plt.show()

### LU.B on yeti doesn't show a significant difference between enabled none and disabled none
# init_runs_plot()
# print("lu.B on yeti 3")
# plot_trace_cmd_benchmark_dir("lu.B.x__yeti-3__v6.8.0-rc3__performance__2024-03-11", "Yeti 3 - v6.8-rc3 OMP -", show_run_index=show_run_index, sort_by=sort_by)
# plt.show()

# Plotter has no `show` method: use show_variations, like the other benchmark cells.
print("lu.C on dahu 15")
vp = Plotter()
vp.add_trace_cmd_directory("_trace-cmd/lu.C.x__dahu-15__v6.8.0-rc3__performance__2024-03-09", "Dahu 15 - v6.8-rc3 OMP -", sort_by=sort_by)
vp.show_variations(name_filters = ["none", "sockorder"], show_labels=show_run_index)


print("lu.C on yeti 1")
vp = Plotter()
vp.add_trace_cmd_directory("_trace-cmd/lu.C.x__yeti-1__v6.8.0-rc3__performance__2024-03-11", "Yeti 1 - v6.8-rc3 OMP -", sort_by=sort_by)
vp.show_variations(name_filters = ["none", "sockorder"], show_labels=show_run_index)

### MG

In [None]:
# MG, thread placement comparison: every run directory is shown (no name filter).
sort_by='duration'
# sort_by='run_index'
# show_run_index = True
show_run_index = False

print("mg.D on dahu 15")
vp = Plotter()
vp.add_trace_cmd_directory("_trace-cmd/mg.D.x__dahu-15__v6.8.0-rc3__performance__2024-03-09", sort_by=sort_by)
vp.show_variations(show_labels=show_run_index)


### SP

In [None]:
# SP, thread placement comparison: three result directories, same name filters.
sort_by='duration'
# sort_by='run_index'
# show_run_index = True
show_run_index = False

print("sp.B on dahu 13")
vp = Plotter()
vp.add_trace_cmd_directory("_trace-cmd/sp.B.x__dahu-13__v6.8.0-rc3__performance__2024-03-09", sort_by=sort_by)
vp.show_variations(name_filters = ["none", "sockorder", "sequential"], show_labels=show_run_index)

# sp.B on yeti 1 (no title is printed for this plot)
vp = Plotter()
vp.add_trace_cmd_directory("_trace-cmd/sp.B.x__yeti-1__v6.8.0-rc3__performance__2024-03-11", sort_by=sort_by)
vp.show_variations(name_filters = ["none", "sockorder", "sequential"], show_labels=show_run_index)

# sp.C on dahu 13 (no title is printed for this plot)
vp = Plotter()
vp.add_trace_cmd_directory("_trace-cmd/sp.C.x__dahu-13__v6.8.0-rc3__performance__2024-03-09", sort_by=sort_by)
vp.show_variations(name_filters = ["none", "sockorder", "sequential"], show_labels=show_run_index)

# init_runs_plot()
# plot_trace_cmd_benchmark_dir("sp.B.x__dahu-13__v6.8.0-rc3__performance__2024-03-09", "Dahu 13 - v6.8-rc3 OMP -", show_run_index=show_run_index, sort_by=sort_by)
# plt.show()

# print("sp.B on yeti 1")
# init_runs_plot()
# plot_trace_cmd_benchmark_dir("sp.B.x__yeti-1__v6.8.0-rc3__performance__2024-03-11", "Yeti 1 - v6.8-rc3 OMP -", show_run_index=show_run_index, sort_by=sort_by)
# plt.show()

# print("sp.C on dahu 13")
# init_runs_plot()
# plot_trace_cmd_benchmark_dir("sp.C.x__dahu-13__v6.8.0-rc3__performance__2024-03-09", "Dahu 13 - v6.8-rc3 OMP -", show_run_index=show_run_index, sort_by=sort_by)
# plt.show()

### UA

In [None]:
# Plot settings for the UA cell (run indexes are displayed on the points).
sort_by='duration'
# sort_by='run_index'
show_run_index = True
# show_run_index = False


# # plot_trace_cmd_benchmark_dir("ua.C.x__dahu-13__v6.8.0-rc3__performance__2024-03-09", "Dahu 13 - v6.8-rc3 OMP -", show_run_index=show_run_index, sort_by=sort_by)
# plot_trace_cmd_runs(get_result_dir_path("ua.C.x__dahu-13__v6.8.0-rc3__performance__2024-03-09/nb-disabled-none"), "NUMA balancing disabled", show_run_index=show_run_index, sort_by=sort_by)
# plot_trace_cmd_runs(get_result_dir_path("ua.C.x__dahu-13__v6.8.0-rc3__performance__2024-03-09/nb-enabled-none"), "NUMA balancing enabled", show_run_index=show_run_index, sort_by=sort_by)

# print("ua.C on dahu 13")
# vp = Plotter()
# vp.add_trace_cmd_directory("_trace-cmd/ua.C.x__dahu-13__v6.8.0-rc3__performance__2024-03-09", sort_by=sort_by)
# vp.show_variations(name_filters = ["none", "sockorder", "sequential"], show_labels=show_run_index)

# UA on yeti 1: same result directory plotted twice, first the broken
# work-conservation time, then the run duration.
print("ua.C on yeti 1")
vp = Plotter()
vp.add_trace_cmd_directory("_trace-cmd/ua.C.x__yeti-1__v6.8.0-rc3__performance__2024-03-11", data="broken_wc_time", sort_by=sort_by)
vp.show_values(name_filters = ["none"], show_labels=show_run_index)

# Drop the first set of series before loading the durations.
vp.reset()
vp.add_trace_cmd_directory("_trace-cmd/ua.C.x__yeti-1__v6.8.0-rc3__performance__2024-03-11", data="duration", sort_by=sort_by)
vp.show_values(name_filters = ["none"], show_labels=show_run_index)




# init_runs_plot()
# plot_trace_cmd_benchmark_dir("ua.C.x__yeti-1__v6.8.0-rc3__performance__2024-03-11", "Yeti 1 - v6.8-rc3 OMP -", show_run_index=show_run_index, sort_by=sort_by)

### Multiapp - BT and CG

In [None]:
# Multi-application run (bt.B together with cg.C), plotted with the
# function-based helpers instead of Plotter.
sort_by='duration'
# sort_by='run_index'
# show_run_index = True
show_run_index = False

print("bt.B and cg.C on dahu 9")
init_runs_plot()
plot_trace_cmd_benchmark_dir("bt.B.x-and-cg.C.x__dahu-9__v6.8.0-rc3__performance__2024-03-11", "Dahu 9 - v6.8-rc3 -", show_run_index=show_run_index, sort_by=sort_by)
plt.show()

# Perf stats

In [None]:
def get_perf_stats(run_dir_path) -> List[float]:
    """Return the run times stored in the JSON file of a run directory.

    The file is expected to hold a ``results`` list whose first entry has a
    ``times`` list (presumably a hyperfine JSON export -- TODO confirm).

    Returns:
        The ``times`` list, or an empty list when the directory contains no
        ``.json`` file. When several are present a warning is printed and the
        first one in alphabetical order is used.
    """
    # Sorted so that the selected file does not depend on os.listdir ordering.
    hyperfine_run_files = sorted(file for file in os.listdir(run_dir_path) if file.endswith(".json"))
    if not hyperfine_run_files :
        return []
    if len(hyperfine_run_files) > 1 :
        print("WARNING : More than 1 hyperfine run files found in", run_dir_path)

    with open(os.path.join(run_dir_path, hyperfine_run_files[0])) as f:
        data = json.load(f)
    return data['results'][0]['times']


def plot_hyperfine_benchmark_dir(benchmark_dir_name, sort_values = True):
    """Plot the "nb" and "no_nb" run times of a benchmark directory.

    Args:
        benchmark_dir_name: directory name, resolved with get_result_dir_path;
            its ``nb`` and ``no_nb`` sub-directories are read when they exist.
        sort_values: plot the run times in increasing order.
    """
    dir_path = get_result_dir_path(benchmark_dir_name)
    nb_dir_path = os.path.join(dir_path, "nb")
    no_nb_dir_path = os.path.join(dir_path, "no_nb")

    # NOTE(review): get_hyperfine_runtimes and plot_hyperfine_run are not defined
    # in the visible part of this notebook -- confirm where they come from.
    times_nb = get_hyperfine_runtimes(nb_dir_path) if os.path.exists(nb_dir_path) else None
    times_no_nb = get_hyperfine_runtimes(no_nb_dir_path) if os.path.exists(no_nb_dir_path) else None

    if sort_values:
        # Either list is None when its directory is missing.
        if times_nb :
            times_nb = sorted(times_nb)
        if times_no_nb :
            times_no_nb = sorted(times_no_nb)
    # init_runs_plot()
    if times_nb :
        plot_hyperfine_run(times_nb, "With NUMA balancing", nb_enabled_color, False)
    if times_no_nb :
        plot_hyperfine_run(times_no_nb, "Without NUMA balancing", nb_disabled_color, False)
    # Parenthesised: otherwise the conditional applies to the whole concatenation
    # and the unsorted title loses the directory name.
    plot_title = benchmark_dir_name + (" sorted" if sort_values else " unsorted")
    plt.title(plot_title)

# Sample perf-stat result file used to try out the parsing code below.
test_file_result_path = "cg.C.x__dahu-11__v6.8.0-rc3__performance__2024-03-17/nb-disabled-none/cg.C.x__dahu-11__v6.8.0-rc3__performance__2024-03-17__nb-disabled-none__99.txt"
test_file_path = get_result_dir_path(test_file_result_path)





def parse_perf_stat_result_file(file_path: str) -> Tuple[Optional[float], list] :
    """Parse one benchmark output file containing JSON perf counters.

    The file is expected to contain a ``Time in seconds = <number>`` line and
    one JSON object per line between the first ``{`` and the last ``}``.

    Returns:
        A ``(time, json_objects)`` tuple: `time` is the parsed number of seconds
        (None when the line is absent) and `json_objects` the list of decoded
        JSON objects (empty when the file contains none).

    Raises:
        json.JSONDecodeError: when a non-blank line of the JSON section is not
            valid JSON.
    """
    # Read the file given by the caller (not the notebook-level sample path).
    with open(file_path, 'r') as file:
        text = file.read()

    nas_time = None
    nas_time_match = re.search(r'Time in seconds\s+=\s+(\d+(?:\.\d+)?)', text)
    if nas_time_match :
        nas_time = float(nas_time_match.group(1))

    json_begin_pos = text.find('{')
    json_end_pos = text.rfind('}')
    if json_begin_pos == -1 or json_end_pos < json_begin_pos :
        return nas_time, []

    json_text = text[json_begin_pos:json_end_pos + 1]
    # Blank lines inside the JSON section are skipped: json.loads rejects them.
    json_objects = [json.loads(line) for line in json_text.split('\n') if line.strip()]

    return nas_time, json_objects

def get_perf_stats_run(run_dir_path: str) :
    """Parse every ``.txt`` result file of a run directory.

    Returns:
        One parse_perf_stat_result_file result per file, in alphabetical
        order of the file names.
    """
    files = sorted(file for file in os.listdir(run_dir_path) if file.endswith(".txt"))
    # os.listdir yields bare names: join them with the directory before opening.
    return [parse_perf_stat_result_file(os.path.join(run_dir_path, file)) for file in files]
    

# We want 2 things :
# 1. A graph with a line per directory
# 2. A graph combining all folders with time in x axis and value in y


def read_perf_benchmark_dir(benchmark_dir_name: str, label_prefix: str = "", sort_by = None, show_run_index: bool = False) :
    """Read the perf results of a benchmark directory and plot its run durations.

    Args:
        benchmark_dir_name: directory name, resolved with get_result_dir_path.
        label_prefix: text prepended (followed by a space) to each run
            directory name to build its legend entry.
        sort_by: optional column used to re-sort the runs before plotting.
        show_run_index: annotate the points with their run index.
    """
    benchmark_dir_path = get_result_dir_path(benchmark_dir_name)
    contents = os.listdir(benchmark_dir_path)

    # Extract events that can be plotted
    # init_runs_plot()
    for c in contents :
        dir_path = os.path.join(benchmark_dir_path, c)
        if not os.path.isdir(dir_path) :
            continue

        # NOTE(review): the parsed perf results are not used yet; only the
        # trace-cmd durations are plotted below.
        perf_results = get_perf_stats_run(dir_path)

        df = get_runs_dataframe(dir_path)
        if df is None :
            continue
        if sort_by is not None :
            df.sort_values(sort_by, ignore_index=True, inplace=True)
        # color = trace_cmd_folders_color[c] if c in trace_cmd_folders_color else "tab:gray"
        # Keyword arguments: passed positionally, the label would land on `data`.
        plot_trace_cmd_run(df, label=label_prefix + " " + c, color=None, init_plot=False, show_run_index=show_run_index)
    


# read_perf_stat_result is not defined in this notebook; the parser is
# parse_perf_stat_result_file.
parse_perf_stat_result_file(test_file_path)

# json_objects

# json_text = f"\"values\" : [{json_text}]"

# json.loads('{"counter-value" : "0.000000", "unit" : "", "event" : "offcore_response.all_data_rd.l3_miss_local_dram.snoop_miss_or_no_fwd", "event-runtime" : 170372680655, "pcnt-running" : 26.00, "metric-value" : 0.000000, "metric-unit" : "(null)"}')

# NUMA balancing comparison for EEVDF

### BT

In [None]:
# BT, NUMA balancing comparison: one figure per benchmark class (B then C).
sort_by='duration'
# sort_by='run_index'
# show_run_index = True
show_run_index = False

init_runs_plot()
print("bt.B dahu")
# plot_trace_cmd_benchmark_dir("bt.B.29-02.dahu-2.v6.1.0-17", "Dahu 2 - v6.1.0-17 -", show_run_index=show_run_index, sort_by=sort_by)
plot_trace_cmd_benchmark_dir("bt.B.dahu-17.v6.8-rc3", "Dahu 17 - v6.8-rc3 -", show_run_index=show_run_index, sort_by=sort_by)
# plot_trace_cmd_benchmark_dir("bt.B.x__dahu-6__v6.8.0-rc6+__2024-03-05", "Dahu 6 - v6.8-rc6 prints -", show_run_index=show_run_index, sort_by=sort_by)

# get_result_dir_path(benchmark_dir_name)
# plot_trace_cmd_runs(get_result_dir_path("bt.B.x__dahu-6__v6.8.0-rc6+__2024-03-05/nb-scan-only"), "Scan only", show_run_index=show_run_index, sort_by=sort_by)
plt.show()

init_runs_plot()
print("bt.C dahu")
# plot_trace_cmd_benchmark_dir("bt.C.dahu.v6.1.0-17", "Dahu - v6.1.0-17 -", show_run_index=show_run_index, sort_by=sort_by)
plot_trace_cmd_benchmark_dir("bt.C.dahu-28.v6.8-rc3", "Dahu - v6.8-rc3 -", show_run_index=show_run_index, sort_by=sort_by)
plt.show()

# init_runs_plot()
# print("bt.C yeti")
# plot_trace_cmd_benchmark_dir("bt.C.yeti.v6.1.0-17", "Yeti - v6.1.0-17 -", show_run_index=show_run_index, sort_by=sort_by)
# plt.show()

### LU

In [None]:
# LU, NUMA balancing comparison: one figure for yeti, one for dahu.
sort_by='duration'
# sort_by='run_index'
show_run_index = True
init_runs_plot()
print("lu.C yeti")
# plot_trace_cmd_benchmark_dir("lu.C.yeti-4.v6.8-rc3", "Yeti 4 - v6.8-rc3 -", show_run_index=show_run_index, sort_by=sort_by)
plot_trace_cmd_benchmark_dir("lu.C.29-02.yeti-1.v6.8-rc6+", "Yeti 1 - v6.8-rc6+ toggles -", show_run_index=show_run_index, sort_by=sort_by)
# plot_trace_cmd_benchmark_dir("lu.C.x__yeti-1__v6.8.0-rc3__2024-03-06", "Yeti 1 - v6.8-rc3 OMP -", show_run_index=show_run_index, sort_by=sort_by)
plt.show()

init_runs_plot()
print("lu.C dahu")
plot_trace_cmd_benchmark_dir("lu.C.dahu-18.v6.8-rc3", "Dahu 18 - v6.8-rc3 -", show_run_index=show_run_index, sort_by=sort_by)
plt.show()

### UA

In [None]:
# UA, NUMA balancing comparison on yeti; the commented calls are alternative
# result directories that can be overlaid on the same figure.
sort_by='duration'
# sort_by='run_index'
init_runs_plot()
print("ua.C yeti")
show_run_index = True
# plot_trace_cmd_benchmark_dir("ua.C.01-03.yeti-2.v6.1.0", "Yeti 2 - v6.1.0 stock -", show_run_index=show_run_index, sort_by=sort_by)
plot_trace_cmd_benchmark_dir("ua.C.29-02.yeti-2.v6.8-rc6+", "Yeti 2 - v6.8-rc6+ toggles -", show_run_index=show_run_index, sort_by=sort_by)
# plot_trace_cmd_benchmark_dir("ua.C.x__yeti-1__v6.8.0-rc3__2024-03-06", "Yeti 1 - v6.8-rc3 OMP -", show_run_index=show_run_index, sort_by=sort_by)
# plot_trace_cmd_benchmark_dir("ua.C.x__yeti-3__v6.8.0-rc3__performance__2024-03-08", "Yeti 3 - v6.8-rc3 OMP -", show_run_index=show_run_index, sort_by=sort_by)
plt.show()

# init_runs_plot()
# print("ua.C dahu")
# plot_trace_cmd_benchmark_dir("ua.C.dahu-30.v6.1.0-17", "Dahu 30 - v6.1.0-17 -", show_run_index=show_run_index, sort_by=sort_by)
# plt.show()

#### ft.C - Yeti & Dahu

In [None]:
# ft.C, NUMA balancing comparison: one figure for yeti, one for dahu.
sort_by='duration'
# sort_by='run_index'
show_run_index = True
# show_run_index = False


print("ft.C on yeti")
init_runs_plot()
# plot_trace_cmd_benchmark_dir("ft.C.01-03.yeti-2.v6.1.0", "Yeti 2 - v6.1.0 stock -", show_run_index=show_run_index, sort_by=sort_by)
# plot_trace_cmd_benchmark_dir("ft.C.yeti-1.v6.1.0-17", "Yeti 1 - v6.1.0-17 -", show_run_index=show_run_index, sort_by=sort_by)
# plot_trace_cmd_benchmark_dir("ft.C.yeti-4.v6.8-rc3", "Yeti 4 - v6.8-rc3 -", show_run_index=show_run_index, sort_by=sort_by)
# The label names the machine the results directory was produced on (yeti-2).
plot_trace_cmd_benchmark_dir("ft.C.x__yeti-2__v6.8.0-rc3__2024-03-06", "Yeti 2 - v6.8-rc3 OMP -", show_run_index=show_run_index, sort_by=sort_by)
plt.show()


print("ft.C on dahu")
init_runs_plot()
# plot_trace_cmd_benchmark_dir("ft.C.dahu.v6.1.0-17", "Dahu - v6.1.0-17 -", show_run_index=show_run_index, sort_by=sort_by)
plot_trace_cmd_benchmark_dir("ft.C.x__dahu-7__v6.8.0-rc3__2024-03-04", "Dahu 7 - v6.8.0-rc3 -", show_run_index=show_run_index, sort_by=sort_by)
plt.show()


#### mg.D - Yeti & Dahu

In [None]:
# mg.D, NUMA balancing comparison on yeti.
# NOTE: show_run_index is not assigned in this cell (the assignment below is
# commented out), so the value left by a previously executed cell is used.
sort_by='duration'
# sort_by='run_index'
init_runs_plot()
# show_run_index = False
plot_trace_cmd_benchmark_dir("mg.D.28-02.yeti-1.v6.8-rc3", "Yeti 1 - v6.8-rc3 -", show_run_index=show_run_index, sort_by=sort_by)
# plot_trace_cmd_benchmark_dir("mg.D.x__yeti-2__v6.8.0-rc3__2024-03-06", "Yeti 2 - v6.8-rc3 OMP -", show_run_index=show_run_index, sort_by=sort_by)

# plot_trace_cmd_benchmark_dir("mg.d.dahu-28.v6.1.0-17", "Dahu - v6.1.0-17 -", show_run_index=show_run_index, sort_by=sort_by)

plt.show()

#### cg.C - Yeti & Dahu

In [None]:
# cg.C, NUMA balancing comparison: trace-cmd results on yeti 2, hyperfine
# results on dahu 18.
sort_by='duration'
# sort_by='run_index'
show_run_index = True

print("cg.C on yeti 2")
init_runs_plot()
plot_trace_cmd_benchmark_dir("cg.C.x__yeti-2__v6.8.0-rc3__2024-03-06", "Yeti 2 - v6.8-rc3 -", show_run_index=show_run_index, sort_by=sort_by)
plt.show()

print("cg.C on dahu 18")
init_runs_plot()
# plot_hyperfine_benchmark_dir takes (benchmark_dir_name, sort_values): it has no
# label parameter, and the label string formerly passed here was interpreted as
# a truthy sort_values. The default (sorted) gives the same plot.
plot_hyperfine_benchmark_dir("cg.C.dahu-18.v6.8-rc3")
# plot_trace_cmd_benchmark_dir("cg.C.dahu-18.v6.8-rc3", "Dahu 18 - v6.8-rc3 -", show_run_index=show_run_index, sort_by=sort_by)
plt.show()


### Phoronix build-kernel

In [None]:
# Sample Phoronix result location.
# NOTE(review): neither variable is read by the functions defined below in the
# visible code (they take their own dir_path) -- confirm they are still needed.
dir_path = get_result_dir_path("build-linux-kernel__dahu-31__v6.8.0-rc3__performance__2024-03-13")
file_location = "nb-enabled/build-linux-kernel__dahu-31__v6.8.0-rc3__performance__2024-03-13-phoronix.json"

def get_phoronix_results(dir_path) -> List[List[float]] :
    """Return the raw values of every result stored in a directory's JSON file.

    The first ``.json`` file of `dir_path` (alphabetical order) is read. For
    each entry of its top-level ``results`` mapping, the ``raw_values`` list of
    the first item of the entry's own ``results`` mapping is collected.

    Returns:
        One list of raw values per result, or an empty list when the directory
        contains no ``.json`` file.
    """
    # Restrict to JSON files and sort: os.listdir order is arbitrary and the
    # directory may contain other files.
    json_names = sorted(name for name in os.listdir(dir_path) if name.endswith(".json"))
    if not json_names :
        return []

    with open(os.path.join(dir_path, json_names[0])) as f:
        data = json.load(f)

    results = []
    for res in data['results'].values() :
        result_obj = list(res['results'].values())[0]
        results.append(result_obj['raw_values'])
    return results

# TODO Have the option not to focus on 0
# Benchmark dir is a parent directory that contain specific directories that contain themselves several runs
def plot_phoronix_benchmark_dir(benchmark_dir_name, should_sort = False) :
    """Plot every Phoronix result of every sub-directory of a benchmark directory.

    Each result becomes one line labelled ``"<result position> - <sub-directory>"``;
    with `should_sort`, its values are sorted in increasing order first.
    """
    benchmark_dir_path = get_result_dir_path(benchmark_dir_name)
    for entry in os.listdir(benchmark_dir_path) :
        entry_path = os.path.join(benchmark_dir_path, entry)
        if os.path.isdir(entry_path) :
            for position, raw_values in enumerate(get_phoronix_results(entry_path)) :
                if should_sort :
                    raw_values.sort()
                plot_hyperfine_run(raw_values, label = str(position) + " - " + entry, color = None, init_plot = False)


In [None]:
# Phoronix build-linux-kernel results, values sorted in increasing order.
init_runs_plot()
plot_phoronix_benchmark_dir("build-linux-kernel__dahu-31__v6.8.0-rc3__performance__2024-03-13", True)
plt.show()

### Phoronix svt-av1

In [None]:
# Phoronix svt-av1 results, values sorted in increasing order.
init_runs_plot()
plot_phoronix_benchmark_dir("svt-av1__dahu-31__v6.8.0-rc3__performance__2024-03-13", True)
plt.show()

### Phoronix dav1d

In [None]:
# Phoronix dav1d results, values sorted in increasing order.
init_runs_plot()
plot_phoronix_benchmark_dir("dav1d__dahu-25__v6.8.0-rc3__performance__2024-03-13", True)
plt.show()

### Phoronix lu.C

In [None]:
# Phoronix NPB results (stored under the "_phoronix" sub-directory), values
# sorted in increasing order.
init_runs_plot()
plot_phoronix_benchmark_dir("_phoronix/npb__yeti-1__v6.8.0-rc3__performance__2024-03-14", True)
plt.show()

#### Kernel v6.8.0-rc3

In [None]:
# The label prefix is the one used for this directory in the UA cell above.
plot_trace_cmd_benchmark_dir("ua.C.01-03.yeti-2.v6.1.0", "Yeti 2 - v6.1.0 stock -", show_run_index=False, sort_by='duration')

In [None]:
# Plot every benchmark directory whose name ends with "rc3".
# NOTE(review): trace_cmd_benchmark_dirs is not defined in the visible part of
# this notebook -- presumably a list of directory names under RESULTS_DIR_PATH.
for benchmark_dir_name in trace_cmd_benchmark_dirs :
    if not benchmark_dir_name.endswith('rc3'):
        continue
    print(benchmark_dir_name)
    # plot_trace_cmd_benchmark_dir resolves the name against RESULTS_DIR_PATH
    # itself and takes a label prefix as second argument.
    plot_trace_cmd_benchmark_dir(benchmark_dir_name, benchmark_dir_name + " -", show_run_index=False, sort_by='duration')
print("Done")