# Scheduling benchmarks analysis

## Library

In [None]:
import matplotlib.pyplot as plt
from typing import Optional, List, Tuple, Dict
import pandas as pd
import os
import re
import json
import subprocess
import numpy as np
from scipy import stats
from collections import defaultdict

#### Global variables

In [None]:
# Name of the trace-cmd runtime cache file (not referenced by the code visible in this notebook).
TRACE_CMD_CACHE_FILENAME = "trace_cmd_runtimes"
# Results root, relative to the notebook's working directory.
RESULTS_DIR_PATH = "../results"
# Results root as an absolute path on the analysis machine.
ABSOLUTE_RESULTS_DIR_PATH = "/home/cgachod/analysis/results"


def get_result_dir_path(result_dir_name):
    """Return the path of ``result_dir_name`` under the relative results root."""
    relative_path = os.path.join(RESULTS_DIR_PATH, result_dir_name)
    return relative_path


def get_absolute_result_dir_path(result_dir_name) :
    """Return the path of ``result_dir_name`` under the absolute results root."""
    absolute_path = os.path.join(ABSOLUTE_RESULTS_DIR_PATH, result_dir_name)
    return absolute_path

#### Visualization params

In [None]:
# Line colors for the "nb enabled" / "nb disabled" setups.
# NOTE(review): "nb" presumably stands for NUMA balancing (result directories are
# named e.g. "nb-disabled-sockorder") -- confirm. Neither color is referenced by
# the plotting code visible in this notebook.
nb_enabled_color = "tab:orange"
nb_disabled_color = "tab:blue"

# Figure geometry used by plot_runs (values are passed to Figure.set_size_inches).
figure_height = 10 # fixed figure height
min_figure_width = 40 # lower bound on the figure width
figure_width_coeff = 0.3 # figure width added per plotted run

# Width of the plotted lines.
line_width = 1.0

In [None]:
def init_runs_plot():
    """Set the axis labels and grid of the current pyplot axes for a per-run plot."""
    # plt.figure(figsize=(fig_width, 5))
    # plt.legend(loc="upper left")
    plt.xlabel("Nth run")
    plt.ylabel("Run time (seconds)")
    # Horizontal grid lines on major and minor ticks, vertical ones on major ticks only.
    plt.grid(axis="y", which="both")
    plt.grid(axis="x", which="major")
    
    # ax = plt.gca()
    # ax.ticklabel_format(axis="x", useOffset=1.0)

def plot_runs(values: List[float], label = None, color = None, init_plot = True) :
    """Draw one series of per-run values on the current pyplot figure.

    Args:
        values: One value per run; the x coordinate is the run index.
        label: Optional legend label; the legend is only drawn when it is truthy.
        color: Optional matplotlib color for the line.
        init_plot: When true, call ``init_runs_plot`` before plotting.
    """
    if init_plot:
        init_runs_plot()

    run_count = len(values)
    plt.plot(range(run_count), values, label=label, color=color, linewidth=line_width)

    # Only extend the x ticks / limits when this series is longer than what
    # the axes currently show, so a shorter series never shrinks the plot.
    existing_ticks, _ = plt.xticks()
    if run_count > len(existing_ticks):
        plt.xticks(range(run_count), rotation=45)
        plt.xlim(-1, run_count)

    # Grow (never shrink) the figure so that long series stay readable.
    figure = plt.gcf()
    wanted_width = max(int(run_count * figure_width_coeff), min_figure_width)
    if wanted_width > figure.get_size_inches()[0]:
        figure.set_size_inches(wanted_width, figure_height)

    if label:
        plt.legend(loc="upper left")

# Perf stats

In [None]:
def get_perf_stats(run_dir_path) -> List[float]:
    """Return the run times stored in the hyperfine JSON export of a run directory.

    Args:
        run_dir_path: Directory expected to hold one ``.json`` hyperfine export.

    Returns:
        The ``results[0].times`` list of the export, or an empty list when the
        directory contains no ``.json`` file.

    When several ``.json`` files are present a warning is printed and the
    first one in lexicographic order is used.
    """
    # os.listdir() order is arbitrary; sorting makes the selected file deterministic.
    hyperfine_run_files = sorted(
        entry for entry in os.listdir(run_dir_path) if entry.endswith(".json")
    )
    if not hyperfine_run_files:
        return []
    if len(hyperfine_run_files) > 1:
        print("WARNING : More than 1 hyperfine run files found in", run_dir_path)

    selected_path = os.path.join(run_dir_path, hyperfine_run_files[0])
    with open(selected_path, encoding="utf-8") as f:
        data = json.load(f)
    return data['results'][0]['times']


# Idea: build a DataFrame holding all the counter values,
# with one column per (min, max, sum) x counter
# and one row per run.






class PerfStatRun :
    """Event counters of a single run, parsed from one perf stat result file.

    The file is expected to contain, between its first ``{`` and its last
    ``}``, one JSON object per line, each with at least an ``"event"`` and a
    ``"counter-value"`` key. It may also contain a ``Time in seconds = <float>``
    line, whose value is stored under the ``"nas_runtime"`` event.

    Attributes:
        file_path: Path of the parsed file.
        event_counters: Mapping event name -> value, including derived ratios.
        available_events: Set of the keys of ``event_counters``.
    """

    # Accepts "12.34", "12." , "12" and ".5"; the decimal point is a literal dot.
    _NAS_TIME_PATTERN = re.compile(r'Time in seconds\s+=\s+(\d+(?:\.\d*)?|\.\d+)')

    # (derived ratio name, numerator event, denominator event)
    _RATIO_COUNTERS = (
        ("branch-misses-ratio", "branch-misses", "branches"),
        ("l2_rqsts.all_demand_miss-ratio", "l2_rqsts.all_demand_miss", "l2_rqsts.all_demand_references"),
        ("l2_rqsts.demand_data_rd_miss-ratio", "l2_rqsts.demand_data_rd_miss", "l2_rqsts.all_demand_data_rd"),
        ("LLC-load-misses-ratio", "LLC-load-misses", "LLC-loads"),
        ("LLC-store-misses-ratio", "LLC-store-misses", "LLC-stores"),
        ("cache-misses-ratio", "cache-misses", "cache-references"),
    )

    def __init__(self, file_path) -> None :
        self.file_path = file_path
        self.event_counters = self.parse_file(file_path)
        self.event_counters = self.compute_additional_counters(self.event_counters)
        self.available_events = set(self.event_counters.keys())

    def compute_additional_counters(self, event_counters: Dict[str, float]) :
        """Add the miss ratios whose two input events are present.

        The mapping is updated in place and returned. A ratio whose
        denominator is zero is stored as NaN instead of raising.
        """
        for ratio_name, numerator, denominator in self._RATIO_COUNTERS:
            if numerator in event_counters and denominator in event_counters:
                divisor = event_counters[denominator]
                if divisor:
                    event_counters[ratio_name] = event_counters[numerator] / divisor
                else:
                    event_counters[ratio_name] = float("nan")
        return event_counters

    def parse_file(self, file_path) -> Dict[str, float]:
        """Parse ``file_path`` and return its event -> value mapping.

        Files whose first JSON record has a ``"cpu"`` key are handed over to
        ``parse_file_per_cpu``.
        """
        with open(file_path, 'r') as file:
            text = file.read()

        records = self._load_json_records(text)
        if records and "cpu" in records[0]:
            return self.parse_file_per_cpu(file_path)
        return self._collect_counters(text, records)

    def parse_file_per_cpu(self, file_path) -> Dict[str, float]:
        """Parse a file whose records carry a ``"cpu"`` key.

        NOTE(review): records are keyed by event name only, so when an event
        appears once per CPU the value of the last record wins. This keeps the
        existing behavior; confirm whether an aggregate (sum / max) is wanted.
        """
        with open(file_path, 'r') as file:
            text = file.read()
        return self._collect_counters(text, self._load_json_records(text))

    def _load_json_records(self, text: str) -> list:
        """Decode the JSON lines found between the first '{' and the last '}'."""
        json_begin_pos = text.find('{')
        json_end_pos = text.rfind('}')
        if json_begin_pos == -1 or json_end_pos < json_begin_pos:
            return []
        json_text = text[json_begin_pos:json_end_pos + 1]
        # Blank lines inside the JSON section are skipped rather than decoded.
        return [json.loads(line) for line in json_text.splitlines() if line.strip()]

    def _collect_counters(self, text: str, records: list) -> Dict[str, float]:
        """Build the event -> value mapping from the raw text and its records."""
        event_counters = {}
        nas_time_match = self._NAS_TIME_PATTERN.search(text)
        if nas_time_match:
            # Only reported when the benchmark printed its own timing line.
            event_counters["nas_runtime"] = float(nas_time_match.group(1))
        for record in records:
            event_counters[record["event"].strip()] = float(record["counter-value"])
        return event_counters

    def get_event_value(self, event: str) :
        """Return the value recorded for ``event`` (KeyError if unavailable)."""
        return self.event_counters[event]
    
    
class PerfStatBenchmark :
    """All the perf stat runs of a benchmark, grouped by setup directory.

    A benchmark directory contains one sub-directory per setup; each
    sub-directory contains one ``.txt`` result file per run.

    Attributes:
        available_events: Sorted list of the events present in every run of
            every sub-directory.
        results_per_directory: Mapping sub-directory name -> list of runs.
    """

    def __init__(self, benchmark_dir_path) -> None:
        self.available_events, self.results_per_directory = self.read_benchmark_dir(benchmark_dir_path)

    def sort_by_event(self, event_name: str) :
        """Sort the runs of every directory in place by increasing ``event_name`` value."""
        for results in self.results_per_directory.values():
            results.sort(key=lambda run: run.get_event_value(event_name))

    def get_values_for_event(self, event_name: str) -> Dict[str, List[float]] :
        """Return, per directory, the ``event_name`` value of each run in current order."""
        return {
            dir_name: [run.get_event_value(event_name) for run in results]
            for dir_name, results in self.results_per_directory.items()
        }

    def print_available_events(self) :
        """Print the numbered list of events shared by all runs."""
        print("Available events")
        for i, e in enumerate(self.available_events) :
            print(f"{i}. {e}")

    def parse_runs(self, runs_dir_path: str) -> "List[PerfStatRun]" :
        """Parse every ``.txt`` file of ``runs_dir_path``, in file-name order."""
        files = sorted(file for file in os.listdir(runs_dir_path) if file.endswith(".txt"))
        return [PerfStatRun(os.path.join(runs_dir_path, file)) for file in files]

    def read_benchmark_dir(self, benchmark_dir_path: str) -> "Tuple[List[str], Dict[str, List[PerfStatRun]]]" :
        """Parse every sub-directory of ``benchmark_dir_path``.

        Returns:
            ``(available_events, runs_results)`` where ``available_events`` is
            the sorted intersection of the events of all parsed runs and
            ``runs_results`` maps each sub-directory name to its runs.
            Sub-directories without any run are kept with an empty list and do
            not restrict ``available_events``.
        """
        available_events = None
        runs_results = {}

        # Sorted so that directory (and hence legend) order is reproducible.
        for c in sorted(os.listdir(benchmark_dir_path)):
            dir_path = os.path.join(benchmark_dir_path, c)
            if not os.path.isdir(dir_path):
                continue

            perf_results = self.parse_runs(dir_path)
            runs_results[c] = perf_results
            if not perf_results:
                # set.intersection() needs at least one set to work on.
                continue

            dir_events = set.intersection(*(pr.available_events for pr in perf_results))
            if available_events is None:
                available_events = dir_events
            else:
                # In-place update: a bare .intersection() call would discard its result.
                available_events &= dir_events

        return sorted(available_events or ()), runs_results
        
        

def parse_perf_stat_result_file(file_path: str) -> PerfStatRun :
    """Parse the perf stat result file at ``file_path`` into a ``PerfStatRun``."""
    parsed_run = PerfStatRun(file_path)
    return parsed_run

def get_perf_stats_run(run_dir_path: str) :
    """Parse every ``.txt`` result file found directly in ``run_dir_path``.

    Returns the parsed runs as a list, in ``os.listdir`` order.
    """
    txt_files = []
    for entry in os.listdir(run_dir_path):
        if entry.endswith(".txt"):
            txt_files.append(entry)

    parsed_runs = []
    for entry in txt_files:
        result_path = os.path.join(run_dir_path, entry)
        parsed_runs.append(parse_perf_stat_result_file(result_path))
    return parsed_runs
    

# We want 2 things :
# 1. A graph with a line per directory
# 2. A graph combining all folders with time in x axis and value in y



    
# Idea is one graph per value, or simply change the value for the graph
    
def read_perf_stat_benchmark_dir(dir_name: str) :
    """Load the benchmark stored in ``dir_name`` under the relative results root."""
    benchmark_dir_path = get_result_dir_path(dir_name)
    return PerfStatBenchmark(benchmark_dir_path)


def plot_perf_stat_benchmark_for_event(perf_stat_benchmark: PerfStatBenchmark, event_name: str, sort_by_event: str = "") :
    """Plot one event of a benchmark, with one line per setup directory.

    Args:
        perf_stat_benchmark: Benchmark whose runs are plotted.
        event_name: Name of the event whose values are drawn.
        sort_by_event: When non-empty, the runs of every directory are first
            sorted in place by this event; when empty, the current run order
            is kept.
    """
    # An empty sort key means "do not sort": sorting by "" would look up a
    # non-existent event and raise a KeyError.
    if sort_by_event:
        perf_stat_benchmark.sort_by_event(sort_by_event)

    values_for_event = perf_stat_benchmark.get_values_for_event(event_name)
    print("Values for event :", event_name)
    init_runs_plot()
    for dir_name, values in values_for_event.items():
        # The axes were initialised once above, so each line is only overlaid.
        plot_runs(values=values, label=dir_name, init_plot=False)
    plt.show()
        

def plot_perf_stat_benchmark_for_events(perf_stat_benchmark: PerfStatBenchmark, events: Optional[List[str]] = None, sort_by_event: str = "") :
    """Plot several events of a benchmark, one figure per event.

    Args:
        perf_stat_benchmark: Benchmark whose runs are plotted.
        events: Events to plot; ``None`` or an empty list plots every event
            listed in ``perf_stat_benchmark.available_events``.
        sort_by_event: Event used to order the runs; defaults to
            ``"duration_time"`` when empty.
    """
    if not sort_by_event:
        sort_by_event = "duration_time"
    if not events:
        events = perf_stat_benchmark.available_events

    for e in events:
        plot_perf_stat_benchmark_for_event(perf_stat_benchmark, e, sort_by_event)


# Events plotted by the analysis cells below, in plotting order.
# "nas_runtime" and the "*-ratio" entries are not raw perf events: they are
# added by PerfStatRun while parsing a result file.
events = [
    "nas_runtime",
    "duration_time",
    "system_time",
    "user_time",
    "instructions",
    "cycles",
    "migrations",
    "context-switches",
    "cache-misses",
    "cache-misses-ratio",
    "LLC-loads",
    "LLC-load-misses",
    "LLC-load-misses-ratio",
    "LLC-stores",
    "LLC-store-misses",
    "LLC-store-misses-ratio",
    "cycle_activity.stalls_l3_miss",
    "l2_rqsts.all_demand_miss",
    "l2_rqsts.all_demand_miss-ratio",
    "l2_rqsts.demand_data_rd_miss",
    "l2_rqsts.demand_data_rd_miss-ratio",
    "mem_load_l3_miss_retired.local_dram",
    "mem_load_l3_miss_retired.remote_dram",
    "offcore_response.all_data_rd.l3_miss_local_dram.snoop_miss_or_no_fwd",
    "offcore_response.all_data_rd.l3_miss_remote_dram.snoop_miss_or_no_fwd",
    "branch-misses",
    "branch-misses-ratio"
]

In [None]:
# 2 things :
# 1. Plot the boxes per event and use the fact that 1 setup is slower than the others
# 2. Mix all the data from a directory and try to derive a correlation to runtime for each event


def parse_per_cpu_perf_file(file_path) -> Tuple[pd.DataFrame, Dict]:
    """Parse a per-CPU perf stat result file.

    The file is expected to contain, between its first ``{`` and its last
    ``}``, one JSON object per line with ``"event"`` and ``"counter-value"``
    keys, plus a ``"cpu"`` key for every event other than ``duration_time``,
    ``user_time`` and ``system_time``.

    Returns:
        ``(events_df, meta_values)``:

        * ``events_df`` has one column per per-CPU event, with the values
          ordered by CPU id, plus the derived columns whose input events are
          all present.
        * ``meta_values`` maps ``"duration_time"``, ``"user_time"`` and
          ``"system_time"`` to their value when reported, and
          ``"nas_runtime"`` to the value of the ``Time in seconds = ...`` line
          when the file contains one.
    """
    with open(file_path, 'r') as file:
        text = file.read()

    meta_values = {}
    # The decimal point is matched literally; plain integers are accepted too.
    nas_time_match = re.search(r'Time in seconds\s+=\s+(\d+(?:\.\d*)?|\.\d+)', text)
    if nas_time_match:
        meta_values["nas_runtime"] = float(nas_time_match.group(1))

    json_begin_pos = text.find('{')
    json_end_pos = text.rfind('}')
    if json_begin_pos == -1 or json_end_pos < json_begin_pos:
        json_lines = []
    else:
        json_lines = text[json_begin_pos:json_end_pos + 1].splitlines()

    event_counters: Dict[str, list] = defaultdict(list)
    for line in json_lines:
        if not line.strip():
            continue
        json_object = json.loads(line)
        event_str = json_object["event"].strip()
        if event_str in ["duration_time", "user_time", "system_time"]:
            meta_values[event_str] = float(json_object["counter-value"])
        else:
            event_counters[event_str].append((int(json_object["cpu"]), float(json_object["counter-value"])))

    # Sorting the (cpu, value) pairs orders each column by CPU id.
    events_df = pd.DataFrame({
        event: [value for _cpu, value in sorted(values)]
        for event, values in event_counters.items()
    })

    def has(*columns):
        # A derived column is only computed when all of its inputs were measured.
        return all(column in events_df.columns for column in columns)

    if has("LLC-load-misses", "LLC-store-misses"):
        events_df["LLC-all-misses"] = events_df["LLC-load-misses"] + events_df["LLC-store-misses"]
    if has("LLC-load-misses", "LLC-loads"):
        events_df["LLC-load-misses-ratio"] = events_df["LLC-load-misses"] / events_df["LLC-loads"]
    if has("LLC-store-misses", "LLC-stores"):
        events_df["LLC-store-misses-ratio"] = events_df["LLC-store-misses"] / events_df["LLC-stores"]

    remote = "mem_load_l3_miss_retired.remote_dram"
    local = "mem_load_l3_miss_retired.local_dram"
    if has(remote, local):
        events_df["mem_load_l3_miss_retired.all"] = events_df[remote] + events_df[local]
        if has("LLC-all-misses"):
            events_df["mem_load_l3_miss_retired-over-LLC-all"] = events_df["mem_load_l3_miss_retired.all"] / events_df["LLC-all-misses"]
        events_df["mem_load_l3_miss_retired.remote_over_local_dram"] = events_df[remote] / events_df[local]
        events_df["mem_load_l3_miss_retired.remote_over_total"] = events_df[remote] / events_df["mem_load_l3_miss_retired.all"]
        events_df["mem_load_l3_miss_retired.local_over_total"] = events_df[local] / events_df["mem_load_l3_miss_retired.all"]

    return events_df, meta_values
    
    # result_dict = {}
    # for event, values in event_counters.items() :
    #     if event in ["nas_runtime", "duration_time", "user_time", "system_time"] :
    #         result_dict[f"once:{event}"] = min(values)
    #     else :
    #         result_dict[f"min:{event}"] = min(values)
    #         result_dict[f"max:{event}"] = max(values)
    #         result_dict[f"sum:{event}"] = sum(values)
            
    # # Computing new events
    
    # # ratio remote l3 miss / local l3 miss
    # # ratio l3 miss / l3 access
    
    # return result_dict


def parse_dir(dir_path) :
    """Summarise every per-CPU result file of ``dir_path`` into one DataFrame.

    Each ``.txt`` file yields one row holding the run-wide values returned by
    ``parse_per_cpu_perf_file`` plus, for every per-CPU column, its
    ``min:<name>``, ``max:<name>`` and ``sum:<name>`` across CPUs.

    Returns:
        A DataFrame with one row per file, in lexicographic file-name order
        (empty when the directory holds no ``.txt`` file).
    """
    rows = []
    for file_name in sorted(os.listdir(dir_path)):
        if not file_name.endswith(".txt"):
            continue
        events_df, result_dict = parse_per_cpu_perf_file(os.path.join(dir_path, file_name))
        for series_name, series in events_df.items():
            result_dict[f"min:{series_name}"] = series.min()
            result_dict[f"max:{series_name}"] = series.max()
            result_dict[f"sum:{series_name}"] = series.sum()
        rows.append(result_dict)
    # Building the frame once avoids re-copying all previous rows for every file.
    return pd.DataFrame(rows)



def combine_benchmark_dir(benchmark_dir_name) :
    """Merge the runs of every setup of a benchmark into a single DataFrame.

    Args:
        benchmark_dir_name: Name of the benchmark directory, looked up under
            the ``_perf`` sub-directory of the relative results root.

    Returns:
        The row-wise concatenation of ``parse_dir`` applied to each
        sub-directory (visited in sorted name order), re-indexed from 0; an
        empty DataFrame when no sub-directory contains any run.
    """
    benchmark_dir_path = get_result_dir_path("_perf/" + benchmark_dir_name)

    frames = []
    for entry in sorted(os.listdir(benchmark_dir_path)):
        dir_path = os.path.join(benchmark_dir_path, entry)
        if not os.path.isdir(dir_path):
            continue
        dir_df = parse_dir(dir_path)
        # Empty frames carry no rows and are left out of the concatenation.
        if not dir_df.empty:
            frames.append(dir_df)

    if not frames:
        # pd.concat() raises on an empty list.
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


In [None]:
# Sanity check on a single sample file: parse it, then print each column of the
# per-CPU table summed over all of its rows.
events_df, meta_values = parse_per_cpu_perf_file(get_result_dir_path("_perf/cg.C.x__dahu-21__v6.8.0-rc3__performance__2024-04-10-sample/nb-disabled-sockorder/cg.C.x__dahu-21__v6.8.0-rc3__performance__2024-04-10__nb-disabled-sockorder__1.txt"))

for col in events_df.columns :
    print(col, sum(events_df[col]))

In [None]:
# One row per run, all setup directories of the benchmark combined.
perf_df = combine_benchmark_dir("cg.C.x__dahu-21__v6.8.0-rc3__performance__2024-04-10")

In [None]:
# Display the combined table.
perf_df

In [None]:
# Raise the display limit so that the whole correlation series is shown.
pd.set_option('display.max_rows', 500)
# Correlation of every column with the benchmark-reported runtime.
correlations = perf_df.corr()['nas_runtime']
# print(perf_df.corr()['once:nas_runtime'].sort_values())



# new_df = correlations.to_frame(name = "corr")
# new_df["abs_corr"] = new_df["corr"].map(lambda x : abs(x))
# new_df.sort_values(by = "abs_corr", ascending=False)

# Highest correlation first.
correlations.sort_values(ascending=False)

In [None]:
# TODO Print the box plots : one graph per event, on each graph 3 functions for all the setups

### cg.C on Dahu 11

In [None]:
# Load every run of the benchmark, grouped by setup directory.
bench = read_perf_stat_benchmark_dir("_perf/cg.C.x__dahu-11__v6.8.0-rc3__performance__2024-03-17")
# bench.print_available_events()

# test_file_result_path = "cg.C.x__dahu-11__v6.8.0-rc3__performance__2024-03-17/nb-disabled-none/cg.C.x__dahu-11__v6.8.0-rc3__performance__2024-03-17__nb-disabled-none__99.txt"
# result = parse_perf_stat_result_file(get_result_dir_path(test_file_result_path))
# # print(result.event_counters)

# One figure per event of the `events` list, with the runs of each setup
# ordered by increasing benchmark-reported runtime.
plot_perf_stat_benchmark_for_events(bench, events=events, sort_by_event="nas_runtime")


TODOs :
- Try and focus on the worst core instead of showing an average across all cores