# Charts for mojort paper

## Boilerplate

In [None]:
import pandas as pd
import seaborn as sns
from benchkit.charts.printedcharts import export_figure
import matplotlib.pyplot as plt
import benchkit
#import cairosvg
from benchkit.charts.dataframes import get_dataframe
import os
from pathlib import Path
from benchkit.utils.dir import gitmainrootdir
import re


In [None]:
# Root directory returned by benchkit's gitmainrootdir() (presumably the
# top-level git checkout -- confirm against benchkit).
repo_dir = gitmainrootdir()

# Destination of the generated artefacts. The commented-out line is the
# alternative target inside the LaTeX project of the paper.
# paper_dir = Path("/home/antonio/Dropbox/Applications/ShareLaTeX/rtns25_mojort")
paper_dir = Path("/tmp")

# Every exported figure goes below this directory; create it up front.
fig_dir = paper_dir / "figures/generated"
fig_dir.mkdir(parents=True, exist_ok=True)


In [None]:
def make_parent(
    path: Path,
) -> None:
    """Create the directory that is meant to contain *path*.

    The parent of *path* is resolved to an absolute location and created
    together with any missing ancestors. Nothing happens when the directory
    already exists, and *path* itself is never created.
    """
    Path(path).parent.resolve().mkdir(parents=True, exist_ok=True)


In [None]:
def export_fig(
    plot,
    path: Path,
) -> None:
    """Write *plot* to *path* through benchkit's ``export_figure``.

    The directory that will hold *path* is created first. The creation date
    handed to ``export_figure`` is pinned to 2024-12-10.
    """
    make_parent(path)

    creation_date = {
        "creation_year": 2024,
        "creation_month": 12,
        "creation_day": 10,
    }
    export_figure(plot=plot, path=path, **creation_date)


In [None]:
# Seaborn theme settings; set_config() passes them to sns.set_theme().
CONTEXT = "talk"
STYLE = "whitegrid"
PALETTE = "colorblind"
# Estimator settings. Only the string form is active, and it is not referenced
# in the visible part of this notebook.
# ESTIMATOR = np.median
ESTIMATOR_STR = "median"
# CONFINT = 'sd'
# Value of matplotlib's "text.usetex" rc entry set by set_config().
FIG_USE_LATEX_FONTS = True


In [None]:
def set_config(width=8, height=6, font_scale=1.15):
    """Apply the notebook-wide seaborn theme.

    Args:
        width: First component of the "figure.figsize" rc entry.
        height: Second component of the "figure.figsize" rc entry.
        font_scale: Font scaling factor forwarded to ``sns.set_theme``.
    """
    # matplotlib rc overrides: figure size, PDF font handling, LaTeX text.
    rc_overrides = {
        "figure.figsize": (width, height),
        "pdf.fonttype": 42,
        "pdf.use14corefonts": True,
        "text.usetex": FIG_USE_LATEX_FONTS,
    }
    sns.set_theme(
        context=CONTEXT,
        style=STYLE,
        palette=PALETTE,
        font_scale=font_scale,
        rc=rc_overrides,
    )


In [None]:
# Apply the theme with its default figure size and font scale.
set_config()


## Microbenches

### Load the microbenchmark results

In [None]:
# Gather every CSV of the microbenchmark campaign into a single dataframe.
# The directory is relative, so it is looked up from the current working
# directory of the notebook.
path_results = Path("results/microbenches/")
csv_paths = [
    path_results / name
    for name in os.listdir(path_results)
    if name.endswith(".csv")
]
# Load in sorted path order so the row order of the concatenation is stable.
dataframes = list(map(get_dataframe, sorted(csv_paths)))
dataframe = pd.concat(dataframes)
df = dataframe


FileNotFoundError: [Errno 2] No such file or directory: 'results/microbenches'

In [None]:
# Show the concatenated results as the cell output.
df


In [None]:
# Stop early when a column needed by the cells below is absent
required = {"language", "src_filename", "size", "runtime"}
missing = required.difference(df.columns)
assert not missing, f"Missing columns: {missing}"

# Parse language into family + opt level
# The family part is matched lazily: "-Ofast" only contains characters that a
# family name may contain too, so a greedy family would swallow it and the
# optimization level would be reported as "unspecified".
_LANG_OPT_RE = re.compile(
    r"^\s*([A-Za-z+\-]+?(?:\s*[A-Za-z\-]+?)??)\s*(-O[0-3]|-Ofast)?\s*$"
)


def split_lang(s: str):
    """Split a language label into ``(family, optimization level)``.

    A label is a family name (one or two words made of letters, ``+`` and
    ``-``) optionally followed by ``-O0`` .. ``-O3`` or ``-Ofast``; for
    example ``"C++ -O2"`` gives ``("C++", "-O2")``.

    Args:
        s: The label. Non-string values are converted with ``str()`` first.

    Returns:
        A ``(family, opt_level)`` tuple of strings. ``opt_level`` is
        ``"unspecified"`` when the label carries no optimization flag. A label
        that does not follow the expected shape is returned stripped and
        unsplit as the family, with ``"unspecified"`` as level.
    """
    text = str(s)
    m = _LANG_OPT_RE.match(text)
    if not m:
        # Use the stringified value: the raw one may not be a str at all.
        return text.strip(), "unspecified"
    return m.group(1).strip(), m.group(2) or "unspecified"

# Derive the family / optimization-level columns from the raw label
fam_opt = df["language"].apply(split_lang)
df["lang_family"] = fam_opt.str[0]
df["opt_level"]   = fam_opt.str[1]

# Orders for plotting (Mojo families first, then alphabetical)
opt_order  = ["-O1", "-O2", "-O3"]
size_order = sorted(df["size"].dropna().unique())
lang_order = sorted(df["lang_family"].dropna().unique(),
                    key=lambda s: ("mojo" not in s.lower(), s.lower()))

# Cast before slicing. Any optimization level outside opt_order (for example
# "unspecified" or "-Ofast") becomes NaN in the categorical column.
df["opt_level"]   = pd.Categorical(df["opt_level"],   categories=opt_order, ordered=True)
df["size"]        = pd.Categorical(df["size"],        categories=size_order, ordered=True)
df["lang_family"] = pd.Categorical(df["lang_family"], categories=lang_order, ordered=False)

# Now filter. The excluded levels are NaN after the cast, so matching on their
# original strings would not remove anything; drop the NaN rows instead.
df = df[df["opt_level"].notna()]
df = df[df["src_filename"] != "knmp"] # TODO remove


In [None]:
# Rows that can be drawn: a plotted optimization level and no missing value in
# any column the chart reads. The fresh 0..n-1 index avoids duplicate-label
# issues inside seaborn.
_plot_columns = ["src_filename", "size", "lang_family", "runtime", "opt_level"]
_has_plotted_opt = df["opt_level"].isin(opt_order)
plot_df = (
    df.loc[_has_plotted_opt]
    .dropna(subset=_plot_columns)
    .reset_index(drop=True)
)

# One facet per (src_filename, size) pair, identified by a flat label
_src_label = plot_df["src_filename"].astype(str)
_size_label = plot_df["size"].astype(str)
plot_df["panel"] = _src_label + " — size " + _size_label

# Order the facets by benchmark, then by numeric size (non-numeric sizes
# become NaN through errors="coerce")
plot_df["size_num"] = pd.to_numeric(_size_label, errors="coerce")
_panels = plot_df[["panel", "src_filename", "size_num"]].drop_duplicates()
_panels = _panels.sort_values(["src_filename", "size_num"])
panel_order = _panels["panel"].tolist()

# Restrict the hue order to the families that survived the filtering
_present_langs = plot_df["lang_family"].unique()
local_lang_order = [x for x in lang_order if x in _present_langs]

# Grouped bar chart: one facet per panel label (two facets per row), one bar
# per language family at each optimization level, each facet with its own y
# axis. Error bars are a 95% percentile interval.
g = sns.catplot(
    data=plot_df,
    x="opt_level", y="runtime",
    hue="lang_family",
    col="panel", col_order=panel_order, col_wrap=2,
    order=opt_order, hue_order=local_lang_order,
    kind="bar",
    errorbar=("pi", 95),
    capsize=0.15,
    sharey=False,
    height=3.8, aspect=1.15,
)

# NOTE(review): the y label still carries the placeholder "(units)" -- replace
# it once the unit of the "runtime" column is confirmed.
g.set_axis_labels("Optimization level", "Runtime (units)")
g.set_titles("{col_name}")
# NOTE(review): catplot may already have drawn a legend for the hue variable;
# check the rendered figure for a duplicated legend after this call.
g.add_legend(title="Language")
#plt.tight_layout()
plt.show()


In [None]:
# Save the microbenchmark chart as a PDF in the figure directory.
export_fig(g, fig_dir / "microbenches.pdf")


# cyclictest

In [None]:
# Load the per-run summary rows of the cyclictest campaign. The directory is
# relative to the current working directory of the notebook.
path_results = Path("results/cyclic")
csv_paths = [
    path_results / name
    for name in os.listdir(path_results)
    if name.endswith(".csv")
]
dataframes = [get_dataframe(csv_path) for csv_path in sorted(csv_paths)]

dataframe = pd.concat(dataframes)

# Each summary row names a sample file in its "datapath" column. Expand the
# row into one record per non-blank line of that file, storing the parsed
# number under "latency" next to a copy of the summary fields.
path_col = "datapath"
value_col = "latency"
frames = []
for _, row in dataframe.iterrows():
    summary = row.to_dict()
    with Path(row[path_col]).open("r") as samples:
        frames.extend(
            {**summary, value_col: float(text)}
            for text in map(str.strip, samples)
            if text
        )
df = pd.DataFrame(frames)


In [None]:
# Stop early when a column needed by the cells below is absent
required = {"language", "src_filename", "runtime"}
missing = required.difference(df.columns)
assert not missing, f"Missing columns: {missing}"

# Parse language into family + opt level, without losing raw rows
# The family part is matched lazily: "-Ofast" only contains characters that a
# family name may contain too, so a greedy family would swallow it and the
# optimization level would be reported as "unspecified".
_LANG_OPT_RE = re.compile(
    r"^\s*([A-Za-z+\-]+?(?:\s*[A-Za-z\-]+?)??)\s*(-O[0-3]|-Ofast)?\s*$"
)


def split_lang(s: str):
    """Split a language label into ``(family, optimization level)``.

    A label is a family name (one or two words made of letters, ``+`` and
    ``-``) optionally followed by ``-O0`` .. ``-O3`` or ``-Ofast``; for
    example ``"C++ -O2"`` gives ``("C++", "-O2")``.

    Args:
        s: The label. Non-string values are converted with ``str()`` first.

    Returns:
        A ``(family, opt_level)`` tuple of strings. ``opt_level`` is
        ``"unspecified"`` when the label carries no optimization flag. A label
        that does not follow the expected shape is returned stripped and
        unsplit as the family, with ``"unspecified"`` as level.
    """
    text = str(s)
    m = _LANG_OPT_RE.match(text)
    if not m:
        # Use the stringified value: the raw one may not be a str at all.
        return text.strip(), "unspecified"
    return m.group(1).strip(), m.group(2) or "unspecified"

# Derive the family / optimization-level columns from the raw label
fam_opt = df["language"].apply(split_lang)
df["lang_family"] = fam_opt.apply(lambda x: x[0])
df["opt_level"]   = fam_opt.apply(lambda x: x[1])

# Orders for nice, consistent axes (Mojo families first, then alphabetical)
opt_order  = ["-O1", "-O2", "-O3",]
lang_order = sorted(df["lang_family"].dropna().unique(), key=lambda s: ("mojo" not in s.lower(), s.lower()))

# Cast to categoricals for stable plotting order. Any optimization level
# outside opt_order (for example "unspecified" or "-Ofast") becomes NaN.
df["opt_level"]   = pd.Categorical(df["opt_level"],   categories=opt_order, ordered=True)
df["lang_family"] = pd.Categorical(df["lang_family"], categories=lang_order, ordered=False)

# Drop the rows whose level is not plotted. They are NaN after the cast, so a
# `!=` comparison against their original strings is True for every row and
# would not filter anything.
df = df[df["opt_level"].notna()]


In [None]:
# Grouped bar chart: one row of the grid per source file, one bar per language
# family at each optimization level, independent y axes.
# NOTE(review): the loading cell builds a per-sample "latency" column, but this
# chart plots "runtime" -- confirm which column is intended.
g = sns.catplot(
    data=df,
    x="opt_level", y="runtime",
    hue="lang_family",
    row="src_filename",
    order=opt_order, hue_order=lang_order,
    kind="bar",
    errorbar=("pi", 95),  # 95% percentile interval
    capsize=0.15,
    sharey=False,
    height=3.8, aspect=1.2
)


# gpu


In [None]:
# Gather every CSV of the GPU campaign into a single dataframe. The directory
# is relative to the current working directory of the notebook.
path_results = Path("results/gpu")
csv_paths = [
    path_results / name
    for name in os.listdir(path_results)
    if name.endswith(".csv")
]
# Load in sorted path order so the row order of the concatenation is stable.
dataframes = list(map(get_dataframe, sorted(csv_paths)))
dataframe = pd.concat(dataframes)
df = dataframe


In [None]:
# Ensure required columns are present. "size" is included because the GPU
# chart below uses it as its x axis.
required = {"language", "src_filename", "size", "runtime"}
missing = required - set(df.columns)
assert not missing, f"Missing columns: {missing}"

# Parse language into family + opt level, without losing raw rows
# The family part is matched lazily: "-Ofast" only contains characters that a
# family name may contain too, so a greedy family would swallow it and the
# optimization level would be reported as "unspecified".
_LANG_OPT_RE = re.compile(
    r"^\s*([A-Za-z+\-]+?(?:\s*[A-Za-z\-]+?)??)\s*(-O[0-3]|-Ofast)?\s*$"
)


def split_lang(s: str):
    """Split a language label into ``(family, optimization level)``.

    A label is a family name (one or two words made of letters, ``+`` and
    ``-``) optionally followed by ``-O0`` .. ``-O3`` or ``-Ofast``; for
    example ``"C++ -O2"`` gives ``("C++", "-O2")``.

    Args:
        s: The label. Non-string values are converted with ``str()`` first.

    Returns:
        A ``(family, opt_level)`` tuple of strings. ``opt_level`` is
        ``"unspecified"`` when the label carries no optimization flag. A label
        that does not follow the expected shape is returned stripped and
        unsplit as the family, with ``"unspecified"`` as level.
    """
    text = str(s)
    m = _LANG_OPT_RE.match(text)
    if not m:
        # Use the stringified value: the raw one may not be a str at all.
        return text.strip(), "unspecified"
    return m.group(1).strip(), m.group(2) or "unspecified"

# Derive the family / optimization-level columns from the raw label
fam_opt = df["language"].apply(split_lang)
df["lang_family"] = fam_opt.apply(lambda x: x[0])
df["opt_level"]   = fam_opt.apply(lambda x: x[1])

# Orders for nice, consistent axes (Mojo families first, then alphabetical)
opt_order  = ["-O1", "-O2", "-O3",]
lang_order = sorted(df["lang_family"].dropna().unique(), key=lambda s: ("mojo" not in s.lower(), s.lower()))

# Cast to categoricals for stable plotting order
df["opt_level"]   = pd.Categorical(df["opt_level"],   categories=opt_order, ordered=True)
df["lang_family"] = pd.Categorical(df["lang_family"], categories=lang_order, ordered=False)

# NOTE(review): "unspecified" and "-Ofast" are not categories of opt_level, so
# after the cast they are NaN and these two comparisons presumably keep every
# row. The GPU chart does not use opt_level, so confirm whether rows without
# an -O1/-O2/-O3 level should really be dropped before changing this.
df = df[df["opt_level"] != "unspecified"]
df = df[df["opt_level"] != "-Ofast"]


In [None]:
# Bar chart of runtime against input size, one bar colour per source file.
# No row/col facet is requested here.
g = sns.catplot(
    data=df,
    x="size", y="runtime",
    hue="src_filename",
    kind="bar",
    errorbar=("pi", 95),  # 95% percentile interval
    capsize=0.15,
    sharey=False,
    height=3.8, aspect=1.2
)


# knmp