In [None]:
from pathlib import Path
import pandas as pd

In [None]:
# Column names read from the raw report CSVs.
AVG_NS = "Avg (ns)"
RANGE = "Range"
# Conversion factor: nanoseconds per millisecond.
NS_IN_MS = 1e6
# Only rows whose range name starts with this prefix are kept.
FORW = ":forward"
# Column names of the tidy frame returned by read_csv.
FORW_ONLY = "forward_only"
MODEL = "Model"
CONTEXT = "Context"
TIME = "Time (ms)"

def read_csv(path):
    """Load one report CSV and return its forward-pass timings in tidy form.

    Parameters
    ----------
    path
        Anything accepted by ``pandas.read_csv`` (path string, ``Path`` or
        file-like object). The file must contain the ``Range`` and
        ``Avg (ns)`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per matching range with the columns ``Model`` (str),
        ``Context`` (int), ``Time (ms)`` (average time converted from
        nanoseconds) and ``forward_only`` (bool).

    Rows are skipped when the range name is missing, does not start with
    ``:forward``, or does not follow the
    ``forward_only_<bool>_model_<name>_model.context_length_<n>`` pattern.
    """
    range_pattern = (
        r"forward_only_(true|false)_model_([A-Za-z0-9.]+)_model\.context_length_(\d+)"
    )

    raw = pd.read_csv(path)
    # na=False: a missing range name counts as "not a forward range" instead
    # of producing a NaN mask that boolean indexing rejects.
    is_forward = raw[RANGE].str.startswith(FORW, na=False)
    # Explicit copy so the column assignments below never touch a view of `raw`.
    df = raw.loc[is_forward, [RANGE, AVG_NS]].copy()

    df[[FORW_ONLY, MODEL, CONTEXT]] = df[RANGE].str.extract(range_pattern)
    # Ranges that start with the prefix but do not match the pattern yield NaN
    # in all three captures; drop them rather than failing in astype(int).
    df = df.dropna(subset=[FORW_ONLY, MODEL, CONTEXT])

    df[FORW_ONLY] = df[FORW_ONLY].map({"true": True, "false": False})
    df[CONTEXT] = df[CONTEXT].astype(int)
    df[TIME] = df[AVG_NS] / NS_IN_MS
    return df[[MODEL, CONTEXT, TIME, FORW_ONLY]]

In [None]:
# Collect the per-run CSV reports. Sorting makes the load order reproducible,
# and materialising the list lets us fail with a clear message instead of
# pandas' generic "No objects to concatenate" error.
reports = sorted(Path("raw_data/reports").glob("*.csv"))
if not reports:
    raise FileNotFoundError("No CSV reports found in raw_data/reports")
combined = pd.concat((read_csv(f) for f in reports), ignore_index=True)

MODEL_ORDER = ["small", "medium", "large", "xl", "2.7B"]
# A model name missing from the category list would silently become NaN and
# vanish from the report, so unknown names are appended after the known sizes.
extra_models = sorted(set(combined[MODEL].dropna()) - set(MODEL_ORDER))
combined[MODEL] = pd.Categorical(
    combined[MODEL], categories=MODEL_ORDER + extra_models, ordered=True
)


def _mean_time_table(runs):
    """Pivot `runs` into a Model x Context table of mean times (ms)."""
    return runs.pivot_table(
        index=MODEL, columns=CONTEXT, values=TIME, observed=False,
    )


def _format_time(value):
    """Format one timing with a single decimal, or "OOM" when it is missing."""
    return f"{value:.1f}" if pd.notna(value) else "OOM"


forward_infer = _mean_time_table(combined[combined[FORW_ONLY]])
forward_train = _mean_time_table(combined[~combined[FORW_ONLY]])

# Build the common row/column labels explicitly: every model and context seen
# in either table, with models kept in category order.
model_dtype = combined[MODEL].dtype
seen_models = set(forward_infer.index) | set(forward_train.index)
models = pd.CategoricalIndex(
    [m for m in model_dtype.categories if m in seen_models],
    dtype=model_dtype,
    name=MODEL,
)
contexts = pd.Index(
    sorted(set(forward_infer.columns) | set(forward_train.columns)),
    name=CONTEXT,
)

# Align both tables to the same labels (cells without a measurement become
# NaN), format each cell, then join the two sides as "<infer> / <train>".
infer_text = (
    forward_infer
    .reindex(index=models, columns=contexts)
    .apply(lambda col: col.map(_format_time))
)
train_text = (
    forward_train
    .reindex(index=models, columns=contexts)
    .apply(lambda col: col.map(_format_time))
)
report = infer_text + " / " + train_text

In [None]:
# Show the formatted report table as this cell's output.
report

In [None]:
# Render the report as a LaTeX table and print it for copy-pasting.
# position="H" makes pandas emit `\begin{table}[H]` directly, so no string
# post-processing is needed ([H] presumably relies on the LaTeX `float`
# package in the target document). escape=False keeps the cell text verbatim.
tex = report.to_latex(
    index=True,
    caption="Forward infer / train time (ms)",
    position="H",
    escape=False,
)
print(tex)