In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell execution so edits to imported code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from bench_lib.evaluation import benchmark_results_folder

# Root directory of the benchmark results, as reported by bench_lib.
rfolder = benchmark_results_folder()

# Result sub-folders (one per benchmarked model run) to include in this
# analysis. Commented-out entries are excluded from everything below.
folders = [
    # "gemma-3-4b-it",
    # "gemma-3-12b-it",
    # "gemma-3-27b-it_00",
    "qwen-2.5-vl",
    "gemini-2.5-pro-noschema",
]

# One JSONL log path per selected folder, stored as plain strings.
files = [
    str(rfolder / run_folder / "toxicainment_videos_log.jsonl")
    for run_folder in folders
]

In [3]:
import duckdb

# Load every selected benchmark log into a single DataFrame via DuckDB's
# read_json table function.
con = duckdb.connect()

# Render the paths as an explicit SQL list literal instead of interpolating
# Python's list repr: repr() switches to double quotes when a path contains an
# apostrophe (DuckDB reads those as identifiers) and doubles backslashes
# (which DuckDB does not unescape in plain string literals). Single quotes
# inside a path are escaped the SQL way, by doubling them.
file_list_sql = ", ".join("'" + path.replace("'", "''") + "'" for path in files)
query = f"SELECT * FROM read_json([{file_list_sql}])"
df = con.sql(query).df()

In [None]:
# Quick sanity check: show the first two rows of the loaded logs.
df.head(n=2)

In [None]:
# Per-model averages of the runtime, token, memory and frame columns.
runs_by_model = df.groupby("Model ID", as_index=False)
agg_df = runs_by_model.agg(
    avg_total_runtime=("Total_Runtime", "mean"),
    avg_model_runtime=("Model_Runtime", "mean"),
    avg_tokens_generated=("Tokens_Generated", "mean"),
    avg_peak_memory_alloc=("Peak_Memory_Allocated", "mean"),
    avg_peak_memory_reserved=("Peak_Memory_Reserved", "mean"),
    avg_total_frames=("Total_Frames", "mean"),
)

# Highest average model runtime first; values rounded to two decimals.
agg_df = agg_df.sort_values(by="avg_model_runtime", ascending=False)
agg_df = agg_df.round(decimals=2)
agg_df

# Runtime Plots

In [6]:
from bench_lib.evaluation import visualize_runtime


# Runtime scatter restricted to rows whose model ID contains "gemma", with the
# hue set to the model ID and the colorbar switched off.
gemma_runs = df.query("`Model ID`.str.contains('gemma')")
fig, _ = visualize_runtime(gemma_runs, hue="Model ID", plot_colorbar=False)
fig.savefig("imgs/runtime_scatter_g3_by_model.pdf", bbox_inches="tight")

In [7]:
# Same "gemma" subset, plotted with visualize_runtime's default settings.
# NOTE(review): the output name suggests the default colouring is by frame
# count - confirm against visualize_runtime.
gemma_runs = df.query("`Model ID`.str.contains('gemma')")
fig, _ = visualize_runtime(gemma_runs)
fig.savefig("imgs/runtime_scatter_g3_by_frames.pdf", bbox_inches="tight")

In [8]:
# Runtime scatter restricted to rows whose model ID contains "Qwen", with the
# hue set to the model ID and the colorbar switched off.
qwen_runs = df.query("`Model ID`.str.contains('Qwen')")
fig, _ = visualize_runtime(qwen_runs, hue="Model ID", plot_colorbar=False)
fig.savefig("imgs/runtime_scatter_qwenvl_by_model.pdf", bbox_inches="tight")

In [9]:
# Same "Qwen" subset, plotted with visualize_runtime's default settings.
# NOTE(review): the output name suggests the default colouring is by frame
# count - confirm against visualize_runtime.
qwen_runs = df.query("`Model ID`.str.contains('Qwen')")
fig, _ = visualize_runtime(qwen_runs)
fig.savefig("imgs/runtime_scatter_qwenvl_by_frames.pdf", bbox_inches="tight")

In [None]:
# Summary statistics of the total runtime for rows whose model ID contains
# "gemini".
gemini_runs = df.query("`Model ID`.str.contains('gemini')")
gemini_runs["Total_Runtime"].describe()

# Memory Plots

In [None]:
# Re-display the per-model aggregate table as the starting point for the
# memory analysis below.
agg_df

In [None]:
import pandas as pd
from bench_lib.evaluation import Cols
import numpy as np

# Build the per-model memory table used for the memory plot.
# dropna() removes every model with a missing aggregate.
# NOTE(review): presumably these are the API-hosted models that report no GPU
# memory figures - confirm.
mem_df = agg_df.dropna()

# Extract the model size (billions of parameters) from the model ID, e.g. the
# "4" in "...-4b-...". expand=False makes str.extract return a Series instead
# of a one-column DataFrame, so the new column is assigned from a plain Series.
# Every model ID that does not contain "gemma" is labelled "Qwen2.5-VL".
mem_df = mem_df.assign(
    model_size=(
        mem_df["Model ID"]
        .str.lower()
        .str.extract(r"-(\d*)[b-]-", expand=False)
        .astype(int)
    ),
    group=np.where(mem_df[Cols.model_id].str.contains("gemma"), "Gemma-3", "Qwen2.5-VL"),
)

# Take an explicit copy of the column subset: the .loc assignment below would
# otherwise write into a frame derived from a selection, which can raise
# SettingWithCopyWarning.
mem_df = mem_df[
    ["Model ID", "avg_peak_memory_alloc", "model_size", "group"]
].copy()

# Qwen 72B needed 2 to 3 H100 GPUs with 96 GB each, so its value is
# overridden with the midpoint estimate (2.5 GPUs x 96 GB).
qwen_72b_mem = 2.5 * 96
mem_df.loc[mem_df["Model ID"].str.contains("72B"), "avg_peak_memory_alloc"] = qwen_72b_mem
mem_df

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Log-log scatter of average peak GPU memory against model size, coloured by
# model family.
fig, ax = plt.subplots(figsize=(4, 4))
sns.scatterplot(
    data=mem_df,
    x="model_size",
    y="avg_peak_memory_alloc",
    hue="group",
    ax=ax,
)
ax.set_ylabel("Peak GPU Memory Allocated (GB)")
ax.set_xlabel("Model Params (Billions)")

# Switch both axes to log scale before placing the explicit ticks.
ax.set_xscale("log")
ax.set_yscale("log")
ax.grid(True, alpha=0.5)

# Plain-number ticks; the y ticks sit at three times the x tick values.
xticks = [1, 2, 3, 5, 10, 20, 50, 100]
yticks = [3 * tick for tick in xticks]
ax.set_xticks(xticks)
ax.set_xticklabels(xticks)
ax.set_yticks(yticks)
ax.set_yticklabels(yticks)

# Anchor the legend's upper-left corner at the axes' top-right corner, which
# places it outside the plot area.
ax.legend(title="Model Family", loc="upper left", bbox_to_anchor=(1, 1))
fig.savefig("imgs/mem_alloc_by_model.pdf", bbox_inches="tight")