In [16]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import re

# Load the CSV files
xgb_df = pd.read_csv('results/xgb.csv')
ollama_df = pd.read_csv('results/ollama.csv')

In [17]:
# Show xgb_df in a sortable, interactive table
display(xgb_df)

Unnamed: 0,machine,CPU,GPU,python,platform,bench,dataset_rows,gpu,train_median_s,infer_median_s,auc,seed,timestamp
0,PC_AL_2025,AMD Ryzen 5 9600X,Nvidia RTX 5060 16GB,3.13.5,Windows-11-10.0.26100-SP0,xgboost,100000,True,0.8,0.01,0.81023,42,2025-07-23T21:02:39
1,PC_AL_2025,AMD Ryzen 5 9600X,Nvidia RTX 5060 16GB,3.13.5,Windows-11-10.0.26100-SP0,xgboost,100000,False,0.59,0.01,0.81072,42,2025-07-23T21:02:43
2,PC_AL_2025,AMD Ryzen 5 9600X,Nvidia RTX 5060 16GB,3.13.5,Windows-11-10.0.26100-SP0,xgboost,1000000,True,1.6,0.01,0.82277,42,2025-07-23T21:02:54
3,PC_AL_2025,AMD Ryzen 5 9600X,Nvidia RTX 5060 16GB,3.13.5,Windows-11-10.0.26100-SP0,xgboost,1000000,False,3.99,0.1,0.82276,42,2025-07-23T21:03:11
4,PC_AL_2025,AMD Ryzen 5 9600X,Nvidia RTX 5060 16GB,3.13.5,Windows-11-10.0.26100-SP0,xgboost,full,True,9.93,0.06,0.82486,42,2025-07-23T21:04:25
5,PC_AL_2025,AMD Ryzen 5 9600X,Nvidia RTX 5060 16GB,3.13.5,Windows-11-10.0.26100-SP0,xgboost,full,False,50.37,1.04,0.82495,42,2025-07-23T21:07:42
6,PC_AL_2015,Intel i5-4690K,Nvidia GTX 1060 6GB,3.13.5,Windows-10-10.0.19045-SP0,xgboost,100000,True,1.66,0.01,0.80975,42,2025-07-23T23:13:28
7,PC_AL_2015,Intel i5-4690K,Nvidia GTX 1060 6GB,3.13.5,Windows-10-10.0.19045-SP0,xgboost,100000,False,1.79,0.03,0.80892,42,2025-07-23T23:13:36
8,PC_AL_2015,Intel i5-4690K,Nvidia GTX 1060 6GB,3.13.5,Windows-10-10.0.19045-SP0,xgboost,1000000,True,5.8,0.04,0.82262,42,2025-07-23T23:14:04
9,PC_AL_2015,Intel i5-4690K,Nvidia GTX 1060 6GB,3.13.5,Windows-10-10.0.19045-SP0,xgboost,1000000,False,13.69,0.32,0.82227,42,2025-07-23T23:14:57


In [18]:
# Build machine label dynamically (CPU + GPU)
xgb_df["machine_label"] = xgb_df["CPU"] + " + " + xgb_df["GPU"]

# Acceleration label
xgb_df["acc_label"] = xgb_df["gpu"].astype(str).replace({"True": "GPU", "False": "CPU"})

# ------------------ Build dynamic and formatted dataset_rows labels -------------
def format_thousands(s: str) -> str:
    n = int(s)
    # thousands separator '.'
    return f"{n:,}".replace(",", ".")

unique_rows = xgb_df["dataset_rows"].astype(str).unique()

numeric_vals = sorted([int(v) for v in unique_rows if re.fullmatch(r"\d+", v)])
labels_formatted = [format_thousands(str(v)) for v in numeric_vals]

# ensure 'full' appears last
if "full" in unique_rows:
    labels_formatted.append("full")

# Map original rows to formatted label
mapping_label = {str(v): format_thousands(str(v)) for v in numeric_vals}
mapping_label["full"] = "full"

xgb_df["rows_label"] = xgb_df["dataset_rows"].astype(str).map(mapping_label)

# Make categorical for ordering (ascending numeric then 'full')
xgb_df["rows_label"] = pd.Categorical(xgb_df["rows_label"], categories=labels_formatted, ordered=True)

# ------------------ Machine-related dynamic parts ------------------
machine_labels = sorted(xgb_df["machine_label"].unique())

# pattern shapes cycled
patterns_cycle = ["", "/", "x", "\\", "-", "|", "+", "."]
pattern_map = {machine: patterns_cycle[i % len(patterns_cycle)]
               for i, machine in enumerate(machine_labels)}

# Color map for acceleration
color_map = {"GPU": px.colors.qualitative.Pastel[0],
             "CPU": px.colors.qualitative.Pastel[1]}

# ------------------ Build traces ------------------
traces = []
meta = []  # store metadata per trace

for machine in machine_labels:
    for acc in ["GPU", "CPU"]:
        subset = xgb_df[(xgb_df["machine_label"] == machine) & (xgb_df["acc_label"] == acc)]
        if subset.empty:
            continue
        traces.append(
            go.Bar(
                y=subset["rows_label"],
                x=subset["train_median_s"],
                orientation="h",
                showlegend=False,
                marker=dict(color=color_map[acc], pattern=dict(shape=pattern_map[machine])),
                text=[f"{t:.2f}" for t in subset["train_median_s"]],
                textposition="outside",
                hovertemplate=(
                    f"{machine}<br>%{{y}} righe · {acc}<br>%{{x:.2f}} s"
                    "<extra></extra>"
                ),
            )
        )
        meta.append({"machine": machine, "acc": acc})

fig = go.Figure(data=traces)

# ------------------ Visibility masks ------------------
n = len(traces)
vis_all = [True]*n
vis_gpu_only = [m["acc"] == "GPU" for m in meta]
vis_cpu_only = [m["acc"] == "CPU" for m in meta]

machine_vis_dict = {machine: [m["machine"] == machine for m in meta]
                    for machine in machine_labels}

# ------------------ Dropdowns ------------------
dropdown_acc = dict(
    buttons=[
        dict(label="Tutte le accelerazioni", method="update",
             args=[{"visible": vis_all},
                   {"title": "Tempo di training – tutte le accelerazioni"}]),
        dict(label="Solo GPU", method="update",
             args=[{"visible": vis_gpu_only},
                   {"title": "Tempo di training – solo GPU"}]),
        dict(label="Solo CPU", method="update",
             args=[{"visible": vis_cpu_only},
                   {"title": "Tempo di training – solo CPU"}]),
    ],
    direction="down",
    x=1.02,
    y=1,
    xanchor="left",
    yanchor="top",
    showactive=True,
    bgcolor="white",
    bordercolor="lightgray"
)

machine_buttons = [
    dict(label="Tutte le macchine", method="update",
         args=[{"visible": vis_all},
               {"title": "Tempo di training – tutte le macchine"}])
]
for machine in machine_labels:
    machine_buttons.append(
        dict(label=machine, method="update",
             args=[{"visible": machine_vis_dict[machine]},
                   {"title": f"Tempo di training – {machine}"}])
    )

dropdown_machine = dict(
    buttons=machine_buttons,
    direction="down",
    x=1.02,
    y=0.8,
    xanchor="left",
    yanchor="top",
    showactive=True,
    bgcolor="white",
    bordercolor="lightgray"
)

# ------------------ Layout updates ------------------
fig.update_layout(
    barmode="group",
    title="Tempo di training – tutte le accelerazioni e macchine",
    xaxis_title="Train time [s]",
    yaxis_title="Numero di righe del dataset",
    updatemenus=[dropdown_acc, dropdown_machine],
    margin=dict(r=200)  # space for menus
)

fig.update_yaxes(categoryorder='array', categoryarray=labels_formatted)

fig.show()

In [19]:
# Show ollama_df with tabulate
display(ollama_df)

Unnamed: 0,machine,CPU,GPU,python,platform,bench,model,gpu,wall_min_s,wall_med_s,wall_max_s,tok_min_s,tok_med_s,tok_max_s,seed,timestamp
0,PC_AL_2025,AMD Ryzen 5 9600X,Nvidia RTX 5060 16GB,3.13.5,Windows-11-10.0.26100-SP0,ollama,phi3:3.8b,True,6.37,6.45,14.81,133.99,136.25,136.62,42,2025-07-23T21:08:10
1,PC_AL_2025,AMD Ryzen 5 9600X,Nvidia RTX 5060 16GB,3.13.5,Windows-11-10.0.26100-SP0,ollama,phi3:3.8b,False,31.59,34.68,34.7,21.43,21.44,21.55,42,2025-07-23T21:09:52
2,PC_AL_2025,AMD Ryzen 5 9600X,Nvidia RTX 5060 16GB,3.13.5,Windows-11-10.0.26100-SP0,ollama,qwen3:4b,True,13.93,14.19,19.01,105.24,106.41,107.55,42,2025-07-23T21:10:39
3,PC_AL_2025,AMD Ryzen 5 9600X,Nvidia RTX 5060 16GB,3.13.5,Windows-11-10.0.26100-SP0,ollama,qwen3:4b,False,92.86,92.92,142.73,12.89,13.18,13.19,42,2025-07-23T21:16:08
4,PC_AL_2025,AMD Ryzen 5 9600X,Nvidia RTX 5060 16GB,3.13.5,Windows-11-10.0.26100-SP0,ollama,qwen3:14b,True,26.28,26.34,29.25,40.56,40.74,40.75,42,2025-07-23T21:17:30
5,PC_AL_2025,AMD Ryzen 5 9600X,Nvidia RTX 5060 16GB,3.13.5,Windows-11-10.0.26100-SP0,ollama,qwen3:14b,False,232.64,232.64,258.1,4.85,4.85,4.85,42,2025-07-23T21:29:34
6,PC_AL_2025,AMD Ryzen 5 9600X,Nvidia RTX 5060 16GB,3.13.5,Windows-11-10.0.26100-SP0,ollama,gemma3:4b,True,5.55,5.55,7.44,108.46,109.05,109.22,42,2025-07-23T21:29:53
7,PC_AL_2025,AMD Ryzen 5 9600X,Nvidia RTX 5060 16GB,3.13.5,Windows-11-10.0.26100-SP0,ollama,gemma3:4b,False,26.23,27.92,28.35,18.41,18.6,18.74,42,2025-07-23T21:31:16
8,PC_AL_2025,AMD Ryzen 5 9600X,Nvidia RTX 5060 16GB,3.13.5,Windows-11-10.0.26100-SP0,ollama,gemma3:12b,True,11.89,11.91,13.64,44.15,44.31,44.51,42,2025-07-23T21:31:54
9,PC_AL_2025,AMD Ryzen 5 9600X,Nvidia RTX 5060 16GB,3.13.5,Windows-11-10.0.26100-SP0,ollama,gemma3:12b,False,63.53,64.72,66.1,6.63,6.75,6.76,42,2025-07-23T21:35:09


In [20]:
# Acceleration label
ollama_df["acc_label"] = ollama_df["gpu"].astype(str).replace({"True": "GPU", "False": "CPU"})

# Machine label = CPU + GPU (dynamic)
ollama_df["machine_label"] = ollama_df["CPU"] + " + " + ollama_df["GPU"]

# Sorted list of unique models (appearance order preserved)
model_order = list(dict.fromkeys(ollama_df["model"].tolist()))  # preserve first appearance order
ollama_df["model"] = pd.Categorical(ollama_df["model"], categories=model_order, ordered=True)

# Sorted list of unique machines
machine_labels = list(dict.fromkeys(ollama_df["machine_label"].tolist()))

# Pattern shapes (cycle if more machines)
patterns_cycle = ["", "/", "x", "\\", "-", "|", "+", "."]
pattern_map = {
    machine: patterns_cycle[i % len(patterns_cycle)]
    for i, machine in enumerate(machine_labels)
}

# Colour mapping for GPU/CPU
color_map = {"GPU": px.colors.qualitative.Pastel[0], "CPU": px.colors.qualitative.Pastel[1]}

# ------------------ Build traces ------------------
traces = []
meta = []

for machine in machine_labels:
    for acc in ["GPU", "CPU"]:  # fixed order
        subset = ollama_df[(ollama_df["machine_label"] == machine) & (ollama_df["acc_label"] == acc)]
        if subset.empty:
            continue
        traces.append(
            go.Bar(
                y=subset["model"],
                x=subset["tok_med_s"],   # average tokens per second
                orientation="h",
                showlegend=False,
                marker=dict(color=color_map[acc], pattern=dict(shape=pattern_map[machine])),
                text=[f"{t:.2f}" for t in subset["tok_med_s"]],
                textposition="outside",
                hovertemplate=(
                    f"{machine}<br>Modello: %{{y}} · {acc}<br>%{{x:.2f}} token/s"
                    "<extra></extra>"
                ),
            )
        )
        meta.append({"machine": machine, "acc": acc})

fig = go.Figure(data=traces)

# ------------------ Visibility masks ------------------
n = len(traces)
vis_all = [True]*n
vis_gpu_only = [m["acc"] == "GPU" for m in meta]
vis_cpu_only = [m["acc"] == "CPU" for m in meta]

machine_vis_dict = {machine: [m["machine"] == machine for m in meta]
                    for machine in machine_labels}

# ------------------ Dropdowns ------------------
dropdown_acc = dict(
    buttons=[
        dict(label="Tutte le accelerazioni", method="update",
             args=[{"visible": vis_all},
                   {"title": "Token al secondo – tutte le accelerazioni"}]),
        dict(label="Solo GPU", method="update",
             args=[{"visible": vis_gpu_only},
                   {"title": "Token al secondo – solo GPU"}]),
        dict(label="Solo CPU", method="update",
             args=[{"visible": vis_cpu_only},
                   {"title": "Token al secondo – solo CPU"}]),
    ],
    direction="down",
    x=1.02,
    y=1,
    xanchor="left",
    yanchor="top",
    showactive=True,
    bgcolor="white",
    bordercolor="lightgray"
)

machine_buttons = [
    dict(label="Tutte le macchine", method="update",
         args=[{"visible": vis_all},
               {"title": "Token al secondo – tutte le macchine"}])
]
for machine in machine_labels:
    machine_buttons.append(
        dict(label=machine, method="update",
             args=[{"visible": machine_vis_dict[machine]},
                   {"title": f"Token al secondo – {machine}"}])
    )

dropdown_machine = dict(
    buttons=machine_buttons,
    direction="down",
    x=1.02,
    y=0.8,
    xanchor="left",
    yanchor="top",
    showactive=True,
    bgcolor="white",
    bordercolor="lightgray"
)

# ------------------ Layout ------------------
fig.update_layout(
    barmode="group",
    title="Token al secondo – tutte le accelerazioni e macchine",
    xaxis_title="Token/s",
    yaxis_title="Modello LLM",
    updatemenus=[dropdown_acc, dropdown_machine],
    height = len(ollama_df) * 40,
    margin=dict(r=220)
)

fig.show()