# Generate plots from TensorBoard data

In [2]:
import polars as pl
import plotly.express as px


## Configs

In [None]:
# Plotly template name; passed as `template=THEME` to the `px.line` calls below
# so the figures share one look.
THEME = "plotly_white"

## Plots

### Baseline

In [27]:
# Read the baseline run's CSV; only its `Step` and `Value` columns are plotted.
baseline_training_reward = pl.read_csv(
    "../presentation_data/plotting/PPO_best_baseline_env=l2rpn_case14_sandbox_iterations=10000_2025-09-14_15_16_50_PPO_1.csv"
)

# Build the line chart and style it in one chain (each update_* call returns
# the figure): 22 pt axis titles, 18 pt tick labels, purple line.
fig = (
    px.line(
        data_frame=baseline_training_reward,
        x="Step",
        y="Value",
        template=THEME,
        labels=dict(Step="training_step", Value="reward"),
    )
    .update_xaxes(dict(title=dict(font=dict(size=22)), tickfont=dict(size=18)))
    .update_yaxes(dict(title=dict(font=dict(size=22)), tickfont=dict(size=18)))
    .update_traces(line_color="#612b94")
)
# A bare expression on the last line makes the notebook render the figure.
fig


### Improved reward

In [None]:
# Read the improved-reward run's CSV; only `Step` and `Value` are plotted.
improved_reward_training_reward = pl.read_csv(
    "../presentation_data/plotting/PPO_best_reward_env=l2rpn_case14_sandbox_iterations=10000_2025-09-14_17_06_53_PPO_1.csv"
)

# Line chart styled in a single chain: 22 pt axis titles, 18 pt tick labels,
# purple line.
fig = (
    px.line(
        data_frame=improved_reward_training_reward,
        x="Step",
        y="Value",
        template=THEME,
        labels=dict(Step="training_step", Value="reward"),
    )
    .update_xaxes(dict(title=dict(font=dict(size=22)), tickfont=dict(size=18)))
    .update_yaxes(dict(title=dict(font=dict(size=22)), tickfont=dict(size=18)))
    .update_traces(line_color="#612b94")
)
# Bare expression so the notebook renders the figure as the cell output.
fig

In [None]:
# Load the hyper-parameter tuning log for the PPO reward study.
tuning_iterations = pl.read_csv(
    "../presentation_data/tuning_logs/study_PPO_reward_2025-09-14_15:24:41.csv"
)

# Display the single best trial (highest `value`).
# Rows whose `value` is null are dropped before ranking: polars' sort defaults
# to `nulls_last=False`, so a trial without a recorded objective could otherwise
# be shown as the "best" row. NOTE(review): presumably nulls correspond to
# failed/pruned trials -- confirm against the study export.
# `.head(1)` keeps the result a one-row DataFrame and is safe on an empty frame.
(
    tuning_iterations
    .drop_nulls(subset="value")
    .sort("value", descending=True)
    .head(1)
)

Unnamed: 0_level_0,number,value,datetime_start,datetime_complete,duration,params_batch_size,params_gamma,params_learning_rate,params_n_steps,params_net_arch,params_safe_max_rho,state
i64,i64,f64,str,str,str,i64,f64,f64,i64,i64,f64,str
10,10,0.244196,"""2025-09-14 16:11:25.844427""","""2025-09-14 16:18:09.204066""","""0 days 00:06:43.359639""",32,0.962539,9e-06,8,400,0.993905,"""COMPLETE"""


### Masked actions

In [53]:
# Read the maskable-PPO run's CSV; only `Step` and `Value` are plotted.
maskable_training_reward = pl.read_csv(
    "../presentation_data/plotting/PPO_best_maskable_reward_env=l2rpn_case14_sandbox_iterations=10000_2025-09-14_20_37_50_PPO_1.csv"
)

# Line chart styled in a single chain: 22 pt axis titles, 18 pt tick labels,
# purple line.
fig = (
    px.line(
        data_frame=maskable_training_reward,
        x="Step",
        y="Value",
        template=THEME,
        labels=dict(Step="training_step", Value="reward"),
    )
    .update_xaxes(dict(title=dict(font=dict(size=22)), tickfont=dict(size=18)))
    .update_yaxes(dict(title=dict(font=dict(size=22)), tickfont=dict(size=18)))
    .update_traces(line_color="#612b94")
)
# Bare expression so the notebook renders the figure as the cell output.
fig

In [68]:
# Stack both runs into one long frame, tagging every row with a `model` label
# so plotly express can draw one trace per model.
comparison_maskable = pl.concat(
    [
        improved_reward_training_reward.with_columns(model=pl.lit("improved reward")),
        maskable_training_reward.with_columns(
            model=pl.lit("maskable + improved reward")
        ),
    ]
)

fig = px.line(
    comparison_maskable,
    x="Step",
    y="Value",
    color="model",
    # Pin each model's line colour explicitly instead of recolouring traces
    # afterwards with a catch-all `else`; a missing or renamed label now falls
    # back to the template palette rather than silently sharing a colour.
    color_discrete_map={
        "improved reward": "#612b94",
        "maskable + improved reward": "#26a6a6",
    },
    template=THEME,
    labels={
        "Step": "training_step",
        "Value": "reward",
    },
)

# 22 pt axis titles, 18 pt tick labels on both axes.
fig.update_xaxes({"title": {"font": {"size": 22}}, "tickfont": {"size": 18}})
fig.update_yaxes({"title": {"font": {"size": 22}}, "tickfont": {"size": 18}})
# Legend/title text at 18 pt; update_layout returns the figure, so as the last
# expression it also renders the plot.
fig.update_layout(legend_title_font_size=18, font_size=18)


In [69]:
# Re-render whatever figure `fig` currently holds (the comparison plot when the
# cells are run in order) with its legend hidden. This mutates `fig` in place.
fig.update_layout({"showlegend": False})

### Graph embeddings

In [75]:
# Read both graph-embedding runs up front; only the first one is plotted in
# this cell, the second frame is used by later cells.
graph_training_reward = pl.read_csv(
    "../presentation_data/plotting/PPO_best_graph_reward_env=l2rpn_case14_sandbox_iterations=10000_2025-09-15_20_54_44_PPO_1.csv"
)
maskable_graph_training_reward = pl.read_csv(
    "../presentation_data/plotting/PPO_best_maskable_graph_reward_env=l2rpn_case14_sandbox_iterations=10000_2025-09-15_18_51_44_PPO_1.csv"
)

# Line chart of the graph run, styled in a single chain: 22 pt axis titles,
# 18 pt tick labels, purple line.
fig = (
    px.line(
        data_frame=graph_training_reward,
        x="Step",
        y="Value",
        template=THEME,
        labels=dict(Step="training_step", Value="reward"),
    )
    .update_xaxes(dict(title=dict(font=dict(size=22)), tickfont=dict(size=18)))
    .update_yaxes(dict(title=dict(font=dict(size=22)), tickfont=dict(size=18)))
    .update_traces(line_color="#612b94")
)
# Bare expression so the notebook renders the figure as the cell output.
fig

In [76]:
# Line chart of the maskable + graph run (frame loaded in an earlier cell),
# styled in a single chain: 22 pt axis titles, 18 pt tick labels, purple line.
fig = (
    px.line(
        data_frame=maskable_graph_training_reward,
        x="Step",
        y="Value",
        template=THEME,
        labels=dict(Step="training_step", Value="reward"),
    )
    .update_xaxes(dict(title=dict(font=dict(size=22)), tickfont=dict(size=18)))
    .update_yaxes(dict(title=dict(font=dict(size=22)), tickfont=dict(size=18)))
    .update_traces(line_color="#612b94")
)
# Bare expression so the notebook renders the figure as the cell output.
fig

In [None]:
# Stack the three runs into one long frame, tagging every row with a `model`
# label so plotly express can draw one trace per model.
comparison_graph = pl.concat(
    [
        maskable_training_reward.with_columns(
            model=pl.lit("maskable + improved reward")
        ),
        graph_training_reward.with_columns(model=pl.lit("graph + improved reward")),
        maskable_graph_training_reward.with_columns(
            model=pl.lit("graph + maskable + improved reward")
        ),
    ]
)

fig = px.line(
    comparison_graph,
    x="Step",
    y="Value",
    color="model",
    # Explicit label -> colour table replaces the nested conditional that
    # recoloured traces after the fact; every model's colour is now visible at
    # a glance and no label relies on a catch-all default.
    color_discrete_map={
        "maskable + improved reward": "#dbb944",
        "graph + improved reward": "#26a6a6",
        "graph + maskable + improved reward": "#612b94",
    },
    template=THEME,
    labels={
        "Step": "training_step",
        "Value": "reward",
    },
)

# 22 pt axis titles, 18 pt tick labels on both axes.
fig.update_xaxes({"title": {"font": {"size": 22}}, "tickfont": {"size": 18}})
fig.update_yaxes({"title": {"font": {"size": 22}}, "tickfont": {"size": 18}})
# Legend/title text at 18 pt; update_layout returns the figure, so as the last
# expression it also renders the plot.
fig.update_layout(legend_title_font_size=18, font_size=18)


In [None]:
# Re-render whatever figure `fig` currently holds (the graph comparison plot
# when the cells are run in order) with its legend hidden. Mutates `fig` in place.
fig.update_layout({"showlegend": False})

In [None]:
# Display the single best trial (highest `value`) of the MaskableGraphPPO
# reward tuning study.
# Rows whose `value` is null are dropped before ranking: polars' sort defaults
# to `nulls_last=False`, so a trial without a recorded objective could otherwise
# be shown as the "best" row. NOTE(review): presumably nulls correspond to
# failed/pruned trials -- confirm against the study export.
# `.head(1)` keeps the result a one-row DataFrame and is safe on an empty frame.
(
    pl.read_csv(
        "../presentation_data/tuning_logs/study_MaskableGraphPPO_reward_2025-09-15_16:54:28.csv"
    )
    .drop_nulls(subset="value")
    .sort("value", descending=True)
    .head(1)
)

Unnamed: 0_level_0,number,value,datetime_start,datetime_complete,duration,params_batch_size,params_gamma,params_gnn_dropout_p,params_gnn_features_dim,params_gnn_hidden_dim,params_learning_rate,params_n_steps,params_safe_max_rho,state
i64,i64,f64,str,str,str,i64,f64,f64,i64,i64,f64,i64,f64,str
18,18,0.256969,"""2025-09-15 18:40:32.781241""","""2025-09-15 18:46:09.724127""","""0 days 00:05:36.942886""",16,0.9731,0.1,128,32,3e-06,64,0.90315,"""COMPLETE"""
