In [None]:
# This notebook is used to do some general comparison between the chosen six combinations.

In [None]:
# Preparation for observation and action plotting
import pandas as pd
from src.main.rl.evaluation.plots.phase3_plots import (
    prepare_one_combination_actions_and_obs_for_analysis,
)
from src.main.rl.utils.constants import color_mapping
from src.main.rl.utils.constants import (
    scaling_factors_scenario_1,
    scaling_factors_scenario_2,
    scaling_factors_scenario_3,
    action_dimensions_german,
)
import matplotlib.pyplot as plt

# Model directories of the six selected combinations, two per scenario.
# The position in this list (plus one) is the combination number used in the plots.
paths = [
    "src/main/rl/models/scenario1/training_04_06/scenario1_ActionSpaceOption3Wrapper_ObservationOption4Wrapper_None_RewardOption2Wrapper_TD3_training_04_06",
    "src/main/rl/models/scenario1/training_04_06/scenario1_ActionSpaceOption3Wrapper_ObservationOption5Wrapper_NPPAutomationWrapper_RewardOption2Wrapper_SAC_training_04_06",
    "src/main/rl/models/scenario2/training_04_06/scenario2_ActionSpaceOption3Wrapper_ObservationOption5Wrapper_None_RewardOption2Wrapper_PPO_training_04_06",
    "src/main/rl/models/scenario2/training_04_06/scenario2_ActionSpaceOption3Wrapper_ObservationOption4Wrapper_NPPAutomationWrapper_RewardOption2Wrapper_PPO_training_04_06",
    "src/main/rl/models/scenario3/training_04_06/scenario3_ActionSpaceOption3Wrapper_ObservationOption5Wrapper_None_RewardOption2Wrapper_PPO_training_04_06",
    "src/main/rl/models/scenario3/training_04_06/scenario3_ActionSpaceOption3Wrapper_ObservationOption3Wrapper_NPPAutomationWrapper_RewardOption2Wrapper_PPO_training_04_06",
]

# Observation-space size handed to the preparation helper, parallel to `paths`.
obs_dimensions = [6, 11, 11, 6, 11, 7]

# Action scaling factors, parallel to `paths`: each scenario's factors appear
# twice because every scenario contributes two combinations.
scaling_factors = [
    factors
    for factors in (
        scaling_factors_scenario_1,
        scaling_factors_scenario_2,
        scaling_factors_scenario_3,
    )
    for _ in range(2)
]

# Collectors filled below; entry i belongs to paths[i].
list_actions_prepared = []
list_obs_prepared = []
list_list_of_all_actions_taken = []
list_list_of_all_obs_taken = []

for idx, path in enumerate(paths):
    # The helper returns four values per combination; they are unpacked under
    # the same names as before because cells share one global namespace.
    prepared = prepare_one_combination_actions_and_obs_for_analysis(
        path, obs_dimensions=obs_dimensions[idx]
    )
    (
        actions_prepared,
        obs_prepared,
        list_of_all_actions_taken,
        list_of_all_obs_taken,
    ) = prepared
    list_actions_prepared.append(actions_prepared)
    list_obs_prepared.append(obs_prepared)
    list_list_of_all_actions_taken.append(list_of_all_actions_taken)
    list_list_of_all_obs_taken.append(list_of_all_obs_taken)

In [None]:
# Plot the actions taken over time for each of the six combinations.
# The dark line is the mean; the shaded band spans the min/max of the ten
# models within a combination at that timestep.

# One rescaling function per combination, in `paths` order.
# NOTE: the lambdas read the globals `list_idx` and `idx` at *call* time, so
# they are only valid inside the loops below, which must keep these loop
# variable names.
scaling_functions = [
    lambda x: int(round((x + 1) * (scaling_factors[list_idx][idx] / 2))),
    lambda x: int(round((x + 1) * (scaling_factors[list_idx][idx] / 2))),
    lambda x: -scaling_factors_scenario_2[idx]
    if x == 0
    else scaling_factors_scenario_2[idx],
    lambda x: -scaling_factors_scenario_2[idx]
    if x == 0
    else scaling_factors_scenario_2[idx],
    lambda x: scaling_factors_scenario_3[idx][x],
    lambda x: scaling_factors_scenario_3[idx][x],
]
for list_idx, actions_prepared in enumerate(list_actions_prepared):
    # Aggregate into a fresh list instead of overwriting `actions_prepared`:
    # mutating the prepared frames made this cell non-idempotent (a second run
    # would rescale already-aggregated data).
    actions_agg = []
    for idx, item in enumerate(actions_prepared):
        # Actions are scaled in the models, so rescale them before plotting.
        rescaled = item.applymap(scaling_functions[list_idx])
        # Row-wise statistics across the columns of the frame.
        actions_agg.append(rescaled.agg(["mean", "min", "max", "std"], axis=1))

    fig, ax = plt.subplots(1, 5, constrained_layout=True)
    fig.set_figheight(3)
    fig.set_figwidth(20)
    for dim, stats in enumerate(actions_agg):
        ax[dim].plot(stats["mean"], color=color_mapping["standard"])
        # Use the frame's own index as x so the band always matches the line,
        # whatever the episode length is.
        ax[dim].fill_between(
            stats.index,
            stats["min"],
            stats["max"],
            color=color_mapping["standard"],
            alpha=0.3,
        )
    for axis in ax:
        axis.set_xlabel("Zeitschritte")
        axis.set_ylabel("Value")
        axis.set_title(f"Kombination {list_idx+1}")
    # Save before showing so the file is written regardless of the backend.
    fig.savefig(
        f"src/main/rl/evaluation/plot_results/phase3_actions_combi_{list_idx+1}.png",
        format="png",
        dpi=300,
    )
    plt.show()
    # One figure per combination: release it once it is shown and saved.
    plt.close(fig)

In [None]:
# Plot the mean (with min/max band) of every observation dimension for all 6 combinations
# The dark blue line is the mean; the shaded blue band spans the min/max of the ten models within a combination at that timestep
from src.main.rl.utils.constants import (
    scaling_factors_scenario_1,
    scaling_factors_scenario_2,
    action_dimensions_german,
    obs_dimensions_german,
    obs_scaling_factors,
    obs_dimensions,
    scaling_factors_scenario_3,
)

# Observation-space size per combination, parallel to `list_obs_prepared`.
# This deliberately replaces the `obs_dimensions` imported from constants above.
obs_dimensions = [6, 11, 11, 6, 11, 7]

for list_idx, obs_prepared in enumerate(list_obs_prepared):
    obs_space_size = obs_dimensions[list_idx]
    current_obs_scaling_factors = obs_scaling_factors[obs_space_size]
    agg_result_list_obs = [pd.DataFrame() for _ in range(obs_space_size)]
    fig, ax = plt.subplots(1, int(obs_space_size), constrained_layout=True)
    fig.set_figheight(3)
    fig.set_figwidth(30)
    for idx, item in enumerate(obs_prepared):
        # Missing values are treated as 0 before rescaling.
        item = item.fillna(0)
        # Rescale the observation with the factor of dimension `idx`.
        item = item.applymap(
            lambda x: int(round((x + 1) * (current_obs_scaling_factors[idx] / 2)))
        )
        # Row-wise statistics across the columns of the frame.
        agg_result_list_obs[idx] = item.agg(["mean", "min", "max", "std"], axis=1)

    for dim, stats in enumerate(agg_result_list_obs):
        ax[dim].plot(stats["mean"], color=color_mapping["standard"])
        ax[dim].fill_between(
            [i for i in range(251)],
            stats["min"],
            stats["max"],
            color=color_mapping["standard"],
            alpha=0.3,
        )
    for dim in range(obs_space_size):
        ax[dim].set_xlabel("Zeitschritte")
        ax[dim].set_ylabel("Value")
        ax[dim].set_title(obs_dimensions_german[obs_space_size][dim])

    # Save inside the loop, one file per combination. The save call used to sit
    # after the loop, so only the figure of the last combination was written,
    # under a file name that referred to scenario 1.
    fig.savefig(
        f"src/main/rl/evaluation/plot_results/phase3_obs_plot_with_min_max_combi_{list_idx+1}.png",
        format="png",
        dpi=300,
    )

In [None]:
# Plot the reactor water level (mean with min/max band) for the first three combinations
# The dark blue line is the mean; the shaded blue band spans the min/max of the ten models within a combination at that timestep
from src.main.rl.utils.constants import (
    scaling_factors_scenario_1,
    scaling_factors_scenario_2,
    action_dimensions_german,
    obs_dimensions_german,
    obs_scaling_factors,
    obs_dimensions,
    scaling_factors_scenario_3,
)

# Observation-space sizes of the first three combinations only.
obs_dimensions = [6, 11, 11]
fig, ax = plt.subplots(1, 3, constrained_layout=True)

fig.set_figheight(3)
for list_idx, obs_prepared in enumerate(list_obs_prepared[0:3]):
    fig.set_figwidth(12)
    obs_space_size = obs_dimensions[list_idx]
    current_obs_scaling_factors = obs_scaling_factors[obs_space_size]
    agg_result_list_obs = [pd.DataFrame() for _ in range(obs_space_size)]

    for idx, item in enumerate(obs_prepared):
        # Missing values become 0, then each observation is rescaled with the
        # factor of its dimension and aggregated row-wise.
        item = item.fillna(0).applymap(
            lambda x: int(round((x + 1) * (current_obs_scaling_factors[idx] / 2)))
        )
        agg_result_list_obs[idx] = item.agg(["mean", "min", "max", "std"], axis=1)

    # Observation dimension 1 is the one plotted as "Wasserlevel Reaktor".
    panel = ax[list_idx]
    water_level = agg_result_list_obs[1]
    panel.plot(water_level["mean"], color=color_mapping["standard"])
    panel.fill_between(
        list(range(251)),
        water_level["min"],
        water_level["max"],
        color=color_mapping["standard"],
        alpha=0.3,
    )

    # Dashed reference levels, drawn in the same order as before.
    for level, line_color in (
        (1500, color_mapping["orange"]),
        (2500, color_mapping["orange"]),
        (2800, color_mapping["red"]),
        (1200, color_mapping["red"]),
        (1000, "black"),
    ):
        panel.axhline(level, color=line_color, ls="--")

    panel.set_xlabel("Zeitschritte")
    panel.set_ylabel("Wasserlevel Reaktor")
    panel.set_ylim([900, 2900])
    panel.set_title(f"Kombination {list_idx+1}")

fig.savefig(
    "src/main/rl/evaluation/plot_results/phase3_obs_waterlevel_reactor_3combis.png",
    format="png",
    dpi=300,
)

In [None]:
# Read/Create evaluation 3 details dataframe and file
import seaborn
import pandas as pd
import matplotlib.pyplot as plt
from src.main.rl.evaluation.phase3_evaluation import create_evaluation_df_phase3


# `os` was used below without ever being imported in this notebook.
import os

# Cached evaluation results; the CSV is created on first use.
path_to_save = "src/main/rl/evaluation/output/phase3_evaluation_results.csv"
os.makedirs("src/main/rl/evaluation/output/", exist_ok=True)
os.makedirs("src/main/rl/evaluation/plot_results/", exist_ok=True)
pd.options.display.max_colwidth = 500

# Only a missing, empty or unparsable cache triggers a rebuild; any other
# error (e.g. a permission problem) is no longer swallowed silently.
try:
    df = pd.read_csv(path_to_save)
except (FileNotFoundError, pd.errors.EmptyDataError, pd.errors.ParserError):
    df = pd.DataFrame()

if df.empty:
    create_evaluation_df_phase3(path_to_save, paths)
    df = pd.read_csv(path_to_save)
len(df)

In [None]:
# Strip plot of the return of every model, grouped by the chosen combinations
seaborn.set(style="ticks")
ax = seaborn.stripplot(
    x=df["combination"],
    y=df["cum_reward"],
    jitter=0.1,
    color=color_mapping["standard"],
    dodge=True,
    size=6,
)
# Replace the long combination paths on the x axis by their numbers.
ax.set(xticklabels=["1", "2", "3", "4", "5", "6"])
ax.set_xlabel("Kombination")
ax.set_ylabel("Return")
ax.margins(x=0.1)
ax.set(yticks=[160, 180, 200, 220, 240])
fig = ax.get_figure()
# Apply the tight layout to this figure. The previous
# `plt.tight_layout = True` replaced the pyplot function with a bool and
# never adjusted the layout.
fig.tight_layout()
# Save before showing so the file is written regardless of the backend.
fig.savefig(
    "src/main/rl/evaluation/plot_results/phase3_rewards_per_combi.png",
    format="png",
    dpi=300,
)
plt.show()

In [None]:
# Create boxplots with returns
import matplotlib.pyplot as plt
from src.main.rl.utils.constants import color_mapping
from scipy.stats import ttest_ind


def set_box_color(bp, color):
    """Colour every part of one boxplot uniformly.

    Args:
        bp: Mapping with the keys "boxes", "whiskers", "caps", "medians" and
            "fliers", as returned by ``Axes.boxplot``.
        color: Colour applied to all of these artist groups.
    """
    for part in ("boxes", "whiskers", "caps", "medians", "fliers"):
        plt.setp(bp[part], color=color)


fig, ax = plt.subplots(constrained_layout=True)
fig.set_figwidth(5)
# One colour per combination, in `paths` order.
colors = [
    color_mapping[name]
    for name in ("blue", "red", "grey", "yellow", "brown", "turquoise")
]

# One box per combination, placed 0.3 apart and labelled with its number.
for idx, item in enumerate(paths):
    returns_of_combination = df.query("combination==@item")["cum_reward"]
    ax1 = ax.boxplot(
        returns_of_combination,
        positions=[idx * 0.3],
        labels=[f"{idx+1}"],
    )
    set_box_color(ax1, color=colors[idx])
ax.set_xlabel("Kombination")
ax.set_ylabel("Return")

# Empty lines serve as legend handles, one per combination colour.
for combination_number, legend_color in enumerate(colors, start=1):
    plt.plot([], c=legend_color, label=f"Kombination {combination_number}")
plt.legend(loc=[0.6, 0.1])
fig.savefig(
    "src/main/rl/evaluation/plot_results/phase3_return_by_combi_box.png",
    format="png",
    dpi=300,
)
plt.show()

# Welch's t-test (unequal variances) between combinations 3 and 4
returns_combi3 = df.query("combination==@paths[2]")["cum_reward"]
returns_combi4 = df.query("combination==@paths[3]")["cum_reward"]
t_test_combi3_4 = ttest_ind(returns_combi3, returns_combi4, equal_var=False)
print(t_test_combi3_4)

In [None]:
# Create boxplots of the criticality score per combination

fig, ax = plt.subplots(constrained_layout=True)
fig.set_figwidth(5)
# One colour per combination, in `paths` order.
colors = [
    color_mapping[name]
    for name in ("blue", "red", "grey", "yellow", "brown", "turquoise")
]

# One box per combination, placed 0.3 apart and labelled with its number.
for idx, item in enumerate(paths):
    scores_of_combination = df.query("combination==@item")["criticality_score"]
    ax1 = ax.boxplot(
        scores_of_combination,
        positions=[idx * 0.3],
        labels=[f"{idx+1}"],
    )
    set_box_color(ax1, color=colors[idx])
ax.set_xlabel("Kombination")
ax.set_ylabel("Kritikalitäts-Score")

# Empty lines serve as legend handles, one per combination colour.
for combination_number, legend_color in enumerate(colors, start=1):
    plt.plot([], c=legend_color, label=f"Kombination {combination_number}")
plt.legend(loc=[0.03, 0.55])
fig.savefig(
    "src/main/rl/evaluation/plot_results/phase3_return_by_combi_criticality.png",
    format="png",
    dpi=300,
)

In [None]:
# Create boxplots of the combined score (return + criticality score) per combination
import matplotlib.pyplot as plt

fig, ax = plt.subplots(constrained_layout=True)
fig.set_figwidth(5)
# One colour per combination, in `paths` order.
colors = [
    color_mapping[name]
    for name in ("blue", "red", "grey", "yellow", "brown", "turquoise")
]

# One box per combination, showing criticality score plus return per model.
for idx, item in enumerate(paths):
    local_df = df.query("combination==@item")
    combined_score = local_df["criticality_score"] + local_df["cum_reward"]
    ax1 = ax.boxplot(
        combined_score,
        positions=[idx * 0.3],
        labels=[f"{idx+1}"],
    )
    set_box_color(ax1, color=colors[idx])
ax.set_xlabel("Kombination")
ax.set_ylabel("Score (Kritikalitäts-Score + Return)")

# Empty lines serve as legend handles, one per combination colour.
for combination_number, legend_color in enumerate(colors, start=1):
    plt.plot([], c=legend_color, label=f"Kombination {combination_number}")
plt.legend(loc=[0.61, 0.05])
fig.savefig(
    "src/main/rl/evaluation/plot_results/phase3_return_by_combi_criticality_and_return.png",
    format="png",
    dpi=300,
)

In [None]:
from scipy.stats import pearsonr
from scipy.stats import linregress
import numpy as np

# Relationship between criticality score (x) and return (y) over all models.
x = df["criticality_score"]
y = df["cum_reward"]

# Least-squares line through the points, plus the Pearson correlation.
result = linregress(x, y)
print(pearsonr(x, y))

plt.scatter(x, y)
fitted_values = result.intercept + result.slope * x
plt.plot(x, fitted_values, "r", label="fitted line")

In [None]:
# Models whose combined score (return + criticality score) exceeds 450
combined_score = df["cum_reward"] + df["criticality_score"]
df.loc[combined_score > 450, ["combination", "cum_reward", "criticality_score"]]

In [None]:
# All models ordered by ascending criticality score
df.loc[:, ["criticality_score", "combination"]].sort_values(
    by="criticality_score", ascending=True
)

In [None]:
# Welch t-tests: only 10 models per combination and no correction for multiple comparisons, so this is just to get a rough idea
from scipy.stats import ttest_ind
from scipy.stats import f_oneway

test = df[["combination", "cum_reward"]]

# Returns of each combination as one-column frames, in `paths` order:
# "wo"/"w" are the combinations without/with the automation wrapper of a
# scenario. The samples are derived from `paths` instead of repeating the six
# path literals, so they cannot drift from the list used everywhere else.
# The unpacking fails loudly if `paths` does not hold exactly six entries.
wo_scen1, w_scen1, wo_scen2, w_scen2, wo_scen3, w_scen3 = (
    test[test["combination"] == combination].drop(columns=["combination"])
    for combination in paths
)

# Welch's t-test (unequal variances) between the two scenario-2 combinations.
ttest_ind(wo_scen2, w_scen2, equal_var=False)

In [None]:
# Statistics per combination
from scipy.stats import iqr

# Columns that identify one combination; all remaining columns are aggregated.
group_keys = [
    "combination",
    "scenario",
    "alg",
    "action_wrapper",
    "obs_wrapper",
    "automation_wrapper",
]
grouped_by_combination = df.drop(columns=["full_path"]).groupby(
    group_keys, dropna=False
)
df_statistics_per_combination = grouped_by_combination.agg(
    ["mean", "max", "min", "std", iqr]
)

# groupby sorts the combinations alphabetically, which is confusing for the
# thesis; this explicit index restores the intended numbering.
df_statistics_per_combination["index"] = [0, 1, 3, 2, 5, 4]
df_statistics_per_combination = df_statistics_per_combination.reset_index()
df_statistics_per_combination = df_statistics_per_combination.set_index(
    ["index", "combination"]
).sort_index()

# Flatten the two-level column labels into "<column>_<statistic>".
df_statistics_per_combination.columns = list(
    map("_".join, df_statistics_per_combination.columns.to_flat_index())
)
save_df_per_combination = df_statistics_per_combination.copy()
df_statistics_per_combination[
    ["total_timesteps_mean", "cum_reward_mean", "criticality_score_mean"]
]

In [None]:
# Preparation: collect the reward per timestep of every model, then compute mean/min/max/std per combination
import pandas as pd
from src.main.rl.utils.parser import parse_scenario_name, parse_wrapper
from src.main.rl.utils.combined_parser import parse_information_from_path
from src.main.rl.evaluation.eval import get_single_reward

# Model directories of the six selected combinations (same list as at the top
# of the notebook, repeated so this cell can be run on its own).
paths = [
    "src/main/rl/models/scenario1/training_04_06/scenario1_ActionSpaceOption3Wrapper_ObservationOption4Wrapper_None_RewardOption2Wrapper_TD3_training_04_06",
    "src/main/rl/models/scenario1/training_04_06/scenario1_ActionSpaceOption3Wrapper_ObservationOption5Wrapper_NPPAutomationWrapper_RewardOption2Wrapper_SAC_training_04_06",
    "src/main/rl/models/scenario2/training_04_06/scenario2_ActionSpaceOption3Wrapper_ObservationOption5Wrapper_None_RewardOption2Wrapper_PPO_training_04_06",
    "src/main/rl/models/scenario2/training_04_06/scenario2_ActionSpaceOption3Wrapper_ObservationOption4Wrapper_NPPAutomationWrapper_RewardOption2Wrapper_PPO_training_04_06",
    "src/main/rl/models/scenario3/training_04_06/scenario3_ActionSpaceOption3Wrapper_ObservationOption5Wrapper_None_RewardOption2Wrapper_PPO_training_04_06",
    "src/main/rl/models/scenario3/training_04_06/scenario3_ActionSpaceOption3Wrapper_ObservationOption3Wrapper_NPPAutomationWrapper_RewardOption2Wrapper_PPO_training_04_06",
]

# One frame per combination; each model adds one column named after its path.
list_combinations = [pd.DataFrame() for _ in paths]
for idx, path in enumerate(paths):
    # Model directories carry the suffixes _1 ... _10.
    for number in range(1, 11):
        result_dict = {}
        full_path = f"{path}_{number}"
        path_to_overhand = f"{full_path}/best_model.zip"

        (
            action_wrapper,
            automation_wrapper,
            obs_wrapper,
            reward_wrapper,
        ) = parse_wrapper(full_path)
        scenario, alg, wrapper_maker = parse_information_from_path(full_path)
        reward = get_single_reward(
            scenario, path_to_overhand, alg, wrapper_maker, episode_length=250
        )

        intermediate_df = pd.DataFrame()
        intermediate_df[full_path] = reward
        # Outer join keeps every timestep even if the series differ in length.
        list_combinations[idx] = list_combinations[idx].join(
            intermediate_df, how="outer"
        )

# Collapse the model columns into row-wise statistics per combination.
list_combinations = [
    item.agg(["mean", "min", "max", "std"], axis=1) for item in list_combinations
]

In [None]:
from src.main.rl.evaluation.eval import get_single_reward_sop

# Reward series of the SOP run. In the cells visible here it is only consumed
# by the optional, commented-out overlay in the single-plot cell further down.
# NOTE(review): "SOP" presumably means standard operating procedure - confirm.
reward_per_timestep_sop = get_single_reward_sop()

In [None]:
# Plot the mean reward per combination per timestep in multiple plots, with a min/max band
import matplotlib.pyplot as plt
from src.main.rl.utils.constants import color_mapping

# 2 x 3 grid: combinations 1-3 in the top row, 4-6 in the bottom row.
fig, ax = plt.subplots(2, 3, constrained_layout=True)
fig.set_figheight(4)
fig.set_figwidth(10)
for idx, item in enumerate(list_combinations):
    panel = ax[idx // 3][idx % 3]
    panel.plot(item["mean"], color=color_mapping["standard"])
    # Use the frame's own index as x so the band always matches the line
    # instead of relying on a hard-coded number of timesteps.
    panel.fill_between(
        item.index,
        item["min"],
        item["max"],
        color=color_mapping["standard"],
        alpha=0.3,
    )
    panel.set_xlabel("Zeitschritte")
    panel.set_ylabel("Value")
    panel.set_title(f"Kombination {idx+1}")
# Saved under its own name: the single-plot cell further down writes
# phase3_reward_per_step_per_combi.png and used to overwrite this figure.
fig.savefig(
    "src/main/rl/evaluation/plot_results/phase3_reward_per_step_per_combi_min_max.png",
    format="png",
    dpi=300,
)
plt.show()

In [None]:
# Plot the mean reward per combination per timestep in one plot
fig, ax = plt.subplots(1, 1, constrained_layout=True)
fig.set_figheight(4)
fig.set_figwidth(8)
# One colour per combination, in `paths` order.
colors = [
    color_mapping[name]
    for name in ("blue", "red", "grey", "yellow", "brown", "turquoise")
]
for combination_stats, line_color in zip(list_combinations, colors):
    ax.plot(combination_stats["mean"], color=line_color)
# Activate/deactivate the SOP overlay here
# ax.plot(reward_per_timestep_sop, color="black")
ax.set_xlabel("Zeitschritte")
ax.set_ylabel("Reward")

# Empty lines serve as legend handles, one per combination colour.
for combination_number, legend_color in enumerate(colors, start=1):
    plt.plot([], c=legend_color, label=f"Kombination {combination_number}")
plt.legend()
fig.savefig(
    "src/main/rl/evaluation/plot_results/phase3_reward_per_step_per_combi.png",
    format="png",
    dpi=300,
)

In [None]:
# Plot the reward per timestep of every single model, one subplot per combination
import matplotlib.pyplot as plt
from src.main.rl.utils.constants import color_mapping

# Model directories of the six selected combinations (same list as above).
paths = [
    "src/main/rl/models/scenario1/training_04_06/scenario1_ActionSpaceOption3Wrapper_ObservationOption4Wrapper_None_RewardOption2Wrapper_TD3_training_04_06",
    "src/main/rl/models/scenario1/training_04_06/scenario1_ActionSpaceOption3Wrapper_ObservationOption5Wrapper_NPPAutomationWrapper_RewardOption2Wrapper_SAC_training_04_06",
    "src/main/rl/models/scenario2/training_04_06/scenario2_ActionSpaceOption3Wrapper_ObservationOption5Wrapper_None_RewardOption2Wrapper_PPO_training_04_06",
    "src/main/rl/models/scenario2/training_04_06/scenario2_ActionSpaceOption3Wrapper_ObservationOption4Wrapper_NPPAutomationWrapper_RewardOption2Wrapper_PPO_training_04_06",
    "src/main/rl/models/scenario3/training_04_06/scenario3_ActionSpaceOption3Wrapper_ObservationOption5Wrapper_None_RewardOption2Wrapper_PPO_training_04_06",
    "src/main/rl/models/scenario3/training_04_06/scenario3_ActionSpaceOption3Wrapper_ObservationOption3Wrapper_NPPAutomationWrapper_RewardOption2Wrapper_PPO_training_04_06",
]

# Rebuild the raw per-model reward frames: the preparation cell above replaced
# `list_combinations` by aggregated statistics, but this plot needs one column
# per model.
list_combinations = [pd.DataFrame() for i in range(len(paths))]
for idx, path in enumerate(paths):

    # Model directories carry the suffixes _1 ... _10.
    for number in range(1, 11):
        full_path = path + f"_{str(number)}"
        path_to_overhand = full_path + "/best_model.zip"

        # The unpacked wrapper names are not read below; the call is kept
        # unchanged from the preparation cell.
        action_wrapper, automation_wrapper, obs_wrapper, reward_wrapper = parse_wrapper(
            full_path
        )
        scenario, alg, wrapper_maker = parse_information_from_path(full_path)
        reward = get_single_reward(
            scenario, path_to_overhand, alg, wrapper_maker, episode_length=250
        )
        intermediate_df = pd.DataFrame()
        intermediate_df[full_path] = reward
        list_combinations[idx] = list_combinations[idx].join(
            intermediate_df, how="outer"
        )

# 2 x 3 grid: combinations 1-3 in the top row, 4-6 in the bottom row.
fig, ax = plt.subplots(2, 3, constrained_layout=True)
fig.set_figheight(20)
fig.set_figwidth(20)
for idx, item in enumerate(list_combinations):
    panel = ax[idx // 3][idx % 3]
    # Plots every model column of the combination in the same colour.
    panel.plot(item, color=color_mapping["standard"])
    panel.set_xlabel("Zeitschritte")
    panel.set_ylabel("Value")
    # The bottom row used to be titled with `list_idx`, a stale variable left
    # over from an earlier cell, so all three panels got the same number.
    panel.set_title(f"Kombination {idx+1}")
# Save before showing so the file is written regardless of the backend.
fig.savefig(
    "src/main/rl/evaluation/plot_results/phase3_reward_per_step_per_combi_2.png",
    format="png",
    dpi=300,
)
plt.show()

In [None]:
###### Statistics over all combinations and models (60 models) ######

In [None]:
# Read / Create df with results of all tests in phase 3
import pandas as pd
from scipy.stats import iqr
from src.main.rl.evaluation.phase3_evaluation import create_evaluation_df_phase3

# Cached evaluation results; the CSV is created on first use.
path_to_save = "src/main/rl/evaluation/output/phase3_evaluation_results.csv"
pd.options.display.max_colwidth = 500

# Only a missing, empty or unparsable cache triggers a rebuild; any other
# error (e.g. a permission problem) is no longer swallowed silently.
try:
    df = pd.read_csv(path_to_save)
except (FileNotFoundError, pd.errors.EmptyDataError, pd.errors.ParserError):
    df = pd.DataFrame()

if df.empty:
    create_evaluation_df_phase3(path_to_save, paths)
    df = pd.read_csv(path_to_save)
df[["combination", "cum_reward", "criticality_score"]]

In [None]:
# Statistics per combination
# Columns that identify one combination; all remaining columns are aggregated.
combination_keys = [
    "combination",
    "scenario",
    "alg",
    "action_wrapper",
    "obs_wrapper",
    "automation_wrapper",
]
statistics_to_compute = ["mean", "max", "min", "std", iqr]
df_statistics_per_combination = (
    df.drop(columns=["full_path"])
    .groupby(combination_keys, dropna=False)
    .agg(statistics_to_compute)
)

# groupby sorts the combinations alphabetically, which is confusing for the
# thesis; this explicit index restores the intended numbering.
df_statistics_per_combination["index"] = [0, 1, 3, 2, 5, 4]
df_statistics_per_combination = df_statistics_per_combination.reset_index()
df_statistics_per_combination = df_statistics_per_combination.set_index(
    ["index", "combination"]
).sort_index()

# Flatten the two-level column labels into "<column>_<statistic>".
flat_labels = df_statistics_per_combination.columns.to_flat_index()
df_statistics_per_combination.columns = ["_".join(label) for label in flat_labels]
save_df_per_combination = df_statistics_per_combination.copy()
df_statistics_per_combination

In [None]:
# Criticality score per combination: rounded means first, then the full
# statistics table rendered as LaTeX.
criticality_columns = [
    "criticality_score_mean",
    "criticality_score_max",
    "criticality_score_min",
    "criticality_score_std",
    "criticality_score_iqr",
]
print(df_statistics_per_combination[["criticality_score_mean"]].round(2))

# Drop the second index level (the combination path) so only the short index
# remains, then put the statistics in rows.
criticality_table = (
    df_statistics_per_combination[criticality_columns]
    .droplevel(1)
    .transpose()
    .round(2)
)
print(criticality_table.to_latex())

In [None]:
# Highest return reached within each combination
df_statistics_per_combination.loc[:, "cum_reward_max"]