In [None]:
# This notebook analyses the best combinations in detail. All tests (episode length, noise, starting states, etc.) are analysed here.
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import iqr

from src.main.rl.evaluation.plots.phase3_plots import (
    plot_actions_taken,
    plot_observations,
)
from src.main.rl.evaluation.phase3_evaluation import create_evaluation_df_phase3
from src.main.rl.utils.constants import color_mapping

# Model directories of the six analysed combinations, listed in thesis order
# (Kombination 1 .. Kombination 6).
paths = [
    "src/main/rl/models/scenario1/training_04_06/scenario1_ActionSpaceOption3Wrapper_ObservationOption4Wrapper_None_RewardOption2Wrapper_TD3_training_04_06",
    "src/main/rl/models/scenario1/training_04_06/scenario1_ActionSpaceOption3Wrapper_ObservationOption5Wrapper_NPPAutomationWrapper_RewardOption2Wrapper_SAC_training_04_06",
    "src/main/rl/models/scenario2/training_04_06/scenario2_ActionSpaceOption3Wrapper_ObservationOption5Wrapper_None_RewardOption2Wrapper_PPO_training_04_06",
    "src/main/rl/models/scenario2/training_04_06/scenario2_ActionSpaceOption3Wrapper_ObservationOption4Wrapper_NPPAutomationWrapper_RewardOption2Wrapper_PPO_training_04_06",
    "src/main/rl/models/scenario3/training_04_06/scenario3_ActionSpaceOption3Wrapper_ObservationOption5Wrapper_None_RewardOption2Wrapper_PPO_training_04_06",
    "src/main/rl/models/scenario3/training_04_06/scenario3_ActionSpaceOption3Wrapper_ObservationOption3Wrapper_NPPAutomationWrapper_RewardOption2Wrapper_PPO_training_04_06",
]
path_to_save = "src/main/rl/evaluation/output/phase3_evaluation_results.csv"
pd.options.display.max_colwidth = 500

# Re-use the cached evaluation results when they can be read. Only cache-related
# failures (missing/unreadable/empty/corrupt file) fall back to a re-evaluation;
# anything else (e.g. KeyboardInterrupt) is no longer silently swallowed.
try:
    df = pd.read_csv(path_to_save)
except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
    df = pd.DataFrame()
if df.empty:
    # NOTE(review): create_evaluation_df_phase3 presumably writes its results to
    # `path_to_save`, since the file is read back right afterwards -- confirm.
    create_evaluation_df_phase3(path_to_save, paths)
    df = pd.read_csv(path_to_save)
len(df)

In [None]:
# Statistics per combination: mean/max/min/std/IQR of every metric column,
# grouped by combination (the remaining keys only carry the metadata along).
df_statistics_per_combination = (
    df.drop(columns=["full_path"])
    .groupby(
        [
            "combination",
            "scenario",
            "alg",
            "action_wrapper",
            "obs_wrapper",
            "automation_wrapper",
        ],
        dropna=False,  # automation_wrapper is NaN for combinations without automation
    )
    .agg(["mean", "max", "min", "std", iqr])
)
# groupby sorts the combinations alphabetically, which is confusing for the thesis.
# Derive the thesis order from the position of each combination in `paths`
# instead of hard-coding it, so the labels cannot silently drift out of sync
# (an unknown combination now raises a ValueError).
df_statistics_per_combination["index"] = [
    paths.index(combination)
    for combination in df_statistics_per_combination.index.get_level_values(
        "combination"
    )
]
df_statistics_per_combination = (
    df_statistics_per_combination.reset_index()
    .set_index(["index", "combination"])
    .sort_index()
)
# Flatten the (metric, statistic) column MultiIndex to "<metric>_<statistic>".
df_statistics_per_combination.columns = [
    "_".join(a) for a in df_statistics_per_combination.columns.to_flat_index()
]
save_df_per_combination = df_statistics_per_combination.copy()
df_statistics_per_combination

In [None]:
# SOP Analysis
from src.main.rl.evaluation.eval import evaluate_sop

# Run the SOP evaluation once and unpack its six results into the *_sop names
# that the plotting cells below read.
sop_results = evaluate_sop()
(
    cum_reward_sop,
    criticality_score_sop,
    total_timesteps_sop,
    actions_taken_sop,
    obs_taken_sop,
    info_sop,
) = sop_results

In [None]:
from src.main.rl.evaluation.plots.phase3_plots import (
    prepare_one_combination_actions_and_obs_for_analysis,
)
from src.main.rl.utils.constants import color_mapping
from src.main.rl.utils.constants import (
    scaling_factors_scenario_1,
    action_dimensions_german,
    obs_scaling_factors,
)
import matplotlib.pyplot as plt

# Combination analysed in this section (scenario 1, TD3, no automation wrapper).
path = "src/main/rl/models/scenario1/training_04_06/scenario1_ActionSpaceOption3Wrapper_ObservationOption4Wrapper_None_RewardOption2Wrapper_TD3_training_04_06"

prepared = prepare_one_combination_actions_and_obs_for_analysis(path, obs_dimensions=6)
actions_prepared, obs_prepared, list_of_all_actions_taken, list_of_all_obs_taken = prepared

# Actions are scaled inside the models, so map every value back with
# (x + 1) * factor / 2 and keep only the row-wise mean for plotting.
for position, action_frame in enumerate(actions_prepared):
    rescaled_actions = action_frame.applymap(
        lambda value: int(
            round((value + 1) * (scaling_factors_scenario_1[position] / 2))
        )
    )
    actions_prepared[position] = rescaled_actions.agg(["mean"], axis=1)

# Same for the observations; missing values are replaced by 0 before rescaling.
for position, obs_frame in enumerate(obs_prepared):
    rescaled_obs = obs_frame.fillna(0).applymap(
        lambda value: int(round((value + 1) * (obs_scaling_factors[6][position] / 2)))
    )
    obs_prepared[position] = rescaled_obs.agg(["mean"], axis=1)

In [None]:
# y-axis limits, one [min, max] pair per action dimension.
y_axis_scale_actions = [[0, 100], [0, 2200], [0, 1.1], [0, 1.1], [0, 2200]]

fig, ax = plot_actions_taken(
    actions_taken_sop, "scenario1", y_axis_scale_actions, color="standard"
)
# Overlay the mean action of combination 1 (grey) on every SOP action panel.
for panel_idx, mean_actions in enumerate(actions_prepared):
    ax[panel_idx].plot(mean_actions["mean"], color=color_mapping["grey"])

fig.savefig(
    "src/main/rl/evaluation/plot_results/phase3_action_sop_combi1.png",
    format="png",
    dpi=300,
)

In [None]:
# y-axis limits, one [min, max] pair per observation dimension.
y_axis_scale_obs = [[0, 1000], [0, 4000], [0, 550], [0, 8000], [0, 180], [0, 32]]
fig, ax = plot_observations(obs_taken_sop, y_axis_scale_obs)
# Overlay the mean observation of combination 1 (grey) on every SOP panel.
for panel_idx, mean_obs in enumerate(obs_prepared):
    ax[panel_idx].plot(mean_obs["mean"], color=color_mapping["grey"])
fig.savefig(
    "src/main/rl/evaluation/plot_results/phase3_obs_sop_combi1.png",
    format="png",
    dpi=300,
)

In [None]:
from src.main.rl.utils.constants import (
    scaling_factors_scenario_1,
    scaling_factors_scenario_2,
    action_dimensions_german,
    obs_scaling_factors,
    obs_dimensions_german,
    scaling_factors_scenario_3,
)
# Rescale the SOP observations per observation dimension.
current_obs_scaling_factors = obs_scaling_factors[6]

# Regroup the per-timestep observations into one list per observation dimension.
obs_positions = [[] for _ in range(6)]
for item in obs_taken_sop:
    for idx, single_obs in enumerate(item):
        obs_positions[idx].append(single_obs)

# Build the result list here instead of appending to a variable that is only
# defined in a later cell: the original raised a NameError on a fresh
# "Restart & Run All" and kept growing the list on every re-run.
scaled_values_obs = [
    [int(round((x + 1) * (current_obs_scaling_factors[idx] / 2))) for x in values]
    for idx, values in enumerate(obs_positions)
]
scaled_values_obs

In [None]:
# Timesteps reached by every model in the 1000-step episode-length test
print(df.loc[:, "episode_length_1000_timesteps"])

In [None]:
# Number of successful models (all 1000 steps completed) per combination,
# rendered as a LaTeX table.
reached_1000_per_combination = (
    df[["episode_length_1000_timesteps", "combination"]]
    .set_index("combination")
    .eq(1000)
    .groupby("combination")
    .sum()
)
# Summing across the (single) column yields one count per combination.
length_1000 = reached_1000_per_combination.sum(axis=1).to_latex()
print(length_1000)

In [None]:
# Column totals of the two pump-blown indicators of the 1000-step test
df.loc[
    :,
    [
        "episode_length_1000_condensator_pump_blown",
        "episode_length_1000_water_pump_blown",
    ],
].sum()

In [None]:
from src.main.rl.evaluation.eval import evaluate_terminal_state_obs
from src.main.rl.utils.combined_parser import parse_information_from_path

# Inspect the terminal-state observations of one scenario-3 model (run _10)
# in the 1000-step test.
path = "src/main/rl/models/scenario3/training_04_06/scenario3_ActionSpaceOption3Wrapper_ObservationOption3Wrapper_NPPAutomationWrapper_RewardOption2Wrapper_PPO_training_04_06_10/best_model.zip"
scenario, alg, wrapper_maker = parse_information_from_path(path)
cum_reward, obs, info = evaluate_terminal_state_obs(
    scenario, path, alg, wrapper_maker, episode_length=1000
)

# Per-dimension factors used to map the observations back via (x + 1) * factor / 2.
# Kept under its own name so it does not shadow the `obs_scaling_factors` dict
# imported from the project constants in other cells. The last factor is 0, so a
# seventh observation entry always maps to 0.
terminal_obs_scaling_factors = [800, 4000, 550, 8000, 180, 30, 0]
scaled_values_obs = [
    [
        int(round((x + 1) * (terminal_obs_scaling_factors[idx] / 2)))
        for idx, x in enumerate(item)
    ]
    for item in obs
]

In [None]:
# Criticality score in more detail: every statistics column whose name contains "criti"
cols = [name for name in df_statistics_per_combination.columns if "criti" in name]
df_statistics_per_combination.loc[:, cols].round(2)

In [None]:
# Noise: statistics of the two delay-noise options per combination.
cols = [item for item in df_statistics_per_combination.columns if "DelayNoise" in item]

# Row order of the LaTeX table: all five statistics of option 1, then option 2.
delay_noise_stat_columns = [
    f"DelayNoiseWrapperOption{option}_{stat}"
    for option in (1, 2)
    for stat in ("mean", "max", "min", "std", "iqr")
]

# (A bare `df_statistics_per_combination[cols].round(2)` expression used to sit
# here; it was not the last statement of the cell, so its result was discarded.)
print(df_statistics_per_combination[["DelayNoiseWrapperOption1_mean"]].round(2))
print(
    df_statistics_per_combination[cols]
    .round(2)
    .droplevel(1)[delay_noise_stat_columns]  # drop the long combination path from the index
    .transpose()
    .to_latex()
)

In [None]:
# Box plots of the completed timesteps per noise option and per combination
def set_box_color(bp, color, idx=None):
    """Color all artists of one ``Axes.boxplot`` result uniformly.

    Args:
        bp: Dictionary returned by ``Axes.boxplot``; its "boxes", "whiskers",
            "caps", "medians" and "fliers" entries are recolored.
        color: Color applied to every artist.
        idx: Unused. Kept (now optional) so existing three-argument calls
            keep working.
    """
    # The original set the "caps" color twice; once is enough.
    for part in ("boxes", "whiskers", "caps", "medians", "fliers"):
        plt.setp(bp[part], color=color)


noise_df = df[
    [
        "obs_wrapper",
        "automation_wrapper",
        "scenario",
        "DelayNoiseWrapperOption2_timesteps",
        "DelayNoiseWrapperOption1_timesteps",
        "combination",
        "ObservationVariesNoiseWrapper1_timesteps",
        "ObservationVariesPositiveNoiseWrapper_timesteps",
        "ObservationVariesNegativeNoiseWrapper_timesteps",
    ]
]

# Box order inside every group; matches the tick labels 1a, 1b, 1c, 2a, 2b.
order_of_col = [
    "ObservationVariesPositiveNoiseWrapper_timesteps",
    "ObservationVariesNegativeNoiseWrapper_timesteps",
    "ObservationVariesNoiseWrapper1_timesteps",
    "DelayNoiseWrapperOption1_timesteps",
    "DelayNoiseWrapperOption2_timesteps",
]
noise_labels = ["1a", "1b", "1c", "2a", "2b"]
# One color per combination, in the order of `paths` (Kombination 1 .. 6).
combination_colors = ["blue", "red", "grey", "yellow", "brown", "turquoise"]

fig, ax = plt.subplots(constrained_layout=True)
fig.set_figwidth(14)

# One group of five boxes per combination: groups start every 1.2 x-units and
# boxes are 0.2 apart (rounded so the positions equal the decimal literals).
for group_idx, (combination, color_name) in enumerate(zip(paths, combination_colors)):
    boxes = ax.boxplot(
        noise_df[noise_df["combination"] == combination][order_of_col],
        positions=[
            round(1.2 * group_idx + 0.2 * box_idx, 1)
            for box_idx in range(len(order_of_col))
        ],
        labels=noise_labels,
    )
    set_box_color(boxes, color_mapping[color_name], group_idx)

ax.set_xlabel("Rauschoptionen")
ax.set_ylabel("Absolvierte Zeitschritte")
# Empty proxy lines provide one legend entry per combination.
for group_idx, color_name in enumerate(combination_colors):
    ax.plot([], c=color_mapping[color_name], label=f"Kombination {group_idx + 1}")
ax.legend(loc=[0.2, 0.1])

# Save before showing. The former `plt.tight_layout = True` was removed: it
# replaced the pyplot function with a bool (breaking later `plt.tight_layout()`
# calls) and had no layout effect; constrained_layout already handles the layout.
fig.savefig(
    "src/main/rl/evaluation/plot_results/phase3_noise_by_combi.png",
    format="png",
    dpi=300,
)
plt.show()

In [None]:
# Per column: number of entries equal to 250 (non-timestep columns count 0)
(noise_df == 250).sum()

In [None]:
# Number of noise runs per combination that completed all 250 timesteps,
# summed over the five noise experiments and rendered as a LaTeX table.
# Fix: the column list used to contain ObservationVariesNoiseWrapper1 twice and
# no ObservationVariesPositiveNoiseWrapper, so one experiment was double-counted
# and another one ignored.
noise_agg = (
    df[
        [
            "DelayNoiseWrapperOption2_timesteps",
            "DelayNoiseWrapperOption1_timesteps",
            "ObservationVariesNoiseWrapper1_timesteps",
            "ObservationVariesPositiveNoiseWrapper_timesteps",
            "ObservationVariesNegativeNoiseWrapper_timesteps",
            "combination",
        ]
    ]
    .set_index("combination")
    .eq(250)
    .groupby("combination")
    .sum()
    .transpose()
    .sum()
    .transpose()
    .to_latex()
)
print(noise_agg)

In [None]:
# Check combination 1 model 3 for noise and why it fails in detail (plotting)
import pandas as pd
from src.main.rl.utils.parser import parse_wrapper
from src.main.rl.utils.combined_parser import parse_information_from_path
from src.main.rl.evaluation.eval import evaluate
from src.main.rl.utils.utils import WrapperMaker
from src.main.rl.evaluation.wrapper.noise.obs_varies_wrapper import (
    ObservationVariesNegativeNoiseWrapper,
    ObservationVariesPositiveNoiseWrapper,
)
from src.main.rl.evaluation.plots.phase3_plots import (
    plot_observations,
    plot_actions_taken,
)
from src.main.rl.evaluation.eval import evaluate, evaluate_terminal_state_obs

# Model 3 of combination 1, evaluated with negative observation noise.
path = "src/main/rl/models/scenario1/training_04_06/scenario1_ActionSpaceOption3Wrapper_ObservationOption4Wrapper_None_RewardOption2Wrapper_TD3_training_04_06_3"
path_to_overhand = f"{path}/best_model.zip"

action_wrapper, automation_wrapper, obs_wrapper, reward_wrapper = parse_wrapper(path)
scenario, alg, wrapper_maker = parse_information_from_path(path)
# Replace the parsed wrapper maker with one that also applies the negative
# observation-noise wrapper.
# NOTE(review): the meaning of the fifth positional argument (None) is not
# visible here -- confirm against WrapperMaker.
wrapper_maker = WrapperMaker(
    action_wrapper,
    automation_wrapper,
    obs_wrapper,
    reward_wrapper,
    None,
    ObservationVariesNegativeNoiseWrapper,
)

evaluation_result = evaluate(
    scenario, path_to_overhand, alg, wrapper_maker, episode_length=5
)
(
    cum_reward,
    criticality_score,
    total_timesteps,
    actions_taken,
    obs_taken,
    info,
) = evaluation_result
cum_reward2, obs_taken2, info2 = evaluate_terminal_state_obs(
    scenario, path_to_overhand, alg, wrapper_maker, episode_length=5
)

# Both evaluation variants must yield the same cumulative reward.
assert cum_reward2 == cum_reward

y_axis_scale_obs = [[0, 1000], [0, 4000], [0, 550], [0, 8000], [0, 180], [0, 32]]

fig = plot_observations(obs_taken2, y_axis_scale_obs)
fig1 = plot_observations(obs_taken, y_axis_scale_obs)
fig2 = plot_actions_taken(actions_taken, "scenario1")
# The plot helper's result is indexed: element 0 is the object that gets saved.
fig2[0].savefig(
    "src/main/rl/evaluation/plot_results/phase3_noise_combi_1_action_not_working.png",
    format="png",
    dpi=300,
)
print(info)

In [None]:
# Same model as in the previous cell, but without the noise wrapper
# (both trailing WrapperMaker arguments are None), as a working comparison.
wrapper_maker = WrapperMaker(
    action_wrapper,
    automation_wrapper,
    obs_wrapper,
    reward_wrapper,
    None,
    None,
)
evaluation_result = evaluate(
    scenario, path_to_overhand, alg, wrapper_maker, episode_length=2
)
(
    cum_reward,
    criticality_score,
    total_timesteps,
    actions_taken,
    obs_taken,
    info,
) = evaluation_result

y_axis_scale_obs = [[0, 1000], [0, 4000], [0, 550], [0, 8000], [0, 180], [0, 32]]
fig = plot_observations(obs_taken, y_axis_scale_obs)
fig2 = plot_actions_taken(actions_taken, "scenario1")
fig2[0].savefig(
    "src/main/rl/evaluation/plot_results/phase3_noise_combi_1_action_not_working_comp_with_working.png",
    format="png",
    dpi=300,
)
print(actions_taken)

In [None]:
# Starting state: column means of all starting-state columns except the criticality ones
cols = [
    name
    for name in df.columns
    if "starting" in name and "criticality" not in name
]
df.loc[:, cols].mean()

In [None]:
# Timesteps reached by every model for starting-state option 1
df.loc[:, "create_starting_state_option1_timesteps"]

In [None]:
# Check different models of a combination (see paths) for noise and why they fail in detail
import pandas as pd
from src.main.rl.utils.parser import parse_wrapper
from src.main.rl.utils.combined_parser import parse_information_from_path
from src.main.rl.evaluation.eval import evaluate
from src.main.rl.utils.utils import WrapperMaker
from src.main.rl.evaluation.wrapper.noise.obs_varies_wrapper import (
    ObservationVariesNegativeNoiseWrapper,
    ObservationVariesPositiveNoiseWrapper,
)
from src.main.rl.evaluation.plots.phase3_plots import (
    plot_observations,
    plot_actions_taken,
)
from src.main.rl.evaluation.eval import evaluate, evaluate_terminal_state_obs
from src.main.rl.utils.constants import (
    ALL_OBS_NOISE_WRAPPERS,
    ALL_DELAY_NOISE_WRAPPERS,
    STARTING_STATE_OPTION1,
    STARTING_STATE_OPTION2,
    STARTING_STATE_OPTION3,
)

# Model under inspection; swap in one of the commented alternatives to analyse another run.
path = "src/main/rl/models/scenario1/training_04_06/scenario1_ActionSpaceOption3Wrapper_ObservationOption4Wrapper_None_RewardOption2Wrapper_TD3_training_04_06_2"
# path = "src/main/rl/models/scenario1/training_04_06/scenario1_ActionSpaceOption3Wrapper_ObservationOption4Wrapper_None_RewardOption2Wrapper_TD3_training_04_06_4"
# path = "src/main/rl/models/scenario1/training_04_06/scenario1_ActionSpaceOption3Wrapper_ObservationOption4Wrapper_None_RewardOption2Wrapper_TD3_training_04_06_8"
# path =  "src/main/rl/models/scenario3/training_04_06/scenario3_ActionSpaceOption3Wrapper_ObservationOption3Wrapper_NPPAutomationWrapper_RewardOption2Wrapper_PPO_training_04_06_5"
path_to_overhand = f"{path}/best_model.zip"

action_wrapper, automation_wrapper, obs_wrapper, reward_wrapper = parse_wrapper(path)
scenario, alg, wrapper_maker = parse_information_from_path(path)
# Rebuild the wrapper maker from the parsed wrappers, with both trailing arguments set to None.
wrapper_maker = WrapperMaker(
    action_wrapper,
    automation_wrapper,
    obs_wrapper,
    reward_wrapper,
    None,
    None,
)

# Full evaluation, started from the first entry of STARTING_STATE_OPTION1.
evaluation_result = evaluate(
    scenario,
    path_to_overhand,
    alg,
    wrapper_maker,
    episode_length=250,
    starting_state=STARTING_STATE_OPTION1[0](),
)
(
    cum_reward,
    criticality_score,
    total_timesteps,
    actions_taken,
    obs_taken,
    info,
) = evaluation_result

# Terminal-state evaluation with a freshly created starting state of the same kind.
terminal_result = evaluate_terminal_state_obs(
    scenario,
    path_to_overhand,
    alg,
    wrapper_maker,
    episode_length=250,
    starting_state=STARTING_STATE_OPTION1[0](),
)
cum_reward2, obs_taken2, info2 = terminal_result
print(info2)
# Both evaluation variants must yield the same cumulative reward.
assert cum_reward2 == cum_reward

In [None]:
# Timesteps reached by every model for starting-state options 2a, 2b and 2c
df.loc[
    :,
    [
        "create_starting_state_option2a_timesteps",
        "create_starting_state_option2b_timesteps",
        "create_starting_state_option2c_timesteps",
    ],
]

In [None]:
# Box Plots for starting state per option and per combination using timesteps
def set_box_color(bp, color, idx=None):
    """Color all artists of one ``Axes.boxplot`` result uniformly.

    Args:
        bp: Dictionary returned by ``Axes.boxplot``; its "boxes", "whiskers",
            "caps", "medians" and "fliers" entries are recolored.
        color: Color applied to every artist.
        idx: Unused. Kept (now optional) so existing three-argument calls
            keep working.
    """
    # The original set the "caps" color twice; once is enough.
    for part in ("boxes", "whiskers", "caps", "medians", "fliers"):
        plt.setp(bp[part], color=color)


starting_state2 = df[
    [
        "combination",
        "create_starting_state_option2a_timesteps",
        "create_starting_state_option2b_timesteps",
        "create_starting_state_option2c_timesteps",
    ]
]

# Box order inside every group; matches the tick labels 2a, 2b, 2c.
order_of_col = [
    "create_starting_state_option2a_timesteps",
    "create_starting_state_option2b_timesteps",
    "create_starting_state_option2c_timesteps",
]
starting_state_labels = ["2a", "2b", "2c"]
# One color per combination, in the order of `paths` (Kombination 1 .. 6).
combination_colors = ["blue", "red", "grey", "yellow", "brown", "turquoise"]

fig, ax = plt.subplots(constrained_layout=True)
fig.set_figwidth(8)

# One group of three boxes per combination: groups start every 0.9 x-units and
# boxes are 0.2 apart (rounded so the positions equal the decimal literals).
for group_idx, (combination, color_name) in enumerate(zip(paths, combination_colors)):
    boxes = ax.boxplot(
        starting_state2[starting_state2["combination"] == combination][order_of_col],
        positions=[
            round(0.9 * group_idx + 0.2 * box_idx, 1)
            for box_idx in range(len(order_of_col))
        ],
        labels=starting_state_labels,
    )
    set_box_color(boxes, color_mapping[color_name], group_idx)

ax.set_xlabel("Startzustände")
ax.set_ylabel("Absolvierte Zeitschritte")
# Empty proxy lines provide one legend entry per combination.
for group_idx, color_name in enumerate(combination_colors):
    ax.plot([], c=color_mapping[color_name], label=f"Kombination {group_idx + 1}")
ax.legend(loc=[0.35, 0.5])

# Save before showing so the file is written from the fully drawn figure.
fig.savefig(
    "src/main/rl/evaluation/plot_results/phase3_starting_state_2_by_combi.png",
    format="png",
    dpi=300,
)
plt.show()

In [None]:
# Per column: number of entries equal to 250 (the combination column counts 0)
(starting_state2 == 250).sum()

In [None]:
# Runs that completed all 250 timesteps per starting-state-2 option, for
# combination 6 and combination 1. Both results are combined into one table:
# a notebook cell only displays its last expression, so the first of the two
# former stand-alone expressions was computed but never shown.
pd.DataFrame(
    {
        label: starting_state2[starting_state2["combination"] == combination]
        .eq(250)
        .sum()
        for label, combination in [
            (
                "Kombination 6",
                "src/main/rl/models/scenario3/training_04_06/scenario3_ActionSpaceOption3Wrapper_ObservationOption3Wrapper_NPPAutomationWrapper_RewardOption2Wrapper_PPO_training_04_06",
            ),
            (
                "Kombination 1",
                "src/main/rl/models/scenario1/training_04_06/scenario1_ActionSpaceOption3Wrapper_ObservationOption4Wrapper_None_RewardOption2Wrapper_TD3_training_04_06",
            ),
        ]
    }
)

In [None]:
# Number of entries equal to 250 for starting-state option 1
(df["create_starting_state_option1_timesteps"] == 250).sum()

In [None]:
# Starting-state option 3: completed timesteps plus the metadata columns.
starting_state3 = df[
    [
        "obs_wrapper",
        "combination",
        "automation_wrapper",
        "scenario",
        "create_starting_state_option3_timesteps",
    ]
]
# Mean per combination. numeric_only=True skips the string metadata columns,
# which raise a TypeError in a plain "mean" aggregation on pandas >= 2.0 and
# were silently dropped by older versions.
starting_state3.groupby("combination").mean(numeric_only=True)

In [None]:
# Per row: number of entries equal to 250
(starting_state3 == 250).sum(axis=1)

In [None]:
# Mean/max/min/std across the five starting-state mean columns, per combination
df_statistics_per_combination[
    [
        f"create_starting_state_option{option}_timesteps_mean"
        for option in ("1", "2a", "2b", "2c", "3")
    ]
].agg(["mean", "max", "min", "std"], axis=1)

In [None]:
# Same statistics as in the previous cell, rendered as a LaTeX table
# (combination path dropped from the index, values rounded to two decimals).
starting_state_mean_stats = df_statistics_per_combination[
    [
        f"create_starting_state_option{option}_timesteps_mean"
        for option in ("1", "2a", "2b", "2c", "3")
    ]
].agg(["mean", "max", "min", "std"], axis=1)
print(starting_state_mean_stats.droplevel(1).round(2).transpose().to_latex())

In [None]:
# Number of starting-state runs per combination that completed all 250 timesteps,
# summed over the five starting-state experiments and rendered as a LaTeX table.
starting_state_success_flags = (
    df[
        [
            "create_starting_state_option1_timesteps",
            "create_starting_state_option2a_timesteps",
            "create_starting_state_option2b_timesteps",
            "create_starting_state_option2c_timesteps",
            "create_starting_state_option3_timesteps",
            "combination",
        ]
    ]
    .set_index("combination")
    .eq(250)
)
starting_state_2 = (
    starting_state_success_flags.groupby("combination").sum().sum(axis=1).to_latex()
)

print(starting_state_2)

In [None]:
# All starting-state timestep columns plus the grouping metadata.
# .copy() makes this an independent frame: later cells assign into it via .loc,
# which on a plain slice of `df` triggers pandas' SettingWithCopyWarning.
starting_state_total = df[
    [
        "obs_wrapper",
        "automation_wrapper",
        "scenario",
        "create_starting_state_option1_timesteps",
        "create_starting_state_option2a_timesteps",
        "create_starting_state_option2b_timesteps",
        "create_starting_state_option2c_timesteps",
        "create_starting_state_option3_timesteps",
    ]
].copy()

In [None]:
# Total number of entries equal to 250 across all columns
(starting_state_total == 250).sum().sum()

In [None]:
# Number of model-experiment combinations that successfully executed all timesteps
# during the specific experiment. In total there were 660 such combinations.
# Fix: the noise columns used to list ObservationVariesNoiseWrapper1 twice and
# omit ObservationVariesPositiveNoiseWrapper, so one experiment was counted
# twice and another one not at all.
(
    df[
        [
            "DelayNoiseWrapperOption2_timesteps",
            "DelayNoiseWrapperOption1_timesteps",
            "ObservationVariesNoiseWrapper1_timesteps",
            "ObservationVariesPositiveNoiseWrapper_timesteps",
            "ObservationVariesNegativeNoiseWrapper_timesteps",
            "create_starting_state_option1_timesteps",
            "create_starting_state_option2a_timesteps",
            "create_starting_state_option2b_timesteps",
            "create_starting_state_option2c_timesteps",
            "create_starting_state_option3_timesteps",
        ]
    ]
    .eq(250)  # noise and starting-state experiments run for 250 timesteps
    .sum()
    .sum()
    + df["episode_length_1000_timesteps"].eq(1000).sum()  # length test runs for 1000
)

In [None]:
# Number of model-experiment combinations per combination that successfully executed all timesteps during the specific experiment.
# In total there were 110 such model-experiment combinations per combination.
# Careful: the order of the combinations might not be the same as in the thesis!

def get_counts_by_group(df, max_timesteps):
    """Count, per column, how many entries of ``df`` equal ``max_timesteps``.

    Args:
        df: Frame whose entries are compared element-wise.
        max_timesteps: Value an entry must equal to be counted.

    Returns:
        Series with one count per column of ``df``.
    """
    reached_maximum = df == max_timesteps
    return reached_maximum.sum()

# Successful runs (all 250 timesteps) per combination over the five noise experiments.
# Fix: ObservationVariesNoiseWrapper1 used to be listed twice while
# ObservationVariesPositiveNoiseWrapper was missing, so one experiment was
# counted twice and another one not at all.
counts_noise = (
    df[
        [
            "combination",
            "DelayNoiseWrapperOption2_timesteps",
            "DelayNoiseWrapperOption1_timesteps",
            "ObservationVariesNoiseWrapper1_timesteps",
            "ObservationVariesPositiveNoiseWrapper_timesteps",
            "ObservationVariesNegativeNoiseWrapper_timesteps",
        ]
    ]
    .groupby("combination")
    .apply(get_counts_by_group, 250)
).sum(axis=1)
# Successful runs (all 250 timesteps) per combination over the five starting-state experiments.
counts_starting_state = (
    df[
        [
            "combination",
            "create_starting_state_option1_timesteps",
            "create_starting_state_option2a_timesteps",
            "create_starting_state_option2b_timesteps",
            "create_starting_state_option2c_timesteps",
            "create_starting_state_option3_timesteps",
        ]
    ]
    .groupby("combination")
    .apply(get_counts_by_group, 250)
).sum(axis=1)
# Successful runs (all 1000 timesteps) per combination in the episode-length experiment.
counts_1000 = (
    df[["combination", "episode_length_1000_timesteps"]]
    .groupby("combination")
    .apply(get_counts_by_group, 1000)["episode_length_1000_timesteps"]
)

print("Counts over all modification experiments with noise:")
print(counts_noise)
print("Counts over all modification experiments with starting states:")
print(counts_starting_state)
# Fix: the message said "100 length" although the experiment runs 1000 timesteps.
print("Counts over all modification experiment with 1000 length:")
print(counts_1000)

print("Counts over all modification experiments:")
print(counts_noise + counts_starting_state + counts_1000)

In [None]:
# Starting state comparison grouped by scenario: mean completed timesteps,
# averaged over all starting-state experiments.
# Label missing automation wrappers as the string "NaN" so they form their own
# group. Rebinding via assign() keeps the cell idempotent and avoids assigning
# into a slice of `df` (SettingWithCopyWarning).
starting_state_total = starting_state_total.assign(
    automation_wrapper=starting_state_total["automation_wrapper"].fillna("NaN")
)
# Average only the timestep columns: a mean over the string metadata columns
# raises a TypeError on pandas >= 2.0 (older versions dropped them silently).
timestep_columns = [
    col for col in starting_state_total.columns if col.endswith("_timesteps")
]
starting_state_total.groupby("scenario")[timestep_columns].mean().round(2).mean(axis=1)

In [None]:
# Starting state comparison grouped by automation wrapper: mean completed
# timesteps, averaged over all starting-state experiments.
# Label missing automation wrappers as the string "NaN" so they form their own
# group. Rebinding via assign() keeps the cell idempotent and avoids assigning
# into a slice of `df` (SettingWithCopyWarning).
starting_state_total = starting_state_total.assign(
    automation_wrapper=starting_state_total["automation_wrapper"].fillna("NaN")
)
# Average only the timestep columns: a mean over the string metadata columns
# raises a TypeError on pandas >= 2.0 (older versions dropped them silently).
timestep_columns = [
    col for col in starting_state_total.columns if col.endswith("_timesteps")
]
starting_state_total.groupby("automation_wrapper")[timestep_columns].mean().round(
    2
).mean(axis=1)

In [None]:
# T-test (Welch) -> non normality therefore ttest can not be used / results have to be interpreted carefully
from scipy.stats import ttest_ind

# Pool the timesteps of all starting-state experiments into one sample per
# group (without / with NPPAutomationWrapper) and compare them with Welch's t-test.
metadata_columns = ["obs_wrapper", "scenario", "automation_wrapper"]
without_automation = starting_state_total["automation_wrapper"] == "NaN"
with_automation = starting_state_total["automation_wrapper"] == "NPPAutomationWrapper"

wo_NPP = (
    starting_state_total[without_automation]
    .drop(columns=metadata_columns)
    .stack()
    .reset_index(drop=True)
)
w_NPP = (
    starting_state_total[with_automation]
    .drop(columns=metadata_columns)
    .stack()
    .reset_index(drop=True)
)

# equal_var=False selects Welch's variant (no equal-variance assumption).
ttest_ind(wo_NPP, w_NPP, equal_var=False)

In [None]:
# Welch's t-test restricted to starting-state option 3: models without vs. with
# NPPAutomationWrapper.
option3_column = "create_starting_state_option3_timesteps"
wo_NPP = starting_state_total.loc[
    starting_state_total["automation_wrapper"] == "NaN", option3_column
]
w_NPP = starting_state_total.loc[
    starting_state_total["automation_wrapper"] == "NPPAutomationWrapper",
    option3_column,
]
ttest_ind(wo_NPP, w_NPP, equal_var=False)