In [None]:
# This notebook plots selected training curves and computes the wall-clock training time per algorithm.

In [None]:
import matplotlib
import matplotlib.pyplot as plt
from src.main.rl.utils.constants import color_mapping
import pandas as pd
import glob
from tbparse import SummaryReader

from src.main.rl.utils.parser import parse_alg_name

# Wall-clock duration of every training run, cached as CSV so that the event
# logs only have to be parsed when no usable cache exists.
EXECUTION_TIMES_CSV = "src/main/rl/evaluation/output/execution_times.csv"

try:
    # index_col=0 restores the log-path index written by to_csv below. Without
    # it the paths come back as an extra string column and end up in the
    # aggregation at the bottom of this cell.
    df_times = pd.read_csv(EXECUTION_TIMES_CSV, index_col=0)
except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
    # Missing or unreadable cache: fall through and rebuild it from the logs.
    df_times = pd.DataFrame()

if df_times.empty:
    run_times = {}
    for log_path in glob.glob("src/main/rl/logs/*/*/*/*"):
        scalars = SummaryReader(log_path, extra_columns={"wall_time"}).scalars
        if scalars.empty:
            # Nothing was logged for this path, so there is no duration.
            continue
        wall_time = pd.to_datetime(scalars["wall_time"], unit="s")
        # Duration = span between the first and the last logged scalar.
        run_times[log_path] = [
            (wall_time.max() - wall_time.min()).total_seconds(),
            parse_alg_name(log_path),
        ]
    df_times = pd.DataFrame.from_dict(
        run_times, orient="index", columns=["seconds", "alg"]
    )
    df_times.to_csv(EXECUTION_TIMES_CSV)

# Mean and total duration per algorithm, converted from seconds to minutes.
df_times.groupby("alg")[["seconds"]].agg(["mean", "sum"]) / 60

In [None]:
# TD3: evaluation return over training for a hand-picked subset of runs.
all_files = glob.glob("src/main/rl/logs/*/*/*TD3*/*")

# NOTE(review): these are positions in the glob result, whose order is not
# guaranteed to be the same on every machine - confirm that they still select
# the intended runs.
chosen_iterations = [18, 32, 110, 289]
chosen_files = [all_files[i] for i in chosen_iterations]
palette = list(color_mapping.values())

fig, ax = plt.subplots(constrained_layout=True, figsize=(6, 3))
fig.set_dpi(300)
for idx, item in enumerate(chosen_files):
    reader = SummaryReader(item, extra_columns={"dir_name", "file_name"})
    df = reader.scalars.query("tag=='eval/mean_reward'")
    # The idx-th chosen run is drawn in the idx-th colour of color_mapping.
    ax.plot(df["step"], df["value"], palette[idx])

# Axis setup is independent of the individual run, so it is done once.
# Tick labels use "." as thousands separator, e.g. 1000000 -> "1.000.000".
ax.xaxis.set_major_formatter(
    matplotlib.ticker.FuncFormatter(
        lambda x, p: format(int(x), ",").replace(",", ".")
    )
)
ax.set_xlabel("Zeitschritte")
ax.set_ylabel("Return")
plt.show()

In [None]:
# PPO: evaluation return over training for a hand-picked subset of runs.
all_files = glob.glob("src/main/rl/logs/*/*/*PPO*/*")

# NOTE(review): these are positions in the glob result, whose order is not
# guaranteed to be the same on every machine - confirm that they still select
# the intended runs.
chosen_iterations = [140, 182, 284, 494, 584, 843]
chosen_files = [all_files[i] for i in chosen_iterations]
palette = list(color_mapping.values())

fig, ax = plt.subplots(constrained_layout=True, figsize=(6, 3))
fig.set_dpi(300)
for idx, item in enumerate(chosen_files):
    reader = SummaryReader(item, extra_columns={"dir_name", "file_name"})
    df = reader.scalars.query("tag=='eval/mean_reward'")
    # The idx-th chosen run is drawn in the idx-th colour of color_mapping.
    ax.plot(df["step"], df["value"], palette[idx])

# Axis setup is independent of the individual run, so it is done once.
# Tick labels use "." as thousands separator, e.g. 1000000 -> "1.000.000".
ax.xaxis.set_major_formatter(
    matplotlib.ticker.FuncFormatter(
        lambda x, p: format(int(x), ",").replace(",", ".")
    )
)
ax.set_xlabel("Zeitschritte")
ax.set_ylabel("Return")
plt.show()

In [None]:
# A2C: evaluation return over training for a hand-picked subset of runs.
all_files = glob.glob("src/main/rl/logs/*/*/*A2C*/*")

# NOTE(review): these are positions in the glob result, whose order is not
# guaranteed to be the same on every machine - confirm that they still select
# the intended runs.
chosen_iterations = [45, 129, 192, 534, 679, 892]
chosen_files = [all_files[i] for i in chosen_iterations]
palette = list(color_mapping.values())

fig, ax = plt.subplots(constrained_layout=True, figsize=(6, 3))
fig.set_dpi(300)
for idx, item in enumerate(chosen_files):
    reader = SummaryReader(item, extra_columns={"dir_name", "file_name"})
    df = reader.scalars.query("tag=='eval/mean_reward'")
    # The idx-th chosen run is drawn in the idx-th colour of color_mapping.
    ax.plot(df["step"], df["value"], palette[idx])

# Axis setup is independent of the individual run, so it is done once.
# Tick labels use "." as thousands separator, e.g. 1000000 -> "1.000.000".
ax.xaxis.set_major_formatter(
    matplotlib.ticker.FuncFormatter(
        lambda x, p: format(int(x), ",").replace(",", ".")
    )
)
ax.set_xlabel("Zeitschritte")
ax.set_ylabel("Return")
plt.show()

In [None]:
# SAC: evaluation return over training for a hand-picked subset of runs.
all_files = glob.glob("src/main/rl/logs/*/*/*SAC*/*")

# NOTE(review): these are positions in the glob result, whose order is not
# guaranteed to be the same on every machine - confirm that they still select
# the intended runs.
chosen_iterations = [3, 44, 156, 192, 248, 293]
chosen_files = [all_files[i] for i in chosen_iterations]
palette = list(color_mapping.values())

fig, ax = plt.subplots(constrained_layout=True, figsize=(6, 3))
fig.set_dpi(300)
for idx, item in enumerate(chosen_files):
    reader = SummaryReader(item, extra_columns={"dir_name", "file_name"})
    df = reader.scalars.query("tag=='eval/mean_reward'")
    # The idx-th chosen run is drawn in the idx-th colour of color_mapping.
    ax.plot(df["step"], df["value"], palette[idx])

# Axis setup is independent of the individual run, so it is done once.
# Tick labels use "." as thousands separator, e.g. 1000000 -> "1.000.000".
ax.xaxis.set_major_formatter(
    matplotlib.ticker.FuncFormatter(
        lambda x, p: format(int(x), ",").replace(",", ".")
    )
)
ax.set_xlabel("Zeitschritte")
ax.set_ylabel("Return")
plt.show()

In [None]:
# Sparse Rewards
# One list of log paths per scenario. The lists are sorted because the plot
# below assigns colours by list position; glob alone returns the paths in no
# particular order.
all_files_scen1 = sorted(glob.glob("src/main/rl/logs_sparse_rewards/scen1/*/*/*"))
all_files_scen2 = sorted(glob.glob("src/main/rl/logs_sparse_rewards/scen2/*/*/*"))
all_files_scen3 = sorted(glob.glob("src/main/rl/logs_sparse_rewards/scen3/*/*/*"))


def print_sparse_rewards_color(all_files):
    """Plot the ``eval/mean_reward`` curve of every given run in its own colour.

    A new figure is created and shown on every call.

    Args:
        all_files: Log paths handed to ``SummaryReader``, one per run. The
            i-th run is drawn with the i-th value of ``color_mapping``, so an
            ``IndexError`` is raised if there are more runs than colours.
    """
    palette = list(color_mapping.values())
    fig, ax = plt.subplots(constrained_layout=True, figsize=(6, 3))
    fig.set_dpi(300)
    for idx, item in enumerate(all_files):
        reader = SummaryReader(item, extra_columns={"dir_name", "file_name"})
        rewards = reader.scalars.query("tag=='eval/mean_reward'")
        ax.plot(rewards["step"], rewards["value"], palette[idx])

    # Axis setup is independent of the individual run, so it is done once.
    # Ticks show the step count divided by one million.
    ax.xaxis.set_major_formatter(
        matplotlib.ticker.FuncFormatter(
            lambda x, p: format(int(x) / 1000000, ",").replace(",", ".")
        )
    )
    ax.set_xlabel("Zeitschritte in Millionen")
    ax.set_ylabel("Return")
    plt.show()


# One figure per sparse-reward scenario, in scenario order.
for scenario_files in (all_files_scen1, all_files_scen2, all_files_scen3):
    print_sparse_rewards_color(scenario_files)

In [None]:
# Sparse Rewards
# One list of log paths per scenario, sorted so that the runs are always
# processed in the same order; glob alone returns them in no particular order.
all_files_scen1 = sorted(glob.glob("src/main/rl/logs_sparse_rewards/scen1/*/*/*"))
all_files_scen2 = sorted(glob.glob("src/main/rl/logs_sparse_rewards/scen2/*/*/*"))
all_files_scen3 = sorted(glob.glob("src/main/rl/logs_sparse_rewards/scen3/*/*/*"))


def print_sparse_rewards(all_files: list, number: int):
    """Plot the ``eval/mean_reward`` curves of two groups of runs with a legend.

    Runs whose path contains ``"None"`` form the first group, all other runs
    the second. A new figure is created and shown on every call.

    Args:
        all_files: Log paths handed to ``SummaryReader``, one per run.
        number: Number shown in the first legend entry ("Kombination
            {number}"); the second entry is labelled with ``number + 1``.
    """
    palette = list(color_mapping.values())
    # NOTE(review): the curves take their colour by position (first/second
    # value of color_mapping) while the legend below looks colours up by the
    # keys "yellow"/"red". The legend only matches the curves if those are the
    # first two entries of color_mapping - confirm.
    none_color, other_color = palette[0], palette[1]

    fig, ax = plt.subplots(constrained_layout=True, figsize=(6, 3))
    fig.set_dpi(300)
    for item in all_files:
        reader = SummaryReader(item, extra_columns={"dir_name", "file_name"})
        rewards = reader.scalars.query("tag=='eval/mean_reward'")
        ax.plot(
            rewards["step"],
            rewards["value"],
            none_color if "None" in item else other_color,
        )

    # Axis setup is independent of the individual run, so it is done once.
    # Ticks show the step count divided by one million.
    ax.xaxis.set_major_formatter(
        matplotlib.ticker.FuncFormatter(
            lambda x, p: format(int(x) / 1000000, ",").replace(",", ".")
        )
    )
    ax.set_xlabel("Zeitschritte in Millionen")
    ax.set_ylabel("Return")

    # Empty lines that exist only to give the legend one entry per group.
    ax.plot([], c=color_mapping["yellow"], label=f"Kombination {number}")
    ax.plot([], c=color_mapping["red"], label=f"Kombination {number+1}")
    ax.legend()
    plt.show()


# One figure per scenario; each scenario covers two consecutive combination
# numbers, so the first legend number advances by two.
for scenario_files, first_combination in (
    (all_files_scen1, 1),
    (all_files_scen2, 3),
    (all_files_scen3, 5),
):
    print_sparse_rewards(scenario_files, first_combination)