In [None]:
import os

# Hide every CUDA device from the libraries loaded by this kernel so the
# analysis runs on CPU only. The variable is set before `tensorflow` is
# imported further down in this cell.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import json
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import tensorflow as tf

In [None]:
def get_exp_config(experiment: Path) -> dict:
    """Load the JSON configuration stored inside an experiment directory.

    Args:
        experiment: Directory of a single experiment run. It must contain a
            ``config.json`` file.

    Returns:
        The parsed content of ``config.json`` (callers in this notebook index
        it by key, e.g. ``config["experiment"]``).

    Raises:
        FileNotFoundError: If ``config.json`` is missing.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, which differs between operating systems and locales.
    with open(experiment / "config.json", "r", encoding="utf-8") as f:
        return json.load(f)

def get_exp_stats(experiment: Path) -> pd.DataFrame:
    """Read the ``stats.csv`` file of an experiment directory into a DataFrame."""
    stats_file = experiment / "stats.csv"
    stats_frame = pd.read_csv(stats_file)
    return stats_frame


def get_best_checkpoint(experiment: Path):
    """Look up a checkpoint in the experiment's ``best_checkpoints`` folder.

    The result is whatever ``tf.train.latest_checkpoint`` reports for that
    directory.
    """
    checkpoint_dir = experiment / "best_checkpoints"
    return tf.train.latest_checkpoint(checkpoint_dir)


In [None]:
# Root folder holding every experiment group, relative to the current
# working directory of the kernel.
EXPDIR = Path("../experiments")

# The experiment group analysed in this notebook.
EXPERIMENTS_PATH = EXPDIR / "simple_ppo_envs_comparison"

# One entry per experiment directory. `iterdir()` yields entries in arbitrary
# order and also returns plain files (e.g. OS metadata files), which would
# break the loaders below; keep directories only and sort them so that every
# run of the notebook processes the experiments in the same order.
EXPERIMENTS = sorted(path for path in EXPERIMENTS_PATH.iterdir() if path.is_dir())

In [None]:
def expanded_stats(experiment: Path):
    """Return an experiment's stats table tagged with its environment id.

    The ``env_id`` is read from the ``"experiment"`` section of the
    experiment's config and stored in a new ``env_id`` column, repeated on
    every row.
    """
    frame = get_exp_stats(experiment)
    env_id = get_exp_config(experiment)["experiment"]["env_id"]
    return frame.assign(env_id=env_id)

In [None]:
# Stack the per-experiment tables into one long table. No `ignore_index` is
# passed, so every row keeps the index it had in its own experiment.
stats = pd.concat(
    [expanded_stats(experiment_dir) for experiment_dir in EXPERIMENTS]
)

In [None]:
# Learning curves: final reward against total environment steps, one line
# per environment id.
plt.figure(figsize=(14, 8))
sns.lineplot(
    data=stats,
    x="env/total_steps",
    y="env/final_reward",
    hue="env_id",
)
plt.grid()

In [None]:
# 98th percentile of the final reward, computed separately per environment.
q98 = (
    stats
    .groupby("env_id")["env/final_reward"]
    .quantile(0.98)
)

In [None]:
# Display the per-environment 98th-percentile final rewards.
q98

In [None]:
# Bar chart of the 98th-percentile final reward of each environment, with
# rotated tick labels on the x axis.
sns.barplot(data=q98)
plt.xticks(rotation=76)

In [None]:
# An environment is considered solved at the first row whose final reward
# exceeds this fraction of the best final reward observed for it.
SOLVED_ITER_REWARD_THRESHOLD = 0.95

# Minimum 98th-percentile final reward for an environment to count as solved.
# This value was chosen for the training runs analysed here: which
# environments were solved was determined visually from the training plots.
# If the experiments are repeated it may need to be changed; there is no
# guarantee that it generalises.
SOLVED_Q98_COEF = 0.78


def get_solved_stats(df):
    """Summarise whether, and when, the runs in ``df`` solved their environment.

    Args:
        df: Rows of the stats table for one environment. Must contain the
            columns ``env/final_reward`` and ``env/total_steps``. The index
            may contain repeated labels.

    Returns:
        A ``pd.Series`` with the entries:

        * ``solved``: ``True`` when the 98th percentile of the final reward
          reaches ``SOLVED_Q98_COEF``.
        * ``solved_iter``: index label of the first row whose final reward
          exceeds ``solved_reward`` (``-1`` when not solved).
        * ``solved_steps``: ``env/total_steps`` of that same row (``-1`` when
          not solved).
        * ``q98_final_reward``: the 98th percentile of the final reward.
        * ``solved_reward``: ``SOLVED_ITER_REWARD_THRESHOLD`` times the best
          final reward (NaN when not solved).
    """
    rewards = df["env/final_reward"]
    q98 = rewards.quantile(0.98)

    # Written as `not (q98 >= ...)` rather than `q98 < ...` so that a NaN
    # quantile (empty group, or a group without any valid reward) is reported
    # as unsolved instead of falling through to the solved branch.
    if not (q98 >= SOLVED_Q98_COEF):
        return pd.Series(
            {
                "solved": False,
                "solved_iter": -1,
                "solved_steps": -1,
                "q98_final_reward": q98,
                "solved_reward": float("nan"),
            }
        )

    solved_reward = SOLVED_ITER_REWARD_THRESHOLD * rewards.max()
    solved_flags = (rewards > solved_reward).to_numpy()

    # Select the row by position, not by label: index labels may repeat when
    # several tables were concatenated, and a label lookup would then return
    # several rows instead of a single value.
    first_solved_pos = int(solved_flags.argmax())
    solved_iter = df.index[first_solved_pos]
    solved_steps = df["env/total_steps"].iloc[first_solved_pos]

    return pd.Series(
        {
            "solved": True,
            "solved_iter": solved_iter,
            "solved_steps": solved_steps,
            "q98_final_reward": q98,
            "solved_reward": solved_reward,
        }
    )


In [None]:
# Summarise every environment with `get_solved_stats`; the grouping column
# is left out of the frame handed to the function (`include_groups=False`).
solved_stats = (
    stats
    .groupby("env_id")
    .apply(get_solved_stats, include_groups=False)
)

In [None]:
# Display the per-environment summary (solved and unsolved environments).
solved_stats

In [None]:
# Copy the summary table to the system clipboard.
# NOTE(review): this needs a clipboard backend on the machine running the
# kernel and will presumably fail on a headless server — confirm before
# relying on "Restart & Run All".
solved_stats.to_clipboard()

In [None]:
# From here on, keep only the environments flagged as solved.
solved_stats = solved_stats.loc[solved_stats["solved"]]

In [None]:
# Display the summary again, now restricted to the solved environments.
solved_stats

In [None]:
# Bar chart of the steps at which each solved environment was solved, with
# the values sorted in ascending order before plotting.
sns.barplot(data=solved_stats["solved_steps"].sort_values())
plt.grid()
plt.xticks(rotation=76)