In [1]:
# imports
%cd ..
import os
from tqdm.notebook import tqdm
from mango.environments import frozen_lake
from mango import Agent, Mango
from frozen_lake_tests import utils_plot, utils_save, utils_sim

/home/davide_sartor/MANGO-lite


Experiment Parameters

In [42]:
# parameters for the environment
map_base = 2
map_scale = 4
p_frozen = None
one_shot = False

plot_confront = True
plot_training_evolution = False
plot_qvalues = False
ignore_agent = False

In [43]:
# load agent models one by one
dir_path = utils_save.path_to_save_dir(map_base, map_scale, p_frozen, one_shot)
files = sorted(os.listdir(dir_path + "models/"))
mango_agent_files = [name for name in files if name.startswith("mango_agent")]
run_ids = [f"run_{name[-9:-7]}" for name in mango_agent_files]
mango_agents = [
    utils_save.load_from_file(dir_path + "models/" + file_name) for file_name in mango_agent_files
]
if ignore_agent:
    normal_agents = mango_agents
else:
    normal_agent_files = [name for name in files if name.startswith("normal_agent")]
    normal_agents = [
        utils_save.load_from_file(dir_path + "models/" + file_name)
        for file_name in normal_agent_files
    ]

if plot_confront:
    os.makedirs(dir_path + "comparisons/", exist_ok=True)
    utils_plot.plot_confront_loss_reward_avg(
        agents=[normal_agents, mango_agents] if not ignore_agent else [mango_agents],
        labels=["vanilla", "mango"] if not ignore_agent else ["mango"],
        save_path=f"{dir_path}comparisons/compare_reward_avg.pdf",
    )

for mango_agent, normal_agent, run_id_str in zip(mango_agents, normal_agents, run_ids):
    if plot_confront:
        os.makedirs(dir_path + "comparisons/", exist_ok=True)
        utils_plot.plot_confront_loss_reward(
            agents=[normal_agent, mango_agent] if not ignore_agent else [mango_agent],
            labels=["vanilla", "mango"] if not ignore_agent else ["mango"],
            save_path=f"{dir_path}comparisons/compare_reward_{run_id_str}.pdf",
        )

    if plot_training_evolution:
        os.makedirs(dir_path + "training/", exist_ok=True)
        if not ignore_agent:
            utils_plot.plot_normal_agent_loss_reward(
                normal_agent,
                save_path=f"{dir_path}training/train_results_{run_id_str}_normal_agent.pdf",
            )
        utils_plot.plot_mango_agent_loss_reward(
            mango_agent,
            save_path=f"{dir_path}training/train_results_{run_id_str}_mango_agent.pdf",
        )

    if plot_qvalues:
        os.makedirs(dir_path + "qvalues/", exist_ok=True)
        if not ignore_agent:
            normal_agent.reset()
            trajectory, rewards = normal_agent.run_episode(
                randomness=0.0, episode_length=4**map_scale
            )
            frozen_lake.plot_utils.plot_all_qvals_normal_agent(
                normal_agent,
                trajectory,
                save_path=f"{dir_path}qvalues/qvalues_{run_id_str}_normal_agent.pdf",
            )
        mango_agent.reset()
        trajectory, rewards = mango_agent.run_episode(randomness=0.0, episode_length=4**map_scale)
        frozen_lake.plot_utils.plot_all_qvals_mango_agent(
            mango_agent,
            trajectory,
            save_path=f"{dir_path}qvalues/qvalues_{run_id_str}_mango_agent.pdf",
        )

ValueError: max() arg is an empty sequence

<Figure size 1000x400 with 0 Axes>

In [None]:
import numpy as np
import matplotlib.pyplot as plt


def smooth(signal, window=0.05):
    window = max(3, int(len(signal) * window))
    if len(signal) < 10:
        return signal
    signal = np.array([s for s in signal if s is not None])
    window_array = np.ones(window) / window
    return np.convolve(signal, window_array, mode="valid")


annealing_ep, max_ep, _, _ = utils_sim.train_params(map_base, map_scale, p, one_shot)
max_len = annealing_ep + max_ep
p_frozen = [1.0, 0.8, 0.5, 0.3]

for p in p_frozen:
    dir_path = utils_save.path_to_save_dir(map_base, map_scale, p, one_shot)
    files = sorted(os.listdir(dir_path + "models/"))
    mango_agent_files = [name for name in files if name.startswith("mango_agent")]
    normal_agent_files = [name for name in files if name.startswith("normal_agent")]

    def get_rew_statistics(agent_files):
        reward_logs = []
        for agent_file in agent_files:
            agent: Agent | Mango = utils_save.load_from_file(dir_path + "models/" + agent_file)
            rew = smooth(agent.reward_log[1::2])
            reward_logs.append(np.pad(rew, (0, max_len - len(rew)), "edge"))
        mean = np.mean(reward_logs, axis=0)
        ci95 = 1.96 * np.std(reward_logs, axis=0) / np.sqrt(len(reward_logs))
        return mean, (mean - ci95, mean + ci95)

    mean, ci = get_rew_statistics(mango_agent_files)
    plt.plot(mean, "-", label=f"p={p} Mango")
    #plt.fill_between(range(len(mean)), ci[0], ci[1], alpha=0.3)
    mean, ci = get_rew_statistics(normal_agent_files)
    plt.plot(mean, "--", label=f"p={p} Normal", color=plt.gca().lines[-1].get_color())
    #plt.fill_between(range(len(mean)), ci[0], ci[1], alpha=0.3)

plt.ylim((-0.05, 1.05))
plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0.0)

NameError: name 'p' is not defined