In [None]:

%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
# Notebook-wide matplotlib defaults: base font size 12 and 300 DPI figures.
# Both lines mutate the global rcParams, so they apply to every figure created
# after this cell has run.
plt.rcParams.update({'font.size': 12})
plt.rcParams['figure.dpi'] = 300


In [None]:

def visualize_paths_on_benchmark_maps(env, policy, map_folder="maps/", num_maps=4, grid_cols=2, save=False,
                                      save_path="trajectories.png"):
    """Roll out ``policy`` on a series of saved maps and draw each path in a subplot grid.

    For every index ``i`` in ``range(num_maps)`` the file ``map_<i>.npy`` inside
    ``map_folder`` is loaded with ``env.load_map``, ``policy.rollout(env)`` is called,
    and the returned path is drawn in green on top of ``env.cost_map`` (blue colormap)
    and ``env.risk_map`` (red colormap).

    Args:
        env: Environment object exposing ``load_map(path)``, ``cost_map`` and
            ``risk_map``. Presumably both maps are 2-D arrays of the same shape --
            TODO confirm against the environment class.
        policy: Object exposing ``rollout(env)`` that returns an iterable of points.
            Element ``p[0]`` is plotted on the horizontal axis and ``p[1]`` on the
            vertical axis; whether that matches the maps' (row, col) convention
            should be verified against the policy implementation.
        map_folder: Directory containing ``map_0.npy``, ``map_1.npy``, ...
            A trailing separator is accepted.
        num_maps: Number of maps to visualize (must be >= 1).
        grid_cols: Number of subplot columns (must be >= 1).
        save: When true, the figure is written to ``save_path`` before being shown.
        save_path: Output file used when ``save`` is true.

    Raises:
        ValueError: If ``num_maps`` or ``grid_cols`` is smaller than 1.
    """
    import os  # function-scope import so the notebook's import cell needs no change

    if num_maps < 1 or grid_cols < 1:
        raise ValueError("num_maps and grid_cols must both be at least 1")

    grid_rows = int(np.ceil(num_maps / grid_cols))
    # squeeze=False always yields a 2-D array of Axes, so no special-casing of
    # single-row / single-column / single-cell grids is needed.
    fig, axs = plt.subplots(grid_rows, grid_cols, figsize=(grid_cols * 4, grid_rows * 4),
                            dpi=300, squeeze=False)
    axs = axs.ravel()

    for idx in range(num_maps):
        ax = axs[idx]
        # os.path.join avoids the doubled separator produced by "maps/" + "/map_0.npy".
        env.load_map(os.path.join(map_folder, f"map_{idx}.npy"))
        path = policy.rollout(env)
        ax.imshow(env.cost_map, cmap='Blues', alpha=0.3, origin='lower')
        ax.imshow(env.risk_map, cmap='Reds', alpha=0.3, origin='lower')
        ax.plot([p[0] for p in path], [p[1] for p in path], color='tab:green')
        ax.set_title(f"Map {idx}")
        ax.axis('off')

    # Hide the leftover cells when num_maps does not fill the grid completely.
    for unused_ax in axs[num_maps:]:
        unused_ax.axis('off')

    fig.tight_layout()
    if save:
        fig.savefig(save_path, dpi=300)
    plt.show()


In [None]:

def smooth(x, window=10):
    """Return the moving average of ``x`` over ``window`` consecutive samples.

    Only fully-overlapping windows are kept (``mode='valid'``), so the result has
    ``len(x) - window + 1`` entries.

    The window is clamped to the range ``[1, len(x)]``. Without the clamp,
    ``np.convolve`` silently swaps its operands when the kernel is longer than the
    signal and returns ``window - len(x) + 1`` copies of ``sum(x) / window``,
    which is not a moving average.

    Args:
        x: 1-D sequence of numbers.
        window: Requested number of samples per average.

    Returns:
        1-D ``numpy.ndarray`` of averaged values; empty when ``x`` is empty.
    """
    values = np.atleast_1d(x)
    if values.size == 0:
        # np.convolve rejects empty input; an empty series smooths to an empty series.
        return np.array([], dtype=float)
    width = max(1, min(int(window), len(values)))
    return np.convolve(values, np.ones(width) / width, mode='valid')

def plot_mean_reward_curves(all_rewards, names, window=10):
    """Plot the smoothed across-seed mean reward per model with a 95% CI band.

    For each model the per-episode mean and standard deviation over seeds are
    computed, the half-width of the confidence interval is taken as
    ``1.96 * std / sqrt(n_seeds)`` (normal approximation, NumPy's default
    ``ddof=0`` standard deviation), and both the mean and the half-width are
    passed through ``smooth`` before plotting.

    Because ``smooth`` keeps only fully-overlapping windows, x position ``i`` on
    the plot is the average over episodes ``i .. i + window - 1``.

    Args:
        all_rewards: One entry per model; each entry is a sequence of runs
            (one per seed), and each run is a sequence of per-episode rewards.
            Runs of unequal length are truncated to the shortest run of that model.
        names: Legend label for each model, in the same order as ``all_rewards``.
        window: Moving-average window in episodes; clamped to the run length.
    """
    colors = plt.cm.tab10.colors
    fig, ax = plt.subplots(figsize=(10, 5))
    for idx, (runs, name) in enumerate(zip(all_rewards, names)):
        if len(runs) == 0:
            continue  # nothing recorded for this model
        # Align seeds on a common episode count so they stack into a 2-D array.
        n_episodes = min(len(run) for run in runs)
        if n_episodes == 0:
            continue
        data = np.array([np.asarray(run, dtype=float)[:n_episodes] for run in runs])

        mean = data.mean(axis=0)
        ci = 1.96 * data.std(axis=0) / np.sqrt(data.shape[0])

        # Keep the window within the data so the convolution stays a true moving average.
        eff_window = max(1, min(int(window), n_episodes))
        sm_mean = smooth(mean, eff_window)
        sm_ci = smooth(ci, eff_window)

        episodes = np.arange(len(sm_mean))
        color = colors[idx % len(colors)]  # cycle the palette beyond 10 models
        ax.plot(episodes, sm_mean, label=name, color=color, lw=2)
        ax.fill_between(episodes, sm_mean - sm_ci, sm_mean + sm_ci, color=color, alpha=0.2)

    ax.set_title('Training Reward Curves', fontsize=14)
    ax.set_xlabel('Episode', fontsize=12)
    ax.set_ylabel('External Reward', fontsize=12)
    ax.legend()
    ax.grid(True, linestyle='--', alpha=0.5)
    fig.tight_layout()
    plt.show()


In [None]:

def plot_grouped_bars(names, success, steps, planner_use, intrinsic):
    """Draw a grouped bar chart comparing four evaluation metrics across models.

    Each metric argument holds one collection of samples per model; the bar
    height is the sample mean and the error bar is the sample standard
    deviation. Success and planner-usage values are multiplied by 100 before
    plotting (presumably they are fractions in [0, 1] -- verify against the
    caller), while steps and intrinsic reward are plotted unscaled.

    Args:
        names: Model names, used as x tick labels.
        success: Per-model success samples.
        steps: Per-model step-count samples.
        planner_use: Per-model planner-usage samples.
        intrinsic: Per-model intrinsic-reward samples.
    """
    # (legend label, per-model samples, display scale) in left-to-right bar order.
    series = (
        ('Success Rate (%)', success, 100),
        ('Planner Usage (%)', planner_use, 100),
        ('Avg Steps', steps, 1),
        ('Avg Intrinsic', intrinsic, 1),
    )
    bar_width = 0.18
    centers = np.arange(len(names))

    fig, ax = plt.subplots(figsize=(10,6))
    for slot, (label, per_model, scale) in enumerate(series):
        heights = [np.mean(samples) * scale for samples in per_model]
        errors = [np.std(samples) * scale for samples in per_model]
        # Offsets of -1.5, -0.5, 0.5 and 1.5 bar widths center the four bars on each tick.
        ax.bar(centers + (slot-1.5)*bar_width, heights, bar_width, yerr=errors, capsize=4, label=label)

    ax.set_xticks(centers)
    ax.set_xticklabels(names, rotation=30)
    ax.set_ylabel('Value')
    ax.set_title('Comparative Evaluation of Exploration Methods', fontsize=14)
    ax.legend()
    ax.grid(axis='y', linestyle='--', alpha=0.5)
    plt.tight_layout()
    plt.show()


In [None]:

def plot_generalization_results(names, clean_m, clean_s, noisy_m, noisy_s):
    """Plot paired bars per model comparing clean and noisy average reward.

    Args:
        names: Model names, used as x tick labels.
        clean_m: Bar heights for the 'Clean' condition, one per model.
        clean_s: Error-bar sizes for the 'Clean' condition, one per model.
        noisy_m: Bar heights for the 'Noisy' condition, one per model.
        noisy_s: Error-bar sizes for the 'Noisy' condition, one per model.
    """
    bar_width = 0.35
    positions = np.arange(len(names))
    fig, ax = plt.subplots(figsize=(8,5))

    # (horizontal shift, heights, error bars, legend label, color) per condition.
    conditions = (
        (-bar_width/2, clean_m, clean_s, 'Clean', 'tab:blue'),
        (bar_width/2, noisy_m, noisy_s, 'Noisy', 'tab:orange'),
    )
    for shift, heights, errors, label, color in conditions:
        ax.bar(positions + shift, heights, bar_width, yerr=errors, capsize=4, label=label, color=color)

    ax.set_xticks(positions)
    ax.set_xticklabels(names, rotation=30)
    ax.set_ylabel('Average Reward')
    ax.set_title('Generalization Performance', fontsize=14)
    ax.legend()
    ax.grid(axis='y', linestyle='--', alpha=0.5)
    plt.tight_layout()
    plt.show()


### Usage Examples

In [None]:

# Example: assuming we collected reward arrays for 3 seeds per model
# reward_runs = [ [seed1_rewards, seed2_rewards, seed3_rewards], ... ]
# plot_mean_reward_curves(reward_runs, model_names)

# After training, compare evaluation metrics and generalization across models
# plot_grouped_bars(model_names, success_rates, steps_to_goal, planner_usages, intrinsic_rewards)
# plot_generalization_results(model_names, clean_means, clean_stds, noisy_means, noisy_stds)
