In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import matplotlib.patches as mpatches
%matplotlib tk
import ast
import re
import json
from tabulate import tabulate
import sys
import os
import joblib
from sklearn.linear_model import LinearRegression

current_dir = os.getcwd()
ppo_experiments_dir = os.path.join(current_dir, "ppo_experiments")
sys.path.append(ppo_experiments_dir)

from ppo_experiments.utilities import plot_multiple_results, read_monitor_file

In [2]:
# Global variables
# Gymnasium environment IDs. They are matched as substrings of experiment paths and, for
# non-ContainerGym experiments, used as sub-directory names and plot labels.
# Commented-out entries are currently excluded.
gymnasium_envs = ['Acrobot-v1',
                  'Pendulum-v1',
                  'LunarLander-v3',
                  'BipedalWalker-v3',
                  # 'Pong-v5',
                  # 'Ant-v5',
                  # 'Humanoid-v5'
                  ]
# Number of models per experiment: seed sub-directories are visited as range(n_models) ('00', '01', ...),
# and the value is part of the 'Mean and std dev of all <n_models> models:' header parsed from test_results.txt.
n_models = 5

In [3]:
def get_best_and_median_policy_results(experiment_dir):
    """
    Retrieve cumul. reward and episode length statistics on test environments for the best and median policies
    from the 'test_results.txt' log file generated during the experiment.

    The environment name is derived from the path: a '<n>containers_<m>presses' fragment yields 'CG_n<n>_m<m>';
    otherwise the first entry of the global ``gymnasium_envs`` contained in the path is used.

    :param experiment_dir: Path to experiment directory (with or without a trailing path separator)
    :return: False (after printing a message) when no environment can be identified from the path; otherwise the
        tuple (env_name, best_seed, best_model_res, median_seed, median_model_res), where the two ``*_res`` items
        are the per-model entries parsed from the log file for the best and median seeds.
    """

    # Parse experiment path to extract env. config.
    match = re.search(r'(\d+)containers_(\d+)presses', experiment_dir)

    if match:
        num_containers = int(match.group(1))
        num_presses = int(match.group(2))
        env_name = 'CG_n' + str(num_containers) + '_m' + str(num_presses)
    else:
        # Fall back to the first known Gymnasium environment ID appearing in the path
        env_name = next((env for env in gymnasium_envs if env in experiment_dir), None)

    if env_name is None:
        print('Invalid experiment directory.')
        return False

    # os.path.join makes the lookup independent of whether experiment_dir ends with a separator
    with open(os.path.join(experiment_dir, 'test_results.txt'), 'r') as file:
        data = file.read()

    # Replace numpy float64 with native python floats in data string
    data = re.sub(r'np\.float64\(([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)\)', r'\1', data)

    # Extracting the list of models from the data: the literal sits between the
    # 'Mean and std dev of all <n_models> models:' header and the 'Best model' line
    models_str = data.split('Mean and std dev of all ' + str(n_models) + ' models:\n')[1].split('Best model')[0].strip()
    models = ast.literal_eval(models_str)

    # Extract best and median seeds from the file
    best_seed_match = re.search(r'Best model \(seed=(\d+)\)', data)
    median_seed_match = re.search(r'Median model \(seed=(\d+)\)', data)

    best_seed = int(best_seed_match.group(1))
    median_seed = int(median_seed_match.group(1))

    # Each entry of `models` is indexed as (seed, results); pick the first entry matching each seed
    best_model_res = [x[1] for x in models if x[0] == best_seed][0]
    median_model_res = [x[1] for x in models if x[0] == median_seed][0]

    return env_name, best_seed, best_model_res, median_seed, median_model_res

In [4]:
def plot_best_and_median_lr(best_policy_dir, median_policy_dir, title=None, fig_name=None):
    """
    Plot best and median policy training learning rates on a log-scaled y-axis. Relevant in particular when an
    adaptive learning rate strategy is used by the optimizer.

    Each directory must contain a 'learning_rates.csv' file; its first column is used as x values and its
    second column as y values.

    :param best_policy_dir: Best policy directory (with or without a trailing path separator)
    :param median_policy_dir: Median policy directory (with or without a trailing path separator)
    :param title: Desired plot title; defaults to "Training learning rate"
    :param fig_name: If given, the figure is saved as '<fig_name>.pdf'
    :return: The created matplotlib figure
    """
    # os.path.join avoids silently building a wrong path when the directory lacks a trailing separator
    best_lr = pd.read_csv(os.path.join(best_policy_dir, 'learning_rates.csv'))
    median_lr = pd.read_csv(os.path.join(median_policy_dir, 'learning_rates.csv'))

    fig, ax = plt.subplots()

    ax.semilogy(best_lr.iloc[:, 0], best_lr.iloc[:, 1], linestyle='-', color='b', label='Best')
    ax.semilogy(median_lr.iloc[:, 0], median_lr.iloc[:, 1], linestyle='--', color='r', label='Median')

    ax.set_xlabel('Timesteps')
    ax.set_ylabel('Learning rate')
    ax.set_title(title if title else "Training learning rate")
    ax.legend()
    ax.grid(True)

    if fig_name:
        fig.savefig(f'{fig_name}.pdf')

    return fig

In [5]:
def exponential_smoothing(arrays, alpha=0.2):
    """
    Accumulate a sequence of values (e.g. NumPy arrays) into a smoothed path.

    The first output is the first input unchanged. Every later output is
    ``(1 - alpha) * previous_output + sqrt(alpha * (2 - alpha)) * current_input``.
    Note that the weight on the new input is ``sqrt(alpha * (2 - alpha))``, not ``alpha``.

    :param arrays: Non-empty indexable sequence of values supporting scalar multiplication and addition
    :param alpha: Smoothing coefficient
    :return: List of smoothed values, same length as ``arrays``
    """
    keep = 1 - alpha
    gain = np.sqrt(alpha * (2 - alpha))

    current = arrays[0]  # The path starts at the first value
    path = [current]
    for step in arrays[1:]:
        current = keep * current + gain * step
        path.append(current)

    return path

In [6]:
def plot_cumul_path_distrib_mean_vs_dim(l_dim, l_peaks, title=None):
    r"""
    Plot one value per problem dimensionality as a line with circular markers.

    The docstring is a raw string because it contains LaTeX-style backslashes (``\|``), which are invalid
    escape sequences in a regular string literal.

    :param l_dim: list of #params to plot against (dimensionality of the problem); also used as x-tick positions
    :param l_peaks: list of peaks of \|p_t\| values for median seeds/models (one value per entry of l_dim)
    :param title: plot title
    :return: figure
    """
    fig, ax = plt.subplots()
    # for dim, peaks in zip(l_dim, l_peaks):  # If l_peaks is a list of lists
    #     ax.scatter([dim] * len(peaks), peaks)

    ax.plot(l_dim, l_peaks, marker='o', markeredgewidth=2, linewidth=3)
    ax.set_xticks(l_dim)
    ax.set_title(title)
    ax.set_xlabel(r'$D$')
    ax.set_ylabel(r'Mean of $\|p_t\|$ distribution (median seed)')
    ax.grid(True)

    return fig

In [7]:
def plot_adam_steps(experiment_dir, title=None, c=0.1, fig_name=None):
    """
    Plot, for every seed of an experiment, the normalized squared norm of the smoothed Adam update path.

    Three stacked panels are produced: the normalized path norm over timesteps (log y-axis), a kernel density
    estimate of that quantity, and the learning rate over timesteps (log y-axis). Curves of the best and median
    seeds are labelled 'Best' and 'Median'; the others are left out of the legends.

    Inputs are read from '<experiment_dir>/<seed:02d>/adam_updates.json' (fields 'adam_update' and 'timestep')
    and '<experiment_dir>/<seed:02d>/learning_rates.csv' (columns 'timestep' and 'learning_rate').

    :param experiment_dir: Path to experiment directory
    :param title: Optional title of the top panel
    :param c: Coefficient passed as ``alpha`` to ``exponential_smoothing``
    :param fig_name: If given, the figure is saved as '<fig_name>.pdf'
    :return: (figure, D, density_peak) where D is the length of the last update vector of the last seed processed
        and density_peak is the x position of the maximum of the median seed's density curve
    """
    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(8, 9))
    ax_norm, ax_density, ax_lr = axes

    adam_step_label = r'$\frac{\|\hat{m}_t / (\sqrt{\hat{v_t}} + \epsilon)\|^2}{\nu_t D}$'
    if title:
        ax_norm.set_title(title)
    ax_norm.set_xlabel('Timesteps')
    ax_norm.set_ylabel(adam_step_label)

    ax_density.set_title(r'Distribution of ' + adam_step_label)
    ax_density.set_xlabel(adam_step_label)

    ax_lr.set_xlabel('Timesteps')
    ax_lr.set_ylabel('Learning rate')

    _, best_seed, _, median_seed, _ = get_best_and_median_policy_results(experiment_dir)

    density_plot = None
    column = 'adam_update'

    for seed in range(n_models):
        seed_dir = f"{experiment_dir}/{seed:02d}/"

        with open(seed_dir + "adam_updates.json", 'r') as f:
            records = json.load(f)  # Load JSON content
        df = pd.DataFrame(records)
        df[column] = df[column].apply(np.array)
        D = len(df[column].iloc[-1])

        df['path'] = exponential_smoothing(df[column].to_list(), alpha=c)
        df['path_norm'] = df['path'].apply(lambda v: np.linalg.norm(v)**2)
        df['step_norm'] = df[column].apply(lambda v: np.linalg.norm(v)**2)  # Computed but not plotted
        df['normalization'] = df['path_norm'] / D  # (df['path_norm'] - df['step_norm']) / D

        df_lr = pd.read_csv(seed_dir + 'learning_rates.csv')

        # Only the best and median seeds get legend entries; a '_' label hides the KDE curve from the legend
        if seed == best_seed:
            line_kwargs, kde_label = {'label': 'Best'}, 'Best'
        elif seed == median_seed:
            line_kwargs, kde_label = {'label': 'Median'}, 'Median'
        else:
            line_kwargs, kde_label = {}, '_'

        ax_norm.semilogy(df['timestep'], df['normalization'], **line_kwargs)
        density_plot = df['normalization'].plot.kde(ax=ax_density, label=kde_label)
        ax_lr.semilogy(df_lr['timestep'], df_lr['learning_rate'], **line_kwargs)

    # Extract the density curve of the median seed (one line is added to the density panel per seed, in seed order)
    median_line = density_plot.get_lines()[median_seed]
    xs, ys = median_line.get_xdata(), median_line.get_ydata()

    # Find the x value where the density is highest
    density_peak = xs[np.argmax(ys)]

    for panel in axes:
        panel.grid(True)
    for panel in axes:
        panel.legend()

    plt.tight_layout()
    # plt.show()

    if fig_name:
        fig.savefig(f'{fig_name}.pdf')

    return fig, D, density_peak


In [8]:
def fit_and_save_linear_model(
        x,
        y,
        model_path='linear_model.pkl',
        x_label=r'$D$',
        y_label='Peak of ' + r'$\frac{\|\hat{m}_t / (\sqrt{\hat{v_t}} + \epsilon)\|^2}{\sigma^2 D}$' + ' distribution (median seed)'):
    """
    Fit a one-feature linear regression, persist it with joblib, and plot data against the fitted line.

    :param x: Sequence of scalar inputs; reshaped internally to a single-column matrix
    :param y: Sequence of target values, one per entry of x
    :param model_path: File the fitted model is dumped to
    :param x_label: Label of the x-axis of the diagnostic plot
    :param y_label: Label of the y-axis of the diagnostic plot
    :return: The fitted LinearRegression model (the figure itself is not returned)
    """
    features = np.array(x).reshape(-1, 1)  # One sample per row, single feature column
    targets = np.array(y)

    model = LinearRegression().fit(features, targets)

    # Persist the fitted model and report where it went
    joblib.dump(model, model_path)
    print(f'Model saved to {model_path}')

    # Fitted values at the training inputs
    fitted = model.predict(features)

    # Diagnostic plot: raw data (dashed, with markers) against the fitted line
    fig, ax = plt.subplots()
    ax.set_title('Linear approximation')
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.plot(features, targets, color='blue', label='Data', marker='o', linestyle='--')
    ax.plot(features, fitted, color='red', linewidth=2, label='Fitted line')
    ax.set_xticks(features.ravel())
    ax.grid(True)
    ax.legend()

    return model

# Example usage
# x = [1, 2, 3, 4, 5]
# y = [2.2, 2.8, 3.6, 4.5, 5.1]

# model = fit_and_save_linear_model(x, y)

# Load the model later for use
# loaded_model = joblib.load('linear_model.pkl')
# print(f"Loaded Model Coefficients: {loaded_model.coef_[0]}, Intercept: {loaded_model.intercept_}")

In [9]:
def display_results(experiments_dir, plot_figs=True):
    """
    Summarize one experiment directory: per-configuration plots, result tables, and a best-vs-median figure.

    For every environment configuration the test statistics of the best and median policies are collected and
    plotting helpers are called (see inline comments). Afterwards two tables are printed and a two-panel summary
    figure is saved as '<experiments_dir>reward_plots.pdf'.

    Paths are built by string concatenation, so ``experiments_dir`` is expected to end with a path separator.

    :param experiments_dir: Experiment directory. The substrings 'containergym', 'dadaptation' and 'prodigy'
        in this path select the configurations and which plots are produced.
    :param plot_figs: If True, call ``plt.show()`` at the end
    :return: None
    """
    # TODO: Adjust y-label on best & median learning rate plots
    env_configs = []  # Labels for env. configurations
    configs_dir = []
    if 'containergym' in experiments_dir:
        # Four fixed (containers, presses) configurations
        n_container_values = [5, 5, 11, 11]
        n_pu_values = [2, 5, 2, 11]
        for n, m in zip(n_container_values, n_pu_values):
             env_configs.append('n' + str(n) + '_m' + str(m))
             configs_dir.append(str(n) + 'containers_' + str(m) + 'presses_timestep_2min/')
    else:
        # configs_dir stays empty here; sub-directories are derived from the environment IDs below
        env_configs = gymnasium_envs

    # Best model stats
    best_rewards = []
    best_std_rewards = []
    best_lengths = []
    best_std_lengths = []

    # Median model stats
    median_rewards = []
    median_std_rewards = []
    median_lengths = []
    median_std_lengths = []

    # Per-configuration values returned by plot_adam_steps (D and density peak)
    l_dim = []
    l_peaks = []

    for i in range(len(env_configs)):
        if 'containergym' in experiments_dir:
            path = experiments_dir + configs_dir[i]
        else:
            path = experiments_dir + env_configs[i] + '/'
        _, best_seed, best, median_seed, median = get_best_and_median_policy_results(path)
        # Each line below appends to two lists at once (the resulting tuple is discarded)
        best_rewards.append(best['avg_reward']), best_std_rewards.append(best['std_reward'])
        best_lengths.append(best['avg_length']), best_std_lengths.append(best['std_length'])

        median_rewards.append(median['avg_reward']), median_std_rewards.append(median['std_reward'])
        median_lengths.append(median['avg_length']), median_std_lengths.append(median['std_length'])

        # Plot training learning rates
        if 'dadaptation' in experiments_dir or 'prodigy' in experiments_dir:
            plot_best_and_median_lr(path + '{:02}'.format(best_seed) + '/', path + '{:02}'.format(median_seed) + '/', env_configs[i],
                                    fig_name=experiments_dir + 'best_and_median_lr_' + env_configs[i])

        # Plot learning curves (training return) per env. config.
        log_dirs = []
        for seed in range(n_models):
            log_dir = f'{path}/{seed:02d}/'
            log_dirs.append(log_dir)

        # plot_multiple_results is imported from ppo_experiments.utilities; its behavior is not visible in this notebook
        plot_multiple_results(log_dirs, save_location=experiments_dir, title='Smoothed training rewards on ' + env_configs[i], window_size=50)

        # Plot gradient norms
        if 'dadaptation' not in experiments_dir and 'prodigy' not in experiments_dir:
            _, D, density_peaks = plot_adam_steps(path, title='Norm of Adam steps on ' + env_configs[i], fig_name=experiments_dir + 'norm_adam_steps_' + env_configs[i])

            # Prepare lists for path's highest density plots
            l_dim.append(D)
            l_peaks.append(density_peaks)

    # Train linear model for path mean value as a function of a model's #params
    # (saved to the default model_path of fit_and_save_linear_model; the returned model is discarded)
    if 'dadaptation' not in experiments_dir and 'prodigy' not in experiments_dir:
        _ = fit_and_save_linear_model(l_dim, l_peaks)

    # Print test results for best and median policies
    print('Best policy statistics:')
    headers = ['Config.', 'Cumul. r', 'Std. cumul. r', 'Episode length', 'Std. episode length']
    table = zip(env_configs, best_rewards, best_std_rewards, best_lengths, best_std_lengths)
    print(tabulate(table, headers=headers, floatfmt='.2f'))

    print('\n')
    print('Median policy statistics:')
    headers = ['Config.', 'Cumul. r', 'Std. cumul. r', 'Episode length', 'Std. episode length']
    table = zip(env_configs, median_rewards, median_std_rewards, median_lengths, median_std_lengths)
    print(tabulate(table, headers=headers, floatfmt='.2f'))

    x = np.arange(len(env_configs))  # Position for bars
    width = 0.35  # Width of the bars

    fig, ax = plt.subplots(1, 2, figsize=(12, 5))

    # ----- BAR PLOT: Reward Comparison -----
    # Best and median bars are drawn side by side around each tick, with std. dev. as error bars
    ax[0].bar(x - width/2, best_rewards, width, yerr=best_std_rewards, label='Best', capsize=5, color='b', alpha=0.7)
    ax[0].bar(x + width/2, median_rewards, width, yerr=median_std_rewards, label='Median', capsize=5, color='r', alpha=0.7)
    ax[0].set_ylabel('Cumulative reward')
    ax[0].set_title('Best vs. median policy - Cumulative reward')
    ax[0].grid(True)
    ax[0].set_xticks(x)
    ax[0].set_xticklabels(env_configs)
    ax[0].legend()

    # ----- LINE PLOT: Episode Length Comparison -----
    ax[1].errorbar(env_configs, best_lengths, yerr=best_std_lengths, label='Best', marker='o', linestyle='-', color='b', capsize=5)
    ax[1].errorbar(env_configs, median_lengths, yerr=median_std_lengths, label='Median', marker='D', linestyle='-', color='r', capsize=5)
    ax[1].set_ylabel('Episode length')
    ax[1].set_ylim(0)  # Start the y-axis at zero
    ax[1].grid(True)
    ax[1].set_title('Best vs. median policy - Episode length')
    ax[1].legend()

    # plot_cumul_path_distrib_mean_vs_dim(l_dim, l_peaks)

    plt.tight_layout()
    fig.savefig(experiments_dir + 'reward_plots.pdf')

    if plot_figs:
        plt.show()

In [10]:
def compute_mean_std_array(x_arrays, y_arrays, common_x=None, label='Mean', color='blue', alpha=0.3):
    """
    Interpolate multiple (x, y) curves onto a common x-axis and compute their pointwise mean and
    standard deviation. This function does not plot anything.

    Parameters:
    - x_arrays: Non-empty list of x-axis arrays (any array-like, e.g. lists or NumPy arrays).
    - y_arrays: List of corresponding y-axis arrays, same length as x_arrays.
    - common_x: Optional array defining the common x-axis. If None, it is an evenly spaced grid from the
      smallest to the largest x value, with as many points as the longest x array.
    - label, color, alpha: Unused; kept so that existing callers passing them keep working.

    Returns:
    - (common_x, mean_y, std_y): the common x-axis, the mean across curves, and the (population) standard
      deviation across curves, each evaluated on common_x.

    Raises:
    - ValueError: if x_arrays and y_arrays differ in length, or if no curve is given.
    """

    # Validate input
    if len(x_arrays) != len(y_arrays):
        raise ValueError("x_arrays and y_arrays must have the same length.")
    if len(x_arrays) == 0:
        raise ValueError("At least one (x, y) pair is required.")

    # Accept any array-like (the .min()/.max() calls below need NumPy arrays)
    x_arrays = [np.asarray(x) for x in x_arrays]
    y_arrays = [np.asarray(y) for y in y_arrays]

    # Automatically define a common x-axis if not provided
    if common_x is None:
        min_x = min(x.min() for x in x_arrays)
        max_x = max(x.max() for x in x_arrays)
        common_x = np.linspace(min_x, max_x, max(len(x) for x in x_arrays))

    # Interpolate each (x, y) pair to the common x-axis
    interpolated_ys = [np.interp(common_x, x, y) for x, y in zip(x_arrays, y_arrays)]

    # Stack (one row per curve) and compute mean and standard deviation across curves
    stacked = np.vstack(interpolated_ys)
    mean_y = np.mean(stacked, axis=0)
    std_y = np.std(stacked, axis=0)

    return common_x, mean_y, std_y

In [11]:
def plot_learning_curves_on_env_type(env_type, list_experiment_dir, color_code_optimizers=None, fig_location='./figures/'):
    """
    For each environment configuration, plot the mean +/- std training return of every optimizer and the
    per-seed learning rates of every optimizer, and save both figures as PDFs.

    Nothing is done unless ``env_type`` is a substring of every path in ``list_experiment_dir``.

    The optimizer name is derived from each experiment path: 'adam' gives 'Adam', extended by '_CLARA' when
    'adaptive_lr' is present and by '_LS' when 'linear_schedule' is present; 'dadaptation' gives
    'Adam_D-Adaptation'.

    :param env_type: 'containergym' or 'gymnasium'
    :param list_experiment_dir: Experiment directories, one per optimizer; configuration sub-directories are
        appended by string concatenation, so each path is expected to end with a path separator
    :param color_code_optimizers: Mapping from optimizer name to matplotlib color (required despite the None default)
    :param fig_location: Prefix (directory) for the saved figures '<env>_training_reward.pdf' and
        '<env>_learning_rate.pdf'
    :return: None
    """
    # env_type = 'containergym' or 'gymnasium'

    # Extract environment type from dir
    if all(env_type in experiment_dir for experiment_dir in list_experiment_dir):
        env_configs = []  # Labels for env. configurations
        configs_dir = []
        if env_type == 'containergym':
            n_container_values = [5, 5, 11, 11]
            n_pu_values = [2, 5, 2, 11]
            for n, m in zip(n_container_values, n_pu_values):
                env_configs.append('CG_n' + str(n) + '_m' + str(m))
                configs_dir.append(str(n) + 'containers_' + str(m) + 'presses_timestep_2min/')
        else:
            env_configs = gymnasium_envs
            configs_dir = [env + '/' for env in gymnasium_envs]

        # Create dictionary of results to plot
        res_dict = dict.fromkeys(env_configs, None)

        # Create color-to-optimizer mapping
        color_dict = dict()
        linestyles = ['--', ':', '--', (0, (1, 10))]

        for i in range(len(env_configs)):  # Loop over environments
            res_alg = dict()
            for experiment_dir in list_experiment_dir:  # Loop over algorithms
                # Extract algorithm's name
                alg_name = None
                if 'adam' in experiment_dir:
                    alg_name = 'Adam'
                    if 'adaptive_lr' in experiment_dir:
                        alg_name += '_CLARA'
                    if 'linear_schedule' in experiment_dir:
                        alg_name += '_LS'
                if 'dadaptation' in experiment_dir:
                    alg_name = 'Adam_D-Adaptation'

                # Map color to optimizer/algorithm
                color_dict[alg_name] = color_code_optimizers[alg_name]

                path = experiment_dir + configs_dir[i]
                _, best_seed, _, median_seed, _ = get_best_and_median_policy_results(path)

                # Collect learning rates for all seeds, keyed by 'best', 'median' or the seed number
                dict_lr = dict()
                for seed in range(n_models):
                    if seed == best_seed:
                        dict_lr['best'] = pd.read_csv(f"{path}/{seed:02d}/" + 'learning_rates.csv')
                    elif seed == median_seed:
                        dict_lr['median'] = pd.read_csv(f"{path}/{seed:02d}/" + 'learning_rates.csv')
                    else:
                        dict_lr[seed] = pd.read_csv(f"{path}/{seed:02d}/" + 'learning_rates.csv')

                # Collect one (x, y) training curve per seed from the monitor files
                x_arrays = []
                y_arrays = []
                for s in range(n_models):
                    x, y = read_monitor_file(f'{path}/{s:02d}/', window_size=25)
                    x_arrays.append(x)
                    y_arrays.append(y)

                res_alg[alg_name] = (compute_mean_std_array(x_arrays, y_arrays), dict_lr)

            res_dict[env_configs[i]] = res_alg

        # Plot best and median training rewards for each algorithm per environment
        for env in res_dict:
            # Plot training rewards
            fig_reward, ax_reward = plt.subplots(1, 1, sharey=True, figsize=(6, 5))
            # Plot learning rates. squeeze=False always returns a 2-D array of Axes, so indexing by
            # optimizer also works when there is a single optimizer.
            fig_lr, ax_lr = plt.subplots(1, len(color_dict), figsize=(len(color_dict) * 6, 5), squeeze=False)
            ax_lr = ax_lr[0]

            for j, alg in enumerate(res_dict[env]):
                common_x, mean_y, std_y = res_dict[env][alg][0]
                ax_reward.plot(common_x, mean_y, color=color_dict[alg], label=alg)
                ax_reward.fill_between(common_x, mean_y - std_y, mean_y + std_y, color=color_dict[alg], alpha=0.3)
                ls_idx = -1
                label = None
                for k in res_dict[env][alg][-1]:
                    if k == 'best':
                        linestyle = '-'
                        label = 'Best'
                    elif k == 'median':
                        linestyle = '-.'
                        label = 'Median'
                    else:
                        # Cycle through the line styles so any number of remaining seeds can be drawn
                        ls_idx += 1
                        linestyle = linestyles[ls_idx % len(linestyles)]
                        label = None

                    # Best and median curves are opaque; the other seeds are faded
                    alpha = 1 if k == 'best' or k == 'median' else 0.2
                    ax_lr[j].semilogy(res_dict[env][alg][-1][k]['timestep'], res_dict[env][alg][-1][k]['learning_rate'], color=color_dict[alg], label=label, linestyle=linestyle, alpha=alpha)

                ax_lr[j].set_title(alg)
                ax_lr[j].set_xlabel('Timesteps')
                ax_lr[j].set_ylabel('Learning rate')
                ax_lr[j].legend(loc='lower right')
                ax_lr[j].grid(True)

            ax_reward.legend(loc='lower right')
            ax_reward.grid(True)
            ax_reward.set_xlabel('Timesteps')
            ax_reward.set_ylabel('Average episodic return')
            fig_reward.suptitle(env + ' - Average episodic return', fontsize=14)

            fig_lr.suptitle(env + ' - Learning rate', fontsize=14)
            figname = env + '_training_reward'

            # NOTE(review): plt.tight_layout() acts on the current figure only, which is presumably fig_lr
            # (the most recently created one); fig_reward is saved without it. Confirm this is intended.
            plt.tight_layout()
            fig_reward.savefig(fig_location + figname + '.pdf')
            figname = env + '_learning_rate'
            fig_lr.savefig(fig_location + figname + '.pdf')

In [12]:
def plot_test_reward_on_env_type(env_type, list_experiment_dir, color_code_optimizers=None, fig_location='./figures/'):
    """
    For each environment configuration, draw a grouped bar chart comparing the test return of the best and
    median policies of every optimizer, and save it as '<fig_location><env>_test_reward.pdf'.

    Nothing is done unless ``env_type`` is a substring of every path in ``list_experiment_dir``.

    :param env_type: 'containergym' or 'gymnasium'
    :param list_experiment_dir: Experiment directories, one per optimizer (configuration sub-directories are
        appended by string concatenation)
    :param color_code_optimizers: Mapping from optimizer name to matplotlib color
    :param fig_location: Prefix (directory) for the saved figures
    :return: None
    """
    # Bail out when some directory does not belong to the requested environment type
    if not all(env_type in experiment_dir for experiment_dir in list_experiment_dir):
        return

    # Labels and sub-directories of the env. configurations
    if env_type == 'containergym':
        sizes = list(zip([5, 5, 11, 11], [2, 5, 2, 11]))  # (containers, presses)
        env_configs = ['CG_n' + str(n) + '_m' + str(m) for n, m in sizes]
        configs_dir = [str(n) + 'containers_' + str(m) + 'presses_timestep_2min/' for n, m in sizes]
    else:
        env_configs = gymnasium_envs
        configs_dir = [env + '/' for env in gymnasium_envs]

    color_dict = dict()  # Optimizer name -> color
    res_dict = dict()    # Env. label -> {optimizer name -> (best results, median results)}

    for env_label, config_dir in zip(env_configs, configs_dir):  # Loop over environments
        res_alg = dict()
        for experiment_dir in list_experiment_dir:  # Loop over algorithms
            # Derive the algorithm's name from the experiment path
            if 'dadaptation' in experiment_dir:
                alg_name = 'Adam_D-Adaptation'
            elif 'adam' in experiment_dir:
                alg_name = 'Adam'
                if 'adaptive_lr' in experiment_dir:
                    alg_name += '_CLARA'
                if 'linear_schedule' in experiment_dir:
                    alg_name += '_LS'
            else:
                alg_name = None

            color_dict[alg_name] = color_code_optimizers[alg_name]

            _, _, best, _, median = get_best_and_median_policy_results(experiment_dir + config_dir)
            res_alg[alg_name] = (best, median)

        res_dict[env_label] = res_alg

    bar_width = 0.35
    for env, per_alg in res_dict.items():
        algs = list(per_alg)  # Tick labels, in insertion order
        best_rewards = [per_alg[a][0]['avg_reward'] for a in algs]
        best_std_rewards = [per_alg[a][0]['std_reward'] for a in algs]
        median_rewards = [per_alg[a][-1]['avg_reward'] for a in algs]
        median_std_rewards = [per_alg[a][-1]['std_reward'] for a in algs]

        ticks = np.arange(len(algs))
        bar_colors = [color_dict[a] for a in algs]

        fig, ax = plt.subplots(1, 1, figsize=(6, 5))

        # Best (plain) and median (hatched) bars sit side by side around each tick
        ax.bar(ticks - bar_width/2, best_rewards, bar_width, yerr=best_std_rewards, capsize=5, color=bar_colors, alpha=0.7)
        ax.bar(ticks + bar_width/2, median_rewards, bar_width, yerr=median_std_rewards, hatch='//', capsize=5, color=bar_colors, alpha=0.7)
        ax.set_ylabel('Test episodic return')
        ax.set_title(env + ' - Test episodic return')
        ax.grid(True)
        ax.set_xticks(ticks)
        ax.set_xticklabels(algs)

        # Colors encode the optimizer, so the legend only explains the hatching
        ax.legend(
            handles=[
                mpatches.Patch(facecolor='gray', label='Best'),
                mpatches.Patch(facecolor='gray', hatch='//', label='Median'),
            ],
            loc='lower right',
        )

        plt.tight_layout()
        fig.savefig(fig_location + env + '_test_reward' + '.pdf')

In [13]:
def plot_test_reward_per_damping_value(env_type, lr0, damping, experiments_dir='/local/aatamna/', fig_location='./figures/'):
    """
    Plot the best policy's test return as a function of the damping value, one panel per initial learning
    rate and one curve per environment configuration, and save the figure as
    '<fig_location><env_type>_damping_experiments.pdf'.

    Results are read from
    '<experiments_dir>ppo_adam_adaptive_lr_<env_type>_lr0_<lr0>_d_<damping>/<config>' and the baseline from
    '<experiments_dir>ppo_adam_<env_type>_lr0_<lr0>/<config>'; the baseline is drawn as a horizontal dash-dotted
    line in the color of the matching curve.

    :param env_type: 'containergym' selects the four ContainerGym configurations; any other value selects the
        environments in ``gymnasium_envs``
    :param lr0: Sequence of initial learning rates (one subplot each)
    :param damping: Sequence of damping values (x-axis)
    :param experiments_dir: Root directory of the experiments (expected to end with a path separator)
    :param fig_location: Prefix (directory) for the saved figure
    :return: None
    """
    env_configs = []  # Labels for env. configurations
    configs_dir = []
    if env_type == 'containergym':
        n_container_values = [5, 5, 11, 11]
        n_pu_values = [2, 5, 2, 11]
        for n, m in zip(n_container_values, n_pu_values):
            env_configs.append('CG_n' + str(n) + '_m' + str(m))
            configs_dir.append(str(n) + 'containers_' + str(m) + 'presses_timestep_2min/')
    else:
        env_configs = gymnasium_envs
        configs_dir = [env + '/' for env in gymnasium_envs]

    # squeeze=False always returns a 2-D array of Axes, so indexing works for a single learning rate too
    fig, axes = plt.subplots(1, len(lr0), sharey=True, figsize=(6 * len(lr0), 5), squeeze=False)
    axes = axes[0]
    figname = env_type + '_damping_experiments'
    fig.suptitle('Adam with CLARA - Best policy - Test episodic return vs. damping')

    for j in range(len(lr0)):
        res_env = dict()
        adam_res_env = dict()
        for i in range(len(configs_dir)):
            best_seed_rewards = []
            best_seed_std = []
            for d in damping:
                path = experiments_dir + 'ppo_adam_adaptive_lr_' + env_type + '_lr0_' + str(lr0[j]) + '_d_' + str(d) + '/' + configs_dir[i]
                _, _, best, _, _ = get_best_and_median_policy_results(path)
                best_seed_rewards.append(best['avg_reward'])
                best_seed_std.append(best['std_reward'])

            res_env[env_configs[i]] = (best_seed_rewards, best_seed_std)

            # Get standard Adam performance
            path_baseline = experiments_dir + 'ppo_adam_' + env_type + '_lr0_' + str(lr0[j]) + '/' + configs_dir[i]
            _, _, best_adam, _, _ = get_best_and_median_policy_results(path_baseline)
            adam_res_env[env_configs[i]] = best_adam['avg_reward']

        # Plot the best policy's average test reward as a function of the damping for each env. config.
        for env in res_env:
            axes[j].errorbar(damping, res_env[env][0], yerr=res_env[env][1], label=env, linestyle='-', capsize=3)
            # Reuse the color of the line most recently added by errorbar for the baseline of the same env.
            current_color = axes[j].get_lines()[-1].get_c()
            axes[j].hlines(y=adam_res_env[env], xmin=damping[0], xmax=damping[-1], colors=current_color, linestyles='-.', linewidth=3)

        # Adding labels and title
        axes[j].set_xticks(damping)
        axes[j].set_xlabel('Damping')
        axes[j].set_ylabel('Test episodic return')
        axes[j].set_title('lr0 = ' + str(lr0[j]))  # TODO: Update lr0 according to lr notation in paper
        # axes[j].set_xlim(damping[0], damping[-1])
        handles, labels = axes[j].get_legend_handles_labels()
        # Generic legend entry explaining the dash-dotted baseline lines
        dotted_line = Line2D([0], [0], color='black', linestyle='-.', label='Adam')
        handles.append(dotted_line)
        labels.append('Adam')
        axes[j].grid(True)

    # The legend is drawn on the last panel only, using the handles collected there
    axes[-1].legend(handles=handles, labels=labels)
    plt.tight_layout()
    fig.savefig(fig_location + figname + '.pdf')

In [12]:
# Root directory of the experiments and environment family to compare optimizers on
experiments_dir = '/local/aatamna/'  # '/local/aatamna/rl_lr_experiments_5seeds/'
env_type = 'containergym'

# Experiments to compare; uncomment entries to include more optimizers.
# (An alternative CLARA directory without the damping suffix: 'ppo_adam_adaptive_lr_' + env_type + '_lr0_0.0003/')
list_experiments = [
    experiments_dir + 'ppo_adam_' + env_type + '_lr0_0.0003/',
    # experiments_dir + 'ppo_adam_linear_schedule_' + env_type + '_lr0_0.0003/',
    # experiments_dir + 'ppo_dadaptation_' + env_type + '_lr_coef_0.01/',
    experiments_dir + 'ppo_adam_adaptive_lr_' + env_type + '_lr0_0.0003_d_0.05/',
]

# One fixed color per algorithm name, so colors stay the same whichever experiments are selected
alg_names = ['Adam', 'Adam_LS', 'Adam_D-Adaptation', 'Adam_CLARA']
num_colors = 4  # Number of colors needed for plotting rewards per algorithm

# Use a colormap (e.g., 'tab10', 'viridis', 'plasma')
colormap = plt.get_cmap('tab10', num_colors)

# Map each algorithm name to the colormap entry at its position in alg_names
colors = dict(zip(alg_names, (colormap(idx) for idx in range(len(alg_names)))))

plot_test_reward_on_env_type(env_type, list_experiments, colors)
plot_learning_curves_on_env_type(env_type, list_experiments, colors)

In [13]:
# Damping values swept on the x-axis; one subplot is drawn per initial learning rate in lr0
damping_values = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
plot_test_reward_per_damping_value(
    'containergym',
    lr0=[1e-6, 3e-4, 1.0],
    damping=damping_values,
    experiments_dir='/local/aatamna/',
)

FileNotFoundError: [Errno 2] No such file or directory: '/local/aatamna/ppo_adam_linear_schedule_gymnasium/LunarLander-v3/test_results.txt'

In [14]:
# Experiment to summarize (alternative: '/local/aatamna/ppo_adam_containergym_lr0_1e-06/')
experiments_dir = '/local/aatamna/rl_lr_experiments_5seeds/ppo_adam_containergym_lr0_0.0003/'
# Figures are saved by display_results; plot_figs=False skips the final plt.show()
display_results(experiments_dir=experiments_dir, plot_figs=False)

Model saved to linear_model.pkl
Best policy statistics:
Config.      Cumul. r    Std. cumul. r    Episode length    Std. episode length
---------  ----------  ---------------  ----------------  ---------------------
n5_m2           58.62             1.61            600.00                   0.00
n5_m5           55.84             1.79            600.00                   0.00
n11_m2          86.68             2.48            600.00                   0.00
n11_m11         87.57             1.52            600.00                   0.00


Median policy statistics:
Config.      Cumul. r    Std. cumul. r    Episode length    Std. episode length
---------  ----------  ---------------  ----------------  ---------------------
n5_m2           54.38             1.35            600.00                   0.00
n5_m5           53.57             1.72            600.00                   0.00
n11_m2          85.98             2.89            600.00                   0.00
n11_m11         85.62             3.