In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib tk
import ast
import re
import json
from tabulate import tabulate
import sys
import os
import joblib
import seaborn as sns
from sklearn.linear_model import LinearRegression
import gymnasium as gym

current_dir = os.getcwd()
ppo_experiments_dir = os.path.join(current_dir, "ppo_experiments")
sys.path.append(ppo_experiments_dir)

from ppo_experiments.utilities import plot_multiple_results

In [3]:
# Global configuration shared by the helper functions defined in this notebook.
# Gymnasium environment ids. They are matched as substrings of experiment directory paths
# and used as sub-directory names / plot labels; commented-out entries are currently disabled.
gymnasium_envs = ['LunarLander-v3',
                  'BipedalWalker-v3',
                  # 'Pong-v5',
                  # 'Ant-v5',
                  # 'Humanoid-v5'
                  ]
# Number of trained models (seeds) per experiment; per-seed sub-directories are read as 00 .. n_models-1.
n_models = 5

In [4]:
def get_best_and_median_policy_results(experiment_dir):
    """
    Retrieve cumul. reward and episode length statistics on test environments for the best and median policies
    from the `test_results.txt` log file generated during the experiment.

    :param experiment_dir: Path to experiment directory, with or without a trailing path separator. The path must
        contain either '<n>containers_<m>presses' or one of the names listed in `gymnasium_envs`.
    :return: Tuple (env_name, best_seed, best_model_res, median_seed, median_model_res). If no environment can be
        identified from the path, a message is printed and False is returned.
    :raises FileNotFoundError: If `test_results.txt` does not exist in `experiment_dir`.
    :raises ValueError: If the log file lacks the models header, the best/median seed lines, or an entry for one of
        those seeds.
    """

    # Parse experiment path to extract env. config.
    match = re.search(r'(\d+)containers_(\d+)presses', experiment_dir)
    if match:
        num_containers = int(match.group(1))
        num_presses = int(match.group(2))
        env_name = f'n{num_containers}_m{num_presses}'
    else:
        # Fall back to the first known Gymnasium environment whose name appears in the path.
        env_name = next((env for env in gymnasium_envs if env in experiment_dir), None)

    if env_name is None:
        print('Invalid experiment directory.')
        return False

    # os.path.join works whether or not experiment_dir ends with a path separator.
    results_path = os.path.join(experiment_dir, 'test_results.txt')
    with open(results_path, 'r') as file:
        data = file.read()

    # Extracting the list of models from the data. The header is matched for any model count so that parsing
    # does not depend on a module-level constant being in sync with the log file.
    header = re.search(r'Mean and std dev of all \d+ models:\n', data)
    if header is None:
        raise ValueError(f"No 'Mean and std dev of all <N> models:' header found in {results_path}")
    models_str = data[header.end():].split('Best model')[0].strip()
    models = ast.literal_eval(models_str)

    # Extract best and median seeds from the file
    best_seed_match = re.search(r'Best model \(seed=(\d+)\)', data)
    median_seed_match = re.search(r'Median model \(seed=(\d+)\)', data)
    if best_seed_match is None or median_seed_match is None:
        raise ValueError(f'Best and/or median model seed not found in {results_path}')

    best_seed = int(best_seed_match.group(1))
    median_seed = int(median_seed_match.group(1))

    def _result_for(seed):
        # Each entry is indexed as (seed, results); the first entry with a matching seed wins.
        for entry in models:
            if entry[0] == seed:
                return entry[1]
        raise ValueError(f'No results listed for seed {seed} in {results_path}')

    best_model_res = _result_for(best_seed)
    median_model_res = _result_for(median_seed)

    return env_name, best_seed, best_model_res, median_seed, median_model_res


In [5]:
def plot_best_and_median_lr(best_policy_dir, median_policy_dir, title=None):
    """
    Plot best and median policy training learning rates. Relevant in particular when an adaptive learning rate
    strategy is used by the optimizer.

    Each directory must contain a `learning_rates.csv` file; its first column is used as the x-axis (timesteps)
    and its second column as the learning rate, drawn on a logarithmic y-axis.

    :param best_policy_dir: Best policy directory (with or without a trailing path separator)
    :param median_policy_dir: Median policy directory (with or without a trailing path separator)
    :param title: Desired plot title; defaults to "Training learning rate"
    :return: The matplotlib figure holding the plot
    """
    # os.path.join avoids silently building '<dir>learning_rates.csv' when the separator is missing.
    best_lr = pd.read_csv(os.path.join(best_policy_dir, 'learning_rates.csv'))
    median_lr = pd.read_csv(os.path.join(median_policy_dir, 'learning_rates.csv'))

    fig, ax = plt.subplots()

    ax.semilogy(best_lr.iloc[:, 0], best_lr.iloc[:, 1], linestyle='-', color='b', label='Best')
    ax.semilogy(median_lr.iloc[:, 0], median_lr.iloc[:, 1], linestyle='--', color='r', label='Median')

    ax.set_xlabel('Timesteps')
    ax.set_ylabel('Learning rate')
    ax.set_title(title if title else "Training learning rate")
    ax.legend()
    ax.grid(True)

    return fig


In [6]:
def exponential_smoothing(arrays, alpha=0.2):
    """
    Applies exponential smoothing to a sequence of values (e.g. a list of NumPy arrays).

    The first element is kept as is; every following element is replaced by
    `alpha * current + (1 - alpha) * previous_smoothed`.

    :param arrays: Indexable sequence of numbers or equally-shaped NumPy arrays
    :param alpha: Weight of the current element
    :return: List of smoothed values with the same length as `arrays` (an empty list for empty input)
    """
    # Guard against empty input, which would otherwise fail on arrays[0].
    if len(arrays) == 0:
        return []

    smoothed = [arrays[0]]  # Initialize with first value

    for i in range(1, len(arrays)):
        smoothed.append(alpha * arrays[i] + (1 - alpha) * smoothed[-1])

    return smoothed

In [7]:
def plot_cumul_path_distrib_mean_vs_dim(l_dim, l_peaks, title=None):
    r"""
    Plot one summary value of the \|p_t\| distribution per problem dimensionality as a connected line.

    The docstring is a raw string because it contains backslashes; in a regular string literal `\|` is an
    invalid escape sequence.

    :param l_dim: list of #params to plot against (dimensionality of the problem); also used as x-tick positions
    :param l_peaks: list of peaks of \|p_t\| values for median seeds/models, one per entry of l_dim
    :param title: plot title (no title is set when omitted)
    :return: figure
    """
    fig, ax = plt.subplots()
    # for dim, peaks in zip(l_dim, l_peaks):  # If l_peaks is a list of lists
    #     ax.scatter([dim] * len(peaks), peaks)

    ax.plot(l_dim, l_peaks, marker='o', markeredgewidth=2, linewidth=3)
    ax.set_xticks(l_dim)
    if title:
        ax.set_title(title)
    ax.set_xlabel(r'$D$')
    ax.set_ylabel(r'Mean of $\|p_t\|$ distribution (median seed)')
    ax.grid(True)

    return fig

In [8]:
def estimate_gradients_variance(experiment_dir, title=None):
    """
    Summarise, for every seed, the variance of the logged Adam update vectors.

    For each seed in range(n_models) the file `<experiment_dir>/<seed:02d>/adam_updates.json` is loaded, its
    'adam_update' entries are stacked into a 2-D array (one row per logged update), the unbiased (ddof=1)
    variance is taken per column and the median of these per-dimension variances is kept.

    :param experiment_dir: Directory containing one two-digit sub-directory per seed
    :param title: Not used by this function
    :return: List with one median variance per seed, in seed order
    """

    def _median_variance(seed):
        # Load the JSON log of one seed and reduce it to a single number.
        with open(f"{experiment_dir}/{seed:02d}/adam_updates.json", 'r') as handle:
            records = pd.DataFrame(json.load(handle))
        update_matrix = np.stack([np.array(update) for update in records['adam_update']])
        per_dimension_variance = np.var(update_matrix, axis=0, ddof=1)  # Unbiased estimate
        return np.median(per_dimension_variance)

    return [_median_variance(seed) for seed in range(n_models)]

In [9]:
def plot_adam_steps(experiment_dir, title=None, c=0.1, d=1):
    """
    Plot, for every seed of an experiment, the normalised squared norm of the logged Adam updates,
    the kernel density estimate of that quantity, and the learning rate.

    For each seed in range(n_models) this reads `<experiment_dir>/<seed:02d>/adam_updates.json`
    (the 'timestep' and 'adam_update' entries are used) and `<experiment_dir>/<seed:02d>/learning_rates.csv`
    (the 'timestep' and 'learning_rate' columns are used). Per logged step, the squared Euclidean norm of the
    update vector is divided by (exponentially smoothed variance of the vector's entries * D).
    The curves of the best and median seeds are labelled 'Best' and 'Median'.

    :param experiment_dir: Experiment directory; also passed to get_best_and_median_policy_results
    :param title: Optional title of the top axes
    :param c: Smoothing factor (alpha) of the exponentially weighted mean applied to the per-step variance
    :param d: Not used in the function body
    :return: Tuple (fig, D, density_peak): the figure, the length of the last update vector of the last seed
        processed, and the x-position where the density curve selected with index `median_seed` is highest
    """
    # Three stacked axes: normalised step norm, its density, learning rate.
    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(8, 9))
    adam_step_label = r'$\frac{\|\hat{m}_t / (\sqrt{\hat{v_t}} + \epsilon)\|^2}{\nu_t D}$'
    if title:
        axes[0].set_title(title)
    axes[0].set_xlabel('Timesteps')
    axes[0].set_ylabel(adam_step_label)

    axes[1].set_title(r'Distribution of ' + adam_step_label)
    axes[1].set_xlabel(adam_step_label)

    axes[2].set_xlabel('Timesteps')
    axes[2].set_ylabel('Learning rate')

    # Only the seeds are needed here; they decide which curves get a legend label.
    _, best_seed, _, median_seed, _ = get_best_and_median_policy_results(experiment_dir)

    density_plot = None

    for seed in range(n_models):
        with open(f"{experiment_dir}/{seed:02d}/adam_updates.json", 'r') as f:
             data = json.load(f)  # Load JSON content
        df = pd.DataFrame(data)
        column = 'adam_update'
        df[column] = df[column].apply(np.array)
        # df['cumul_path'] = exponential_smoothing(df[column].to_list(), alpha=1)
        # Dimensionality, taken from the length of the last logged update vector.
        D = len(df[column].iloc[-1])

        # Variance over the entries of each update vector, then smoothed over time with alpha=c.
        df['var'] = df[column].apply(lambda x: np.var(x))
        df['var'] = df['var'].ewm(alpha=c, adjust=False).mean()
        # Squared Euclidean norm of each update, normalised by (smoothed variance * D).
        df['norm_step'] = df[column].apply(lambda x: np.linalg.norm(x)**2)
        df['norm_step'] = df['norm_step'] / (df['var'] * D)

        df_lr = pd.read_csv(f"{experiment_dir}/{seed:02d}/" + 'learning_rates.csv')

        # df['lr'] = df_lr['learning_rate'].iloc[1:].values

        # Same three plots in every branch; only the legend labels differ.
        if seed == best_seed:
            axes[0].semilogy(df['timestep'], df['norm_step'], label='Best')
            density_plot = df['norm_step'].plot.kde(ax=axes[1], label='Best')
            axes[2].plot(df_lr['timestep'], df_lr['learning_rate'], label='Best')

        elif seed == median_seed:
            axes[0].semilogy(df['timestep'], df['norm_step'], label='Median')
            density_plot = df['norm_step'].plot.kde(ax=axes[1], label='Median')
            axes[2].plot(df_lr['timestep'], df_lr['learning_rate'], label='Median')
        else:
            axes[0].semilogy(df['timestep'], df['norm_step'])
            density_plot = df['norm_step'].plot.kde(ax=axes[1], label='_')
            axes[2].plot(df_lr['timestep'], df_lr['learning_rate'])

    # Extract peaks of path norm density plots
    # NOTE(review): indexing with median_seed assumes one density line was added per seed, in seed
    # order, so that position median_seed is the median seed's curve -- confirm if seeds ever change.
    line = density_plot.get_lines()[median_seed]
    x, y = line.get_xdata(), line.get_ydata()

    # Find the x value where the density is highest
    density_peak = x[np.argmax(y)]

    axes[0].grid(True), axes[1].grid(True), axes[2].grid(True)
    axes[0].legend(), axes[1].legend(), axes[2].legend()

    plt.tight_layout()
    # plt.show()

    # D is the value from the last loop iteration (last seed).
    return fig, D, density_peak

In [10]:
def fit_and_save_linear_model(x, y, model_path='linear_model.pkl', x_label=r'$D$', y_label='Peak of ' + r'$\frac{\|\hat{m}_t / (\sqrt{\hat{v_t}} + \epsilon)\|^2}{\sigma^2 D}$' + ' distribution (median seed)'):
    """
    Fit a linear regression of y on x, dump the fitted model with joblib and plot the data with the fitted line.

    :param x: Sequence of scalar inputs; reshaped into a single-feature column
    :param y: Sequence of targets, one per entry of x
    :param model_path: File the fitted model is written to
    :param x_label: Label of the x-axis of the plot
    :param y_label: Label of the y-axis of the plot
    :return: The fitted LinearRegression model (the figure is created but not returned)
    """
    features = np.array(x).reshape(-1, 1)  # One sample per row, single feature
    targets = np.array(y)

    model = LinearRegression()
    model.fit(features, targets)

    # Persist the fitted model before plotting.
    joblib.dump(model, model_path)
    print(f'Model saved to {model_path}')

    fitted_values = model.predict(features)

    # Data (dashed, with markers) against the fitted line.
    fig, ax = plt.subplots()
    ax.set_title('Linear approximation')
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.plot(features, targets, color='blue', label='Data', marker='o', linestyle='--')
    ax.plot(features, fitted_values, color='red', linewidth=2, label='Fitted line')
    ax.set_xticks(features.ravel())
    ax.grid(True)
    ax.legend()

    return model

# Example usage
# x = [1, 2, 3, 4, 5]
# y = [2.2, 2.8, 3.6, 4.5, 5.1]

# model = fit_and_save_linear_model(x, y)

# Load the model later for use
# loaded_model = joblib.load('linear_model.pkl')
# print(f"Loaded Model Coefficients: {loaded_model.coef_[0]}, Intercept: {loaded_model.intercept_}")

In [11]:
def display_results(experiments_dir, plot_figs=True):
    """
    Print test statistics of the best and median policies for every environment configuration of an
    experiment suite and, optionally, draw the associated figures.

    :param experiments_dir: Root directory of the experiments (with or without a trailing path separator).
        If the path contains 'containergym', four ContainerGym configurations are read from
        '<n>containers_<m>presses_timestep_2min' sub-directories; otherwise one sub-directory per entry of
        `gymnasium_envs` is expected.
    :param plot_figs: If True, plot the training curves, the Adam step diagnostics and linear fit (both skipped
        when the path contains 'dadaptation' or 'prodigy'), and a best-vs-median comparison figure.
    :return: None
    :raises ValueError: If an experiment directory is rejected by `get_best_and_median_policy_results`.
    """
    # TODO: Adjust y-label on best & median learning rate plots
    adam_diagnostics = 'dadaptation' not in experiments_dir and 'prodigy' not in experiments_dir

    if 'containergym' in experiments_dir:
        n_container_values = [5, 5, 11, 11]
        n_pu_values = [2, 5, 2, 11]
        pairs = list(zip(n_container_values, n_pu_values))
        env_configs = [f'n{n}_m{m}' for n, m in pairs]  # Labels for env. configurations
        config_dirs = [f'{n}containers_{m}presses_timestep_2min' for n, m in pairs]
    else:
        env_configs = list(gymnasium_envs)
        config_dirs = list(env_configs)

    # Test statistics of the best and median models, one list entry per configuration.
    stat_keys = ('avg_reward', 'std_reward', 'avg_length', 'std_length')
    best_stats = {key: [] for key in stat_keys}
    median_stats = {key: [] for key in stat_keys}

    l_dim = []
    l_peaks = []

    for env_config, config_dir in zip(env_configs, config_dirs):
        # The final '' keeps a trailing separator: downstream helpers may build file paths by plain
        # string concatenation.
        path = os.path.join(experiments_dir, config_dir, '')
        results = get_best_and_median_policy_results(path)
        if not results:
            # The helper signals an unrecognised directory with False; fail with a clear message
            # instead of an unpacking error.
            raise ValueError(f'Could not identify the environment of experiment directory: {path}')
        _, _, best, _, median = results
        for key in stat_keys:
            best_stats[key].append(best[key])
            median_stats[key].append(median[key])

        if plot_figs:
            # Plot learning curves (training return) per env. config.
            log_dirs = [f'{path}/{seed:02d}/' for seed in range(n_models)]
            plot_multiple_results(log_dirs, title='Smoothed training rewards on ' + env_config, window_size=50)

            # Plot gradient norms
            if adam_diagnostics:
                _, D, density_peak = plot_adam_steps(path, title='Norm of Adam cumulative path on ' + env_config)

                # Prepare lists for path's highest density plots
                l_dim.append(D)
                l_peaks.append(density_peak)

    # Train linear model for path mean value as a function of a model's #params.
    # Only possible when data points were collected (they are gathered only while plotting);
    # fitting on empty lists makes the regression fail.
    if adam_diagnostics and l_dim:
        fit_and_save_linear_model(l_dim, l_peaks)

    headers = ['Config.', 'Cumul. r', 'Std. cumul. r', 'Episode length', 'Std. episode length']

    def _stats_table(stats):
        # One row per configuration: label followed by the four statistics.
        rows = zip(env_configs, *(stats[key] for key in stat_keys))
        return tabulate(rows, headers=headers, floatfmt='.2f')

    # Print test results for best and median policies
    print('Best policy statistics:')
    print(_stats_table(best_stats))

    print('\n')
    print('Median policy statistics:')
    print(_stats_table(median_stats))

    if plot_figs:
        x = np.arange(len(env_configs))  # Position for bars
        width = 0.35  # Width of the bars

        fig, ax = plt.subplots(1, 2, figsize=(12, 5))

        # ----- BAR PLOT: Reward Comparison -----
        ax[0].bar(x - width/2, best_stats['avg_reward'], width, yerr=best_stats['std_reward'], label='Best', capsize=5, color='b', alpha=0.7)
        ax[0].bar(x + width/2, median_stats['avg_reward'], width, yerr=median_stats['std_reward'], label='Median', capsize=5, color='r', alpha=0.7)
        ax[0].set_ylabel('Cumulative reward')
        ax[0].set_title('Best vs. median policy - Cumulative reward')
        ax[0].grid(True)
        ax[0].set_xticks(x)
        ax[0].set_xticklabels(env_configs)
        ax[0].legend()

        # ----- LINE PLOT: Episode Length Comparison -----
        ax[1].errorbar(env_configs, best_stats['avg_length'], yerr=best_stats['std_length'], label='Best', marker='o', linestyle='-', color='b', capsize=5)
        ax[1].errorbar(env_configs, median_stats['avg_length'], yerr=median_stats['std_length'], label='Median', marker='D', linestyle='-', color='r', capsize=5)
        ax[1].set_ylabel('Episode length')
        ax[1].set_ylim(0)
        ax[1].grid(True)
        ax[1].set_title('Best vs. median policy - Episode length')
        ax[1].legend()

        # plot_cumul_path_distrib_mean_vs_dim(l_dim, l_peaks)

        plt.tight_layout()
        plt.show()


In [12]:
# Root directory of the experiment suite to analyse (machine-specific absolute path, written with a trailing '/').
experiments_dir = '/local/aatamna/ppo_adam_linear_schedule_gymnasium/'

In [13]:
# Print the best/median policy tables and draw all figures; requires the result files under experiments_dir.
display_results(experiments_dir, plot_figs=True)

FileNotFoundError: [Errno 2] No such file or directory: '/local/aatamna/ppo_adam_linear_schedule_gymnasium/LunarLander-v3/test_results.txt'