In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from definitions import ROOT_DIR
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
from typing import Iterable
from scipy.signal import butter, lfilter


In [None]:
def get_data_from_tb_log(path, y, x="step", tb_config=None):
    """Read one or more scalar series from a TensorBoard event log.

    Args:
        path: Path handed to ``EventAccumulator`` (this notebook passes a
            single event file).
        y: A single scalar tag (e.g. ``"eval/score"``) or an iterable of tags.
        x: Name of the scalar-event attribute used as x value; this notebook
            uses ``"step"`` and ``"wall_time"``.
        tb_config: Forwarded as the second positional argument of
            ``EventAccumulator``; an empty dict is used when omitted.

    Returns:
        dict mapping every requested tag to ``(x_vals, y_vals)`` (two numpy
        arrays), or to ``None`` when the tag is not among the logged scalars.
    """
    if tb_config is None:
        tb_config = {}

    event_acc = EventAccumulator(path, tb_config)
    event_acc.Reload()

    # A plain string is itself iterable, so it must be wrapped explicitly;
    # otherwise a single tag would be split into its characters.
    if isinstance(y, (str, bytes)) or not isinstance(y, Iterable):
        y = [y]

    # Query the available tags once instead of once per requested attribute.
    scalar_tags = event_acc.Tags()["scalars"]

    out_dict = {}
    for attr_name in y:
        if attr_name in scalar_tags:
            # Rows are (x, value) pairs; transposing yields one array per column.
            x_vals, y_vals = np.array(
                [(getattr(el, x), el.value) for el in event_acc.Scalars(attr_name)]
            ).T
            out_dict[attr_name] = (x_vals, y_vals)
        else:
            out_dict[attr_name] = None
    return out_dict
    
def butter_lowpass(cutoff, fs, order=5):
    """Design a digital Butterworth low-pass filter.

    Args:
        cutoff: Cutoff frequency, expressed in the same units as ``fs``.
        fs: Sampling frequency of the signal to be filtered.
        order: Order of the filter.

    Returns:
        Whatever ``scipy.signal.butter`` returns for these settings; callers
        in this notebook unpack it as the ``(b, a)`` coefficient pair.
    """
    coefficients = butter(N=order, Wn=cutoff, btype="low", analog=False, fs=fs)
    return coefficients

def butter_lowpass_filter(data, cutoff, fs, order=5):
    """Low-pass filter ``data`` with a Butterworth filter.

    The coefficients come from ``butter_lowpass(cutoff, fs, order)`` and are
    applied to ``data`` with ``scipy.signal.lfilter``.

    Args:
        data: Sequence of samples to smooth.
        cutoff: Cutoff frequency, in the same units as ``fs``.
        fs: Sampling frequency of ``data``.
        order: Order of the Butterworth filter.

    Returns:
        The filtered signal as returned by ``lfilter``.
    """
    numerator, denominator = butter_lowpass(cutoff, fs, order=order)
    return lfilter(numerator, denominator, data)

In [None]:
# Notebook configuration.
# Name of the TensorBoard sub-directory looked up inside every experiment folder.
tb_dir_name = "RecurrentPPO_1"
# Scalar tags extracted from each TensorBoard log.
attributes = ("rollout/ep_rew_mean", "eval/score")
# Folder whose entries are the individual experiments (one per curriculum step).
EXPERIMENTS_DIR = os.path.join(
    ROOT_DIR,
    "trained_models",
    "curriculum_steps_complete_baoding_winner",
)

In [None]:
# Experiments are processed in the (name-sorted) order of their folders.
curriculum_steps = sorted(os.listdir(EXPERIMENTS_DIR))
# {attribute: [{"step": array, "time": array, "value": array}, ...]}
# with one list entry per experiment that has a TensorBoard directory.
curriculum_data_dict = {name: [] for name in attributes}

for experiment in curriculum_steps:
    experiment_dir = os.path.join(EXPERIMENTS_DIR, experiment)
    print(f"'{experiment_dir}'")
    tb_dir_path = os.path.join(experiment_dir, tb_dir_name)

    # Experiments without a TensorBoard directory contribute nothing.
    if not os.path.isdir(tb_dir_path):
        continue

    # Exactly one event file is expected per TensorBoard directory.
    tb_files = os.listdir(tb_dir_path)
    assert len(tb_files) == 1
    event_file = os.path.join(tb_dir_path, tb_files[0])

    # The same log is read twice: once indexed by wall time, once by step.
    by_time = get_data_from_tb_log(event_file, attributes, x="wall_time")
    by_step = get_data_from_tb_log(event_file, attributes, x="step")

    for attr, series in by_step.items():
        if series is not None:
            wall_time = by_time[attr][0]  # The x of the time data
            entry = {
                "step": series[0],
                # Train time in hours, relative to the first logged point
                "time": (wall_time - wall_time[0]) / 3.6e3,
                "value": series[1],
            }
        else:
            # Tag missing from this log: keep an empty placeholder so every
            # attribute list has one entry per processed experiment.
            entry = {"step": np.array([]), "time": np.array([]), "value": np.array([])}
        curriculum_data_dict[attr].append(entry)

In [None]:
# One figure per attribute: all curriculum steps are stitched into a single
# learning curve (bottom axis: millions of steps, top axis: training hours).
hour_tick_spacing = 100  # distance between top-axis ticks, in hours
filter_cutoff = 5  # low-pass cutoff, in cycles per million steps (units of x)
out_dir = os.path.join(ROOT_DIR, "data", "learning_curves")
os.makedirs(out_dir, exist_ok=True)  # savefig does not create missing folders

for attr, data in curriculum_data_dict.items():
    print(attr)
    # np.concatenate raises on an empty list, so attributes that were never
    # logged in any experiment are skipped instead of crashing the cell.
    if not any(len(c_step["step"]) > 0 for c_step in data):
        print(f"No data logged for '{attr}', skipping plot.")
        continue

    line_colors = plt.cm.coolwarm(np.linspace(0, 0.8, len(data)))
    fig, ax1 = plt.subplots(figsize=(10, 2))
    last_step = 0
    last_time = 0
    time_vec_list = []
    step_vec_list = []
    for idx, c_step in enumerate(data):
        if len(c_step["step"]) == 0:
            continue
        # Offset each curriculum step by the end of the previous one so the
        # curves line up one after another on a common axis.
        x = c_step["step"] * 1e-6 + last_step
        y = c_step["value"]
        if len(y) > 51:
            span = x[-1] - x[0]
            fs = len(x) / span if span > 0 else 0.0  # samples per million steps
            # butter() requires 0 < cutoff < fs / 2; leave sparsely sampled
            # curves unfiltered rather than raising.
            if fs > 2 * filter_cutoff:
                y = butter_lowpass_filter(y, cutoff=filter_cutoff, fs=fs, order=1)
        ax1.plot(x, y, color=line_colors[idx])
        last_step = x[-1]
        time_chunk = c_step["time"] + last_time
        last_time = time_chunk[-1]
        step_vec_list.append(x)
        time_vec_list.append(time_chunk)

    step = np.concatenate(step_vec_list)
    time = np.concatenate(time_vec_list)
    color = 'tab:red'
    ax1.set_xlabel('Millions of steps', color=color)
    ax1.set_ylabel('Episode reward')
    ax1.tick_params(axis='y')
    ax1.grid(True, color=color, alpha=0.6)
    ax1.tick_params(axis='x', labelcolor=color)

    ax2 = ax1.twiny()  # instantiate a second axes that shares the same y-axis
    color = 'tab:blue'
    ax2.set_xlabel('Training Time [hours]', color=color)
    ax2.tick_params(axis='x', labelcolor=color)

    # The twin axis has its own x scale. Give it the limits of the step axis
    # and place every hour tick at the step position reached at that time, so
    # that both axes describe the same plotted points.
    ax2.set_xlim(ax1.get_xlim())
    hour_ticks = np.arange(int(time[0]), int(time[-1]), hour_tick_spacing)
    ax2.set_xticks(np.interp(hour_ticks, time, step))
    ax2.set_xticklabels(hour_ticks)

    # Customize plot
    ax2.grid(True, color=color, alpha=0.6)
    fig.tight_layout()  # ensure that all the labels fit comfortably
    attr_print = attr.replace("/", "_")
    out_name = f"learning_curve_curriculum_{attr_print}"
    fig.savefig(os.path.join(out_dir, out_name + ".png"), format="png", dpi=800, bbox_inches="tight")
    plt.show()


In [None]:
# Plotting
# Overview figure of a single attribute across all curriculum steps. The
# series are rebuilt here from curriculum_data_dict so the cell does not rely
# on variables leaked from another cell's loop.
summary_attr = attributes[0]

step_chunks, time_chunks, value_chunks = [], [], []
step_offset = 0
time_offset = 0
for c_step in curriculum_data_dict[summary_attr]:
    if len(c_step["step"]) == 0:
        continue
    # Each curriculum step continues where the previous one ended.
    step_chunks.append(c_step["step"] + step_offset)
    time_chunks.append(c_step["time"] + time_offset)
    value_chunks.append(c_step["value"])
    step_offset = step_chunks[-1][-1]
    time_offset = time_chunks[-1][-1]

if not step_chunks:
    print(f"No data logged for '{summary_attr}', nothing to plot.")
else:
    step = np.concatenate(step_chunks)
    time = np.concatenate(time_chunks)
    value = np.concatenate(value_chunks)

    fig, ax1 = plt.subplots(figsize=(10, 6))

    color = 'tab:red'
    ax1.set_xlabel('Step', color=color)
    ax1.set_ylabel('Episode reward')
    ax1.plot(step, value, label='Episode reward')
    ax1.tick_params(axis='y')
    ax1.grid(True, color=color, alpha=0.4)
    ax1.tick_params(axis='x', labelcolor=color)

    ax2 = ax1.twiny()  # instantiate a second axes that shares the same y-axis
    color = 'tab:blue'
    ax2.set_xlabel('Training Time [hours]', color=color)
    ax2.tick_params(axis='x', labelcolor=color)

    # Show the training time on the top axis: ticks sit at step positions
    # (same limits as the bottom axis) and are labelled with the elapsed hours
    # at those steps. The stride keeps the number of ticks at roughly ten.
    ax2.set_xlim(ax1.get_xlim())
    stride = max(1, len(step) // 10)
    ax2.set_xticks(step[::stride])
    ax2.set_xticklabels([f"{hours:.1f}" for hours in time[::stride]])

    # Customize plot
    ax2.set_title('Learning Curve')
    ax2.grid(True, color=color, alpha=0.4)
    fig.tight_layout()  # ensure that all the labels fit comfortably
    plt.show()