In [None]:
%load_ext autoreload
%autoreload 2
import os
from utils.plotting import get_colors, load_config, plot
import numpy as np
from matplotlib import pyplot as plt

In [None]:
import json
import glob
import os
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sb

from scipy.signal import savgol_filter
    

# The plotting functionality implemented below was already covered by the plot function
# written for the tabular case; that function should simply have been reused here.
def _smoothed_copy(data, window, degree=2):
    """Return a Savitzky-Golay-smoothed copy of ``data``; the caller's dict is left untouched."""
    smoothed = {}
    for agent, entry in data.items():
        entry = list(entry)  # entries may be tuples, and we must not alias the caller's container
        for idx in range(3):  # 0: rewards, 1: lens, 2: decs -- train steps/episodes stay raw
            pair = list(entry[idx])
            pair[0] = savgol_filter(pair[0], window, degree)  # mean
            pair[1] = savgol_filter(pair[1], window, degree)  # stdev
            entry[idx] = pair
        smoothed[agent] = entry
    return smoothed


def _color_key(agent, color_map):
    """Map an agent name to its entry in ``color_map`` (agents of one family share a color)."""
    if 'dar' in agent:
        return color_map['dar']
    if agent.startswith('t'):
        return color_map['t-DDPG']
    if agent.startswith('f'):
        return color_map['f-DDPG']
    return color_map[agent]


def _legend_label(agent, dont_label):
    """Return the legend label for ``agent`` or None if the agent must not be labelled."""
    if agent in dont_label:
        return None
    if agent.startswith('t'):
        return 't-DDPG'
    if agent.startswith('f'):
        return 'FiGAR'
    if agent.startswith('e'):
        return r'$\epsilon$z-DQN'
    return agent.upper()


def plotMultiple(data, ylim=None, title='', logStepY=False, max_steps=200, xlim=None, figsize=None,
                 alphas=None, smooth=5, savename=None, rewyticks=None, lenyticks=None,
                 skip_stdevs=None, dont_label=None, dont_plot=None, min_steps=None):
    """
    Simple plotting method that draws two stacked subplots sharing a log-scaled x-axis (train steps):
    the test reward on top and the number of actions (decisions: solid, all steps: dotted) below.
    The figure is optionally saved and then shown. The passed ``data`` is never modified.

    data -> (dict[agent name] -> list([rewards, lens, decs, train_steps, train_episodes])) the data to plot.
            Each of the five items is indexed as [mean, stdev].
    ylim -> (list) y-axis limit of the reward plot
    title -> (str) title on top of plot
    logStepY -> (bool) flag that indicates if the y-axis of the lower plot should be on log scale.
    max_steps -> (int) upper y-axis limit of the lower plot
    xlim -> (list) x-axis limits
    figsize -> (list) dimensions of the figure (defaults to (20, 10))
    alphas -> (dict[agent name] -> float) the alpha value to use for plotting of specific agents.
              Agents without an entry are drawn with alpha 1.
    smooth -> (int) the window size for Savitzky-Golay smoothing of the reward, lens and decs
              means/stdevs (has to be odd if used. 0, None or < 0 deactivates this option)
    savename -> (str) filename to save the figure
    rewyticks -> (list) yticks for the reward plot
    lenyticks -> (list) yticks for the decisions plot
    skip_stdevs -> (list) list of names to not plot standard deviations for.
    dont_label -> (list) list of names to not label.
    dont_plot -> (list) list of names to not plot.
    min_steps -> (int) lower y-axis limit of the lower plot (defaults to 1)
    """
    # None instead of mutable list defaults; normalise so the membership tests below just work.
    skip_stdevs = () if skip_stdevs is None else skip_stdevs
    dont_label = () if dont_label is None else dont_label
    dont_plot = () if dont_plot is None else dont_plot

    if smooth and smooth > 0:
        # Work on a copy: callers may reuse the same dict for several plots and repeated
        # in-place smoothing would silently distort their data.
        data = _smoothed_copy(data, smooth)

    colors, color_map = get_colors()

    cfg = load_config()
    sb.set_style(cfg['plotting']['seaborn']['style'])
    sb.set_context(cfg['plotting']['seaborn']['context']['context'],
                   font_scale=cfg['plotting']['seaborn']['context']['font scale'],
                   rc=cfg['plotting']['seaborn']['context']['rc2'])

    fig, ax = plt.subplots(2, figsize=figsize if figsize else (20, 10), dpi=100, sharex=True)
    ax[0].set_title(title)

    # Iterate in reverse insertion order so that agents inserted first are drawn last (on top).
    for agent in reversed(list(data)):
        if agent in dont_plot:
            continue
        alph = alphas.get(agent, 1.) if alphas else 1.
        color = colors[_color_key(agent, color_map)]
        dark_color = np.array(color) * .75  # darker shade used for the "all steps" curve
        show_stdev = agent not in skip_stdevs

        rew, lens, decs, train_steps, _train_eps = data[agent]
        steps = train_steps[0]
        rew_mean, rew_std = np.asarray(rew[0]), np.asarray(rew[1])
        dec_mean, dec_std = np.asarray(decs[0]), np.asarray(decs[1])
        len_mean, len_std = np.asarray(lens[0]), np.asarray(lens[1])

        #### Plot rewards
        ax[0].step(steps, rew_mean, where='post', c=color, label=_legend_label(agent, dont_label),
                   alpha=alph)
        if show_stdev:
            ax[0].fill_between(steps, rew_mean - rew_std, rew_mean + rew_std, alpha=0.25 * alph,
                               step='post', color=color)

        #### Plot decisions (solid) and overall number of steps (dotted)
        ax[1].step(steps, dec_mean, where='post', c=np.array(color), ls='-', alpha=alph)
        if show_stdev:
            ax[1].fill_between(steps, dec_mean - dec_std, dec_mean + dec_std, alpha=0.125 * alph,
                               step='post', color=np.array(color))
        ax[1].step(steps, len_mean, where='post', c=dark_color, alpha=alph, ls=':')
        if show_stdev:
            ax[1].fill_between(steps, len_mean - len_std, len_mean + len_std, alpha=0.25 * alph,
                               step='post', color=dark_color)

    ax[0].semilogx()
    if rewyticks is not None:
        ax[0].set_yticks(rewyticks)
    if ylim:
        ax[0].set_ylim(ylim)
    if xlim:
        ax[0].set_xlim(xlim)
    ax[0].set_ylabel('Reward')
    # Only show the reward legend when few labelled agents remain.
    if len(data) - len(dont_label) < 5:
        ax[0].legend(ncol=1, loc='best', handlelength=.75)

    ax[1].semilogx()
    if logStepY:
        ax[1].semilogy()
    # Dummy lines that only serve as legend handles for the two line styles.
    ax[1].plot([-999, -999], [-999, -999], ls=':', c='k', label='all')
    ax[1].plot([-999, -999], [-999, -999], ls='-', c='k', label='dec')
    ax[1].legend(loc='best', ncol=1, handlelength=.75)
    ax[1].set_ylim([min_steps if min_steps is not None else 1, max_steps])
    if xlim:
        ax[1].set_xlim(xlim)
    ax[1].set_ylabel('#Actions')
    ax[1].set_xlabel('#Train Steps')
    if lenyticks is not None:
        ax[1].set_yticks(lenyticks)

    plt.tight_layout()
    if savename:
        plt.savefig(savename)

    plt.show()

In [None]:
def _as_plot_entry(mean, stdev):
    """Arrange per-column statistics as [rewards, lens, decs, train_steps, train_episodes].

    Column 0 of the logged arrays is used for both the train-step and the train-episode slot,
    and its mean is stored in the stdev position of those two slots as well.
    """
    steps = mean[:, 0]
    return [[mean[:, 1], stdev[:, 1]],
            [mean[:, 3], stdev[:, 3]],
            [mean[:, 2], stdev[:, 2]],
            [steps, steps],
            [steps, steps]]


# Baseline: plain DDPG, aggregated over all run directories found on disk.
ddpg_datas = [np.load(f'experiments/ddpg/DDPG/{i}/DDPG_Pendulum-v0_{i}.npy')
              for i in sorted(os.listdir('experiments/ddpg/DDPG'))]
ddpg_mean = np.mean(ddpg_datas, axis=0)
ddpg_stdev = np.std(ddpg_datas, axis=0)

results = {}
results['DDPG'] = _as_plot_entry(ddpg_mean, ddpg_stdev)

# One comparison figure per maximal skip length.
for max_len in range(2, 21, 2):
    temporl_datas = [
        np.load(f'experiments/ddpg/TempoRLDDPG/{max_len}/{i}/TempoRLDDPG_Pendulum-v0_{i}.npy')
        for i in sorted(os.listdir(f'experiments/ddpg/TempoRLDDPG/{max_len}'))
    ]
    figar_datas = [
        np.load(f'experiments/ddpg/FiGARDDPG/{max_len}/{i}/FiGARDDPG_Pendulum-v0_{i}.npy')
        for i in sorted(os.listdir(f'experiments/ddpg/FiGARDDPG/{max_len}'))
    ]

    temporl_mean = np.mean(temporl_datas, axis=0)
    figar_mean = np.mean(figar_datas, axis=0)
    temporl_stdev = np.std(temporl_datas, axis=0)
    figar_stdev = np.std(figar_datas, axis=0)

    # (dict[agent name] -> list([rewards, lens, decs, train_steps, train_episodes]))
    results['t-DDPG'] = _as_plot_entry(temporl_mean, temporl_stdev)
    results['f-DDPG'] = _as_plot_entry(figar_mean, figar_stdev)

    # Overall range of the mean rewards across the three agents.
    mean_rewards = [results[name][0][0] for name in ('DDPG', 't-DDPG', 'f-DDPG')]
    print(min(min(curve) for curve in mean_rewards),
          max(max(curve) for curve in mean_rewards))

    # Mean of the mean reward after shifting by 1800 and dividing by (-145 + 1800).
    for caption, name in (('  DDPG AUC:', 'DDPG'),
                          ('t-DDPG AUC:', 't-DDPG'),
                          (' FiGAR AUC:', 'f-DDPG')):
        print(caption, np.mean((results[name][0][0] + 1800) / (-145 + 1800)))

    plotMultiple(results, title=r'Pendulum-v0  -- $\mathcal{J}=' + f'{max_len}$',
                 smooth=0, ylim=[-1800, -50], min_steps=10, max_steps=210, xlim=[10**3, 3*10**4],
                 lenyticks=[50, 125, 200], rewyticks=[-1800, -1000, -200],
                 savename=f'ddpg_{max_len}.pdf')