In [None]:
import os
import numpy as np
import pandas as pd

import glob
import copy

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Training rewards of the "expert" runs: one DataFrame per rewards.npz file.
# NOTE: the next cell of this notebook rebinds both `data_expert` and
# `data_expert_eval`, so the lists built here are only used if that cell is
# skipped.
data_expert = []
# sorted() makes the run order deterministic; glob order depends on the filesystem.
for file in sorted(glob.glob('./test_expert/results/expert*/rewards.npz')):
    # np.load returns an NpzFile for .npz archives; the context manager closes
    # its file handle once the arrays have been copied into the DataFrame.
    with np.load(file) as ev:
        data_expert.append(pd.DataFrame({'timesteps': ev['timesteps'],
                                         'reward': ev['results'],
                                         'level': 'expert'}))

# Evaluation rewards of the "expert" runs. `results` is averaged over axis 1
# (presumably one column per evaluation episode -- confirm against the writer
# of evaluations.npz), giving one reward per logged timestep.
data_expert_eval = []
for file in sorted(glob.glob('./test_expert_eval/results/expert*/evaluations.npz')):
    with np.load(file) as ev:
        data_expert_eval.append(pd.DataFrame({'timesteps': ev['timesteps'],
                                              'reward': np.mean(ev['results'], axis=1),
                                              'level': 'expert'}))

In [None]:
# Rows logged before this timestep are dropped from the `test_expert` and
# `test_noob` training curves (the *_3264 and *_eval sets are left unfiltered).
MIN_TIMESTEPS = 32768


def load_runs(pattern, level, average_episodes=False, min_timesteps=None):
    """Load every ``.npz`` file matching ``pattern`` into a list of DataFrames.

    Parameters
    ----------
    pattern : str
        Glob pattern selecting one ``.npz`` archive per run. Each archive must
        contain the arrays ``timesteps`` and ``results``.
    level : str
        Value stored in the ``level`` column of every returned frame.
    average_episodes : bool, default False
        If True, ``results`` is averaged over axis 1 before being stored as
        ``reward`` (used for the ``evaluations.npz`` files).
    min_timesteps : int or None, default None
        If given, only rows with ``timesteps >= min_timesteps`` are kept.

    Returns
    -------
    list of pandas.DataFrame
        One frame per matching file with columns ``timesteps``, ``reward`` and
        ``level``. Empty if nothing matches ``pattern``.
    """
    runs = []
    # sorted() makes the run order deterministic; glob order depends on the filesystem.
    for path in sorted(glob.glob(pattern)):
        # np.load returns an NpzFile for .npz archives; the context manager
        # closes its file handle once the arrays have been read.
        with np.load(path) as archive:
            rewards = archive['results']
            if average_episodes:
                rewards = np.mean(rewards, axis=1)
            run = pd.DataFrame({'timesteps': archive['timesteps'],
                                'reward': rewards,
                                'level': level})
        if min_timesteps is not None:
            run = run[run.timesteps >= min_timesteps]
        runs.append(run)
    return runs


data_expert = load_runs('./test_expert/results/expert*/rewards_ncpu.npz',
                        'expert', min_timesteps=MIN_TIMESTEPS)
data_expert_eval = load_runs('./test_expert_eval/results/expert*/evaluations.npz',
                             'expert', average_episodes=True)
data_expert3264 = load_runs('./expert_3264/results/expert*/rewards_ncpu.npz',
                            'expert')

# The noob runs were previously tagged level='expert' (copy-paste slip); they
# now carry their own level so the two groups stay distinguishable.
data_noob = load_runs('./test_noob/results/noob*/rewards_ncpu.npz',
                      'noob', min_timesteps=MIN_TIMESTEPS)
data_noob_eval = load_runs('./test_noob_eval/results/noob*/evaluations.npz',
                           'noob', average_episodes=True)
data_noob3264 = load_runs('./noob_3264/results/noob*/rewards_ncpu.npz',
                          'noob')

In [None]:
# Compare the training curves of the two observation settings.
# NOTE: plot_data is defined in a later cell of this notebook; that cell has
# to be executed before this one, otherwise this cell raises NameError.
# NOTE(review): 'standart' in the labels looks like a typo for 'standard' --
# confirm before changing the legend text of the published figure.
obs_comp_pdf = 'Pictures/MountainCarContinuous_obs_comp.pdf'

plt.figure()
plot_data(data_expert, smooth=5, label='high_obs')
# plot_data(data_expert3264, smooth=5, label='high_obs3264')
# plot_data(data_expert_eval, smooth=5, label='high_obs_eval')
plot_data(data_noob, smooth=5, label='standart_obs')
# plot_data(data_noob3264, smooth=5, label='standart_obs3264')
# plot_data(data_noob_eval, smooth=5, label='standart_obs_eval')
plt.xlim(0, 1.5*1e6)
plt.ylim(None, 100)
# savefig does not create missing directories; make sure the target exists.
os.makedirs(os.path.dirname(obs_comp_pdf), exist_ok=True)
plt.savefig(obs_comp_pdf, bbox_inches='tight')
plt.show()

In [None]:
# Same comparison for the runs loaded from the *_3264 result directories.
# NOTE: plot_data is defined in a later cell of this notebook; that cell has
# to be executed before this one, otherwise this cell raises NameError.
obs_comp3264_pdf = 'Pictures/MountainCarContinuous_obs_comp3264.pdf'

plt.figure()
plot_data(data_expert3264, smooth=5, label='high_obs_n32b64')
plot_data(data_noob3264, smooth=5, label='standart_obs_n32b64')
plt.xlim(0, 0.5*1e6)
plt.ylim(None, 100)
# savefig does not create missing directories; make sure the target exists.
os.makedirs(os.path.dirname(obs_comp3264_pdf), exist_ok=True)
plt.savefig(obs_comp3264_pdf, bbox_inches='tight')
plt.show()

In [None]:
def _moving_average(values, window):
    """Return the centered moving average of ``values`` over ``window`` samples.

    smoothed[t] = average(y[t-k], ..., y[t+k]) with window = 2k + 1. Near the
    edges the window is truncated to the samples that exist, and each sum is
    divided by the number of samples that actually contributed.

    The result always has ``len(values)`` entries. ``np.convolve(..., 'same')``
    would instead return ``max(len(values), window)`` entries, which breaks for
    series shorter than the window.
    """
    values = np.asarray(values)
    n = len(values)
    if n == 0:
        # np.convolve rejects empty input; there is nothing to smooth anyway.
        return values
    kernel = np.ones(window)
    # Offset of the centered, length-n slice inside the 'full' convolution;
    # for n >= window this is exactly what mode='same' returns.
    start = (window - 1) // 2
    window_sums = np.convolve(values, kernel, 'full')[start:start + n]
    window_counts = np.convolve(np.ones(n), kernel, 'full')[start:start + n]
    return window_sums / window_counts


def plot_data(data, xaxis='timesteps', value="reward", smooth=1, **kwargs):
    """Draw the mean curve of ``value`` over ``xaxis`` with a +/- 1 sd band.

    The curve is drawn with seaborn on the current matplotlib axes and a
    legend is placed above the axes. The input is never modified.

    Parameters
    ----------
    data : list or tuple of pandas.DataFrame, or a single pandas.DataFrame
        One frame per run, each containing the columns named by ``xaxis`` and
        ``value``. The frames are concatenated before plotting.
    xaxis : str
        Column plotted on the x axis.
    value : str
        Column plotted on the y axis.
    smooth : int
        Width of the moving-average window applied to every run separately;
        values <= 1 disable smoothing.
    **kwargs
        Forwarded to ``seaborn.lineplot`` (e.g. ``label``). ``legend``
        defaults to ``False`` because the legend is drawn by matplotlib here.

    Raises
    ------
    ValueError
        If ``data`` is an empty list or tuple.
    """
    is_sequence = isinstance(data, (list, tuple))
    runs = list(data) if is_sequence else [data]
    if not runs:
        raise ValueError("plot_data received no runs to plot "
                         "(did the glob pattern match any result files?)")

    if smooth > 1:
        smoothed_runs = []
        for run in runs:
            # Work on a copy so the caller's frames keep their raw values.
            smoothed = run.copy()
            smoothed[value] = _moving_average(run[value], smooth)
            smoothed_runs.append(smoothed)
        runs = smoothed_runs

    frame = pd.concat(runs, ignore_index=True) if is_sequence else runs[0]

    sns.set(style="whitegrid", font_scale=1.5)  # darkgrid whitegrid
    # seaborn's own legend is disabled by default (plt.legend below draws it);
    # setdefault lets a caller still override `legend` without a TypeError.
    kwargs.setdefault('legend', False)
    sns.lineplot(data=frame, x=xaxis, y=value, errorbar='sd', **kwargs)
    plt.legend(
            # loc='upper center',
            # bbox_to_anchor=(0.5, 1.0),
            bbox_to_anchor=(0., 1.02, 1., .102), loc='lower left',
            ncol=2,
            handlelength=1.0,
            mode="expand",
            borderaxespad=0.,
            frameon=False,
            prop={'size': 13}
    )

    # Alternative legend layouts:
    # plt.legend(loc='best').set_draggable(True)
    # plt.legend(loc='upper center', ncol=3, handlelength=1,
    #            borderaxespad=0., prop={'size': 13})
    # Legend used on the Spinning Up benchmarking page:
    # plt.legend(loc='upper center', ncol=6, handlelength=1,
    #            mode="expand", borderaxespad=0., prop={'size': 13})

    x_values = np.asarray(frame[xaxis])
    if x_values.size and np.max(x_values) > 5e3:
        # Just some formatting niceness: x-axis scale in scientific notation if max x is large
        plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))

    plt.tight_layout(pad=2)