In [None]:
import os
import numpy as np
import pandas as pd

import glob
import copy

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
def _moving_average(values, window):
    """Return the centred moving average of ``values`` over ``window`` samples.

    smoothed[t] = average(values[t-k], ..., values[t+k]) with window = 2k+1.
    The sum is divided by the number of samples that actually overlap the
    kernel, so the two ends of the series are averaged over fewer points
    instead of being pulled towards zero.
    """
    values = np.asarray(values)
    # np.convolve(..., 'same') returns max(len(values), len(kernel)) samples, so a
    # kernel longer than the series would yield an output that no longer matches
    # the number of rows it has to be written back to. Clamp the window.
    window = min(int(window), len(values))
    if window <= 1:
        return values
    kernel = np.ones(window)
    overlap = np.convolve(np.ones(len(values)), kernel, 'same')
    return np.convolve(values, kernel, 'same') / overlap


def plot_data(data, xaxis='timesteps', value="reward", smooth=1, xlim=(None, None), ylim=(None, None), time=None, **kwargs):
    """Plot one curve with seaborn on subplot (1, 1, 1) of the current figure.

    Args:
        data: a DataFrame, or a list of DataFrames (one per run). A list is
            concatenated into a single frame before plotting. The input is
            deep-copied first, so the caller's objects are never modified.
        xaxis: name of the column used for the x axis.
        value: name of the column used for the y axis.
        smooth: width of the moving-average window applied to ``value`` of
            every run; values <= 1 disable smoothing.
        xlim, ylim: axis limits handed to ``plt.xlim`` / ``plt.ylim``.
        time: if given, a second x axis is added below the plot. ``time`` is
            divided by 60 to obtain the tick values of that axis, and is taken
            to correspond to the last ``xaxis`` value in the data.
        **kwargs: forwarded to ``sns.lineplot`` (e.g. ``label``).

    Raises:
        ValueError: if ``time`` is given but is not positive.
    """
    # Work on a copy so smoothing never leaks into the caller's frames.
    data = copy.deepcopy(data)

    if smooth > 1:
        # A bare frame is wrapped so that both input forms are smoothed; iterating
        # a DataFrame directly would walk over its column labels instead.
        runs = data if isinstance(data, list) else [data]
        for run in runs:
            run[value] = _moving_average(run[value], smooth)

    if isinstance(data, list):
        data = pd.concat(data, ignore_index=True)

    ax1 = plt.subplot(1,1,1)
    sns.set(style="whitegrid", font_scale=1.5) # darkgrid whitegrid
    # lineplot is called with legend=None; the legend is drawn by plt.legend below.
    sns.lineplot(data=data, x=xaxis, y=value, errorbar='sd', **kwargs, legend=None)
    plt.legend(
            # loc='upper center',
            # bbox_to_anchor=(0.5, 1.0),
            bbox_to_anchor=(0., 1.02, 1., .102), loc='lower left',
            ncol=2,
            handlelength=1.0,
            mode="expand",
            borderaxespad=0.,
            frameon=False,
            prop={'size': 13}
    )

    # Alternative legend layouts:
    # plt.legend(loc='best').set_draggable(True)
    # plt.legend(loc='upper center', ncol=3, handlelength=1,
    #            borderaxespad=0., prop={'size': 13})
    #
    # For the version of the legend used in the Spinning Up benchmarking page,
    # replace the plt.legend call above with:
    # plt.legend(loc='upper center', ncol=6, handlelength=1,
    #            mode="expand", borderaxespad=0., prop={'size': 13})

    # Just some formatting niceness: x-axis scale in scientific notation if max x is large
    if np.max(np.asarray(data[xaxis])) > 5e3:
        plt.ticklabel_format(style='sci', axis='x', scilimits=(0,0))

    plt.xlim(xlim)
    plt.ylim(ylim)
    plt.tight_layout(pad=2)

    if time is not None:
        if time <= 0:
            # A non-positive duration gives a zero or negative tick step below.
            raise ValueError("time must be positive")

        ax2 = ax1.twiny()

        # The last x value of the plotted data is what `time` is mapped onto.
        last_x = np.asarray(data[xaxis])[-1]
        # Tick values are time/60, spaced so that roughly six intervals cover the axis.
        newlabel = np.arange(0, np.ceil(time/60)+1, np.ceil(time/60/6))
        newpos = newlabel*(last_x / time * 60)
        ax2.set_xticks(newpos)
        ax2.set_xticklabels(newlabel)

        ax2.xaxis.set_ticks_position('bottom') # set the position of the second x-axis to bottom
        ax2.xaxis.set_label_position('bottom') # set the position of the second x-axis to bottom
        ax2.spines['bottom'].set_position(('outward', 64))
        ax2.set_xlabel('time, мин')
        # Share the x range with the main axis so the tick positions line up.
        ax2.set_xlim(ax1.get_xlim())
        # ax2.xaxis.set_major_formatter(FormatStrFormatter('% 1.1f'))
        ax2.grid(False)

# Experiment 1 - extend standard obs with angle

In [None]:
# data_expert_eval = []
# for file in glob.glob('./expert_eval/results/expert*/evaluations.npz'):
#     ev = np.load(file)
#     data_expert_eval.append(pd.DataFrame({'timesteps': ev['timesteps'],
#                                        'reward': np.mean(ev['results'], axis=1),
#                                        'level': 'expert'}))

# data_noob_eval = []
# for file in glob.glob('./noob_eval/results/noob*/evaluations.npz'):
#     ev = np.load(file)
#     data_noob_eval.append(pd.DataFrame({'timesteps': ev['timesteps'],
#                                        'reward': np.mean(ev['results'], axis=1),
#                                        'level': 'expert'}))

# One DataFrame per evaluations.npz file. `reward` is `results` averaged along
# axis 1 (presumably the episodes of one evaluation round -- TODO confirm).
# sorted() keeps the run order reproducible (glob order depends on the filesystem),
# and `with` closes each .npz archive, which np.load otherwise leaves open.
data_expert_eval2 = []
for file in sorted(glob.glob('./expert_eval2/results/expert*/evaluations.npz')):
    with np.load(file) as ev:
        data_expert_eval2.append(pd.DataFrame({'timesteps': ev['timesteps'],
                                               'reward': np.mean(ev['results'], axis=1),
                                               'level': 'expert'}))

# The noob runs were previously tagged 'expert' as well (copy-paste slip).
data_noob_eval2 = []
for file in sorted(glob.glob('./noob_eval2/results/noob*/evaluations.npz')):
    with np.load(file) as ev:
        data_noob_eval2.append(pd.DataFrame({'timesteps': ev['timesteps'],
                                             'reward': np.mean(ev['results'], axis=1),
                                             'level': 'noob'}))

In [None]:
# One DataFrame per evaluations.npz file. `reward` is `results` averaged along
# axis 1 (presumably the episodes of one evaluation round -- TODO confirm).
# sorted() keeps the run order reproducible (glob order depends on the filesystem),
# and `with` closes each .npz archive, which np.load otherwise leaves open.
# Noob runs are tagged 'noob'; they used to carry 'expert' from a copy-paste.
data_expert3264_eval = []
for file in sorted(glob.glob('./expert_3264_eval/results/expert*/evaluations.npz')):
    with np.load(file) as ev:
        data_expert3264_eval.append(pd.DataFrame({'timesteps': ev['timesteps'],
                                                  'reward': np.mean(ev['results'], axis=1),
                                                  'level': 'expert'}))

data_noob3264_eval = []
for file in sorted(glob.glob('./noob_3264_eval/results/noob*/evaluations.npz')):
    with np.load(file) as ev:
        data_noob3264_eval.append(pd.DataFrame({'timesteps': ev['timesteps'],
                                                'reward': np.mean(ev['results'], axis=1),
                                                'level': 'noob'}))

data_expert3264_eval2 = []
for file in sorted(glob.glob('./expert_3264_eval2/results/expert*/evaluations.npz')):
    with np.load(file) as ev:
        data_expert3264_eval2.append(pd.DataFrame({'timesteps': ev['timesteps'],
                                                   'reward': np.mean(ev['results'], axis=1),
                                                   'level': 'expert'}))

data_noob3264_eval2 = []
for file in sorted(glob.glob('./noob_3264_eval2/results/noob*/evaluations.npz')):
    with np.load(file) as ev:
        data_noob3264_eval2.append(pd.DataFrame({'timesteps': ev['timesteps'],
                                                 'reward': np.mean(ev['results'], axis=1),
                                                 'level': 'noob'}))

In [None]:
# Figure: eval2 curves of the expert and noob runs, drawn on shared axes
# with a 5-sample smoothing window and saved as a PDF.
xlim, ylim = (0, 1.5*1e6), (None, 100)
plt.figure()
for curve_runs, curve_label in (
        (data_expert_eval2, 'extend_obs'),
        (data_noob_eval2, 'standart_obs'),
):
    plot_data(curve_runs, smooth=5, xlim=xlim, ylim=ylim, label=curve_label)
plt.savefig('Pictures/MountainCarContinuous_obs_comp_2.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Figure: eval2 curves of the 3264 expert and noob runs, drawn on shared axes
# with a 5-sample smoothing window and saved as a PDF.
xlim, ylim = (0, 0.5*1e6), (None, 100)
plt.figure()
for curve_runs, curve_label in (
        (data_expert3264_eval2, 'extend_obs'),
        (data_noob3264_eval2, 'standart_obs'),
):
    plot_data(curve_runs, smooth=5, xlim=xlim, ylim=ylim, label=curve_label)
plt.savefig('Pictures/MountainCarContinuous_obs_comp3264_2.pdf', bbox_inches='tight')
plt.show()

In [None]:
# xlim = (0, 1*1e6)
# ylim = (-110, 100)
# plt.figure()
# # plot_data(data_expert3264, smooth=5, label='high_obs')
# # plot_data(data_noob3264, smooth=5, label='standart_obs')
# plot_data(data_expert3264_eval, smooth=5, xlim=xlim, ylim=ylim, label='high_obs')
# plot_data(data_noob3264_eval, smooth=5, xlim=xlim, ylim=ylim, label='standart_obs')
# plt.savefig('Pictures/MountainCarContinuous_obs_comp3264_1.pdf', bbox_inches='tight')
# plt.show()

# Experiment 2 - No Vel

In [None]:
# data_noob_lstm = []
# for file in glob.glob('./noob_lstm_n32b128_1kk/results/noob*/rewards_ncpu.npz'):
#     ev = np.load(file)
#     data_noob_lstm.append(pd.DataFrame({'timesteps': ev['timesteps'],
#                                        'reward': ev['results'],
#                                        'level': 'expert'}))
# data_noob_novel = []
# for file in glob.glob('./noob_novel_n32b64_1kk/results/noob*/rewards_ncpu.npz'):
#     ev = np.load(file)
#     data_noob_novel.append(pd.DataFrame({'timesteps': ev['timesteps'],
#                                        'reward': ev['results'],
#                                        'level': 'expert'}))

In [None]:
# One DataFrame per evaluations.npz file. `reward` is `results` averaged along
# axis 1 (presumably the episodes of one evaluation round -- TODO confirm).
# sorted() keeps the run order reproducible (glob order depends on the filesystem),
# and `with` closes each .npz archive, which np.load otherwise leaves open.
# All three groups are globbed from noob* directories, so they are tagged 'noob'
# (they used to carry 'expert' from a copy-paste).
data_noob_lstm_eval2 = []
for file in sorted(glob.glob('./noob_lstm_32128_eval2/results/noob*/evaluations.npz')):
    with np.load(file) as ev:
        data_noob_lstm_eval2.append(pd.DataFrame({'timesteps': ev['timesteps'],
                                                  'reward': np.mean(ev['results'], axis=1),
                                                  'level': 'noob'}))

data_noob_novel_fs2_eval2 = []
for file in sorted(glob.glob('./noob_novel_3264_fs2_eval2/results/noob*/evaluations.npz')):
    with np.load(file) as ev:
        data_noob_novel_fs2_eval2.append(pd.DataFrame({'timesteps': ev['timesteps'],
                                                       'reward': np.mean(ev['results'], axis=1),
                                                       'level': 'noob'}))

data_noob_novel_fs4_eval2 = []
for file in sorted(glob.glob('./noob_novel_3264_fs4_eval2/results/noob*/evaluations.npz')):
    with np.load(file) as ev:
        data_noob_novel_fs4_eval2.append(pd.DataFrame({'timesteps': ev['timesteps'],
                                                       'reward': np.mean(ev['results'], axis=1),
                                                       'level': 'noob'}))

In [None]:
# Figure: eval2 curves of the LSTM run and the two fs2 / fs4 runs, drawn on
# shared axes with a 5-sample smoothing window and saved as a PDF.
xlim, ylim = (0, 1*1e6), (-110, 100)
plt.figure()
for curve_runs, curve_label in (
        (data_noob_lstm_eval2, 'standart_obs_lstm'),
        (data_noob_novel_fs2_eval2, 'standart_obs_fs2'),
        (data_noob_novel_fs4_eval2, 'standart_obs_fs4'),
):
    plot_data(curve_runs, smooth=5, xlim=xlim, ylim=ylim, label=curve_label)
plt.savefig('Pictures/MountainCarContinuousNoVel_comp_2.pdf', bbox_inches='tight')
plt.show()

# ...

In [None]:
# One DataFrame per rewards_ncpu.npz file; `results` is used as the reward as-is.
# sorted() keeps the run order reproducible (glob order depends on the filesystem),
# and `with` closes each .npz archive, which np.load otherwise leaves open.
data_expert3264 = []
for file in sorted(glob.glob('./expert_3264_1kk/results/expert*/rewards_ncpu.npz')):
    with np.load(file) as ev:
        data_expert3264.append(pd.DataFrame({'timesteps': ev['timesteps'],
                                             'reward': ev['results'],
                                             'level': 'expert'}))
# Disabled: keep only rows with timesteps >= 32768*2.
# for i, data in enumerate(data_expert3264):
#     data_expert3264[i] = data[data.timesteps >= 32768*2]


# The noob runs were previously tagged 'expert' as well (copy-paste slip).
data_noob3264 = []
for file in sorted(glob.glob('./noob_3264_1kk/results/noob*/rewards_ncpu.npz')):
    with np.load(file) as ev:
        data_noob3264.append(pd.DataFrame({'timesteps': ev['timesteps'],
                                           'reward': ev['results'],
                                           'level': 'noob'}))
# Disabled: keep only rows with timesteps >= 32768*2.
# for i, data in enumerate(data_noob3264):
#     data_noob3264[i] = data[data.timesteps >= 32768*2]



In [None]:
# Figure: rewards_ncpu curves of the 3264 expert and noob runs, cropped to the
# 85..100 reward band and saved as a PDF.
plt.figure()
for curve_runs, curve_label in (
        (data_expert3264, 'high_obs_n32b64'),
        (data_noob3264, 'standart_obs_n32b64'),
):
    plot_data(curve_runs, smooth=5, label=curve_label)
# plot_data ran with its default (None, None) limits, so the crop is applied here.
plt.xlim(0, 1*1e6)
plt.ylim(85, 100)
plt.savefig('Pictures/MountainCarContinuous_obs_comp3264_cropp.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Read every file in the noob32230 monitor directory. header=1 takes the second
# line as the column header (presumably the first is a metadata line -- confirm).
data = [
    pd.read_csv(file, header=1)
    for file in glob.glob('./noob_novel_1kk/logs/noob32230_monitor/*')
]

# Stack the first ten rows of each file, then show row 0 / column 0 of every file.
data_n = np.array([frame.to_numpy()[:10] for frame in data])
data_n[:, 0, 0]

In [None]:
from stable_baselines3.common import results_plotter
from stable_baselines3.common.results_plotter import load_results, ts2xy

# Library plotting helper, called on every expert*_monitor directory with a
# 1e6 timestep limit and timesteps on the x axis.
monitor_dirs = glob.glob('./test_expert/logs/expert*_monitor')
results_plotter.plot_results(monitor_dirs, 1e6, results_plotter.X_TIMESTEPS, "TD3 LunarLander")

In [None]:
# Run load_results on every expert*_monitor directory, then show the first result.
data_expert_3264 = list(map(load_results, glob.glob('./test_expert/logs/expert*_monitor')))
data_expert_3264[0]

In [None]:
# Plot column 'r' against column 't' of the loaded monitor results, smoothed
# with a 1001-sample window.
plot_data(
    data_expert_3264,
    smooth=1001,
    xaxis='t',
    value="r",
    label='high_obs',
)