In [1]:
from baselines.common import plot_util as pu
import numpy as np
import gym
import os

In [2]:
%%javascript
// Replace the output area's `_should_scroll` hook with one that always
// answers false, regardless of how many lines were produced.
// NOTE(review): presumably this keeps long cell outputs fully expanded instead
// of being placed in a scroll box; it relies on the `IPython` JS global being
// available in the front end -- confirm for the notebook UI in use.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [3]:
import warnings
# Suppress every warning category for the rest of the kernel session so the
# printed summaries below stay uncluttered. This is a blanket filter: it also
# hides warnings that may matter (e.g. numpy's "mean of empty slice").
warnings.filterwarnings('ignore')

In [4]:
# Environment ids to summarise. Each id doubles as a sub-directory name under
# `logs_dir` and as a key into `thresholds`.
envs = [
    'Reacher-v2',
    'InvertedPendulum-v2',
    'Walker2d-v2',
    'Humanoid-v2',
    'HalfCheetah-v2',
    'InvertedDoublePendulum-v2',
]

In [5]:
# Per-environment reward level that a run has to reach; looked up by
# environment id when searching for the first episode window past it.
thresholds = {
    'Reacher-v2': -7,
    'InvertedPendulum-v2': 950,
    'Humanoid-v2': 2500,
    'Walker2d-v2': 3000,
    'HalfCheetah-v2': 4700,
    'InvertedDoublePendulum-v2': 9100,
}

In [6]:
# Root directory of the training logs; each run is read from
# <logs_dir>/<env>/<alg>. The original machine-specific location stays the
# default, but it can be overridden without editing the notebook by setting the
# ROBOTICS_LOGS_DIR environment variable before starting the kernel.
logs_dir = os.environ.get('ROBOTICS_LOGS_DIR',
                          '/home/danielzgsilva/Documents/robotics_project/logs')

In [7]:
# Algorithm names to compare. Each name is also the sub-directory that holds
# that algorithm's logs inside an environment's log directory.
algs = [
    'trpo_mpi',
    'ddpg',
    'acktr',
    'ppo2',
]

In [8]:
def calc_top_n_rewards(n, rewards):
    """Return the mean of the ``n`` largest values in ``rewards``.

    Parameters
    ----------
    n : int
        How many of the highest rewards to average; must be at least 1.
        If ``n`` exceeds the number of rewards, all of them are averaged.
    rewards : array-like
        One-dimensional collection of rewards (numpy array, list, ...).

    Returns
    -------
    numpy floating scalar
        Mean of the ``n`` largest rewards. For empty ``rewards`` this is
        ``nan``, as produced by ``np.mean`` on an empty array.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        # A slice of [-0:] selects the *whole* array, so n == 0 used to return
        # the mean of every reward instead of failing visibly.
        raise ValueError('n must be at least 1, got {}'.format(n))

    # np.asarray lets plain sequences through; sorting ascending puts the
    # n largest values at the tail.
    ordered = np.sort(np.asarray(rewards))
    return np.mean(ordered[-n:])

In [9]:
def episode_window_past_threshold(window, threshold, rewards):
    """Locate the first run of consecutive rewards whose mean reaches a threshold.

    Parameters
    ----------
    window : int
        Number of consecutive rewards averaged together; must be at least 1.
    threshold : float
        Value the window mean has to reach (comparison is ``>=``).
    rewards : sequence or numpy array
        One reward per episode, in chronological order.

    Returns
    -------
    int
        Index of the first element of the earliest window whose mean is
        ``>= threshold``, or ``-1`` if no full window qualifies (including
        when ``rewards`` holds fewer than ``window`` entries).

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError('window must be at least 1, got {}'.format(window))

    # There are len(rewards) - window + 1 full windows. The "+ 1" makes sure
    # the last one (ending on the final episode) is examined as well; without
    # it, window=1 would never look at the last reward.
    for start in range(len(rewards) - window + 1):
        if np.mean(rewards[start:start + window]) >= threshold:
            return start

    return -1

In [14]:
# Summary settings: how many of the best episodes to average, and how many
# consecutive episodes form the window compared against the threshold.
top_n, window_size = 20, 1

# Print one summary block per (environment, algorithm) pair.
for env in envs:
    for alg in algs:
        exp_path = os.path.join(logs_dir, env, alg)
        print('------- Env: {}  Alg: {} --------'.format(env, alg))

        # Only the first entry of what load_results returns is used.
        results = pu.load_results(exp_path)[0]
        rewards = np.array(results.monitor.r)

        top_rewards = calc_top_n_rewards(n=top_n, rewards=rewards)

        threshold = thresholds[env]
        episode_to_thresh = episode_window_past_threshold(
            window=window_size,
            threshold=threshold,
            rewards=rewards,
        )

        print('Average reward of top {} episodes: {}'.format(top_n, top_rewards))
        print('Episodes to reach {} reward: {}'.format(threshold, episode_to_thresh))
        print()

------- Env: Reacher-v2  Alg: trpo_mpi --------
Average reward of top 20 episodes: -0.8435946000000001
Episodes to reach -7 reward: 2795

------- Env: Reacher-v2  Alg: ddpg --------
Average reward of top 20 episodes: -25.52634885
Episodes to reach -7 reward: -1

------- Env: Reacher-v2  Alg: acktr --------
Average reward of top 20 episodes: -35.665557050000004
Episodes to reach -7 reward: -1

------- Env: Reacher-v2  Alg: ppo2 --------
Average reward of top 20 episodes: -0.97445795
Episodes to reach -7 reward: 2760

------- Env: InvertedPendulum-v2  Alg: trpo_mpi --------
Average reward of top 20 episodes: 1000.0
Episodes to reach 950 reward: 493

------- Env: InvertedPendulum-v2  Alg: ddpg --------
Average reward of top 20 episodes: 1000.0
Episodes to reach 950 reward: 247

------- Env: InvertedPendulum-v2  Alg: acktr --------
Average reward of top 20 episodes: 430.9
Episodes to reach 950 reward: -1

------- Env: InvertedPendulum-v2  Alg: ppo2 --------
Average reward of top 20 episode