In [117]:
import math
import os
import matplotlib.pyplot as plt
import numpy as np

In [118]:
def plot_reward(timesteps, mean_reward, min_reward, max_reward, figname="mean_reward.png"):
    """Plot a reward curve with a shaded band and save it to ``figname``.

    Args:
        timesteps: Sequence of timesteps used as the x axis. Values are
            divided by 1000 before plotting. Lists and tuples are accepted
            as well as NumPy arrays.
        mean_reward: Values drawn as the main line.
        min_reward: Lower edge of the shaded band.
        max_reward: Upper edge of the shaded band.
        figname: Path the figure is written to (saved at 200 dpi).
    """
    # np.asarray lets plain Python sequences through; `list / 1000` would
    # raise TypeError.
    timesteps_k = np.asarray(timesteps) / 1000

    fig = plt.figure(figsize=(11, 7))
    try:
        plt.plot(timesteps_k, mean_reward, label='Mean Reward', color='orangered')
        plt.fill_between(timesteps_k, min_reward, max_reward, color='mistyrose')

        plt.title("Reward")
        plt.xlabel('Timesteps (in K)', fontdict={'size': 18})
        plt.ylabel('Total Cumulative Reward', fontdict={'size': 18})
        plt.savefig(figname, dpi=200)
    finally:
        # Release the figure even when plotting or saving fails, so repeated
        # calls in one kernel do not accumulate open figures.
        plt.close(fig)

In [119]:
def plot_seeds_combined(run_dirs, with_std=True, figname='combined_seeds.png'):
    """Overlay the reward curve of every run in ``run_dirs`` on one figure.

    Each run directory must contain a ``train_stats.npz`` archive holding the
    arrays ``mean_reward``, ``std_reward`` and ``timesteps``. One line is drawn
    per run; any number of runs is supported and colors repeat after three.
    The x axis uses the ``timesteps`` of the first run, divided by 1000.

    Args:
        run_dirs: Sequence of run directory paths (strings).
        with_std: When true, shade ``mean_reward +/- std_reward`` for each run.
        figname: File name of the saved figure. It is written into the
            directory obtained by dropping the last ``/``-separated component
            of ``run_dirs[0]``.

    Raises:
        IndexError: If ``run_dirs`` is empty.
    """
    line_colors = ['orangered', 'lightseagreen', 'goldenrod']
    band_colors = ['mistyrose', 'paleturquoise', 'lightgoldenrodyellow']

    # Resolved up front so that an empty ``run_dirs`` fails before any work.
    log_dir = run_dirs[0].rsplit('/', 1)[0]

    mean_rewards = []
    std_rewards = []
    timesteps = None
    for run_dir in run_dirs:
        # np.load keeps the .npz archive open; the context manager closes it.
        with np.load(os.path.join(run_dir, 'train_stats.npz')) as train_stats:
            mean_rewards.append(train_stats['mean_reward'])
            std_rewards.append(train_stats['std_reward'])
            if timesteps is None:
                timesteps = train_stats['timesteps'] / 1000

    fig = plt.figure(figsize=(11, 7))
    try:
        for idx, mean in enumerate(mean_rewards):
            plt.plot(timesteps, mean, label='Mean Reward',
                     color=line_colors[idx % len(line_colors)])
        if with_std:
            for idx, (mean, std) in enumerate(zip(mean_rewards, std_rewards)):
                plt.fill_between(timesteps, mean - std, mean + std,
                                 color=band_colors[idx % len(band_colors)])

        plt.title("Reward")
        plt.xlabel('Timesteps (in K)', fontdict={'size': 18})
        plt.ylabel('Total Cumulative Reward', fontdict={'size': 18})
        plt.savefig(os.path.join(log_dir, figname), dpi=200)
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)

In [120]:
def plot_seeds(run_dirs, with_std=True, figname='seeds.png'):
    """Plot the reward curve averaged over the runs in ``run_dirs``.

    Each run directory must contain a ``train_stats.npz`` archive holding the
    arrays ``mean_reward`` and ``timesteps``. The curves are averaged
    element-wise across runs and drawn through ``plot_reward``, using the
    ``timesteps`` of the first run as the x axis.

    Args:
        run_dirs: Sequence of run directory paths (strings).
        with_std: When true, the shaded band spans one standard deviation
            (across runs) on either side of the mean. When false, the band is
            collapsed onto the mean curve.
        figname: File name of the saved figure. It is written into the
            directory obtained by dropping the last ``/``-separated component
            of ``run_dirs[0]``.

    Raises:
        IndexError: If ``run_dirs`` is empty.
    """
    # Resolved up front so that an empty ``run_dirs`` fails before any work.
    log_dir = run_dirs[0].rsplit('/', 1)[0]

    mean_rewards = []
    timesteps = None
    for run_dir in run_dirs:
        # np.load keeps the .npz archive open; the context manager closes it.
        with np.load(os.path.join(run_dir, 'train_stats.npz')) as train_stats:
            mean_rewards.append(train_stats['mean_reward'])
            if timesteps is None:
                timesteps = train_stats['timesteps']

    stacked = np.array(mean_rewards)
    mean_reward = np.mean(stacked, axis=0)
    std_reward = np.std(stacked, axis=0)

    # Honor ``with_std``: a zero half-width makes both band edges equal to the mean.
    half_width = std_reward if with_std else np.zeros_like(std_reward)
    plot_reward(timesteps, mean_reward, mean_reward - half_width, mean_reward + half_width,
                figname=os.path.join(log_dir, figname))

In [121]:
def plot_epochs(epoch_dirs, with_std=True, figname='seeds.png'):
    """Plot one averaged reward curve per epoch setting on a shared figure.

    ``epoch_dirs`` must hold at least nine run directories, each containing a
    ``train_stats.npz`` archive with the arrays ``mean_reward`` and
    ``timesteps``. Entries ``i``, ``i + 3`` and ``i + 6`` (for ``i`` in 0..2)
    are averaged together and drawn as one curve, labelled with epoch 10, 20
    and 50 respectively. The x axis of each curve uses the ``timesteps`` of
    the first run in its group, divided by 1000.

    Args:
        epoch_dirs: Sequence of run directory paths (strings), ordered as
            described above.
        with_std: When true, shade one standard deviation (computed across
            the runs of a group, per timestep) around each mean curve.
        figname: File name of the saved figure. It is written into the
            directory obtained by dropping the last two ``/``-separated
            components of ``epoch_dirs[0]``.

    Raises:
        IndexError: If ``epoch_dirs`` has fewer than nine entries.
    """
    ind2epoch = {0: 10, 1: 20, 2: 50}
    mean_colors = ['orangered', 'lightseagreen', 'goldenrod']
    fill_colors = ['mistyrose', 'paleturquoise', 'lightgoldenrodyellow']

    # Resolved up front so that an empty ``epoch_dirs`` fails before any work.
    log_dir = epoch_dirs[0].rsplit('/', 2)[0]

    fig = plt.figure(figsize=(11, 7))
    try:
        for ind in range(3):
            run_curves = []
            timesteps = None
            for indi in range(ind, 9, 3):
                # np.load keeps the .npz archive open; the context manager closes it.
                with np.load(os.path.join(epoch_dirs[indi], 'train_stats.npz')) as train_stats:
                    run_curves.append(train_stats['mean_reward'])
                    if timesteps is None:
                        timesteps = train_stats['timesteps'] / 1000

            # Both statistics must come from the stacked per-run curves.
            # Taking the std of the already-averaged curve yields a single
            # scalar (spread over time) instead of the spread across runs.
            run_curves = np.array(run_curves)
            mean = np.mean(run_curves, axis=0)
            std = np.std(run_curves, axis=0)

            plt.plot(timesteps, mean, label='Mean Reward (Epoch {})'.format(ind2epoch[ind]),
                     color=mean_colors[ind])
            if with_std:
                plt.fill_between(timesteps, mean - std, mean + std, color=fill_colors[ind])

        plt.title("Reward")
        plt.xlabel('Timesteps (in K)', fontdict={'size': 18})
        plt.ylabel('Total Cumulative Reward', fontdict={'size': 18})
        plt.legend()
        plt.savefig(os.path.join(log_dir, figname), dpi=200)
    finally:
        # Release the figure even when loading, plotting or saving fails.
        plt.close(fig)

In [122]:
def generate_epoch_results(epoch, epoch_dir):
    """Produce the per-configuration seed plots for one epoch directory.

    Runs ``run1``..``run12`` under ``epoch_dir`` are split into four groups of
    three consecutive runs. Every group is first drawn with
    ``plot_seeds_combined`` and then, in a second pass, with ``plot_seeds``.

    Args:
        epoch: Value substituted into the output file names.
        epoch_dir: Directory that contains the ``run<N>`` sub-directories.
    """
    def run_group(first, stop):
        # Paths of run<first> .. run<stop - 1> below epoch_dir.
        return [os.path.join(epoch_dir, 'run{}'.format(number)) for number in range(first, stop)]

    groups = (
        run_group(1, 4),    # single process
        run_group(4, 7),    # forward search, population size 1
        run_group(7, 10),   # forward search, population size 3
        run_group(10, 13),  # forward search, population size 5
    )

    combined_templates = (
        "epochs_{}_combined_seeds.png",
        "epochs_{}_combined_seeds_fs_1.png",
        "epochs_{}_combined_seeds_fs_3.png",
        "epochs_{}_combined_seeds_fs_5.png",
    )
    seeds_templates = (
        "epochs_{}_seeds.png",
        "epochs_{}_seeds_fs_1.png",
        "epochs_{}_seeds_fs_3.png",
        "epochs_{}_seeds_fs_5.png",
    )

    # First pass: every run of a group as its own curve on one figure.
    for group, template in zip(groups, combined_templates):
        plot_seeds_combined(group, figname=template.format(epoch))

    # Second pass: mean curve of each group with the spread between its runs.
    for group, template in zip(groups, seeds_templates):
        plot_seeds(group, figname=template.format(epoch))

In [123]:
# Experiment configuration. NOTE: log_dir is a machine-specific absolute path.
env_name = 'CartPole-v1'
log_dir = '/media/hdd/tanmaya/projects/GymExperiments'
num_timesteps = 100000

# `epochs` has to be assigned before the loop below reads it. Without this
# assignment the cell raises NameError on a fresh kernel (Restart & Run All).
epochs = [10, 20, 50]

# Generate the per-seed plots for every epoch setting; each setting lives in
# <log_dir>/<env_name>/<num_timesteps>_<epoch>.
for epoch in epochs:
    epoch_dir = os.path.join(log_dir, env_name, '{}_{}'.format(num_timesteps, epoch))
    generate_epoch_results(epoch, epoch_dir)

In [124]:
# Epoch settings, and the results directory of each one:
# <log_dir>/<env_name>/<num_timesteps>_<epoch>.
epochs = [10, 20, 50]
epoch_dirs = [
    os.path.join(log_dir, env_name, '{}_{}'.format(num_timesteps, epoch_count))
    for epoch_count in epochs
]

In [125]:
# Run directories for each configuration. Every configuration owns three
# consecutive run numbers (1-3, 4-6, 7-9, 10-12). Each list is ordered
# run-major: for every run number, one entry per directory in epoch_dirs.
(single_process,
 forward_search_pop_size_1,
 forward_search_pop_size_3,
 forward_search_pop_size_5) = (
    [os.path.join(results_dir, 'run{}'.format(run_number))
     for run_number in range(first_run, first_run + 3)
     for results_dir in epoch_dirs]
    for first_run in (1, 4, 7, 10)
)

In [126]:
# One epoch-comparison figure per configuration, drawn in the listed order.
for config_run_dirs, output_name in (
    (single_process, "single_process.png"),
    (forward_search_pop_size_1, "forward_search_pop_size_1.png"),
    (forward_search_pop_size_3, "forward_search_pop_size_3.png"),
    (forward_search_pop_size_5, "forward_search_pop_size_5.png"),
):
    plot_epochs(config_run_dirs, figname=output_name)