In [3]:
import math
import os
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import traceback

In [4]:
# Notebook-wide matplotlib text size, applied once so that every figure
# produced by the cells below shares the same base font size.
font = dict(size=18)
matplotlib.rc('font', **font)

In [5]:
def plot_reward(timesteps, mean_reward, min_reward, max_reward, env_name, figname="mean_reward.png"):
    """Save one reward curve with a shaded band around it.

    Args:
        timesteps: x-values shared by the curve and the band.
        mean_reward: y-values of the central curve.
        min_reward: lower edge of the shaded band.
        max_reward: upper edge of the shaded band.
        env_name: inserted into the figure title as "<env_name> Reward".
        figname: output path passed to ``savefig`` (written at 200 dpi).

    The figure is closed after saving, so nothing is displayed inline.
    """
    text_style = {'size' : 18}

    fig, ax = plt.subplots(figsize=(11, 7))
    ax.plot(timesteps, mean_reward, label='Mean Reward', color='orangered')
    ax.fill_between(timesteps, min_reward, max_reward, color='mistyrose')

    ax.set_title("{} Reward".format(env_name), fontdict=text_style)
    ax.set_xlabel('Timesteps', fontdict=text_style)
    ax.set_ylabel('Total Cumulative Reward', fontdict=text_style)
    # Scientific notation on the x-axis keeps large timestep counts readable.
    ax.ticklabel_format(axis="x", style="sci", scilimits=(0,0))

    fig.savefig(figname, dpi=200)
    plt.close(fig)

In [6]:
def plot_seeds_combined(run_dirs, env_name, with_std=True, figname='combined_seeds.png'):
    """Overlay the reward curve of every run in ``run_dirs`` on one figure.

    Each run directory must contain a ``train_stats.npz`` archive holding the
    arrays ``mean_reward``, ``std_reward`` and ``timesteps``. All curves are
    drawn against the ``timesteps`` array of the first run.

    Any number of runs is accepted; the three-colour palette is reused
    cyclically when there are more than three.

    Args:
        run_dirs: list of run directories (at least one).
        env_name: inserted into the figure title as "<env_name> Reward".
        with_std: when true, shade ``mean_reward +/- std_reward`` per run.
        figname: file name of the image, written into the parent directory of
            ``run_dirs[0]`` (everything before its last '/').

    Raises:
        IndexError: if ``run_dirs`` is empty.
    """
    line_colors = ['orangered', 'lightseagreen', 'goldenrod']
    band_colors = ['mistyrose', 'paleturquoise', 'lightgoldenrodyellow']

    mean_rewards = []
    std_rewards = []
    timesteps_per_run = []
    for run_dir in run_dirs:
        # The context manager closes the archive once the arrays are read.
        with np.load(os.path.join(run_dir, 'train_stats.npz')) as train_stats:
            mean_rewards.append(train_stats['mean_reward'])
            std_rewards.append(train_stats['std_reward'])
            timesteps_per_run.append(train_stats['timesteps'])

    timesteps = timesteps_per_run[0]

    plt.figure(figsize=(11, 7))
    # All curves first, then all bands, matching the original insertion order.
    for ind, mean in enumerate(mean_rewards):
        plt.plot(timesteps, mean, label='Mean Reward', color=line_colors[ind % len(line_colors)])
    if with_std:
        for ind, (mean, std) in enumerate(zip(mean_rewards, std_rewards)):
            plt.fill_between(timesteps, mean - std, mean + std, color=band_colors[ind % len(band_colors)])

    plt.title("{} Reward".format(env_name), fontdict={'size' : 18})
    plt.xlabel('Timesteps', fontdict={'size' : 18})
    plt.ylabel('Total Cumulative Reward', fontdict={'size' : 18})
    plt.ticklabel_format(axis="x", style="sci", scilimits=(0,0))

    log_dir = run_dirs[0].rsplit('/', 1)[0]
    plt.savefig(os.path.join(log_dir, figname), dpi=200)
    plt.clf()
    plt.close()

In [9]:
def plot_seeds(run_dirs, env_name, with_std=True, figname='seeds.png'):
    """Plot the across-run mean reward with a one-standard-deviation band.

    Reads ``mean_reward`` and ``timesteps`` from ``train_stats.npz`` in every
    directory of ``run_dirs``, averages the reward curves element-wise across
    runs and hands ``mean - std`` / ``mean + std`` to ``plot_reward`` as the
    band edges. The x-axis is the ``timesteps`` array of the first run.

    Args:
        run_dirs: list of run directories.
        env_name: forwarded to ``plot_reward`` for the title.
        with_std: accepted but not used by this function; the band is always
            drawn.
        figname: file name of the image, written into the parent directory of
            ``run_dirs[0]`` (everything before its last '/').
    """
    curves = []
    steps = []
    for seed_dir in run_dirs:
        stats = np.load(os.path.join(seed_dir, 'train_stats.npz'))
        curves.append(stats['mean_reward'])
        steps.append(stats['timesteps'])

    stacked = np.array(curves)
    seed_mean = np.mean(stacked, axis = 0)
    seed_std = np.std(stacked, axis = 0)
    first_run_steps = np.array(steps)[0]

    out_dir = run_dirs[0].rsplit('/', 1)[0]
    out_path = os.path.join(out_dir, figname)
    plot_reward(first_run_steps, seed_mean, seed_mean - seed_std, seed_mean + seed_std, env_name, figname=out_path)

In [7]:
def plot_epochs(epoch_dirs, ev_name, with_std=True, figname='seeds.png'):
    """Compare reward curves of three epoch settings, averaged over runs.

    ``epoch_dirs`` is read with a stride of three: entries ``i``, ``i + 3``,
    ``i + 6``, ... are treated as the runs belonging to setting ``i`` (for
    ``i`` in 0..2; the legend labels them as epochs 10, 20 and 50). For each
    setting the ``mean_reward`` arrays of its runs are averaged element-wise
    and, optionally, a band of one standard deviation across those runs is
    shaded. The x-axis of a setting is the ``timesteps`` array of its first
    run.

    Args:
        epoch_dirs: run directories in the interleaved order described above,
            each containing ``train_stats.npz``.
        ev_name: environment name used in the figure title.
        with_std: when true, shade ``mean +/- std`` for every setting.
        figname: file name of the image, written two path levels above
            ``epoch_dirs[0]``.
    """
    epoch_values = [10, 20, 50]
    mean_colors = ['orangered', 'lightseagreen', 'goldenrod']
    fill_colors = ['mistyrose', 'paleturquoise', 'khaki']
    alphas = [0.5, 0.3, 0.3]
    n_settings = len(epoch_values)

    plt.figure(figsize=(11, 7))
    for ind in range(min(n_settings, len(epoch_dirs))):
        rewards = []
        timesteps = None
        for run_dir in epoch_dirs[ind::n_settings]:
            # The context manager closes the archive once the arrays are read.
            with np.load(os.path.join(run_dir, 'train_stats.npz')) as train_stats:
                rewards.append(train_stats['mean_reward'])
                if timesteps is None:
                    timesteps = train_stats['timesteps']

        # Both statistics come from the stacked per-run curves; taking the std
        # of the already-averaged curve would collapse it to a single scalar.
        rewards = np.array(rewards)
        mean = np.mean(rewards, axis = 0)
        std = np.std(rewards, axis = 0)

        plt.plot(timesteps, mean, label='Mean Reward (Epoch {})'.format(epoch_values[ind]), color=mean_colors[ind])
        if with_std:
            plt.fill_between(timesteps, mean - std, mean + std, color=fill_colors[ind], alpha=alphas[ind])

    # Use the parameter, not a same-named global that may be left in the kernel.
    plt.title("{} Reward".format(ev_name), fontdict={'size' : 18})
    plt.xlabel('Timesteps', fontdict={'size' : 18})
    plt.ylabel('Total Cumulative Reward', fontdict={'size' : 18})
    plt.ticklabel_format(axis="x", style="sci", scilimits=(0,0))
    plt.legend()

    log_dir = epoch_dirs[0].rsplit('/', 2)[0]
    plt.savefig(os.path.join(log_dir, figname), dpi=200)
    plt.clf()
    plt.close()

In [8]:
def plot_forward_search_with_single_process(epoch_dir, env_name, with_std=True, figname='seeds.png'):
    """Plot four configurations side by side, each averaged over three runs.

    Runs ``run1``..``run12`` under ``epoch_dir`` are consumed in consecutive
    groups of three; the groups are labelled, in order, 'Without EPS',
    'EPS: K = 1', 'EPS: K = 3' and 'EPS: K = 5'. For each group the
    ``mean_reward`` arrays are averaged element-wise across the three runs and
    drawn against the ``timesteps`` array of the group's last run.

    Args:
        epoch_dir: directory containing ``run1`` .. ``run12``, each with a
            ``train_stats.npz`` archive.
        env_name: inserted into the figure title as "<env_name> Reward".
        with_std: when true, shade one standard deviation across the runs of
            each group.
        figname: file name of the image, written into the parent directory of
            ``epoch_dir`` (everything before its last '/').
    """
    runs_per_group = 3
    label_names = ['Without EPS', 'EPS: K = 1', 'EPS: K = 3', 'EPS: K = 5']
    mean_colors = ['orangered', 'lightseagreen', 'goldenrod', 'darkorchid']
    fill_colors = ['mistyrose', 'paleturquoise', 'khaki', 'mediumpurple']
    alphas = [0.5, 0.3, 0.3, 0.2]

    plt.figure(figsize=(11, 7))
    for group, label in enumerate(label_names):
        first_run = group * runs_per_group + 1
        rewards = []
        timesteps = None
        for x in range(first_run, first_run + runs_per_group):
            run_dir = os.path.join(epoch_dir, 'run{}'.format(x))
            # The context manager closes the archive once the arrays are read.
            with np.load(os.path.join(run_dir, 'train_stats.npz')) as train_stats:
                rewards.append(train_stats['mean_reward'])
                timesteps = train_stats['timesteps']

        # Both statistics come from the stacked per-run curves; taking the std
        # of the already-averaged curve would collapse it to a single scalar.
        rewards = np.array(rewards)
        mean = np.mean(rewards, axis = 0)
        std = np.std(rewards, axis = 0)

        plt.plot(timesteps, mean, label=label, color=mean_colors[group])
        if with_std:
            plt.fill_between(timesteps, mean - std, mean + std, color=fill_colors[group], alpha=alphas[group])

    plt.title("{} Reward".format(env_name), fontdict={'size' : 18})
    plt.xlabel('Timesteps', fontdict={'size' : 18})
    plt.ticklabel_format(axis="x", style="sci", scilimits=(0,0))
    plt.ylabel('Total Cumulative Reward', fontdict={'size' : 18})
    plt.legend()

    log_dir = epoch_dir.rsplit('/', 1)[0]
    plt.savefig(os.path.join(log_dir, figname), dpi=200)
    plt.clf()
    plt.close()

In [10]:
def plot_forward_search_with_single_process_computation(epoch_dir, env_name, with_std=True, figname='seeds.png'):
    """Plot four configurations, rescaling the x-axis of the last two.

    Runs ``run1``..``run12`` under ``epoch_dir`` are consumed in consecutive
    groups of three, labelled 'Without EPS', 'EPS: K = 1', 'EPS: K = 3' and
    'EPS: K = 5'. Each group's ``mean_reward`` arrays are averaged
    element-wise across its runs. For the third and fourth group only every
    third entry of ``timesteps`` is kept and the curve is cut to that many
    leading points.

    Args:
        epoch_dir: directory containing ``run1`` .. ``run12``, each with a
            ``train_stats.npz`` archive.
        env_name: inserted into the figure title as "<env_name> Reward".
        with_std: when true, shade one standard deviation across the runs of
            each group.
        figname: file name of the image, written into the parent directory of
            ``epoch_dir`` (everything before its last '/').
    """
    runs_per_group = 3
    label_names = ['Without EPS', 'EPS: K = 1', 'EPS: K = 3', 'EPS: K = 5']
    mean_colors = ['orangered', 'lightseagreen', 'goldenrod', 'darkorchid']
    fill_colors = ['mistyrose', 'paleturquoise', 'khaki', 'mediumpurple']
    alphas = [0.5, 0.3, 0.3, 0.2]

    plt.figure(figsize=(11, 7))
    for group, label in enumerate(label_names):
        first_run = group * runs_per_group + 1
        rewards = []
        timesteps = None
        for x in range(first_run, first_run + runs_per_group):
            run_dir = os.path.join(epoch_dir, 'run{}'.format(x))
            # The context manager closes the archive once the arrays are read.
            with np.load(os.path.join(run_dir, 'train_stats.npz')) as train_stats:
                rewards.append(train_stats['mean_reward'])
                timesteps = train_stats['timesteps']

        # Both statistics come from the stacked per-run curves; taking the std
        # of the already-averaged curve would collapse it to a single scalar.
        rewards = np.array(rewards)
        mean = np.mean(rewards, axis = 0)
        std = np.std(rewards, axis = 0)

        if group >= 2:
            # NOTE(review): the stride is fixed at 3 for both K = 3 and K = 5;
            # the original looked up K here but never used it. Confirm whether
            # the stride was meant to follow K.
            timesteps = timesteps[::3]
            kept = len(timesteps)
            mean = mean[:kept]
            # std is per-timestep now, so it must be cut to the same length.
            std = std[:kept]

        plt.plot(timesteps, mean, label=label, color=mean_colors[group])
        if with_std:
            plt.fill_between(timesteps, mean - std, mean + std, color=fill_colors[group], alpha=alphas[group])

    plt.title("{} Reward".format(env_name), fontdict={'size' : 18})
    plt.xlabel('Timesteps', fontdict={'size' : 18})
    plt.ticklabel_format(axis="x", style="sci", scilimits=(0,0))
    plt.ylabel('Total Cumulative Reward', fontdict={'size' : 18})
    plt.legend()

    log_dir = epoch_dir.rsplit('/', 1)[0]
    plt.savefig(os.path.join(log_dir, figname), dpi=200)
    plt.clf()
    plt.close()

In [14]:
def plot_forward_search_with_same_computation(env_dir, env_name, with_std=True, figname='seeds.png'):
    """Plot three configurations against a rescaled "computation steps" axis.

    Three groups of three runs are read from fixed sub-directories of
    ``env_dir``:

    * 'Without EPS': ``10000000_250/run13`` .. ``run15``, x-axis multiplied by 1
    * 'EPS: K = 3':  ``3320000_83/run16`` .. ``run18``, x-axis multiplied by 3
    * 'EPS: K = 5':  ``2000000_50/run10`` .. ``run12``, x-axis multiplied by 5

    Each group's ``mean_reward`` arrays are averaged element-wise across its
    runs and drawn against the (multiplied) ``timesteps`` array of the group's
    last run.

    Args:
        env_dir: directory holding the three sub-directories listed above.
        env_name: inserted into the figure title as "<env_name> Reward".
        with_std: when true, shade one standard deviation across the runs of
            each group.
        figname: file name of the image, written directly into ``env_dir``.
    """
    # (legend label, sub-directory, run numbers, x-axis multiplier)
    groups = [
        ('Without EPS', '10000000_250', range(13, 16), 1),
        ('EPS: K = 3', '3320000_83', range(16, 19), 3),
        ('EPS: K = 5', '2000000_50', range(10, 13), 5),
    ]
    mean_colors = ['orangered', 'lightseagreen', 'darkorchid']
    fill_colors = ['mistyrose', 'paleturquoise', 'mediumpurple']
    alphas = [0.5, 0.3, 0.2]

    plt.figure(figsize=(11, 7))
    for ind, (label, sub_dir, run_numbers, multiplier) in enumerate(groups):
        rewards = []
        timesteps = None
        for x in run_numbers:
            run_dir = os.path.join(os.path.join(env_dir, sub_dir), 'run{}'.format(x))
            # The context manager closes the archive once the arrays are read.
            with np.load(os.path.join(run_dir, 'train_stats.npz')) as train_stats:
                rewards.append(train_stats['mean_reward'])
                timesteps = train_stats['timesteps']

        # Both statistics come from the stacked per-run curves; taking the std
        # of the already-averaged curve would collapse it to a single scalar.
        rewards = np.array(rewards)
        mean = np.mean(rewards, axis = 0)
        std = np.std(rewards, axis = 0)
        timesteps = timesteps * multiplier

        plt.plot(timesteps, mean, label=label, color=mean_colors[ind])
        if with_std:
            plt.fill_between(timesteps, mean - std, mean + std, color=fill_colors[ind], alpha=alphas[ind])

    plt.title("{} Reward".format(env_name), fontdict={'size' : 18})
    plt.xlabel('Computation Steps', fontdict={'size' : 18})
    plt.ylabel('Total Cumulative Reward', fontdict={'size' : 18})
    plt.legend()

    plt.savefig(os.path.join(env_dir, figname), dpi=200)
    plt.clf()
    plt.close()

In [15]:
def generate_epoch_results(epoch, epoch_dir, env_name):
    """Produce every per-epoch figure for one environment/epoch directory.

    Runs ``run1``..``run12`` under ``epoch_dir`` are split into four groups of
    three (single process, then forward search with population sizes 1, 3 and
    5). For each group two images are written: the individual runs overlaid
    (``plot_seeds_combined``) and their mean with a spread band
    (``plot_seeds``). Finally one image comparing all four groups is written.

    Args:
        epoch: value substituted into every output file name.
        epoch_dir: directory containing the ``run<N>`` sub-directories.
        env_name: environment name forwarded to the plotting helpers.
    """
    def runs(start, stop):
        return [os.path.join(epoch_dir, 'run{}'.format(x)) for x in range(start, stop)]

    single_process = runs(1, 4)
    forward_search_pop_size_1 = runs(4, 7)
    forward_search_pop_size_3 = runs(7, 10)
    forward_search_pop_size_5 = runs(10, 13)
    run_groups = (
        single_process,
        forward_search_pop_size_1,
        forward_search_pop_size_3,
        forward_search_pop_size_5,
    )

    # Individual runs overlaid on one figure, one image per group.
    combined_names = (
        "epochs_{}_combined_seeds.png",
        "epochs_{}_combined_seeds_fs_1.png",
        "epochs_{}_combined_seeds_fs_3.png",
        "epochs_{}_combined_seeds_fs_5.png",
    )
    for group, name in zip(run_groups, combined_names):
        plot_seeds_combined(group, env_name, figname=name.format(epoch))

    # Mean across the runs of a group with the spread between them.
    seeds_names = (
        "epochs_{}_seeds.png",
        "epochs_{}_seeds_fs_1.png",
        "epochs_{}_seeds_fs_3.png",
        "epochs_{}_seeds_fs_5.png",
    )
    for group, name in zip(run_groups, seeds_names):
        plot_seeds(group, env_name, figname=name.format(epoch))

    # All four groups on one figure (with and without forward search).
    plot_forward_search_with_single_process(epoch_dir, env_name, figname="epochs_{}_w_wo_eps.png".format(epoch))
    # Currently disabled:
#     plot_forward_search_with_single_process_computation(epoch_dir, env_name, figname="epochs_{}_w_wo_eps_w_comp.png".format(epoch))

In [16]:
# Driver cell: regenerate every Gym figure for each (environment, timesteps) pair.
# Results are expected under <log_dir>/<env_name>/<num_timesteps>_<epoch>/run<N>.
# envs = [('MountainCarContinuous-v0', 1000000), ('Walker2d-v2', 2000000), ('Hopper-v2', 2000000), ('HalfCheetah-v2', 2000000), ('Swimmer-v2', 2000000)]
envs = [('Walker2d-v2', 2000000), ('Hopper-v2', 2000000), ('HalfCheetah-v2', 2000000), ('Swimmer-v2', 2000000)]
# NOTE(review): absolute, machine-specific path; the cell only runs where this directory exists.
log_dir = '/media/hdd/tanmaya/projects/GymExperiments'
epochs = [10, 20, 50]
    
for env_name, num_timesteps in envs:
    # Per-epoch figures (individual runs, run averages, with/without forward search).
    for epoch in epochs:
        epoch_dir = os.path.join(log_dir, env_name, '{}_{}'.format(num_timesteps, epoch))
        generate_epoch_results(epoch, epoch_dir, env_name)
    
    # Comparison on a rescaled x-axis; reads fixed sub-directories of the environment directory.
    plot_forward_search_with_same_computation(os.path.join(log_dir, env_name), env_name, figname="epochs_50_fs_5_same_computation.png")
    
    epoch_dirs = [os.path.join(log_dir, env_name, '{}_{}'.format(num_timesteps, epoch)) for epoch in epochs]
    
    # Each list is run-major: entry i is run (i // 3) of epoch setting (i % 3).
    # plot_epochs relies on this layout when it reads entries with a stride of three.
    single_process = [os.path.join(epoch_dir, 'run{}'.format(x)) for x in range(1, 4) for epoch_dir in epoch_dirs]
    forward_search_pop_size_1 = [os.path.join(epoch_dir, 'run{}'.format(x)) for x in range(4, 7) for epoch_dir in epoch_dirs]
    forward_search_pop_size_3 = [os.path.join(epoch_dir, 'run{}'.format(x)) for x in range(7, 10) for epoch_dir in epoch_dirs]
    forward_search_pop_size_5 = [os.path.join(epoch_dir, 'run{}'.format(x)) for x in range(10, 13) for epoch_dir in epoch_dirs]
    
    # One figure per configuration, comparing the three epoch settings.
    plot_epochs(single_process, env_name, figname="single_process.png")
    plot_epochs(forward_search_pop_size_1, env_name, figname="forward_search_pop_size_1.png")
    plot_epochs(forward_search_pop_size_3, env_name, figname="forward_search_pop_size_3.png")
    plot_epochs(forward_search_pop_size_5, env_name, figname="forward_search_pop_size_5.png")

In [24]:
def read_file(file_name, skip_ind=1, delimiter=','):
    """Read the first four numeric columns of a delimited text file.

    Args:
        file_name: path of the file to read.
        skip_ind: stride over the file's lines; ``1`` keeps every line, ``k``
            keeps lines 0, k, 2k, ...
        delimiter: field separator used to split each kept line.

    Returns:
        A numpy array with one row per kept, non-blank line and up to four
        float columns (fewer if a line has fewer fields). An empty file yields
        an empty array.

    Raises:
        ValueError: if a kept field cannot be converted to ``float``.
    """
    rows = []
    with open(file_name, 'r') as f:
        # The stride is applied to the raw lines first so that row selection
        # does not depend on where blank lines happen to be.
        for line in f.readlines()[::skip_ind]:
            if not line.strip():
                # Blank lines (e.g. a trailing newline) carry no data.
                continue
            fields = line.split(delimiter)[:4]
            rows.append([float(x) for x in fields])

    return np.array(rows)

In [25]:
def get_data_from_file(log_path, run_path, indexes):
    """Collect per-epoch reward and success values across several run files.

    For every ``j`` in ``indexes`` the file ``<log_path>/<run_path>/run<j>.csv``
    is read with ``read_file``. Column 0 is used as the grouping key. When
    ``run_path`` contains 'with_EPS', success and reward are read from columns
    2 and 3 and only every k-th line is kept, where k is the integer in the
    third underscore-separated token of ``run_path`` (e.g. 'with_EPS_3' -> 3).
    Otherwise they are read from columns 1 and 2 and every line is kept.

    A file that cannot be read or parsed is reported on stdout and skipped;
    the remaining files are still processed.

    Args:
        log_path: base directory of the logs.
        run_path: sub-directory name; also selects the column layout.
        indexes: iterable of run numbers.

    Returns:
        ``(new_rewards, new_success)``: two dicts mapping each column-0 value
        to the list of reward / success values found for it across all files.
    """
    uses_eps = 'with_EPS' in run_path
    skip_ind = int(run_path.split('_')[2]) if uses_eps else 1
    success_col, reward_col = (2, 3) if uses_eps else (1, 2)

    new_rewards = {}
    new_success = {}
    for j in indexes:
        # Bound before the try block so the handler can always report it.
        file_name = os.path.join(log_path, run_path, "run{}.csv".format(j))
        try:
            data = read_file(file_name, skip_ind=skip_ind, delimiter=',')
            epochs = data[:, 0]
            success = data[:, success_col]
            reward = data[:, reward_col]
        except Exception as e:
            # Best effort: one unreadable run must not abort the whole plot.
            print("********** File Not Found: {} **********".format(file_name))
            print(e)
            print(traceback.format_exc())
            continue

        for key, reward_value, success_value in zip(epochs, reward, success):
            new_rewards.setdefault(key, []).append(reward_value)
            new_success.setdefault(key, []).append(success_value)

    return new_rewards, new_success

In [26]:
def compute_datapoints(new_rewards, new_success, dir_name, test_results=False):
    """Reduce per-key value lists to mean/min/max series ordered by key.

    Keys of ``new_rewards`` are visited in sorted order; ``new_success`` must
    contain the same keys. The x-value for a key is ``key / 1000000`` when
    ``dir_name`` contains 'without_EPS' or ``test_results`` is true, and
    ``round(key * 0.12, 2)`` otherwise.

    Requires the ``statistics`` module to be imported in the notebook before
    this function is called.

    Args:
        new_rewards: dict mapping key -> list of reward values.
        new_success: dict mapping key -> list of success values.
        dir_name: name of the directory the data came from.
        test_results: forces the ``key / 1000000`` x-axis scaling.

    Returns:
        A 7-tuple of lists: ``(mean_reward, min_reward, max_reward,
        mean_success, min_success, max_success, timesteps)``.
    """
    ordered_keys = sorted(new_rewards)
    divide_by_million = 'without_EPS' in dir_name or test_results

    timesteps = [
        key / 1000000 if divide_by_million else round(key * 0.12, 2)
        for key in ordered_keys
    ]

    reward_lists = [new_rewards[key] for key in ordered_keys]
    success_lists = [new_success[key] for key in ordered_keys]

    mean_reward = [statistics.mean(values) for values in reward_lists]
    min_reward = [min(values) for values in reward_lists]
    max_reward = [max(values) for values in reward_lists]

    mean_success = [statistics.mean(values) for values in success_lists]
    min_success = [min(values) for values in success_lists]
    max_success = [max(values) for values in success_lists]

    return mean_reward, min_reward, max_reward, mean_success, min_success, max_success, timesteps

In [30]:
def plot_reward_CARLA(log_path, timesteps, mean_reward, min_reward, max_reward, test_results=False, with_std=True, figname="mean_reward.png", label_names=None, title=None):
    """Plot one or more CARLA reward curves with min/max bands and save them.

    Args:
        log_path: directory the image is written to.
        timesteps: list of x-value sequences, one per series; each value is
            multiplied by 1000000 before plotting.
        mean_reward: list of curves, one per series.
        min_reward: list of lower band edges, one per series.
        max_reward: list of upper band edges, one per series.
        test_results: selects the label set, title and y-axis limits (see
            below).
        with_std: when true, shade the area between ``min_reward`` and
            ``max_reward`` of every series.
        figname: file name of the image inside ``log_path``.
        label_names: optional legend labels overriding the defaults.
        title: optional figure title overriding the default.

    Defaults when ``label_names`` / ``title`` are not given:

    * ``test_results`` false: the four 'Without EPS' / 'EPS: K = n' labels and
      the title "CARLA Reward".
    * ``test_results`` true, one series: label 'Navigation task' and the title
      "Cumulative Reward on Navigation task".
    * ``test_results`` true, several series: the three 'with state ...' labels
      and the title "Cumulative Reward with Dynamic Actors". Previously this
      case raised IndexError because only one label was defined.
      NOTE(review): this choice follows the alternatives that were commented
      out in the original code - confirm it matches the intended figures.

    Series beyond the available labels are named 'Series <n>'; colours are
    reused cyclically.
    """
    n_series = len(timesteps)

    if not test_results:
        default_labels = ['Without EPS', 'EPS: K = 1', 'EPS: K = 3', 'EPS: K = 5']
        default_title = "CARLA Reward"
        mean_colors = ['orangered', 'lightseagreen', 'goldenrod', 'darkorchid']
        fill_colors = ['mistyrose', 'paleturquoise', 'khaki', 'mediumpurple']
        alphas = [0.5, 0.3, 0.3, 0.2]
        y_limits = {'top': 140000, 'bottom': -5000}
    else:
        if n_series <= 1:
            default_labels = ['Navigation task']
            default_title = "Cumulative Reward on Navigation task"
        else:
            default_labels = ['with state A', 'with state A+I', 'with state I']
            default_title = "Cumulative Reward with Dynamic Actors"
        mean_colors = ['orangered', 'lightseagreen', 'goldenrod']
        fill_colors = ['mistyrose', 'paleturquoise', 'khaki']
        alphas = [0.5, 0.3, 0.2]
        y_limits = {'top': 140000, 'bottom': -150000}

    if label_names is None:
        label_names = default_labels
    if title is None:
        title = default_title

    plt.figure(figsize=(11, 7))
    for ind in range(n_series):
        style = ind % len(mean_colors)
        label = label_names[ind] if ind < len(label_names) else 'Series {}'.format(ind + 1)
        tsteps = [s * 1000000 for s in timesteps[ind]]
        plt.plot(tsteps, mean_reward[ind], label=label, color=mean_colors[style])
        if with_std:
            plt.fill_between(tsteps, min_reward[ind], max_reward[ind], color=fill_colors[style], alpha=alphas[style])

    axes = plt.gca()
    axes.set_ylim(**y_limits)
    plt.title(title, fontdict={'size' : 18})

    plt.xlabel('Timesteps', fontdict={'size' : 18})
    plt.ylabel('Total Cumulative Reward', fontdict={'size' : 18})
    plt.ticklabel_format(axis="x", style="sci", scilimits=(0,0))
    plt.legend()
    plt.savefig(os.path.join(log_path, figname), dpi=200)
    plt.clf()
    plt.close()

In [31]:
def plot_success_CARLA(log_path, timesteps, mean_success, min_success, max_success, test_results=False, with_std=True, figname="mean_success.png", label_names=None, title=None):
    """Plot one or more CARLA success curves with min/max bands and save them.

    Args:
        log_path: directory the image is written to.
        timesteps: list of x-value sequences, one per series; each value is
            multiplied by 1000000 before plotting.
        mean_success: list of curves, one per series.
        min_success: list of lower band edges, one per series.
        max_success: list of upper band edges, one per series.
        test_results: selects the label set and title (see below).
        with_std: when true, shade the area between ``min_success`` and
            ``max_success`` of every series.
        figname: file name of the image inside ``log_path``.
        label_names: optional legend labels overriding the defaults.
        title: optional figure title overriding the default.

    Defaults when ``label_names`` / ``title`` are not given:

    * ``test_results`` false: the four 'Without EPS' / 'EPS: K = n' labels and
      the title "CARLA Success Metric".
    * ``test_results`` true, several series: the three 'with state ...' labels
      and the title "Cumulative Success Metric with Dynamic Actors".
    * ``test_results`` true, one series: label 'Navigation task' and the title
      "Cumulative Success Metric on Navigation task". Previously a single
      series was labelled 'with state A' under the "Dynamic Actors" title.
      NOTE(review): this choice follows the alternatives that were commented
      out in the original code - confirm it matches the intended figures.

    Series beyond the available labels are named 'Series <n>'; colours are
    reused cyclically.
    """
    n_series = len(timesteps)

    if not test_results:
        default_labels = ['Without EPS', 'EPS: K = 1', 'EPS: K = 3', 'EPS: K = 5']
        default_title = "CARLA Success Metric"
        mean_colors = ['orangered', 'lightseagreen', 'goldenrod', 'darkorchid']
        fill_colors = ['mistyrose', 'paleturquoise', 'khaki', 'mediumpurple']
        alphas = [0.5, 0.3, 0.3, 0.2]
    else:
        if n_series <= 1:
            default_labels = ['Navigation task']
            default_title = "Cumulative Success Metric on Navigation task"
        else:
            default_labels = ['with state A', 'with state A+I', 'with state I']
            default_title = "Cumulative Success Metric with Dynamic Actors"
        mean_colors = ['orangered', 'lightseagreen', 'goldenrod']
        fill_colors = ['mistyrose', 'paleturquoise', 'khaki']
        alphas = [0.5, 0.3, 0.2]

    if label_names is None:
        label_names = default_labels
    if title is None:
        title = default_title

    plt.figure(figsize=(11, 7))
    for ind in range(n_series):
        style = ind % len(mean_colors)
        label = label_names[ind] if ind < len(label_names) else 'Series {}'.format(ind + 1)
        tsteps = [s * 1000000 for s in timesteps[ind]]
        plt.plot(tsteps, mean_success[ind], label=label, color=mean_colors[style])
        if with_std:
            plt.fill_between(tsteps, min_success[ind], max_success[ind], color=fill_colors[style], alpha=alphas[style])

    plt.title(title, fontdict={'size' : 18})
    plt.xlabel('Timesteps', fontdict={'size' : 18})
    plt.ylabel('Total Successes', fontdict={'size' : 18})
    plt.ticklabel_format(axis="x", style="sci", scilimits=(0,0))
    plt.legend()
    plt.savefig(os.path.join(log_path, figname), dpi=200)
    plt.clf()
    plt.close()

In [32]:
from numpy import genfromtxt
import statistics


# Driver cell: for each of the four directories under ./CARLA/, merge runs 1-3,
# reduce them to mean/min/max series, then save one reward figure and one
# success figure with one series per directory.
log_path = "./CARLA/"
carla_dirs = ['without_EPS', 'with_EPS_1', 'with_EPS_3', 'with_EPS_5']
indexes = [1, 2, 3]

# One entry per directory, in the order of carla_dirs.
mean_rewards = []
min_rewards = []
max_rewards = []
mean_success = []
min_success = []
max_success = []
steps = []
for dir_name in carla_dirs:
    # r / s map each column-0 value to the reward / success values found for it.
    r, s = get_data_from_file(log_path, dir_name, indexes)
    rmean, rmin, rmax, smean, smin, smax, timesteps = compute_datapoints(r, s, dir_name)
    
    mean_rewards.append(rmean)
    min_rewards.append(rmin)
    max_rewards.append(rmax)
    mean_success.append(smean)
    min_success.append(smin)
    max_success.append(smax)
    steps.append(timesteps)
# Both images are written into log_path.
plot_reward_CARLA(log_path, steps, mean_rewards, min_rewards, max_rewards, with_std=True, figname="mean_reward.png")
plot_success_CARLA(log_path, steps, mean_success, min_success, max_success, with_std=True, figname="mean_success.png")

In [None]:
from numpy import genfromtxt
import statistics


# Driver cell: same pipeline as the ./CARLA/ cell, applied to the three
# directories under ./Dynamic_CARLA/ with test_results=True.
# NOTE: this rebinds the notebook-level names used by the previous cell
# (log_path, carla_dirs, mean_rewards, ...).
log_path = "./Dynamic_CARLA/"
carla_dirs = ['A', 'A_I', 'I']
indexes = [1, 2, 3]

# One entry per directory, in the order of carla_dirs.
mean_rewards = []
min_rewards = []
max_rewards = []
mean_success = []
min_success = []
max_success = []
steps = []
for dir_name in carla_dirs:
    r, s = get_data_from_file(log_path, dir_name, indexes)
    # test_results=True makes compute_datapoints scale the x-values as key / 1000000.
    rmean, rmin, rmax, smean, smin, smax, timesteps = compute_datapoints(r, s, dir_name, test_results=True)
    
    mean_rewards.append(rmean)
    min_rewards.append(rmin)
    max_rewards.append(rmax)
    mean_success.append(smean)
    min_success.append(smin)
    max_success.append(smax)
    steps.append(timesteps)
# Three series are passed here, so the plotting functions need three labels in test_results mode.
plot_reward_CARLA(log_path, steps, mean_rewards, min_rewards, max_rewards, test_results=True, with_std=True, figname="mean_reward.png")
plot_success_CARLA(log_path, steps, mean_success, min_success, max_success, test_results=True, with_std=True, figname="mean_success.png")

In [None]:
from numpy import genfromtxt
import statistics


# Driver cell: same pipeline for the single directory ./Static_CARLA/NeurIPS
# with test_results=True, producing figures that contain exactly one series.
# NOTE: this rebinds the notebook-level names used by the previous cells
# (log_path, dir_name, mean_rewards, ...).
log_path = "./Static_CARLA/"
dir_name = 'NeurIPS'
indexes = [1, 2, 3]

# Single-element lists: the plotting functions expect one entry per series.
mean_rewards = []
min_rewards = []
max_rewards = []
mean_success = []
min_success = []
max_success = []
steps = []
# for dir_name in carla_dirs:
r, s = get_data_from_file(log_path, dir_name, indexes)
rmean, rmin, rmax, smean, smin, smax, timesteps = compute_datapoints(r, s, dir_name, test_results=True)

mean_rewards.append(rmean)
min_rewards.append(rmin)
max_rewards.append(rmax)
mean_success.append(smean)
min_success.append(smin)
max_success.append(smax)
steps.append(timesteps)
# Both images are written into log_path.
plot_reward_CARLA(log_path, steps, mean_rewards, min_rewards, max_rewards, test_results=True, with_std=True, figname="mean_reward.png")
plot_success_CARLA(log_path, steps, mean_success, min_success, max_success, test_results=True, with_std=True, figname="mean_success.png")