## Purpose
This notebook helps analyse the results produced by TeachMyAgent's experiments. With it, one can reproduce the figures we provide in our [paper](https://arxiv.org/abs/2103.09815), as well as the videos and GIFs we show on our [website](https://sites.google.com/view/teachmyagent). 

## How to use this notebook
This notebook is broken down into 5 sections:
- **Imports**: import needed packages.
- **Load Data**: load results produced by experiments and format them (e.g. calculate percentage of mastered tasks).
- **Plot definitions**: define all the plot functions we provide (including video generation from learned policies).
- **Experiment graphs**: use the previously defined functions to generate the different figures we show in our paper.
- **Test tasks analysis**: analyse the performance of Deep RL students on test sets (e.g. plot test sets along with performance or use learned policy in a particular task). We also add to this section the use of our functions showing the curriculum generated by the different ACL methods.
----

# Imports

In [None]:
import numpy as np
import random
import tensorflow as tf
import os
import pylab
import seaborn as sns
import scipy.stats as sp
import pickle
import TeachMyAgent.teachers.utils.plot_utils as plotter
import imageio
from scipy.spatial import distance
import json
from collections import OrderedDict
import os.path as osp
import pandas as pd
from IPython.display import display
import copy
import scipy.stats as ss
import sys
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.colorbar as cbar
from matplotlib.patches import Ellipse, Rectangle
import argparse
import math
from TeachMyAgent.run_utils.environment_args_handler import EnvironmentArgsHandler
import TeachMyAgent.students.test_policy as test_policy
from TeachMyAgent.students.run_logs_util import get_run_logs
from TeachMyAgent.teachers.teacher_controller import param_vec_to_param_dict, param_dict_to_param_vec
import re

# Width of divider lines, presumably in characters for printed output;
# it is not referenced in the cells visible in this part of the notebook.
DIV_LINE_WIDTH = 50
# Print the NumPy version and the interpreter path of the running kernel.
print(np.__version__)
print(sys.executable)
# Apply seaborn's default theme to the matplotlib figures created afterwards.
sns.set()

# Load Data

In [None]:
def get_datasets(rootdir, name_filter=None, rename_labels=False):
    """Load the logs of every experiment directory found directly under `rootdir`.

    Results are stored in the module-level dictionaries `models_saves`
    (run logs under the 'data' key), `colors` and `labels`, each keyed by the
    experiment directory name. Nothing is returned.

    Args:
        rootdir: Folder whose immediate sub-directories are the experiments.
        name_filter: If given, only directories whose name contains this
            substring are loaded.
        rename_labels: If True, the label of an experiment is the name of the
            first known teacher found in its directory name (falling back to
            the directory name); otherwise the label is the directory name.
    """
    global default_colors_palette
    _, models_list, _ = next(os.walk(rootdir))
    print(models_list)
    # Build a filtered list instead of removing in place: a directory that both
    # contains "ignore" and fails the name filter used to be removed twice,
    # which raised ValueError.
    models_list = [
        dir_name for dir_name in models_list
        if "ignore" not in dir_name
        and (name_filter is None or name_filter in dir_name)
    ]

    # setting per-model type colors
    if len(per_model_colors) == 0 and len(models_list) > len(default_colors_palette):
        default_colors_palette = sns.color_palette("hls", len(models_list))

    # Teacher names checked, in this priority order, when renaming labels.
    known_teachers = ('ADR', 'ALP-GMM', 'Random', 'Covar-GMM', 'RIAC', 'GoalGAN',
                      'Self-Paced', 'Setter-Solver', 'UPPER_BASELINE')

    for i, m_name in enumerate(models_list):
        # The last matching teacher type wins, as in a plain sequential scan.
        for m_type, m_color in per_model_colors.items():
            if m_type in m_name:
                colors[m_name] = m_color
        if m_name not in colors:
            # Wrap around so an unknown experiment never indexes past the palette.
            colors[m_name] = default_colors_palette[i % len(default_colors_palette)]

        print("extracting data for {}...".format(m_name))
        m_id = m_name
        models_saves[m_id] = OrderedDict()
        # os.path.join gives the same path as plain concatenation when rootdir
        # ends with a separator, and a valid one when it does not.
        models_saves[m_id]['data'] = get_run_logs(os.path.join(rootdir, m_name), min_len=0)
        print("done")
        if m_name not in labels:
            if rename_labels:
                labels[m_name] = next(
                    (teacher for teacher in known_teachers if teacher in m_name), m_name)
            else:
                labels[m_name] = m_name
# Display label of each experiment, keyed by experiment name (filled by get_datasets).
labels = OrderedDict()
default_colors_palette = sns.color_palette()
# Fixed color per teacher type. The insertion order is also the order used
# below to sort `labels`.
per_model_colors = OrderedDict([('ALP-GMM', default_colors_palette[0]),
                                ('Covar-GMM', default_colors_palette[1]),
                                ('ADR', default_colors_palette[2]),
                                ('Random', default_colors_palette[3]),
                                ('RIAC', default_colors_palette[4]),
                                ('GoalGAN', default_colors_palette[5]),
                                ('Self-Paced', default_colors_palette[6]),
                                ('Setter-Solver', default_colors_palette[7]),
                                ('UPPER_BASELINE', default_colors_palette[8])])

# Loaded run logs and plot colors, both keyed by experiment name (filled by get_datasets).
models_saves = OrderedDict()
colors = OrderedDict()

##### MODIFY THIS TO POINT TO YOUR DATA FOLDER #####
data_folder = "ACL_bench/data/BENCHMARK/"
##### MODIFY THIS TO POINT TO YOUR DATA FOLDER #####

get_datasets(data_folder, rename_labels=True)
# get_datasets(data_folder, rename_labels=True, name_filter="parkour") # You can also add filters

# Order runs for the legend as in per_model_colors, with corresponding colors.
# Experiments whose name contains none of the teacher types are dropped from `labels`.
if len(per_model_colors) > 0:
    ordered_labels = OrderedDict()
    for teacher_type in per_model_colors.keys():
        for k,v in labels.items():
            if teacher_type in k:
                ordered_labels[k] = v
    labels = ordered_labels

## Handle baseline Random teacher

In [None]:
# Register the Random-teacher stumps profiling runs under the other expert
# knowledge configurations as well, reusing the same run logs, label and color.
default_configuration = "no"
configurations_to_add = ["minimal", "maximal"]
new_expes_to_add = {}
for expe_id in models_saves:
    if "profiling_benchmark_stumps_Random" not in expe_id:
        continue
    for new_config in configurations_to_add:
        new_expe_id = expe_id.replace(
            "allow_expert_knowledge_" + default_configuration,
            "allow_expert_knowledge_" + new_config,
        )
        # Shallow copy: the list is new, the run dictionaries are shared.
        duplicated_entry = OrderedDict()
        duplicated_entry['data'] = copy.copy(models_saves[expe_id]['data'])
        new_expes_to_add[new_expe_id] = duplicated_entry
        labels[new_expe_id] = labels[expe_id]
        colors[new_expe_id] = colors[expe_id]
# Added after the loop so that models_saves is not modified while iterated.
models_saves.update(new_expes_to_add)

## Handle Upper Baseline

In [None]:
# Register every UPPER_BASELINE experiment under criteria 1 to 4 as well,
# reusing the same run logs, label and color.
criteria_to_add = ["1", "2", "3", "4"]
new_expes_to_add = {}
for expe_id in models_saves:
    if "UPPER_BASELINE" not in expe_id:
        continue
    for criterion in criteria_to_add:
        new_expe_id = expe_id.replace(
            "UPPER_BASELINE",
            "UPPER_BASELINE_criteria_" + criterion,
        )
        # Shallow copy: the list is new, the run dictionaries are shared.
        duplicated_entry = OrderedDict()
        duplicated_entry['data'] = copy.copy(models_saves[expe_id]['data'])
        new_expes_to_add[new_expe_id] = duplicated_entry
        labels[new_expe_id] = labels[expe_id]
        colors[new_expe_id] = colors[expe_id]
# Added after the loop so that models_saves is not modified while iterated.
models_saves.update(new_expes_to_add)

## Merge experiments by teacher

Some experiments (e.g. on parkour or criterion 5) were split into several experiment folders. Their results must be merged into a single entry per teacher before they can be analyzed. 

In [None]:
# CAREFUL: IT NEEDS RENAME_LABELS SET TO TRUE
def merge_experiments_by_teacher(experiment_name, result_name=None):
    """Merge the runs of all matching experiments into one entry per label.

    Every experiment in `models_saves` whose name matches `experiment_name` is
    grouped by its label (so labels must be teacher names, i.e. the data must
    have been loaded with rename_labels=True). The merged entries are added to
    `models_saves`, and `labels` / `colors` are filled for the new names.

    Args:
        experiment_name: Regular expression matched from the start of each
            experiment name; every "*" stands for any run of name characters.
        result_name: Name template of the merged entries, in which "{LABEL}"
            is replaced by the label. When None, the name is `experiment_name`
            without "*", with "|" replaced by ",", followed by "_<label>".
    """
    new_saves = OrderedDict()
    anything_token = "*"
    # Raw string: "\-" is an invalid escape in a normal string literal.
    # The "A-z" range is kept on purpose: it also covers "_", which
    # experiment names contain.
    anything_pattern = r"[a-zA-z0-9\-.]*"
    regex = re.compile(experiment_name.replace(anything_token, anything_pattern))
    for expe_id in models_saves:
        if not regex.match(expe_id):
            continue
        associated_label = labels[expe_id]
        if result_name is None:
            new_expe_name = (experiment_name.replace(anything_token, '').replace('|', ',')
                             + "_" + associated_label)
        else:
            new_expe_name = result_name.replace('{LABEL}', associated_label)

        runs = models_saves[expe_id]['data']
        if runs is None:
            continue
        if new_expe_name not in new_saves:
            # First experiment of this label: start from a shallow copy so the
            # source list is not extended by later merges.
            new_saves[new_expe_name] = OrderedDict()
            new_saves[new_expe_name]['data'] = copy.copy(runs)
            labels[new_expe_name] = associated_label
            colors[new_expe_name] = colors[expe_id]
        else:
            new_saves[new_expe_name]['data'].extend(runs)
    # Added after the loop so that models_saves is not modified while iterated.
    models_saves.update(new_saves)

In [None]:
# Merge all experiments whose name matches "*benchmark_parkour*" into one entry
# per teacher label; the entries get the default name "benchmark_parkour_<label>".
merge_experiments_by_teacher("*benchmark_parkour*")

In [None]:
# Merge the criteria 5 stumps profiling experiments run with expert knowledge
# "no" into one entry per teacher label.
merge_experiments_by_teacher("*profiling_benchmark_stumps_*_criteria_5*allow_expert_knowledge_no*", 
                             result_name="profiling_benchmark_stumps_{LABEL}_criteria_5_allow_expert_knowledge_no")

In [None]:
# Merge the criteria 5 stumps profiling experiments run with expert knowledge
# "minimal" into one entry per teacher label.
merge_experiments_by_teacher("*profiling_benchmark_stumps_*_criteria_5*allow_expert_knowledge_minimal*", 
                             result_name="profiling_benchmark_stumps_{LABEL}_criteria_5_allow_expert_knowledge_minimal")

In [None]:
# Merge the criteria 5 stumps profiling experiments run with expert knowledge
# "maximal" into one entry per teacher label.
merge_experiments_by_teacher("*profiling_benchmark_stumps_*_criteria_5*allow_expert_knowledge_maximal*", 
                             result_name="profiling_benchmark_stumps_{LABEL}_criteria_5_allow_expert_knowledge_maximal")

## Compute mastered tasks percentage

Compute "% of Mastered tasks" metric: percentage of test tasks (over a test set of 100 tasks) on which the agent obtained an episodic reward greater than a threshold (230).

In [None]:
# Episodic reward above which a test task counts as mastered.
mastered_thr = 230
for m_id in labels:
    print(m_id)
    runs_data = models_saves[m_id]['data']
    # collect raw perfs
    print("Seeds : " + str(len(runs_data)))
    for r, run in enumerate(runs_data):
        # Reset the derived metrics so that re-running the cell does not
        # accumulate values. Only 'nb_mastered' is filled below.
        run['nb_mastered'] = []
        run['avg_pos_rewards'] = []
        run['local_rewards'] = []
        if 'env_test_rewards' not in run:
            print("Skipping seed {}".format(r))
            continue
        nb_evaluations = len(run['evaluation return'])
        # 'env_test_rewards' is read as one flat list holding the test-set
        # rewards of every evaluation, back to back.
        size_test_set = len(run['env_test_rewards']) // nb_evaluations if nb_evaluations > 0 else 0
        if size_test_set == 0:
            # No evaluation logged (or fewer rewards than evaluations):
            # skip instead of dividing by zero.
            print("Skipping seed {}".format(r))
            continue
        test_rewards = np.array(run['env_test_rewards'])
        for j in range(nb_evaluations):
            test_data = test_rewards[j * size_test_set:(j + 1) * size_test_set]
            nb_mastered = np.count_nonzero(test_data > mastered_thr)
            run['nb_mastered'].append((nb_mastered / size_test_set) * 100)

## Compute best seeds

Get best seed of each experiment. This is then used to plot policies and analyze test set performances.

In [None]:
def get_best_seed(expe_name, metric="evaluation return"):
    """Find the seed with the highest final value of `metric` in an experiment.

    Args:
        expe_name: Key of the experiment in `models_saves`.
        metric: Key of the per-run list whose last element is compared.

    Returns:
        A tuple (best_seed, best_seed_value, mean, std), where mean and std are
        computed over the final values of all runs that have data. When no run
        has data, best_seed is -1 and best_seed_value is -1000.
    """
    best_seed = -1
    # Start from -inf rather than -1000 so a seed is still selected when every
    # final value is at or below -1000. NaN values never compare greater, so
    # they are never selected.
    best_seed_value = float("-inf")
    found_seed = False
    all_values = []
    for run in models_saves[expe_name]['data']:
        if len(run[metric]) > 0:
            data = run[metric][-1]
            all_values.append(data)
            if data > best_seed_value:
                best_seed_value = data
                best_seed = run["config"]["seed"]
                found_seed = True
        else:
            print("Skipping seed {}: no data".format(run["config"]["seed"]))
    if not found_seed:
        # Keep the historical sentinel for callers that print or compare it.
        best_seed_value = -1000
    return best_seed, best_seed_value, np.mean(all_values), np.std(all_values)

In [None]:
best_seeds = {}
for i,(m_id,label) in enumerate(labels.items()):
    best_seed, best_seed_value, mean, std = get_best_seed(m_id, metric="nb_mastered")
    best_seeds[m_id] = best_seed
    print("Expe {0} : {1} ({2}) - Mean: {3} ({4})".format(m_id, best_seed, best_seed_value, mean, std))

# Plot definitions

## Curves

In [None]:
def plot_with_shade(subplot_nb, ax,x,y,err,color,shade_color,label,
                  y_min=None,y_max=None, legend=False, leg_size=30, leg_loc='best', title=None,
                  ylim=(0,100), xlim=(0,40), leg_args=None, leg_linewidth=8.0, linewidth=7.0,
                  ticksize=30, y_label='% Mastered env', label_size=30):
    """Plot a curve with a shaded band of +/- `err` around it.

    Args:
        subplot_nb: Index of the subplot; the y label is only set when it is 0.
        ax: Matplotlib axes to draw on.
        x, y, err: X values, curve values and half-width of the shaded band
            (`y` and `err` must support element-wise arithmetic).
        color, shade_color: Colors of the line and of the band.
        label: Legend label of the curve.
        y_min, y_max: Unused; kept for compatibility with existing calls.
        legend: Whether to draw the legend.
        leg_size, leg_loc, leg_args: Font size, location and extra keyword
            arguments of the legend.
        title: Optional axes title.
        ylim, xlim: (low, high) limits of the axes.
        leg_linewidth, linewidth: Line width of legend handles and of the curve.
        ticksize, label_size: Font sizes of tick labels and axis labels.
        y_label: Label of the y axis.
    """
    ax.locator_params(axis='x', nbins=5)
    ax.locator_params(axis='y', nbins=5)
    ax.tick_params(axis='both', which='major', labelsize=ticksize)
    ax.plot(x,y, color=color, label=label,linewidth=linewidth)
    ax.fill_between(x,y-err,y+err,color=shade_color,alpha=0.2)
    if legend:
        leg = ax.legend(loc=leg_loc, fontsize=leg_size, **(leg_args or {}))
        # `legendHandles` was renamed to `legend_handles` in newer matplotlib
        # releases; support both so the notebook runs on old and new versions.
        legend_handles = getattr(leg, "legend_handles", None)
        if legend_handles is None:
            legend_handles = leg.legendHandles
        for legobj in legend_handles:
            legobj.set_linewidth(leg_linewidth)
    ax.set_xlabel('Million steps', fontsize=label_size)
    if subplot_nb == 0:
        ax.set_ylabel(y_label, fontsize=label_size)
    ax.set_xlim(xmin=xlim[0],xmax=xlim[1])
    ax.set_ylim(bottom=ylim[0],top=ylim[1])
    if title:
        ax.set_title(title, fontsize=22)

def plot_all_and_median(subplot_nb, ax,x,ys,color,label,
                         y_min=None,y_max=None, legend=False, title=None, x_max=20, y_label='% Mastered env'):
    """Draw every curve of `ys` as a thin line and their median as a thick one.

    The y label is only set when `subplot_nb` is 0. When `y_min` is None only
    the top of the y axis is fixed (at 100); otherwise the y axis spans
    [y_min, y_max]. The x axis spans [0, x_max].
    """
    for axis_name in ('x', 'y'):
        ax.locator_params(axis=axis_name, nbins=5)
    ax.tick_params(axis='both', which='major', labelsize=30)

    # Same upper bound on the number of plotted x values as before.
    x_values = x[0:999999]
    median_curve = np.median(ys, axis=0)
    for single_curve in ys:
        ax.plot(x_values, single_curve, color=color, linewidth=1.5, alpha=0.5)
    ax.plot(x_values, median_curve, color=color, linewidth=7, label=label)

    if legend:
        ax.legend(loc='best', fontsize=25)
    ax.set_xlabel('Million steps', fontsize=18)
    if subplot_nb == 0:
        ax.set_ylabel(y_label, fontsize=18)
    ax.set_xlim(xmin=0, xmax=x_max)
    if y_min is None:
        ax.set_ylim(top=100)
    else:
        ax.set_ylim(bottom=y_min, top=y_max)
    if title:
        ax.set_title(title, fontsize=22)
        
def get_percentiles(data, label, max_ep=100):
    """Print the number of zeros and the 25/50/75/90th percentiles of data[:max_ep]."""
    window = data[:max_ep]
    zero_count = np.count_nonzero(window == 0.0)
    print('{} -> nb zeros: {}'.format(label, zero_count))
    percentile_values = np.percentile(window, [25, 50, 75, 90])
    print('{} -> percentile: {}'.format(label, percentile_values))
    
def get_simple_welch(d1, d2):
    """Return the result of an unequal-variance (Welch) t-test between two samples."""
    welch_result = ss.ttest_ind(d1, d2, equal_var=False)
    return welch_result

def get_multistep_welch(algo_0, algo_1, epoch=0):
    """Run a Welch t-test between two groups of curves at every step.

    `algo_0` and `algo_1` are sequences of curves (one per seed). Steps are
    compared up to the length of the shorter first curve. `epoch` is unused.

    Returns:
        (tests, maxima): the test result of each step, and the largest value
        found across both groups at each step.
    """
    nb_steps = min(len(algo_0[0]), len(algo_1[0]))
    tt_test, max_values = [], []
    for step in range(nb_steps):
        first_sample = [curve[step] for curve in algo_0]
        second_sample = [curve[step] for curve in algo_1]
        max_values.append(max(first_sample + second_sample))
        tt_test.append(get_simple_welch(first_sample, second_sample))

    return tt_test, max_values
    
def get_welch_from_names(algo_0='amb', algo_1='rmb', epoch=0, metric='nb_mastered'):
    """Print a Welch t-test between two experiments selected by name substring.

    For every experiment of `models_saves` whose name contains `algo_0` or
    `algo_1`, the value of `metric` at index `epoch - 1` is collected for each
    run (so epoch=0 selects the last value). If several experiments match the
    same substring, the last one replaces the previous ones. When `epoch` is
    not 0 and a run has fewer than `epoch` values, a message is printed and
    the function returns without testing.
    """
    print("algo0:{}, algo1:{}".format(algo_0,algo_1))
    final_explos = dict()
    for m_id, save in models_saves.items():
        matches_first = algo_0 in m_id
        if not (matches_first or algo_1 in m_id):
            continue
        target_key = algo_0 if matches_first else algo_1
        runs_data = save['data']
        if epoch != 0:
            # Evaluate every run (no short-circuit), as the plain loop did.
            too_short = [len(run[metric]) < epoch for run in runs_data]
            if any(too_short):
                print("aborting: {} not long enough".format(m_id))
                return
        final_explos[target_key] = [run[metric][epoch-1] for run in runs_data]

    welch_result = ss.ttest_ind(final_explos[algo_0], final_explos[algo_1], equal_var=False)
    print('welch {}'.format(welch_result))

In [None]:
def plot_curves(agent_type, plot_type='shade', metric='nb_mastered', legend=True, y_min=0, y_max=100, x_max=10, 
                allow_different_sizes=False, welch=False, welch_p_threshold=0.05, _ax=None, leg_size=20):
    """Plot the evolution of `metric` for every experiment matching `agent_type`.

    Args:
        agent_type: Regular expression matched from the start of each
            experiment name of `labels`; "*" stands for any run of name
            characters.
        plot_type: "shade" (mean +/- standard deviation), "shade_se"
            (mean +/- standard error) or "all_and_median".
        metric: Key of the per-run list to plot.
        legend: Whether to draw the legend.
        y_min, y_max, x_max: Axis limits (x is expressed in million steps).
        allow_different_sizes: If False, every seed is truncated to the
            shortest one; if True, missing values are masked instead.
        welch: If True, mark the steps where two experiments differ
            significantly according to a Welch t-test.
        welch_p_threshold: p-value under which a difference is marked.
        _ax: Axes to draw on. When None, a new figure is created and saved in
            'TeachMyAgent/graphics/'.
        leg_size: Legend font size (shade plots only).
    """
    if _ax is None:
        f, ax = plt.subplots(1,1,figsize=(30,12))
    else:
        ax = _ax
    ys_for_welch = {}
    max_y = -1
    anything_token = "*"
    # Raw string: "\-" is an invalid escape in a normal string literal.
    # The "A-z" range is kept on purpose: it also covers "_", which
    # experiment names contain.
    anything_pattern = r"[a-zA-z0-9\-.]*"
    regex = re.compile(agent_type.replace(anything_token, anything_pattern))
    for m_id, label in labels.items():
        if not regex.match(m_id):
            continue
        runs_data = models_saves[m_id]['data']
        nb_seeds = len(runs_data)
        if nb_seeds == 0:
            continue

        ys = []
        episodes = []
        for run in runs_data:
            # Use the longest timestep axis found among the seeds.
            if len(run['total timesteps']) > len(episodes):
                episodes = np.array(run['total timesteps'])
            ys.append(run[metric])

        if not allow_different_sizes:
            # clean data in case an expe has seeds with varying epoch number
            min_len = min(len(y) for y in ys)
            ys_same_len = np.empty((len(ys), min_len))
            for seed_idx, y in enumerate(ys):
                ys_same_len[seed_idx, :] = y[:min_len]
            episodes = episodes[0:min_len]
        else:
            full_len = max(len(y) for y in ys)
            ys_same_len = np.ma.empty((len(ys), full_len))
            ys_same_len.mask = True
            # Assigning a value unmasks it, so only missing steps stay masked.
            for seed_idx, y in enumerate(ys):
                for step_idx in range(len(y)):
                    ys_same_len[seed_idx, step_idx] = y[step_idx]
        episodes = [e/1000000 for e in episodes]

        if ys_same_len.size == 0:
            continue
        if welch:
            ys_for_welch[m_id] = ys_same_len

        if plot_type in ["shade", "shade_se"]:
            stds = ys_same_len.std(axis=0)
            if plot_type == "shade_se":
                stds = stds / math.sqrt(nb_seeds)
            means = ys_same_len.mean(axis=0)
            max_y = max(max_y, max(means + stds))
            plot_with_shade(0, ax, episodes, means, stds, colors[m_id],
                            colors[m_id], label, leg_loc=(0,0.39), y_label=metric, leg_args={"frameon":False},
                            legend=legend,ylim=[y_min,y_max], xlim=[0,x_max], leg_size=leg_size,
                            ticksize=40, label_size=40)
        elif plot_type == "all_and_median":
            plot_all_and_median(0, ax, episodes,ys_same_len,colors[m_id],label,
                                title="{}".format(agent_type), legend=legend, x_max=x_max,
                                y_min=y_min, y_max=y_max, y_label=metric)
    if welch:
        i = 0
        for expe in ys_for_welch:
            j = 0
            for expe_2 in ys_for_welch:
                if expe != expe_2:
                    ttest_results, maxs = get_multistep_welch(ys_for_welch[expe], ys_for_welch[expe_2])
                    max_val = max_y if max_y > 0 else max(maxs)
                    for k, ttest in enumerate(ttest_results):
                        if ttest[1] < welch_p_threshold:
                            # NOTE(review): the marker is placed at x = step index
                            # while the curves use million steps, and its color is
                            # picked by position in `colors` - confirm both are intended.
                            ax.plot(k, max_val + 3*(i+j) + 10, '*', markersize=20, c=list(colors.items())[i+j][1])
                    j += 1

            # Stop once half of the experiments have been used as reference.
            if len(ys_for_welch) / 2 == i + 1:
                break
            i += 1

    plt.tight_layout()

    if _ax is None:
        f.savefig('TeachMyAgent/graphics/{0}_{1}_{2}.png'.
                  format(agent_type.replace('*', '[]').replace('|', ','), plot_type, metric), bbox_inches='tight')

In [None]:
def plot_all_comparisons(agent_type, metric='nb_mastered', y_min=0, y_max=100, x_max=20, allow_different_sizes=False, welch_p_threshold=0.05):
    """Plot all matching experiments together, then every pair of them.

    The top row shows the curves of all experiments matching `agent_type`
    (mean +/- standard error). Below, a lower-triangular grid holds one plot
    per pair of experiments with Welch t-test markers, with the experiment
    labels as row and column headers. The figure is saved in
    'TeachMyAgent/graphics/'.

    Args:
        agent_type: Regular expression matched from the start of each
            experiment name of `labels`; "*" stands for any run of name
            characters.
        metric: Key of the per-run list to plot.
        y_min, y_max, x_max: Axis limits passed to plot_curves.
        allow_different_sizes: Passed to plot_curves.
        welch_p_threshold: p-value under which a difference is marked.
    """
    anything_token = "*"
    # Raw string: "\-" is an invalid escape in a normal string literal.
    # The "A-z" range is kept on purpose: it also covers "_", which
    # experiment names contain.
    anything_pattern = r"[a-zA-z0-9\-.]*"
    regex = re.compile(agent_type.replace(anything_token, anything_pattern))
    agents_to_plot = [m_id for m_id in labels if regex.match(m_id)]

    # One extra column for the row headers; two extra rows for the global
    # plot and the column headers.
    nb_columns = len(agents_to_plot) + 1
    nb_rows = len(agents_to_plot) + 2
    fig = plt.figure(constrained_layout=True, figsize=(35 + 5*nb_rows, 30 + 5*nb_columns))
    widths = [0.2 if i == 0 else 1 for i in range(nb_columns)]
    heights = [4, 0.2] + [1 for _ in range(nb_rows - 2)]
    gs = fig.add_gridspec(nb_rows, nb_columns, width_ratios=widths, height_ratios=heights)
    fig.patch.set_facecolor('#f7f7f7')

    f_0_0_ax = fig.add_subplot(gs[0, :])
    plot_curves(agent_type, metric=metric, plot_type="shade_se", welch=False, _ax=f_0_0_ax, y_min=y_min, y_max=y_max, x_max=x_max, leg_size=40, allow_different_sizes=allow_different_sizes)
    for i in range(len(agents_to_plot) + 1):
        for j in range(len(agents_to_plot) + 1):
            ax = fig.add_subplot(gs[i+1, j])
            if i == 0 or j == 0:
                # Header cell: holds the experiment label (top-left corner stays empty).
                ax.set_axis_off()
                if i + j > 0:
                    idx = j - 1 if i == 0 else i - 1
                    ax.text(0.5, 0.5, labels[agents_to_plot[idx]], ha="center", va="center", fontsize=50)
            elif i > j:
                plot_curves("(" + agents_to_plot[i - 1] + "|" + agents_to_plot[j - 1] + ")", plot_type="shade_se",
                            metric=metric, welch=True, welch_p_threshold=welch_p_threshold, _ax=ax, 
                            y_min=y_min, y_max=y_max, x_max=x_max, allow_different_sizes=allow_different_sizes)
            else:
                # Diagonal and upper triangle are left empty.
                ax.set_axis_off()

    plt.savefig('TeachMyAgent/graphics/comparisons_{}_{}.png'.format(
        agent_type.replace('*', '[]').replace('|', ','), metric.replace(" ", "_")), 
        facecolor='#f7f7f7', edgecolor='none', bbox_inches='tight', dpi=100)   

## Radar chart

In [None]:
# CAREFUL: IT NEEDS RENAME_LABELS SET TO TRUE AS WELL AS PER MODEL COLORS ACTIVATED
def generate_profile_chart(expe_template_name="(*profiling_benchmark_stumps_*|UPPER_BASELINE)_criteria_(1|2|3|4|5)_allow_expert_knowledge_(no|minimal|maximal)$", 
                           baseline_teacher="Random", list_of_teachers=None, tick_step=0.5, timestep=-1):
    """Draw one radar chart per expert knowledge level comparing the teachers.

    For every experiment matching `expe_template_name`, the mean over seeds of
    the 'nb_mastered' metric is collected per (expert knowledge, criterion)
    pair and divided by the value of `baseline_teacher`. The figure is saved
    in 'TeachMyAgent/graphics/'. Labels must be teacher names (data loaded
    with rename_labels=True) and `per_model_colors` must hold their colors.

    Args:
        expe_template_name: Regular expression matched from the start of each
            experiment name; "*" stands for any run of name characters.
        baseline_teacher: Label of the teacher used as reference (plotted at 1).
        list_of_teachers: If given, only experiments with these labels are used.
        tick_step: Spacing between radial ticks.
        timestep: Index of the 'nb_mastered' value to use; -1 (or an index past
            the end of a seed) selects the last value. UPPER_BASELINE always
            uses the last value.
    """
    anything_token = "*"
    # Raw string: "\-" is an invalid escape in a normal string literal.
    # The "A-z" range is kept on purpose: it also covers "_", which
    # experiment names contain.
    anything_pattern = r"[a-zA-z0-9\-.]*"
    experiment_name_regex_pattern = expe_template_name.replace(anything_token, anything_pattern)
    name_regex = re.compile(experiment_name_regex_pattern)
    criteria_regex = re.compile(anything_pattern + "_criteria_[0-9]")
    
    figname = "benchmark_profiling{}".format('_' + '_'.join(list_of_teachers) if list_of_teachers is not None else '')
    if timestep != -1:
        figname += "_timestep-" + str(timestep)
    
    criterion_label = [
        "Mostly unfeasible\n task space",
        "Mostly trivial\n task space",
        "Student that\n forgets",
        "Rugged\n difficulty",
        "Variety of\n students"
    ]
    
    ek_types = ["no", "low", "high"]
    
    linestyle_tuple = [
     ('solid',                 (0, ())),
        
     ('loosely dotted',        (0, (1, 1))),
     ('dotted',                (0, (1, 0.5))),

     ('loosely dashed',        (0, (5, 1.5))),
     ('dashed',                (0, (5, 1))),
     ('densely dashed',        (0, (5, 0.5))),
    
     ('loosely dashdotted',    (0, (3, 1, 1, 1))),   
     ('dashdotted',            (0, (3, 0.5, 1, 0.5)))
    ]
    
    linestyles = {}

    # One column per (expert knowledge, criterion) pair, one row per label.
    df_columns = [ek + "_" + criterion for criterion in criterion_label for ek in ek_types]
    df_indexes = list(set(labels.values()))
    raw_results = pd.DataFrame(
        index=df_indexes, 
        columns=df_columns)
    processed_results = pd.DataFrame(
        index=df_indexes, 
        columns=df_columns)
    
    ### Get results ###
    linestyle_iterrator = 0
    for expe_id in models_saves:
        if name_regex.match(expe_id):
            current_label = labels[expe_id]
            if list_of_teachers is not None and current_label not in list_of_teachers:
                # Skip only this experiment: a `break` here would also discard
                # every experiment listed after it.
                continue
                
            if current_label not in linestyles and "UPPER_BASELINE" not in current_label:
                linestyles[current_label] = linestyle_tuple[linestyle_iterrator][1]
                linestyle_iterrator += 1
            
            # Get column prefix
            if "allow_expert_knowledge_no" in expe_id:
                prefix = "no_"
            elif "allow_expert_knowledge_minimal" in expe_id:
                prefix = "low_"
            elif "allow_expert_knowledge_maximal" in expe_id:
                prefix = "high_"
            else:
                raise Exception()
                
            # Get criteria: the match ends with the digit following "_criteria_"
            match = criteria_regex.match(expe_id)
            criteria_id = match.group()[-1]
            column = prefix + criterion_label[int(criteria_id) - 1]
                
            current_criteria_values = []
            nb_seeds = len(models_saves[expe_id]["data"])
            for seed in range(nb_seeds):
                seed_values_array = models_saves[expe_id]["data"][seed]["nb_mastered"]
                if timestep != -1 and len(seed_values_array) > timestep and not "UPPER_BASELINE" in current_label:
                    seed_value = seed_values_array[timestep]
                else:
                    seed_value = seed_values_array[-1]
                current_criteria_values.append(seed_value)
            
            mean_result = np.mean(current_criteria_values)
            # Single .loc call: chained `.loc[row][col] = value` may write to a
            # temporary copy and leave the frame unchanged.
            raw_results.loc[current_label, column] = mean_result
            
    ### Generate chart results ###
    for index, row in raw_results.iterrows():
        for col in raw_results.columns:
            if index == baseline_teacher:
                processed_results.loc[index, col] = 1
            else:
                baseline_val = raw_results.loc[baseline_teacher, col]
                current_val = row[col]
                processed_results.loc[index, col] = current_val / baseline_val
                
    ### Generate plot ### 
    N = len(processed_results.columns) / len(ek_types)
    N = int(N)
    
    # What will be the angle of each axis in the plot? (we divide the plot / number of variable)
    angles = [n / float(N) * 2 * math.pi for n in range(N)]
    # Repeat the first angle so that every polygon is closed.
    angles += angles[:1]

    # Initialise the plot
    nb_columns = 2
    nb_rows = 2      
    fig = plt.figure(constrained_layout=True, figsize=(80, 60))
    gs = fig.add_gridspec(nb_rows, nb_columns)
    fig.subplots_adjust(hspace=0.05)
    
    for i in range(len(ek_types)):
        current_ek_filter = ek_types[i] + "_"
        # First chart spans the top row, the two others share the bottom row.
        if i == 0:
            subplot_pos = gs[0, :]
        else:
            subplot_pos = gs[1, (i-1)%2]
        ax = fig.add_subplot(subplot_pos, polar=True)
        ax.patch.set_facecolor('white')
        ax.title.set_text((ek_types[i] + " expert knowledge").capitalize())
        ax.title.set_fontsize(80)
        ax.tick_params(axis='both', which='major', pad=-50)

        # Set labels
        plt.xticks(angles[:-1], criterion_label, color='grey', size=65)
        for label, angle in zip(ax.get_xticklabels(), np.rad2deg(angles)):
            if angle == 90 or angle == 270:
                label.set_horizontalalignment('center')
            elif 90 < angle < 270:
                label.set_horizontalalignment('right')
            else:
                label.set_horizontalalignment('left')

        # Set tick lines
        for line in ax.xaxis.get_gridlines():
            line.set_color('grey')
            line.set_alpha(0.95)
            line.set_linestyle(':')
            line.set_linewidth(2)

        for line in ax.yaxis.get_gridlines():
            line.set_color('grey')
            line.set_alpha(0.95)
            line.set_linestyle(':')
            line.set_linewidth(2)

        # Change radar's background
        max_val = max(processed_results.filter(like=current_ek_filter).max())
        ticks = []
        fill_values = np.linspace(0, 2*np.pi, 100)
        for j in np.arange(0, max_val+tick_step, tick_step):
            ticks.append(j)
            ax.fill(fill_values, [j,] * 100, color='k', alpha=0.05)

        # Draw ylabels
        ax.set_rlabel_position(0)
        plt.yticks(ticks, [str(t) for t in ticks], color='k', alpha=0.75, fontsize=55, ha="center")
        plt.ylim(0,max_val + tick_step)
        ax.set_axisbelow(False)
        
        for index, row in processed_results.filter(like=current_ek_filter).iterrows():
            values = list(row.values)
            values += values[:1]
            if index == "UPPER_BASELINE":
                ax.plot(angles, values, "*", color='red', markersize=30)
            elif index in linestyles:
                # Plot data
                ax.plot(angles, values, linewidth=10, linestyle=linestyles[index], 
                        color=per_model_colors[index], 
                        label=index + (" (baseline)" if index == baseline_teacher else ""))
                # Fill area
                ax.fill(angles, values, 'b', alpha=0.1, color=per_model_colors[index])
            # Labels without a line style had no matching experiment (e.g. they
            # were filtered out by list_of_teachers): nothing to draw for them.

        if i == 0:
            from matplotlib.lines import Line2D
            handles = {}
            legend = ax.legend(loc=(1.5,0.2), fontsize=65)
            # `legendHandles` was renamed to `legend_handles` in newer
            # matplotlib releases; support both.
            legend_handles = getattr(legend, "legend_handles", None)
            if legend_handles is None:
                legend_handles = legend.legendHandles
            for legobj in legend_handles:
                current_legobj = copy.copy(legobj)
                current_legobj.set_linewidth(15)
                handles[current_legobj.get_label()] = current_legobj
                
            del legend
            # CHANGE THIS IF YOU ADD A NEW TEACHER #
            no_ek_teachers = ['Random (baseline)', 'ALP-GMM', 'Covar-GMM', 'RIAC', 'Self-Paced']
            ek_teachers = ['ADR', 'GoalGAN', 'Setter-Solver']
            # Only teachers that were actually plotted get a legend entry.
            ax.legend(handles=(
                [handles[name] for name in no_ek_teachers if name in handles]
                + [Line2D([], [], linestyle='', label=""),
                   Line2D([], [], linestyle='', label=r'$\it{EK}$* $\it{required}$:')]
                + [handles[name] for name in ek_teachers if name in handles]
            ), loc=(1.5,0.1), fontsize=65)
    
    plt.savefig('TeachMyAgent/graphics/{}.png'.format(figname), bbox_inches='tight', dpi=100)

## Bar plot

In [None]:
# CAREFUL: IT NEEDS RENAME_LABELS SET TO TRUE AS WELL AS PER MODEL COLORS ACTIVATED
def barplot_annotate_brackets(num1, num2, text, center, height, height_index, yerr=None, barh=.05):
    """Draw a bracket with a text annotation between two bars of the current plot.

    Args:
        num1, num2: Indexes of the left and right bars.
        text: Annotation written above the bracket.
        center: X position of every bar.
        height: Height of every bar.
        height_index: Vertical offset added to the tallest bar to place the bracket.
        yerr: Optional error of every bar (read but not used for placement).
        barh: Height of the bracket, as a fraction of the y axis range.
    """
    ref_y = max(height)

    lx, ly = center[num1], height[num1]
    rx, ry = center[num2], height[num2]

    if yerr:
        ly += yerr[num1]
        ry += yerr[num2]

    # Convert the bracket height from axis fraction to data units.
    y_bottom, y_top = plt.gca().get_ylim()
    bracket_height = barh * (y_top - y_bottom)

    base_y = ref_y + height_index
    top_y = base_y + bracket_height

    plt.plot([lx, lx, rx, rx], [base_y, top_y, top_y, base_y],
             c='black', linestyle='solid', linewidth=3)
    plt.text((lx+rx)/2, top_y, text, ha='center', va='bottom')
    
def generate_comparison_bars(expe_template_name="*profiling_benchmark_stumps_*_criteria_(1|2|3|4|5)_allow_expert_knowledge_(no|minimal|maximal)$",
                             list_of_teachers=None, timestep=-1):
    """Save a grid of bar plots comparing teachers per criterion and expert-knowledge level.

    Rows are the five criteria, columns the three expert-knowledge levels. Each cell
    shows the mean "nb_mastered" of every teacher and a bracket between each pair of
    teachers whose difference is significant (p < 0.05 according to ``get_simple_welch``).

    Relies on names defined elsewhere in the notebook: ``models_saves``, ``labels``,
    ``per_model_colors`` and ``get_simple_welch``.

    :param expe_template_name: regex-like pattern selecting experiments; ``*`` stands for
        any run of characters accepted by ``anything_pattern`` below
    :param list_of_teachers: labels of the teachers to keep (``None`` keeps all of them)
    :param timestep: index at which "nb_mastered" is read; ``-1`` (or an index beyond the
        end of a seed's values) uses the last recorded value
    :raises ValueError: if a matching experiment name holds no known expert-knowledge
        marker or no ``_criteria_<digit>`` part
    """
    anything_token = "*"
    # Raw string: same regex as before, without the invalid "\-" string escape.
    # NOTE(review): the "A-z" range also matches "_" (and a few other symbols), which
    # experiment names rely on; do not narrow it to "A-Z" without adding "_".
    anything_pattern = r"[a-zA-z0-9\-.]*"
    experiment_name_regex_pattern = expe_template_name.replace(anything_token, anything_pattern)
    name_regex = re.compile(experiment_name_regex_pattern)
    criteria_regex = re.compile(anything_pattern + "_criteria_[0-9]")

    figname = "benchmark_bars{}".format('_' + '_'.join(list_of_teachers) if list_of_teachers is not None else '')
    if timestep != -1:
        figname += "_timestep-" + str(timestep)

    criterion_label = [
        "Mostly\n unfeasible\n task space",
        "Mostly\n trivial\n task space",
        "Student\n that\n forgets",
        "Rugged\n difficulty",
        "Variety\n of\n students"
    ]

    ek_types = ["no", "low", "high"]
    # Marker found in the experiment name -> column of the figure (checked in this order)
    ek_markers = {
        "allow_expert_knowledge_no": "no",
        "allow_expert_knowledge_minimal": "low",
        "allow_expert_knowledge_maximal": "high",
    }

    raw_results = {ek: {criterion: {} for criterion in criterion_label} for ek in ek_types}

    ### Get results ###
    for expe_id in models_saves:
        if not name_regex.match(expe_id):
            continue

        current_label = labels[expe_id]
        if list_of_teachers is not None and current_label not in list_of_teachers:
            # Skip only this experiment: a `break` here would also drop every
            # following experiment, including those of the requested teachers.
            continue

        # Get column prefix
        results_index = next((ek for marker, ek in ek_markers.items() if marker in expe_id), None)
        if results_index is None:
            raise ValueError("No expert knowledge level found in experiment name '{}'".format(expe_id))

        # Get criteria (last character of the match is the criterion digit)
        match = criteria_regex.match(expe_id)
        if match is None:
            raise ValueError("No criterion found in experiment name '{}'".format(expe_id))
        criteria_id = match.group()[-1]
        current_criterion = criterion_label[int(criteria_id) - 1]

        current_criteria_values = []
        seeds_data = models_saves[expe_id]["data"]
        # Index-based access on purpose: works whether seeds are stored in a list
        # or in a mapping keyed by seed number.
        for seed in range(len(seeds_data)):
            seed_values_array = seeds_data[seed]["nb_mastered"]
            if timestep != -1 and len(seed_values_array) > timestep:
                seed_value = seed_values_array[timestep]
            else:
                seed_value = seed_values_array[-1]
            current_criteria_values.append(seed_value)

        raw_results[results_index][current_criterion][current_label] = {
            "mean": np.mean(current_criteria_values),
            "std": np.std(current_criteria_values),
            "seeds": current_criteria_values
        }

    ### Generate plot ###
    # Initialise the plot: first row and first column are thin header cells
    nb_columns = len(ek_types) + 1
    nb_rows = len(criterion_label) + 1
    fig = plt.figure(constrained_layout=True, figsize=(100, 150))
    widths = [0.1 if i == 0 else 1 for i in range(nb_columns)]
    heights = [0.1 if i == 0 else 1 for i in range(nb_rows)]
    gs = fig.add_gridspec(nb_rows, nb_columns, width_ratios=widths, height_ratios=heights)
    fig.subplots_adjust(wspace=0.05)

    for i in range(nb_columns):
        for j in range(nb_rows):
            ax = fig.add_subplot(gs[j, i])
            if i == 0 or j == 0:
                # Header cell: only text (the top-left corner stays empty)
                ax.set_axis_off()
                if i == 0 and j > 0:
                    ax.text(0.5, 0.5, criterion_label[j-1], ha="center", va="center", fontsize=50)
                elif j == 0 and i > 0:
                    ax.text(0.5, 0.5, (ek_types[i-1] + " expert knowledge").capitalize(), ha="center", va="center", fontsize=50)
                continue

            current_expe = raw_results[ek_types[i-1]][criterion_label[j-1]]
            current_labels = list(current_expe)
            means = [current_expe[teacher]["mean"] for teacher in current_labels]
            current_colors = [per_model_colors[teacher] for teacher in current_labels]
            current_indexes = np.arange(len(current_labels))

            # Test every unordered pair of teachers once
            is_diff_significant = []
            for t1, teacher_1 in enumerate(current_labels):
                for t2 in range(t1 + 1, len(current_labels)):
                    teacher_2 = current_labels[t2]
                    ttest_result = get_simple_welch(current_expe[teacher_1]["seeds"],
                                                    current_expe[teacher_2]["seeds"])
                    if ttest_result[1] < 0.05:
                        is_diff_significant.append((t1, t2))

            # pyplot calls below target `ax`, the most recently added (current) axes
            ax.p1 = plt.bar(current_indexes, means, color=current_colors)

            # Stack one bracket per significant pair above the tallest bar
            k = 0
            for first_teacher, second_teacher in is_diff_significant:
                barplot_annotate_brackets(first_teacher, second_teacher,
                                          "", current_indexes, means, k, barh=0.01)
                k += 2.2
            plt.xticks(current_indexes, current_labels, fontsize=30)
            plt.ylim(ymax=100)

    plt.savefig('TeachMyAgent/graphics/{}.png'.format(figname), bbox_inches='tight', dpi=100)

## Test set plots

In [None]:
def dict_to_args_str(dictionary):
    """Flatten a settings dictionary into a list of command-line style tokens.

    Every key yields a ``--key`` flag. The formatted value follows the flag, unless
    the value is ``None``, in which case the flag is emitted on its own.
    """
    tokens = []
    for name, value in dictionary.items():
        flag = "--{}".format(name)
        tokens.extend([flag] if value is None else [flag, "{}".format(value)])
    return tokens

In [None]:
def round_values(values):
    """Return ``values`` rounded to 3 decimals.

    NumPy arrays are rounded into a NEW array: the input is left untouched, so that
    rounding parameters for display does not alter the parameters later given to the
    environment. Any other value is rounded with the built-in ``round``.
    """
    if isinstance(values, np.ndarray):
        return np.round(values, 3)
    return round(values, 3)

In [None]:
def params_to_str(params_dict, line_width=116):
    """Return ``str(params_dict)`` wrapped into lines of ``line_width`` characters.

    The text is cut into ``max(1, len(text) // line_width) + 1`` consecutive chunks
    joined by newlines. The last chunk is empty when the text is shorter than
    ``line_width`` or an exact multiple of it, hence a trailing newline in those cases.
    """
    text = str(params_dict)
    last_chunk_index = max(1, len(text) // line_width)
    chunks = [text[k * line_width:(k + 1) * line_width] for k in range(last_chunk_index + 1)]
    return "\n".join(chunks)

In [None]:
def plot_test_tasks_results(env, env_params_list, env_rewards_list, fig_name, nb_env_test_to_check=None):
    """Render test tasks on a two-column grid and save it as a PNG.

    Each subplot shows the environment built from one entry of ``env_params_list``,
    captioned with its index, its score (``env_rewards_list``) and its rounded parameters.

    :param env: environment exposing ``set_environment``, ``reset`` and ``render``
    :param env_params_list: one dictionary of environment parameters per test task
    :param env_rewards_list: score associated with each test task
    :param fig_name: name (without extension) of the file saved in TeachMyAgent/graphics
    :param nb_env_test_to_check: number of tasks to plot (all of them when ``None``)
    """
    nb_plots_per_row = 2
    if nb_env_test_to_check is None:
        nb_env = len(env_params_list)
    else:
        nb_env = nb_env_test_to_check
    nb_rows = math.ceil(nb_env / nb_plots_per_row)

    figure = plt.figure()
    figure.set_figwidth(25)
    figure.set_figheight(6 * nb_rows)

    for task_index in range(nb_env):
        axis = plt.subplot(nb_rows, nb_plots_per_row, task_index + 1)

        # Caption written just above the top-left corner of the subplot
        rounded_params = {}
        for name, value in env_params_list[task_index].items():
            rounded_params[name] = round_values(value)
        caption = "Test env nb {0} \nScore performed: {1} \nEnv params: {2}".format(
            task_index, env_rewards_list[task_index], params_to_str(rounded_params))
        axis.text(-0.05, 1.03, caption, ha="left", transform=axis.transAxes)

        # Build the task and show its initial frame
        env.set_environment(**env_params_list[task_index])
        env.reset()
        frame = env.render(mode='rgb_array')
        plt.imshow(frame)
        plt.axis('off')

    plt.savefig('TeachMyAgent/graphics/{}.png'.format(fig_name), bbox_inches='tight', dpi=100)

In [None]:
def perform_test_sets_analysis(dataset_folder, settings, nb_tasks=-1, test_set=None, ep_returns=None):
    """Plot the test tasks (with their scores) of the best seed of each experiment.

    Relies on ``best_seeds``, defined elsewhere in the notebook. Sets
    ``CUDA_VISIBLE_DEVICES`` to "-1" for the whole process.

    :param dataset_folder: folder holding one sub-folder per experiment
    :param settings: list of dictionaries of ``test_policy`` arguments, each with at
        least an "expe_name" entry; the dictionaries are not modified
    :param nb_tasks: number of tasks to plot per experiment, ``-1`` meaning the whole
        test set of that experiment
    :param test_set: name of a fixed test set; when ``None`` the test set used during
        training is loaded along with its rewards
    :param ep_returns: per-setting scores, required when ``test_set`` is given
    :return: dictionary mapping each experiment name to the list of parameter vectors
        of its test tasks
    :raises ValueError: if ``test_set`` is given without ``ep_returns``
    """
    if test_set is not None and ep_returns is None:
        raise ValueError("ep_returns must be provided when a fixed test_set is used")

    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    parser = test_policy.get_parser()
    parser.add_argument('--expe_name', type=str)

    result = {}
    for setting_index, setting in enumerate(settings):
        expe_name = setting["expe_name"]
        current_expe_best_seed = best_seeds[expe_name]
        data_path = os.path.join(dataset_folder, expe_name, expe_name + "_s" + str(current_expe_best_seed))
        # Work on a copy so that the caller's settings are left untouched
        run_setting = dict(setting, fpath=data_path)

        args = parser.parse_args(dict_to_args_str(run_setting))
        env_fn, param_bounds, _, _ = EnvironmentArgsHandler.get_object_from_arguments(args)
        env = env_fn()
        try:
            env._SET_RENDERING_VIEWPORT_SIZE(4000, 2000, keep_ratio=True)

            if test_set is None:
                test_set_params, rewards = test_policy.load_training_test_set(data_path, order_by_best_rewards=args.bests)
            else:
                test_set_params = test_policy.load_fixed_test_set(data_path, test_set)
                rewards = ep_returns[setting_index]
            result[expe_name] = [param_dict_to_param_vec(param_bounds, param) for param in test_set_params]

            # Resolved per experiment: overwriting `nb_tasks` itself would impose the
            # size of the first test set on all the following experiments.
            current_nb_tasks = len(test_set_params) if nb_tasks == -1 else nb_tasks

            if args.bests is None:
                ordering_name = "firsts"
            elif args.bests:
                ordering_name = "top"
            else:
                ordering_name = "worse"
            fig_name = "{0}_s{1}_{2}test-set-analysis_{3}_{4}".format(args.expe_name,
                                                                   current_expe_best_seed,
                                                                   "fixed-" if test_set is not None else "",
                                                                   ordering_name,
                                                                   current_nb_tasks)
            plot_test_tasks_results(env, test_set_params, rewards, fig_name, nb_env_test_to_check=current_nb_tasks)
        finally:
            # Release the environment even if loading or plotting failed
            env.close()
    return result

## Plot Curriculum

In [None]:
def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
    """Build a colormap from ``n`` colors sampled in the ``[minval, maxval]`` part of ``cmap``."""
    truncated_name = 'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval)
    sampled_colors = cmap(np.linspace(minval, maxval, n))
    return mcolors.LinearSegmentedColormap.from_list(truncated_name, sampled_colors)

def draw_ellipse(position, covariance, ax=None, edge_color=None, face_color=None, **kwargs):
    """Draw the 2-sigma ellipse of a Gaussian, using its first two dimensions only.

    :param position: mean of the Gaussian (only ``position[0:2]`` is used)
    :param covariance: covariance matrix (only ``covariance[0:2, 0:2]`` is used)
    :param ax: axes to draw on (current axes when ``None``)
    :param edge_color: edge color of the ellipse
    :param face_color: face color of the ellipse
    :param kwargs: additional arguments forwarded to ``matplotlib.patches.Ellipse``
    """
    if ax is None:
        ax = plt.gca()

    covariance = covariance[0:2, 0:2]
    position = position[0:2]

    # Convert covariance to principal axes
    if covariance.shape == (2, 2):
        U, s, _ = np.linalg.svd(covariance)
        angle = np.degrees(np.arctan2(U[1, 0], U[0, 0]))
        width, height = 2 * np.sqrt(s)
    else:
        angle = 0
        width, height = 2 * np.sqrt(covariance)

    # Draw the Ellipse. `angle` is passed by keyword because recent Matplotlib
    # versions no longer accept it as a positional argument.
    nsig = 2
    ax.add_patch(Ellipse(position, nsig * width, nsig * height, angle=angle,
                         edgecolor=edge_color, facecolor=face_color, **kwargs))

def get_colorbar(cmap, ax):
    """Add a colorbar in a thin axes appended to the right of ``ax``.

    ``ax`` is made the current axes again before returning.

    :param cmap: object given as first argument to ``Figure.colorbar``
    :param ax: axes next to which the colorbar is placed
    :return: the created colorbar only (the former trailing ``; cax`` was a no-op
        statement, the colorbar axes was never returned)
    """
    from mpl_toolkits.axes_grid1 import make_axes_locatable

    divider = make_axes_locatable(ax)
    colorbar_axes = divider.append_axes("right", size="5%", pad=0.05)
    # Local name chosen so as not to shadow the module-level `cbar` alias
    colorbar = ax.figure.colorbar(cmap, cax=colorbar_axes)
    plt.sca(ax)
    return colorbar

def plot_gmm(weights, means, covariances, X=None, ax=None, xlim=(0, 1), ylim=(0, 1), xlabel='', ylabel='',
             bar=True, bar_side='right'):
    """Draw one ellipse per Gaussian of a GMM, colored by the last dimension of its mean.

    :param weights: weights of the Gaussians (only used to bound the number of ellipses)
    :param means: means of the Gaussians; the last component of each mean selects the color
    :param covariances: covariance matrices of the Gaussians
    :param X: unused, kept for backward compatibility
    :param ax: axes to draw on (current axes when not provided)
    :param xlim: unused, kept for backward compatibility
    :param ylim: unused, kept for backward compatibility
    :param xlabel: unused, kept for backward compatibility
    :param ylabel: unused, kept for backward compatibility
    :param bar: whether to add the 'Absolute Learning Progress' colorbar
    :param bar_side: side of ``ax`` on which the colorbar is placed
    """
    ft_off = 15

    ax = ax or plt.gca()
    colormap = sns.color_palette("coolwarm", as_cmap=True)
    cmap = truncate_colormap(colormap, minval=0.0, maxval=1.0)
    for pos, covar, _ in zip(means, covariances, weights):
        ellipse_color = cmap(pos[-1])
        draw_ellipse(pos, covar, alpha=0.6, ax=ax, edge_color=ellipse_color, face_color=ellipse_color)

    if not bar:
        return

    cax, _ = cbar.make_axes(ax, location=bar_side, shrink=0.8)
    cb = cbar.ColorbarBase(cax, cmap=cmap)
    cb.set_label('Absolute Learning Progress', fontsize=ft_off + 5)
    cax.tick_params(labelsize=ft_off + 0)
    cax.yaxis.set_ticks_position(bar_side)
    cax.yaxis.set_label_position(bar_side)

def generate_stumps_curriculum(dataset_folder, settings, frequency=1, initial_frequency=250000,
                           stump_height_dims=(-1, 4), stump_spacing_dims=(-1, 7)):
    """Generate a GIF of the tasks sampled over training on the Stump Tracks task space.

    For the best seed of each experiment, one scatter plot is saved per kept entry of
    "periodical_samples" (first two dimensions of the tasks), decorated with
    teacher-specific information, and the frames are assembled into
    TeachMyAgent/graphics/<expe_name>_<seed>.gif.

    Relies on ``best_seeds``, ``labels`` and ``models_saves``, defined elsewhere in
    the notebook.

    :param dataset_folder: unused, kept for backward compatibility
    :param settings: list of dictionaries of ``test_policy`` arguments, each with at
        least an "expe_name" entry
    :param frequency: keep one entry of "periodical_samples" every ``frequency`` entries
    :param initial_frequency: used to compute the step shown in each frame title
    :param stump_height_dims: x-axis limits (stump height)
    :param stump_spacing_dims: y-axis limits (stump spacing)
    """
    parser = test_policy.get_parser()
    parser.add_argument('--expe_name', type=str)

    # NOTE(review): frames go to "ACL_bench/..." whereas every other output of this
    # notebook goes to "TeachMyAgent/graphics" - possibly a leftover name; confirm.
    frame_path_template = "ACL_bench/graphics/gifs/scatter_{}.png"

    for setting in settings:
        expe_name = setting["expe_name"]
        current_expe_best_seed = best_seeds[expe_name]

        # The returned objects are not needed here; the call is kept as it was
        # (presumably it validates the arguments - TODO confirm).
        args = parser.parse_args(dict_to_args_str(setting))
        EnvironmentArgsHandler.get_object_from_arguments(args)

        current_label = labels[expe_name]
        data = models_saves[expe_name]['data'][current_expe_best_seed]
        task_samples = data["periodical_samples"]
        associated_infos = data["periodical_infos"]
        filenames = []

        # Make sure the frames can be written even on a fresh checkout
        os.makedirs(os.path.dirname(frame_path_template), exist_ok=True)

        for i in range(0, len(task_samples), frequency):
            if len(task_samples[i]) == 0:
                continue

            f, ax = plt.subplots(1, 1, figsize=(20, 20))
            current_data = task_samples[i]
            infos = associated_infos[i]
            hue = None
            ax.set_ylabel('stump_spacing', fontsize=25)
            ax.set_xlabel('stump_height', fontsize=25)
            plt.xticks(fontsize=18)
            plt.yticks(fontsize=18)
            plt.xlim(stump_height_dims[0], stump_height_dims[1])
            plt.ylim(stump_spacing_dims[0], stump_spacing_dims[1])
            set_legend = lambda: None
            # Index of the teacher's bookkeeping entry associated with these samples
            bk_index = infos[0]["bk_index"]
            if current_label in ["ALP-GMM", "Covar-GMM"]:
                if bk_index > 0:
                    plot_gmm(data["weights"][bk_index], data["means"][bk_index], data["covariances"][bk_index],
                             ax=ax, xlim=stump_height_dims, ylim=stump_spacing_dims)
            elif current_label == "Self-Paced":
                draw_ellipse(data["mean"][bk_index], data["covariance"][bk_index], ax=ax, alpha=0.5)
            elif current_label == "ADR":
                x1 = data["task_space"][bk_index][0][0]
                x2 = data["task_space"][bk_index][1][0]
                y1 = data["task_space"][bk_index][0][1]
                y2 = data["task_space"][bk_index][1][1]
                ax.add_patch(Rectangle((x1, y1), x2-x1, y2-y1, alpha=0.5))
            elif current_label == "Setter-Solver":
                set_legend = lambda: ax.legend(title="Feasibility", fontsize=25)
                hue = [_info["task_infos"][0][0] for _info in infos]
            elif current_label == "RIAC":
                set_legend = lambda: ax.legend(title="Region ALP", fontsize=25)
                hue = [data["all_alps"][bk_index][_info["task_infos"]] for _info in infos]

            sns.scatterplot(x=current_data[:, 0], y=current_data[:, 1], ax=ax, hue=hue, s=100)
            legend = set_legend()
            if legend is not None:
                legend.get_title().set_fontsize('25')
                # `legendHandles` was renamed `legend_handles` in recent Matplotlib
                # versions; support both.
                legend_handles = getattr(legend, "legend_handles", None)
                if legend_handles is None:
                    legend_handles = legend.legendHandles
                for legobj in legend_handles:
                    legobj.set_linewidth(5.0)
            f_name = frame_path_template.format(i)
            plt.suptitle('Step {}'.format(math.ceil(initial_frequency/frequency) * i), fontsize=25)
            plt.savefig(f_name, bbox_inches='tight')
            plt.close(f)
            filenames.append(f_name)

        images = [imageio.imread(filename) for filename in filenames]
        imageio.mimsave('TeachMyAgent/graphics/{}.gif'.format(expe_name + "_" + str(current_expe_best_seed)), images, duration=0.3)

In [None]:
def generate_parkour_curriculum(dataset_folder, settings, frequency=1, initial_frequency=250000):
    """Generate a GIF showing one randomly picked task per period of the curriculum.

    For the best seed of each experiment, one task is drawn at random from each kept
    entry of "periodical_samples", rendered in the environment, and the frames are
    assembled into TeachMyAgent/graphics/<expe_name>_<seed>.gif.

    Relies on ``best_seeds`` and ``models_saves``, defined elsewhere in the notebook.

    :param dataset_folder: folder holding one sub-folder per experiment
    :param settings: list of dictionaries of ``test_policy`` arguments, each with at
        least an "expe_name" entry; the dictionaries are not modified
    :param frequency: keep one entry of "periodical_samples" every ``frequency`` entries
    :param initial_frequency: used to compute the step shown in each frame title
    """
    parser = test_policy.get_parser()
    parser.add_argument('--expe_name', type=str)

    for setting in settings:
        expe_name = setting["expe_name"]
        current_expe_best_seed = best_seeds[expe_name]
        data_path = os.path.join(dataset_folder, expe_name, expe_name + "_s" + str(current_expe_best_seed))
        # Work on a copy so that the caller's settings are left untouched
        run_setting = dict(setting, fpath=data_path)

        args = parser.parse_args(dict_to_args_str(run_setting))
        env_fn, param_bounds, _, _ = EnvironmentArgsHandler.get_object_from_arguments(args)
        env = env_fn()
        try:
            env._SET_RENDERING_VIEWPORT_SIZE(4000, 2000, keep_ratio=True)

            fig_name = "{0}_s{1}_curriculum-analysis".format(args.expe_name,
                                                             current_expe_best_seed)
            tasks = models_saves[expe_name]['data'][current_expe_best_seed]["periodical_samples"]

            # NOTE(review): frames go to "ACL_bench/..." whereas every other output of
            # this notebook goes to "TeachMyAgent/graphics" - possibly a leftover name.
            frame_path_template = "ACL_bench/graphics/gifs/{}_{}.png"
            os.makedirs(os.path.dirname(frame_path_template), exist_ok=True)

            filenames = []
            # The bound used to be ceil(len(tasks) / frequency) - 1 while ALSO stepping by
            # `frequency`, so only the beginning of the curriculum was rendered as soon as
            # frequency > 1. Same frames as before when frequency == 1.
            # NOTE(review): the last entry is still left out, as before - confirm intent.
            for i in range(0, len(tasks) - 1, frequency):
                current_tasks = tasks[i]
                if len(current_tasks) == 0:
                    continue
                index = random.randint(0, len(current_tasks)-1)
                task = param_vec_to_param_dict(param_bounds, current_tasks[index])
                f, ax = plt.subplots(1, 1, figsize=(12, 10))

                env.set_environment(**task)
                env.reset()

                plt.imshow(env.render(mode='rgb_array'))
                plt.axis('off')
                f_name = frame_path_template.format(fig_name, i)
                plt.suptitle('Step {}'.format(math.ceil(initial_frequency/frequency) * i), fontsize=20)
                plt.savefig(f_name, bbox_inches='tight')
                plt.close(f)
                filenames.append(f_name)

            images = [imageio.imread(filename) for filename in filenames]
            imageio.mimsave('TeachMyAgent/graphics/{}.gif'.format(expe_name + "_" + str(current_expe_best_seed)), images, duration=0.3)
        finally:
            # Release the environment even if rendering failed
            env.close()

## Test Policy 

In [None]:
def test_policy_perf(dataset_folder, settings):
    """Run ``test_policy.main`` on the best seed of each experiment, without recording.

    Relies on ``best_seeds``, defined elsewhere in the notebook.

    :param dataset_folder: folder holding one sub-folder per experiment
    :param settings: list of dictionaries of ``test_policy`` arguments, each with at
        least an "expe_name" entry; the dictionaries are not modified, so the same
        list can be reused for other calls (e.g. ``record_policy``)
    :return: list holding, for each setting, the value returned by ``test_policy.main``
    """
    parser = test_policy.get_parser()
    parser.add_argument('--expe_name', type=str)
    ep_returns = []

    for setting in settings:
        expe_name = setting["expe_name"]
        current_expe_best_seed = best_seeds[expe_name]
        data_path = os.path.join(dataset_folder, expe_name, expe_name + "_s" + str(current_expe_best_seed))

        # Work on a copy: "record"/"norender" must not leak into the caller's settings
        run_setting = dict(setting)
        run_setting["fpath"] = data_path
        run_setting["record"] = False
        run_setting["norender"] = None  # None makes it a value-less flag

        args = parser.parse_args(dict_to_args_str(run_setting))
        ep_returns.append(test_policy.main(args))
    return ep_returns

### Generate Video

In [None]:
def record_policy(dataset_folder, settings):
    """Run ``test_policy.main`` with recording enabled on the best seed of each experiment.

    The recording goes to ``<recording_path>/<expe_name>_s<best seed>``. Relies on
    ``best_seeds``, defined elsewhere in the notebook.

    :param dataset_folder: folder holding one sub-folder per experiment
    :param settings: list of dictionaries of ``test_policy`` arguments, each with at
        least "expe_name" and "recording_path" entries; the dictionaries are not
        modified, so calling this function again records to the same path
    """
    parser = test_policy.get_parser()
    parser.add_argument('--expe_name', type=str)

    for setting in settings:
        expe_name = setting["expe_name"]
        run_name = expe_name + "_s" + str(best_seeds[expe_name])

        # Work on a copy: overwriting setting["recording_path"] in place made every
        # new call nest one more "<expe_name>_s<seed>" folder into the path.
        run_setting = dict(setting)
        run_setting["fpath"] = os.path.join(dataset_folder, expe_name, run_name)
        run_setting["record"] = True
        run_setting["recording_path"] = os.path.join(setting["recording_path"], run_name)

        args = parser.parse_args(dict_to_args_str(run_setting))
        test_policy.main(args)

# Experiment graphs

Modify the function calls below to plot your experiments. You can use regex-like patterns with some modifications:
- `*` means anything

## % Nb mastered Plots

In [None]:
# Curves (plot type "shade_se") of the parkour benchmark experiments of the eight listed teachers
plot_curves("benchmark_parkour_(ADR|ALP-GMM|Covar-GMM|RIAC|Random|Setter-Solver|Self-Paced|GoalGAN)$",leg_size=40, y_max=32, plot_type="shade_se", allow_different_sizes=True, x_max=20, welch=False)

In [None]:
# Curves (plot type "all_and_median") of the experiments whose name contains "08-01_test_stump_tracks_Self-Paced"
plot_curves("*08-01_test_stump_tracks_Self-Paced*", plot_type="all_and_median", allow_different_sizes=True, x_max=10)

## Convergence plots

In [None]:
# "evaluation return" metric for the Covar-GMM experiments on criterion 5 of the Stump Tracks profiling benchmark
plot_curves("*profiling_benchmark_stumps_Covar-GMM_criteria_5*_no_", x_max=20, plot_type="shade_se", metric="evaluation return", allow_different_sizes=True, y_min=-300, y_max=310, welch=False)

In [None]:
# "training return" metric for the Random teacher experiments matching "subset_parkour_climbing_easy_parkour_1"
plot_curves("*subset_parkour_climbing_easy_parkour_1*Random", leg_size=30, x_max=10, welch=False, allow_different_sizes=True, plot_type="shade_se", metric="training return", y_min=-200, y_max=310)

## Comparison plots

In [None]:
# Comparisons on the "nb_mastered" metric between the eight teachers of the parkour benchmark (Welch p-value threshold: 0.01)
plot_all_comparisons("benchmark_parkour_(ADR|ALP-GMM|Covar-GMM|RIAC|Random|Setter-Solver|Self-Paced|GoalGAN)$", welch_p_threshold=0.01, metric="nb_mastered", y_min=0, y_max=100, allow_different_sizes=True)

In [None]:
# Profile chart using the Random teacher as baseline (no `timestep` given)
generate_profile_chart(baseline_teacher="Random", tick_step=1)

In [None]:
# Same profile chart, with timestep=10
generate_profile_chart(baseline_teacher="Random", tick_step=1, timestep=10)

In [None]:
# Bar plots with the default arguments: default experiment pattern, all teachers, last recorded value
generate_comparison_bars()

# Test tasks analysis

Modify the settings below to load the best seed of one of your experiments.

In [None]:
#### TO CHANGE ####
# One dictionary per experiment to analyse. Each key is turned into a "--key value"
# command-line argument (a None value gives a value-less "--key" flag).
settings = [
    {
        "env": "parametric-continuous-parkour-v0",
        "walker_type": "climbing_profile_chimpanzee",
        "bests": True, # Whether the results should be ordered by performance (best performance first)
        "lidars_type": "up", # Use 'up' for climbers, 'down' for walkers and 'full' for swimmers
        "deterministic": None, # Leave this to None
        "len": 2000, # Leave this to 2000
        "expe_name" : "10-08_subset_parkour_climbing_easy_parkour_1_teacher_Random",
        "episode_ids": "0", # -1 means all the episodes
        "recording_path": "", # Folder in which record_policy creates its "<expe_name>_s<seed>" recording path
    },
]

In [None]:
# Plot the test tasks of each experiment in `settings`; `test_sets` maps each experiment name to its list of task parameter vectors
test_sets = perform_test_sets_analysis(data_folder, settings)

In [None]:
# Run the policy of the best seed of each experiment in `settings` with recording enabled
record_policy(data_folder, settings)

## Curriculum

### Parkour

In [None]:
# Curriculum GIF of the Setter-Solver parkour experiment with the "old_classic_bipedal" walker
generate_parkour_curriculum(data_folder, settings = [
    {
        "env": "parametric-continuous-parkour-v0",
        "walker_type": "old_classic_bipedal",
        "lidars_type": "down",
        "expe_name" : "14-12_benchmark_parkour_Setter-Solver_walker_type_old_classic_bipedal",
    }])

In [None]:
# Curriculum GIF of the Setter-Solver parkour experiment with the "climbing_profile_chimpanzee" walker
generate_parkour_curriculum(data_folder, settings = [
    {
        "env": "parametric-continuous-parkour-v0",
        "walker_type": "climbing_profile_chimpanzee",
        "lidars_type": "up",
        "expe_name" : "14-12_benchmark_parkour_Setter-Solver_walker_type_climbing_profile_chimpanzee",
    }])

In [None]:
# Curriculum GIF of the Setter-Solver parkour experiment with the "fish" walker
generate_parkour_curriculum(data_folder, settings = [
    {
        "env": "parametric-continuous-parkour-v0",
        "walker_type": "fish",
        "lidars_type": "full",
        "expe_name" : "14-12_benchmark_parkour_Setter-Solver_walker_type_fish",
    }])

### Stump Tracks

In [None]:
# Curriculum GIF of Setter-Solver on criterion 1 (maximal expert knowledge); stump height axis set to [-1, 10]
generate_stumps_curriculum(data_folder, settings = [
    {
        "expe_name" : "14-12_profiling_benchmark_stumps_Setter-Solver_criteria_1_allow_expert_knowledge_maximal",
    }], stump_height_dims=[-1, 10], stump_spacing_dims=[-1, 7])

In [None]:
# Curriculum GIF of Setter-Solver on criterion 2 (maximal expert knowledge); stump height axis set to [-4, 4]
generate_stumps_curriculum(data_folder, settings = [
    {
        "expe_name" : "14-12_profiling_benchmark_stumps_Setter-Solver_criteria_2_allow_expert_knowledge_maximal",
    }], stump_height_dims=[-4, 4], stump_spacing_dims=[-1, 7])

In [None]:
# Curriculum GIF of Setter-Solver on criterion 3 (maximal expert knowledge); stump height axis set to [-1, 4]
generate_stumps_curriculum(data_folder, settings = [
    {
        "expe_name" : "14-12_profiling_benchmark_stumps_Setter-Solver_criteria_3_allow_expert_knowledge_maximal",
    }], stump_height_dims=[-1, 4], stump_spacing_dims=[-1, 7])