## Purpose
This notebook is designed to help analyse the results produced by TeachMyAgent's experiments. Using it, one can generate videos of policies as shown on our [website](https://developmentalsystems.org/TeachMyAgent/). 

## How to use this notebook
This notebook is broken down into 4 sections:
- **Imports**: import needed packages.
- **Load Data**: load results produced by experiments and format them (e.g. calculate best seed of each experiment).
- **Plot definitions**: define the helper functions used to test and record policies.
- **Policies**: use the previously defined functions to record videos of, or evaluate, the best seed of your experiments.

## Add our paper's results to your plots
In order to add the results we provide in our paper to your plots, make sure you have downloaded them:
1. Go to the `notebooks` folder
2. Make the `download_baselines.sh` script executable: `chmod +x download_baselines.sh`
3. Download results: `./download_baselines.sh`
> **_WARNING:_**  This will download a zip file weighing approximately 4.5GB. Our script will then extract it into `TeachMyAgent/data`. Once extracted, the results will take up approximately 15GB. 
----

# Imports

In [None]:
import sys
import os
import random
import math
import pylab
import copy
import re
from enum import Enum
from collections import OrderedDict

import numpy as np

# NOTE(review): DIV_LINE_WIDTH is not referenced anywhere in the visible part of this notebook.
DIV_LINE_WIDTH = 50
# Print the NumPy version and the interpreter path so the runtime environment is explicit in the output.
print(np.__version__)
print(sys.executable)

In [None]:
# Make the parent of the current working directory importable, so that the
# `TeachMyAgent` package can be imported from this notebook.
module_path = os.path.abspath('../')
already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)

import TeachMyAgent.students.test_policy as test_policy
from TeachMyAgent.students.run_logs_util import get_run_logs

# Load Data

In [None]:
def get_datasets(rootdir, name_filter=None, rename_labels=False):
    """
        Loads results of experiments.

        Every sub-directory of `rootdir` is treated as one experiment, except those whose name contains "ignore".
        Results to load can be filtered by their name and each experiment can be associated to a label (usually ACL method's name).

        Loaded runs are stored in the module-level `models_saves` dictionary (under `models_saves[name]['data']`)
        and labels in the module-level `labels` dictionary. A label that already exists is never overwritten.

        :param rootdir: Directory containing experiments to load (a trailing '/' is accepted but not required)
        :param name_filter: String experiments to load must contain
        :param rename_labels: If True, each experiment will be associated to a label (see below). Labels are the names that will appear in plots.
        :type rootdir: str
        :type name_filter: str (or None)
        :type rename_labels: boolean
        :raises FileNotFoundError: if `rootdir` cannot be listed (e.g. it does not exist)
    """
    ##### MODIFY THIS IF YOU ADD A NEW METHOD #####
    # Checked in order: the first entry contained in the experiment's name becomes its label.
    known_methods = (
        'ADR',
        'ALP-GMM',
        'Random',
        'Covar-GMM',
        'RIAC',
        'GoalGAN',
        'Self-Paced',
        'Setter-Solver',
        'UPPER_BASELINE',
    )
    ##### MODIFY THIS IF YOU ADD A NEW METHOD #####

    try:
        _, models_list, _ = next(os.walk(rootdir))
    except StopIteration:
        # os.walk yields nothing when the directory cannot be listed: report it explicitly.
        raise FileNotFoundError("Cannot list experiments in '{}'".format(rootdir)) from None
    print(models_list)

    # Build a new list instead of removing entries one by one: a directory that is both
    # "ignore"d and rejected by the filter used to be removed twice, raising ValueError.
    models_list = [
        dir_name for dir_name in models_list
        if "ignore" not in dir_name
        and (name_filter is None or name_filter in dir_name)
    ]

    for m_name in models_list:
        print("extracting data for {}...".format(m_name))
        models_saves[m_name] = OrderedDict()
        # os.path.join yields the same path as plain concatenation when rootdir ends with '/'.
        models_saves[m_name]['data'] = get_run_logs(os.path.join(rootdir, m_name), book_keeping_keys=['env_test_rewards'], min_len=0)
        print("done")
        if m_name in labels:
            continue
        if rename_labels:
            # Fall back to the experiment's name when no known method matches.
            labels[m_name] = next((method for method in known_methods if method in m_name), m_name)
        else:
            labels[m_name] = m_name
# Module-level containers filled by get_datasets():
#   labels:       experiment name -> label associated to the experiment
#   models_saves: experiment name -> OrderedDict whose 'data' entry holds the loaded runs
labels = OrderedDict()
models_saves = OrderedDict()

##### MODIFY THIS TO POINT TO YOUR DATA FOLDER #####
data_folder = "../TeachMyAgent/data/BENCHMARK/"
##### MODIFY THIS TO POINT TO YOUR DATA FOLDER #####

# Load every experiment found in the data folder and associate each one to its method's label.
get_datasets(data_folder, rename_labels=True)
# Example: only load the experiments whose name contains a given string.
# get_datasets(data_folder, rename_labels=True, name_filter="parkour_RIAC_walker_type_fish") # You can also add filters

## Compute mastered tasks percentage

Compute "% of Mastered tasks" metric: percentage of test tasks (over a test set of 100 tasks) on which the agent obtained an episodic reward greater than a threshold (230).

In [None]:
# Episodic reward above which a test task is considered mastered.
mastered_thr = 230
for m_id in labels:
    print(m_id)
    runs_data = models_saves[m_id]['data']
    #collect raw perfs
    print("Seeds : " + str(len(runs_data)))
    for r, run in enumerate(runs_data):
        # Percentage of mastered test tasks, one value per evaluation (stays empty for skipped seeds).
        mastered_pct = []
        if 'env_test_rewards' not in run:
            print("Skipping seed {}".format(r))
        else:
            test_rewards = np.asarray(run['env_test_rewards'])
            nb_evaluations = len(run['evaluation return'])
            # Test rewards are stored back to back: one chunk of `size_test_set` rewards per evaluation.
            size_test_set = len(test_rewards) // nb_evaluations if nb_evaluations > 0 else 0
            if size_test_set == 0:
                # No evaluation (or not even one reward per evaluation): nothing to compute,
                # and dividing by the test set size would raise ZeroDivisionError.
                print("Skipping seed {}: no evaluation data".format(r))
            else:
                mastered_pct = [
                    (np.count_nonzero(test_rewards[j * size_test_set:(j + 1) * size_test_set] > mastered_thr) / size_test_set) * 100
                    for j in range(nb_evaluations)
                ]
        runs_data[r]['nb_mastered'] = mastered_pct
        runs_data[r]['avg_pos_rewards'] = []
        runs_data[r]['local_rewards'] = []

## Compute best seeds

Get best seed of each experiment. This is then used to analyze test set performances and show curricula.

In [None]:
def get_best_seed(expe_name, metric="evaluation return"):
    """
        Calculate best seed of an experiment.

        The best seed is the one whose last recorded value of `metric` is the highest (the first such seed
        wins in case of a tie). Seeds without any value for `metric` are skipped.
        Runs are read from the module-level `models_saves` dictionary.

        :param expe_name: Experiment's name
        :param metric: Metric to use to calculate best seed
        :type expe_name: str
        :type metric: str
        :return best seed, its metric value, mean of all seeds, std over seeds.
                If no seed has data, returns (-1, -1000, nan, nan).
    """
    # Values returned when no seed can be selected (kept for backward compatibility).
    best_seed = -1
    best_seed_value = -1000
    # Compare against -inf rather than -1000 so that seeds whose final value is
    # lower than or equal to -1000 can still be selected.
    highest_so_far = float("-inf")
    all_values = []
    for run in models_saves[expe_name]['data']:
        if len(run[metric]) == 0:
            print("Skipping seed {}: no data".format(run["config"]["seed"]))
            continue
        final_value = run[metric][-1]
        all_values.append(final_value)
        if final_value > highest_so_far:
            highest_so_far = final_value
            best_seed_value = final_value
            best_seed = run["config"]["seed"]
    if not all_values:
        # np.mean / np.std of an empty list also give nan, but emit a RuntimeWarning.
        return best_seed, best_seed_value, np.nan, np.nan
    return best_seed, best_seed_value, np.mean(all_values), np.std(all_values)

In [None]:
# Best seed of every loaded experiment (ranked on the "nb_mastered" metric), keyed by experiment name.
best_seeds = {}
for m_id in labels:
    # seed_stats is (best seed, its metric value, mean over seeds, std over seeds).
    seed_stats = get_best_seed(m_id, metric="nb_mastered")
    best_seeds[m_id] = seed_stats[0]
    print("Expe {0} : {1} ({2}) - Mean: {3} ({4})".format(m_id, *seed_stats))

# Plot definitions

In [None]:
def dict_to_args_str(dictionary):
    """
        Convert a dictionary of options into a list of command-line style tokens.

        Each key produces a "--key" token, followed by its value formatted as a string.
        A value of None produces no value token (the key is emitted alone, like a flag).

        :param dictionary: Options to convert
        :type dictionary: dict
        :return list of string tokens, in the dictionary's iteration order
    """
    cli_tokens = []
    for option, value in dictionary.items():
        cli_tokens.append("--{}".format(option))
        if value is None:
            continue
        cli_tokens.append("{}".format(value))
    return cli_tokens

In [None]:
def test_policy_perf(dataset_folder, settings):
    """
        Test best seed of chosen experiments and get the rewards obtained.

        For each setting, the run folder of the experiment's best seed (read from the module-level
        `best_seeds` dictionary) is passed to `test_policy.main` with recording disabled and the
        `norender` flag set. The dictionaries given in `settings` are not modified.

        :param dataset_folder: Directory containing experiments to load (a trailing '/' is accepted but not required)
        :param settings: List of dictionaries, each one defining an experiment to load (must contain "expe_name")
        :return list containing, for each setting, the value returned by `test_policy.main`
                (presumably the rewards obtained - confirm against `test_policy`)
    """
    parser = test_policy.get_parser()
    parser.add_argument('--expe_name', type=str)
    ep_returns = []

    for setting in settings:
        expe_name = setting["expe_name"]
        run_name = expe_name + "_s" + str(best_seeds[expe_name])

        # Work on a copy: writing into the caller's dictionary would leak "norender", "record"
        # and "fpath" into any later reuse of the same settings (e.g. with record_policy).
        run_args = dict(setting)
        run_args["fpath"] = os.path.join(dataset_folder, expe_name, run_name)
        run_args["record"] = False
        run_args["norender"] = None  # None makes dict_to_args_str emit a bare "--norender" flag

        args = parser.parse_args(dict_to_args_str(run_args))
        ep_returns.append(test_policy.main(args))
    return ep_returns

In [None]:
def record_policy(dataset_folder, settings):
    """
        Record the policy associated to the best seed of chosen experiments.

        For each setting, the run folder of the experiment's best seed (read from the module-level
        `best_seeds` dictionary) is passed to `test_policy.main` with recording enabled. The recording
        path given in the setting is extended with the run's name ("<expe_name>_s<best seed>").
        The dictionaries given in `settings` are not modified.

        :param dataset_folder: Directory containing experiments to load (a trailing '/' is accepted but not required)
        :param settings: List of dictionaries, each one defining an experiment to load
                         (must contain "expe_name" and "recording_path")
    """
    parser = test_policy.get_parser()
    parser.add_argument('--expe_name', type=str)

    for setting in settings:
        expe_name = setting["expe_name"]
        run_name = expe_name + "_s" + str(best_seeds[expe_name])

        # Work on a copy: writing into the caller's dictionary would append the run's name
        # to "recording_path" again each time the same settings are reused.
        run_args = dict(setting)
        run_args["fpath"] = os.path.join(dataset_folder, expe_name, run_name)
        run_args["record"] = True
        run_args["recording_path"] = os.path.join(setting["recording_path"], run_name)

        args = parser.parse_args(dict_to_args_str(run_args))
        test_policy.main(args)

# Policies

Modify the settings below to load the best seed of one of your experiments.

In [None]:
# Record the best seed of an experiment. Each dictionary describes one experiment; its entries
# are turned into command-line arguments for `test_policy`.
record_policy(data_folder, settings=[
    {
        "env": "parametric-continuous-parkour-v0",
        "embodiment": "fish",
        "bests": True, # Whether the results on the test set should be ordered by performance (best performance first)
        "lidars_type": "full", # Use 'up' for climbers, 'down' for walkers and 'full' for swimmers
        "deterministic": None, # Leave this set to None (passed as a flag without value)
        "len": 2000, # Leave this set to 2000
        "expe_name" : "04-01_benchmark_parkour_RIAC_walker_type_fish",
        "episode_ids": "0", # Nth best (or worst if bests=False) tasks to record (-1 means all the episodes). Separate tasks with '/'.
        "recording_path": "" # Path where the video is saved (the run's name is appended to it by record_policy)
    },
])

In [None]:
# Evaluate the best seed of an experiment without recording it. Each dictionary describes one
# experiment; its entries are turned into command-line arguments for `test_policy`.
rewards = test_policy_perf(data_folder, settings=[
    {
        "env": "parametric-continuous-parkour-v0",
        "embodiment": "climbing_profile_chimpanzee",
        "bests": True, # Whether the results on the test set should be ordered by performance (best performance first)
        "lidars_type": "up", # Use 'up' for climbers, 'down' for walkers and 'full' for swimmers
        "deterministic": None, # Leave this set to None (passed as a flag without value)
        "len": 2000, # Leave this set to 2000
        "expe_name" : "10-08_subset_parkour_climbing_easy_parkour_1_teacher_Random",
        # NOTE(review): a walking test set is paired with a climber embodiment here - confirm this is intended.
        "fixed_test_set": 'walking_test_set_v1', # Test set to load (remove this if you want to load the test set used during the experiment)
        "episode_ids": "0", # Nth best (or worst if bests=False) tasks to select (-1 means all the episodes). Separate tasks with '/'.
        "recording_path": "" # Path to save the video (test_policy_perf forces record=False)
    },
])