# PER vs DDQN in Alien, H.E.R.O. and Chopper Command

In [1]:
import os
import pickle
import yaml
from pathlib import Path
from functools import partial
import pyparsing as pyparse

import pandas as pd
from bokeh.io import show, output_notebook
output_notebook()

from liftoff.liftoff_results import collect_all_results

from lineplot import simple_plot, lineplot

# Pre-bind the results directory and the file names to collect, so the cells
# below only have to supply the experiment names.
collect_results = partial(collect_all_results, results_dir='../results/dgx', names=['evaluation.pkl', 'cfg.yaml', 'out'])

In [2]:
def _load_resource(res_path):
    """Load a single resource file, choosing the loader from its suffix.

    ``.yaml`` files are parsed with ``yaml.safe_load``, ``.pkl`` files are
    unpickled, and files without a suffix are read as a list of text lines.
    Every file is opened in a ``with`` block so the handle is closed even if
    the loader raises.

    Raises:
        ValueError: if the suffix is none of the above.
    """
    suffix = res_path.suffix
    if suffix == ".yaml":
        with open(res_path, 'r') as handle:
            return yaml.safe_load(handle)
    if suffix == ".pkl":
        # NOTE: unpickling can execute arbitrary code; only point this at
        # result files you produced yourself.
        with open(res_path, 'rb') as handle:
            return pickle.load(handle)
    if suffix == "":
        with open(res_path, 'r') as handle:
            return handle.readlines()
    print(suffix)
    raise ValueError("Unknown file extension, don't know how to open it.")


def expand_resources(experiments):
    """Replace the resource file names of each experiment by their contents.

    Args:
        experiments: iterable of ``(exp_path, resources)`` pairs, where
            ``resources`` is an iterable of file names relative to
            ``exp_path``.

    Returns:
        A list with one entry per experiment; each entry is the list of
        loaded resources, in the same order as the file names were given.

    Raises:
        ValueError: if a file has an extension no loader is defined for.
    """
    return [
        [_load_resource(Path(exp_path) / res) for res in resources]
        for exp_path, resources in experiments
    ]


def get_evaluation_reward(txt):
    """ Hack in which we parse the text logs and retrieve the evaluation results.

    A line containing both 'evaluation' and 'elapsed' marks an evaluation
    entry. The reward is read from the line two positions below it: ANSI
    escape sequences are removed, the characters in the window ``[-16:-2]``
    are taken, leading dots and surrounding whitespace are dropped and the
    remainder is converted to ``float``.

    Args:
        txt: sequence of log lines (e.g. the result of ``readlines()``).

    Returns:
        List of floats, one per evaluation entry whose result line is present.

    Raises:
        ValueError: if the extracted window is not a valid float.
    """
    ESC = pyparse.Literal('\x1b')
    integer = pyparse.Word(pyparse.nums)
    escapeSeq = pyparse.Combine(ESC + '[' + pyparse.Optional(pyparse.delimitedList(integer, ';')) +
                                pyparse.oneOf(list(pyparse.alphas)))
    # Build the suppressing grammar once instead of on every matched line.
    ansi_stripper = pyparse.Suppress(escapeSeq)

    n_lines = len(txt)
    mean_ep_rw = []
    for i, line in enumerate(txt):
        if 'evaluation' not in line or 'elapsed' not in line:
            continue

        if i + 2 >= n_lines:
            # The log ends before the result line was written (truncated or
            # still-running experiment). No later header can have its result
            # line either, so stop instead of raising IndexError.
            break

        rewards = ansi_stripper.transformString(txt[i + 2])[-16:-2]
        # The fixed-width window may start inside the dotted filler that
        # precedes the number; drop it before converting.
        mean_ep_rw.append(float(rewards.lstrip('.').strip()))
    return mean_ep_rw


def fix_rmsprop_arg(cfg):
    """ Overwrite ``rmsprop_eps`` in place for runs made at one specific commit.

    For that commit the rmsprop_eps argument had no effect, so the value
    stored in the config is replaced by 0.00001. Configs from any other
    commit are left untouched. Returns nothing.
    """
    affected_commit = "5719dfe@experiments"
    if cfg['commit'] != affected_commit:
        return
    cfg['rmsprop_eps'] = 0.00001
    

def get_experiment_label(cfg, exp_attrs):
    """ Build a label such as ``PER_rmsprop_eps:0.01_rmsprop_momentum:0.0``.

    The label starts with "PER" when ``cfg['prioritized']`` is truthy and
    with "DDQN" otherwise. For every key of ``exp_attrs``, in order, a
    ``key:value`` part follows, where the value comes from ``cfg`` if the key
    is present there and from ``exp_attrs`` otherwise. Parts are joined
    with underscores.
    """
    parts = ["PER" if cfg['prioritized'] else "DDQN"]
    for name, fallback in exp_attrs.items():
        value = cfg[name] if name in cfg else fallback
        parts.append(f"{name}:{value}")
    return "_".join(parts)


def make_df(resources):
    """ Stack the evaluation results of all trials into one long DataFrame.

    Each entry of ``resources`` must unpack into four items; the first is
    the trial config and the last is the list of evaluation rewards. The
    i-th reward (1-based) is placed at ``step_idx = i * 250000``.

    Returns a DataFrame with the columns ``step_idx``, ``rw_per_ep``,
    ``algo`` (label built from the config and EXP_ATTRS), ``game`` and
    ``trial`` and a fresh integer index.
    """
    eval_every = 250000
    frames = []
    for cfg, _, _, eval_result in resources:
        n_evals = len(eval_result)
        frame = pd.DataFrame(data={
            "step_idx": [k * eval_every for k in range(1, n_evals + 1)],
            "rw_per_ep": eval_result,
        }).assign(
            algo=get_experiment_label(cfg, EXP_ATTRS),
            game=cfg['game'],
            trial=cfg['run_id'],
        )
        frames.append(frame)
    return pd.concat(frames, ignore_index=True, sort=False)


def load_and_prep(exp_name):
    """ Collect, load and patch up every trial of the experiment ``exp_name``.

    Returns a list with one entry per trial. Each entry is the list of
    loaded resources with the evaluation rewards parsed from the entry at
    index 1 appended at the end; the config at index 0 has had its
    ``rmsprop_eps`` corrected in place where needed.
    """
    per_experiment = collect_results(experiment_names=[exp_name])

    # The results come grouped by experiment; flatten them into a single
    # list of trials.
    trials = []
    for exp in per_experiment:
        trials.extend(exp)

    # Swap the file names for the loaded contents (yaml, pickles, etc.).
    resources = expand_resources(trials)

    for trial_resources in resources:
        # Parse the text log and keep the evaluation rewards as an extra item.
        eval_rewards = get_evaluation_reward(trial_resources[1])
        trial_resources.append(eval_rewards)

        # Correct the rmsprop_eps value recorded in the config.
        fix_rmsprop_arg(trial_resources[0])

    return resources

In [3]:
# Config keys that tell the experiments apart, mapped to the value used in the
# label when a trial's config does not contain the key (see get_experiment_label).
EXP_ATTRS = {'rmsprop_eps': 0.00001, 'rmsprop_momentum': 0.0}

## Alien DDQN vs PER

In [4]:
# Load every trial of the 'alien' experiment and print the label each one is
# given, as a quick overview of the configurations that were collected.
resources = load_and_prep('alien')

for res in resources:
    # res[0] is the entry passed as the config to the label helper.
    print(get_experiment_label(res[0], EXP_ATTRS))

PER_rmsprop_eps:1e-05_rmsprop_momentum:0.0
PER_rmsprop_eps:1e-05_rmsprop_momentum:0.0
DDQN_rmsprop_eps:1e-05_rmsprop_momentum:0.0
DDQN_rmsprop_eps:1e-05_rmsprop_momentum:0.0
DDQN_rmsprop_eps:0.01_rmsprop_momentum:0.0
DDQN_rmsprop_eps:0.01_rmsprop_momentum:0.0
PER_rmsprop_eps:0.01_rmsprop_momentum:0.0
PER_rmsprop_eps:0.01_rmsprop_momentum:0.0
PER_rmsprop_eps:0.01_rmsprop_momentum:0.95
PER_rmsprop_eps:0.01_rmsprop_momentum:0.95
DDQN_rmsprop_eps:0.01_rmsprop_momentum:0.95
DDQN_rmsprop_eps:0.01_rmsprop_momentum:0.95


In [5]:
# Stack all trials into one long DataFrame (one row per evaluation point) and
# display five random rows as a sanity check.
data = make_df(resources)
data.sample(5)

Unnamed: 0,step_idx,rw_per_ep,algo,game,trial
1097,24500000,2915.795,DDQN_rmsprop_eps:0.01_rmsprop_momentum:0.0,alien,1
1466,20250000,1870.31,PER_rmsprop_eps:0.01_rmsprop_momentum:0.0,alien,1
1411,6500000,1046.798,PER_rmsprop_eps:0.01_rmsprop_momentum:0.0,alien,1
313,28500000,1963.486,PER_rmsprop_eps:1e-05_rmsprop_momentum:0.0,alien,1
1033,8500000,1222.038,DDQN_rmsprop_eps:0.01_rmsprop_momentum:0.0,alien,1


In [6]:
# Smooth every (game, algo, trial) curve with a trailing mean over up to 10
# evaluation points. `transform` hands back a Series indexed exactly like
# `data`, so it can be assigned directly without depending on the index layout
# that `groupby(...).rolling(...)` produces, which differs between pandas
# versions.
smoothed = data.assign(
    smooth_rw_per_ep=(
        data.groupby(['game', 'algo', 'trial'])['rw_per_ep']
        .transform(lambda rw: rw.rolling(10, min_periods=1).mean())
        .fillna(0)
    )
)

# smoothed.sample(10)

In [7]:
# Render the smoothed Alien reward curves against the training step.
# NOTE(review): 'algo' and 'trial' presumably name the grouping column and the
# replicate column that aggregate=True combines; confirm against lineplot.
show(lineplot(smoothed, 'step_idx', 'smooth_rw_per_ep', 'algo', 'trial', aggregate=True, title="Smoothed Alien"))

## H.E.R.O.

In [8]:
# Load every trial of the 'hero' experiment and print the label each one is
# given, as a quick overview of the configurations that were collected.
resources = load_and_prep('hero')

for res in resources:
    # res[0] is the entry passed as the config to the label helper.
    print(get_experiment_label(res[0], EXP_ATTRS))

PER_rmsprop_eps:1e-05_rmsprop_momentum:0.0
PER_rmsprop_eps:1e-05_rmsprop_momentum:0.0
DDQN_rmsprop_eps:1e-05_rmsprop_momentum:0.0
DDQN_rmsprop_eps:1e-05_rmsprop_momentum:0.0
DDQN_rmsprop_eps:0.01_rmsprop_momentum:0.0
DDQN_rmsprop_eps:0.01_rmsprop_momentum:0.0
PER_rmsprop_eps:0.01_rmsprop_momentum:0.0
PER_rmsprop_eps:0.01_rmsprop_momentum:0.0
PER_rmsprop_eps:0.01_rmsprop_momentum:0.95
PER_rmsprop_eps:0.01_rmsprop_momentum:0.95
DDQN_rmsprop_eps:0.01_rmsprop_momentum:0.95
DDQN_rmsprop_eps:0.01_rmsprop_momentum:0.95


In [9]:
# Stack all H.E.R.O. trials into one long DataFrame (one row per evaluation
# point).
data = make_df(resources)

# Smooth every (game, algo, trial) curve with a trailing mean over up to 10
# evaluation points. `transform` hands back a Series indexed exactly like
# `data`, so it can be assigned directly without depending on the index layout
# that `groupby(...).rolling(...)` produces, which differs between pandas
# versions.
smoothed = data.assign(
    smooth_rw_per_ep=(
        data.groupby(['game', 'algo', 'trial'])['rw_per_ep']
        .transform(lambda rw: rw.rolling(10, min_periods=1).mean())
        .fillna(0)
    )
)

# smoothed.sample(10)

In [10]:
# Render the smoothed H.E.R.O. reward curves against the training step.
# NOTE(review): 'algo' and 'trial' presumably name the grouping column and the
# replicate column that aggregate=True combines; confirm against lineplot.
show(lineplot(smoothed, 'step_idx', 'smooth_rw_per_ep', 'algo', 'trial', aggregate=True, title="Smoothed H.E.R.O."))

## Chopper Command

In [11]:
# Load every trial of the 'chopper_command' experiment and stack them into one
# long DataFrame (one row per evaluation point).
resources = load_and_prep('chopper_command')
data = make_df(resources)

# Smooth every (game, algo, trial) curve with a trailing mean over up to 10
# evaluation points. `transform` hands back a Series indexed exactly like
# `data`, so it can be assigned directly without depending on the index layout
# that `groupby(...).rolling(...)` produces, which differs between pandas
# versions.
smoothed = data.assign(
    smooth_rw_per_ep=(
        data.groupby(['game', 'algo', 'trial'])['rw_per_ep']
        .transform(lambda rw: rw.rolling(10, min_periods=1).mean())
        .fillna(0)
    )
)

In [12]:
# Render the smoothed Chopper Command reward curves against the training step.
# NOTE(review): 'algo' and 'trial' presumably name the grouping column and the
# replicate column that aggregate=True combines; confirm against lineplot.
show(lineplot(smoothed, 'step_idx', 'smooth_rw_per_ep', 'algo', 'trial', aggregate=True,
              title="Smoothed ChopperCommand"))

In [13]:
# Same plot call as the previous cell, but without the 'trial' argument.
# NOTE(review): how lineplot aggregates when no trial column is given is not
# visible here; confirm against lineplot. The title is identical to the
# previous figure, so consider a distinguishing title.
show(lineplot(smoothed, 'step_idx', 'smooth_rw_per_ep', 'algo', aggregate=True,
              title="Smoothed ChopperCommand"))