# Preliminary baseline results

In [1]:
import os
import pickle
from pathlib import Path
from functools import partial

import torch
import numpy as np
import pandas as pd
import yaml
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
from bokeh.layouts import column
from bokeh.models import Band, ColumnDataSource, NumeralTickFormatter
from bokeh.palettes import plasma, Colorblind8
output_notebook()

from liftoff.liftoff_results import collect_results
# Pre-bind the results directory and the file name to look for, so that the
# rest of the notebook only has to pass `experiment_name`.
collect_results = partial(collect_results, results_dir='../results', names=['testing.pkl'])

## Fetch the results paths

In [2]:
def make_key(path):
    """Build a ``game-algo-trial`` identifier from a results file path.

    The pieces are read from fixed positions in ``path.parts``:
    the algorithm is the last ``_``-separated token of component 2, the game
    name is component 3 with its first seven characters dropped, and the
    trial is component 4.
    """
    parts = path.parts
    algo = parts[2].rsplit('_', 1)[-1]
    game_name = parts[3][7:]
    trial = parts[4]
    return f"{game_name}-{algo}-{trial}"

In [3]:
experiments = ("two_way_dqn", "two_way_ddqn", "two_way_per")

# Gather the trials of every experiment into one flat list.
exp_results = [
    trial
    for exp_name in experiments
    for trial in collect_results(experiment_name=exp_name)
]

# Keep only the trials whose second field is truthy and point at their
# `testing.pkl` file.
file_paths = [
    Path(f"{res[0]}/testing.pkl")
    for res in exp_results
    if res[1]
]

## Preprocess the results into dataframes

In [4]:
# Metrics copied into the dataframe; `get_values` reads the 'value' field of
# every row stored under each of these keys.
value_keys = ['ep_cnt', 'rw_per_ep', 'step_per_ep', 'rw_per_step', 'max_q', 'test_fps']
# Index fields copied into the dataframe; `get_idxs` reads them from the rows
# stored under 'rw_per_ep'.
idx_keys = ['step_idx', 'time_idx']

def get_idxs(result, idx_types):
    """Collect the requested index fields from the ``'rw_per_ep'`` rows.

    Returns a dict mapping each name in ``idx_types`` to the list of that
    field's values, one per row of ``result['rw_per_ep']``.
    """
    return {
        kind: [entry[kind] for entry in result['rw_per_ep']]
        for kind in idx_types
    }

def get_values(result, value_types):
    """Collect the ``'value'`` field of every row for each requested metric.

    Returns a dict mapping each name in ``value_types`` to the list of
    ``row['value']`` taken from ``result[name]``.
    """
    return {
        metric: [entry['value'] for entry in result[metric]]
        for metric in value_types
    }

def merge_experiments(results):
    """Tag each per-run dataframe with its identity and stack them.

    Args:
        results: dict mapping a ``game-algo-trial`` key to a dataframe.

    Returns:
        A single dataframe with all rows, a fresh integer index and the
        extra string columns ``game``, ``algo`` and ``trial``.

    Note:
        The dataframes in ``results`` are modified in place (the three
        columns are added to them).
    """
    for key, frame in results.items():
        # Split from the right so that a game name which itself contains a
        # hyphen still yields exactly three fields.
        game, algo, trial = key.rsplit('-', 2)
        frame['game'] = game
        frame['algo'] = algo
        frame['trial'] = trial
    return pd.concat(list(results.values()), ignore_index=True, sort=False)

def make_df(results_dict):
    """Turn the raw per-run result dicts into one long dataframe.

    For every entry, the index fields (``idx_keys``) and metric values
    (``value_keys``) become the columns of a dataframe; the per-run frames
    are then combined by ``merge_experiments``.
    """
    frames = {}
    for name, raw in results_dict.items():
        # index columns first, then the metric columns
        columns = dict(get_idxs(raw, idx_keys))
        columns.update(get_values(raw, value_keys))
        frames[name] = pd.DataFrame(data=columns)

    return merge_experiments(frames)

## Visualize the results

In [5]:
def transform(data, groupby, y, transform='mean'):
    """Aggregate column ``y`` within each ``groupby`` group.

    ``transform`` selects the statistic: ``'mean'`` or ``'std'``; anything
    else raises ``ValueError``. The result is a flat dataframe holding the
    grouping columns and the aggregated ``y``.
    """
    if transform not in ('mean', 'std'):
        raise ValueError

    grouped = data.groupby(groupby)[y]
    reduced = grouped.mean() if transform == 'mean' else grouped.std()
    return reduced.to_frame().reset_index()


def set_figure(title, y_axis_scale='linear', y_axis_format=".00"):
    """Create the base bokeh figure shared by the plots in this notebook.

    ``y_axis_scale`` is passed to bokeh as the y axis type and
    ``y_axis_format`` is the format string of the y tick labels. On a
    ``'log'`` scale faint minor grid lines are enabled as well.
    """
    figure_options = dict(
        title=title,
        plot_height=500,
        plot_width=950,
        y_axis_type=y_axis_scale,
        background_fill_color='#ffffff',
    )
    fig = figure(**figure_options)
    fig.yaxis.formatter = NumeralTickFormatter(format=y_axis_format)

    if y_axis_scale != 'log':
        return fig

    # log scale only: show light minor grid lines
    fig.ygrid.minor_grid_line_color = '#5B5B5B'
    fig.ygrid.minor_grid_line_alpha = 0.1
    return fig


def add_trials(data, x, y, event, trials, event_name, fig, color, aggregate):
    """Draw one line per trial of a single event onto ``fig``.

    Args:
        data: dataframe holding all events and trials.
        x, y: names of the columns plotted on each axis.
        event: name of the column that identifies the event.
        trials: name of the column that identifies the trial.
        event_name: value of ``event`` whose trials are drawn.
        fig: figure to draw on; it is also the return value.
        color: line color shared by all trials of this event.
        aggregate: when truthy the trial lines are drawn thinner and start
            at a lower opacity (0.7 instead of 1).

    Returns:
        ``fig`` with the trial lines added.
    """
    dff = data.loc[data[event] == event_name]

    trial_names = dff[trials].unique()

    alpha = 1 if not aggregate else 0.7
    alpha_stop = 0.2
    # Opacity fades linearly from `alpha` to `alpha_stop` across the trials.
    # With a single trial there is nothing to fade between, and dividing by
    # `len(trial_names) - 1` would raise ZeroDivisionError, so use at least
    # one step.
    fade_steps = max(len(trial_names) - 1, 1)
    alpha_step = (alpha - alpha_stop) / fade_steps

    lw = 3 if not aggregate else 2

    for trial_name in trial_names:
        df = dff.loc[dff[trials] == trial_name]
        # sort along x so the line is drawn left to right
        df = df.sort_values(by=[x]).reset_index(drop=True)
        fig.line(x=x, y=y, legend=event_name, source=df, line_width=lw, color=color, alpha=alpha)
        alpha -= alpha_step
    return fig


def add_band(x, y, hue_mean, hue_var, hue_name, fig, color):
    """Shade the region ``mean - spread`` .. ``mean + spread`` on ``fig``.

    ``hue_mean`` and ``hue_var`` hold the centre and the spread of column
    ``y`` along ``x``. The bounds are stored on ``hue_var`` as the columns
    ``lower`` and ``upper`` (so ``hue_var`` is modified), then drawn as one
    semi-transparent patch. Returns ``fig``.
    """
    hue_var['lower'] = hue_mean[y] - hue_var[y]
    hue_var['upper'] = hue_mean[y] + hue_var[y]

    # Outline of the band: left to right along the lower bound, then back
    # right to left along the upper bound.
    xs = hue_var[x].values
    band_x = np.concatenate([xs, xs[::-1]])
    band_y = np.concatenate([hue_var['lower'].values, hue_var['upper'].values[::-1]])

    fig.patch(x=band_x, y=band_y, legend=hue_name, color=color, alpha=0.3)
    return fig


def lineplot(data, x, y, hue=None, trials=None, aggregate=True, legend_pos='top_left', y_axis_format="00.00",
             title="Plot", y_axis_scale='linear'):
    """Plot ``y`` against ``x`` with one color per value of ``hue``.

    Args:
        data: long-format dataframe.
        x, y: names of the columns plotted on each axis.
        hue: name of the column whose values define the groups. When
            ``None`` all rows are treated as one group labelled ``'all'``.
        trials: name of the column identifying individual trials. When
            given, every trial is drawn as its own line; otherwise a
            mean +/- std band is drawn for each group.
        aggregate: when truthy, also draw the per-group mean line.
        legend_pos: legend location.
        y_axis_format: format string of the y tick labels.
        title: figure title.
        y_axis_scale: y axis type, e.g. ``'linear'`` or ``'log'``.

    Returns:
        The bokeh figure.
    """
    # set the figure
    fig = set_figure(title, y_axis_format=y_axis_format, y_axis_scale=y_axis_scale)

    if hue is None:
        # Grouping by `[x, None]` would fail; add a constant column (on a
        # copy, the caller's frame is untouched) so all rows form one group.
        hue = '_hue'
        data = data.assign(**{hue: 'all'})

    # get the mean of each event
    y_mean = transform(data, [x, hue], y)
    # The spread is only needed for the band and is the same table for every
    # group, so compute it once instead of once per loop iteration.
    y_var = None if trials else transform(data, [x, hue], y, transform='std')

    # get the names of each event we're plotting
    hues = y_mean[hue].unique()
    palette = plasma(len(hues))

    # iterate through events and create a line for each
    for hue_name, color in zip(hues, palette):

        hue_mean = y_mean.loc[y_mean[hue] == hue_name]
        if aggregate:
            fig.line(x=x, y=y, legend=hue_name, source=hue_mean, line_width=4, color=color, alpha=1)

        if trials:
            fig = add_trials(data, x, y, hue, trials, hue_name, fig, color, aggregate)
        else:
            # copy: add_band writes its bounds onto the frame it is given
            hue_var = y_var.loc[y_var[hue] == hue_name].copy()
            fig = add_band(x, y, hue_mean, hue_var, hue_name, fig, color)

    # additional settings
    fig.legend.location = legend_pos
    fig.legend.click_policy = "hide"
    return fig


def simple_plot(data, x, y, trials):
    """Plot ``y`` against ``x`` with one line per trial.

    Args:
        data: dataframe with the columns named by ``x``, ``y`` and ``trials``.
        x, y: names of the columns plotted on each axis.
        trials: name of the column identifying the trial of each row.

    Returns:
        The bokeh figure, one legend entry (``trial <id>``) per trial.
    """
    fig = set_figure(title="Simple Plot", y_axis_format='0.0000[00]')

    trial_vals = data[trials].unique()
    trial_names = [f'trial {trial_id}' for trial_id in trial_vals]

    if len(Colorblind8) < len(trial_vals):
        # More trials than colorblind-safe colors: build a palette with one
        # color per trial. (Sizing it by `len(trials)` would count the
        # characters of the column name and could leave trials without a
        # color, which `zip` below would silently drop.)
        palette = plasma(len(trial_vals))
    else:
        palette = Colorblind8

    for trial, color, legend in zip(trial_vals, palette, trial_names):
        df = data.loc[data[trials] == trial]
        fig.line(x=df[x], y=df[y], legend=legend, line_width=4, color=color, alpha=1)

    # additional settings
    fig.legend.click_policy = "hide"
    fig.legend.location = "top_left"
    return fig

### Some data utils

In [6]:
def filter_by(data, game, algo=None):
    """Select the rows of one game, optionally restricted to some algorithms.

    Args:
        data: dataframe with ``game`` and ``algo`` columns.
        game: value of the ``game`` column to keep.
        algo: algorithm name or list of names to keep. ``None`` or an empty
            list keeps every algorithm.

    Returns:
        The matching rows of ``data``.
    """
    game_mask = data['game'] == game
    if not algo:
        return data.loc[game_mask]

    # `isin` only accepts list-likes; wrap a bare name so that
    # `filter_by(data, game, 'per')` works as expected.
    if isinstance(algo, str):
        algo = [algo]
    return data.loc[game_mask & data['algo'].isin(algo)]


def smooth_data(data, window=5):
    """Add a ``roll`` column: the rolling mean of ``rw_per_ep`` within each run.

    A run is identified by the ``trial`` column together with ``game`` and
    ``algo`` when those columns are present, so the window never mixes
    rewards from different games or algorithms that share a trial id.

    Args:
        data: dataframe with ``trial`` and ``rw_per_ep`` columns.
        window: size of the rolling window (partial windows at the start
            of a run are averaged over the values available).

    Returns:
        A new dataframe with the runs stacked in order of first appearance.
        It has a fresh integer index, the previous index kept in an
        ``index`` column, and the added ``roll`` column.
    """
    run_keys = [col for col in ('game', 'algo') if col in data.columns] + ['trial']

    runs = []
    # sort=False keeps the runs in the order they first appear in `data`
    for _, run in data.groupby(run_keys, sort=False):
        run = run.reset_index()
        run['roll'] = run['rw_per_ep'].rolling(window=window, min_periods=1).mean()
        runs.append(run)
    return pd.concat(runs, ignore_index=True, sort=False)

## Load data

In [7]:
def _read_pickle(path):
    """Load one pickled results file, closing the file handle afterwards."""
    # NOTE: unpickling can execute arbitrary code; only load result files
    # produced by trusted runs.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


results_dict = {make_key(path): _read_pickle(path) for path in file_paths}
for k in results_dict.keys():
    print(k)
    
data = make_df(results_dict)

# Rolling mean of the episodic reward (window of 5, partial windows allowed),
# computed separately for every (game, algo, trial) run.
smoothed = (data.assign(smooth_rw_per_ep=data.groupby(['game','algo','trial'], as_index=False)[['rw_per_ep']]
                           .rolling(5, min_periods=1).mean().fillna(0)
                           .reset_index(0, drop=True)))

smoothed.sample(10)

seaquest-dqn-0
road_runner-dqn-2
road_runner-dqn-0
road_runner-dqn-1
seaquest-ddqn-0
seaquest-ddqn-1
road_runner-ddqn-2
road_runner-ddqn-1
seaquest-per-2
seaquest-per-0
seaquest-per-1
road_runner-per-2
road_runner-per-0
road_runner-per-1


Unnamed: 0,step_idx,time_idx,ep_cnt,rw_per_ep,step_per_ep,rw_per_step,max_q,test_fps,game,algo,trial,smooth_rw_per_ep
119,2500000,11372.303282,100,2054.8,1949.88,1.053808,4.68632,79.438078,seaquest,ddqn,0,910.2
150,5000000,22753.215996,100,23566.0,967.92,24.347053,13.47975,41.426313,road_runner,ddqn,2,21170.2
138,5000000,23118.872598,100,625.6,1029.18,0.607863,9.475384,45.445232,seaquest,ddqn,1,1684.28
239,12500000,58748.413208,100,6920.6,3057.6,2.263409,36.34302,126.858585,seaquest,per,2,4780.62
211,17000000,86646.781386,100,11176.0,3452.54,3.237037,16.01173,114.828877,road_runner,ddqn,1,15759.2
261,5000000,21933.605861,100,2260.0,2397.65,0.94259,31.50523,99.253633,seaquest,per,0,2028.84
110,8500000,36114.79863,100,5.0,200.86,0.024893,3695045.0,9.594442,road_runner,dqn,1,157.2
274,11500000,53422.601404,100,3603.3,2882.21,1.250186,28.84564,122.775713,seaquest,per,0,4506.48
365,5000000,21869.242908,100,33741.0,1375.13,24.536589,23.75749,62.42334,road_runner,per,1,25635.4
241,13500000,63970.431416,100,7835.1,3243.58,2.415572,36.29956,125.240697,seaquest,per,2,6420.42


## Seaquest

### Smoothed data

In [9]:
# Title names both algorithms being compared (the plot is filtered to PER and DDQN).
show(lineplot(filter_by(smoothed, 'seaquest', ['per', 'ddqn']),
              'step_idx', 'smooth_rw_per_ep', 'algo', 'trial', aggregate=True,
              title="Smoothed Seaquest DDQN vs PER"))

### Raw data

In [12]:
# Raw episodic reward on Seaquest, one color per algorithm, one line per trial.
show(
    lineplot(
        filter_by(data, 'seaquest', algo=['per', 'ddqn']),
        x='step_idx',
        y='rw_per_ep',
        hue='algo',
        trials='trial',
        aggregate=True,
        title="Seaquest DDQN vs PER",
    )
)

## RoadRunner

### Smoothed data

In [13]:
# Title marks this as the smoothed variant so it can be told apart from the raw plot.
show(lineplot(filter_by(smoothed, 'road_runner'),
              'step_idx', 'smooth_rw_per_ep', 'algo', 'trial', aggregate=True,
              title="Smoothed RoadRunner - DQN vs DDQN vs PER"))

### Raw data

In [14]:
# Raw episodic reward on RoadRunner, one color per algorithm, one line per trial.
show(
    lineplot(
        filter_by(data, 'road_runner'),
        x='step_idx',
        y='rw_per_ep',
        hue='algo',
        trials='trial',
        aggregate=True,
        title="RoadRunner - DQN vs DDQN vs PER",
    )
)