# Do a seaborn like lineplot for bokeh

In [1]:
import numpy as np
import pandas as pd

from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
from bokeh.layouts import column
from bokeh.models import Band, ColumnDataSource, NumeralTickFormatter
from bokeh.palettes import plasma
output_notebook()

from liftoff.liftoff_results import collect_results
from functools import partial

In [2]:
collect_results = partial(collect_results, results_dir='../results', names=['results.msgpack'])

def get_last_experiment(experiment_name):
    result_paths = collect_results(experiment_name=experiment_name)
    result_paths = [p for p in result_paths if p[1]]
    result_data_frames = [pd.read_msgpack(f'{p[0]}/{p[1][0]}') for p in result_paths]
    return pd.concat(result_data_frames, ignore_index=True, sort=False)


def get_with_timestamps(experiment_name, timestamps):
    result_paths = [collect_results(experiment_name=experiment_name, timestamp=tstmp) for tstmp in timestamps]
    result_paths = [[p for p in tstmp if p[1]] for tstmp in result_paths]
    # result_paths = [p for tstmp in result_paths for p in tstmp]
    result_data_frames = [[pd.read_msgpack(f'{p[0]}/{p[1][0]}') for p in tstmp] for tstmp in result_paths]
    # ugly fix
    result_data_frames = [pd.concat(tstmp, ignore_index=True, sort=False) for tstmp in result_data_frames]
    trial_no = 0
    for i in range(len(result_data_frames)):
        result_data_frames[i]['trial'] += trial_no
        trial_no = result_data_frames[i]['trial'].unique().max()
    return pd.concat(result_data_frames, ignore_index=True, sort=False)


def get_data(experiment_name, timestamps=None):
    if timestamps:
        return get_with_timestamps(experiment_name, timestamps)
    return get_last_experiment(experiment_name)

In [3]:
def monkey_patch(dest, src, col='sampling_type', vals=['rank-td_stoch:0.05']):
    src = src.loc[src[col].isin(vals)]
    return pd.concat([dest, src], ignore_index=True, sort=False)

In [35]:
def transform(data, groupby, y, transform='mean'):
    if transform == 'mean':
        return data.groupby(groupby)[y].mean().to_frame().reset_index()
    elif transform == 'std':
        return data.groupby(groupby)[y].std().to_frame().reset_index()
    else:
        raise ValueError


def set_figure(title, y_axis_scale='linear', y_axis_format=".00"):
    fig = figure(title=title, plot_height=500, plot_width=950,
                  y_axis_type=y_axis_scale, background_fill_color='#ffffff')
    fig.yaxis.formatter=NumeralTickFormatter(format=y_axis_format)
    
    if y_axis_scale == 'log':
        fig.ygrid.minor_grid_line_color = '#5B5B5B'
        fig.ygrid.minor_grid_line_alpha = 0.1
    
    return fig


def add_trials(data, x, y, event, trials, event_name, fig, color, aggregate):
    dff = data.loc[data[event] == event_name]
    
    trial_names = dff[trials].unique()
    
    alpha = 1 if not aggregate else 0.7
    alpha_stop = 0.2
    alpha_step = (alpha - alpha_stop) / (len(trial_names) - 1)
    
    lw = 3 if not aggregate else 2
    
    for trial_name in trial_names:
        df = dff.loc[dff[trials] == trial_name]
        df = df.sort_values(by=[x]).reset_index(drop=True)
        fig.line(x=x, y=y, legend=event_name, source=df, line_width=lw, color=color, alpha=alpha)
        alpha -= alpha_step
    return fig


def add_band(x, y, hue_mean, hue_var, hue_name, fig, color):
    hue_var['lower'] = hue_mean[y] - hue_var[y]
    hue_var['upper'] = hue_mean[y] + hue_var[y]
    
    # Bollinger shading glyph:
    band_x = np.append(hue_var[x].values, hue_var[x].values[::-1])
    band_y = np.append(hue_var['lower'].values, hue_var['upper'].values[::-1])
    
    fig.patch(x=band_x, y=band_y, legend=hue_name, color=color, alpha=0.3)
    
    return fig


def lineplot(data, x, y, hue=None, trials=None, aggregate=True, legend_pos='top_left', y_axis_format=".00",
             title="Plot", y_axis_scale='linear'):
    # set the figure
    fig = set_figure(title, y_axis_format=y_axis_format, y_axis_scale=y_axis_scale)        
    
    # get the mean of each event
    y_mean = transform(data, [x, hue], y)
    
    # get the names of each event we're plotting
    hues = y_mean[hue].unique()
    palette = plasma(len(hues))
    
    # iterate through events and create a line for each
    for hue_name, color in zip(hues, palette):
        
        hue_mean = y_mean.loc[y_mean[hue] == hue_name]
        if aggregate:
            fig.line(x=x, y=y, legend=hue_name, source=hue_mean, line_width=4, color=color, alpha=1)
        
        if trials:
            fig = add_trials(data, x, y, hue, trials, hue_name, fig, color, aggregate)
        else:
            y_var = transform(data, [x, hue], y, transform='std')
            hue_var = y_var.loc[y_var[hue] == hue_name].copy()
            fig = add_band(x, y, hue_mean, hue_var, hue_name, fig, color)
    
    # additional settings
    fig.legend.location = legend_pos
    fig.legend.click_policy="hide"
    return fig


def simple_plot(data, x=None):
    fig = set_figure(y_axis_format='0.0000[00]')
    
    columns = data.columns
    palette = plasma(len(columns))
    x = x if not None else data.index
    for col, color in zip(columns, palette):
        fig.line(x=data.index, y=data[col], legend=col, line_width=2, color=color, alpha=1)
    return fig

## Experiments with fixed priors

In [67]:
df1 = get_data('stochastic_rank', ['2018Dec07-131313', '2018Dec07-141639'])
df1.vote_optim_steps.fillna(df1.optim_steps, inplace=True)
df1.sample(3)

Unnamed: 0,N,mem_size,optim_steps,vote_optim_steps,trial,sampling_type
1122,9,1022,14227,14227.0,11,rank-bayes-β0.001-p0.5_stoch:0.05
1084,12,8190,21285,21285.0,14,rank-bayes-β0.001-p0.5_stoch:0.01
1845,9,1022,4925,4925.0,16,rank-bayes-β0.001-p0.9_stoch:0.01


In [68]:
df1.loc[df1['optim_steps'] != df1['vote_optim_steps']]

Unnamed: 0,N,mem_size,optim_steps,vote_optim_steps,trial,sampling_type
300,16,131070,1083023,1083021.0,3,rank-bayes-β0.001-p0.5_stoch:0.05
301,16,131070,640615,640606.0,9,rank-bayes-β0.001-p0.5_stoch:0.05
304,16,131070,1049623,1047258.0,0,rank-bayes-β0.001-p0.5_stoch:0.05
323,9,1022,15902,15901.0,2,rank-bayes-β0.001-p0.5_stoch:0.05
410,14,32766,102834,102754.0,3,rank-bayes-β0.001-p0.5_stoch:0.05
1475,8,510,3601,3210.0,16,rank-bayes-β0.001-p0.5_stoch:0.01
1543,11,4094,69367,21301.0,10,rank-bayes-β0.001-p0.5_det
1638,5,62,705,558.0,18,rank-bayes-β0.001-p0.5_stoch:0.05
1785,9,1022,4502,3844.0,16,rank-bayes-β0.001-p0.5_det
2085,5,62,920,782.0,16,rank-bayes-β0.001-p0.5_det


In [69]:
# show(lineplot(data, 'N', 'optim_steps', 'sampling_type', 'trial', y_axis_format='00'))

In [70]:
show(lineplot(df1, 'N', 'optim_steps', 'sampling_type', y_axis_format='00'))

In [71]:
# show(lineplot(data, 'N', 'vote_optim_steps', 'sampling_type'))

## Experiments with sampled priors

In [72]:
df2 = get_data('stochastic_rank_redux', [
    #'2018Dec10-001827',
    #'2018Dec10-003234',
    '2018Dec10-034221',
    '2018Dec10-034218'
])
df2.sample(3)

Unnamed: 0,step,loss,vote,optim_steps,vote_optim_steps,N,mem,trial,sampling_type,loss_fn
1267811,2964900.0,0.002298,1.0,3065171,3065171,16,131070,18,rank-bayes-β0.001-p0.5_stoch:0.05,mse
1949320,235200.0,0.007772,0.0,1238043,1238043,15,65534,18,rank-bayes-β0.01-p0.5_stoch:0.05,mse
1368174,97900.0,0.015247,0.0,154240,154240,15,65534,16,rank-bayes-β0.001-p0.5_det,mse


In [73]:
# df2.loc[df2['optim_steps'] != df2['vote_optim_steps']]

In [74]:
# show(lineplot(df2, 'N', 'optim_steps', 'sampling_type', y_axis_format='00'))

In [75]:
df3 = monkey_patch(df2, df1, col='sampling_type', vals=['rank-td_stoch:0.05', 'rank-td_det'])

In [76]:
show(lineplot(df3, 'N', 'optim_steps', 'sampling_type', y_axis_format='00'))

In [77]:
df2.loc[(df2['sampling_type'] == 'rank-bayes-β0.001-p0.5_stoch:0.05') & (df2['N'] == 15)]['trial'].unique()

array([ 7,  8,  3,  4,  2,  0,  5,  1,  6, 15, 16, 11, 17, 18, 12, 10, 13,
        9, 14])

## Anchored Priors

In [78]:
df2 = get_data('stochastic_rank_anchored', [
    '2018Dec10-115611',
    '2018Dec10-115620'
])
df3 = monkey_patch(df2, df1, col='sampling_type', vals=['rank-td_stoch:0.05', 'rank-td_det'])

In [79]:
show(lineplot(df3, 'N', 'optim_steps', 'sampling_type', y_axis_format='00'))

## Noisier MDP

In [5]:
rank_vs_bayes = get_data('endgame', ['2018Dec10-133632', '2018Dec10-133639'])
rank_vs_bayes.sample(3)

Unnamed: 0,step,loss,vote,optim_steps,vote_optim_steps,N,mem,trial,sampling_type,loss_fn
2529789,21987600.0,0.025226,0.0,24590130,,12,8190,11,rank-td_stoch:0.1,mse
855526,820000.0,0.006481,0.0,25968638,,11,4094,18,rank-td_stoch:0.1,mse
963211,11588500.0,0.02637,0.0,25968638,,11,4094,18,rank-td_stoch:0.1,mse


In [33]:
show(lineplot(rank_vs_bayes, 'N', 'optim_steps', 'sampling_type', y_axis_format='00'))

In [7]:
# show(lineplot(df4, 'N', 'optim_steps', 'sampling_type', 'trial', y_axis_format='00'))

In [10]:
uni_vs_bayes = get_data('endgame_uni', ['2018Dec14-132025', '2018Dec14-142237'])
uni_vs_bayes.sample(3)

Unnamed: 0,step,loss,vote,optim_steps,vote_optim_steps,N,mem,trial,sampling_type,loss_fn
7106023,3283100.0,0.007156,0.0,6095405,,10,2046,9,uniform-td_stoch:0.1,mse
6539562,1972100.0,0.00709,0.0,3341293,,9,1022,11,uniform-td_stoch:0.1,mse
3816095,10299400.0,0.058476,0.0,16543886,,11,4094,2,uniform-td_stoch:0.1,mse


In [11]:
show(lineplot(uni_vs_bayes, 'N', 'optim_steps', 'sampling_type', y_axis_format='00'))

In [36]:
df = monkey_patch(rank_vs_bayes, uni_vs_bayes, col='sampling_type', vals=['uniform-td_stoch:0.1'])
show(lineplot(df, 'N', 'optim_steps', 'sampling_type', y_axis_format='00', y_axis_scale='log'))

## Looking at the loss

In [None]:
data = get_data('test')
#data.to_pickle('results.pkl')
data.sample(10)

In [None]:
show(lineplot(data, 'N', 'optim_steps', 'sampling_type', y_axis_format='00'))

In [None]:
def get_by_N(data, N=9):
    df = data.loc[data['vote'] == 0]
    return df.loc[df['N'] == N]

In [None]:
N=7
show(lineplot(get_by_N(data, N=N), 'step', 'loss', 'sampling_type', 'trial', aggregate=False,
     legend_pos='top_right', y_axis_format='0.000', title=f"MSE Loss on deterministic MDP={N}. No of trials=5."))

In [None]:
def get_trial(data, trial_no, N):
    df = data.loc[data['trial'] == trial_no]
    df = df.loc[df['vote'] == 0]
    df = df.loc[df['N'] == N]
    df = df.drop(['vote', 'optim_steps', 'vote_optim_steps', 'N', 'mem', 'trial', 'loss_fn'], axis=1)
    return pd.pivot_table(df, values='loss', index=['step'], columns=['sampling_type']).reset_index(drop=True)

df_trial = get_trial(data, 3, 9)
df_trial.head(20)

In [None]:
data = get_data('test')
N=9
show(lineplot(get_by_N(data, N=N), 'step', 'loss', 'sampling_type', 'trial', aggregate=False,
     legend_pos='top_right', y_axis_format='0.000', title=f"MSE Loss on deterministic MDP={N}. No of trials=5."))

In [None]:
data = get_data('test')
N=9
show(lineplot(get_by_N(data, N=N), 'step', 'loss', 'sampling_type', 'trial', aggregate=False,
     legend_pos='top_right', y_axis_format='0.000', title=f"MSE Loss on deterministic MDP={N}. No of trials=5."))

In [None]:
data.sample(10)