# Uncertainty Prioritization

In [1]:
import numpy as np
import pandas as pd

from IPython.display import display
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
from bokeh.layouts import column
from bokeh.models import Band, ColumnDataSource, NumeralTickFormatter
from bokeh.models.widgets import RadioButtonGroup, RadioGroup
from bokeh.palettes import plasma
output_notebook()

from liftoff.liftoff_results import collect_results

In [2]:
def get_data(experiment_name):
    """Load the result files of one experiment into a single DataFrame.

    Asks ``collect_results`` for every ``results.msgpack`` under
    ``../results`` belonging to ``experiment_name``, reads the first file
    listed for each returned entry and concatenates the frames with a
    fresh integer index.

    Entries whose second element is empty/falsy are skipped.
    """
    # presumably each entry is (directory, [file names]) - verify against
    # liftoff's collect_results.
    found = collect_results(experiment_name=experiment_name,
                            results_dir='../results',
                            names=['results.msgpack'])

    frames = []
    for entry in found:
        if not entry[1]:
            continue
        # NOTE(review): pd.read_msgpack is deprecated in recent pandas
        # releases - confirm the pandas version this notebook is pinned to.
        frames.append(pd.read_msgpack(f'{entry[0]}/{entry[1][0]}'))

    return pd.concat(frames, ignore_index=True)

def get_title(name):
    """Return ``name`` with every underscore replaced by a space."""
    return name.replace('_', ' ')

In [3]:
def transform(data, groupby, y, transform='mean'):
    """Aggregate column ``y`` of ``data`` within each ``groupby`` group.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the ``groupby`` column(s) and the ``y`` column.
    groupby : str or list of str
        Column name(s) to group on.
    y : str
        Name of the column to aggregate.
    transform : str, default 'mean'
        Aggregation to apply. Only ``'mean'`` is implemented.

    Returns
    -------
    pandas.DataFrame
        One row per group: the ``groupby`` column(s) followed by the
        aggregated ``y`` column, with a fresh integer index.

    Raises
    ------
    ValueError
        If ``transform`` is anything other than ``'mean'``.
    """
    # Fail early and say which value was rejected; a bare ValueError gives
    # the caller no hint about what went wrong.
    if transform != 'mean':
        raise ValueError(
            f"Unsupported transform {transform!r}; only 'mean' is implemented."
        )
    return data.groupby(groupby)[y].mean().to_frame().reset_index()

In [4]:
def do_plot(data, y_axis_scale="linear", legend_pos="top_left",
            title="Data", x="N", y="optim_steps", group="sampling_type"):
    """Build a Bokeh line plot with one line per distinct ``group`` value.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the ``x``, ``y`` and ``group`` columns.
    y_axis_scale : str, default "linear"
        Passed to ``figure`` as ``y_axis_type``.
    legend_pos : str, default "top_left"
        Assigned to the legend's ``location``.
    title : str, default "Data"
        Figure title.
    x, y : str, default "N" and "optim_steps"
        Column names used for the horizontal and vertical coordinates.
    group : str, default "sampling_type"
        Column whose distinct values each get their own line, colour and
        legend entry.

    Returns
    -------
    The configured figure; the caller is responsible for ``show``-ing it.
    """
    # NOTE(review): plot_height / plot_width / legend= are kept exactly as
    # the notebook wrote them - confirm they match the installed Bokeh.
    plot = figure(title=title,
                  plot_height=400,
                  plot_width=900,
                  y_axis_type=y_axis_scale,
                  background_fill_color='#ffffff')

    plot.yaxis.formatter = NumeralTickFormatter(format="00")

    # Use a distinct loop variable so the array of labels is not rebound
    # by the loop that iterates over it.
    labels = data[group].unique()
    for label, color in zip(labels, plasma(len(labels))):
        subset = data.loc[data[group] == label]
        plot.line(x=x, y=y, legend=label, source=subset,
                  line_width=3, color=color, alpha=0.8)

    plot.legend.location = legend_pos
    # Clicking a legend entry toggles the visibility of its line.
    plot.legend.click_policy = "hide"
    return plot

## Let's look at the data

In [5]:
# Names of the experiments to load.
experiments = (
    'baseline_loss_sweep',
    'rank_uniform_bayesian',
    'rank_uniform_shuffle_only',
    'rank_uniform_shuffle_huber',
    'rank_uniform_shuffle_beta_sweep',
    'rank_uniform_boot_beta_loss_sweep',
)

# Map each experiment name to its loaded frame and a human-readable title.
results = {
    name: dict(data=get_data(name), title=get_title(name))
    for name in experiments
}

### First, the baseline

In [6]:
# Mean optim_steps for every (N, sampling_type) pair of the baseline sweep.
data = transform(results['baseline_loss_sweep']['data'],
                 ['N', 'sampling_type'],
                 'optim_steps')
data.head(5)

Unnamed: 0,N,sampling_type,optim_steps
0,2,greedy-pq_batch_size:1_huber,46.05
1,2,greedy-pq_batch_size:1_mse,37.2
2,2,proportional_alpha:1_batch_size:1_huber,125.85
3,2,proportional_alpha:1_batch_size:1_mse,45.45
4,2,rank_alpha:1_batch_size:32_huber,64.95


In [7]:
# Render the baseline curves with do_plot's default scale and legend position spelled out.
show(do_plot(data, y_axis_scale="linear", legend_pos="top_left"))

### Rank vs Uniform vs Bayesian

In [8]:
data = results['rank_uniform_boot_beta_loss_sweep']['data']
# Mean optim_steps for every (N, sampling_type) pair.
data = transform(data, ['N', 'sampling_type'], 'optim_steps')
# A bare expression in the middle of a cell is not rendered, so the
# preview has to be displayed explicitly.
display(data.head(10))
show(do_plot(data, y_axis_scale="linear", legend_pos="top_left"))