In [1]:
from IPython.display import HTML

# Render a small script plus a button that toggles the visibility of the
# notebook's code-input cells ('div.input') and stderr outputs
# ('div.output_stderr'). code_toggle() is also invoked once on document-ready;
# both flags start as false, so that first call takes the show() branches and
# the first button click is the one that hides the code.
# NOTE(review): the script relies on a global `$` (jQuery) being present on the
# page -- confirm this holds for the front end the notebook is viewed in.
HTML('''<script>
code_show=false; 
code_show_err=false;
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
 
 if (code_show_err){
 $('div.output_stderr').hide();
 } else {
 $('div.output_stderr').show();
 }
 code_show_err = !code_show_err
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [2]:
import yaml
import os
import numpy as np
import pandas as pd
from functools import reduce

from ipywidgets import interact
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, Legend, Select
from bokeh.plotting import figure
from bokeh.themes import Theme
import bokeh.plotting as bp
from bokeh.io import show, output_notebook, output_file, push_notebook


# Configure Bokeh to render its output inline in notebook cells; must run
# before any show(...) call further down.
output_notebook()

In [9]:
# Folder that holds one sub-folder per simulation; the sub-folder names are the
# keys of dct_simulation_labels (path is relative to the notebook's working dir).
simulation_data_folder = os.path.join('..', 'tests', 'data', 'simulations')

# Simulation sub-folder name -> human-readable label used for column suffixes
# and legend entries.
dct_simulation_labels = {
    'jcoin_random': 'Complete Randomization',
    'jcoin_urn': 'β=1, D=χ2',
    'd_range': 'β=1, D=range',
    'b2': 'β=2, D=χ2',
    'jcoin_custom_d': 'Access calculation; β=1, D=χ2',
}

# One line colour per simulation. The plots zip() this list with the labels,
# and zip() stops at the shorter input, so keep at least as many colours as
# there are simulations.
lst_colors = ['red', 'blue', 'green', 'cyan', 'violet']

# Thresholds as fractions: 0.01 .. 0.09 in steps of 0.01, then 0.1 .. 1.0 in
# steps of 0.1.
lst_quantiles = [x/100 for x in range(1, 10)] + [x/10 for x in range(1, 11)]

# Columns read from every aggregated-stats CSV. Percent values are obtained
# with round() rather than int(): thres * 100 is not exact in floating point
# (0.07 * 100 == 7.000000000000001), and truncation would silently yield the
# wrong column name whenever the error falls below the integer.
lst_col = (
    ['n_participants', 'd_min', 'd_max', 'max_d (mean)', 'd_min_ci', 'd_max_ci']
    + ['max_d ({0}th_pctl)'.format(round(thres * 100)) for thres in lst_quantiles]
    + ['max_asgmt_prop_diff_ge_{0}% (%)'.format(round(thres * 100)) for thres in lst_quantiles]
)

In [4]:
def get_data_to_display(fname, col_suffix, thres, max_participants=2500):
    """Build the wide table plotted for one statistic at one threshold.

    For every simulation in ``dct_simulation_labels`` the CSV at
    ``<simulation_data_folder>/<simulation key>/<fname>`` is read and reduced
    to the columns in ``lst_col``; every column except ``n_participants`` gets
    the simulation's label appended as `` (<label>)``. The per-simulation
    frames are outer-merged on ``n_participants``, rows above
    ``max_participants`` are dropped, and exactly one column per simulation is
    kept and renamed to the bare label.

    Parameters
    ----------
    fname : str
        File name of the aggregated-stats CSV inside each simulation folder.
    col_suffix : str
        ``str.format`` template with two positional fields, ``{0}`` for the
        threshold and ``{1}`` for the simulation label. Once formatted it must
        equal one of the label-suffixed column names described above, e.g.
        ``'max_asgmt_prop_diff_ge_{0}% (%) ({1})'``.
    thres : int
        Value substituted for ``{0}`` in ``col_suffix``.
    max_participants : int or None, optional
        Largest ``n_participants`` kept in the result (default 2500, the
        cut-off that used to be hard-coded). ``None`` disables the filter.

    Returns
    -------
    pandas.DataFrame
        ``n_participants`` followed by one column per simulation, named by its
        label, in the iteration order of ``dct_simulation_labels``.

    Raises
    ------
    KeyError
        If a CSV lacks one of the ``lst_col`` columns, or if the formatted
        ``col_suffix`` does not name a column of the merged table.
    """
    labelled_frames = []
    for simulation_key, label in dct_simulation_labels.items():
        csv_path = os.path.join(simulation_data_folder, simulation_key, fname)
        suffixed_names = {col: '{0} ({1})'.format(col, label)
                          for col in lst_col if col != 'n_participants'}
        labelled_frames.append(
            pd.read_csv(csv_path)[lst_col].rename(columns=suffixed_names))

    merged = reduce(
        lambda left, right: left.merge(right, on='n_participants', how='outer'),
        labelled_frames)
    if max_participants is not None:
        merged = merged.loc[merged['n_participants'] <= max_participants]

    # Map each selected column straight to the label it was built from. The
    # earlier substring search ("label in column name") was only correct by
    # accident of sort order, because 'β=1, D=χ2' is itself a substring of
    # 'Access calculation; β=1, D=χ2'.
    column_to_label = {col_suffix.format(thres, label): label
                       for label in dct_simulation_labels.values()}
    selected = merged[['n_participants'] + list(column_to_label)]
    return selected.rename(columns=column_to_label)

In [10]:
def bkapp(doc):
    """Bokeh application: threshold-selectable line chart of the
    ``max_asgmt_prop_diff_ge_<thres>% (%)`` statistic read from
    ``trial_level_agg_stats.csv``.

    One line per entry of ``dct_simulation_labels`` is drawn against
    ``n_participants``. A ``Select`` widget lets the viewer choose the
    threshold; each change reloads the table through ``get_data_to_display``
    and swaps it into the shared data source, which redraws every line.

    The widget and the initially plotted data now start from the same
    threshold. Previously the chart was built for 50 while the widget showed
    90, so the two disagreed until the first selection.

    Parameters
    ----------
    doc : bokeh.document.Document
        Document handed in by Bokeh when the app is displayed (e.g. via
        ``show(bkapp)``); the layout and theme are attached to it.
    """
    initial_thres = 50
    csv_name = 'trial_level_agg_stats.csv'
    col_template = 'max_asgmt_prop_diff_ge_{0}% (%) ({1})'

    p = figure(title="Maximum difference in assignment proportions at different thresholds (urn level)",
               plot_height=600, plot_width=1000, y_range=(-1, 105),
               background_fill_color='#efefef')
    p.xaxis.axis_label = 'No. participants'
    p.yaxis.axis_label = '% of trials with Maximum difference in assignment proportions >= threshold'

    trial_all_plot_source = bp.ColumnDataSource(
        get_data_to_display(csv_name, col_template, initial_thres))

    # The empty Legend is placed to the right of the plot area before any
    # glyph is added; the legend_label entries below then populate it.
    p.add_layout(Legend(), 'right')
    # zip() stops at the shorter input, so simulations beyond len(lst_colors)
    # would not be drawn.
    for colr, leg in zip(lst_colors, dct_simulation_labels.values()):
        p.line('n_participants', leg, color=colr, legend_label=leg,
               source=trial_all_plot_source,
               nonselection_alpha=0.4, selection_alpha=1)

    thres_input = Select(value=str(initial_thres), title='Select a threshold',
                         options=[str(int(i*100)) for i in lst_quantiles])

    def update_threshold(attr, old, new):
        """Reload the table for the newly selected threshold and redraw."""
        refreshed = get_data_to_display(csv_name, col_template, int(new))
        # Assign a complete new mapping so all columns change in one update.
        trial_all_plot_source.data = dict(bp.ColumnDataSource(refreshed).data)

    thres_input.on_change('value', update_threshold)

    doc.add_root(column(thres_input, p))

    # safe_load is sufficient: the theme is plain mappings, scalars and a list.
    # NOTE(review): properties passed explicitly to figure() above (size,
    # background colour) are expected to win over these theme defaults -- confirm.
    doc.theme = Theme(json=yaml.safe_load("""
        attrs:
            Figure:
                background_fill_color: "#DDDDDD"
                outline_line_color: white
                toolbar_location: above
                height: 500
                width: 800
            Grid:
                grid_line_dash: [6, 4]
                grid_line_color: white
    """))

In [11]:
# Passing the app function (rather than a plot object) lets Bokeh call
# bkapp(doc) with a fresh document and embed the running app in the output.
# NOTE(review): the Select callback runs in Python, so it presumably needs a
# live kernel and will not respond in a static export -- confirm.
show(bkapp)

In [12]:
def bkapp_trt_level(doc):
    """Bokeh application: threshold-selectable line chart of the
    ``max_asgmt_prop_diff_ge_<thres>% (%)`` statistic read from
    ``trt_level_agg_stats.csv`` (treatment level).

    One line per entry of ``dct_simulation_labels`` is drawn against
    ``n_participants``. A ``Select`` widget lets the viewer choose the
    threshold; each change reloads the table through ``get_data_to_display``
    and swaps it into the shared data source, which redraws every line.

    The widget and the initially plotted data now start from the same
    threshold. Previously the chart was built for 50 while the widget showed
    90, so the two disagreed until the first selection.

    Parameters
    ----------
    doc : bokeh.document.Document
        Document handed in by Bokeh when the app is displayed (e.g. via
        ``show(bkapp_trt_level)``); the layout and theme are attached to it.
    """
    initial_thres = 50
    csv_name = 'trt_level_agg_stats.csv'
    col_template = 'max_asgmt_prop_diff_ge_{0}% (%) ({1})'

    p = figure(title="Maximum difference in assignment proportions at different thresholds (treatment level)",
               plot_height=600, plot_width=1000, y_range=(-1, 105),
               background_fill_color='#efefef')
    p.xaxis.axis_label = 'No. participants'
    p.yaxis.axis_label = '% of trials with Maximum difference in assignment proportions >= threshold'

    trial_all_plot_source = bp.ColumnDataSource(
        get_data_to_display(csv_name, col_template, initial_thres))

    # The empty Legend is placed to the right of the plot area before any
    # glyph is added; the legend_label entries below then populate it.
    p.add_layout(Legend(), 'right')
    # zip() stops at the shorter input, so simulations beyond len(lst_colors)
    # would not be drawn.
    for colr, leg in zip(lst_colors, dct_simulation_labels.values()):
        p.line('n_participants', leg, color=colr, legend_label=leg,
               source=trial_all_plot_source,
               nonselection_alpha=0.4, selection_alpha=1)

    thres_input = Select(value=str(initial_thres), title='Select a threshold',
                         options=[str(int(i*100)) for i in lst_quantiles])

    def update_threshold(attr, old, new):
        """Reload the table for the newly selected threshold and redraw."""
        refreshed = get_data_to_display(csv_name, col_template, int(new))
        # Assign a complete new mapping so all columns change in one update.
        trial_all_plot_source.data = dict(bp.ColumnDataSource(refreshed).data)

    thres_input.on_change('value', update_threshold)

    doc.add_root(column(thres_input, p))

    # safe_load is sufficient: the theme is plain mappings, scalars and a list.
    # NOTE(review): properties passed explicitly to figure() above (size,
    # background colour) are expected to win over these theme defaults -- confirm.
    doc.theme = Theme(json=yaml.safe_load("""
        attrs:
            Figure:
                background_fill_color: "#DDDDDD"
                outline_line_color: white
                toolbar_location: above
                height: 500
                width: 800
            Grid:
                grid_line_dash: [6, 4]
                grid_line_color: white
    """))

In [13]:
# Passing the app function (rather than a plot object) lets Bokeh call
# bkapp_trt_level(doc) with a fresh document and embed the running app here.
# NOTE(review): the Select callback runs in Python, so it presumably needs a
# live kernel and will not respond in a static export -- confirm.
show(bkapp_trt_level)