In [196]:
from ipywidgets import interact
import numpy as np
import pandas as pd

from bokeh.models import HoverTool

from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure

from bokeh.transform import log_cmap, linear_cmap
from bokeh.util.hex import hexbin
from bokeh.models import Range1d
# Route bokeh output to the notebook so that `show(...)` renders inline.
output_notebook()

In [197]:
# NOTE(review): unpickling can execute arbitrary code -- only load a
# simulations file that comes from a trusted source.
df = pd.read_pickle('./simulations.gzip')

# Each of these columns stores one array-like per simulation row; turn every
# column into a single ndarray (presumably 2-D, since `update` reduces the
# score and p-value arrays along axis 1 -- TODO confirm the rows are
# equal-length).
coefs = np.array(list(df['coefs'].values))
scores_debiased = np.array(list(df['scores_debiased'].values))
scores = np.array(list(df['scores'].values))
pvalues = np.array(list(df['lr_pvalues'].values))

In [327]:
def update(model_violation='all',
           n_samples='all',
           seed='all',
           n_feat_relevant='all',
           score_summary='max',
           pval_summary='min',
           scoring='r2_debiased'):
    """Filter the simulation rows and push the new points to the scatter plot.

    Reads the notebook-level ``df``, ``pvalues``, ``scores``,
    ``scores_debiased`` and ``scat`` objects, overwrites the ``x`` and ``y``
    columns of ``scat.data_source`` and calls ``push_notebook()``.

    Parameters
    ----------
    model_violation : str or None
        Value of ``df.pathology`` to keep. ``'all'`` disables the filter;
        ``None`` keeps the rows whose pathology is missing.
    n_samples : number or 'all'
        Requested sample size; it is snapped to the closest value present in
        ``df.n_samples`` (the snapped value is printed). ``'all'`` disables
        the filter.
    seed : int or 'all'
        Exact value of ``df.seed`` to keep, or ``'all'``.
    n_feat_relevant : int or 'all'
        Exact value of ``df.n_feat_relevant`` to keep, or ``'all'``.
    score_summary : {'min', 'max', 'mean'}
        Reduction applied to the scores along axis 1 (y axis).
    pval_summary : {'min', 'max', 'mean'}
        Reduction applied to the p-values along axis 1 before the
        ``-log10`` transform (x axis).
    scoring : {'r2', 'r2_debiased'}
        Which score array to plot.

    Raises
    ------
    KeyError
        If ``score_summary``, ``pval_summary`` or ``scoring`` is not one of
        the listed keys.
    """
    selected = np.ones(len(df), dtype=bool)

    if model_violation != 'all':
        if model_violation is None:
            # Comparing a Series against None yields False for every row, so
            # the "no pathology" choice needs an explicit missing-value test.
            mask = df['pathology'].isna().values
        else:
            mask = (df['pathology'] == model_violation).values
        selected = np.logical_and(selected, mask)

    if n_samples != 'all':
        # The requested value need not exist in the data: use the nearest one.
        available = df['n_samples'].unique()
        n_samples_ = available[np.abs(available - n_samples).argmin()]
        print(n_samples_)
        selected = np.logical_and(
            selected, (df['n_samples'] == n_samples_).values)

    # The remaining filters are plain equality tests on a single column.
    for column, wanted in (('seed', seed),
                           ('n_feat_relevant', n_feat_relevant)):
        if wanted != 'all':
            selected = np.logical_and(
                selected, (df[column] == wanted).values)

    reducers = {'min': np.min, 'max': np.max, 'mean': np.mean}
    score_summary_ = reducers[score_summary]
    pval_summary_ = reducers[pval_summary]

    x = -np.log10(pval_summary_(pvalues, 1))

    scores_ = {'r2': scores, 'r2_debiased': scores_debiased}[scoring]
    # Negative scores are clipped to 0 (0.0 keeps the result floating point).
    y = np.maximum(score_summary_(scores_, 1), 0.0)

    scat.data_source.data['x'] = x[selected]
    scat.data_source.data['y'] = y[selected]
    push_notebook()

In [328]:
# Initial view: -log10 of the row-wise minimum p-value against the row-wise
# maximum debiased score, with negative scores clipped to 0.
x = -np.log10(pvalues.min(1))
y = np.max([scores_debiased.max(-1), np.zeros(len(df))], 0)

p = figure(title="Title",
           match_aspect=True,
           tools="wheel_zoom,reset",
           x_axis_label="-log10(p-value)",
           y_axis_label="score (clipped at 0)",
           y_range=Range1d(0, 1),
           x_range=Range1d(x.min(), x.max()))

# Use a scalar colour rather than one colour per point: a per-point array is
# stored as extra full-length columns in the glyph's data source, and those
# columns would no longer match `x`/`y` once `update` replaces them with a
# filtered (shorter) selection. The rendering is identical.
color = 'blue'
scat = p.scatter(x, y, fill_color=color, color=color, alpha=0.05)
# notebook_handle=True is what lets push_notebook() refresh this output later.
show(p, notebook_handle=True)

In [331]:
# Wire `update` to interactive controls: list-valued arguments become
# dropdowns, (min, max, step) tuples become integer sliders.
interact(
    update,
    model_violation=['all', None, 'abs', 'log', 'exp', 'sqrt',
                     '1/x', 'x^2', 'x^3', 'x^4', 'x^5'],
    # NOTE(review): a slider only yields integers, so 'all' cannot be chosen
    # for n_samples / n_feat_relevant from the UI.
    n_samples=(100, 1000, 100),
    seed=['all', 14, 42, 86],
    n_feat_relevant=(1, 40, 3),
    score_summary=['mean', 'max'],
    pval_summary=['min', 'mean'],
    scoring=['r2', 'r2_debiased'],
)

<function __main__.update>