# Hacks & helpers

In [None]:
!python -m pip install ipympl
!python -m pip install seaborn

In [None]:
from vw_executor.vw_opts import VwOpts, InteractiveGrid
from pathlib import Path
%matplotlib widget
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
from itertools import chain
import scipy
import seaborn as sns
sns.set_theme(style="darkgrid")
from vw_executor.vw import Vw
from functools import reduce

def new_ax():
    """Create a new 9x4 figure at 100 dpi and return its single axes object."""
    figure_and_axes = plt.subplots(figsize=[9,4], dpi=100)
    # plt.subplots returns (figure, axes); only the axes is handed back.
    return figure_and_axes[1]

def save_examples(examples, path):
    """Write every example to ``path`` as compact JSON, one example per line.

    Args:
        examples: iterable of JSON-serializable objects.
        path: destination file; it is created or truncated.
    """
    with open(path, 'w') as sink:
        sink.writelines(
            json.dumps(example, separators=(",", ":")) + '\n'
            for example in examples)
            
def load_examples(path):
    """Lazily yield one parsed JSON object per line of the file at ``path``.

    This is a generator: the file is only opened once iteration starts and
    stays open until the generator is exhausted or closed.
    """
    with open(path) as source:
        yield from map(json.loads, source)

def get_simulation(folder, simulator, **kwargs):
    """Return simulated examples for ``kwargs``, cached as a file in ``folder``.

    The cache file name is ``sim-`` followed by the string form of
    ``VwOpts(kwargs)`` with spaces replaced by dashes. When that file already
    exists the examples are loaded from it; otherwise ``simulator(**kwargs)``
    is run, its output is saved there, and the fresh examples are returned.

    Returns:
        A ``(examples, path)`` tuple: the list of examples and the cache file path.
    """
    root = Path(folder)
    file_name = f'sim-{VwOpts(kwargs)}'.replace(' ', '-')
    path = root.joinpath(file_name)

    # Cache hit: reuse what an earlier run with the same options stored.
    if path.exists():
        return list(load_examples(path)), path

    # Cache miss: simulate, then persist for the next call.
    root.mkdir(parents=True, exist_ok=True)
    examples = list(simulator(**kwargs))
    save_examples(examples, path)
    return examples, path
    
def cb_df(examples):
    """Flatten logged examples into a DataFrame with one row per example.

    Columns: ``reward`` (negated ``_label_cost``), ``shared_good`` (the ``f``
    feature of the ``shared`` namespace), ``chosen`` (``_labelIndex``) and
    ``prob`` (``_label_probability``).
    """
    rows = []
    for example in examples:
        rows.append({
            'reward': -example['_label_cost'],
            'shared_good': example['c']['shared']['f'],
            'chosen': example['_labelIndex'],
            'prob': example['_label_probability'],
        })
    return pd.DataFrame(rows)

def _collapse(*grids):
    from collections import OrderedDict
    result = reduce(lambda r, g: OrderedDict(r, **g), grids)
    separator = [len(g) for g in grids]
    return result, separator

def _split(collapsed, separator):
    result = []
    last = 0
    items = list(collapsed.items())
    for c in separator:
        result.append(dict(items[last:last + c]))
        last = last + c
    return tuple(result)

    
class Playground:
    """Interactive loop: simulate data, train VW on it and visualize both steps.

    ``simulation`` is a callable producing examples from keyword options.
    ``visualization`` must provide ``reset()``, ``after_simulation(examples)``,
    ``after_train(examples, job)`` and a ``vw_outputs`` attribute.
    """

    def __init__(self, simulation, visualization, cache_path = '.cache'):
        self.simulation = simulation
        self.visualization = visualization
        # Simulated datasets are cached under <cache_path>/datasets.
        self.data_folder = Path(cache_path).joinpath('datasets')
        self.examples = None
        self.last_job = None
        self.vw = Vw(cache_path, handlers=[])

    def run(self, simulator_grid, vw_grid):
        """Display widgets for both grids and re-run the pipeline on each change.

        Both grids are merged into one set of keyword arguments for
        ``interactive``; ``separator`` remembers how many options belong to
        each grid so the callback can split them apart again.
        """
        collapsed, separator = _collapse(simulator_grid, vw_grid)

        def _run_and_plot(separator, **options):
            sim_opts, train_opts = _split(options, separator)
            viz = self.visualization
            viz.reset()
            examples, examples_path = get_simulation(
                self.data_folder, self.simulation, **sim_opts)
            viz.after_simulation(examples)
            # Keep the latest training job on the instance for later inspection.
            self.last_job = self.vw.train(
                [examples_path], train_opts, viz.vw_outputs)
            viz.after_train(examples, self.last_job)

        widget = interactive(_run_and_plot, separator=fixed(separator), **collapsed)
        controls = widget.children
        print(len(controls))
        display(VBox(controls))

# Simulator

In [None]:
def my_simulation(n=10000, swap_after=5000, variance=0, bad_log=0):
    """Generate a synthetic two-person, two-action contextual bandit log.

    One example is yielded for every step ``i`` in ``1 .. n-1``. The person
    alternates every step and the logged action every two steps, so each
    (person, action) pair is visited uniformly. Depending on the pair the
    base reward is 0.7/0.8 or 0.2/0.3, and the mapping flips every
    ``swap_after`` steps.

    Args:
        n: exclusive upper bound of the step counter (``n - 1`` examples).
        swap_after: number of steps between flips of the reward mapping.
        variance: passed as the scale (second argument) of the Gaussian
            noise added to the reward; 0 disables the noise.
        bad_log: every action gets ``2**bad_log`` uniformly random features
            in namespace ``b``.

    Yields:
        dict: one logged example with cost, probability, chosen action and
        context, in the layout read by ``cb_df``.
    """
    offset = 0
    noise_feature_count = 2**bad_log
    for i in range(1, n):
        if i % swap_after == 0:
            offset = (offset + 1) % 2

        person = i % 2
        chosen = i % 4 // 2
        if (chosen + person + offset) % 2 == 0:
            reward = 0.7 + ((chosen + offset) % 2) * 0.1
        else:
            reward = 0.3 - ((chosen + offset + 1) % 2) * 0.1

        # scipy.random was only an alias of numpy.random and is not available
        # in current SciPy releases, so call NumPy directly.
        reward = reward + np.random.normal(0, variance)

        # NOTE(review): '_multi' is nested inside the 'shared' namespace here;
        # confirm this is the layout the --dsjson parser is expected to receive.
        yield {
            "_label_cost": -reward,
            "_label_probability": 0.5,
            "_label_Action": chosen + 1,
            "_labelIndex": chosen,
            "a": [chosen + 1, (chosen + 1) % 2 + 1],
            "c": {'shared': {'f': person,
                '_multi': [
                    {'a': {'f': '0'}, 'b': {f'f{k}': random.random() for k in range(noise_feature_count)}},
                    {'a': {'f': '1'}, 'b': {f'f{k}': random.random() for k in range(noise_feature_count)}}]}},
            "p": [0.5, 0.5]
        }
    
def visualize(examples, window=100, ax = None):
    """Plot the rolling mean reward for every (person, chosen action) pair.

    Args:
        examples: iterable of logged examples accepted by ``cb_df``.
        window: size of the rolling window used for the mean.
        ax: axes to draw on; a new one is created via ``new_ax`` when falsy.
    """
    ax = ax or new_ax()
    df = cb_df(examples)
    # Line style encodes the person, color encodes the chosen action.
    for person, line_style in enumerate(['-', '-.']):
        for action, color in enumerate(['green', 'red']):
            subset = df[(df['shared_good']==person) & (df['chosen']==action)]
            smoothed = subset.rolling(window=window)['reward'].mean()
            smoothed.plot(
                ax=ax, color=color, style=line_style,
                label=f'Person: {person}, chosen: {action}')
    ax.legend()

# Visualization

In [None]:
class MyVisualization:
    """Two-panel figure: simulated rewards on the left, VW predictions on the right."""

    def __init__(self):
        self.fig, self.ax = plt.subplots(1, 2, figsize=(12,4))
        # Outputs requested from the training job; read back in after_train.
        self.vw_outputs = ['-p']

    def reset(self):
        """Clear both panels and restore their titles."""
        for axis, title in zip(self.ax, ('Rewards', 'P(action|person)')):
            axis.clear()
            axis.set_title(title)

    def after_simulation(self, examples):
        """Draw the rolling mean reward of the simulated data on the left panel."""
        visualize(examples, ax=self.ax[0], window=100)

    def after_train(self, examples, job):
        """Plot the rolling mean of each predicted action probability per person.

        The ``-p`` predictions of ``job[0]`` are joined column-wise with the
        logged examples; the probability columns are addressed by the action
        index as a string.
        """
        logged = cb_df(examples)
        predicted = job[0].predictions('-p').cb
        df = pd.concat([logged, predicted], axis=1)
        for person, line_style in enumerate(('-', '-.')):
            person_rows = df[(df['shared_good']==person)]
            for action, color in enumerate(('green', 'red')):
                smoothed = person_rows[str(action)].rolling(window=100).mean()
                # Shift each person's curves by 0.05 -- presumably so that
                # overlapping lines stay distinguishable.
                (smoothed + person * 0.05).plot(
                    ax=self.ax[1], color=color, style=line_style,
                    label=f'P(chosen={action} | Person={person})')
        self.ax[1].legend(loc='center left', bbox_to_anchor=(0.75, 0.5))
        self.fig.canvas.draw_idle()

# Look at the data

In [None]:
# Draw a small noisy sample and preview its flattened form.
# my_simulation is a generator, so `examples` is consumed by cb_df below.
examples = my_simulation(n=100, swap_after=50, variance = 0.1, bad_log = 1)
cb_df(examples).head()

In [None]:
# Plot the 50-step rolling mean reward per (person, action) for a noise-free run.
visualize(my_simulation(n=10000, swap_after=5000, variance = 0, bad_log = 1), window=50)

In [None]:
# Show the first raw example the simulator yields.
next(my_simulation(n=10000, swap_after=5000, variance = 0, bad_log = 1))

# Interact

In [None]:
from ipywidgets import *

# Shared two-panel figure handed to the callback through fixed() arguments.
fig, ax = plt.subplots(1, 2, figsize=(12,4))

# Build one control per keyword argument below.
# NOTE(review): vw_playground is not defined in the visible part of this
# notebook -- confirm it exists before running this cell.
widget = interactive(
    vw_playground,
    n=IntSlider(min=1000, max=20000, step=1000, value=10000),
    swap_after=IntSlider(min=1000, max=20000, step=1000, value=5000),
    variance=FloatSlider(min=0, max=2, step=0.1, value=0),
    bad_log=IntSlider(min=0, max=10, step=1, value=0),
    window=IntSlider(min=1, max=1000, step=10, value=1),
    learning_rate=IntSlider(min=-20,max=10, step=1, value=-20),
    coin=Checkbox(),
    l1=IntSlider(min=-20,max=10, step=1, value=-20),
    cb_type=['ips', 'mtr'],
    interactions=['', '-q sa', '-q ab', '-q sb', '-q ::'],
    ignore=['', '--ignore b'],
    ignore_linear=['', '--ignore_linear sab'],
    marginals=['', '--marginal a'],
    epsilon = FloatSlider(min=0, max=1, step=0.01),
    squareCB=Checkbox(),
    gamma_scale=FloatLogSlider(value=1, min=0, max=20, base=2, step=1),
    ax=fixed(ax),
    fig=fixed(fig))

# Arrange the generated controls into labelled rows by position.
# NOTE(review): the slices below never include children[8] or children[12],
# so those two controls are not displayed. If the children follow the keyword
# order above, these would be cb_type and marginals -- confirm this is intended.
data_controls = HBox(widget.children[:4])
visualization_controls = HBox(widget.children[4:5])
learning_controls = HBox((Label("Learning", layout=Layout(display="flex", justify_content="flex-start", width="4%")),) + widget.children[5:8])
namespaces_controls = HBox((Label("Namespaces", layout=Layout(display="flex", justify_content="flex-start", width="4%")),) + widget.children[9:12])
exploration_controls = HBox((Label("Exploration", layout=Layout(display="flex", justify_content="flex-start", width="4%")),) + widget.children[13:-1])
# The last child is used as the output area and placed below all controls.
output = widget.children[-1]
display(VBox([data_controls, visualization_controls, learning_controls, namespaces_controls, exploration_controls, output]))

In [None]:
from ipywidgets import *

# Wire the simulator and the visualization into a Playground and start the
# interactive session. The first grid holds the simulator options, the second
# the VW training options.
# NOTE(review): '#base' is presumably the fixed base VW command line --
# confirm against vw_executor's InteractiveGrid.
playground = Playground(my_simulation, MyVisualization())
playground.run(
    InteractiveGrid({
        'n': [10000, 20000],
        'swap_after': [5000],
        'variance': [0],
        'bad_log': [0]
    }),
    InteractiveGrid({
        '#base': '--cb_explore_adf --dsjson'
    })
)