# Helpers

In [None]:
!python -m pip install ipympl
!python -m pip install seaborn

In [None]:
from vw_executor.vw_opts import VwOpts
from pathlib import Path

class SimulationBase:
    """Simulated dataset that is cached on disk as one DSJSON object per line.

    Every keyword argument becomes an instance attribute and also determines
    the cache file name inside ``folder``. If that file already exists the
    examples are read back from it; otherwise ``self.generate()`` (which a
    subclass has to provide) is called and its result is written to the file.
    """

    def __init__(self, folder, **kwargs):
        """Expose ``kwargs`` as attributes, then load or generate the examples."""
        for name, value in kwargs.items():
            setattr(self, name, value)

        root = Path(folder)
        file_name = f'sim-{VwOpts(kwargs)}'.replace(' ', '-')
        self._path = root.joinpath(file_name)

        if self._path.exists():
            self.load()
            return

        root.mkdir(parents=True, exist_ok=True)
        self._examples = self.generate()
        self.save()

    def to_dsjson(self):
        """Yield each example serialized as compact JSON (no spaces after separators)."""
        yield from (json.dumps(example, separators=(",", ":")) for example in self._examples)

    def save(self):
        """Write all examples to the cache file, one JSON document per line."""
        with open(self._path, 'w') as f:
            f.writelines(f'{line}\n' for line in self.to_dsjson())

    def load(self):
        """Replace ``self._examples`` with the examples parsed from the cache file."""
        self._examples = []
        with open(self._path) as f:
            self._examples.extend(json.loads(row) for row in f)

    def cbdf(self):
        """Return a DataFrame with one row per example.

        Columns: ``reward`` (negated ``_label_cost``), ``shared_good`` (the
        shared feature ``f``), ``chosen`` (``_labelIndex``) and ``prob``
        (``_label_probability``).
        """
        rows = []
        for example in self._examples:
            rows.append({
                'reward': -example['_label_cost'],
                'shared_good': example['c']['shared']['f'],
                'chosen': example['_labelIndex'],
                'prob': example['_label_probability'],
            })
        return pd.DataFrame(rows)

In [None]:
%matplotlib widget
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
from itertools import chain
import scipy
import seaborn as sns
sns.set_theme(style="darkgrid")
from vw_executor.vw import Vw
from vw_executor.loggers import ConsoleLogger


def new_ax():
    """Create a new 9x4 inch, 100 dpi figure and return its single axes."""
    _figure, axes = plt.subplots(figsize=[9, 4], dpi=100)
    return axes

class Simulation(SimulationBase):
    """Two-person / two-action contextual-bandit simulation with a reward swap.

    Parameters
    ----------
    folder : directory used for the on-disk cache of generated examples.
    n : upper bound of the step counter; steps run over ``range(1, n)``.
    swap_after : every ``swap_after`` steps the reward pattern is flipped.
    variance : passed as the scale argument of the Gaussian noise added to
        each reward.
    bad_log : each action carries ``2**bad_log`` uniformly random features in
        namespace ``b``.
    """

    def __init__(self, folder, n=10000, swap_after=5000, variance=0, bad_log=0):
        super().__init__(folder, n=n, swap_after=swap_after, variance=variance, bad_log=bad_log)

    def generate(self):
        """Build and return the list of DSJSON example dicts."""
        examples = []
        offset = 0
        noise_features = 2 ** self.bad_log
        for i in range(1, self.n):
            # Flip which (person, action) pairs are the rewarding ones.
            if i % self.swap_after == 0:
                offset = (offset + 1) % 2

            # Person and logged action cycle deterministically, so every
            # (person, action) pair shows up once in each group of four steps.
            person = i % 2
            chosen = i % 4 // 2
            if (chosen + person + offset) % 2 == 0:
                # High-reward pairs: 0.7 or 0.8.
                reward = 0.7 + ((chosen + offset) % 2) * 0.1
            else:
                # Low-reward pairs: 0.2 or 0.3.
                reward = 0.3 - ((chosen + offset + 1) % 2) * 0.1

            # `scipy.random` was a deprecated alias of `numpy.random` that newer
            # SciPy releases no longer expose; call NumPy directly.
            reward = reward + np.random.normal(0, self.variance)

            # NOTE(review): `_multi` sits inside the `shared` object here, not
            # next to it under `c` — confirm this is the intended layout.
            examples.append({
                "_label_cost": -reward,
                "_label_probability": 0.5,
                "_label_Action": chosen + 1,
                "_labelIndex": chosen,
                "a": [chosen + 1, (chosen + 1) % 2 + 1],
                "c": {'shared': {'f': person,
                    '_multi': [
                        {'a': {'f': '0'}, 'b': {f'f{k}': random.random() for k in range(noise_features)}},
                        {'a': {'f': '1'}, 'b': {f'f{k}': random.random() for k in range(noise_features)}}]}},
                "p": [0.5, 0.5]
            })
        return examples

    def visualize(self, window=100, ax=None):
        """Plot the rolling mean reward for every (person, chosen action) pair.

        ``window`` is the rolling window in rows; ``ax`` is the target axes
        (a new one is created through ``new_ax()`` when omitted).
        """
        colors = ['green', 'red']   # indexed by chosen action
        styles = ['-', '-.']        # indexed by person
        if ax is None:
            ax = new_ax()
        df = self.cbdf()
        for person in [0, 1]:
            for action in [0, 1]:
                subset = df[(df['shared_good'] == person) & (df['chosen'] == action)]
                subset.rolling(window=window)['reward'].mean().plot(
                    ax=ax, color=colors[action], style=styles[person],
                    label=f'Person: {person}, chosen: {action}')
        ax.legend()
        
def _predictions_parse(lines):
    """Yield one ``{action_index: probability}`` dict per prediction line.

    A line is expected to look like ``"0:0.5,1:0.5"``; lines that contain no
    ``':'`` (e.g. blank separator lines) are skipped.
    """
    for line in lines:
        if ':' not in line:
            continue
        parsed = {}
        for pair in line.split(','):
            fields = pair.split(':')
            action = int(fields[0])
            parsed[action] = float(fields[1])
        yield parsed

def predictions_2_df(files):
    """Parse the given prediction files into a single DataFrame.

    ``files`` is an iterable of paths. Rows follow the order of the files and
    of the lines within them; columns are the integer action indices produced
    by ``_predictions_parse``. Each file is closed as soon as it has been read
    (the previous version left every handle open).
    """
    rows = []
    for path in files:
        with open(path) as f:
            rows.extend(_predictions_parse(f))
    return pd.DataFrame(rows)
    
def plot(sim, job, ax):
    """Draw the rolling mean (100 rows) of the predicted action probabilities.

    The simulation table from ``sim.cbdf()`` is joined column-wise with the
    predictions read from ``job.outputs['-p']``; one curve is drawn on ``ax``
    for every (person, action) pair.
    """
    palette = ['green', 'red']     # indexed by action
    line_styles = ['-', '-.']      # indexed by person
    prediction_files = job.outputs['-p']
    frames = [sim.cbdf(), predictions_2_df(prediction_files)]
    df = pd.concat(frames, axis=1)

    for person in [0, 1]:
        person_rows = df[(df['shared_good'] == person)]
        for action in [0, 1]:
            # Person 1 curves are shifted up by 0.05 — presumably to keep
            # overlapping curves distinguishable.
            smoothed = person_rows[action].rolling(window=100).mean() + person * 0.05
            smoothed.plot(
                ax=ax,
                color=palette[action],
                style=line_styles[person],
                label=f'P(chosen={action} | Person={person})',
            )

    ax.legend(loc='center left', bbox_to_anchor=(0.75, 0.5))
    
    
def vw_playground(n=10000, swap_after=5000, variance=0, bad_log=0, window=100, 
                       learning_rate=-20, coin=False, l1=-20, cb_type='ips', 
                       interactions='', ignore='', ignore_linear='', marginals='', 
                       epsilon=None, squareCB=False, gamma_scale=None, 
                       ax=None, fig=None):
    """Simulate data, train VW on it and plot rewards next to learned policies.

    Parameters
    ----------
    n, swap_after, variance, bad_log : forwarded to ``Simulation``.
    window : rolling window used for the rewards plot.
    learning_rate, l1 : base-2 exponents; VW receives ``2**learning_rate``
        and ``2**l1``.
    coin : use ``--coin`` instead of an explicit learning rate.
    cb_type : value for ``--cb_type``.
    interactions, ignore, ignore_linear, marginals : raw VW option strings
        (an empty string means the option is not used).
    epsilon : value for an extra ``--epsilon`` option; omitted when ``None``.
    squareCB : add ``--squarecb``.
    gamma_scale : value for ``--gamma_scale``; only used together with
        ``squareCB`` and omitted when ``None``.
    ax : pair of axes (rewards, policy). A new 1x2 figure is created when
        omitted.
    fig : figure to redraw; defaults to the figure that owns ``ax[0]``.
    """
    # The signature defaults used to crash on `ax[0]` / `fig.canvas`.
    if ax is None:
        fig, ax = plt.subplots(1, 2, figsize=(12, 4))
    if fig is None:
        fig = ax[0].figure

    sim = Simulation('data', n=n, swap_after=swap_after, variance=variance, bad_log=bad_log)
    ax[0].clear()
    ax[0].set_title('Rewards')
    sim.visualize(window=window, ax=ax[0])
    ax[1].clear()
    ax[1].set_title('P(action|person)')

    # NOTE(review): '#base' already carries `--epsilon 0.2`; confirm how
    # VwOpts / VW resolve the second `--epsilon` added through '#epsilon'.
    opts = {'#base': '--cb_explore_adf --dsjson --epsilon 0.2',
            '#0': interactions,
            '#1': ignore,
            '#2': ignore_linear,
            '#3': marginals,
            '#lr': '--coin' if coin else f'--power_t 0 --learning_rate {2**learning_rate}',
            '--l1': 2**l1,
            '--cb_type': cb_type,
            '#square_cb': '--squarecb' if squareCB else '',
            # An unset value must not end up as the literal text "None" on
            # the VW command line.
            '#epsilon': f'--epsilon {epsilon}' if epsilon is not None else '',
            '#gammascale': (f'--gamma_scale {gamma_scale}'
                            if squareCB and gamma_scale is not None else '')
           }

    result = Vw('.vw_cache', handlers=[]).train([sim._path], opts, ['-p'])

    plot(sim, result, ax[1])
    fig.canvas.draw_idle()

# Look at the data

In [None]:
# Build (or load from the 'data' cache) a small noisy simulation and preview
# the first rows of its reward / context / action table.
sim = Simulation('data', n=100, swap_after=50, variance = 0.1, bad_log = 1)
sim.cbdf().head()

In [None]:
# Plot rolling mean rewards (50-row window) of a noise-free simulation; the
# reward pattern flips at step 5000.
Simulation('data', n=10000, swap_after=5000, variance = 0, bad_log = 1).visualize(window=50)

In [None]:
# Show the first raw example dict of that simulation.
Simulation('data', n=10000, swap_after=5000, variance = 0, bad_log = 1)._examples[0]

# Interact

In [None]:
from ipywidgets import *

# Interactive playground: every control below maps to one keyword argument of
# `vw_playground`; `fig` / `ax` are passed through unchanged via `fixed`.
fig, ax = plt.subplots(1, 2, figsize=(12,4))

widget = interactive(
    vw_playground,
    n=IntSlider(min=1000, max=20000, step=1000, value=10000),
    swap_after=IntSlider(min=1000, max=20000, step=1000, value=5000),
    variance=FloatSlider(min=0, max=2, step=0.1, value=0),
    bad_log=IntSlider(min=0, max=10, step=1, value=0),
    window=IntSlider(min=1, max=1000, step=10, value=1),
    learning_rate=IntSlider(min=-20,max=10, step=1, value=-20),
    coin=Checkbox(),
    l1=IntSlider(min=-20,max=10, step=1, value=-20),
    cb_type=['ips', 'mtr'],
    interactions=['', '-q sa', '-q ab', '-q sb', '-q ::'],
    ignore=['', '--ignore b'],
    ignore_linear=['', '--ignore_linear sab'],
    marginals=['', '--marginal a'],
    epsilon = FloatSlider(min=0, max=1, step=0.01),
    squareCB=Checkbox(),
    gamma_scale=FloatLogSlider(value=1, min=0, max=20, base=2, step=1),
    ax=fixed(ax),
    fig=fixed(fig))


def _labelled_row(title, controls):
    """Return an HBox that starts with a text label followed by `controls`."""
    label = Label(title, layout=Layout(display="flex", justify_content="flex-start", width="4%"))
    return HBox((label,) + controls)


# `widget.children` lists the non-fixed controls in keyword order, followed by
# the output area:
#   0 n, 1 swap_after, 2 variance, 3 bad_log, 4 window,
#   5 learning_rate, 6 coin, 7 l1, 8 cb_type,
#   9 interactions, 10 ignore, 11 ignore_linear, 12 marginals,
#   13 epsilon, 14 squareCB, 15 gamma_scale, 16 output
data_controls = HBox(widget.children[:4])
visualization_controls = HBox(widget.children[4:5])
# The slices previously stopped at 8 and 12, which left the `cb_type` and
# `marginals` controls out of the layout.
learning_controls = _labelled_row("Learning", widget.children[5:9])
namespaces_controls = _labelled_row("Namespaces", widget.children[9:13])
exploration_controls = _labelled_row("Exploration", widget.children[13:-1])
output = widget.children[-1]
display(VBox([data_controls, visualization_controls, learning_controls, namespaces_controls, exploration_controls, output]))