In [None]:
from ipywidgets import *
from vw_executor.vw_opts import VwOpts
from vw_executor.vw import Vw
import matplotlib.pyplot as plt

from pathlib import Path
import random
import pandas as pd
import json
import scipy
import seaborn as sns
from playground.utils import cb_df, get_simulation
from playground.dashboard import Dashboard
from playground.vw_playground import VwPlayground

# Render matplotlib figures with the interactive widget (ipympl) backend.
%matplotlib widget
# Apply seaborn's "darkgrid" theme to the plots created in this notebook.
sns.set_theme(style="darkgrid")

def new_ax():
    """Create a new 9x4 figure at 100 dpi and return its single Axes."""
    figure_and_axes = plt.subplots(figsize=[9, 4], dpi=100)
    # plt.subplots returns (figure, axes); only the axes is handed back.
    return figure_and_axes[1]

# Simulator

In [None]:
def my_simulation(n=10000, swap_after=5000, variance=0, bad_features=0):
    """Generate synthetic two-action contextual-bandit examples as dsjson-style dicts.

    The loop index ``i`` runs from 1 to ``n - 1``, so ``n - 1`` examples are
    yielded. The person alternates every step (``i % 2``) and the chosen
    action alternates every two steps (``i % 4 // 2``), so every
    (person, action) pair is visited. The logged probability is always 0.5.

    Args:
        n: Exclusive upper bound of the loop index (``n - 1`` examples).
        swap_after: Each time ``i`` is a multiple of this value the reward
            offset flips between 0 and 1, which changes which
            (person, action) pairs receive the high reward.
        variance: Scale of the zero-mean Gaussian noise added to the reward.
            Despite the name it is passed as the standard deviation.
        bad_features: Number of random-valued features (``f0``, ``f1``, ...)
            put in namespace ``b`` of each action; converted with ``int()``.

    Yields:
        dict: One example with label fields, the action ranking ``a``, the
        context ``c`` and the probabilities ``p``.
    """
    def random_features():
        # Fresh random values for every action of every example.
        return {f'f{k}': random.random() for k in range(int(bad_features))}

    offset = 0
    for i in range(1, n):
        if i % swap_after == 0:
            offset = (offset + 1) % 2

        person = i % 2
        chosen = i % 4 // 2
        if (chosen + person + offset) % 2 == 0:
            reward = 0.7 + ((chosen + offset) % 2) * 0.1
        else:
            reward = 0.3 - ((chosen + offset + 1) % 2) * 0.1

        # random.gauss replaces scipy.random.normal: `scipy.random` was only a
        # deprecated alias of numpy.random and is missing from recent SciPy.
        reward = reward + random.gauss(0, variance)

        # NOTE(review): '_multi' sits inside the 'shared' dict rather than
        # directly under 'c' -- kept as-is; confirm the consumer expects this.
        yield {
            "_label_cost": -reward,
            "_label_probability": 0.5,
            "_label_Action": chosen + 1,
            "_labelIndex": chosen,
            "a": [chosen + 1, (chosen + 1) % 2 + 1],
            "c": {'shared': {'f': str(person),
                '_multi': [
                    {'a': {'f': '0'}, 'b': random_features()},
                    {'a': {'f': '1'}, 'b': random_features()}]}},
            "p": [0.5, 0.5]
        }

# Visualizers

In [None]:
def plot_env(examples, ax=None, window=100):
    """Plot the rolling-mean reward for every (person, chosen action) pair.

    Args:
        examples: Simulated examples; converted to a DataFrame with ``cb_df``.
        ax: Axes to draw on; a new one is created with ``new_ax`` if omitted.
        window: Size of the rolling-mean window.
    """
    target = ax or new_ax()
    target.set_title('Rewards')
    action_colors = ['green', 'red']   # indexed by chosen action
    person_styles = ['-', '-.']        # indexed by person
    frame = cb_df(examples)
    pairs = [(person, action) for person in (0, 1) for action in (0, 1)]
    for person, action in pairs:
        # Reads the 'shared_good', 'chosen' and 'reward' columns of the cb_df frame.
        selected = (frame['shared_good'] == str(person)) & (frame['chosen'] == action)
        smoothed = frame[['reward']][selected].rolling(window=window).mean()
        sns.lineplot(
            x=smoothed.index,
            y=smoothed['reward'],
            label=f'Person: {person}, chosen: {action}',
            color=action_colors[action],
            linestyle=person_styles[person],
            ax=target,
        )
    target.legend()

    
def plot_probs(examples, job, ax=None, window=100):
    """Plot the rolling-mean predicted action probabilities for each person.

    Args:
        examples: Simulated examples; converted to a DataFrame with ``cb_df``.
        job: Training result; ``job[0].predictions('-p').cb`` is joined
            column-wise onto the examples frame.
        ax: Axes to draw on; a new one is created with ``new_ax`` if omitted.
        window: Size of the rolling-mean window.
    """
    target = ax or new_ax()
    target.set_title('P(action|person)')
    action_colors = ['green', 'red']   # indexed by action
    person_styles = ['-', '-.']        # indexed by person
    predictions = job[0].predictions('-p').cb
    frame = pd.concat([cb_df(examples), predictions], axis=1)
    for person in (0, 1):
        person_rows = frame[(frame['shared_good'] == str(person))]
        for action in (0, 1):
            column = str(action)  # presumably the per-action probability column -- confirm
            # Curves for person 1 are shifted up by 0.02 (presumably to keep
            # overlapping lines distinguishable).
            smoothed = person_rows[[column]].rolling(window=window).mean() + person * 0.02
            sns.lineplot(
                x=smoothed.index,
                y=smoothed[column],
                label=f'P(chosen={action} | Person={person})',
                color=action_colors[action],
                linestyle=person_styles[person],
                ax=target,
            )
    target.legend(loc='center left', bbox_to_anchor=(0.75, 0.5))
     
        
def plot_reward(examples, job, ax=None, window=100):
    """Plot the reward (negated loss) from the job's loss table.

    Args:
        examples: Unused; kept so all plot functions share one signature.
        job: Training result exposing ``loss_table`` with a ``loss`` column
            and an index level named ``file``.
        ax: Axes to draw on; a new one is created with ``new_ax`` if omitted.
        window: Unused; kept so all plot functions share one signature.
    """
    if ax is None:
        ax = new_ax()
    ax.set_title('Average reward')
    df = job.loss_table
    # Pass ax explicitly: without it seaborn draws on the current axes, which
    # is not necessarily the one whose title and legend are set here.
    sns.lineplot(x=df.reset_index('file').index, y=-df['loss'], label='Reward', ax=ax)
    ax.set_ylabel("reward")
    ax.legend(loc='center left', bbox_to_anchor=(0.75, 0.5))

# Look at the data / Debug

In [None]:
# Small noisy run for eyeballing the generated frame. The simulator's keyword
# is `bad_features`; passing `bad=` raises TypeError.
examples = my_simulation(n=100, swap_after=50, variance=0.1, bad_features=1)
cb_df(examples).head()

In [None]:
# Show the first generated example. The simulator's keyword is `bad_features`;
# passing `bad=` raises TypeError.
next(my_simulation(n=10000, swap_after=5000, variance=0, bad_features=1))

In [None]:
# One-off, non-interactive run: simulate, train once, and draw both plots.
visualization = Dashboard(['-p'], [[plot_env, plot_probs]])
visualization.reset()
# The keyword must be `bad_features` to match my_simulation's signature
# (assumes get_simulation forwards extra keywords to the simulator -- confirm).
examples, examples_path = get_simulation(
    '.data', my_simulation, n=10000, swap_after=5000, variance=0, bad_features=1)
visualization.after_simulation(examples)
job = Vw('.cache', handlers=[]).train(examples_path, '--cb_explore_adf --dsjson', ['-p'])
visualization.after_train(examples, job)

# Interact

In [None]:
# Two rows of plots: environment and probabilities on top, reward below.
layout = [[plot_env, plot_probs], [plot_reward]]

dashboard = Dashboard(['-p'], layout, figsize=(12, 8))
playground = VwPlayground(my_simulation, dashboard)

# Widgets for the keyword arguments of my_simulation.
simulator_grid = {
    'n': IntSlider(value=10000, min=100, max=50000),
    'swap_after': IntSlider(value=5000, min=10, max=50000),
    'variance': FloatSlider(value=0, min=0, max=1, step=0.1),
    'bad_features': FloatLogSlider(value=1, min=0, max=10, base=2, step=1),
}

# Widgets / choices for the VW command line.
# NOTE(review): keys starting with '#' presumably contribute only their value
# to the command line -- confirm against VwPlayground.
vw_grid = {
    '#base': fixed('--cb_explore_adf --dsjson --power_t 0 -P 100'),
    '--learning_rate': FloatLogSlider(value=2 ** -20, min=-20, max=10, base=2, step=1),
    '--cb_type': ['ips', 'mtr'],
    '--l1': FloatLogSlider(value=2 ** -20, min=-20, max=10, base=2, step=1),
    '#interactions': ['', '-q sa', '-q ::'],
    '#exploration': ['--epsilon 0.1', '--squarecb', '--cover 5', '--synthcover'],
}

playground.run(simulator_grid=simulator_grid, vw_grid=vw_grid, columns=3)

In [None]:
# Print the arguments of the first entry of the playground's most recent job.
print('Last command line')
print(playground.last_job[0].args)

In [None]:
# Display the raw stdout of the first entry of the playground's most recent job.
playground.last_job[0].stdout.raw