In [None]:
%pip install -r requirements.txt

# Intro

In [None]:
from ipywidgets import *
from vw_executor.vw_opts import VwOpts
from vw_executor.vw import Vw
import matplotlib.pyplot as plt

from pathlib import Path
import random
import pandas as pd
import json
import numpy as np
import seaborn as sns
from playground.utils import get_simulation, cb_df
from playground.dashboard import Dashboard
from playground.vw_playground import VwPlayground
from playground.visualizers import new_ax, plot_reward

%matplotlib widget
sns.set_theme(style="darkgrid")

## Simulator

In [None]:
people = ['Tom', 'Anna']
topics = ['sports', 'politics']


def my_cb_simulation(n=10000, swap_after=5000, variance=0, bad_features=0, delta=0.8, seed=0):
    """Generate a synthetic contextual-bandit log with two people and two topics.

    Steps run from 1 to ``n - 1`` (so ``n - 1`` events are yielded). The person
    alternates every step and the logged action every two steps, so all four
    (person, action) pairs are visited in a fixed 4-step cycle.

    Args:
        n: exclusive upper bound of the step counter.
        swap_after: the preference offset flips on every step that is a
            multiple of this value.
        variance: passed as the scale argument of ``np.random.normal`` for the
            additive reward noise.
        bad_features: number (truncated with ``int``) of uniformly random
            features ``f0, f1, ...`` attached to each action under key ``'b'``.
        delta: extra reward earned by the currently preferred
            (person, action) pairs.
        seed: seeds both the global ``random`` and ``np.random`` generators.

    Yields:
        dict: one event with label fields (``_label_cost`` is the negated
        reward, ``_label_probability`` is always 0.5), the action ordering
        ``a``, the context ``c`` and the probability vector ``p``.
    """
    random.seed(seed)
    np.random.seed(seed)

    def noise_features():
        # Draws come from the global `random` stream, one per feature.
        return {f'f{k}': random.random() for k in range(int(bad_features))}

    flipped = 0  # 0 or 1; toggled to swap which pairs are rewarded
    for step in range(1, n):
        if step % swap_after == 0:
            flipped = (flipped + 1) % 2

        person = step % 2
        chosen = (step % 4) // 2

        is_preferred = (chosen + person + flipped) % 2 == 0
        if is_preferred:
            # Preferred pairs: 0.1 + delta, plus 0.1 depending on action/offset parity.
            reward = 0.1 + delta + ((chosen + flipped) % 2) * 0.1
        else:
            # Non-preferred pairs: 0.1 or 0.0 depending on action/offset parity.
            reward = 0.1 - ((chosen + flipped + 1) % 2) * 0.1

        reward = reward + np.random.normal(0, variance)

        # Noise for the first action is drawn before noise for the second.
        first_action = {'a': {'topic': topics[0]}, 'b': noise_features()}
        second_action = {'a': {'topic': topics[1]}, 'b': noise_features()}

        event = {
            "_label_cost": -reward,
            "_label_probability": 0.5,
            "_label_Action": chosen + 1,
            "_labelIndex": chosen,
            "a": [chosen + 1, (chosen + 1) % 2 + 1],
            "c": {'shared': {'name': people[person],
                             '_multi': [first_action, second_action]}},
            "p": [0.5, 0.5],
        }
        yield event

## Visualizers

In [None]:
def plot_env(examples, ax=None, window=100):
    """Draw the rolling-mean reward of every (person, topic) pair on one axis.

    Args:
        examples: simulated events; converted to a DataFrame by ``cb_df``.
            The frame is expected to expose ``person``, ``chosen`` and
            ``reward`` columns (those are the ones read here).
        ax: axis to draw on; a new one is obtained from ``new_ax()`` when
            none (or a falsy value) is given.
        window: size of the rolling window used for the mean.
    """
    ax = ax if ax else new_ax()
    ax.set_title('Rewards')

    topic_colors = ['green', 'red']   # one colour per topic
    person_styles = ['-', '-.']       # one line style per person

    df = cb_df(examples)
    for person_idx, person in enumerate(people):
        is_person = df['person'] == person
        for topic_idx, topic in enumerate(topics):
            # Rows where this person was shown this topic; NOTE(review): this
            # assumes `chosen` holds the 0-based action index - confirm in cb_df.
            selected = is_person & (df['chosen'] == topic_idx)
            smoothed = df[['reward']][selected].rolling(window=window).mean()
            sns.lineplot(
                x=smoothed.index,
                y=smoothed['reward'],
                label=f'E(r|{person},{topic})',
                color=topic_colors[topic_idx],
                linestyle=person_styles[person_idx],
                ax=ax,
                errorbar=None,
                sort=False,
                estimator=None,
            )
    ax.legend()

    
def plot_probs(examples, job, ax=None, window=100):
    """Draw the rolling-mean predicted probability of each topic per person.

    Args:
        examples: simulated events; converted to a DataFrame by ``cb_df``.
        job: indexable collection whose first element provides
            ``predictions('-p').cb``; that frame is concatenated column-wise
            with the events. Presumably its columns are named ``'0'``, ``'1'``
            (one per action) - verify against vw_executor.
        ax: axis to draw on; a new one is obtained from ``new_ax()`` when
            none (or a falsy value) is given.
        window: size of the rolling window used for the mean.
    """
    ax = ax if ax else new_ax()
    ax.set_title('P(action|person)')
    ax.set_ylim(-0.05, 1.05)

    topic_colors = ['green', 'red']   # one colour per topic
    person_styles = ['-', '-.']       # one line style per person

    events = cb_df(examples)
    predictions = job[0].predictions('-p').cb
    df = pd.concat([events, predictions], axis=1)

    for person_idx, person in enumerate(people):
        person_rows = df[(df['person'] == person)]
        for topic_idx, topic in enumerate(topics):
            column = str(topic_idx)
            # Each person's curves are shifted by 0.02 per person index, which
            # keeps otherwise overlapping lines distinguishable.
            smoothed = person_rows[[column]].rolling(window=window).mean() + person_idx * 0.02
            sns.lineplot(
                x=smoothed.index,
                y=smoothed[column],
                label=f'P({topic}|{person})',
                color=topic_colors[topic_idx],
                linestyle=person_styles[person_idx],
                ax=ax,
                errorbar=None,
                sort=False,
                estimator=None,
            )
    ax.legend(loc='center left', bbox_to_anchor=(0.9, 0.5))
     

## Interact

In [None]:
# Single dashboard row: environment rewards next to the learned action probabilities.
layout = [[plot_env, plot_probs]]
dashboard = Dashboard(['-p'], layout, figsize=(12, 4))
playground = VwPlayground(my_cb_simulation, dashboard)

playground.run(
    # Keys mirror the keyword parameters of my_cb_simulation.
    simulator_grid=dict(
        n=IntSlider(value=10000, min=100, max=50000),
        swap_after=IntSlider(value=5000, min=10, max=50000),
        variance=FloatSlider(value=0, min=0, max=1, step=0.1),
        # FloatLogSlider takes min/max as exponents of `base`: 2**0 .. 2**10.
        bad_features=FloatLogSlider(value=1, min=0, max=10, base=2, step=1),
        delta=FloatSlider(value=0.8, min=0, max=30),
        seed=IntSlider(value=0, min=0, max=100),
    ),
    # NOTE(review): '#'-prefixed keys look like labels whose values are used as
    # raw option text - confirm against vw_executor.
    vw_grid={
        '#base': fixed('--cb_explore_adf --dsjson --power_t 0 -P 100 --log_output stderr'),
        '--learning_rate': FloatLogSlider(value=2 ** -20, min=-20, max=10, base=2, step=1),
        '--cb_type': fixed('mtr'),
        '--l1': FloatLogSlider(value=2 ** -20, min=-20, max=10, base=2, step=1),
        '#interactions': ['', '-q sa', '-q ::'],
        '#exploration': ['--epsilon 0.1', '--squarecb', '--cover 5', '--synthcover'],
        '#explore_eval': ['', '--explore_eval'],
    },
    columns=3,
)

# Exploration algorithms

## Epsilon-greedy

In [None]:
# Single dashboard row: environment rewards next to the learned action probabilities.
layout = [[plot_env, plot_probs]]
dashboard = Dashboard(['-p'], layout, figsize=(12, 4))
playground = VwPlayground(my_cb_simulation, dashboard)

playground.run(
    # Keys mirror the keyword parameters of my_cb_simulation.
    simulator_grid=dict(
        n=IntSlider(value=10000, min=100, max=50000),
        swap_after=IntSlider(value=5000, min=10, max=50000),
        variance=FloatSlider(value=0.4, min=0, max=1, step=0.1),
        # FloatLogSlider takes min/max as exponents of `base`: 2**0 .. 2**10.
        bad_features=FloatLogSlider(value=4, min=0, max=10, base=2, step=1),
        delta=FloatSlider(value=0.8, min=0, max=30),
        seed=IntSlider(value=0, min=0, max=100),
    ),
    vw_grid={
        '#base': fixed('--cb_explore_adf --dsjson --power_t 0 -P 100 --log_output stderr'),
        '--learning_rate': FloatLogSlider(value=2 ** -3, min=-20, max=10, base=2, step=1),
        '--cb_type': fixed('mtr'),
        '--l1': FloatLogSlider(value=2 ** -20, min=-20, max=10, base=2, step=1),
        '#interactions': ['-q ::', '', '-q sa'],
        '#explore_eval': ['', '--explore_eval'],
        # Exploration knob for this section.
        '--epsilon': FloatSlider(value=0.05, min=0, max=1, step=0.01),
    },
    columns=3,
)

## Bagging

In [None]:
# Single dashboard row: environment rewards next to the learned action probabilities.
layout = [[plot_env, plot_probs]]
dashboard = Dashboard(['-p'], layout, figsize=(12, 4))
playground = VwPlayground(my_cb_simulation, dashboard)

playground.run(
    # Keys mirror the keyword parameters of my_cb_simulation.
    simulator_grid={
        'n': IntSlider(value=10000, min=100, max=50000),
        'swap_after': IntSlider(value=5000, min=10, max=50000),
        'variance': FloatSlider(value=0.4, min=0, max=1, step=0.1),
        # FloatLogSlider takes min/max as exponents of `base`: 2**0 .. 2**10.
        'bad_features': FloatLogSlider(value=4, min=0, max=10, base=2, step=1),
        'delta': FloatSlider(value=0.8, min=0, max=30),
        'seed': IntSlider(value=0, min=0, max=100),
    },
    vw_grid={
        '#base': fixed('--cb_explore_adf --dsjson --power_t 0 -P 100 --log_output stderr'),
        '--learning_rate': FloatLogSlider(value=2**(-3), min=-20, max=10, base=2, step=1),
        '--cb_type': fixed('mtr'),
        '--l1': FloatLogSlider(value=2**(-20), min=-20, max=10, base=2, step=1),
        '#interactions': ['-q ::', '', '-q sa'],
        '#explore_eval': ['', '--explore_eval'],
        '--epsilon': FloatSlider(value=0, min=0, max=1, step=0.01),
        # Exploration knob for this section. The keyword must be `value`: the
        # previous spelling `valu` is not an IntSlider trait, so the initial
        # bag size was never set explicitly.
        '--bag': IntSlider(value=1, min=1, max=100),
    },
    columns=3,
)

## Cover

In [None]:
# Single dashboard row: environment rewards next to the learned action probabilities.
layout = [[plot_env, plot_probs]]
dashboard = Dashboard(['-p'], layout, figsize=(12, 4))
playground = VwPlayground(my_cb_simulation, dashboard)

playground.run(
    # Keys mirror the keyword parameters of my_cb_simulation.
    simulator_grid=dict(
        n=IntSlider(value=10000, min=100, max=50000),
        swap_after=IntSlider(value=5000, min=10, max=50000),
        variance=FloatSlider(value=0.4, min=0, max=1, step=0.1),
        # FloatLogSlider takes min/max as exponents of `base`: 2**0 .. 2**10.
        bad_features=FloatLogSlider(value=4, min=0, max=10, base=2, step=1),
        delta=FloatSlider(value=0.8, min=0, max=30),
        seed=IntSlider(value=0, min=0, max=100),
    ),
    vw_grid={
        '#base': fixed('--cb_explore_adf --dsjson --power_t 0 -P 100 --log_output stderr'),
        '--learning_rate': FloatLogSlider(value=2 ** -3, min=-20, max=10, base=2, step=1),
        '--cb_type': fixed('mtr'),
        '--l1': FloatLogSlider(value=2 ** -20, min=-20, max=10, base=2, step=1),
        '#interactions': ['-q ::', '', '-q sa'],
        '#explore_eval': ['', '--explore_eval'],
        '--epsilon': FloatSlider(value=0, min=0, max=1, step=0.01),
        # Exploration knobs for this section.
        '--cover': IntSlider(value=1, min=1, max=100),
        '--psi': FloatSlider(value=1, min=0, max=10),
    },
    columns=3,
)

## Synthcover

In [None]:
# Single dashboard row: environment rewards next to the learned action probabilities.
layout = [[plot_env, plot_probs]]
dashboard = Dashboard(['-p'], layout, figsize=(12, 4))
playground = VwPlayground(my_cb_simulation, dashboard)

playground.run(
    # Keys mirror the keyword parameters of my_cb_simulation.
    simulator_grid=dict(
        n=IntSlider(value=10000, min=100, max=50000),
        swap_after=IntSlider(value=5000, min=10, max=50000),
        variance=FloatSlider(value=0.4, min=0, max=1, step=0.1),
        # FloatLogSlider takes min/max as exponents of `base`: 2**0 .. 2**10.
        bad_features=FloatLogSlider(value=4, min=0, max=10, base=2, step=1),
        delta=FloatSlider(value=0.8, min=0, max=30),
        seed=IntSlider(value=0, min=0, max=100),
    ),
    vw_grid={
        # The exploration algorithm itself is switched on in the fixed base options.
        '#base': fixed('--cb_explore_adf --dsjson --power_t 0 -P 100  --log_output stderr --synthcover'),
        '--learning_rate': FloatLogSlider(value=2 ** -3, min=-20, max=10, base=2, step=1),
        '--cb_type': fixed('mtr'),
        '--l1': FloatLogSlider(value=2 ** -20, min=-20, max=10, base=2, step=1),
        '#interactions': ['-q ::', '', '-q sa'],
        '#explore_eval': ['', '--explore_eval'],
        '--epsilon': FloatSlider(value=0, min=0, max=1, step=0.01),
        # Exploration knobs for this section.
        '--synthcoverpsi': FloatSlider(value=1, min=0, max=10, step=0.1),
        '--synthcoversize': IntSlider(value=100, min=0, max=1000),
    },
    columns=3,
)

## SquareCB

In [None]:
# Single dashboard row: environment rewards next to the learned action probabilities.
layout = [[plot_env, plot_probs]]
dashboard = Dashboard(['-p'], layout, figsize=(12, 4))
playground = VwPlayground(my_cb_simulation, dashboard)

playground.run(
    # Keys mirror the keyword parameters of my_cb_simulation.
    simulator_grid=dict(
        n=IntSlider(value=10000, min=100, max=50000),
        swap_after=IntSlider(value=5000, min=10, max=50000),
        variance=FloatSlider(value=0.4, min=0, max=1, step=0.1),
        # FloatLogSlider takes min/max as exponents of `base`: 2**0 .. 2**10.
        bad_features=FloatLogSlider(value=4, min=0, max=10, base=2, step=1),
        delta=FloatSlider(value=0.8, min=0, max=30),
        seed=IntSlider(value=0, min=0, max=100),
    ),
    vw_grid={
        # The exploration algorithm itself is switched on in the fixed base options.
        '#base': fixed('--cb_explore_adf --dsjson --power_t 0 -P 100  --log_output stderr --squarecb'),
        '--learning_rate': FloatLogSlider(value=2 ** -3, min=-20, max=10, base=2, step=1),
        '--cb_type': fixed('mtr'),
        '--l1': FloatLogSlider(value=2 ** -20, min=-20, max=10, base=2, step=1),
        '#interactions': ['-q ::', '', '-q sa'],
        '#explore_eval': ['', '--explore_eval'],
        # Exploration knobs for this section.
        '--gamma_scale': FloatSlider(value=10, min=0, max=100, step=0.1),
        '--gamma_exponent': FloatSlider(value=0.5, min=0, max=10, step=0.1),
    },
    columns=3,
)

## RegCB

In [None]:
# Single dashboard row: environment rewards next to the learned action probabilities.
layout = [[plot_env, plot_probs]]
dashboard = Dashboard(['-p'], layout, figsize=(12, 4))
playground = VwPlayground(my_cb_simulation, dashboard)

playground.run(
    # Keys mirror the keyword parameters of my_cb_simulation.
    simulator_grid=dict(
        n=IntSlider(value=10000, min=100, max=50000),
        swap_after=IntSlider(value=5000, min=10, max=50000),
        variance=FloatSlider(value=0.4, min=0, max=1, step=0.1),
        # FloatLogSlider takes min/max as exponents of `base`: 2**0 .. 2**10.
        bad_features=FloatLogSlider(value=4, min=0, max=10, base=2, step=1),
        delta=FloatSlider(value=0.8, min=0, max=30),
        seed=IntSlider(value=0, min=0, max=100),
    ),
    vw_grid={
        # The exploration algorithm itself is switched on in the fixed base options.
        '#base': fixed('--cb_explore_adf --dsjson --power_t 0 -P 100  --log_output stderr --regcb'),
        '--learning_rate': FloatLogSlider(value=2 ** -3, min=-20, max=10, base=2, step=1),
        '--cb_type': fixed('mtr'),
        '--l1': FloatLogSlider(value=2 ** -20, min=-20, max=10, base=2, step=1),
        '#interactions': ['-q ::', '', '-q sa'],
        '#explore_eval': ['', '--explore_eval'],
        # Exploration knob for this section.
        '--mellowness': FloatSlider(value=0, min=0, max=10, step=0.01),
    },
    columns=3,
)

## Softmax

In [None]:
# Single dashboard row: environment rewards next to the learned action probabilities.
layout = [[plot_env, plot_probs]]
dashboard = Dashboard(['-p'], layout, figsize=(12, 4))
playground = VwPlayground(my_cb_simulation, dashboard)

playground.run(
    # Keys mirror the keyword parameters of my_cb_simulation.
    simulator_grid=dict(
        n=IntSlider(value=10000, min=100, max=50000),
        swap_after=IntSlider(value=5000, min=10, max=50000),
        variance=FloatSlider(value=0.4, min=0, max=1, step=0.1),
        # FloatLogSlider takes min/max as exponents of `base`: 2**0 .. 2**10.
        bad_features=FloatLogSlider(value=4, min=0, max=10, base=2, step=1),
        delta=FloatSlider(value=0.8, min=0, max=30),
        seed=IntSlider(value=0, min=0, max=100),
    ),
    vw_grid={
        # The exploration algorithm itself is switched on in the fixed base options.
        '#base': fixed('--cb_explore_adf --dsjson --power_t 0 -P 100  --log_output stderr --softmax'),
        '--learning_rate': FloatLogSlider(value=2 ** -3, min=-20, max=10, base=2, step=1),
        '--cb_type': fixed('mtr'),
        '--l1': FloatLogSlider(value=2 ** -20, min=-20, max=10, base=2, step=1),
        '#interactions': ['-q ::', '', '-q sa'],
        '#explore_eval': ['', '--explore_eval'],
        # Exploration knobs for this section.
        '--lambda': FloatSlider(value=1, min=0, max=10, step=0.1),
        '--epsilon': FloatSlider(value=0, min=0, max=1, step=0.01),
    },
    columns=3,
)