# Common stuff

In [None]:
from vw_executor.vw_opts import VwOpts

from ipywidgets import interactive, VBox, Accordion, Layout, GridBox, fixed, FloatSlider, IntSlider
from functools import reduce
import matplotlib.pyplot as plt


def to_vw_cmd(config):
    """Translate a reduction config dict into a VW command-line string.

    Args:
        config: dict with an ``'entryReduction'`` entry holding a
            ``'typename'`` (one of ``'Coin'``, ``'CbExploreAdfGreedy'``,
            ``'CbExploreAdfSquareCb'``) and a ``'config'`` dict of
            hyper-parameters.

    Returns:
        ``str(VwOpts(...))`` for the selected reduction, with ``--quiet``
        added.

    Raises:
        KeyError: if the typename has no converter or the expected
            top-level keys are missing.
    """
    entry = config['entryReduction']
    params = entry['config']

    def pick(*names):
        # Hyper-parameter names appear both in camelCase (the SquareCB
        # config factory in this file) and in snake_case; accept either so
        # the value is not silently dropped. Missing -> None, as before.
        for name in names:
            if name in params:
                return params[name]
        return None

    # NOTE(review): the '#lr' key is interpreted by VwOpts; its exact
    # meaning is not visible in this file -- confirm in vw_executor.
    converters = {
        'Coin': lambda: {
            '#lr': '--coin',
            '--ftrl_alpha': pick('alpha'),
            '--ftrl_beta': pick('beta'),
        },
        'CbExploreAdfGreedy': lambda: {
            '#lr': '--cb_explore_adf',
            '--epsilon': pick('epsilon'),
        },
        'CbExploreAdfSquareCb': lambda: {
            '#lr': '--cb_explore_adf --squarecb',
            '--epsilon': pick('uniformEpsilon', 'uniform_epsilon'),
            '--gamma_scale': pick('gammaScale', 'gamma_scale'),
            '--gamma_exponent': pick('gammaExponent', 'gamma_exponent'),
        },
    }
    result = VwOpts(converters[entry['typename']]())
    result['--quiet'] = ''
    return str(result)

def _concatenate(*grids):
    from collections import OrderedDict
    result = reduce(lambda r, g: OrderedDict(r, **g), grids)
    separator = [len(g) for g in grids]
    return result, separator

def _split(concatenated, separator):
    result = []
    last = 0
    items = list(concatenated.items())
    for c in separator:
        result.append(dict(items[last:last + c]))
        last = last + c
    return tuple(result)

def _grid_layout(elements, columns=4):
    """Arrange widgets in a GridBox with ``columns`` auto-sized columns.

    Args:
        elements: sequence of widgets to place in the grid.
        columns: number of grid columns (default 4).

    Returns:
        A ``GridBox`` whose layout has enough 'auto' rows to hold every
        element at ``columns`` per row.
    """
    def _auto_tracks(count):
        # CSS grid template made of `count` space-separated 'auto' tracks.
        return ' '.join(['auto'] * count)

    row_count = (len(elements) - 1) // columns + 1
    grid = Layout(
        grid_template_rows=_auto_tracks(row_count),
        grid_template_columns=_auto_tracks(columns),
    )
    return GridBox(children=elements, layout=grid)

class Playground:
    """Interactive side-by-side comparison of two end-to-end learners.

    A simulator produces a dataset, a config factory produces a learner
    config, and both ``pyvw_e2e`` and ``reml_e2e`` are run on them; the
    ``history`` of each result is plotted on the same axes.

    Attributes:
        simulator: callable invoked with the simulator-grid values as
            keyword arguments; its output is materialised into a list.
        config_factory: callable invoked with the config-grid values as
            keyword arguments.
        pyvw_e2e, reml_e2e: callables ``(dataset, config) -> result`` whose
            result exposes a ``history`` attribute.
        dataset: dataset from the most recent refresh (``[]`` before any).
        config: config from the most recent refresh (``None`` before any).
        results: ``{'pyvw': ..., 'reml': ...}`` from the most recent
            refresh (``{}`` before any).
    """

    def __init__(self, simulator, config_factory, pyvw_e2e, reml_e2e):
        self.simulator = simulator
        self.dataset = []
        self.config_factory = config_factory
        self.pyvw_e2e = pyvw_e2e
        self.reml_e2e = reml_e2e
        # Filled in by every widget refresh in run(); defined up front so
        # reading them before the first refresh does not raise
        # AttributeError.
        self.config = None
        self.results = {}

    def run(self, simulator_grid, config_grid, columns=4):
        """Build the control widgets and display them with the plot output.

        Args:
            simulator_grid: mapping of simulator keyword name -> widget.
            config_grid: mapping of config-factory keyword name -> widget.
            columns: number of columns used to lay out each control group.
        """
        def _run_and_plot(separator, **options):
            # `options` holds simulator and config values in one flat
            # mapping; `separator` says how many belong to each group.
            sim_opts, train_opts = _split(options, separator)
            self.dataset = list(self.simulator(**sim_opts))
            self.config = self.config_factory(**train_opts)
            self.results = {
                'pyvw': self.pyvw_e2e(self.dataset, self.config),
                'reml': self.reml_e2e(self.dataset, self.config)
            }
            plt.plot(self.results['pyvw'].history, label='pyvw')
            plt.plot(self.results['reml'].history, label='reml')
            plt.legend()

        concatenated, separator = _concatenate(simulator_grid, config_grid)
        widget = interactive(_run_and_plot, separator=fixed(separator), **concatenated)

        # NOTE(review): the slicing below assumes widget.children lists the
        # simulator controls first, then the config controls, and that the
        # `fixed` separator contributes no child -- confirm against
        # ipywidgets.
        n_sim = len(simulator_grid)
        n_cfg = len(config_grid)
        simulator_controls = _grid_layout(widget.children[:n_sim], columns)
        vw_controls = _grid_layout(widget.children[n_sim:n_sim + n_cfg], columns)
        controls = Accordion(children=[simulator_controls, vw_controls])
        controls.set_title(0, 'Simulator args')
        controls.set_title(1, 'Config args')
        output = widget.children[-1]
        # `display` is not imported in this file; it is expected to come
        # from the notebook namespace.
        display(VBox([controls, output]))

def diff(datasets, configs, pyvw_e2e, reml_e2e):
    """Tabulate the final metric of both learners per dataset and config.

    Args:
        datasets: iterable of datasets; each is identified in the table by
            its position in the iteration.
        configs: iterable of config dicts accepted by ``to_vw_cmd`` and by
            both learners.
        pyvw_e2e, reml_e2e: callables ``(dataset, config) -> result`` whose
            result exposes a ``value`` attribute.

    Returns:
        A pandas ``DataFrame`` indexed by ``('dataset', 'config')`` with
        columns ``'pyvw'`` and ``'reml'``. Empty inputs give an empty frame
        with the same index and columns.
    """
    import pandas as pd

    # Materialise once so a one-shot iterable of configs is still applied
    # to every dataset.
    configs = list(configs)

    rows = []
    for i, dataset in enumerate(datasets):
        for config in configs:
            rows.append({
                'dataset': i,
                'config': to_vw_cmd(config),
                'pyvw': pyvw_e2e(dataset, config).value,
                'reml': reml_e2e(dataset, config).value,
            })

    # Explicit columns keep set_index valid when there are no rows.
    table = pd.DataFrame(rows, columns=['dataset', 'config', 'pyvw', 'reml'])
    return table.set_index(['dataset', 'config'])

# Regression

In [None]:
class MSE:
    """Running mean of squared errors with a sparsely sampled history.

    Attributes:
        num: running sum of squared differences.
        denum: number of samples added so far.
        history: value of the running mean captured whenever the sample
            count is a power of two (1, 2, 4, 8, ...).
    """

    def __init__(self):
        self.num, self.denum = 0, 0
        self.history = []

    @property
    def value(self):
        """Current mean squared error, or None before any sample."""
        if self.denum == 0:
            return None
        return self.num / self.denum

    def add(self, y, yhat):
        """Accumulate the squared difference between ``y`` and ``yhat``."""
        residual = y - yhat
        self.num += residual ** 2
        self.denum += 1
        seen = self.denum
        # seen & (seen - 1) clears the lowest set bit; zero means exactly
        # one bit was set, i.e. the count is a power of two.
        if (seen & (seen - 1)) == 0:
            self.history.append(self.value)

def reml_regression(dataset, config):
    """Run progressive-validation regression through reductionml.

    Each example is parsed as VW text, fed to ``predict_then_learn``, and
    the prediction and label value are accumulated into an ``MSE``.

    Args:
        dataset: iterable of VW-text example strings.
        config: config passed to ``Workspace.create_from_config``.

    Returns:
        The populated ``MSE`` accumulator.
    """
    from reductionml import Workspace, FormatType

    metric = MSE()
    learner = Workspace.create_from_config(config)
    text_parser = learner.create_parser(FormatType.VwText)
    for line in dataset:
        features, label = text_parser.parse(line)
        outcome = learner.predict_then_learn(features, label)
        metric.add(outcome.prediction, label.value)
    return metric
    
def pyvw_regression(dataset, config):
    """Run progressive-validation regression through the pyvw bindings.

    For each example the workspace predicts first, then learns, and the
    prediction is scored against the label parsed from the example text.

    Args:
        dataset: iterable of VW-text example strings whose label is the
            text before the first ``|``.
        config: config dict converted to a command line by ``to_vw_cmd``.

    Returns:
        The populated ``MSE`` accumulator.
    """
    from vowpalwabbit import pyvw

    metric = MSE()
    vw = pyvw.Workspace(to_vw_cmd(config))
    for line in dataset:
        guess = vw.predict(line)
        vw.learn(line)
        # Everything left of the first '|' is the numeric label.
        label_text = line.partition('|')[0]
        metric.add(guess, float(label_text.strip()))
    return metric


## Simulator

In [None]:
def simulator(n, seed, sigma, p, q):
    """Yield ``n`` VW-text regression examples drawn from a noisy line.

    Each example has a single feature ``x`` drawn uniformly from [0, 1)
    and the label ``p * x + q`` plus zero-mean Gaussian noise with
    standard deviation ``sigma``.

    Args:
        n: number of examples to generate.
        seed: seed applied to both ``random`` and ``numpy.random``.
        sigma: standard deviation of the label noise; 0 gives noiseless
            labels.
        p: slope of the underlying line.
        q: intercept of the underlying line.

    Yields:
        Strings of the form ``'<label> |a x:<x>'``.
    """
    import numpy as np
    import random
    random.seed(seed)
    np.random.seed(seed)
    for _ in range(n):
        x = random.random()
        # sigma is the spread, not the mean: the first positional argument
        # of np.random.normal is `loc`, so pass it as `scale` explicitly.
        noise = np.random.normal(loc=0.0, scale=sigma)
        yield f'{p * x + q + noise} |a x:{x}'

## Coin betting

In [None]:
def config_factory(alpha, beta):
    """Build the config dict for the 'Coin' entry reduction.

    Args:
        alpha: value stored under the reduction's ``'alpha'`` key.
        beta: value stored under the reduction's ``'beta'`` key.

    Returns:
        A dict with an ``'entryReduction'`` description and an empty
        ``'globalConfig'``.
    """
    hyper_params = {'alpha': alpha, 'beta': beta}
    entry = {'typename': 'Coin', 'config': hyper_params}
    return {'entryReduction': entry, 'globalConfig': {}}

# Compare pyvw and reductionml on the regression simulator with the Coin
# reduction; every slider change re-runs both learners and redraws the plot.
plg = Playground(
    simulator=simulator,
    config_factory=config_factory,
    pyvw_e2e=pyvw_regression,
    reml_e2e=reml_regression,
)

plg.run(
    # Keyword names must match the parameters of `simulator`.
    simulator_grid=dict(
        n=IntSlider(min=100, max=1000, value=100),
        seed=IntSlider(min=0, max=1000, value=0),
        sigma=FloatSlider(min=0, max=1, step=0.01, value=0),
        p=FloatSlider(min=-5, max=5, step=0.01, value=2),
        q=FloatSlider(min=-5, max=5, step=0.01, value=3),
    ),
    # Keyword names must match the parameters of `config_factory`.
    config_grid=dict(
        alpha=FloatSlider(min=0, max=10, step=0.1, value=0.5),
        beta=FloatSlider(min=0, max=10, step=0.1, value=0.5),
    ),
)

# CB

In [None]:
class IPS:
    """Running average of importance-weighted rewards with sparse history.

    Attributes:
        num: running sum of ``r * p_pred / p_log``.
        denum: number of samples added so far.
        history: value of the running average captured whenever the sample
            count is a power of two (1, 2, 4, 8, ...).
    """

    def __init__(self):
        self.num, self.denum = 0, 0
        self.history = []

    @property
    def value(self):
        """Current estimate, or None before any sample."""
        if self.denum == 0:
            return None
        return self.num / self.denum

    def add(self, p_log, r, p_pred):
        """Accumulate reward ``r`` re-weighted by ``p_pred / p_log``."""
        weighted_reward = r * p_pred / p_log
        self.num += weighted_reward
        self.denum += 1
        seen = self.denum
        # Zero after clearing the lowest set bit means a power of two.
        if (seen & (seen - 1)) == 0:
            self.history.append(self.value)

def reml_cb(dataset, config):
    """Run progressive-validation contextual bandit through reductionml.

    Args:
        dataset: iterable of ``(chosen, p_log, r, ex)`` tuples where ``ex``
            is a list of VW-text lines joined with newlines before parsing.
        config: config passed to ``Workspace.create_from_config``.

    Returns:
        The populated ``IPS`` accumulator.
    """
    from reductionml import Workspace, FormatType

    estimator = IPS()
    learner = Workspace.create_from_config(config)
    text_parser = learner.create_parser(FormatType.VwText)
    for chosen, p_log, r, ex in dataset:
        features, label = text_parser.parse('\n'.join(ex))
        outcome = learner.predict_then_learn(features, label)
        # Sort the entries, keep the second field of each, then index by
        # the logged action. Presumably entries are (action, probability)
        # pairs -- TODO confirm against reductionml.
        ranked = sorted(outcome.value)
        second_fields = [entry[1] for entry in ranked]
        estimator.add(p_log, r, second_fields[chosen])
    return estimator
    
def pyvw_cb(dataset, config):
    """Run progressive-validation contextual bandit through pyvw.

    For each interaction the workspace predicts first, then learns, and
    the prediction entry at the logged action index is fed to ``IPS``.

    Args:
        dataset: iterable of ``(chosen, p_log, r, ex)`` tuples; ``ex`` is
            passed unchanged to ``predict`` and ``learn``.
        config: config dict converted to a command line by ``to_vw_cmd``.

    Returns:
        The populated ``IPS`` accumulator.
    """
    from vowpalwabbit import pyvw

    estimator = IPS()
    vw = pyvw.Workspace(to_vw_cmd(config))
    for chosen, p_log, r, ex in dataset:
        pmf = vw.predict(ex)
        vw.learn(ex)
        p_pred = pmf[chosen]
        estimator.add(p_log, r, p_pred)
    return estimator

## Simulator

In [None]:
def simulator(n, p0, seed):
    """Yield ``n`` logged two-action contextual-bandit interactions.

    The context is 'Tom' or 'Anna'; the actions are 'politics' (index 0)
    and 'sports' (index 1). Action 0 is logged when a uniform draw is not
    greater than ``p0``. Reward is 1 when the logged action matches the
    user's preference, else 0, and is written into the example as a
    negated cost.

    Args:
        n: number of interactions to generate.
        p0: threshold for logging action 0; also used as its logged
            probability, with ``1 - p0`` for action 1.
        seed: seed applied to both ``random`` and ``numpy.random``.

    Yields:
        ``(chosen, p_log, r, ex)`` where ``ex`` is a list of three VW-text
        lines: the shared context line followed by the two action lines,
        the chosen one carrying ``index:cost:probability``.
    """
    import numpy as np
    import random

    random.seed(seed)
    np.random.seed(seed)

    users = ['Tom', 'Anna']
    favourite = {'Tom': 'politics', 'Anna': 'sports'}

    for _ in range(n):
        # Draw order (context first, then action) fixes the seeded stream.
        context = users[random.randint(0, 1)]
        chosen = int(random.random() > p0)

        liked = favourite[context]
        r = [int(liked == "politics"), int(liked == "sports")]
        pmf = [p0, 1 - p0]

        # Only the logged action line carries the label prefix.
        if chosen == 0:
            action = [f'0:{-r[0]}:{pmf[0]} |a politics', '|a sports']
        else:
            action = ['|a politics', f'1:{-r[1]}:{pmf[1]} |a sports']

        shared = f'shared |s {context}'
        yield chosen, pmf[chosen], r[chosen], [shared] + action

## Epsilon-greedy

In [None]:
def config_factory(epsilon):
    """Build the config dict for the 'CbExploreAdfGreedy' entry reduction.

    Args:
        epsilon: value stored under the reduction's ``'epsilon'`` key.

    Returns:
        A dict with an ``'entryReduction'`` description and an empty
        ``'globalConfig'``.
    """
    entry = {
        'typename': 'CbExploreAdfGreedy',
        'config': {'epsilon': epsilon},
    }
    return {'entryReduction': entry, 'globalConfig': {}}

# Compare pyvw and reductionml on the contextual-bandit simulator with
# epsilon-greedy exploration.
plg = Playground(
    simulator=simulator,
    config_factory=config_factory,
    pyvw_e2e=pyvw_cb,
    reml_e2e=reml_cb,
)
plg.run(
    # Keyword names must match the parameters of `simulator`.
    simulator_grid=dict(
        n=IntSlider(min=100, max=1000, value=100),
        p0=FloatSlider(min=0, max=1, step=0.01, value=0.5),
        seed=IntSlider(min=0, max=1000, value=0),
    ),
    # Keyword names must match the parameters of `config_factory`.
    config_grid=dict(
        epsilon=FloatSlider(min=0, max=1, step=0.01, value=0.2),
    ),
)

## SquareCB

In [None]:
def config_factory(epsilon, gamma_scale, gamma_exponent):
    """Build the config dict for the 'CbExploreAdfSquareCb' entry reduction.

    Args:
        epsilon: value stored under ``'uniformEpsilon'``.
        gamma_scale: value stored under ``'gammaScale'``.
        gamma_exponent: value stored under ``'gammaExponent'``.

    Returns:
        A dict with an ``'entryReduction'`` description and an empty
        ``'globalConfig'``.
    """
    hyper_params = {
        'uniformEpsilon': epsilon,
        'gammaScale': gamma_scale,
        'gammaExponent': gamma_exponent,
    }
    entry = {'typename': 'CbExploreAdfSquareCb', 'config': hyper_params}
    return {'entryReduction': entry, 'globalConfig': {}}

# Compare pyvw and reductionml on the contextual-bandit simulator with
# SquareCB exploration.
plg = Playground(
    simulator=simulator,
    config_factory=config_factory,
    pyvw_e2e=pyvw_cb,
    reml_e2e=reml_cb,
)
plg.run(
    # Keyword names must match the parameters of `simulator`.
    simulator_grid=dict(
        n=IntSlider(min=100, max=1000, value=100),
        p0=FloatSlider(min=0, max=1, step=0.01, value=0.5),
        seed=IntSlider(min=0, max=1000, value=0),
    ),
    # Keyword names must match the parameters of `config_factory`.
    config_grid=dict(
        epsilon=FloatSlider(min=0, max=1, step=0.01, value=0.1),
        gamma_scale=FloatSlider(min=0, max=10, step=0.1, value=10),
        gamma_exponent=FloatSlider(min=0, max=1, step=0.01, value=0.5),
    ),
)