# Initialize VW executor

In [None]:
from vw_executor.vw import Vw

# Create the executor used by the training cell further down.
# Presumably 'vw' names the VW binary to invoke and '.vw_cache' is the
# directory where run artifacts are cached — TODO confirm against Vw.
vw = Vw('vw','.vw_cache')

# Get the data

In [None]:
# Log files used for training; the same list is parsed again later by
# logs_2_df when evaluating the best job, so the order matters.
inputs = [
    'vw_executor/tests/data/cb_1000_0.json',
    'vw_executor/tests/data/cb_1000_1.json', 
    ]
# Bare expression so the notebook displays the list.
inputs

# Define options grid and train

In [None]:
import pandas as pd
from vw_executor.vw_opts import Grid

# Build the table of option combinations to train with.
# NOTE(review): presumably `Grid * Grid` forms the cross product of the two
# grids and `Grid + Grid` concatenates alternatives, giving
# 2 cb types x (2 learning rates + coin) rows — confirm against
# vw_executor.vw_opts.Grid.
# NOTE(review): keys starting with '#' look like labels whose value is the
# literal option text — confirm.
opts = pd.DataFrame(Grid(
    {
        '#base': ['--ccb_explore_adf -P 100 --preserve_performance_counters --save_resume --dsjson --epsilon 0.2'],
        '--cb_type': ['ips', 'mtr']
    }) * (
        Grid({'--learning_rate': [0.01,0.1], '--power_t': [0]}) + Grid({'#coin': ['--coin']})
    ))
# Bare expression so the notebook displays the table.
opts

In [None]:
# Train on the input files with every option row. The '-p' entry is the
# output key that the evaluation cell later reads back via
# `best_job.outputs['-p']` (presumably VW's predictions file — confirm).
result = vw.train(inputs, opts, ['-p'])
# Bare expression so the notebook displays the result table.
result

# Evaluate the best one


## Some helpers (TBD: to be written properly)

In [None]:
import json
from itertools import chain


def _logs_parse(lines):
    for l in lines:
        o = json.loads(l)
        yield {'r': -o['_label_cost'], 'p': o['_label_probability'], 'a': o['_labelIndex']}
        
def _predictions_parse(lines):
    for l in lines:
        if ':' in l:
            yield {int(kv.split(':')[0]): float(kv.split(':')[1]) for kv in l.split(',')}

def logs_2_df(files):
    """Load JSON log files into a DataFrame with columns ``r``, ``p``, ``a``.

    Each file is read line by line through ``_logs_parse``, in the order
    given, and the resulting records are stacked into a single frame.

    Args:
        files: Iterable of paths to log files (one JSON object per line).

    Returns:
        pandas.DataFrame with one row per log line.
    """
    def _iter_lines():
        # Open one file at a time and close it as soon as it is exhausted;
        # the previous version opened every file up front and never closed
        # any of them.
        for path in files:
            with open(path) as handle:
                yield from handle

    return pd.DataFrame(_logs_parse(_iter_lines()))

def predictions_2_df(files):
    """Load prediction files into a DataFrame with one column per index.

    Each file is read line by line through ``_predictions_parse``, in the
    order given, and the resulting dicts are stacked into a single frame.

    Args:
        files: Iterable of paths to prediction files.

    Returns:
        pandas.DataFrame with one row per parsed prediction line.
    """
    def _iter_lines():
        # Open one file at a time and close it as soon as it is exhausted;
        # the previous version opened every file up front and never closed
        # any of them.
        for path in files:
            with open(path) as handle:
                yield from handle

    return pd.DataFrame(_predictions_parse(_iter_lines()))

def _get_estimators_result(i, est, result):
    from estimators.bandits import ips, clopper_pearson
    result['i'].append(i)
    result['online'].append(est['online'].get())
    result['estimate'].append(est['estimate'].get())
    int_result = est['interval'].get()
    result['lower'].append(int_result[0])
    result['upper'].append(int_result[1])

def estimate_cb(decisions, window):
    """Compute windowed estimates over the rows of *decisions*.

    For every row, the logged probability ``row['p']``, the value
    ``row['r']`` and the probability found in the column named by
    ``int(row['a'])`` are fed to three estimators:

    * ``'estimate'`` and ``'interval'`` receive the column probability,
    * ``'online'`` receives the logged probability in both positions.

    Whenever ``(i + 1) % window == 0`` (``i`` being the row's index label)
    the current readings are recorded and all three estimators are replaced
    by fresh ones. After the loop, a trailing partial window is recorded
    only if the online estimator's ``data['N']`` is greater than 2.

    Args:
        decisions: DataFrame with columns ``'p'``, ``'r'``, ``'a'`` plus one
            column per integer that ``'a'`` can take.
        window: Window size used in the modulo check above.

    Returns:
        pandas.DataFrame with columns ``i``, ``estimate``, ``lower``,
        ``upper`` and ``online``.
    """
    # NOTE(review): snips and clopper_pearson are imported but not used in
    # this function.
    from estimators.bandits import ips, snips, clopper_pearson, gaussian

    def _fresh_estimators():
        # A new, empty set of estimators for the next window.
        return {
            'estimate': ips.Estimator(),
            'interval': gaussian.Interval(),
            'online': ips.Estimator(),
        }

    collected = {'i': [], 'estimate': [], 'lower': [], 'upper': [], 'online': []}
    current = _fresh_estimators()
    for i, row in decisions.iterrows():
        logged_prob = row['p']
        value = row['r']
        column_prob = row[int(row['a'])]
        # (estimator key, probability passed as the third argument)
        feeds = (
            ('estimate', column_prob),
            ('interval', column_prob),
            ('online', logged_prob),
        )
        for key, third_arg in feeds:
            current[key].add_example(logged_prob, value, third_arg)

        window_closed = (i + 1) % window == 0
        if window_closed:
            _get_estimators_result(i, current, collected)
            current = _fresh_estimators()

    # Record the trailing partial window only when it has more than 2 in 'N'.
    if current['online'].data['N'] > 2:
        _get_estimators_result(i, current, collected)

    return pd.DataFrame(collected)

def plot_cb(ax, estimates):
    """Draw the estimate, its interval band and the online series on *ax*.

    Args:
        ax: Axes-like object providing ``plot``, ``fill_between`` and
            ``legend``.
        estimates: Mapping (e.g. a DataFrame) with the columns ``'i'``,
            ``'estimate'``, ``'lower'``, ``'upper'`` and ``'online'``;
            ``'i'`` is used as the x coordinate.
    """
    x = estimates['i']
    ax.plot(x, estimates['estimate'], label='est')
    ax.fill_between(x, estimates['lower'], estimates['upper'], alpha=.1)
    ax.plot(x, estimates['online'], label='online')
    # Put the legend on the axes that was drawn on. `plt.legend` targets the
    # *current* axes, which is not necessarily `ax`.
    ax.legend(loc='best')

def new_ax():
    """Create a new figure (16x6, dpi 100) and return its axes object.

    Returns:
        The second element of ``plt.subplots(...)``; the figure handle is
        discarded.
    """
    import matplotlib.pyplot as plt
    _, axes = plt.subplots(figsize=[16,6], dpi=100)
    return axes

In [None]:
# Pick the job from the row with the smallest '!Loss' value.
best_job = result.sort_values(by='!Loss').iloc[0]['!Job']
# Files that job produced for the '-p' output requested during training.
prediction_files = best_job.outputs['-p']
# Put log records and predictions side by side, matched on row index.
# NOTE(review): this assumes both frames hold one row per example in the
# same order; _predictions_parse skips lines without ':' — confirm the
# two sources stay aligned.
decisions = pd.concat([logs_2_df(inputs), predictions_2_df(prediction_files)], axis=1)
# Estimate in windows of 100 rows and plot the result on a fresh axes.
estimations = estimate_cb(decisions, 100)
plot_cb(new_ax(), estimations)
