In [None]:
from Pipeline.ccb.dsjson.processor import Processor
from Pipeline.ccb.dsjson.predictions import Predictor
from Pipeline import azure_blob_logs
from Pipeline.utils import Mapper
from Pipeline.dataflow import MultilineFiles, PickleFiles, CsvFiles

from Pipeline.ccb.vw.predictions import VwPredicionsFiles

from pathlib import Path

In [None]:
import os

# Data locations. The defaults are the original workstation paths; set the
# CCB_DATA_ROOT / VW_CACHE_DIR environment variables to run the notebook on
# another machine without editing this cell.
# NOTE: any path spelled out literally elsewhere in the notebook is not
# affected by these variables and must be kept in sync by hand.
local_data_root = Path(os.environ.get('CCB_DATA_ROOT', '/Users/alextaim/data/ccb'))
vw_cache_folder = Path(os.environ.get('VW_CACHE_DIR', '/Users/alextaim/data/.vw_cache'))

# One sub-folder of the data root per pipeline stage.
raw_folder = local_data_root.joinpath('raw')
slim_folder = local_data_root.joinpath('slim')
sample_folder = local_data_root.joinpath('sample')
predict_folder = local_data_root.joinpath('predict.pickle')
baseline_predict_folder = local_data_root.joinpath('baseline.predict')
baseline_estimate_folder = local_data_root.joinpath('baseline.estimate')
cfe_estimate_folder = local_data_root.joinpath('cfe.estimate')

In [None]:
# Raw input logs, built from raw_folder instead of repeating the directory as a
# string literal, so the list always lives under the same root that the
# Mapper(raw_folder, ...) calls in later cells are given.
# Kept as plain strings because the list is also passed to vw.train().
files = [str(raw_folder.joinpath(file_name)) for file_name in ('01.json', '02.json')]

raw = MultilineFiles(files)

# Generate predictions

In [None]:
from VwPipeline import Loggers, Handlers
from VwPipeline.VwCache import VwCache
from VwPipeline.Vw import Vw
from VwPipeline.VwOpts import dimension, product
import pandas as pd

# Path to vw; change this to match your machine.
vw_path = r'vw'

cache = VwCache(vw_cache_folder)
widget_handler = Handlers.WidgetHandler()
vw = Vw(vw_path, cache, handlers=[widget_handler])

# Option grid: one '#base' value crossed with one '#learning' value.
base_dim = dimension('#base', ['--ccb_explore_adf --dsjson   -P 1 --preserve_performance_counters --save_resume'])
learning_dim = dimension('#learning', ['--coin'])
opts = pd.DataFrame(product(base_dim, learning_dim))

preds = vw.train(files, opts, ['-p'])

# Only the first row of the training result is recorded: the job's name is
# mapped to that job's '-p' output.
first_run = preds.iloc[0]
prediction_files = {first_run['!Job'].name: first_run['!Outputs']['-p']}
prediction_files

# Generate slim dsjson

In [None]:
from Pipeline.ccb.dsjson.processor import Processor

# Run every raw file through Processor.process; output paths are produced by
# Mapper(raw_folder, slim_folder).
processor = Processor()
slim_outputs = raw.process(
    processor.process,
    path_gen=Mapper(raw_folder, slim_folder),
    process=True)
slim = MultilineFiles().init(slim_outputs, procs=2)

# Sample

In [None]:

from Pipeline.filters import UniformSampler
from Pipeline.ccb.dsjson.processor import Processor

# Same processing as for the slim files, but with a filter that delegates each
# line to a UniformSampler built with 0.5.
sampler = UniformSampler(0.5)
processor = Processor(filters=[lambda line: sampler.do(line)])
sample_outputs = raw.process(
    processor.process,
    path_gen=Mapper(raw_folder, sample_folder),
    process=True)
sample = MultilineFiles().init(sample_outputs, procs=2)

# Predict

In [None]:
from Pipeline.ccb.dsjson.predictions import Predictor

# The single filter accepts every line, so nothing is dropped before prediction.
predictor = Predictor(filters=[lambda _line: True])
baseline_pred_outputs = raw.process(
    predictor.predict_df,
    path_gen=Mapper(raw_folder, baseline_predict_folder),
    process=True)
baseline_preds = PickleFiles().init(baseline_pred_outputs, procs=2)

# Preestimate

In [None]:
from Pipeline.estimator import Estimator
from Pipeline.ccb.estimators import cb_estimator
from Pipeline.cb.estimators import ips_snips
import json

# Factory: every call builds a fresh cb_estimator around a new ips_snips(),
# with [0] as the second argument.
ips0 = lambda: cb_estimator(ips_snips(), [0])

# Both policies are evaluated with the same single estimator factory; each
# policy gets its own inner dict.
estimators = {
    policy: {'ips_snips_0': ips0}
    for policy in ('baseline1_old', 'random')
}
online_estimator = ips0

estimator = Estimator(
    estimators=estimators,
    online_estimator=online_estimator,
    window='1min',
)

# Feed the baseline prediction files through estimator.preestimate, writing
# to the paths generated by Mapper(baseline_predict_folder, baseline_estimate_folder).
baseline_estimate_outputs = baseline_preds.process(
    estimator.preestimate,
    path_gen=Mapper(baseline_predict_folder, baseline_estimate_folder),
    process=True,
)
baseline_preestimates = PickleFiles().init(baseline_estimate_outputs, procs=2)


# Evaluate

In [None]:
# Open the baseline preestimates and sum them into 2-minute buckets.
# (presumably open() yields a time-indexed pandas object - confirm)
baseline_frame = baseline_preestimates.open()
df = baseline_frame.resample('2min').sum()
df

In [None]:
# Column that holds the 'random' policy results; its label is the *string*
# form of a tuple, not a tuple.
random_col = "('random', 'ips_snips_0')"

# NOTE: df['online'] is overwritten with the extracted value, so re-running
# this cell without first re-running the cell that builds df will likely fail.
df['online'] = df.apply(lambda row: row['online'].get('ips')['e'], axis=1)

# 'l' and 'u' are read from the 'gaussian' entry, 'e' from the 'ips' entry.
for bound in ('l', 'u'):
    df[('random', bound)] = df.apply(
        lambda row: row[random_col].get('gaussian')[bound], axis=1)
df[('random', 'e')] = df.apply(lambda row: row[random_col].get('ips')['e'], axis=1)

In [None]:
from pandas.plotting import register_matplotlib_converters
import matplotlib.pyplot as plt


register_matplotlib_converters()
fig, ax = plt.subplots(dpi=100, figsize=[16, 6])

# Line: the 'online' column; band: the ('random', 'l') .. ('random', 'u') range.
ax.plot(df.index, df['online'], label='online')
ax.fill_between(df.index, df[('random', 'l')], df[('random', 'u')], alpha = .1,
                label='random (gaussian l-u)')
# Labels are only rendered once a legend is requested; the trailing ';'
# keeps the Legend repr out of the cell output.
ax.legend();


# Evaluate predictions from vw

In [None]:
# Key used to look up the vw prediction output in `prediction_files`.
# NOTE(review): this assumes the job name stored as the key of
# `prediction_files` is exactly '--coin' - confirm against the training cell.
policy_name = '--coin'
coin_predictions = prediction_files[policy_name]

In [None]:
# The notebook's `from Pipeline... import ...` lines never bind the name
# `Pipeline` itself, so the dotted reference below would raise NameError on a
# fresh kernel. Importing the module explicitly binds it.
import Pipeline.ccb.estimators

# NOTE: this rebinds `estimator`, replacing the instance built in the
# Preestimate section.
# NOTE(review): Estimator is called here with `factory=` and a list of string
# specs, whereas the earlier cell passes `online_estimator=` and callables -
# confirm that the installed Estimator accepts both forms.
estimator = Estimator(
    factory=Pipeline.ccb.estimators.create,
    estimators={policy_name: ['ccb|ips_snips|0']},
    window='1min')

# Combine the vw predictions with the baseline predictions, then run them
# through estimator.preestimate with output paths generated by
# Mapper(vw_cache_folder, cfe_estimate_folder).
coin_inputs = VwPredicionsFiles(coin_predictions, baseline_preds, policy_name)
coin_preestimates = PickleFiles().init(
    coin_inputs.process(
        estimator.preestimate,
        path_gen=Mapper(vw_cache_folder, cfe_estimate_folder),
        process=True))

In [None]:
# Preview the coin preestimates summed into 2-minute buckets.
coin_frame = coin_preestimates.open()
coin_frame.resample('2min').sum()

In [None]:
# NOTE(review): `preestimates` and `evaluate` are not defined in any visible
# cell of this notebook; on a fresh kernel this cell is expected to raise
# NameError unless they come from somewhere not shown here - confirm.
# NOTE(review): this also rebinds `baseline_preestimates`, which the
# Preestimate section assigned from PickleFiles().init(...), to the result of
# pd.concat, even though the outcome is stored as `cfe_stats` - confirm the
# intended variable name.
baseline_preestimates = pd.concat([estimator.read_preestimate(p) for p in preestimates])
cfe_stats = evaluate(baseline_preestimates.resample('1min').sum())
cfe_stats

# Merge evaluations

In [None]:
# Join the baseline statistics with the cfe statistics.
# NOTE(review): `baseline_stats` is not defined in any visible cell of this
# notebook - confirm where it is supposed to be computed.
all_stats = baseline_stats.join(cfe_stats)