In [None]:
from Pipeline.ccb.dsjson.processor import Processor
from Pipeline.ccb.dsjson.predictions import Predictor
from Pipeline import azure_blob_logs
from Pipeline.utils import Mapper
from Pipeline.dataflow import MultilineFiles, PickleFiles, CsvFiles, Fileset

from pathlib import Path

In [None]:
# Root locations: the CCB data set and the on-disk cache used by vw.
local_data_root = Path('/Users/alextaim/data/ccb')
vw_cache_folder = Path('/Users/alextaim/data/.vw_cache')

# One sub-folder of the data root per pipeline stage.
raw_folder = local_data_root / 'raw'
slim_folder = local_data_root / 'slim'
sample_folder = local_data_root / 'sample'
predict_folder = local_data_root / 'predict.pickle'
baseline_predict_folder = local_data_root / 'baseline.predict'
baseline_estimate_folder = local_data_root / 'baseline.estimate'
cfe_estimate_folder = local_data_root / 'cfe.estimate'

In [None]:
# Build the input list from raw_folder so these paths cannot drift from the
# folder that the Mapper(raw_folder, ...) calls further down are keyed on.
# Kept as plain strings, as before, because the list is also handed to vw.train.
files = [str(raw_folder / name) for name in ('01.json', '02.json')]

raw = MultilineFiles(files)

# Generate predictions

In [None]:
from VwPipeline import Loggers, Handlers
from VwPipeline.VwCache import VwCache
from VwPipeline.Vw import Vw
from VwPipeline.VwOpts import dimension, product
import pandas as pd

# Location of the vw executable; edit to match your installation.
vw_path = r'vw'

# vw runner backed by an on-disk cache, reporting through a notebook widget handler.
cache = VwCache(vw_cache_folder)
vw = Vw(vw_path, cache, handlers=[Handlers.WidgetHandler()])

# Option grid: one base configuration crossed with one learning option.
opts = pd.DataFrame(
    product(
        dimension('#base', ['--ccb_explore_adf --dsjson   -P 1 --preserve_performance_counters --save_resume']),
        dimension('#learning', ['--coin']),
    )
)

# Train on the raw files and ask vw for the '-p' output.
preds = vw.train(files, opts, ['-p'])

# Index the '-p' output of the first row of the results by that row's job name.
first_run = preds.iloc[0]
prediction_files = {first_run['!Job'].name: first_run['!Outputs']['-p']}
prediction_files

# Generate slim dsjson

In [None]:
from Pipeline.ccb.dsjson import processor

In [None]:
from Pipeline.ccb.dsjson.processor import Processor

# Run the default Processor (no filters) over every raw file.
# Mapper(raw_folder, slim_folder) presumably maps each raw path to its
# counterpart under slim/ — TODO confirm against Pipeline.utils.Mapper.
processor = Processor()
slim = MultilineFiles().init(
    raw.process(
        processor.process,
        path_gen=Mapper(raw_folder, slim_folder),
        process=True,
    )
)

# Sample

In [None]:

from Pipeline.filters import UniformSampler
from Pipeline.ccb.dsjson.processor import Processor

# Same processing as the slim step, but each line must first pass the sampler.
# NOTE(review): 0.5 is presumably the keep-probability — confirm in UniformSampler.
sampler = UniformSampler(0.5)
processor = Processor(filters=[lambda line: sampler.do(line)])
sample = MultilineFiles().init(
    raw.process(
        processor.process,
        path_gen=Mapper(raw_folder, sample_folder),
        process=True,
    )
)

# Predict

In [None]:
from Pipeline.ccb.dsjson.predictions import Predictor

# Baseline predictions straight from the raw logs; the single filter accepts every line.
predictor = Predictor(filters=[lambda line: True])
baseline_preds = PickleFiles().init(
    raw.process(
        predictor.predict_df,
        path_gen=Mapper(raw_folder, baseline_predict_folder),
        process=True,
    )
)

# Preestimate

In [None]:
from Pipeline.estimators import Estimator, evaluate
import Pipeline.ccb.estimators
import json

# Two named baseline policies plus the online policy, all configured with the
# same 'ccb|ips_snips|0' estimator spec and a '1min' window.
estimator = Estimator(
    factory=Pipeline.ccb.estimators.create,
    estimators={
        'baseline1_old': ['ccb|ips_snips|0'],
        'random': ['ccb|ips_snips|0'],
    },
    online_estimator='ccb|ips_snips|0',
    window='1min',
)

# Feed every baseline prediction file through the estimator's pre-estimate step.
baseline_preestimates = PickleFiles().init(
    baseline_preds.process(
        estimator.preestimate_df,
        path_gen=Mapper(baseline_predict_folder, baseline_estimate_folder),
        process=True,
    )
)


# Evaluate

In [None]:
# Load the baseline pre-estimates and sum them into 2-minute buckets.
df = (
    baseline_preestimates
    .open()
    .resample('2min')
    .sum()
)
df

In [None]:
# Inspect the online estimator object stored in the first 2-minute bucket,
# asking it for its 'snips' result.
# NOTE(review): this cell calls .get(type='snips') while the next one calls
# .get('ips') positionally — confirm both spellings are accepted by the estimator.
df.iloc[0]["('online', 'ccb|ips_snips|0')"].get(type='snips')

In [None]:
# For every bucket, take the 'e' entry of the online estimator's 'ips' result
# and store it as a plain column that can be plotted.
df['online'] = df.apply(
    lambda row: row["('online', 'ccb|ips_snips|0')"].get('ips')['e'],
    axis=1,
)

In [None]:
# Plot the 'online' column of df, one point per resampled time bucket.
df['online'].plot()


# Evaluate predictions from vw

In [None]:
from Pipeline.ccb.vw import predictions

class VwPredicionsFiles(Fileset):
    """Read-only fileset that pairs vw prediction files with previously loaded labels.

    Reading entry ``i`` loads the ``i``-th frame from ``label_fileset``, parses the
    prediction file at the given path with ``predictions.lines_2_slots`` and adds a
    ``('b', policy_name)`` column built from those predictions.
    """

    def _read(self, i, path):
        """Return the label frame for entry ``i`` extended with this policy's column.

        Args:
            i: Index of the entry; also used to fetch labels via ``label_fileset.read(i)``.
            path: Path of the vw prediction file belonging to this entry.

        Returns:
            The columns 't', 'a', 'r', 'p', 'n' and ``('b', self.policy_name)``.
        """
        print(f'{i}: {path}')
        # Use the name given to the constructor, not whatever global happens to
        # be called `policy_name` in the notebook at read time.
        policy_column = ('b', self.policy_name)
        labels = self.label_fileset.read(i)
        # Consume the parser inside the `with` so the file handle is always closed.
        with open(path) as prediction_lines:
            labels['_tmp'] = list(predictions.lines_2_slots(prediction_lines))
        # Per row: pair each logged action with the prediction for the same slot
        # and look the action up in it (assumes every '_tmp' entry is indexable
        # by the action id — TODO confirm against lines_2_slots).
        labels[policy_column] = labels.apply(
            lambda r: [slot[action] for action, slot in zip(r['a'], r['_tmp'])],
            axis=1,
        )
        return labels[['t', 'a', 'r', 'p', 'n', policy_column]]

    @staticmethod
    def _write(path, o):
        """Reject writes; this fileset only reads existing prediction files."""
        raise Exception('Not supported')

    def __init__(self, files, label_fileset, policy_name):
        """Create the fileset.

        Args:
            files: Prediction file paths, passed through to ``Fileset``.
            label_fileset: Fileset whose ``read(i)`` yields the label frame for entry ``i``.
            policy_name: Name used as the second element of the ``('b', ...)`` column key.
        """
        super().__init__(files=files, reader=self._read, writer=VwPredicionsFiles._write)
        self.label_fileset = label_fileset
        self.policy_name = policy_name

In [None]:
# Pick the vw run to evaluate and fetch its prediction output.
# NOTE(review): prediction_files is keyed by the vw job's `.name`; this assumes
# that name is exactly '--coin' — confirm, otherwise this lookup raises KeyError.
policy_name = '--coin'
coin_predictions = prediction_files[policy_name]

In [None]:
# Estimator for the single vw policy, with the same spec and window as the baseline run.
estimator = Estimator(
    factory=Pipeline.ccb.estimators.create,
    estimators={policy_name: ['ccb|ips_snips|0']},
    window='1min',
)

# Join the vw predictions with the baseline label frames, then pre-estimate.
coin_preestimates = PickleFiles().init(
    VwPredicionsFiles(coin_predictions, baseline_preds, policy_name).process(
        estimator.preestimate_df,
        path_gen=Mapper(vw_cache_folder, cfe_estimate_folder),
        process=True,
    )
)

In [None]:
# Load the vw-policy pre-estimates and sum them into 2-minute buckets.
(
    coin_preestimates
    .open()
    .resample('2min')
    .sum()
)

In [None]:
# Evaluate both sets of pre-estimates at 1-minute resolution.
# The earlier version iterated over an undefined `preestimates` name and
# overwrote `baseline_preestimates`; load the filesets built above instead,
# the same way the resample cells do, and leave `baseline_preestimates` intact.
# `baseline_stats` is computed here because the merge step needs it.
# NOTE(review): assumes evaluate() accepts the resampled frame returned by
# `.open()` for either fileset — confirm against Pipeline.estimators.evaluate.
baseline_stats = evaluate(baseline_preestimates.open().resample('1min').sum())
cfe_stats = evaluate(coin_preestimates.open().resample('1min').sum())
cfe_stats

# Merge evaluations

In [None]:
# Put the baseline and counterfactual statistics side by side in one table.
# Assumes both are pandas objects sharing an index, so that `join` aligns
# their rows — TODO confirm.
all_stats = baseline_stats.join(cfe_stats)