# Simple test

In [1]:
from estimators.ccb import pdis_cressieread
from estimators.bandits import snips
from estimators.bandits import clopper_pearson

# Estimator / interval pair taken from estimators.bandits (snips, clopper_pearson).
# These two names are not referenced again by the cells shown in this notebook;
# CcbSimulation.cfe() builds its own per-slot instances.
cb_est, cb_int = snips.Estimator(), clopper_pearson.Interval()

# Estimator / interval pair taken from estimators.ccb; exercised by the next cell.
ccb_est, ccb_int = pdis_cressieread.Estimator(), pdis_cressieread.Interval()

In [2]:
# Smoke test: feed a single example with three slots to the estimator and to
# the interval object, then print what each reports.
# NOTE(review): the positional order is assumed to be (p_preds, rs, p_logs),
# matching the keyword call used in the next cell -- confirm against the library.
ccb_est.add_example([0.1, 0.1, 0.1], [1,2,3], [0.5, 0.5, 0.5])
print(f'estimate: {ccb_est.get()}')

# Same example for the interval; get() is called here without an explicit alpha.
ccb_int.add_example([0.1, 0.1, 0.1], [1,2,3], [0.5, 0.5, 0.5])
print(f'interval: {ccb_int.get()}')

estimate: [1.0, 2.0, 3.0000000000000426]
interval: [[0, 1], [0, 1], [0, 1]]


In [3]:
# Reward table used by the loop below (row = slot, column = action):
#              action 0    action 1
# slot 0          1           0
# slot 1          0           0.8
# Fresh estimator (rebinds the `ccb_est` created in the first cell).
ccb_est = pdis_cressieread.Estimator()
epsilon = 0.1
# 1000 synthetic examples that alternate between two outcomes:
#   odd i  -> rewards [1, 0.8], target probability 1 - epsilon for slot 0
#   even i -> rewards [0, 0],   target probability epsilon for slot 0
# Slot 0 is always logged with probability 0.5; slot 1 has target and logged
# probability 1. The printed estimate is close to [0.9, 0.72].
for i in range(1000):
    ccb_est.add_example(
        p_preds = [1 - epsilon if i%2 else epsilon, 1],
        rs = [i % 2, 0.8 * (i % 2)],
        p_logs = [0.5, 1])
print(f'estimate: {ccb_est.get()}')

estimate: [0.900224424911165, 0.7201795399289322]


# Simulator

In [4]:
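# Setup: actions a0, a1, a2 are placed into slots s0, s1 (an action can be used once per session).
# rewards[i][j] is the probability that action j earns reward 1 when shown in slot i
# (i indexes slots, j indexes actions).
# Online (logging) policy: epsilon-greedy.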

import numpy as np
import random
import pandas as pd

class EpsilonGreedy:
    """Per-action probabilities of an epsilon-greedy choice among ``n`` candidates."""

    def __init__(self, epsilon):
        # Probability mass that is spread evenly over all candidates.
        self.epsilon = epsilon

    def explore(self, n):
        """Probability of one non-greedy candidate: an even share of epsilon."""
        even_share = self.epsilon / n
        return even_share

    def exploit(self, n):
        """Probability of the greedy candidate: its even share plus the remaining 1 - epsilon."""
        return self.explore(n) + 1 - self.epsilon

def baseline_random(n):
    """Build the policy tree that picks uniformly among the remaining actions.

    Each node is a list with one ``(probability, subtree)`` pair per available
    action; with ``n`` actions left every pair carries probability ``1/n`` and a
    subtree built for ``n - 1`` actions. ``n <= 0`` yields an empty list.
    """
    branches = []
    for _ in range(n):
        branches.append((1/n, baseline_random(n-1)))
    return branches

def baseline_1(n, excl=None):
    """Build the deterministic policy tree that shows action ``k`` in slot ``k``.

    Args:
        n: number of actions still available at this level of the tree.
        excl: ids of the actions already placed in earlier slots, in slot order
            (any iterable; ``None`` means none). Its length is the index of the
            slot this level decides.

    Returns:
        A list with one ``(probability, subtree)`` pair per available action,
        in ascending action-id order. The action whose id equals the current
        slot index gets probability 1 and all others 0. If that action was
        already used by an earlier slot, every pair gets probability 0, so that
        level is not a valid distribution and is only useful for scoring.
    """
    # A None sentinel avoids a shared mutable default; list() accepts tuples etc.
    excl = [] if excl is None else list(excl)
    slot = len(excl)
    # Ids that are still available, computed once per level instead of per child.
    remaining = sorted(set(range(n + slot)) - set(excl))
    slot_action_taken = slot in excl
    return [
        (
            0 if slot_action_taken else int(remaining[i] == slot),
            baseline_1(n - 1, excl + [remaining[i]]),
        )
        for i in range(n)
    ]

class Traffic:
    """One traffic segment fed to the simulator.

    rewards: table indexed as rewards[slot][action]
    policy:  policy tree used to pick actions for sessions of this segment
    prob:    probability that a simulated session is drawn from this segment
    """

    def __init__(self, rewards, policy, prob = 1):
        self.rewards, self.policy, self.prob = rewards, policy, prob

class CcbSimulation(pd.DataFrame):
    """Simulated interaction log with one row per (session, slot).

    Columns written by the constructor:
        session     index of the simulated session
        slot        slot index inside the session
        p_log       probability the logging policy gave to the chosen action
        r           0/1 reward, drawn with probability rewards[slot][chosen]
        chosen      id of the chosen action
        chosen_idx  position of the chosen action among the actions still available

    A policy is a nested list. Each node holds one ``(probability, subtree)``
    pair per still-available action, in ascending action-id order; ``subtree``
    is the node used for the next slot after that action has been taken.
    """

    def __init__(self, n, traffic: list, seed=None):
        """Simulate ``n`` sessions.

        Args:
            n: number of sessions to simulate.
            traffic: objects exposing ``rewards``, ``policy`` and ``prob``;
                each session picks one of them with probability ``prob``.
            seed: optional seed for a reproducible log. ``None`` (default)
                keeps drawing from the global ``numpy.random`` / ``random``
                state, exactly as before.
        """
        # Private generators when seeded, so the global RNG state is left alone.
        np_rng = np.random if seed is None else np.random.RandomState(seed)
        py_rng = random if seed is None else random.Random(seed)

        slots = []
        traffic_pmf = [t.prob for t in traffic]
        for i in range(n):
            traffic_idx = np_rng.choice(range(len(traffic_pmf)), p=traffic_pmf)
            segment = traffic[traffic_idx]
            rewards = segment.rewards
            nactions = len(rewards[0])
            nslots = len(rewards)
            actions = list(range(nactions))
            state = segment.policy
            for j in range(nslots):
                # Probabilities of the actions that are still available.
                pmf = [a[0] for a in state]
                chosen_idx = np_rng.choice(range(len(pmf)), p=pmf)
                chosen = actions[chosen_idx]
                # Descend into the subtree that conditions on this choice.
                state = state[chosen_idx][1]
                r = int(py_rng.random() < rewards[j][chosen])
                # An action can be shown only once per session.
                actions = sorted(set(actions) - {chosen})
                slots.append({'session': i, 'slot': j, 'p_log': pmf[chosen_idx], 'r': r, 'chosen': chosen, 'chosen_idx': chosen_idx})
        super().__init__(slots)

    def predict(self, policy):
        """Store, in column ``p_pred``, the probability ``policy`` gives to each logged choice.

        The policy tree is walked along the logged ``chosen_idx`` path of every
        session. Values are produced in session order, then slot order, which is
        the row order the constructor creates. Any earlier ``p_pred`` column is
        overwritten.
        """
        p_pred = []
        for _, e in self.sessions.iterrows():
            state = policy
            for i in range(len(e['p_log'])):
                pmf = [a[0] for a in state]
                chosen_idx = e['chosen_idx'][i]
                p_pred.append(pmf[chosen_idx])
                state = state[chosen_idx][1]
        self['p_pred'] = p_pred

    def cfe(self, alpha=0.05):
        """Tabulate per-slot estimates and bounds for the policy last passed to ``predict``.

        Args:
            alpha: value forwarded to the ``get`` method of both interval objects.

        Returns:
            DataFrame indexed by (name, metric), with name in {'cb', 'ccb'} and
            metric in {'est', 'lb', 'ub'}. It has one ``slot_<i>`` column per
            slot and an ``all_slots`` column holding the sum over every slot.

        Raises:
            KeyError: if ``predict`` has not been called yet.
        """
        if 'p_pred' not in self.columns:
            raise KeyError("'p_pred' column is missing: call predict(policy) before cfe()")

        nslots = int(self['slot'].max()) + 1
        # 'cb' rows: one independent estimator / interval per slot.
        cb_est = [snips.Estimator() for _ in range(nslots)]
        cb_int = [clopper_pearson.Interval() for _ in range(nslots)]

        # 'ccb' rows: one object that receives the whole session at once.
        ccb_est = pdis_cressieread.Estimator()
        ccb_int = pdis_cressieread.Interval()

        for _, e in self.sessions.iterrows():
            for i in range(len(e['p_pred'])):
                cb_est[i].add_example(p_pred=e['p_pred'][i], r=e['r'][i], p_log=e['p_log'][i])
                cb_int[i].add_example(p_pred=e['p_pred'][i], r=e['r'][i], p_log=e['p_log'][i])

            ccb_int.add_example(p_preds=e['p_pred'], rs=e['r'], p_logs=e['p_log'])
            ccb_est.add_example(p_preds=e['p_pred'], rs=e['r'], p_logs=e['p_log'])

        cb_int_results = [i.get(alpha) for i in cb_int]
        ccb_int_result = ccb_int.get(alpha)

        ccb_est_result = ccb_est.get()
        result = pd.DataFrame([
                dict({'name': 'cb', 'metric': 'est'}, **{f'slot_{i}': cb_est[i].get() for i in range(nslots)}),
                dict({'name': 'cb', 'metric': 'lb'}, **{f'slot_{i}': cb_int_results[i][0] for i in range(nslots)}),
                dict({'name': 'cb', 'metric': 'ub'}, **{f'slot_{i}': cb_int_results[i][1] for i in range(nslots)}),
                dict({'name': 'ccb', 'metric': 'est'}, **{f'slot_{i}': ccb_est_result[i] for i in range(nslots)}),
                dict({'name': 'ccb', 'metric': 'lb'}, **{f'slot_{i}': ccb_int_result[i][0] for i in range(nslots)}),
                dict({'name': 'ccb', 'metric': 'ub'}, **{f'slot_{i}': ccb_int_result[i][1] for i in range(nslots)}),
        ])
        # Sum over every slot column (was hard-coded to slot_0 + slot_1).
        # skipna=False keeps the NaN propagation of the original `+`.
        slot_cols = [f'slot_{i}' for i in range(nslots)]
        result['all_slots'] = result[slot_cols].sum(axis=1, skipna=False)
        return result.set_index(['name', 'metric'])

    @property
    def sessions(self):
        """One row per session; every column is the list of that session's per-slot values."""
        agg = {'p_log': list, 'r': list, 'chosen': list, 'chosen_idx': list}
        if 'p_pred' in self.columns:
            agg['p_pred'] = list
        return self.groupby('session').agg(agg)

## No slot dependencies

In [5]:
epsilon = 0.2

# rewards[slot][action]: both slots share the same row, so an action's reward
# probability does not depend on the slot it is shown in.
rewards = np.array([
    [0.4, 0.8, 0.2],
    [0.4, 0.8, 0.2]])

eg = EpsilonGreedy(epsilon)
# Policy tree: the outer list is slot 0 (three actions available); each inner
# list is slot 1 given the slot-0 choice (two actions left, ascending id).
# The trailing "#k" comments name the action id of each entry.
# Slot 0 exploits action 1; slot 1 exploits the better of the two remaining actions.
epsilon_greedy = [
    (eg.explore(3), [               # slot 0 -> action 0
        (eg.exploit(2), []),        # slot 1 -> action 1
        (eg.explore(2), []),        # slot 1 -> action 2
    ]),
    (eg.exploit(3), [               # slot 0 -> action 1
        (eg.exploit(2), []),        # slot 1 -> action 0
        (eg.explore(2), []),        # slot 1 -> action 2
    ]),
    (eg.explore(3), [               # slot 0 -> action 2
        (eg.explore(2), []),        # slot 1 -> action 0
        (eg.exploit(2), []),        # slot 1 -> action 1
    ])
]

In [6]:
n = 10000
# One log of n sessions per logging policy, simulated in this order:
# epsilon-greedy, deterministic baseline_1, uniformly random baseline.
sim_eg, sim_b1, sim_br = (
    CcbSimulation(n, [Traffic(rewards, logging_policy)])
    for logging_policy in (epsilon_greedy, baseline_1(3), baseline_random(3))
)
rewards

array([[0.4, 0.8, 0.2],
       [0.4, 0.8, 0.2]])

In [7]:
# One row per logging policy: mean observed reward in slot 0 and in slot 1, plus their sum.
simulation_stats = pd.DataFrame([
    dict(policy=label, **{f'slot_{s}': log[log["slot"] == s]["r"].mean() for s in (0, 1)})
    for label, log in (
        (f'epsilon-greedy({epsilon})', sim_eg),
        ('baseline_1', sim_b1),
        ('baseline_random', sim_br),
    )
]).assign(all_slots=lambda stats: stats['slot_0'] + stats['slot_1'])
simulation_stats

Unnamed: 0,policy,slot_0,slot_1,all_slots
0,epsilon-greedy(0.2),0.7298,0.4324,1.1622
1,baseline_1,0.4017,0.7992,1.2009
2,baseline_random,0.4745,0.4668,0.9413


In [8]:
# Off-policy check: score epsilon_greedy on the uniformly random log (sim_br).
# predict() overwrites sim_br['p_pred'] in place, so cfe() reflects the latest predict() call.
sim_br.predict(epsilon_greedy)
sim_br.cfe(alpha=0.05)

Unnamed: 0_level_0,Unnamed: 1_level_0,slot_0,slot_1,all_slots
name,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cb,est,0.744853,0.631566,1.376419
cb,lb,0.735612,0.612318,1.34793
cb,ub,0.763248,0.63795,1.401198
ccb,est,0.744164,0.439572,1.183735
ccb,lb,0.729458,0.411759,1.141218
ccb,ub,0.755801,0.47205,1.227851


In [9]:
# Off-policy check: score the deterministic baseline_1 policy on the uniformly random log.
sim_br.predict(baseline_1(3))
sim_br.cfe(alpha=0.05)

Unnamed: 0_level_0,Unnamed: 1_level_0,slot_0,slot_1,all_slots
name,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cb,est,0.400955,0.802548,1.203502
cb,lb,0.386488,0.515251,0.901739
cb,ub,0.420082,0.543115,0.963197
ccb,est,0.400955,0.802326,1.20328
ccb,lb,0.38427,0.782539,1.166809
ccb,ub,0.41839,0.821749,1.240139


In [10]:
# On-policy sanity check: the target equals the logging policy, so p_pred == p_log on every row.
sim_br.predict(baseline_random(3))
sim_br.cfe(alpha=0.05)

Unnamed: 0_level_0,Unnamed: 1_level_0,slot_0,slot_1,all_slots
name,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cb,est,0.4745,0.4668,0.9413
cb,lb,0.464672,0.456983,0.921654
cb,ub,0.484343,0.476637,0.96098
ccb,est,0.4745,0.4668,0.9413
ccb,lb,0.464713,0.457022,0.921734
ccb,ub,0.484287,0.476578,0.960866


In [11]:
# On-policy sanity check: score epsilon_greedy on its own log (p_pred == p_log on every row).
sim_eg.predict(epsilon_greedy)
sim_eg.cfe(alpha=0.05)

Unnamed: 0_level_0,Unnamed: 1_level_0,slot_0,slot_1,all_slots
name,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cb,est,0.7298,0.4324,1.1622
cb,lb,0.72098,0.422661,1.143641
cb,ub,0.738486,0.442178,1.180664
ccb,est,0.7298,0.4324,1.1622
ccb,lb,0.721096,0.42269,1.143786
ccb,ub,0.738504,0.44211,1.180614


In [12]:
# Off-policy check: score the deterministic baseline_1 policy on the epsilon-greedy log.
# baseline_1 gives probability 0 to most logged choices, so few rows carry weight.
sim_eg.predict(baseline_1(3))
sim_eg.cfe(alpha=0.05)

Unnamed: 0_level_0,Unnamed: 1_level_0,slot_0,slot_1,all_slots
name,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cb,est,0.42623,0.794431,1.22066
cb,lb,0.391066,0.101443,0.49251
cb,ub,0.467565,0.114367,0.581932
ccb,est,0.42623,0.788829,1.215058
ccb,lb,0.386638,0.717391,1.10403
ccb,ub,0.471399,0.82796,1.299359


In [13]:
# Off-policy check: score the uniformly random policy on the epsilon-greedy log.
sim_eg.predict(baseline_random(3))
sim_eg.cfe(alpha=0.05)

Unnamed: 0_level_0,Unnamed: 1_level_0,slot_0,slot_1,all_slots
name,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cb,est,0.466126,0.336308,0.802434
cb,lb,0.451075,0.322516,0.773591
cb,ub,0.495311,0.364611,0.859922
ccb,est,0.469148,0.462772,0.931921
ccb,lb,0.452232,0.433282,0.885513
ccb,ub,0.486112,0.492263,0.978375


## With slot dependencies

In [35]:
epsilon = 0.2

# rewards[slot][action]: the two rows differ, so the best action depends on
# the slot (action 1 in slot 0, action 2 in slot 1).
rewards = np.array([
    [0.4, 0.8, 0.2],
    [0.2, 0.4, 0.8]])

eg = EpsilonGreedy(epsilon)
# Policy tree: the outer list is slot 0; each inner list is slot 1 given the
# slot-0 choice, with the remaining actions in ascending id order.
# Slot 1 exploits the remaining action with the highest slot-1 reward.
epsilon_greedy = [
    (eg.explore(3), [               # slot 0 -> action 0
        (eg.explore(2), []),        # slot 1 -> action 1
        (eg.exploit(2), []),        # slot 1 -> action 2
    ]),
    (eg.exploit(3), [               # slot 0 -> action 1
        (eg.explore(2), []),        # slot 1 -> action 0
        (eg.exploit(2), []),        # slot 1 -> action 2
    ]),
    (eg.explore(3), [               # slot 0 -> action 2
        (eg.explore(2), []),        # slot 1 -> action 0
        (eg.exploit(2), []),        # slot 1 -> action 1
    ])
]

In [36]:
n = 10000
# Re-simulate the three logs against the slot-dependent reward table
# (rebinds sim_eg / sim_b1 / sim_br from the previous section).
sim_eg, sim_b1, sim_br = (
    CcbSimulation(n, [Traffic(rewards, logging_policy)])
    for logging_policy in (epsilon_greedy, baseline_1(3), baseline_random(3))
)
rewards

array([[0.4, 0.8, 0.2],
       [0.2, 0.4, 0.8]])

In [37]:
# One row per logging policy: mean observed reward in slot 0 and in slot 1, plus their sum.
simulation_stats = pd.DataFrame([
    dict(policy=label, **{f'slot_{s}': log[log["slot"] == s]["r"].mean() for s in (0, 1)})
    for label, log in (
        (f'epsilon-greedy({epsilon})', sim_eg),
        ('baseline_1', sim_b1),
        ('baseline_random', sim_br),
    )
]).assign(all_slots=lambda stats: stats['slot_0'] + stats['slot_1'])
simulation_stats

Unnamed: 0,policy,slot_0,slot_1,all_slots
0,epsilon-greedy(0.2),0.7379,0.7177,1.4556
1,baseline_1,0.3955,0.4072,0.8027
2,baseline_random,0.4659,0.468,0.9339


In [38]:
# Slot-dependent rewards: score epsilon_greedy on the uniformly random log.
sim_br.predict(epsilon_greedy)
sim_br.cfe(alpha=0.05)

Unnamed: 0_level_0,Unnamed: 1_level_0,slot_0,slot_1,all_slots
name,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cb,est,0.731768,0.624701,1.356469
cb,lb,0.710449,0.613525,1.323975
cb,ub,0.738928,0.639142,1.37807
ccb,est,0.729258,0.697683,1.426941
ccb,lb,0.702842,0.66571,1.368552
ccb,ub,0.746628,0.717115,1.463743


In [39]:
# Slot-dependent rewards: score the deterministic baseline_1 policy on the uniformly random log.
sim_br.predict(baseline_1(3))
sim_br.cfe(alpha=0.05)

Unnamed: 0_level_0,Unnamed: 1_level_0,slot_0,slot_1,all_slots
name,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cb,est,0.394706,0.403087,0.797793
cb,lb,0.390359,0.259308,0.649667
cb,ub,0.424004,0.28416,0.708163
ccb,est,0.394706,0.408116,0.802822
ccb,lb,0.377854,0.38411,0.761965
ccb,ub,0.411558,0.432122,0.84368


In [40]:
# On-policy sanity check: the target equals the logging policy (p_pred == p_log on every row).
sim_br.predict(baseline_random(3))
sim_br.cfe(alpha=0.05)

Unnamed: 0_level_0,Unnamed: 1_level_0,slot_0,slot_1,all_slots
name,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cb,est,0.4659,0.468,0.9339
cb,lb,0.456084,0.458181,0.914265
cb,ub,0.475736,0.477838,0.953574
ccb,est,0.4659,0.468,0.9339
ccb,lb,0.456123,0.45822,0.914343
ccb,ub,0.475677,0.47778,0.953457


In [41]:
# On-policy sanity check: score epsilon_greedy on its own log.
sim_eg.predict(epsilon_greedy)
sim_eg.cfe(alpha=0.05)

Unnamed: 0_level_0,Unnamed: 1_level_0,slot_0,slot_1,all_slots
name,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cb,est,0.7379,0.7177,1.4556
cb,lb,0.729162,0.708765,1.437927
cb,ub,0.746499,0.726508,1.473007
ccb,est,0.7379,0.7177,1.4556
ccb,lb,0.72928,0.708878,1.438158
ccb,ub,0.74652,0.726522,1.473042


In [42]:
# Slot-dependent rewards: score the deterministic baseline_1 policy on the epsilon-greedy log.
sim_eg.predict(baseline_1(3))
sim_eg.cfe(alpha=0.05)

Unnamed: 0_level_0,Unnamed: 1_level_0,slot_0,slot_1,all_slots
name,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cb,est,0.412698,0.362137,0.774835
cb,lb,0.391066,0.038399,0.429465
cb,ub,0.467565,0.066763,0.534328
ccb,est,0.412698,0.305556,0.718254
ccb,lb,0.374595,0.19064,0.565235
ccb,ub,0.450802,0.440399,0.891201


In [43]:
# Slot-dependent rewards: score the uniformly random policy on the epsilon-greedy log.
sim_eg.predict(baseline_random(3))
sim_eg.cfe(alpha=0.05)

Unnamed: 0_level_0,Unnamed: 1_level_0,slot_0,slot_1,all_slots
name,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cb,est,0.472389,0.496729,0.969119
cb,lb,0.46154,0.475687,0.937227
cb,ub,0.505816,0.519986,1.025801
ccb,est,0.477159,0.458946,0.936105
ccb,lb,0.460027,0.431139,0.891166
ccb,ub,0.494291,0.489497,0.983787


## Locked content

In [44]:
epsilon = 0.2

# rewards[slot][action]; identical rows, as in the "No slot dependencies" section.
rewards = np.array([
    [0.4, 0.8, 0.2],
    [0.4, 0.8, 0.2]])

eg = EpsilonGreedy(epsilon)
# "Locked" slot 0: each policy below gives action 1 probability 1 in slot 0 and
# the other two actions probability 0, so only the middle subtree is ever
# reached in simulation. The policies differ only in how they fill slot 1.

# Slot 1 is epsilon-greedy over the two remaining actions.
epsilon_greedy_lock0 = [
    (0, [               # slot 0 -> action 0 (never chosen)
        (eg.exploit(2), []),        # slot 1 -> action 1
        (eg.explore(2), []),        # slot 1 -> action 2
    ]),
    (1, [               # slot 0 -> action 1 (always chosen)
        (eg.exploit(2), []),        # slot 1 -> action 0
        (eg.explore(2), []),        # slot 1 -> action 2
    ]),
    (0, [               # slot 0 -> action 2 (never chosen)
        (eg.explore(2), []),        # slot 1 -> action 0
        (eg.exploit(2), []),        # slot 1 -> action 1
    ])
]

# Slot 1 is uniform over the two remaining actions.
baseline_random_lock0 = [
    (0, [
        (1/2, []),
        (1/2, []),
    ]),
    (1, [
        (1/2, []),
        (1/2, []),
    ]),
    (0, [
        (1/2, []),
        (1/2, []),
    ])
]

# Slot 1 deterministically takes the lowest remaining action id.
# The first subtree sums to 0 and is not a valid distribution; it is
# unreachable in simulation because its parent has probability 0.
baseline_1_lock0 = [
    (0, [
        (0, []),
        (0, []),
    ]),
    (1, [
        (1, []),
        (0, []),
    ]),
    (0, [
        (1, []),
        (0, []),
    ])
]

In [45]:
n = 10000
# One log per locked policy. Note that sim_b1 / sim_br are rebound here and now
# hold the *_lock0 variants rather than the logs of the earlier sections.
sim_eg_lock0, sim_b1, sim_br = (
    CcbSimulation(n, [Traffic(rewards, logging_policy)])
    for logging_policy in (epsilon_greedy_lock0, baseline_1_lock0, baseline_random_lock0)
)
rewards

array([[0.4, 0.8, 0.2],
       [0.4, 0.8, 0.2]])

In [46]:
# One row per locked logging policy: mean observed reward in slot 0 and in slot 1, plus their sum.
simulation_stats = pd.DataFrame([
    dict(policy=label, **{f'slot_{s}': log[log["slot"] == s]["r"].mean() for s in (0, 1)})
    for label, log in (
        (f'epsilon-greedy_lock0({epsilon})', sim_eg_lock0),
        ('baseline_1', sim_b1),
        ('baseline_random', sim_br),
    )
]).assign(all_slots=lambda stats: stats['slot_0'] + stats['slot_1'])
simulation_stats

Unnamed: 0,policy,slot_0,slot_1,all_slots
0,epsilon-greedy_lock0(0.2),0.7946,0.3818,1.1764
1,baseline_1,0.8006,0.4,1.2006
2,baseline_random,0.8036,0.2998,1.1034


In [47]:
# On-policy sanity check: score the locked epsilon-greedy policy on its own log.
sim_eg_lock0.predict(epsilon_greedy_lock0)
sim_eg_lock0.cfe(alpha=0.05)

Unnamed: 0_level_0,Unnamed: 1_level_0,slot_0,slot_1,all_slots
name,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cb,est,0.7946,0.3818,1.1764
cb,lb,0.786547,0.372264,1.15881
cb,ub,0.802481,0.391405,1.193887
ccb,est,0.7946,0.3818,1.1764
ccb,lb,0.786682,0.372278,1.158959
ccb,ub,0.802518,0.391322,1.193841


In [48]:
# Locked slot 0: score baseline_1_lock0 on the locked epsilon-greedy log.
# Both policies agree on slot 0, so only the slot-1 probabilities differ.
sim_eg_lock0.predict(baseline_1_lock0)
sim_eg_lock0.cfe(alpha=0.05)

Unnamed: 0_level_0,Unnamed: 1_level_0,slot_0,slot_1,all_slots
name,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cb,est,0.7946,0.40144,1.19604
cb,lb,0.786547,0.392401,1.178948
cb,ub,0.802481,0.412773,1.215255
ccb,est,0.7946,0.40144,1.19604
ccb,lb,0.786682,0.391299,1.177981
ccb,ub,0.802518,0.411582,1.2141


In [49]:
# Locked slot 0: score baseline_random_lock0 on the locked epsilon-greedy log.
sim_eg_lock0.predict(baseline_random_lock0)
sim_eg_lock0.cfe(alpha=0.05)

Unnamed: 0_level_0,Unnamed: 1_level_0,slot_0,slot_1,all_slots
name,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cb,est,0.7946,0.302135,1.096735
cb,lb,0.786547,0.278775,1.065321
cb,ub,0.802481,0.319372,1.121854
ccb,est,0.7946,0.30302,1.09762
ccb,lb,0.786682,0.28478,1.071462
ccb,ub,0.802518,0.334993,1.137511


## Variable slots count

In [50]:
epsilon = 0.2

# Two reward tables over the same two actions: rewards_1 describes sessions
# with a single slot, rewards_2 sessions with two slots (rewards[slot][action]).
rewards_1 = np.array([
    [0.2, 0.8]])

rewards_2 = np.array([
    [0.2, 0.8],
    [0.4, 0.8]])

eg = EpsilonGreedy(epsilon)

# Slot 0 explores action 0 and exploits action 1. If a second slot exists,
# only one action is left, so it is taken with probability 1.
epsilon_greedy = [
    (eg.explore(2), [
        (1, [])
    ]),
    (eg.exploit(2), [
        (1, [])
    ])
]

In [51]:
n = 10000
# Each log mixes one-slot sessions (rewards_1) and two-slot sessions (rewards_2)
# with probability 0.5 each, all driven by the same logging policy.
sim_eg, sim_b1, sim_br = (
    CcbSimulation(n, [Traffic(rewards_1, logging_policy, 0.5), Traffic(rewards_2, logging_policy, 0.5)])
    for logging_policy in (epsilon_greedy, baseline_1(2), baseline_random(2))
)
rewards_2

array([[0.2, 0.8],
       [0.4, 0.8]])

In [52]:
# One row per logging policy: mean observed reward in slot 0 and in slot 1, plus their sum.
# The slot_1 mean only covers the sessions that actually have a second slot.
simulation_stats = pd.DataFrame([
    dict(policy=label, **{f'slot_{s}': log[log["slot"] == s]["r"].mean() for s in (0, 1)})
    for label, log in (
        (f'epsilon-greedy({epsilon})', sim_eg),
        ('baseline_1', sim_b1),
        ('baseline_random', sim_br),
    )
]).assign(all_slots=lambda stats: stats['slot_0'] + stats['slot_1'])
simulation_stats

Unnamed: 0,policy,slot_0,slot_1,all_slots
0,epsilon-greedy(0.2),0.7456,0.431698,1.177298
1,baseline_1,0.1973,0.798365,0.995665
2,baseline_random,0.4932,0.595074,1.088274


In [53]:
# Variable slot count: score the uniformly random policy on the epsilon-greedy log,
# which mixes one-slot and two-slot sessions.
sim_eg.predict(baseline_random(2))
sim_eg.cfe(alpha=0.05)

Unnamed: 0_level_0,Unnamed: 1_level_0,slot_0,slot_1,all_slots
name,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cb,est,0.501992,0.431698,0.93369
cb,lb,0.481845,0.41782,0.899666
cb,ub,0.526143,0.445657,0.9718
ccb,est,0.503465,0.295316,0.798781
ccb,lb,0.490358,0.279542,0.7699
ccb,ub,0.523086,0.312064,0.83515


In [54]:
# Variable slot count: score the deterministic baseline_1 policy on the epsilon-greedy log.
sim_eg.predict(baseline_1(2))
sim_eg.cfe(alpha=0.05)

Unnamed: 0_level_0,Unnamed: 1_level_0,slot_0,slot_1,all_slots
name,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cb,est,0.200198,0.773077,0.973275
cb,lb,0.177522,0.07399,0.251513
cb,ub,0.228246,0.08946,0.317706
ccb,est,0.200198,0.398414,0.598612
ccb,lb,0.175285,0.367934,0.543219
ccb,ub,0.239615,0.433866,0.673481


In [55]:
# On-policy sanity check on the mixed one-/two-slot log (p_pred == p_log on every row).
# In the output below the cb and ccb rows disagree for slot_1.
sim_eg.predict(epsilon_greedy)
sim_eg.cfe(alpha=0.05)

Unnamed: 0_level_0,Unnamed: 1_level_0,slot_0,slot_1,all_slots
name,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cb,est,0.7456,0.431698,1.177298
cb,lb,0.736943,0.41782,1.154763
cb,ub,0.754114,0.445657,1.199771
ccb,est,0.7456,0.213,0.9586
ccb,lb,0.737064,0.204975,0.942039
ccb,ub,0.754136,0.221025,0.975161
