# A/B Testing from Scratch: Multi-armed Bandits

In [1]:
import numpy as np
import pandas as pd

#widgets
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

#plots
import matplotlib.pyplot as plt
from plotnine import *

#stats
import scipy as sp
import statsmodels as sm

In [28]:
def gen_campaigns(ps, total_ts, clicks, scaler, seed):
    """Generate a fake per-timestep click/conversion table for several campaigns.

    Args:
        ps: sequence of true conversion rates, one per campaign (group).
        total_ts: number of timesteps (rows) to simulate.
        clicks: mode of the triangular distribution that per-timestep click
            counts are drawn from; the bounds are ``clicks/2`` and
            ``clicks*1.5``.
        scaler: divisor applied to standard-normal noise that is added to
            each conversion rate; larger values mean less noise.
        seed: value passed to ``np.random.seed``. Note that this reseeds the
            global NumPy random state.

    Returns:
        A DataFrame with a ``ts`` column plus, for every campaign ``i``, the
        columns ``click_i``, ``conv_i``, ``cumu_click_i``, ``cumu_conv_i``
        and ``cumu_rate_i``.
    """
    nb_groups = len(ps)
    nb_draws = total_ts * nb_groups

    # The global RNG is reseeded before each draw, so clicks and noise are
    # both reproducible from `seed` alone.
    np.random.seed(seed)
    ns = np.random.triangular(clicks / 2, clicks, clicks * 1.5, size=nb_draws).astype(int)
    np.random.seed(seed)
    es = np.random.randn(nb_draws) / scaler

    df = pd.DataFrame({'ts': range(total_ts)})
    for i, p in enumerate(ps):
        # Each campaign owns a contiguous slice of the flat random draws.
        r = slice(i * total_ts, (i + 1) * total_ts)
        group_clicks = ns[r]
        raw_conv = ((p + es[r]) * group_clicks).astype(int)
        df[f'click_{i}'] = group_clicks
        # Noise can push the noisy rate below 0 or above 1; conversions must
        # stay within [0, clicks] to remain physically meaningful.
        df[f'conv_{i}'] = np.clip(raw_conv, 0, group_clicks)
        df[f'cumu_click_{i}'] = df[f'click_{i}'].cumsum()
        df[f'cumu_conv_{i}'] = df[f'conv_{i}'].cumsum()
        df[f'cumu_rate_{i}'] = df[f'cumu_conv_{i}'] / df[f'cumu_click_{i}']
    return df
    
#     conv_days = pd.DataFrame({'click_day':range(nb_days),'click_a':n1,'conv_a':c1,'click_b':n2,'conv_b':c2})
#     conv_days =  conv_days[['click_day','click_a','click_b','conv_a','conv_b']]
#     conv_days['cumu_click_a'] = conv_days.click_a.cumsum()
#     conv_days['cumu_click_b'] = conv_days.click_b.cumsum()
#     conv_days['cumu_conv_a'] = conv_days.conv_a.cumsum()
#     conv_days['cumu_conv_b'] = conv_days.conv_b.cumsum()
#     conv_days['cumu_rate_a'] = conv_days.cumu_conv_a / conv_days.cumu_click_a
#     conv_days['cumu_rate_b'] = conv_days.cumu_conv_b / conv_days.cumu_click_b
#     return conv_days

# Simulate 60 timesteps for three campaigns and preview the first rows.
df = gen_campaigns(
    ps=[0.1, 0.12, 0.13],
    total_ts=60,
    clicks=100,
    scaler=300,
    seed=1412,  # god-mode
)
df.head()

Unnamed: 0,ts,click_0,conv_0,cumu_click_0,cumu_conv_0,cumu_rate_0,click_1,conv_1,cumu_click_1,cumu_conv_1,cumu_rate_1,click_2,conv_2,cumu_click_2,cumu_conv_2,cumu_rate_2
0,0,125,12,125,12,0.096,100,12,100,12,0.12,112,14,112,14,0.125
1,1,114,11,239,23,0.096234,54,6,154,18,0.116883,77,10,189,24,0.126984
2,2,67,6,306,29,0.094771,95,11,249,29,0.116466,54,6,243,30,0.123457
3,3,96,9,402,38,0.094527,76,9,325,38,0.116923,105,13,348,43,0.123563
4,4,89,9,491,47,0.095723,96,11,421,49,0.11639,103,13,451,56,0.124169


In [65]:
class Arm:
    """One bandit arm that converts with a fixed probability ``true_p``.

    The arm keeps running totals of how many times it was pulled
    (``impressions``) and how many of those pulls succeeded (``actions``).
    """

    def __init__(self, true_p):
        self.true_p = true_p
        self.reset()

    def reset(self):
        """Zero both running counters."""
        self.impressions, self.actions = 0, 0

    def get_state(self):
        """Return the counters as an ``(impressions, actions)`` tuple."""
        return (self.impressions, self.actions)

    def pull(self):
        """Pull the arm once and return 1 on success, 0 otherwise."""
        self.impressions += 1
        # A uniform draw in [0, 1) falls below true_p with probability true_p.
        outcome = int(np.random.random() < self.true_p)
        self.actions += outcome
        return outcome
# Smoke test: pull a 10%-rate arm 100 times and inspect its counters.
a = Arm(0.1)
for i in range(100):
    a.pull()
a.get_state()

(100, 10)

In [123]:
class MusketeerEnv:
    """Simulated multi-armed bandit environment built from ``Arm`` objects.

    Each call to ``step`` draws a random number of impressions, distributes
    them over the arms according to the supplied probabilities, and records
    a snapshot of every arm's cumulative counters.

    Args:
        true_ps: true conversion rate of each arm, one entry per arm.
        avg_impressions: mode of the triangular distribution used to draw the
            number of impressions per step (bounds are half and 1.5x of it).
    """

    # The default is an immutable tuple and is copied in __init__, so that
    # mutating one environment's `true_ps` can never leak into another.
    def __init__(self, true_ps=(0.1, 0.12, 0.13), avg_impressions=500):
        self.true_ps = list(true_ps)
        self.avg_impressions = avg_impressions
        self.nb_arms = len(self.true_ps)
        self.reset()

    def reset(self):
        """Restart the simulation with fresh arms and return the initial state."""
        self.t = -1  # first step() bumps this to 0
        self.ds = []
        self.arms = [Arm(p) for p in self.true_ps]
        return self.get_state()

    def get_state(self):
        """Return a list with each arm's ``(impressions, actions)`` tuple."""
        return [arm.get_state() for arm in self.arms]

    def get_impressions(self):
        """Draw the (integer) number of impressions for a single step."""
        return int(np.random.triangular(self.avg_impressions / 2,
                                        self.avg_impressions,
                                        self.avg_impressions * 1.5))

    def step(self, ps):
        """Advance one timestep.

        Args:
            ps: per-arm allocation probabilities, forwarded as ``p`` to
                ``np.random.choice`` over the arm indices.

        Returns:
            The state (see ``get_state``) after all impressions are served.
        """
        self.t += 1
        impressions = self.get_impressions()
        # Assign every impression to an arm, then pull that arm once.
        for i in np.random.choice(a=self.nb_arms, size=impressions, p=ps):
            self.arms[i].pull()
        self.record()
        return self.get_state()

    def record(self):
        """Append a snapshot of the current timestep and arm counters to ``ds``."""
        d = {'t': self.t}
        for i, arm in enumerate(self.arms):
            d[f'impressions_{i}'], d[f'actions_{i}'] = arm.get_state()
        self.ds.append(d)

    def show_df(self):
        """Return the recorded history as a DataFrame.

        Columns are ``t``, then ``rate_i``, ``impressions_i`` and
        ``actions_i`` for every arm, where ``rate_i`` is
        ``actions_i / impressions_i``. If nothing has been recorded yet an
        empty frame with those columns is returned.
        """
        arm_ids = range(self.nb_arms)
        cols = ['t'] + [f'rate_{i}' for i in arm_ids] + \
               [f'impressions_{i}' for i in arm_ids] + \
               [f'actions_{i}' for i in arm_ids]
        if not self.ds:
            # No step() has run yet, so there are no counter columns to divide.
            return pd.DataFrame(columns=cols)
        df = pd.DataFrame(self.ds)
        for i in arm_ids:
            df[f'rate_{i}'] = df[f'actions_{i}'] / df[f'impressions_{i}']
        return df[cols]
# Build an environment using the constructor's default arguments.
env = MusketeerEnv()
# env.step([0.1,0.2,0.7])

In [124]:
# Run ten steps with a fixed 10% / 20% / 70% traffic split and print the
# cumulative (impressions, actions) state after each one.
for i in range(10):
    state = env.step([0.1, 0.2, 0.7])
    print(state)

[(39, 6), (104, 9), (339, 47)]
[(97, 12), (204, 20), (694, 98)]
[(148, 19), (320, 37), (1127, 157)]
[(191, 22), (408, 53), (1461, 196)]
[(252, 24), (525, 71), (1898, 257)]
[(317, 28), (648, 81), (2328, 307)]
[(372, 32), (740, 94), (2644, 346)]
[(418, 40), (844, 106), (2963, 385)]
[(478, 43), (929, 111), (3317, 438)]
[(528, 47), (1032, 125), (3641, 481)]


In [125]:
# Display the per-step history recorded by the environment.
env.show_df()

Unnamed: 0,t,rate_0,rate_1,rate_2,impressions_0,impressions_1,impressions_2,actions_0,actions_1,actions_2
0,0,0.153846,0.086538,0.138643,39,104,339,6,9,47
1,1,0.123711,0.098039,0.14121,97,204,694,12,20,98
2,2,0.128378,0.115625,0.139308,148,320,1127,19,37,157
3,3,0.115183,0.129902,0.134155,191,408,1461,22,53,196
4,4,0.095238,0.135238,0.135406,252,525,1898,24,71,257
5,5,0.088328,0.125,0.131873,317,648,2328,28,81,307
6,6,0.086022,0.127027,0.130862,372,740,2644,32,94,346
7,7,0.095694,0.125592,0.129936,418,844,2963,40,106,385
8,8,0.089958,0.119483,0.132047,478,929,3317,43,111,438
9,9,0.089015,0.121124,0.132107,528,1032,3641,47,125,481
