# Bayesian Bandit A/B-Testing

In [1]:
# !which python
# !pip install nbformat
# !pip install kaleido
# !mkdir images
# !mkdir video

In [2]:
from typing import Dict, List, Any, Union

import numpy as np
import pandas as pd
import math

from tqdm import tqdm

from scipy import stats
from scipy.stats import beta, gamma

# import util functions
from bayesian_bandit_test import Environment, Agent, Bandit
from bayesian_test import Bayesian_AB_Test

from graph import visualisation  # conda install -n python3 -c conda-forge colorlover
from graph import Video
import plotly
import plotly.graph_objects as go

# Init visualisation tool (project helper imported from `graph`); the renderer
# has to match the front-end the notebook is opened in.
plot = visualisation(renderer="vscode")  # vscode | iframe for browsers

from IPython.core.interactiveshell import InteractiveShell

# Display the value of every expression statement, not only the last one of a
# cell. Later cells rely on this, e.g. the bare `df_T[variant]` inside a loop.
InteractiveShell.ast_node_interactivity = "all"
# Show wide / long DataFrames without truncating columns or rows.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", 5000)
pd.set_option("display.width", 10000)

# Register tqdm's progress_* methods (e.g. `progress_apply`) on pandas objects.
tqdm.pandas()

# Parameters

In [3]:
# A/B-Test
# Every variant is described by one row: (name, period, ctr, cpm).
# No cold start problem: all active variants have period 0.
BANDIT_PARAMS = {
    name: {"period": period, "ctr": ctr, "cpm": cpm}
    for name, period, ctr, cpm in (
        ("A", 0, 0.1, 1),
        ("B", 0, 0.3, 2),
        # With cold start problem - additionally enable the rows below:
        # ("C", 100, 0.05, 1),
        # ("D", 200, 0.4, 3),
    )
}

# Plotting: figure size applied before figures are written to disk
WIDTH_SAVE = 1200
HEIGHT_SAVE = 400

In [4]:
# Run configuration for the bandit simulation. Two presets are kept in this
# cell; only the uncommented one ("For video") is active.

# # Early case
# config = {'optimise_for': 'ctr',
#           'n_periods': 500,
#           'max_impr_before_update_param': 100,
#           'recency_param': 0.6, # decay parameter (per day)
#           'n_periods_per_day': 24, # number of periods per day
#           'video': 'video/bandit_abcd_ctr_slow.mp4'
#          }

# For video
config = {
    # metric to optimise; the video cell distinguishes "ctr" and "cpc"
    "optimise_for": "ctr",
    "n_periods": 500,
    # NOTE(review): presumably the impression budget between two parameter
    # updates - confirm against bayesian_bandit_test.
    "max_impr_before_update_param": 5000,
    "recency_param": 0.6,  # decay parameter (per day)
    "n_periods_per_day": 1,  # number of periods per day
    # output path of the movie written by the video cell
    "video": "video/bandit_abcd_ctr_fast.mp4",
}

# n_periods is passed as config["n_periods"] + 1.
# NOTE(review): presumably so that period 0 plus n_periods further periods are
# simulated (the video cell iterates 501 frames) - confirm.
bandit = Bandit(
    bandit_params=BANDIT_PARAMS, n_periods=config["n_periods"] + 1, config=config
)
bandit.run()
# Dump the first rows of each variant's log to CSV files in the working directory.
bandit.agent.df_log["A"].head().to_csv("A.csv")
bandit.agent.df_log["B"].head().to_csv("B.csv")
# bandit.df_metrics.tail()

In [8]:
# Preview the per-period log of variant A. Only the first rows are shown:
# display.max_rows is raised to 5000 in the setup cell, so the bare frame
# would be rendered in full.
bandit.agent.df_log["A"].head(10)

Unnamed: 0,period,n_impr,n_impr_w_sum,n_clicks,n_clicks_w_sum,cost,cost_sum,ctr,cpc,alpha,beta,a,scale,cost_w_sum
0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,1,1,1.0,1000.0,
1,1,602,1.0,79,1.0,0.602,,1.0,1.0,2,1,2.0,1.0,1.0
2,2,1162,362.2,100,48.4,1.162,,0.133628,0.028124,49,315,2.3612,0.020661,1.3612
3,3,0,914.92,0,89.44,0.0,,0.097757,0.021399,90,826,2.91392,0.011181,1.91392
4,4,0,549.352,0,54.064,0.0,,0.098414,0.028639,55,496,2.548352,0.018497,1.548352
5,5,0,330.0112,0,32.8384,0.0,,0.099507,0.040471,34,298,2.329011,0.030452,1.329011
6,6,0,198.40672,0,20.10304,0.0,,0.101322,0.059563,21,179,2.197407,0.049744,1.197407
7,7,0,119.444032,0,12.461824,0.0,,0.104332,0.08975,13,108,2.118444,0.080245,1.118444
8,8,0,72.066419,0,7.877094,0.0,,0.109303,0.135972,9,65,2.071066,0.12695,1.071066
9,9,0,43.639852,0,5.126257,0.0,,0.117467,0.203392,6,40,2.04264,0.195074,1.04264


# Plotting

In [7]:
def extract_period(df: Dict[str, pd.DataFrame], period: int) -> Dict[str, pd.DataFrame]:
    """Extract the rows logged for a given period, per variant.

    Args:
        df: Mapping from variant name to that variant's log. Every frame
            must have a ``period`` column.
        period: Period to select.

    Returns:
        Mapping from variant name to the rows of its log whose ``period``
        equals ``period`` (original index preserved). Variants that have no
        row for that period are left out.
    """
    rows_by_variant = {}
    for variant, log in df.items():
        # Build the boolean mask once and reuse it for the test and the selection.
        in_period = log.period == period
        if in_period.any():
            rows_by_variant[variant] = log[in_period]
    return rows_by_variant

In [8]:
# Impressions / Clicks over time
# Two traces per variant, both read from that variant's log: the
# `n_impr_w_sum` and the `n_clicks_w_sum` column, plotted against `period`.
df = bandit.agent.df_log.copy()

p_data = []
for i, variant in enumerate(bandit.agent.variants):
    p_data += [
        plot.plot(
            x=df[variant].period,
            y=df[variant].n_impr_w_sum,
            color=i,
            opacity=0.4,
            name=f"impr. {variant}",
            showlegend=True,
        ),
        plot.plot(
            x=df[variant].period,
            y=df[variant].n_clicks_w_sum,
            color=i,
            opacity=0.7,
            name=f"clicks {variant}",
            showlegend=True,
        ),
    ]
layout = plot.layout(
    title="Observations - impr. & clicks",
    x_label="time",
    y_label="#",
    theme="dark",
    width=1200,
    height=400,
)
# Figure.show() returns None, so keep the figure object and show it separately.
fig = go.Figure(data=p_data, layout=layout)
fig.show()
# layout['width'], layout['height'] = WIDTH_SAVE, HEIGHT_SAVE
# go.Figure(data=p_data, layout=layout).write_image('images/impr_clicks.png')

In [19]:
PERIOD = 300

# Rows logged for PERIOD, keyed by variant.
df_T = extract_period(df=bandit.agent.df_log, period=PERIOD)

# Show the selected row of every variant. The bare expression is only rendered
# from inside the loop because ast_node_interactivity is set to "all".
for variant in df_T:
    print(variant)
    df_T[variant]

# Click-Through-Rate - Beta distribution
x = np.linspace(0, 0.5, 1000)
p_data = [
    plot.plot(
        x=x,
        # Pass the distribution parameters as scalars (as the video cell does)
        # instead of one-row Series.
        y=beta.pdf(x, df_T[variant].alpha.values[0], df_T[variant].beta.values[0]),
        color=i,
        opacity=0.7,
        name=variant,
        showlegend=True,
    )
    for i, variant in enumerate(df_T)
]
layout = plot.layout(
    title=f"Beta distributions at T:{PERIOD}",
    x_label="Click-Through-Rate",
    y_label="p",
    # theme="dark",
    width=1200,
    height=400,
)
layout["xaxis"]["range"] = [0, 0.5]
# Figure.show() returns None, so keep the figure object and show it separately.
fig = go.Figure(data=p_data, layout=layout)
fig.show()
# Re-use the same traces at the export size for the image written to disk.
layout["width"], layout["height"] = WIDTH_SAVE, HEIGHT_SAVE
go.Figure(data=p_data, layout=layout).write_image("images/bandit_beta_ab.png")

# Cost-per-Click - gamma distribution
x = np.linspace(0, 50, 1000)
p_data = [
    plot.plot(
        x=x,
        y=gamma.pdf(
            x,
            a=df_T[variant].a.values[0],
            scale=df_T[variant].scale.values[0],
        ),
        color=i,
        opacity=0.7,
        name=variant,
        showlegend=True,
    )
    for i, variant in enumerate(df_T)
]
layout = plot.layout(
    title=f"Gamma distributions at T:{PERIOD}",
    x_label="Cost-per-Click",
    y_label="p",
    theme="dark",
    width=1200,
    height=400,
)
layout["xaxis"]["range"] = [0, 50]
fig = go.Figure(data=p_data, layout=layout)
fig.show()

A


Unnamed: 0,period,n_impr,n_impr_w_sum,n_clicks,n_clicks_w_sum,cost,cost_sum,ctr,cpc,alpha,beta,a,scale,cost_w_sum
300,300,0,50.955205,0,5.390218,0.0,,0.105783,0.21491,6,47,2.158413,0.185521,1.158413


B


Unnamed: 0,period,n_impr,n_impr_w_sum,n_clicks,n_clicks_w_sum,cost,cost_sum,ctr,cpc,alpha,beta,a,scale,cost_w_sum
300,300,3450,3225.326086,1098,963.141961,22.873261,,0.298618,0.006263,964,2263,7.032068,0.001038,6.032068


In [8]:
# Regret over time
p_data = [ plot.plot(x=bandit.df_metrics.period, y=bandit.df_metrics.regret, color=0, opacity=0.9, name='regret', showlegend=True)]
layout = plot.layout(title='Regret', x_label='periods', y_label='#', theme='dark', width=1200, height=400)
# Figure.show() returns None, so keep the figure object and show it separately.
fig = go.Figure(data=p_data, layout=layout)
fig.show()
layout['width'], layout['height'] = WIDTH_SAVE, HEIGHT_SAVE
go.Figure(data=p_data, layout=layout).write_image('images/bandit_ab_regret.png')

# Regret - Distribution (histogram)
# np.histogram returns one more bin edge than counts (101 edges for 100 bins),
# so the counts are plotted against the bin centres to keep x and y aligned.
hist, bins = np.histogram(bandit.df_metrics.regret, bins=100)
bin_centers = (bins[:-1] + bins[1:]) / 2
p_data = [ plot.plot(x=bin_centers, y=hist, color=0, opacity=0.6, name='regret', showlegend=True)]
# The x axis of this plot is the regret value, not the period.
layout = plot.layout(title='Regret - Distribution', x_label='regret', y_label='#', theme='dark', width=1200, height=400)
fig = go.Figure(data=p_data, layout=layout)
fig.show()


# Regret - CDF
# The cumulative share of bin k covers everything up to the right edge of bin k.
p_data = [ plot.plot(x=bins[1:], y=np.cumsum(hist)/hist.sum(), color=0, opacity=0.6, name='regret', showlegend=True)]
layout = plot.layout(title='Regret - CDF', x_label='regret', y_label='p', theme='dark', width=1200, height=400)
fig = go.Figure(data=p_data, layout=layout)
fig.show()

#### CTR

In [9]:
# P(A>B)
# map to dataframe, where each row is a period and each column is a variant
df_p_ab = pd.DataFrame(bandit.df_metrics.P_ab_ctr.to_list(), columns=bandit.agent.variants)

p_data = [ plot.plot(x=bandit.df_metrics.period, y=df_p_ab[variant], color=i, opacity=0.7, name=f'P - {variant}', showlegend=True) for i, variant in enumerate(df_p_ab.columns)]
layout = plot.layout(title='p_ab', x_label='periods', y_label='#', theme='dark', width=1200, height=400)
# Figure.show() returns None, so keep the figure object and show it separately.
fig = go.Figure(data=p_data, layout=layout)
fig.show()

# Loss
# map loss_ctr, where each row is a period and each column is a variant
df_loss = pd.DataFrame(bandit.df_metrics.loss_ctr.to_list())
# Missing entries become (0, 0) so both components can be extracted below.
# Series.map per column replaces the deprecated DataFrame.applymap.
df_loss = df_loss.apply(lambda col: col.map(lambda x: (0, 0) if pd.isna(x) else x))

# One figure per column of df_loss, with one trace per component of its entries.
for variant in df_loss.columns:
    tmp1 = df_loss[variant].apply(lambda x: x[0])
    tmp2 = df_loss[variant].apply(lambda x: x[1])
    p_data = [ plot.plot(x=bandit.df_metrics.period, y=tmp1, color=0, opacity=0.7, name=f'P - {variant} - A', showlegend=True),
               plot.plot(x=bandit.df_metrics.period, y=tmp2, color=1, opacity=0.7, name=f'P - {variant} - B', showlegend=True) ]
    layout = plot.layout(title='loss', x_label='periods', y_label='#', theme='dark', width=1200, height=400)
    fig = go.Figure(data=p_data, layout=layout)
    fig.show()

#### CpC

In [10]:
# P(A>B)
# map to dataframe, where each row is a period and each column is a variant
df_p_ab = pd.DataFrame(bandit.df_metrics.P_ab_cpc.to_list(), columns=bandit.agent.variants)

p_data = [ plot.plot(x=bandit.df_metrics.period, y=df_p_ab[variant], color=i, opacity=0.7, name=f'P - {variant}', showlegend=True) for i, variant in enumerate(bandit.agent.variants)]
layout = plot.layout(title='p_ab', x_label='periods', y_label='#', theme='dark', width=1200, height=400)
# Figure.show() returns None, so keep the figure object and show it separately.
fig = go.Figure(data=p_data, layout=layout)
fig.show()

# Loss
# map loss_cpc, where each row is a period and each column is a variant
df_loss = pd.DataFrame(bandit.df_metrics.loss_cpc.to_list())
# Missing entries become (0, 0) so both components can be extracted below.
# Series.map per column replaces the deprecated DataFrame.applymap.
df_loss = df_loss.apply(lambda col: col.map(lambda x: (0, 0) if pd.isna(x) else x))

# One figure per column of df_loss, with one trace per component of its entries.
for variant in df_loss.columns:
    tmp1 = df_loss[variant].apply(lambda x: x[0])
    tmp2 = df_loss[variant].apply(lambda x: x[1])
    p_data = [ plot.plot(x=bandit.df_metrics.period, y=tmp1, color=0, opacity=0.7, name=f'P - {variant} - A', showlegend=True),
               plot.plot(x=bandit.df_metrics.period, y=tmp2, color=1, opacity=0.7, name=f'P - {variant} - B', showlegend=True) ]
    layout = plot.layout(title='loss', x_label='periods', y_label='#', theme='dark', width=1200, height=400)
    fig = go.Figure(data=p_data, layout=layout)
    fig.show()

<hr>

### Video

In [11]:
# Bandit - A/B-test - CTR
# Renders one movie frame per period: the posterior density of every variant
# present in that period plus a text box with clicks, impressions and P(a>b).
N_STEPS = bandit.df_metrics.shape[0]-1

# Read P(a>b) from the metric that is being optimised, so the text box agrees
# with the plotted distribution (CTR for every setting other than 'cpc').
p_ab_column = 'P_ab_cpc' if config['optimise_for'] == 'cpc' else 'P_ab_ctr'

colormap = ['#ff0000', '#ff00ff', '#ffff00', '#00ff00']
video = Video(xlabel='CTR', x_lim=0.5, y_lim=100, n_versions=4, colormap=colormap, txt_pos=0.5)
with video.writer.saving(video.fig, config['video'], 200):
    x = np.linspace(0, 1, 50000)
    for period in tqdm(range(N_STEPS+1)):
        df_T = extract_period(df=bandit.agent.df_log, period=period)
        txt = 'Period: {}\n\nClicks  |  Impressions  |  P(a>b)\n'.format(period)
        for i, variant in enumerate(df_T):
            # The two modes are mutually exclusive: Beta posterior for CTR,
            # Gamma posterior for CpC.
            if config['optimise_for'] == 'ctr':
                video.plts[i].set_data(x, beta.pdf(x, df_T[variant].alpha.values[0], df_T[variant].beta.values[0]))
            elif config['optimise_for'] == 'cpc':
                video.plts[i].set_data(x, gamma.pdf(x, a=df_T[variant].a.values[0], scale=df_T[variant].scale.values[0]))

            txt += '{}: {: >8.1f}  |  {: >8.1f}  |  {: >8.1f}\n'.format(variant,
                                                    df_T[variant].n_clicks_w_sum.values[0],
                                                    df_T[variant].n_impr_w_sum.values[0],
                                                    100*bandit.df_metrics[p_ab_column][period][i])

        txt += 'regret: {:.3f} '.format(bandit.df_metrics.regret_avg[period])
        video.txt_time.set_text(txt)
        video.writer.grab_frame(facecolor=video.fig.get_facecolor(), edgecolor='none')
print('Completed movie: {}'.format(config['video']))

100%|██████████| 501/501 [00:41<00:00, 12.00it/s]


Completed movie: video/bandit_abcd_ctr_fast.mp4
