# Multi-armed bandits: Maximizing business metrics while experimenting

### Simulate measurement of click on an ad

In [2]:
def measure_click(ctr):
    return 1 if np.random.uniform(0, 1) < ctr else 0


def measure_a():
    return measure_click(ctr=0.005)


def measure_b():
    return measure_click(ctr=0.007)

### Design an A/B test

In [3]:
def design_ab_test():
    def pilot_study(num_pilot_measurements):
        clicked_pre_a = np.array([measure_a() for _ in range(num_pilot_measurements)])
        clicked_pre_b = np.array([measure_b() for _ in range(num_pilot_measurements)])

        sd_1 = np.sqrt(clicked_pre_a.std() ** 2 + clicked_pre_b.std() ** 2)
        return sd_1

    # Run a pilot study to measure sd_1.
    sd_1 = pilot_study(1000)

    # Define a practical significance level of 0.001 (0.1%)
    prac_sig = 0.001

    # Calculate the number of individual measurements.
    num_ind = (2.48 * sd_1 / prac_sig) ** 2
    return int(num_ind)

In [4]:
import numpy as np

np.random.seed(17)
num_ind = design_ab_test()
num_ind

91561

Observation:
91,561 individual measurements are required.

Say you have the following data:
- CPC = cost per click of $1
- CTR = click through rate of 0.5%

How much does it take to earn $100,000 per year? 

$$ CTR \times CPC \times \frac{ads}{day} \times \frac{days}{year} = [\text{\$100,000 revenue}]$$

In [13]:
import math

n = math.ceil(100000 / 365 / 1 / 0.005)
print(f"{n} ads per day")

54795 ads per day


In [15]:
def run_ab_test(num_ind):
    clicked_a = []
    clicked_b = []

    for n in range(num_ind):
        # Randomize between A and B
        if np.random.uniform(0, 1) < 0.5:
            clicked = measure_a()
            clicked_a.append(clicked)
        else:
            clicked = measure_b()
            clicked_b.append(clicked)

    clicked_a = np.array(clicked_a)
    clicked_b = np.array(clicked_b)

    return clicked_a, clicked_b

### Analyze the A/B test data

In [17]:
def analyze_ab_test(clicked_a, clicked_b, num_ind):
    mean_a = clicked_a.mean()
    mean_b = clicked_b.mean()
    std_a = clicked_a.std()
    std_b = clicked_b.std()

    # Calculate the difference of mean click rates, m and the standard error, se.
    m = mean_b - mean_a
    se = np.sqrt((std_a**2 + std_b**2) / num_ind)

    # Calculate the z-score, which determines whether we accept or reject version B.
    z = m / se

    return z

In [18]:
np.random.seed(17)
num_ind = design_ab_test()
clicked_a, clicked_b = run_ab_test(num_ind)
z = analyze_ab_test(clicked_a, clicked_b, num_ind)
num_ind, z

(91561, 2.954555022088617)

Observation: Since z = 2.95 (z > 1.64), we would accept the change and replace model A with model B.

### Trace the CTR as the A/B test runs.

In [19]:
def ab_test(num_ind):
    sum_clicks = 0.0
    num_ads = 0.0
    sum_a, sum_b = 0.0, 0.0
    num_a, num_b = 0, 0

    ctr_vs_n = []
    ctr_a = []
    ctr_b = []

    for n in range(num_ind):
        if np.random.uniform(0, 1) < 0.5:
            clicked = measure_a()
            sum_a += clicked
            num_a += 1
        else:
            clicked = measure_b()
            sum_b += clicked
            num_b += 1
        sum_clicks += clicked
        num_ads += 1
        if num_a > 0 and num_b > 0:
            ctr_a.append(sum_a / num_a)
            ctr_b.append(sum_b / num_b)
            ctr_vs_n.append(sum_clicks / num_ads)

    return ctr_vs_n, ctr_a, ctr_b

In [20]:
def epsilon_greedy(num_ind, epsilon):
    sum_clicks = 0.0
    num_ads = 0.0
    sum_a, sum_b = 0.0, 0.0
    num_a, num_b = 0, 0
    ctr_vs_n = []
    used_b = []

    for _ in range(num_ind):
        select = "Randomize"
        if np.random.uniform(0, 1) < 1 - epsilon:
            ctr_a = sum_a / num_a if num_a > 0 else 0
            ctr_b = sum_b / num_b if num_b > 0 else 0
            if ctr_a > ctr_b:
                select = "A"
            elif ctr_b > ctr_a:
                select = "B"
        if select == "Randomize":
            if np.random.uniform(0, 1) < 0.5:
                select = "A"
            else:
                select = "B"

        if select == "A":
            clicked = measure_a()
            sum_a += clicked
            num_a += 1
            used_b.append(False)
        else:
            clicked = measure_b()
            sum_b += clicked
            num_b += 1
            used_b.append(True)
        sum_clicks += clicked
        num_ads += 1

        ctr_vs_n.append(sum_clicks / num_ads)
    return ctr_vs_n, used_b