`Exploration:` We explore the multi-armed bandit problem using three strategies: `Thompson Sampling`, `Upper Confidence Bound (UCB)`, and `Epsilon-Greedy`. The sample scenario has 10 slots and 1000 rounds (tries) in which to explore and exploit.

In [82]:
import numpy as np
import plotly.graph_objects as go
import random

In [83]:
NUM_ALGOS = 3      # number of strategies being compared
NUM_SLOTS = 10     # number of slots (arms) available each round
NUM_ROUNDS = 1000  # number of rounds (tries) per strategy

# Zero-filled float array with one row per algorithm and one column per round.
cumulative_rewards = np.zeros(shape=(NUM_ALGOS, NUM_ROUNDS))

In [84]:
def thompson(num_rounds, num_slots, *, reward_probs=None):
    """Simulate Thompson Sampling and return the cumulative reward per round.

    Each round draws one sample per slot from Beta(successes + 1,
    failures + 1) and plays the slot with the largest sample.

    Args:
        num_rounds: Number of rounds to simulate.
        num_slots: Number of slots (arms) to choose from.
        reward_probs: Optional sequence with one success probability per
            slot. When given, the reward of a round is 1 with the
            probability of the selected slot, otherwise 0. When omitted,
            the reward is a fair coin flip that does not depend on the
            selected slot.

    Returns:
        A list of length ``num_rounds`` whose entry ``t`` is the total
        reward collected in rounds ``0..t``.

    Raises:
        ValueError: If ``reward_probs`` is given and its length differs
            from ``num_slots``.
    """
    if reward_probs is not None and len(reward_probs) != num_slots:
        raise ValueError("reward_probs must contain one probability per slot")

    product_rewards_1 = [0] * num_slots  # rounds with reward 1, per slot
    product_rewards_0 = [0] * num_slots  # rounds with reward 0, per slot
    total_rewards = []
    running_total = 0

    for _ in range(num_rounds):
        product_selected = 0
        max_beta = 0

        # Strict '>' keeps the lowest-index slot when samples tie.
        for i in range(num_slots):
            beta_sample = random.betavariate(
                product_rewards_1[i] + 1, product_rewards_0[i] + 1
            )
            if beta_sample > max_beta:
                max_beta = beta_sample
                product_selected = i

        if reward_probs is None:
            reward = random.choice([0, 1])  # Simulate customer click
        else:
            reward = 1 if random.random() < reward_probs[product_selected] else 0

        if reward == 1:
            product_rewards_1[product_selected] += 1
        else:
            product_rewards_0[product_selected] += 1

        # Same value as sum(product_rewards_1), kept incrementally.
        running_total += reward
        total_rewards.append(running_total)

    return total_rewards

In [85]:
def ucb(num_rounds, num_slots, *, reward_probs=None):
    """Simulate an Upper Confidence Bound strategy and return cumulative rewards.

    Each round plays the slot with the largest value of
    ``average reward + sqrt(2 * ln(round + 1) / plays)``. A slot that has
    never been played gets an infinite bound, so it is tried before any
    already-played slot is repeated.

    Args:
        num_rounds: Number of rounds to simulate.
        num_slots: Number of slots (arms) to choose from.
        reward_probs: Optional sequence with one success probability per
            slot. When given, the reward of a round is 1 with the
            probability of the selected slot, otherwise 0. When omitted,
            the reward is a fair coin flip that does not depend on the
            selected slot.

    Returns:
        A list of length ``num_rounds`` whose entry ``t`` is the total
        reward collected in rounds ``0..t``.

    Raises:
        ValueError: If ``reward_probs`` is given and its length differs
            from ``num_slots``.
    """
    if reward_probs is not None and len(reward_probs) != num_slots:
        raise ValueError("reward_probs must contain one probability per slot")

    rewards = [0] * num_slots  # summed reward per slot
    counts = [0] * num_slots   # number of plays per slot
    total_rewards = []
    running_total = 0

    for round_num in range(num_rounds):
        product_selected = 0
        max_ucb = 0

        for i in range(num_slots):
            if counts[i] > 0:
                avg_reward = rewards[i] / counts[i]
                delta = np.sqrt(2 * np.log(round_num + 1) / counts[i])
                upper_bound = avg_reward + delta
            else:
                # Unplayed slot: force it to be selected first.
                upper_bound = float("inf")

            # Strict '>' keeps the lowest-index slot when bounds tie.
            if upper_bound > max_ucb:
                max_ucb = upper_bound
                product_selected = i

        if reward_probs is None:
            reward = random.choice([0, 1])
        else:
            reward = 1 if random.random() < reward_probs[product_selected] else 0

        rewards[product_selected] += reward
        counts[product_selected] += 1

        # Same value as sum(rewards), kept incrementally.
        running_total += reward
        total_rewards.append(running_total)

    return total_rewards

In [86]:
def e_greedy(num_rounds, num_slots, epsilon=0.1, *, reward_probs=None):
    """Simulate the epsilon-greedy strategy and return cumulative rewards.

    With probability ``epsilon`` a round plays a uniformly random slot;
    otherwise it plays the slot with the highest observed average reward
    (unplayed slots count as average 0, ties go to the lowest index).

    Args:
        num_rounds: Number of rounds to simulate.
        num_slots: Number of slots (arms) to choose from.
        epsilon: Probability of exploring in a given round.
        reward_probs: Optional sequence with one success probability per
            slot. When given, the reward of a round is 1 with the
            probability of the selected slot, otherwise 0. When omitted,
            the reward is a fair coin flip that does not depend on the
            selected slot.

    Returns:
        A list of length ``num_rounds`` whose entry ``t`` is the total
        reward collected in rounds ``0..t``.

    Raises:
        ValueError: If ``reward_probs`` is given and its length differs
            from ``num_slots``.
    """
    if reward_probs is not None and len(reward_probs) != num_slots:
        raise ValueError("reward_probs must contain one probability per slot")

    rewards = [0] * num_slots  # summed reward per slot
    counts = [0] * num_slots   # number of plays per slot
    total_rewards = []
    running_total = 0

    for _ in range(num_rounds):
        explore = random.uniform(0, 1) < epsilon
        if explore:
            product_selected = random.randint(0, num_slots - 1)
        else:
            avg_rewards = [
                total / plays if plays > 0 else 0
                for total, plays in zip(rewards, counts)
            ]
            product_selected = np.argmax(avg_rewards)

        if reward_probs is None:
            reward = random.choice([0, 1])
        else:
            reward = 1 if random.random() < reward_probs[product_selected] else 0

        rewards[product_selected] += reward
        counts[product_selected] += 1

        # Same value as sum(rewards), kept incrementally.
        running_total += reward
        total_rewards.append(running_total)

    return total_rewards

In [87]:
def run_algorithms(num_rounds, num_slots):
    """Run the three bandit strategies and return their reward histories.

    The strategies are executed in the order Thompson Sampling, UCB,
    epsilon-greedy, each with the same ``num_rounds`` and ``num_slots``.

    Returns:
        A 3-tuple ``(ts_rewards, ucb_rewards, eg_rewards)``.
    """
    strategies = (thompson, ucb, e_greedy)
    return tuple(strategy(num_rounds, num_slots) for strategy in strategies)

In [88]:
def vision(ts_rewards, ucb_rewards, eg_rewards):
    """Plot the cumulative rewards of the three strategies on one figure.

    Args:
        ts_rewards: Cumulative rewards for Thompson Sampling; its length
            determines the x-axis (rounds numbered from 1).
        ucb_rewards: Cumulative rewards for Upper Confidence Bound.
        eg_rewards: Cumulative rewards for epsilon-greedy.
    """
    rounds = list(range(1, len(ts_rewards) + 1))

    # (legend label, y-values, line colour) for each trace, in draw order.
    series = (
        ('Thompson Sampling', ts_rewards, 'red'),
        ('Upper Confidence Bound', ucb_rewards, 'green'),
        ('Epsilon-Greedy', eg_rewards, 'blue'),
    )

    fig = go.Figure()
    for label, values, colour in series:
        trace = go.Scatter(x=rounds, y=values, mode='lines', name=label, line=dict(color=colour))
        fig.add_trace(trace)

    layout_options = dict(
        title='Performance of Multi-Armed Bandit Algorithms',
        xaxis_title='Rounds',
        yaxis_title='Cumulative Rewards',
        template='plotly_dark',
    )
    fig.update_layout(**layout_options)

    fig.show()

In [89]:
def main():
    """Run all three strategies with the module-level settings and plot them."""
    vision(*run_algorithms(NUM_ROUNDS, NUM_SLOTS))

In [90]:
# Run the three-strategy comparison and display the resulting plot.
main()

`Simulation of a real-world scenario:`
We have an e-commerce website with 4 ad slots: top banner, pop-up, footer, and sidebar. Each ad slot has a certain click-through rate (CTR); this is its `True CTR`. We will use the best algorithm from the previous question to estimate the `Predicted CTR` of each ad slot.

In [91]:
# Configuration for the ad-slot scenario.

# Simulated true click probability of each of the 4 ad slots.
TRUE_CTRS = [0.05, 0.12, 0.08, 0.15]

NUM_AD_SLOTS = 4              # number of ad slots
NUM_SIMULATION_ROUNDS = 1000  # number of simulated rounds

In [92]:
def thompson_sampling_ads(num_rounds, num_ad_slots, true_ctrs):
    """Estimate ad-slot CTRs by running Thompson Sampling against simulated clicks.

    Each round draws one Beta(clicks + 1, non-clicks + 1) sample per slot,
    shows the slot with the largest sample, and simulates a click with
    probability ``true_ctrs[slot]``.

    Args:
        num_rounds: Number of rounds to simulate.
        num_ad_slots: Number of ad slots to choose from.
        true_ctrs: Click probability of each slot, indexed by slot.

    Returns:
        A pair ``(final_estimated_ctrs, estimated_ctrs)``: the per-slot
        observed click fraction after the last round, and the list of such
        per-slot fractions recorded after every round. A slot that was
        never shown has an estimate of 0.
    """
    clicks = [0] * num_ad_slots
    misses = [0] * num_ad_slots
    estimated_ctrs = []

    def snapshot():
        # Observed click fraction per slot; 0 while a slot is still unshown.
        fractions = []
        for hit, miss in zip(clicks, misses):
            shown = hit + miss
            fractions.append(hit / shown if shown > 0 else 0)
        return fractions

    for _ in range(num_rounds):
        best_slot, best_draw = 0, 0
        for slot in range(num_ad_slots):
            draw = random.betavariate(clicks[slot] + 1, misses[slot] + 1)
            if draw > best_draw:
                best_slot, best_draw = slot, draw

        # Simulate the click using the chosen slot's true CTR.
        clicked = random.random() < true_ctrs[best_slot]
        if clicked:
            clicks[best_slot] += 1
        else:
            misses[best_slot] += 1

        estimated_ctrs.append(snapshot())

    return snapshot(), estimated_ctrs

In [95]:
def run_ad_simulation():
    """Run the ad-slot Thompson Sampling simulation, print and plot the results.

    Uses the module-level ``NUM_SIMULATION_ROUNDS``, ``NUM_AD_SLOTS`` and
    ``TRUE_CTRS``. Prints the true and final estimated CTRs, then shows a
    line chart of each slot's estimated CTR after every round.
    """
    final_ctrs, ctr_history = thompson_sampling_ads(
        NUM_SIMULATION_ROUNDS, NUM_AD_SLOTS, TRUE_CTRS
    )

    # Report the true values next to the final estimates.
    print("True CTRs:", TRUE_CTRS)
    print("Estimated CTRs:", final_ctrs)

    # One line per ad slot, tracing its estimate across the rounds.
    rounds = list(range(1, NUM_SIMULATION_ROUNDS + 1))
    fig = go.Figure()

    for slot_number in range(1, NUM_AD_SLOTS + 1):
        slot_series = [ctr_history[step][slot_number - 1] for step in range(NUM_SIMULATION_ROUNDS)]
        trace = go.Scatter(x=rounds, y=slot_series, mode='lines', name=f'Ad Slot {slot_number}')
        fig.add_trace(trace)

    layout_options = dict(
        title='Estimated CTRs for Each Ad Slot Over Rounds',
        xaxis_title='Rounds',
        yaxis_title='Estimated CTR',
        template='plotly_dark',
    )
    fig.update_layout(**layout_options)

    fig.show()

In [96]:
# Run the ad-slot simulation: prints true vs. estimated CTRs and shows the plot.
run_ad_simulation()

True CTRs: [0.05, 0.12, 0.08, 0.15]
Estimated CTRs: [0.06976744186046512, 0.14939024390243902, 0.0945945945945946, 0.16756756756756758]
