In [10]:
from src.banditcoot.arms import BernoulliArm
from src.banditcoot.algorithms import EpsilonGreedy
import pandas as pd
import numpy as np

In [11]:
# Simulation size: number of independent runs, and time steps per run.
n_iter                = 1
horizon               = 100

# Undiscounted price; every arm sells at this price less its discount.
list_price            = 69.99

# Per-arm settings. The three lists are parallel: position k describes arm k.
discounts             = [0.10, 0.20, 0.30, 0.40]
true_conversion_rates = [0.03, 0.10, 0.12, 0.13]  # used to construct the simulated arms
est_conversion_rates  = [0.05, 0.08, 0.12, 0.15]  # handed to the algorithm as `conv_rates`

# Fail fast if an arm is added to one list but not the others.
assert len(discounts) == len(true_conversion_rates) == len(est_conversion_rates), \
    "per-arm lists must all have the same length"

# Revenue per converting user on each arm: the list price after the discount.
arpu = [(1 - discount) * list_price for discount in discounts]
# Simulated arms: one BernoulliArm per true conversion rate, in arm order.
arms = [BernoulliArm(rate) for rate in true_conversion_rates]

# Epsilon-greedy policy over those arms. The arm count is derived from `arms`
# rather than hard-coded, so it cannot drift out of sync with the per-arm
# lists when an arm is added or removed.
algo = EpsilonGreedy(
    epsilon = 0.2,
    n_arms = len(arms),
    rewards = arpu,
    conv_rates = est_conversion_rates
)

In [12]:
# Pre-allocate one slot per (simulation, time step) pair for every recorded
# series; all slots start out as 0.0.
chosen_arms        = [0.0] * (n_iter * horizon)
rewards            = [0.0] * (n_iter * horizon)
cumulative_rewards = [0.0] * (n_iter * horizon)
sim_nums           = [0.0] * (n_iter * horizon)
times              = [0.0] * (n_iter * horizon)

# Number of arms in play.
n_arms = len(arms)

In [13]:
# Build a cohort of 100 users, all tagged as cohort 1.
users = pd.DataFrame(
    {
        "cohort": 1,
        "user_id": range(100),
    }
)

# Ask the algorithm for one arm per user, in user order.
users["arm"] = [algo.select_arm() for _ in range(len(users))]

# Draw from each user's assigned arm to record whether a conversion occurs.
users["conversion"] = users["arm"].map(lambda arm_idx: arms[arm_idx].draw()).astype(int)

# Revenue = conversion flag times the ARPU of the assigned arm.
users["revenue"] = users["conversion"] * users["arm"].map(lambda arm_idx: arpu[arm_idx])

# TODO: update the estimated reward of each arm from this cohort's results.
# The intended per-arm batch step (e.g. an `algo.batch_update` over
# `users.query("arm == i")` for each arm i) has not been written yet.

In [14]:
# Preview the first 25 rows of the simulated cohort.
users.head(25)

Unnamed: 0,cohort,user_id,arm,conversion,revenue
0,1,0,3,1,41.994
1,1,1,3,0,0.0
2,1,2,3,0,0.0
3,1,3,3,0,0.0
4,1,4,3,0,0.0
5,1,5,3,0,0.0
6,1,6,3,1,41.994
7,1,7,3,0,0.0
8,1,8,3,0,0.0
9,1,9,3,0,0.0


In [None]:
# Run `n_iter` simulations of `horizon` steps each, writing every step into the
# pre-allocated result lists (one slot per simulation/step pair).
for sim in range(1, n_iter + 1):
    # Re-initialise the algorithm at the start of every simulation.
    # NOTE(review): presumably this resets its per-arm estimates; confirm in EpsilonGreedy.
    algo.initialize(n_arms)

    for t in range(1, horizon + 1):
        # Flat position of (sim, t) in the result lists.
        index = (sim - 1) * horizon + t - 1
        sim_nums[index] = sim
        times[index] = t

        # choose an arm for time t
        chosen_arm = algo.select_arm()
        chosen_arms[index] = chosen_arm

        # record reward from chosen arm at time t; `arpu` holds the payout per arm
        reward = arms[chosen_arm].draw() * arpu[chosen_arm]
        rewards[index] = reward

        # record cumulative rewards, restarting the running total on the first
        # step of each simulation so it neither wraps around to the last slot
        # (index -1) nor carries over the previous simulation's total
        if t == 1:
            cumulative_rewards[index] = reward
        else:
            cumulative_rewards[index] = cumulative_rewards[index - 1] + reward

        # update estimated reward from each arm
        algo.update(chosen_arm, reward)

# `return` is a SyntaxError outside a function, so keep the collected series
# in a variable for later cells instead.
simulation_results = [sim_nums, times, chosen_arms, rewards, cumulative_rewards]