In [3]:
import numpy as np
import pandas as pd
import time

from functools import partial
from scipy.stats import randint, uniform
from sim_lib import simulation

pd.options.mode.chained_assignment = None


In [4]:
def eps_greedy(history: pd.DataFrame, eps: float):
    """Choose a banner with an epsilon-greedy rule.

    With probability ``eps`` a row label of ``history`` is drawn uniformly at
    random (exploration). Otherwise the label with the highest smoothed
    click-through rate ``clicks / (impressions + 10)`` is returned
    (exploitation); ties resolve to the first such row.

    Parameters
    ----------
    history : pd.DataFrame
        One row per banner with numeric ``clicks`` and ``impressions`` columns.
    eps : float
        Exploration probability. ``eps <= 0`` is always greedy and
        ``eps >= 1`` is always random, because the draw lies in ``[0, 1)``.

    Returns
    -------
    The index label of the selected row of ``history``.
    """
    # The scipy draws are kept as-is so seeded runs consume the random stream
    # exactly as before.
    if uniform.rvs() < eps:
        n = history.shape[0]
        return history.index[randint.rvs(0, n)]

    # The +10 in the denominator pulls the estimate of rarely shown banners
    # towards zero, so a single lucky click does not dominate.
    ctr = history['clicks'] / (history['impressions'] + 10)
    # idxmax() yields the label itself. np.argmax on a Series returned a label
    # in older pandas and a position in newer pandas, which made the former
    # `history.index[np.argmax(ctr)]` lookup version dependent.
    return ctr.idxmax()

# Baseline policy used by the simulation: explore 8% of the time.
policy = partial(eps_greedy, eps=0.08)

In [5]:
def bernouli_strategy(history: pd.DataFrame, k: int = 1, n: int = 5):
    """Thompson-sampling style choice using one Beta draw per banner.

    For every row a value is sampled from
    ``Beta(clicks + k, impressions - clicks + n)`` using NumPy's global RNG,
    and the label of the row with the largest sample is returned.

    Parameters
    ----------
    history : pd.DataFrame
        One row per banner with ``clicks`` and ``impressions`` columns.
    k : int
        Added to the click count to form the first Beta parameter.
    n : int
        Added to the non-click count to form the second Beta parameter.

    Returns
    -------
    The index label of the row whose sampled value is highest.
    """
    clicked = history['clicks']
    not_clicked = history['impressions'] - clicked
    samples = np.random.beta(clicked + k, not_clicked + n)
    best_position = np.argmax(samples)
    return history.index[best_position]

In [68]:
class UCB_strategy:
    """Stateful upper-confidence-bound (UCB1-style) banner policy.

    Each call scores every row of ``history`` as

        (clicks + 1) / (impressions + 1) + sqrt(2 * ln(t) / (impressions + 1))

    where ``t`` is the number of times this instance has been called
    (including the current call), and returns the label of the top-scoring
    row. Create a fresh instance for every simulation run, since ``t`` is
    kept on the instance.
    """

    def __init__(self):
        # Number of decisions made so far by this instance.
        self.time_stamp = 0

    def __call__(self, history: pd.DataFrame):
        """Return the index label of the banner with the highest UCB score.

        Parameters
        ----------
        history : pd.DataFrame
            One row per banner with numeric ``clicks`` and ``impressions``
            columns.
        """
        self.time_stamp += 1
        shown = history['impressions'] + 1
        # Exploration bonus: zero on the very first call (ln 1 == 0) and
        # larger for banners that have been shown less often.
        bonus = np.sqrt(2 * np.log(self.time_stamp) / shown)
        bets = (history['clicks'] + 1) / shown + bonus
        # idxmax() returns the label directly; np.argmax on a Series is a
        # label in older pandas and a position in newer pandas.
        return bets.idxmax()


In [81]:
# seed for homework
# Re-seeds NumPy's global RNG right before the run.
# NOTE(review): presumably `simulation` draws from this RNG — confirm in sim_lib.
np.random.seed(seed=384758917)

# Wall-clock timing of the epsilon-greedy baseline (`policy`, eps=0.08).
start = time.time()
output = simulation(policy, n=200000)
end = time.time()
# Last expression of the cell: elapsed seconds, shown as the cell output.
end - start

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


374.4513592720032

In [82]:
# Baseline (epsilon-greedy) result: total regret, regret per round,
# and the value stored under 'total_banners'.
(
    output['regret'],
    output['regret'] / output['rounds'],
    output['total_banners'],
)

(2792.237649427154, 0.01396118824713577, 174)

In [77]:
# seed for homework
# Same seed as the baseline run, re-applied to NumPy's global RNG before this run.
np.random.seed(seed=384758917)

# Wall-clock timing of the Beta-sampling strategy (`bernouli_strategy`, default k and n).
start = time.time()
output_bernoulli = simulation(bernouli_strategy, n=200000)
end = time.time()
# Last expression of the cell: elapsed seconds, shown as the cell output.
end - start

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


407.91813683509827

In [75]:
# Beta-sampling strategy result: total regret, regret per round,
# and the value stored under 'total_banners'.
(
    output_bernoulli['regret'],
    output_bernoulli['regret'] / output_bernoulli['rounds'],
    output_bernoulli['total_banners'],
)

(1662.978266216189, 0.008314891331080945, 216)

In [36]:
# seed for homework
# Same seed as the other runs, re-applied to NumPy's global RNG before this run.
np.random.seed(seed=384758917)

# NOTE(review): `UCB_random_strategy` is not defined in any cell visible in this
# notebook (only `UCB_strategy` is) — confirm where it comes from before re-running.
# A new instance is created for the run and passed as the policy callable.
start = time.time()
output_ucb = simulation(UCB_random_strategy(), n=200000)
end = time.time()
# Last expression of the cell: elapsed seconds, shown as the cell output.
end - start

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


460.8145442008972

In [37]:
# UCB run result: total regret, regret per round,
# and the value stored under 'total_banners'.
(
    output_ucb['regret'],
    output_ucb['regret'] / output_ucb['rounds'],
    output_ucb['total_banners'],
)

(9157.451510862631, 0.045787257554313156, 175)

In [69]:
# seed for homework
# Same seed as the other runs, re-applied to NumPy's global RNG before this run.
np.random.seed(seed=384758917)

# NOTE(review): `normal_strategy` is not defined in any cell visible in this
# notebook — confirm where it comes from before re-running.
start = time.time()
output_gaussian = simulation(normal_strategy, n=200000)
end = time.time()
# Last expression of the cell: elapsed seconds, shown as the cell output.
end - start

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


493.2178587913513

In [70]:
# Gaussian-strategy run result: total regret, regret per round,
# and the value stored under 'total_banners'.
(
    output_gaussian['regret'],
    output_gaussian['regret'] / output_gaussian['rounds'],
    output_gaussian['total_banners'],
)

(16196.556375302382, 0.08098278187651191, 183)