## Step 2 Notebook

In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
import os
os.chdir('/content/drive/MyDrive/DataIntelligenceApplications/')

Mounted at /content/drive


In [2]:
import numpy as np
import matplotlib.pyplot as plt
from environments.JointEnvironment import *
from learners.GTS_Learner import *
from learners.GPTS_Learner import *
from data_generators.basic_generator import *
from learners.pricing.thompson_sampling import ThompsonSampling
from learners.pricing.ucb import UCB

In [3]:
json_src = 'src/basic003.json'
data_gen = BasicDataGenerator(json_src)

prices = data_gen.get_prices()
bids = data_gen.get_bids()
margins = data_gen.get_margins()

aggr_cpc = data_gen.get_costs_per_click(mode='aggregate')
aggr_n_clicks = data_gen.get_daily_clicks(mode='aggregate')
aggr_conv_rates = data_gen.get_conversion_rates(mode='aggregate')
aggr_purch = data_gen.get_future_purchases(mode='aggregate')

In [4]:
n_arms = len(bids)

min_bid = min(bids)
max_bid = max(bids)

sigma = 10

In [5]:
print(15*'-','DATA', '-'*15)
print(f'prices = {prices}')
print(f'bids = {bids}')
print(f'margins = {margins}')
print(f'conv_rates = {aggr_conv_rates}')
print(f'cpc = {aggr_cpc}')
print(f'n_clicks = {aggr_n_clicks}')
print(f'aggr_tau = {aggr_purch}')

--------------- DATA ---------------
prices = [15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
bids = [0.3, 0.35, 0.4, 0.5, 0.6, 0.75, 0.9, 1.0, 1.25, 1.5]
margins = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
conv_rates = [0.204 0.211 0.205 0.189 0.176 0.152 0.128 0.113 0.097 0.09 ]
cpc = [0.294 0.342 0.389 0.483 0.576 0.713 0.847 0.935 1.151 1.361]
n_clicks = [115.248 124.161 131.72  143.8   152.973 163.133 170.442 174.226 181.125
 185.61 ]
aggr_tau = [2.85  2.485 2.12  1.755 1.39  1.025 0.84  0.69  0.65  0.65 ]


## Obj
We have to first sample n_clicks and cpc values from the bidding environment.
Then we pass these values to the pricing learners (UCB and TS) which then proceed to update their values. After they have finished updating for the considered n_clicks, we can use the estimated conversion rate, estimated tau to update the bidding learners (GPTS and GTS). 
The optimal arm chosen by each learner indicates the solution to the problem.

Bidding environment passes reward --> we need to just pass the sampled value to the bidding learners and then compute the reward in the learners after the pricing learners' estimations.

Doubts: by doing this do we introduce randomness to the pricing learners? 
There is no overall view of joint update.

In [6]:
T = 60
n_experiments = 50 

gts_rewards_per_experiment = []
gpts_rewards_per_experiment = []

In [7]:
env = JointEnvironment(bids = bids, prices = prices, sigma = sigma, src = json_src, mode = 'aggregate')

In [None]:
env.expected_rew()

In [11]:
env.get_opt()

405.385505

In [9]:
for exp in range(0, n_experiments):
    print(f'experiment number = {exp}')
    env = JointEnvironment(bids = bids, prices = prices, sigma = sigma, src = json_src, mode = 'aggregate')
    gts_learner = GTS_Learner(n_arms = n_arms)
    gpts_learner = GPTS_Learner(arms = bids)
    for t in range(0, T):
        pulled_arm = gts_learner.pull_arm()
        sampled_n_clicks, sampled_cpc = env.round(pulled_arm)
        
        gts_learner.update(pulled_arm, reward)
        
        pulled_arm = gpts_learner.pull_arm()
        reward = env.round(pulled_arm)
        if(e == 0):
            print('reward = ' + f'{reward}')
            
        gpts_learner.update(pulled_arm, reward)
        
    gts_rewards_per_experiment.append(gts_learner.collected_rewards)
    gpts_rewards_per_experiment.append(gpts_learner.daily_collected_rewards)

experiment number = 0


NameError: ignored

In [None]:
#opt = np.max(env.means)
opt = env.get_opt()


plt.figure(0)
plt.ylabel("Regret")
plt.xlabel("t")
plt.plot(np.cumsum(np.mean(opt - gts_rewards_per_experiment, axis=0)), 'r')
plt.plot(np.cumsum(np.mean(opt - gpts_rewards_per_experiment, axis=0)), 'g')
plt.legend(['GTS', 'GPTS'])
plt.show()

In [None]:
env.get_opt_arm()

In [None]:
env.get_opt()

In [None]:
env.expected_rew()