## Step 2 Notebook

In [1]:
import os

from google.colab import drive

# Mount Google Drive (forcing a remount), then work from the project folder
# stored on it so the relative paths used later in the notebook resolve.
drive.mount('/content/drive', force_remount=True)
os.chdir('/content/drive/MyDrive/DataIntelligenceApplications/')

Mounted at /content/drive


In [2]:
import numpy as np
import matplotlib.pyplot as plt
from environments.joint_environment import *
from learners.joint.GPTS_Learner_Joint import *
from data_generators.basic_generator import *
from learners.joint.thompson_sampling_Joint import ThompsonSampling
#from learners.pricing.thompson_sampling import ThompsonSampling


In [3]:
# JSON scenario file; the same path is later handed to JointEnvironment.
json_src = 'src/basic003.json'
data_gen = BasicDataGenerator(json_src)

# Candidate prices and bids (the arms handed to the learners below) and the margins.
# NOTE(review): margins presumably line up index-by-index with prices — confirm.
prices = data_gen.get_prices()
bids = data_gen.get_bids()
margins = data_gen.get_margins()

# Quantities requested from the generator in 'aggregate' mode.
# NOTE(review): cpc and n_clicks are presumably indexed by bid, while
# conversion rates and future purchases are indexed by price — confirm
# against BasicDataGenerator.
aggr_cpc = data_gen.get_costs_per_click(mode='aggregate')
aggr_n_clicks = data_gen.get_daily_clicks(mode='aggregate')
aggr_conv_rates = data_gen.get_conversion_rates(mode='aggregate')
aggr_purch = data_gen.get_future_purchases(mode='aggregate')

In [4]:
# Number of bidding arms and the range spanned by the candidate bids.
n_arms = len(bids)
min_bid, max_bid = min(bids), max(bids)

# NOTE(review): presumably a noise standard deviation; it is not referenced
# by any other cell visible in this notebook.
sigma = 10

In [5]:
# Dump the generator's data in one go: a banner line followed by one
# "name = value" line per quantity.
print('-' * 15, 'DATA', '-' * 15)
print(
    f'prices = {prices}',
    f'bids = {bids}',
    f'margins = {margins}',
    f'conv_rates = {aggr_conv_rates}',
    f'cpc = {aggr_cpc}',
    f'n_clicks = {aggr_n_clicks}',
    f'aggr_tau = {aggr_purch}',
    sep='\n',
)

--------------- DATA ---------------
prices = [15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
bids = [0.3, 0.35, 0.4, 0.5, 0.6, 0.75, 0.9, 1.0, 1.25, 1.5]
margins = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
conv_rates = [0.204 0.211 0.205 0.189 0.176 0.152 0.128 0.113 0.097 0.09 ]
cpc = [0.294 0.342 0.389 0.483 0.576 0.713 0.847 0.935 1.151 1.361]
n_clicks = [115.248 124.161 131.72  143.8   152.973 163.133 170.442 174.226 181.125
 185.61 ]
aggr_tau = [2.85  2.485 2.12  1.755 1.39  1.025 0.84  0.69  0.65  0.65 ]


## Objective
We first have to sample the `n_clicks` and `cpc` values from the bidding environment.
We then pass these values to the pricing learners (UCB and TS), which update their estimates. Once they have finished updating for the sampled `n_clicks`, we use the estimated conversion rate and the estimated tau to update the bidding learners (GPTS and GTS).
The optimal arm chosen by each learner gives the solution to the problem.

The bidding environment passes the reward --> we just need to pass the sampled values to the bidding learners and then compute the reward inside the learners, once the pricing learners have produced their estimates.

Doubts: by doing this, do we introduce randomness into the pricing learners?
There is no overall view of the joint update.

In [6]:
# Number of rounds per experiment and number of repeated experiments.
# (Both are assigned again by the experiment cell further down.)
T, n_experiments = 20, 5

# Reward histories, one entry per experiment, kept separately per learner.
gpts_rewards_per_experiment, ts_rewards_per_experiment = [], []

In [7]:
# Build the joint environment from the same JSON source, in aggregate mode.
env = JointEnvironment(src=json_src, mode='aggregate')

Environment created with fixed bid: 0.5


In [8]:
#env.expected_rew()

In [9]:
#print(f'arm n°{env.get_opt_arm()}')
#print(env.get_opt())

In [None]:
# Rounds per experiment and number of independent experiments.
# NOTE: this overrides the T = 20 assigned in an earlier cell.
T = 40
n_experiments = 5 

# Reset the per-experiment histories so re-running this cell starts from scratch.
gpts_rewards_per_experiment = []
ts_rewards_per_experiment = []

for exp in range(0, n_experiments):
    print(f'experiment number = {exp}')
    # Every experiment gets a new environment and new learners:
    # GPTS over the candidate bids, Thompson Sampling over the candidate prices.
    env = JointEnvironment(src = json_src, mode = 'aggregate')
    gpts_learner = GPTS_Learner(arms = bids)
    ts_learner = ThompsonSampling(arm_values = prices)
    for t in range(0, T):
      
        ts_learner.next_day()
        # Delayed feedback: when the environment has purchase data for day t,
        # pair it with the arms looked up at day t - 30 and feed each pair to
        # the pricing learner.
        # NOTE(review): the 30 presumably is the purchase-observation delay in
        # days — confirm against JointEnvironment.
        month_purchases = env.get_next_purchases_at_day(t, keep=False)
        if month_purchases is not None:
            pulled_arms = env.get_selected_arms_at_day(t - 30, keep=False)
            for arm, n_purchases in zip(pulled_arms, month_purchases):
                ts_learner.update_single_future_purchase(arm, n_purchases)
          
        # Pricing step: pull a price arm and play it for one day in the environment.
        pricing_pulled_arm = ts_learner.pull_arm()  
        daily_reward = env.day_round(pricing_pulled_arm)
      
        #daily_reward = env.pricing_day_round(pricing_pulled_arm, bidding_reward['n_clicks'], bidding_reward['cpc'])
        #daily_reward = env.pricing_day_round(pricing_pulled_arm, aggr_n_clicks[4], aggr_cpc[4])

        # Each row of daily_reward unpacks as (outcome, cost); update the
        # pricing learner once per row.
        for outcome, cost in daily_reward:
            ts_learner.update(pricing_pulled_arm, outcome, cost)

        # Bidding step: pull a bid arm and get the environment's reward dict for it.
        bidding_pulled_arm = gpts_learner.pull_arm()
        bidding_reward = env.bidding_round(bidding_pulled_arm)  
        # Enrich the reward dict with pricing-side quantities before updating GPTS:
        # the TS learner's current future-purchase estimate for the pulled price,
        bidding_reward['tau'] = ts_learner.next_purchases_estimation[pricing_pulled_arm]
        # the mean of column 0 of daily_reward (the `outcome` column) for this day,
        bidding_reward['conv_rates'] = np.sum(daily_reward[:,0])/np.size(daily_reward[:,0])
        # and the single margin entry matching the pulled price (replacing the
        # indexable 'margin' value returned by the environment).
        bidding_reward['margin'] = bidding_reward['margin'][pricing_pulled_arm]
        gpts_learner.update(bidding_pulled_arm, bidding_reward)

            
    # Keep each learner's collected daily rewards for this experiment.
    ts_rewards_per_experiment.append(ts_learner.daily_collected_rewards)
    gpts_rewards_per_experiment.append(gpts_learner.daily_collected_rewards)

experiment number = 0
Environment created with fixed bid: 0.5
arm = 5
passed reward dictionary {'n_clicks': 163.3913645980737, 'cpc': 0.7233267450659836, 'margin': 11, 'tau': 0.0, 'conv_rates': 0.0625}
calculated reward -5.853780765438365
arm = 3
passed reward dictionary {'n_clicks': 143.80782907394567, 'cpc': 0.4823949587847223, 'margin': 6, 'tau': 0.0, 'conv_rates': 0.2013888888888889}
calculated reward 104.39562168530463
arm = 0
passed reward dictionary {'n_clicks': 114.59359328191454, 'cpc': 0.288225047731706, 'margin': 12, 'tau': 0.0, 'conv_rates': 0.06944444444444445}
calculated reward 62.4659171748346
arm = 9
passed reward dictionary {'n_clicks': 187.92950734948667, 'cpc': 1.362948815322373, 'margin': 7, 'tau': 0.0, 'conv_rates': 0.1875}
calculated reward -9.480821009898792
arm = 0
passed reward dictionary {'n_clicks': 116.62598656563961, 'cpc': 0.2958233333585992, 'margin': 5, 'tau': 0.0, 'conv_rates': 0.2638888888888889}
calculated reward 119.38082194980288
arm = 8
passed rewa

In [None]:
# Display the daily rewards collected by the pricing learner of the last experiment run.
ts_learner.daily_collected_rewards

In [None]:
# Index into the bid-indexed arrays (cpc, n_clicks) of the bid assumed fixed
# while computing the pricing optimum by hand.
# NOTE(review): index 3 is bid 0.5, the fixed bid reported when the
# JointEnvironment is created — confirm. The original code mixed index 4
# (in the argmax) with index 3 (in the value); the cost per click is constant
# across prices, so the argmax is the same either way, but one index is now
# used throughout.
fixed_bid_idx = 3

# Expected profit per click for each price:
# margin * conversion rate * (1 + future purchases) - cost per click.
profit_per_click = (
    np.asarray(margins) * aggr_conv_rates * (1 + aggr_purch)
    - aggr_cpc[fixed_bid_idx]
)
opt_arm = np.argmax(profit_per_click)

# Scale the best per-click profit by the (rounded) number of daily clicks at the fixed bid.
opt = profit_per_click[opt_arm] * np.rint(aggr_n_clicks[fixed_bid_idx]).astype(int)
print(opt)

In [None]:
#MATCH SIZE
for exp in range(0,len(ts_rewards_per_experiment)):
  ts_rewards_per_experiment[exp] = np.append(ts_rewards_per_experiment[exp],0)

print(len(ts_rewards_per_experiment[0]))
print(len(gpts_rewards_per_experiment[0]))
print(ts_rewards_per_experiment[0])
print(gpts_rewards_per_experiment[0])

In [None]:
# Show the pricing (TS) reward history of the first experiment.
ts_rewards_per_experiment[0]

In [None]:
# Show the bidding (GPTS) reward history of the first experiment.
gpts_rewards_per_experiment[0]

In [None]:
# Reference value for the regret, as reported by the environment.
# NOTE(review): presumably the optimal expected daily reward — confirm.
opt = env.get_opt()

# Stack the per-experiment TS histories into an array first, so that the
# subtraction broadcasts even when `opt` is a plain Python number
# (`float - list` would raise a TypeError).
ts_rewards_matrix = np.asarray(ts_rewards_per_experiment)
mean_regret = np.mean(opt - ts_rewards_matrix, axis=0)

plt.figure(0)
plt.ylabel("Regret")
plt.xlabel("t")
plt.plot(np.cumsum(mean_regret), 'g')
# The curve is built from the TS (pricing) rewards, so label it as such
# (it was previously labelled 'GPTS').
plt.legend(['TS'])
plt.show()

In [None]:
# Display the arm returned by the environment's get_opt_arm().
# NOTE(review): presumably the index of the optimal arm — confirm.
env.get_opt_arm()

In [None]:
# Display the value returned by env.get_opt(), the same quantity used as `opt` in the regret plot.
env.get_opt()

In [None]:
# Display the output of the environment's expected_rew().
# NOTE(review): presumably the expected reward per arm — confirm against JointEnvironment.
env.expected_rew()