The goal is to model a scenario in which a seller uses advertising tools to attract more and more users to its website, thus increasing the number of potential buyers. The seller needs to learn, at the same time, the conversion rate and the number of users that the advertising tools can attract.

1) Imagine:
    - one product to sell;
    - three classes of users, where, for every user, we can observe the values of two binary features (feel free to choose the features and their domains);
    - the conversion rate curve of each class of users;
    - three subcampaigns, each with a different ad, to advertise the product, and each targeting a different class of users;
    - there are three abrupt phases;
    - for every abrupt phase and for every subcampaign, the probability distribution over the daily number of clicks for every value of budget allocated to that subcampaign.

3) Design a sliding-window combinatorial bandit algorithm for the case in which the three abrupt phases described above are present.
Plot its cumulative regret and compare it with the cumulative regret obtained by a non-sliding-window algorithm.


9 curves, 3 phases

In [1]:
import matplotlib.pyplot as plt
import numpy as np
from project.Urbano08.BiddingEnvironment import BiddingEnvironment
from project.Urbano08.Learner import Learner
import pandas as pd

In [2]:
# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------
len_window = 30   # presumably the sliding-window length; not referenced in this cell
n_obs = 60        # number of rounds run by the main loop (original note: "6 abrupt phase")
print_span = 20   # how many days to wait between plot refreshes

# During the first `init_days` rounds arms are picked for exploration,
# without trying to maximise the reward.
init_days = 20

n_subcamp = 3     # one learner is created per subcampaign

max_bid = 1
max_clicks = 100
n_arms = 11

noise_std = 3.0

# Evenly spaced grid of `n_arms` values from 0 to `max_bid`, both included.
bids = np.linspace(0, max_bid, num=n_arms)
print(bids)

# Environment and learners receive the same grid of values.
env = BiddingEnvironment(bids, max_clicks, noise_std)
learners = [Learner(n_arms, bids) for _ in range(n_subcamp)]

# Per-day log: the arm index pulled for each subcampaign and the clicks
# returned by the environment for it.
total_click_each_day = pd.DataFrame(
    columns=['bid_sub1', 'bid_sub2', 'bid_sub3', "click1", "click2", "click3"]
)

[0.  0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1. ]


In [3]:
def _best_allocation(expected_clicks):
    """Return the per-subcampaign arm indices maximising the summed estimates.

    ``expected_clicks[s, b]`` is the estimate for subcampaign ``s`` when it
    receives arm index ``b``. The returned list has one arm index per row of
    ``expected_clicks`` and its entries always sum to the last arm index,
    i.e. the whole budget grid is spent.
    """
    n_rows, n_budgets = expected_clicks.shape

    # best_value[s, b]: best total of subcampaigns 0..s using exactly index b.
    best_value = np.zeros((n_rows, n_budgets), dtype=float)
    best_value[0, :] = expected_clicks[0, :]
    # best_split[s][b]: the allocation (list of arm indices) achieving it.
    best_split = [[[b] for b in range(n_budgets)]]

    for sub in range(1, n_rows):
        row_split = []
        for b in range(n_budgets):
            # candidates[p]: give index p to `sub` and the remaining b - p
            # to the previously processed subcampaigns.
            candidates = expected_clicks[sub, :b + 1] + best_value[sub - 1, b::-1]
            p = int(np.argmax(candidates))  # first maximum wins on ties
            best_value[sub, b] = candidates[p]
            row_split.append(best_split[sub - 1][b - p] + [p])
        best_split.append(row_split)

    # Last row, last column: all subcampaigns, full budget.
    return best_split[-1][-1]


# Rows are buffered here and turned into a DataFrame once, after the loop
# (DataFrame.append no longer exists in pandas >= 2.0).
daily_rows = []

for day in range(n_obs):
    if init_days > 0:
        # Exploration phase. NOTE: this consumes the global `init_days`
        # counter, so re-running the cell skips straight to the
        # optimisation phase unless the setup cell is run again.
        init_days = init_days - 1
        pulled = [0, 0, 0]
        first = day % 3
        # One subcampaign (rotating daily) asks its own learner for an arm...
        pulled[first] = learners[first].pull_arm()
        # ...the next one gets a random share of what is left...
        pulled[(first + 1) % 3] = np.random.randint(0, n_arms - pulled[first])
        # ...and the last one takes the remainder, so indices sum to n_arms - 1.
        pulled[(first + 2) % 3] = n_arms - pulled[first] - pulled[(first + 1) % 3] - 1
    else:
        # Exploitation phase: stack every learner's current estimates (one row
        # per subcampaign) and pick the allocation with the largest total.
        estimates = np.vstack(
            [np.atleast_2d(learner.means.T) for learner in learners]
        ).astype(float)
        pulled = _best_allocation(estimates)

    clicks = env.round(pulled[0], pulled[1], pulled[2])

    for x in range(n_subcamp):
        learners[x].update(pulled[x], clicks[x])

    daily_rows.append({
        'bid_sub1': pulled[0],
        'bid_sub2': pulled[1],
        'bid_sub3': pulled[2],
        "click1": clicks[0],
        "click2": clicks[1],
        "click3": clicks[2],
    })

# Append the new rows to whatever the log already contains, keeping its
# column order; skip the concat when the log is still empty.
_new_rows = pd.DataFrame(daily_rows, columns=total_click_each_day.columns)
if total_click_each_day.empty:
    total_click_each_day = _new_rows
else:
    total_click_each_day = pd.concat(
        [total_click_each_day, _new_rows], ignore_index=True
    )




