## Stochastic environment (First requirement)

In [78]:
import numpy as np
import matplotlib.pyplot as plt 
import Configuration as config

### Stochastic environment 
The stochastic environment takes as input the action of the agent (the bid, or the price in the pricing case). It then simulates the distribution from which the other bids are sampled and the conversion probability (the probability of buying at a given price, i.e. the normalized demand curve). Finally, it outputs the reward associated with that action.

In [47]:
class pricing_stochastic_env():
    """Stochastic pricing environment.

    At every round the agent posts a price; the number of sales is drawn from
    a binomial distribution whose success probability is given by the
    conversion-probability function evaluated at that price.
    """

    def __init__(self, probability_conversion, cost=0.0):
        """
        Args:
            probability_conversion: callable mapping a price to the
                probability that a single customer buys at that price.
            cost: unit cost subtracted from the price to obtain the unit
                margin. Defaults to 0 (reward equals revenue).
        """
        self.probability_conversion = probability_conversion
        self.cost = cost

    def round(self, p_t, n_t):
        """Simulate one pricing round.

        Args:
            p_t: price posted in this round.
            n_t: number of customers that see the price.

        Returns:
            Tuple ``(d_t, r_t)``: the sampled number of sales and the
            resulting profit ``(p_t - cost) * d_t``.
        """
        buy_probability = self.probability_conversion(p_t)
        d_t = np.random.binomial(n_t, buy_probability)
        # the reward is the profit: unit margin times the sampled demand
        r_t = (p_t - self.cost) * d_t
        return d_t, r_t


    

In [None]:
class bidding_stochastic_env():
    """Multi-slot generalized second-price (GSP) auction environment.

    Advertisers are ranked by their value ``ctr * bid``; the ``n_slots``
    highest values win a slot, and each winner pays, per click, the value of
    the advertiser ranked immediately below divided by its own CTR.
    """

    def __init__(self, B, T, lambdas, ctrs=None):
        """
        Args:
            B: budget (stored, not used by the environment itself).
            T: number of rounds (stored, not used by the environment itself).
            lambdas: one entry per slot; only its length is used here.
            ctrs: optional per-advertiser click-through rates, one per bid.
                When omitted every advertiser gets a CTR of 1, sized to the
                number of bids received in each auction.
        """
        self.B = B
        self.T = T
        #self.rho = B/T
        self.lambdas = lambdas
        self.n_slots = len(lambdas)
        # CTRs are per advertiser, not per round, so they cannot be sized by T.
        self.ctrs = None if ctrs is None else np.asarray(ctrs, dtype=float)

    def _ctrs_for(self, n_advertisers):
        """Return the CTR vector matching ``n_advertisers`` bids."""
        if self.ctrs is None:
            return np.ones(n_advertisers)    # for simplicity all equal to 1
        if self.ctrs.shape[0] != n_advertisers:
            raise ValueError(
                f"expected {self.ctrs.shape[0]} bids (one per CTR), got {n_advertisers}"
            )
        return self.ctrs

    def _top_slots(self, adv_ranking):
        """Return the last ``n_slots`` entries of an increasing ranking."""
        first = max(adv_ranking.shape[0] - self.n_slots, 0)
        return adv_ranking[first:]

    def get_winners(self, bids):
        """Select the slot winners.

        Args:
            bids: one bid per advertiser.

        Returns:
            Tuple ``(winners, winners_values)``: the indexes of the highest
            ``ctr * bid`` values in increasing order of value (the last entry
            is the top-ranked advertiser) and the corresponding values.
        """
        bids = np.asarray(bids, dtype=float)
        adv_values = self._ctrs_for(bids.shape[0]) * bids
        adv_ranking = np.argsort(adv_values)        # indexes sorted by increasing value
        winners = self._top_slots(adv_ranking)
        return winners, adv_values[winners]

    def get_payments_per_click(self, values, winners=None):
        """Compute the GSP payment per click of every winner.

        Args:
            values: ``ctr * bid`` of ALL advertisers (the runner-up of the
                lowest winner is needed to price its slot).
            winners: indexes of the winners; defaults to the ``n_slots``
                highest entries of ``values``.

        Returns:
            Array aligned with ``winners`` holding, for each winner, the value
            of the advertiser ranked immediately below divided by the winner's
            CTR, rounded to 2 decimals. A winner with nobody ranked below it
            pays 0.
        """
        values = np.asarray(values, dtype=float)
        ctrs = self._ctrs_for(values.shape[0])
        adv_ranking = np.argsort(values)    # indexes sorted by increasing value
        if winners is None:
            winners = self._top_slots(adv_ranking)
        winners = np.asarray(winners, dtype=int)

        # position[i] = rank of advertiser i in the increasing ordering
        position = np.empty(adv_ranking.shape[0], dtype=int)
        position[adv_ranking] = np.arange(adv_ranking.shape[0])

        payments = np.zeros(winners.shape[0])
        below = position[winners] - 1
        has_runner_up = below >= 0
        runner_up = adv_ranking[below[has_runner_up]]
        payments[has_runner_up] = values[runner_up] / ctrs[winners[has_runner_up]]
        return payments.round(2)

    def round(self, bids):
        """Run one auction.

        Args:
            bids: all bids, including the learner's one stored in the first
                position.

        Returns:
            Tuple ``(winners, payments_per_click)`` as returned by
            ``get_winners`` and ``get_payments_per_click``.
        """
        bids = np.asarray(bids, dtype=float)
        adv_values = self._ctrs_for(bids.shape[0]) * bids
        winners, _ = self.get_winners(bids)
        payments_per_click = self.get_payments_per_click(adv_values, winners)
        return winners, payments_per_click


### Pricing algorithm
Build a pricing strategy using the continuous set of prices $p \in [0,1]$ and Gaussian Processes

In [48]:
# taken from lab session 3
# taken from lab session 3
class RBFGaussianProcess:
    """Gaussian-process regressor with an RBF kernel and incremental updates.

    The inverse of the regularised kernel matrix is kept in ``k_xx_inv`` and
    extended with a block-matrix inversion every time new samples are added,
    so the full matrix is never re-inverted from scratch.
    """

    def __init__(self, scale=1, reg=1e-2):
        """
        Args:
            scale: multiplier of the squared distance inside the kernel.
            reg: value added to the diagonal of the kernel matrix.
        """
        self.scale = scale
        self.reg = reg
        self.k_xx_inv = None

    def rbf_kernel_incr_inv(self, B, C, D):
        """Extend ``k_xx_inv`` to the inverse of ``[[K, B], [C, D]]``.

        ``K`` is the matrix whose inverse is currently stored; the result is
        assembled from the four blocks of the block-inversion formula.
        """
        temp = np.linalg.inv(D - C @ self.k_xx_inv @ B)
        block1 = self.k_xx_inv + self.k_xx_inv @ B @ temp @ C @ self.k_xx_inv
        block2 = - self.k_xx_inv @ B @ temp
        block3 = - temp @ C @ self.k_xx_inv
        block4 = temp
        res1 = np.concatenate((block1, block2), axis=1)
        res2 = np.concatenate((block3, block4), axis=1)
        res = np.concatenate((res1, res2), axis=0)
        return res

    def rbf_kernel(self, a, b):
        """Return the matrix ``exp(-scale * (a_i - b_j)**2)`` of shape (len(a), len(b))."""
        a_ = np.asarray(a).reshape(-1, 1)
        b_ = np.asarray(b).reshape(1, -1)
        # broadcasting builds all pairwise squared differences at once
        return np.exp(-self.scale * np.power(a_ - b_, 2))

    def fit(self, x=None, y=None):
        """Add samples to the model.

        The first call initialises the model (possibly with no samples);
        later calls append one or more samples to the existing ones.

        Args:
            x: scalar or 1-D collection of inputs; omitted means no samples.
            y: observations, one per input.

        Returns:
            The process itself, so calls can be chained.

        Raises:
            ValueError: if ``x`` and ``y`` hold a different number of samples.
        """
        x = np.array([] if x is None else x).reshape(-1, 1)
        y = np.array([] if y is None else y).reshape(-1, 1)
        if x.shape[0] != y.shape[0]:
            raise ValueError(
                f"x and y must have the same length, got {x.shape[0]} and {y.shape[0]}"
            )

        if self.k_xx_inv is None:
            self.y = y
            self.x = x
            k_xx = self.rbf_kernel(self.x, self.x) + self.reg * np.eye(self.x.shape[0])
            self.k_xx_inv = np.linalg.inv(k_xx)
        elif x.shape[0] > 0:
            B = self.rbf_kernel(self.x, x)
            # kernel block of the new samples; for a single sample this is 1 + reg
            D = self.rbf_kernel(x, x) + self.reg * np.eye(x.shape[0])
            self.k_xx_inv = self.rbf_kernel_incr_inv(B, B.T, D)
            self.x = np.vstack((self.x, x))
            self.y = np.vstack((self.y, y))

        return self

    def predict(self, x_predict):
        """Evaluate the model at ``x_predict``.

        Returns:
            Tuple ``(mu_hat, sigma_hat)`` of 1-D arrays: ``k K^-1 y`` and
            ``1 - diag(k K^-1 k^T)``, where 1 is the kernel value of a point
            with itself.
        """
        k = self.rbf_kernel(x_predict, self.x)

        mu_hat = k @ self.k_xx_inv @ self.y
        sigma_hat = 1 - np.diag(k @ self.k_xx_inv @ k.T)

        return mu_hat.ravel(), sigma_hat.ravel()

Implementing the Gaussian Processes in our agent:
- remember to normalize the input to the continuous set $[0,1]$

In [49]:
# taken from lab session 3 as well
class pricing_agent():
    """GP-UCB pricing agent over a discretisation of the price range [0, 1].

    A Gaussian process models the reward as a function of the price; each
    round the agent plays the arm maximising ``mu + beta(t) * sigma``.
    """

    def __init__(self, T, discretization=100):
        """
        Args:
            T: number of rounds, used in the exploration coefficient.
            discretization: number of equally spaced prices in [0, 1].
        """
        self.T = T
        self.arms = np.linspace(0, 1, discretization)
        self.gp = RBFGaussianProcess(scale=2).fit()
        self.a_t = None     # index of the last pulled arm
        self.action_hist = np.array([])
        self.reward_hist = np.array([])
        self.mu_t = np.zeros(discretization)
        self.sigma_t = np.zeros(discretization)
        self.gamma = lambda t: np.log(t+1)**2
        self.beta = lambda t: 1 + 0.5*np.sqrt(2 * (self.gamma(t) + 1 + np.log(T)))      # from PAPER LINKED IGP-UCB ALGORITHM
        self.N_pulls = np.zeros(discretization)
        self.t = 0

    def pull_arm(self):
        """Choose the price with the highest upper confidence bound.

        Returns:
            The selected price; its index is stored in ``self.a_t``.
        """
        self.mu_t, self.sigma_t = self.gp.predict(self.arms)
        # the exploration coefficient depends on the agent's own round counter
        ucbs = self.mu_t + self.beta(self.t) * self.sigma_t
        self.a_t = np.argmax(ucbs)
        return self.arms[self.a_t]

    def update(self, r_t):
        """Record the reward of the last pulled arm and refit the GP.

        Args:
            r_t: reward observed for the price returned by ``pull_arm``.
        """
        played_price = self.arms[self.a_t]
        self.N_pulls[self.a_t] += 1
        self.action_hist = np.append(self.action_hist, played_price)
        self.reward_hist = np.append(self.reward_hist, r_t)
        self.gp = self.gp.fit(played_price, r_t)
        self.t += 1

### Bidding algorithms
Consider a sequence of generalized second-price auctions. Build two learning algorithms to deal with the bidding problem:
- a primal-dual algorithm for truthful auctions
- a UCB-like algorithm

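Generalized second-price auctions: each advertiser $a$ pays $p_a = \dfrac{q_{a+1}}{q_a} b_{a+1}$ if the ad is clicked, where $q_a$ is the quality parameter of the advertiser ranked in position $a$ and $b_{a+1}$ is the bid of the advertiser ranked immediately below. This payment rule has to be applied by the bidding environment.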

In [50]:
class primal_dual_agent():
    """Placeholder for the primal-dual bidding agent (logic not implemented yet)."""

    def __init__(self, B, T, eta) -> None:
        """
        Args:
            B: budget.
            T: number of rounds.
            eta: learning rate.
        """
        self.budget = B     # budget
        self.T = T          # number of rounds
        self.eta = eta      # learning rate

    def action(self):
        """Choose the price to bid. Not implemented: returns None."""
        # TODO: choose price to bid
        return None

    def update(self, f_t=None, c_t=None):
        """Update the agent after an auction. Not implemented: returns None.

        Args:
            f_t: utility obtained in the auction.
            c_t: cost paid in the auction.
        """
        # TODO: update the internal state from the observed utility and cost
        return None

This can be implemented following the algorithm on slide 18 of slide package 8.

In [51]:
class UCB_agent():
    """Placeholder for the UCB-like bidding agent (logic not implemented yet)."""

    def __init__(self, B, T, eta=0.1) -> None:
        """
        Args:
            B: budget.
            T: number of rounds.
            eta: learning rate.
        """
        self.budget = B     # budget
        self.T = T          # number of rounds
        self.eta = eta      # learning rate

    def action(self):
        """Choose the price to bid. Not implemented: returns None."""
        # TODO: choose price to bid
        return None

    def update(self, f_t=None, c_t=None):
        """Update the agent after an auction. Not implemented: returns None.

        Args:
            f_t: utility obtained in the auction.
            c_t: cost paid in the auction.
        """
        # TODO: update the internal state from the observed utility and cost
        return None

### General framework to be modeled
Recall that:
- GPUCB1 works on the continuous domain [0,1]
- the UCB-like algorithm for bidding works on the discretized set of bids

In [52]:
# importing parameters
ITERATIONS = config.ITERATIONS
AUCTIONS = config.AUCTIONS
N_USERS = config.N_USERS
N_ADVERTISERS = config.N_ADVERTISERS    # number of companies that compete for slot auctions, including myself
NUMBER_OF_ARMS = config.NUMBER_OF_ARMS    # needed for the UCB algorithm
# NOTE(review): PRICES reads the same config entry as NUMBER_OF_ARMS; confirm
# whether a dedicated list of prices was intended.
PRICES = config.NUMBER_OF_ARMS    # actual arms of the ucb algorithm
BUGDET = config.BUGDET
NUMBER_OF_SLOTS = config.NUMBER_OF_SLOTS
LAMBDAS = config.LAMBDAS


def conversion_probability(p):
    """Probability that a customer buys at price ``p`` (linear demand curve)."""
    return 1-p/20


# NOTE(review): this rebinds the class name `pricing_agent` to an instance, so
# the cell cannot be run twice without redefining the class first.
pricing_agent = pricing_agent(ITERATIONS, 100)
bidding_agent = UCB_agent(BUGDET, ITERATIONS)


pse = pricing_stochastic_env(conversion_probability)
bse = bidding_stochastic_env(B=BUGDET, T=ITERATIONS, lambdas=LAMBDAS)

my_valuation = 0.8      # randomly chosen
total_wins = 0

# per-auction logs, collected in lists and converted to arrays once at the end
utility_log = []
bid_log = []
payment_log = []

# N_ADVERTISERS already counts the learner, so only the others are sampled
n_competitors = max(N_ADVERTISERS - 1, 0)

for round_idx in range(ITERATIONS):
    #p_t = pricing_agent.pull_arm()    # pricing agent choose a price p for the product

    for auction_idx in range(AUCTIONS):
        my_bid = bidding_agent.action()    # bidding agent decides how much to bid
        other_bids = np.random.uniform(0, 1, size=n_competitors)
        all_bids = np.append(my_bid, other_bids)    # all bids, including mine in first position
        winners, payments_per_click = bse.round(bids=all_bids)    # somebody wins the auction
        # maximum bid among the other advertisers (0 when there is no competitor)
        m_t = other_bids.max() if other_bids.size else 0.0
        my_win = 1 if 0 in winners else 0    # my bid sits at index 0
        f_t, c_t = (my_valuation-m_t)*my_win, m_t*my_win
        bidding_agent.update(f_t, c_t)
        # logging
        utility_log.append(f_t)
        bid_log.append(my_bid)
        payment_log.append(c_t)
        total_wins += my_win

utilities = np.array(utility_log)
my_bids = np.array(bid_log)
my_payments = np.array(payment_log)
        