In [1]:
import numpy as np
import pandas as pd
#import xgboost
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import time
import collections
import itertools as it
from sklearn import ensemble, linear_model
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(color_codes=True)

### In this file, we fit a bidding strategy given the random strategy choices of 29 opponents

In [2]:
# Load validation data
val = pd.read_csv('validation.csv')

In [3]:
# Load CTR estimators
CTR_models_val = pd.DataFrame(np.vstack([np.loadtxt('CTR_models/logr/r1_sams_logr_6.csv'),
                                        np.loadtxt('CTR_models/gbm_dam/pCTR_gbm_dam_val.csv'), 
                                        pd.read_csv('CTR_models/gbm2/pCTR_gbm2_val.csv', index_col=0).values.flatten(), 
                                        np.loadtxt('ensemble/ensemble_base_learners_set2/val/r7_mlp_nn_14.csv')]).T,
                            columns=['log_reg', 'Damian', 'Stefan2', 'NN'])

# Build ensemble
weighted_CTR_val = CTR_models_val[['Damian', 'Stefan2', 'NN']].copy()

w = np.atleast_2d([1, 6, 5]) #np.array([x1, x2, x3])
weighted_CTR_val = np.log(weighted_CTR_val/(1 + weighted_CTR_val))
weighted_CTR_val =  np.dot(weighted_CTR_val, w.T)
weighted_CTR_val /= np.sum(w) # normalize weights
weighted_CTR_val = 1 / (1 + np.exp(-weighted_CTR_val)) # take logistic / sigmoid
weighted_CTR_val = weighted_CTR_val.flatten()

CTR_models_val['ensemble1'] = weighted_CTR_val
CTR_models_val.head()

Unnamed: 0,log_reg,Damian,Stefan2,NN,ensemble1
0,0.000533,7.8e-05,0.000291,0.000155,0.0002
1,0.000205,9.3e-05,0.000211,1.7e-05,6.9e-05
2,0.000139,6.3e-05,0.000351,1.1e-05,7.2e-05
3,0.00021,7.5e-05,0.000274,2.5e-05,9.1e-05
4,0.000306,9.2e-05,0.000335,7e-06,6.1e-05


### Load bids

In [4]:
bid_look_up = pd.read_csv('proper_datasets/bid_look_up.csv', index_col=0)

In [5]:
bid_look_up.shape

(303925, 180)

In [6]:
bid_look_up.head()

Unnamed: 0,log_reg_linbid,Damian_linbid,Stefan2_linbid,NN_linbid,ensemble1_linbid,log_reg_revlinbid,Damian_revlinbid,Stefan2_revlinbid,NN_revlinbid,ensemble1_revlinbid,...,Stefan2_ortb2_cap2_calm,Stefan2_ortb2_cap2_aggre,NN_ortb2_cap1_calm,NN_ortb2_cap1_aggre,NN_ortb2_cap2_calm,NN_ortb2_cap2_aggre,ensemble1_ortb2_cap1_calm,ensemble1_ortb2_cap1_aggre,ensemble1_ortb2_cap2_calm,ensemble1_ortb2_cap2_aggre
0,62.0,12.0,35.0,19.0,30.0,255.0,1396.0,416.0,982.0,459.0,...,38.0,58.0,31.0,47.0,31.0,47.0,37.0,55.0,37.0,55.0
1,24.0,15.0,25.0,2.0,10.0,661.0,1182.0,573.0,8811.0,1326.0,...,30.0,46.0,3.0,5.0,3.0,5.0,14.0,22.0,14.0,22.0
2,16.0,10.0,42.0,1.0,11.0,980.0,1749.0,344.0,13720.0,1276.0,...,44.0,66.0,2.0,4.0,2.0,4.0,15.0,23.0,15.0,23.0
3,24.0,12.0,33.0,3.0,14.0,647.0,1452.0,440.0,5973.0,1006.0,...,37.0,55.0,6.0,8.0,6.0,8.0,19.0,29.0,19.0,29.0
4,35.0,14.0,40.0,1.0,9.0,444.0,1193.0,361.0,21040.0,1513.0,...,42.0,64.0,2.0,2.0,2.0,2.0,13.0,19.0,13.0,19.0


In [7]:
# Load the auction function
# FUNCTION:

def simulate_auction_ORTB(t_bids, t_payprice, t_clicks, agents, t_budget, spending_info, verbose=0):
    
    nrand = t_payprice.shape[0]
    rand_helper_vals = np.random.random(size=(nrand,agents)) # to efficiently split ties randomly

    def calc_things(x, t_payprice):
        exc_price = np.subtract(x, t_payprice)
        return exc_price 

    def element_compare(first_price, second_price, payprice):
        if (second_price == 0) & (first_price != 0):
            second_price = payprice        
        return second_price
    

    while True:

        # define valid bids (bids times a boolean of whether a bid is higher than the payprice (otherwise instant no-win))
        valid_bids = t_bids * np.any([(t_bids > t_payprice[:, None])], axis=0)

        # Find second highest prices
        sorted_prices = np.atleast_1d([np.sort(valid_bids[i,:])[::-1] for i in range(valid_bids.shape[0])]) 
        second_prices = sorted_prices[:,1]
        first_prices = sorted_prices[:,0]
        second_prices = np.vectorize(element_compare)(first_prices, second_prices, t_payprice)

        # Find highest bids (win prices) -> win goes to the first column among winners in case of ties (np.where returns duplicates!)
        win_cols = np.argmax(valid_bids+rand_helper_vals, axis=1)

        # Compute costs given win (second price is paid)
        valid_costs = np.zeros(shape=t_bids.shape) #tt_bids.copy()
        valid_costs[range(t_bids.shape[0]),win_cols] = second_prices

        costs_sum = np.cumsum(valid_costs, axis=0)

        bool_entry = (costs_sum >= t_budget)

        bool_sum = np.sum(bool_entry)

        if bool_sum > 0:

            t_bids[bool_entry] = 0 
            if verbose >= 1:
                print('t_bids was updated, %i bid removals' % bool_sum)

        else:
            
            break

    clicks = (valid_costs>0) * t_clicks[:, None]
            
    if verbose >= 1:
        print('DONE')

        print('clicks:')
        print(np.sum(clicks, axis=0)) # clicks
        print(np.mean(np.sum(clicks, axis=0))) # avg clicks per agent
        
        print('OUR MODEL')
        print(np.sum(clicks, axis=0)[-1])

        print('wins:')
        print(np.sum((valid_costs>0), axis=0)) # wins
        print(np.mean(np.sum((valid_costs>0), axis=0))) # avg wins per agent
        print('OUR MODEL')
        print(np.sum((valid_costs>0), axis=0)[-1]) # wins
        
        print('cost for OUR MODEL')
        print(np.sum((valid_costs), axis=0)[-1])
        
        print(time.time()-start)
        
    if spending_info:
        spendings = np.sum(valid_costs, axis=0) 
        res = [agents, np.mean(np.sum(clicks, axis=0)), 
               np.mean(np.sum((valid_costs>0), axis=0)),
               spendings] 
        
    else:    

        res = np.sum(clicks, axis=0)[-1], np.sum((valid_costs>0), axis=0)[-1], np.sum(valid_costs, axis=0)[-1]
    
    return res

In [8]:
def build_custom_ortb2(cc, lamb, pCTR):
    
    return np.round(cc * (
                      (pCTR + np.sqrt(cc**2*lamb**2+pCTR**2)/(cc*lamb))**(1/3) 
                      - (cc*lamb/(pCTR + np.sqrt(cc**2*lamb**2+pCTR**2)))**(1/3)
                     ))       

In [129]:
## Run logreg and ensemble1 for fitting ortb2 on bootstrap samples
# ===================== BOOTSTRAPPING FOR FITTING ORTB2 ===================== #

#np.loadtxt('CTR_models/logr/r1_sams_logr_6.csv'),
#            np.loadtxt('CTR_models/gbm_dam/pCTR_gbm_dam_val.csv'), 
#            pd.read_csv('CTR_models/gbm2/pCTR_gbm2_val.csv', index_col=0).values.flatten(), 
#            np.loadtxt('ensemble/ensemble_base_learners_set2/val/r7_mlp_nn_14.csv')
# columns=['log_reg', 'Damian', 'Stefan2', 'NN'])

start = time.time()
res = []
spending_info = False #True
t_payprice = np.array(val['payprice'])
t_clicks = np.array(val['click'])

res_bt_ids = []
res_bt_click = []
res_bt_win = []
res_bt_cost = []
res_bt = []

#pCTR = CTR_models_val['ensemble1']
pCTR = CTR_models_val['log_reg']

base_sub = [0,1,2,3,4,10,11,12,13,14,15,16,17,18,19]
bids_subset = bid_look_up.iloc[:,base_sub] 
bids_subset['extra_id0'] = bids_subset.iloc[:,0]
bids_subset['extra_id1'] = bids_subset.iloc[:,1]
bids_subset['extra_id2'] = bids_subset.iloc[:,2]
bids_subset['extra_id3'] = bids_subset.iloc[:,3]
bids_subset['extra_id4'] = bids_subset.iloc[:,4]

nrand = t_payprice.shape[0]
nsims = 50
nsamp = bid_look_up.shape[0]
nstrats = 19 ## remove 1, otherwise it may draw 180 (out of bounds)

for isim in range(nsims):
    for agents in [30]:
        strategy_ids = np.random.randint(0, nstrats+1, agents-1)
        resamp_ids = np.random.randint(0, nsamp, nsamp)

        t_bids = bids_subset.iloc[resamp_ids,strategy_ids]
        t_bids['custom'] = np.zeros(nsamp)
        t_bids = np.array(t_bids)
        
        budget = 6250 * 1000
        
        # tune cc and/or lambda:
        cc = 58
        for lamb in np.linspace(1*10**(-9), 1.5*10**(-6),30):
            t_bids[:,-1] = build_custom_ortb2(cc, lamb, pCTR[resamp_ids]) # custom_strat
            out = simulate_auction_ORTB(t_bids, t_payprice, t_clicks, agents, budget, spending_info, verbose=0)
            
            res_bt.append([isim, cc, lamb, out])

        for cc in range(10,100,10):
            for lamb in [10**(-8), 10**(-7), 10**(-6)]: 

                t_bids[:,-1] = build_custom_ortb2(cc, lamb, pCTR[resamp_ids]) #custom_strat
                            
                out = simulate_auction_ORTB(t_bids, t_payprice, t_clicks, agents, budget, spending_info, verbose=0)
                res_bt.append([isim, cc, lamb, out]) 
    
        print('===== Simulation %i done, runtime so far: %.f ======' % (isim, time.time()-start))
    
print(time.time()-start) 

pd.DataFrame(res_bt).to_csv('output_simulation/ortb_multi/r1_acer_logreg.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

11410.973900794983
