In [1]:
import numpy as np
import pandas as pd
#import xgboost
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import time
import collections
import itertools as it
from sklearn import ensemble, linear_model
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(color_codes=True)


### "simple" setup of simulating using 30 random agents selecting single-agent strategies
#### (next file is fitting a strategy given the random choice of 29 agents)

In [2]:
# Load validation data
val = pd.read_csv('validation.csv')

In [3]:
# Load CTR estimators
CTR_models_val = pd.DataFrame(np.vstack([np.loadtxt('CTR_models/logr/r1_sams_logr_6.csv'),
                                        np.loadtxt('CTR_models/gbm_dam/pCTR_gbm_dam_val.csv'), 
                                        pd.read_csv('CTR_models/gbm2/pCTR_gbm2_val.csv', index_col=0).values.flatten(), 
                                        np.loadtxt('ensemble/ensemble_base_learners_set2/val/r7_mlp_nn_14.csv')]).T,
                            columns=['log_reg', 'Damian', 'Stefan2', 'NN'])

# Build ensemble
weighted_CTR_val = CTR_models_val[['Damian', 'Stefan2', 'NN']].copy()

w = np.atleast_2d([1, 6, 5])
weighted_CTR_val = np.log(weighted_CTR_val/(1 + weighted_CTR_val))
weighted_CTR_val =  np.dot(weighted_CTR_val, w.T)
weighted_CTR_val /= np.sum(w) # normalize weights
weighted_CTR_val = 1 / (1 + np.exp(-weighted_CTR_val)) # take logistic / sigmoid
weighted_CTR_val = weighted_CTR_val.flatten()

CTR_models_val['ensemble1'] = weighted_CTR_val
CTR_models_val.head()

Unnamed: 0,log_reg,Damian,Stefan2,NN,ensemble1
0,0.000533,7.8e-05,0.000291,0.000155,0.0002
1,0.000205,9.3e-05,0.000211,1.7e-05,6.9e-05
2,0.000139,6.3e-05,0.000351,1.1e-05,7.2e-05
3,0.00021,7.5e-05,0.000274,2.5e-05,9.1e-05
4,0.000306,9.2e-05,0.000335,7e-06,6.1e-05


In [127]:
# Define bid look-up-table
bid_look_up = pd.DataFrame()

In [128]:
# Fit and save linear strategies

def fit_linear_strategy(pCTR_val):
    
    CTR_frac = pCTR_val / np.mean(pCTR_val)
    #print(CTR_frac.shape)
    df_sub = val[['click','payprice']].copy()
    
    start = time.time()
    clicks = []
    budget = 6250 * 1000
    for base_bid in range(200):

        bid = base_bid * CTR_frac 
        df = df_sub[df_sub['payprice'] < bid]
        
        cost = df['payprice'].cumsum()
        eligible = df[cost < budget]
        clicks.append([base_bid, eligible['click'].sum(), eligible['payprice'].sum()])
    
    opt = np.argmax(np.array(clicks)[:,1])
    
    return opt


def save_linear_strategy(bid_look_up, CTR_models_val):
    for col in CTR_models_val.columns:

        pCTR_val = CTR_models_val.loc[:,col]
        base_bid = fit_linear_strategy(pCTR_val)
        
        CTR_frac = pCTR_val / np.mean(pCTR_val)
        bid_look_up[col + '_linbid'] = np.round(base_bid * CTR_frac)
        
    return bid_look_up

bid_look_up = save_linear_strategy(bid_look_up, CTR_models_val)

In [129]:
# Fit and save linear strategies

def fit_rev_linear_strategy(pCTR_val):
    
    CTR_frac = np.mean(pCTR_val) / pCTR_val

    df_sub = val[['click','payprice']].copy()
    
    start = time.time()
    clicks = []
    budget = 6250 * 1000
    for base_bid in range(200):
       
        bid = base_bid * CTR_frac 

        df = df_sub[df_sub['payprice'] < bid]

        cost = df['payprice'].cumsum()
        eligible = df[cost < budget]

        clicks.append([base_bid, eligible['click'].sum(), eligible['payprice'].sum()])
    
    opt = np.argmax(np.array(clicks)[:,1])
    
    return opt


def save_rev_linear_strategy(bid_look_up, CTR_models_val):
    for col in CTR_models_val.columns:

        pCTR_val = CTR_models_val.loc[:,col]
        base_bid = fit_rev_linear_strategy(pCTR_val)
        
        CTR_frac = np.mean(pCTR_val) / pCTR_val
        bid_look_up[col + '_revlinbid'] = np.round(base_bid * CTR_frac)
        
    return bid_look_up

bid_look_up = save_rev_linear_strategy(bid_look_up, CTR_models_val)

In [130]:
bid_look_up.sort_values(by='log_reg_linbid', ascending=False).head()

Unnamed: 0,log_reg_linbid,Damian_linbid,Stefan2_linbid,NN_linbid,ensemble1_linbid,log_reg_revlinbid,Damian_revlinbid,Stefan2_revlinbid,NN_revlinbid,ensemble1_revlinbid
38982,101508.0,347.0,25101.0,3907.0,8178.0,0.0,50.0,1.0,5.0,2.0
101057,95804.0,621.0,15568.0,5123.0,7819.0,0.0,28.0,1.0,4.0,2.0
153755,90635.0,461.0,19988.0,4980.0,8376.0,0.0,38.0,1.0,4.0,2.0
192338,86375.0,6941.0,11356.0,983.0,4317.0,0.0,2.0,1.0,19.0,3.0
86332,75781.0,3824.0,3200.0,1669.0,2835.0,0.0,5.0,5.0,11.0,5.0


In [131]:
bid_look_up.head()

Unnamed: 0,log_reg_linbid,Damian_linbid,Stefan2_linbid,NN_linbid,ensemble1_linbid,log_reg_revlinbid,Damian_revlinbid,Stefan2_revlinbid,NN_revlinbid,ensemble1_revlinbid
0,62.0,12.0,35.0,19.0,30.0,255.0,1396.0,416.0,982.0,459.0
1,24.0,15.0,25.0,2.0,10.0,661.0,1182.0,573.0,8811.0,1326.0
2,16.0,10.0,42.0,1.0,11.0,980.0,1749.0,344.0,13720.0,1276.0
3,24.0,12.0,33.0,3.0,14.0,647.0,1452.0,440.0,5973.0,1006.0
4,35.0,14.0,40.0,1.0,9.0,444.0,1193.0,361.0,21040.0,1513.0


In [136]:
# Fit and save ORTB1
# Fit and save capped linear strategies

def fit_ortb1(pCTR_val, maxbid, cc):
    
    start = time.time()
    res = []
    budget = 6250 * 1000
    df_sub = val[['click','payprice']].copy()

    for lmbda in np.linspace(10**(-8), 10**(-5),400):

        lamb = lmbda 
        bid = np.minimum(np.sqrt(cc/lamb * pCTR_val + cc**2) - cc, maxbid)
        df = df_sub[df_sub['payprice'] < bid]
        cost = df['payprice'].cumsum()
        eligible = df[cost < budget]
        y = eligible['click'].sum() # "clicks"
        res.append([lmbda, y])
 
    opt = np.array(res)[np.argmax(np.array(res)[:,1]),0]

    return opt


def save_ortb1(bid_look_up, CTR_models_val, maxbid, cc):
    
    for col in CTR_models_val.columns:
        pCTR_val = CTR_models_val.loc[:,col]
        lamb = fit_ortb1(pCTR_val, maxbid, cc)
        bid = np.sqrt(cc/lamb * pCTR_val + cc**2) - cc
        bid_look_up[col + '_ortb1'] = np.round(np.minimum(bid, maxbid))
    return bid_look_up

bid_look_up = save_ortb1(bid_look_up, CTR_models_val, maxbid=10**7, cc = 31)

In [137]:
bid_look_up.describe()

Unnamed: 0,log_reg_linbid,Damian_linbid,Stefan2_linbid,NN_linbid,ensemble1_linbid,log_reg_revlinbid,Damian_revlinbid,Stefan2_revlinbid,NN_revlinbid,ensemble1_revlinbid,log_reg_ortb1,Damian_ortb1,Stefan2_ortb1,NN_ortb1,ensemble1_ortb1
count,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0
mean,85.999039,112.999355,86.000471,100.998523,84.999997,1142.597,575.280309,335.563504,4372.314,473.295247,65.836973,71.048795,69.625921,69.89277,70.441112
std,487.782326,1526.586838,509.008216,1255.13011,366.01276,8488.211,504.374206,204.273215,32349.95,583.728922,61.423295,84.41597,55.980105,91.749856,58.568786
min,0.0,7.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.0,9.0,0.0,1.0
25%,22.0,18.0,33.0,9.0,25.0,189.0,190.0,191.0,216.0,159.0,33.0,29.0,44.0,19.0,37.0
50%,45.0,52.0,47.0,40.0,49.0,346.0,335.0,304.0,477.0,278.0,55.0,61.0,57.0,58.0,60.0
75%,83.0,91.0,76.0,88.0,86.0,723.0,973.0,442.0,2233.0,555.0,82.0,89.0,78.0,97.0,87.0
max,101508.0,144531.0,80783.0,103837.0,40914.0,1439609.0,2328.0,3210.0,8994396.0,34170.0,3762.0,4574.0,3368.0,4227.0,2447.0


In [138]:
# Fit and save ORTB2

def fit_ortb2(pCTR_val, maxbid, cc):
    
    start = time.time()
    res = []
    budget = 6250 * 1000
    df_sub = val[['click','payprice']].copy()
    
    for lmbda in np.linspace(10**(-8), 10**(-5),400):

        lamb = lmbda
        bid = np.minimum(cc * (
                          (pCTR_val + np.sqrt(cc**2*lamb**2+pCTR_val**2)/(cc*lamb))**(1/3) 
                          - (cc*lamb/(pCTR_val + np.sqrt(cc**2*lamb**2+pCTR_val**2)))**(1/3)
                         ), 
                         maxbid)

        df = df_sub[df_sub['payprice'] < bid]
        cost = df['payprice'].cumsum()
        eligible = df[cost < budget]
        y = eligible['click'].sum() # "clicks"
        res.append([lmbda, y])

    opt = np.array(res)[np.argmax(np.array(res)[:,1]),0]
    
    return opt


def save_ortb2(bid_look_up, CTR_models_val, maxbid, cc):
    
    for col in CTR_models_val.columns:
        pCTR_val = CTR_models_val.loc[:,col]
        lamb = fit_ortb2(pCTR_val, maxbid, cc)
        bid = cc * ((pCTR_val + np.sqrt(cc**2*lamb**2+pCTR_val**2)/(cc*lamb))**(1/3) - (cc*lamb/(pCTR_val + np.sqrt(cc**2*lamb**2+pCTR_val**2)))**(1/3))
        bid_look_up[col + '_ortb2'] = np.round(np.minimum(bid, maxbid))
    return bid_look_up

bid_look_up = save_ortb2(bid_look_up, CTR_models_val, maxbid=10**7, cc = 58)

In [139]:
bid_look_up.describe()
bid_look_up.head()

Unnamed: 0,log_reg_linbid,Damian_linbid,Stefan2_linbid,NN_linbid,ensemble1_linbid,log_reg_revlinbid,Damian_revlinbid,Stefan2_revlinbid,NN_revlinbid,ensemble1_revlinbid,log_reg_ortb1,Damian_ortb1,Stefan2_ortb1,NN_ortb1,ensemble1_ortb1,log_reg_ortb2,Damian_ortb2,Stefan2_ortb2,NN_ortb2,ensemble1_ortb2
0,62.0,12.0,35.0,19.0,30.0,255.0,1396.0,416.0,982.0,459.0,68.0,22.0,46.0,35.0,43.0,70.0,23.0,48.0,39.0,46.0
1,24.0,15.0,25.0,2.0,10.0,661.0,1182.0,573.0,8811.0,1326.0,35.0,25.0,37.0,6.0,19.0,37.0,27.0,38.0,4.0,18.0
2,16.0,10.0,42.0,1.0,11.0,980.0,1749.0,344.0,13720.0,1276.0,26.0,18.0,52.0,4.0,20.0,26.0,18.0,55.0,3.0,19.0
3,24.0,12.0,33.0,3.0,14.0,647.0,1452.0,440.0,5973.0,1006.0,35.0,21.0,44.0,8.0,24.0,37.0,22.0,46.0,7.0,24.0
4,35.0,14.0,40.0,1.0,9.0,444.0,1193.0,361.0,21040.0,1513.0,46.0,25.0,51.0,2.0,17.0,49.0,27.0,53.0,2.0,16.0


In [140]:
# Add capped versions of all previous strategies (linear, rev linear, ortb1, ortb2)

def cap_strategy(bid_look_up, maxbid):
    for col in bid_look_up.columns:
        bid_look_up[col + '_cap1'] = np.round(np.minimum(bid_look_up.loc[:,col], maxbid[0]))
        bid_look_up[col + '_cap2'] = np.round(np.minimum(bid_look_up.loc[:,col], maxbid[1]))
        
    return bid_look_up
        
bid_look_up = cap_strategy(bid_look_up, maxbid=[500,1000])

In [141]:
#bid_look_up.describe()
bid_look_up.head()

Unnamed: 0,log_reg_linbid,Damian_linbid,Stefan2_linbid,NN_linbid,ensemble1_linbid,log_reg_revlinbid,Damian_revlinbid,Stefan2_revlinbid,NN_revlinbid,ensemble1_revlinbid,...,log_reg_ortb2_cap1,log_reg_ortb2_cap2,Damian_ortb2_cap1,Damian_ortb2_cap2,Stefan2_ortb2_cap1,Stefan2_ortb2_cap2,NN_ortb2_cap1,NN_ortb2_cap2,ensemble1_ortb2_cap1,ensemble1_ortb2_cap2
0,62.0,12.0,35.0,19.0,30.0,255.0,1396.0,416.0,982.0,459.0,...,70.0,70.0,23.0,23.0,48.0,48.0,39.0,39.0,46.0,46.0
1,24.0,15.0,25.0,2.0,10.0,661.0,1182.0,573.0,8811.0,1326.0,...,37.0,37.0,27.0,27.0,38.0,38.0,4.0,4.0,18.0,18.0
2,16.0,10.0,42.0,1.0,11.0,980.0,1749.0,344.0,13720.0,1276.0,...,26.0,26.0,18.0,18.0,55.0,55.0,3.0,3.0,19.0,19.0
3,24.0,12.0,33.0,3.0,14.0,647.0,1452.0,440.0,5973.0,1006.0,...,37.0,37.0,22.0,22.0,46.0,46.0,7.0,7.0,24.0,24.0
4,35.0,14.0,40.0,1.0,9.0,444.0,1193.0,361.0,21040.0,1513.0,...,49.0,49.0,27.0,27.0,53.0,53.0,2.0,2.0,16.0,16.0


In [142]:
bid_look_up.describe()

Unnamed: 0,log_reg_linbid,Damian_linbid,Stefan2_linbid,NN_linbid,ensemble1_linbid,log_reg_revlinbid,Damian_revlinbid,Stefan2_revlinbid,NN_revlinbid,ensemble1_revlinbid,...,log_reg_ortb2_cap1,log_reg_ortb2_cap2,Damian_ortb2_cap1,Damian_ortb2_cap2,Stefan2_ortb2_cap1,Stefan2_ortb2_cap2,NN_ortb2_cap1,NN_ortb2_cap2,ensemble1_ortb2_cap1,ensemble1_ortb2_cap2
count,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,...,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0
mean,85.999039,112.999355,86.000471,100.998523,84.999997,1142.597,575.280309,335.563504,4372.314,473.295247,...,62.435844,62.466138,69.643122,69.840961,67.016675,67.061022,66.195045,66.396147,67.649101,67.68074
std,487.782326,1526.586838,509.008216,1255.13011,366.01276,8488.211,504.374206,204.273215,32349.95,583.728922,...,41.959372,42.364327,43.417402,46.200331,33.891907,34.644743,52.734118,55.138037,40.126155,40.561046
min,0.0,7.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,14.0,14.0,7.0,7.0,0.0,0.0,1.0,1.0
25%,22.0,18.0,33.0,9.0,25.0,189.0,190.0,191.0,216.0,159.0,...,34.0,34.0,32.0,32.0,46.0,46.0,19.0,19.0,40.0,40.0
50%,45.0,52.0,47.0,40.0,49.0,346.0,335.0,304.0,477.0,278.0,...,58.0,58.0,68.0,68.0,59.0,59.0,64.0,64.0,64.0,64.0
75%,83.0,91.0,76.0,88.0,86.0,723.0,973.0,442.0,2233.0,555.0,...,82.0,82.0,92.0,92.0,78.0,78.0,98.0,98.0,87.0,87.0
max,101508.0,144531.0,80783.0,103837.0,40914.0,1439609.0,2328.0,3210.0,8994396.0,34170.0,...,500.0,1000.0,500.0,1000.0,500.0,1000.0,500.0,1000.0,500.0,860.0


In [143]:
bid_look_up.shape

(303925, 60)

In [144]:
# Add time preference
def time_preference(bid_look_up):
    for col in bid_look_up.columns:
        bid_look_up[col + '_calm'] = np.round(bid_look_up.loc[:,col] * np.linspace(0.8,1.2,bid_look_up.shape[0]))
        bid_look_up[col + '_aggre'] = np.round(bid_look_up.loc[:,col] * np.linspace(1.2,0.8,bid_look_up.shape[0]))
        
    return bid_look_up
        
bid_look_up = time_preference(bid_look_up)

In [146]:
bid_look_up.describe()

Unnamed: 0,log_reg_linbid,Damian_linbid,Stefan2_linbid,NN_linbid,ensemble1_linbid,log_reg_revlinbid,Damian_revlinbid,Stefan2_revlinbid,NN_revlinbid,ensemble1_revlinbid,...,Stefan2_ortb2_cap2_calm,Stefan2_ortb2_cap2_aggre,NN_ortb2_cap1_calm,NN_ortb2_cap1_aggre,NN_ortb2_cap2_calm,NN_ortb2_cap2_aggre,ensemble1_ortb2_cap1_calm,ensemble1_ortb2_cap1_aggre,ensemble1_ortb2_cap2_calm,ensemble1_ortb2_cap2_aggre
count,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,...,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0,303925.0
mean,85.999039,112.999355,86.000471,100.998523,84.999997,1142.597,575.280309,335.563504,4372.314,473.295247,...,67.049328,67.072715,66.187332,66.202754,66.388971,66.40332,67.63847,67.659732,67.669583,67.691898
std,487.782326,1526.586838,509.008216,1255.13011,366.01276,8488.211,504.374206,204.273215,32349.95,583.728922,...,35.605622,35.844675,53.602556,53.664326,56.015566,56.047281,41.082401,41.203381,41.497934,41.648434
min,0.0,7.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,6.0,7.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0
25%,22.0,18.0,33.0,9.0,25.0,189.0,190.0,191.0,216.0,159.0,...,45.0,45.0,19.0,19.0,19.0,19.0,39.0,39.0,39.0,39.0
50%,45.0,52.0,47.0,40.0,49.0,346.0,335.0,304.0,477.0,278.0,...,59.0,59.0,63.0,63.0,63.0,63.0,63.0,63.0,63.0,63.0
75%,83.0,91.0,76.0,88.0,86.0,723.0,973.0,442.0,2233.0,555.0,...,79.0,79.0,98.0,98.0,98.0,98.0,87.0,87.0,87.0,87.0
max,101508.0,144531.0,80783.0,103837.0,40914.0,1439609.0,2328.0,3210.0,8994396.0,34170.0,...,1076.0,1199.0,600.0,600.0,1184.0,1199.0,592.0,599.0,996.0,1031.0


In [145]:
bid_look_up.shape

(303925, 180)

In [147]:
# Save data file
## done bid_look_up.to_csv('proper_datasets/bid_look_up.csv')

### Load bids

In [4]:
bid_look_up = pd.read_csv('proper_datasets/bid_look_up.csv', index_col=0)

In [5]:
bid_look_up.shape

(303925, 180)

In [6]:
bid_look_up.head()

Unnamed: 0,log_reg_linbid,Damian_linbid,Stefan2_linbid,NN_linbid,ensemble1_linbid,log_reg_revlinbid,Damian_revlinbid,Stefan2_revlinbid,NN_revlinbid,ensemble1_revlinbid,...,Stefan2_ortb2_cap2_calm,Stefan2_ortb2_cap2_aggre,NN_ortb2_cap1_calm,NN_ortb2_cap1_aggre,NN_ortb2_cap2_calm,NN_ortb2_cap2_aggre,ensemble1_ortb2_cap1_calm,ensemble1_ortb2_cap1_aggre,ensemble1_ortb2_cap2_calm,ensemble1_ortb2_cap2_aggre
0,62.0,12.0,35.0,19.0,30.0,255.0,1396.0,416.0,982.0,459.0,...,38.0,58.0,31.0,47.0,31.0,47.0,37.0,55.0,37.0,55.0
1,24.0,15.0,25.0,2.0,10.0,661.0,1182.0,573.0,8811.0,1326.0,...,30.0,46.0,3.0,5.0,3.0,5.0,14.0,22.0,14.0,22.0
2,16.0,10.0,42.0,1.0,11.0,980.0,1749.0,344.0,13720.0,1276.0,...,44.0,66.0,2.0,4.0,2.0,4.0,15.0,23.0,15.0,23.0
3,24.0,12.0,33.0,3.0,14.0,647.0,1452.0,440.0,5973.0,1006.0,...,37.0,55.0,6.0,8.0,6.0,8.0,19.0,29.0,19.0,29.0
4,35.0,14.0,40.0,1.0,9.0,444.0,1193.0,361.0,21040.0,1513.0,...,42.0,64.0,2.0,2.0,2.0,2.0,13.0,19.0,13.0,19.0


In [41]:
# Load the auction function
# FUNCTION:

def simulate_auction(t_bids, t_payprice, t_clicks, agents, t_budget, spending_info, verbose=0):
    
    nrand = t_payprice.shape[0]
    rand_helper_vals = np.random.random(size=(nrand,agents)) # to efficiently split ties randomly

    def calc_things(x, t_payprice):
        exc_price = np.subtract(x, t_payprice)
        return exc_price

    def element_compare(first_price, second_price, payprice):
        if (second_price == 0) & (first_price != 0):
            second_price = payprice        
        return second_price


    while True:

        # define valid bids (bids times a boolean of whether a bid is higher than the payprice (otherwise instant no-win))      
        valid_bids = t_bids * np.any([(t_bids > t_payprice[:, None])], axis=0)
 
        # Find second highest prices
        sorted_prices = np.atleast_1d([np.sort(valid_bids[i,:])[::-1] for i in range(valid_bids.shape[0])]) 
        second_prices = sorted_prices[:,1]
        first_prices = sorted_prices[:,0]
        second_prices = np.vectorize(element_compare)(first_prices, second_prices, t_payprice)

        # Find highest bids (win prices) -> win goes to the first column among winners in case of ties (np.where returns duplicates!)
        win_cols = np.argmax(valid_bids+rand_helper_vals, axis=1)
        
        # Compute costs given win (second price is paid)
        valid_costs = np.zeros(shape=t_bids.shape) #tt_bids.copy()
        valid_costs[range(t_bids.shape[0]),win_cols] = second_prices

        costs_sum = np.cumsum(valid_costs, axis=0)
        bool_entry = (costs_sum >= t_budget)
        bool_sum = np.sum(bool_entry)

        if bool_sum > 0:
            
            t_bids[bool_entry] = 0 
            if verbose >= 1:
                print('t_bids was updated, %i bid removals' % bool_sum)

        else:
            
            break

    clicks = (valid_costs>0) * t_clicks[:, None]
            
    if verbose >= 1:
        print('DONE')
        
        print('clicks:')
        print(np.sum(clicks, axis=0)) # clicks
        print(np.mean(np.sum(clicks, axis=0))) # avg clicks per agent

        print('wins:')
        print(valid_costs[:2,:])
        print(np.sum((valid_costs>0), axis=0)) # wins
        print(np.mean(np.sum((valid_costs>0), axis=0))) # avg wins per agent
        print(time.time()-start)

        
        
    if spending_info:
        spendings = np.sum(valid_costs, axis=0) 
        res = [agents, np.mean(np.sum(clicks, axis=0)), 
               np.mean(np.sum((valid_costs>0), axis=0)),
               spendings] 
        
    else:    

        res = [np.sum(clicks, axis=0), np.sum((valid_costs>0), axis=0), np.sum(valid_costs, axis=0)]
    
    return res

### Simulate best response among 30 agents randomly selecting between 120 strategies
#### Bootstrap samples

In [62]:
# ===================== BOOTSTRAPPING ===================== #

start = time.time()
res = []
spending_info = False
t_payprice = np.array(val['payprice'])
t_clicks = np.array(val['click'])

res_bt_ids = []
res_bt_click = []
res_bt_win = []
res_bt_cost = []

nrand = t_payprice.shape[0]
nsims = 10000
nsamp = bid_look_up.shape[0]
nstrats = 179 # remove 1, otherwise it may draw 180 (out of bounds)
budget = 6250 * 1000

for isim in range(nsims):
    for agents in [30]: 
        strategy_ids = np.random.randint(0, nstrats+1, agents)
        resamp_ids = np.random.randint(0, nsamp, nsamp)
        t_bids = np.array(bid_look_up.iloc[resamp_ids,strategy_ids])

        out = simulate_auction(t_bids, t_payprice, t_clicks, agents, budget, spending_info, verbose=0)
        
        res_bt_ids.append(strategy_ids)
        res_bt_click.append(out[0])
        res_bt_win.append(out[1])
        res_bt_cost.append(out[2])
    
    if isim % 100 == 0:
        print('===== Simulation %i done, runtime so far: %.f ======' % (isim, time.time()-start))
    
print(time.time()-start) 



KeyboardInterrupt: 

In [63]:
np.savetxt('output_simulation/s2_bt_ids.csv', res_bt_ids)
np.savetxt('output_simulation/s2_bt_click.csv', res_bt_click)
np.savetxt('output_simulation/s2_bt_win.csv', res_bt_win)
np.savetxt('output_simulation/s2_bt_cost.csv', res_bt_cost)

### Simulate best response among 30 agents randomly selecting between 120 strategies
#### Original validation set (no bootstrapping)

In [43]:
# ===================== ORIGINAL VALIDATION SET ===================== #

start = time.time()
res = []
spending_info = False
t_payprice = np.array(val['payprice']) 
t_clicks = np.array(val['click'])

res_ids = []
res_click = []
res_win = []
res_cost = []

nrand = t_payprice.shape[0]
nsims = 10000
nstrats = 179 # remove 1, otherwise it may draw 180 (out of bounds)
budget = 6250 * 1000

for isim in range(nsims):
    for agents in [30]: 
        strategy_ids = np.random.randint(0, nstrats+1, agents)
        t_bids = np.array(bid_look_up.iloc[:,strategy_ids])

        out = simulate_auction(t_bids, t_payprice, t_clicks, agents, budget, spending_info, verbose=0)

        res_ids.append(strategy_ids)
        res_click.append(out[0])
        res_win.append(out[1])
        res_cost.append(out[2])
    
    if isim % 100 == 0:
        print('===== Simulation %i done, runtime so far: %.f ======' % (isim, time.time()-start))
    
print(time.time()-start) 



KeyboardInterrupt: 

In [33]:
res_ids

[array([ 67,  25, 163, 156, 163,  74, 174,  45,  35, 146, 111,  78, 156,
        144, 106, 105, 145, 119, 141,  79,  22, 161,  38,  29, 121,  22,
        142,  57, 126,  94])]

In [34]:
res_click

[array([96, 10,  0,  1,  0,  7,  8,  0,  3,  1,  5,  0,  1,  1, 13,  3,  0,
        10,  1,  0,  0,  0,  2,  8,  9,  0,  0,  0,  3,  1], dtype=int64)]

In [35]:
res_win

[array([13916,  8566,   965,   455,  1001, 21158, 23706,   177, 15441,
         5041,  4765,  9356,   456,  5029, 23549,  7102,  1263,  3228,
         2238,  6329,   184,   900, 26966,  3565, 18592,   176, 22077,
         2357, 13194, 12099])]

In [36]:
res_cost

[array([4360738., 1869624.,   61751.,   41770.,   63493., 6249692.,
        2197623.,   12564., 6249618.,  387623., 1571202., 6249898.,
          41366.,  375077., 5291037., 1776134.,   85271., 1102759.,
         371997., 6249771.,   25781.,   57723., 6113958.,  955335.,
        6249976.,   24817., 2103523.,  195457., 6249512.,  743802.])]

In [44]:
np.savetxt('output_simulation/s1_ids.csv', res_ids)
np.savetxt('output_simulation/s1_click.csv', res_click)
np.savetxt('output_simulation/s1_win.csv', res_win)
np.savetxt('output_simulation/s1_cost.csv', res_cost)

In [249]:
res_ids[0][19], res_click[0][19], res_win[0][19], res_cost[0][19]

(3, 62, 20071, 5920629.0)

In [238]:
pd.read_csv('output_simulation/s1_sams_ids.csv', delimiter=' ', header=None)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,54.0,33.0,97.0,94.0,125.0,73.0,16.0,168.0,21.0,5.0,...,125.0,166.0,36.0,136.0,151.0,105.0,103.0,160.0,136.0,17.0


In [239]:
pd.read_csv('output_simulation/s1_sams_click.csv', delimiter=' ', header=None)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,0.0,0.0,0.0,1.0,3.0,0.0,0.0,3.0,10.0,3.0,...,4.0,4.0,8.0,5.0,1.0,5.0,1.0,0.0,2.0,0.0


In [240]:
pd.read_csv('output_simulation/s1_sams_win.csv', delimiter=' ', header=None)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,14.0,9089.0,5941.0,4777.0,15066.0,7051.0,24.0,4802.0,14529.0,12685.0,...,14966.0,26737.0,21293.0,17855.0,1081.0,9499.0,2138.0,15170.0,18029.0,12.0


In [241]:
pd.read_csv('output_simulation/s1_sams_cost.csv', delimiter=' ', header=None)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,637.0,6249830.0,432247.0,392792.0,6249680.0,6249573.0,1321.0,398527.0,3256293.0,6249849.0,...,6249884.0,2223346.0,6249913.0,6249578.0,185207.0,2221446.0,1008083.0,1032564.0,6236990.0,515.0
