# Bid creation

Based on the pCTRs predicted by different ML algorithms (GBM, LR, NN, etc.), different bid strategies (Linear, ORTB1/2, etc.) can be selected, and their optimal parameters are found via grid search. Then, using the best-performing parameter (highest clicks and CTR), the bids are created for the Test set.

In [1]:
#Import libraries 

import os
import random
import numpy as np
#print (np.__version__)
import pandas as pd
import math

from glob import glob
from IPython.display import display
from tqdm import tnrange, tqdm_notebook
import time
import datetime
import itertools


#### Read original dataset

In [2]:
#Folder that holds the original (feature) datasets.
Path = 'c://Datasets//Multiagent//rtb'

#Switch into that folder and load every CSV found there; each DataFrame is
#stored under the file name with its 4-character extension removed and
#title-cased (e.g. 'train.csv' -> 'Train').
os.chdir(Path)
Datasets_features = {}
for Filename in glob('*.csv'):
    Datasets_features.update({Filename[:-4].title(): pd.read_csv(Filename, sep = ',')})

#Handles for the two datasets used by the cells below.
Train_ftr = Datasets_features['Train']
Validation_ftr = Datasets_features['Validation']

#### Define general functions

In [3]:

#Linear bidding strategy.
def linear_bidding(base_bid, pCTR, Init_CTR):
    """Return ``base_bid`` scaled by ``pCTR`` relative to ``Init_CTR``.

    The result is ``base_bid * pCTR / Init_CTR``, evaluated left to right.
    """
    scaled_bid = base_bid * pCTR
    return scaled_bid / Init_CTR

def nlogn_bidding(base_bid, pCTR, Init_CTR):
    """Return the "n log n" bid ``base_bid * r * ln(r)`` with ``r = pCTR / Init_CTR``.

    Parameters
    ----------
    base_bid : scalar multiplier applied to ``r * ln(r)``.
    pCTR : predicted CTR; a scalar or an array-like (array-likes are
        converted with ``np.asarray`` and an ndarray is returned).
    Init_CTR : reference CTR that ``pCTR`` is divided by.

    Notes
    -----
    * For ``r == 0`` the plain expression evaluates ``0 * log(0)``, which is
      NaN and emits a RuntimeWarning. The limit of ``r * ln(r)`` as r -> 0
      is 0, so a bid of 0 is returned for that case instead.
    * For ``0 < r < 1`` the logarithm is negative, hence so is the bid.
      NOTE(review): callers receive that negative value unchanged - confirm
      whether it should be clamped before being written out as a bid price.
    """
    ratio = np.asarray(pCTR, dtype=float) / Init_CTR
    is_zero = ratio == 0

    # Only the log(0) -> -inf "divide" warning is silenced; invalid inputs
    # (negative ratios) still warn and still produce NaN.
    with np.errstate(divide='ignore'):
        log_ratio = np.log(ratio)

    # Swap the -inf at r == 0 for 0 so the product below is 0 rather than NaN.
    log_ratio = np.where(is_zero, 0.0, log_ratio)
    return base_bid * ratio * log_ratio

def exp_bidding(base_bid, pCTR, Init_CTR):
    """Return ``base_bid`` multiplied by ``e`` raised to ``pCTR / Init_CTR``."""
    relative_ctr = pCTR / Init_CTR
    growth = np.e ** relative_ctr
    return base_bid * growth

def ortb_bidding(pCTR, c=50, lmda=5.2e-7, ortbtype=2):
    """Return a non-linear bid computed from ``pCTR``, ``c`` and ``lmda``.

    ``ortbtype == 2`` selects the cube-root form::

        expr = (pCTR + sqrt(c^2 * lmda^2 + pCTR^2)) / (c * lmda)
        bid  = c * (expr^(1/3) - (1/expr)^(1/3))

    Any other ``ortbtype`` selects the square-root form::

        bid = sqrt(c / lmda * pCTR + c^2) - c
    """
    if ortbtype != 2:
        radicand = c  / lmda * pCTR + c ** 2
        return np.sqrt(radicand) - c

    root_term = np.sqrt(c ** 2 * lmda ** 2 + pCTR **2)
    expr = (pCTR + root_term) / (c * lmda)
    cube_root = expr ** (1 / 3)
    inverse_cube_root = (1 / expr) ** (1 / 3)
    return c * (cube_root - inverse_cube_root)

#Winning criterion #1.
def win_auction_1(bid, payprice):
    """Return whether ``bid`` is at least as large as ``payprice``."""
    return payprice <= bid
    
#Winning criterion #2.
def win_auction_2(bid, payprice, Others):
    """Return whether ``bid`` meets ``payprice`` and every bid in ``Others``.

    Parameters
    ----------
    bid : the bid being evaluated.
    payprice : price the bid has to reach.
    Others : iterable of competing bids; may be empty.

    With an empty ``Others`` there is nothing else to beat, so the outcome is
    decided by ``payprice`` alone (a bare ``max(Others)`` would raise
    ``ValueError`` on an empty iterable).
    """
    # Same short-circuit as before: competitors are only inspected once the
    # pay price has been met.
    if not (bid >= payprice):
        return False
    highest_rival = max(Others, default=None)
    return highest_rival is None or bid >= highest_rival
    
#Helper: CSV-safe rendering of one reported value.
def _csv_field(value):
    """Return ``str(value)``, double-quoted when it contains a comma or a quote."""
    text = str(value)
    if ',' in text or '"' in text:
        text = '"' + text.replace('"', '""') + '"'
    return text


#Bidding strategy parameter evaluation.
def Bidding_strategy_prm(Budget, Init_CTR, Init_eCPC, Clicks_Prices,
                         Pred_CTRs, prm, Dict_alg, key,alg):
    """Replay the bid requests with one strategy/parameter and summarise the outcome.

    Parameters
    ----------
    Budget : spending limit; the replay stops at the first won impression
        that would push the accumulated cost above it.
    Init_CTR : reference CTR forwarded to the Linear/NLogN/Exp strategies.
    Init_eCPC : accepted for interface compatibility; not used here.
    Clicks_Prices : sequence of records where index 0 is the click value and
        index 1 the pay price of each bid request.
    Pred_CTRs : sequence of predicted CTRs, indexed in step with
        ``Clicks_Prices``.
    prm : strategy parameter. For 'Ortb' it is indexed as
        ``(prm[0], prm[1], prm[2])`` and forwarded as (c, lmda, ortbtype);
        for the other strategies it is passed as the base bid.
    Dict_alg : accepted for interface compatibility; not used here.
    key : strategy name: 'Linear', 'NLogN', 'Exp' or 'Ortb'.
    alg : label of the pCTR estimator, echoed into the output.

    Returns
    -------
    (Output, result)
        ``result`` is the tuple ``(key, alg, prm, Budget, Cost, Bids, Imps,
        Clks, empCTR, avgCPM, avgCPC)`` and ``Output`` is the same values as
        one comma-separated line (no trailing newline). Fields containing a
        comma, such as the 'Ortb' parameter tuple, are double-quoted so the
        line keeps exactly 11 columns.

    Notes
    -----
    * Every strategy is judged with ``win_auction_1`` and a won impression is
      charged its own pay price divided by 1000 (presumably a CPM price -
      confirm against the dataset description).
    * An unknown ``key`` prints an error on the first bid request and stops;
      the counters gathered so far (``Bids == 1``) are still reported.
    * With no impressions won, ``empCTR``, ``avgCPM`` and ``avgCPC`` are
      reported as 0.
    """
    # The lambdas close over prm/Init_CTR; 'Ortb' only indexes prm when called.
    strategies = {
        'Linear': lambda ctr: linear_bidding(prm, ctr, Init_CTR),
        'NLogN': lambda ctr: nlogn_bidding(prm, ctr, Init_CTR),
        'Exp': lambda ctr: exp_bidding(prm, ctr, Init_CTR),
        'Ortb': lambda ctr: ortb_bidding(ctr, prm[0], prm[1], prm[2]),
    }
    compute_bid = strategies.get(key)

    empCPM = []
    Bids, Cost, Imps, Clks = 0, 0, 0, 0

    #Looping on bid requests.
    for item, record in enumerate(Clicks_Prices):
        Bids += 1
        pCTR = Pred_CTRs[item]

        #Exception: error.
        if compute_bid is None:
            print('[Strategy error]: Wrong bidding strategy name!')
            break

        bid = compute_bid(pCTR)
        payprice = record[1]

        #Computing bid outcome.
        if win_auction_1(bid, payprice):
            price = payprice/1000

            #Stopping condition: checked BEFORE the impression is booked so
            #that the reported spend never exceeds the budget.
            if Cost + price > Budget:
                break

            Imps += 1
            Clks += record[0]
            Cost += price
            empCPM.append(payprice)

    #Reporting results.
    if Imps != 0:
        empCTR = Clks/Imps
        avgCPM = np.mean(empCPM)
    else:
        # np.mean([]) would be NaN (with a RuntimeWarning); report 0 like the
        # other metrics do when there is nothing to average.
        empCTR = 0
        avgCPM = 0

    if Clks != 0: avgCPC = np.sum(empCPM)/(Clks*1000)
    else: avgCPC = 0

    result = (key,alg,prm,Budget,Cost,Bids,Imps,Clks,empCTR,avgCPM,avgCPC)
    Output = ','.join(_csv_field(value) for value in result)
    return Output, result
    
#Bidding strategy simulation.
def Bidding_strategy(Budget, Init_CTR, Init_eCPC, Clicks_Prices, Pred_CTRs,
                     file, Dict_alg, key, alg):
    """Evaluate every candidate parameter of strategy ``key`` and log each result.

    The candidates are taken from ``Dict_alg[key]``. Each one is passed to
    ``Bidding_strategy_prm``; the returned text line is written to ``file``
    followed by a newline, and the returned result tuple is stored under the
    candidate's position in the grid.

    Returns a dict mapping grid position -> result tuple.
    """
    grid = Dict_alg[key]
    outcomes = {}
    for position in tnrange(len(grid), desc='Params'):
        csv_line, summary = Bidding_strategy_prm(
            Budget, Init_CTR, Init_eCPC, Clicks_Prices,
            Pred_CTRs, grid[position], Dict_alg, key, alg,
        )
        outcomes[position] = summary
        file.write(csv_line + '\n')
    return outcomes

#Actual bid creation and writing into file, based on the selected parameter.
def createBids(bidIds,prm,pred_CTRs,init_CTR,alg_name, key ):
    """Write one bid per bid id to ``Team_07_<alg_name>_<key>_bids.csv``.

    Parameters
    ----------
    bidIds : sized iterable of bid identifiers (iterated by position, so a
        pandas Series works regardless of its index labels).
    prm : strategy parameter. For 'Ortb' it is indexed as
        ``(prm[0], prm[1], prm[2])`` and forwarded as (c, lmda, ortbtype);
        for the other strategies it is passed as the base bid.
    pred_CTRs : sequence of predicted CTRs aligned with ``bidIds``; entries
        beyond ``len(bidIds)`` are ignored.
    init_CTR : reference CTR forwarded to the Linear/NLogN/Exp strategies.
    alg_name : label of the pCTR estimator, used in the file name.
    key : strategy name: 'Linear', 'NLogN', 'Exp' or 'Ortb'.

    The file is created in the current working directory with the header
    ``bidid,bidprice`` and one ``<bidid>,<bid with one decimal>`` line per id.

    Raises
    ------
    NameError
        If ``key`` is not a known strategy.
    IndexError
        If ``pred_CTRs`` has fewer entries than ``bidIds``.

    Both checks run before the file is opened, so a bad call no longer leaves
    a truncated or partially written file behind.
    """
    # The lambdas close over prm/init_CTR; 'Ortb' only indexes prm when called.
    strategies = {
        'Linear': lambda ctr: linear_bidding(prm, ctr, init_CTR),
        'NLogN': lambda ctr: nlogn_bidding(prm, ctr, init_CTR),
        'Exp': lambda ctr: exp_bidding(prm, ctr, init_CTR),
        'Ortb': lambda ctr: ortb_bidding(ctr, prm[0], prm[1], prm[2]),
    }
    compute_bid = strategies.get(key)
    if compute_bid is None:
        raise NameError('key not found')
    if len(pred_CTRs) < len(bidIds):
        raise IndexError('pred_CTRs has fewer entries than bidIds')

    Filename = 'Team_07_'+ alg_name +'_'+ key+ '_bids' +'.csv'
    print('\tUsing best param...' + str(prm))
    with open(Filename, 'w') as file:
        file.write('bidid,bidprice' + '\n')
        # zip pairs ids and predictions by position and stops at the last id.
        file.writelines(
            '{},{:.1f}\n'.format(bidId, compute_bid(pCTR))
            for bidId, pCTR in zip(bidIds, pred_CTRs)
        )

    print('\t[File %s]: Process completed!' %Filename,'\n')

#### Read CTR information, base bid optimisation and bid creation

A grid search is performed over the defined parameters and then, using the best parameter, the base bids are calculated for each algorithm/bid-strategy combination. The base bids are calculated and written to file, and finally a table is shown comparing the best performers overall.

For CTR, two files should be in the folder, named in the following format: '<Algorithm\>\_test\_pCTR.csv' and '<Algorithm\>\_validation\_pCTR.csv' (Note: the names are currently case sensitive). Then, the algorithm name must be included in the _algs_ array.



In [4]:

#Defining CTR directory.
Path = 'C://Datasets//Multiagent//calc//pctrs'

#Importing data: one DataFrame per '*_pCTR.csv' file, keyed by the file name
#without its extension.
os.chdir(Path)
Datasets_CTR = {}
overall_alg_vals = pd.DataFrame(columns=['BidStrat','Algorithm', 'Parameter', 'Budget','Spent','Bids','Imps', 'Click','eCTR','avgCPM','avgCPC'])

for Filename in glob('*_pCTR.csv'):
    Datasets_CTR[Filename[:-4]] = pd.read_csv(Filename, sep = ',')

#Define algorithms to use
algs = ['LGBM','LR','NN', 'ensemble_avg','ensemble_wavg'] #,'Catboost'


#Initialising values.
Init_CTR = sum(Train_ftr['click'])/len(Train_ftr)
Init_eCPC = sum(Train_ftr['payprice'])/(sum(Train_ftr['click'])*1000)
print("avgCTR: ",Init_CTR)

PathBB =   'c://Datasets//Multiagent//calc//bb'
PathBids = 'c://Datasets//Multiagent//calc//bids'

#Everything below up to the loop is identical for every estimator, so it is
#built once instead of once per algorithm.
Budget = 6250

#Define grid search parameters
#NOTE(review): an earlier integer grid for Prm_lin (2..800 in steps of
#2/5/10/50) used to be assigned here and was immediately overwritten by the
#line below, so it never took effect. Confirm that this small-valued grid is
#the one intended for the Linear base bid.
Prm_lin = np.concatenate((np.linspace(1e-7, 1e-5, 15),np.linspace(1e-5, 1e-3, 15),np.linspace(1e-3, 1e-1, 15)))
Prm_log = np.logspace(1,4,num=35)
c_range = np.linspace(85, 200, 5)  #Simplified grid parameters for speed (demo purposes)
lmda_range =   np.linspace(4e-6, 5e-6, 5)
ortb_range = (1, 2)
Prm_ortb = list(itertools.product(c_range,lmda_range,ortb_range))

# Select bidding strategies to use
Dict_alg = {'Ortb': Prm_ortb,'Linear':Prm_lin,'NLogN': Prm_log  }

#(click, payprice) pairs of the validation set, replayed by every grid search.
Clicks_Prices = list(zip(Validation_ftr['click'], Validation_ftr['payprice']))

Header = 'bidStrat,algorithm,parameter,budget,spend,bids,imps,click,empCTR,avgCPM,avgCPC'

#Top-3 rows of every (estimator, strategy) search; concatenated after the loop
#because DataFrame.append no longer exists in pandas >= 2.0.
best_frames = []

for alg in algs:
    #Defining variables.
    Test_CTR = Datasets_CTR[alg+'_test_pCTR']
    Validation_CTR = Datasets_CTR[alg+'_validation_pCTR']
    random.seed(10)

    Pred_CTRs = list(Validation_CTR['pCTR'].values)
    Pred_CTRs_Test = list(Test_CTR['pCTR'].values)

    #Training and saving results.
    print('pCTR estimator:', alg)
    for key in Dict_alg:
        print('\t',key, 'bid  (Started', time.strftime("%H:%M:%S"),')' )

        #Grid search on the validation set; every evaluated parameter is
        #logged to '<key>_BB_<alg>.csv' inside PathBB.
        os.chdir(PathBB)
        Filename = key + '_BB_'+ alg+'.csv'
        with open(Filename, 'w') as file:
            file.write(Header + '\n')
            strategy_results = Bidding_strategy(Budget, Init_CTR, Init_eCPC, Clicks_Prices,
                             Pred_CTRs, file, Dict_alg, key, alg)
        print('\t[File %s]: Process completed!' %Filename)

        #One row per evaluated parameter, ranked by clicks and then by eCTR.
        strategy_results_df = pd.DataFrame.from_dict(strategy_results).transpose()
        strategy_results_df.rename(columns={0:'BidStrat', 1:'Algorithm', 2:'Parameter', 3:'Budget',4:'Spent',5:'Bids',
                                            6:'Imps', 7:'Click',8:'eCTR',9:'avgCPM',10:'avgCPC',11:'Agent'}, inplace=True)
        ranked = strategy_results_df.sort_values(['Click','eCTR'],ascending=False)
        best_frames.append(ranked.head(3))

        #Column 2 is 'Parameter'; the top-ranked row supplies the parameter
        #used to create the test-set bids, which are written inside PathBids.
        bestParam = ranked.iloc[0,2]
        os.chdir(PathBids)
        createBids(Test_CTR['bidid'],bestParam,Pred_CTRs_Test,Init_CTR,alg,key)

#Keep the empty, correctly-labelled frame when nothing was evaluated.
if best_frames:
    overall_alg_vals = pd.concat(best_frames, sort=False)

print('\n• Full process completed!')
display(overall_alg_vals.sort_values(['Click','eCTR'],ascending= False))

avgCTR:  0.0007375623256619447
pCTR estimator: LGBM
	 Ortb bid  (Started 20:30:31 )


HBox(children=(IntProgress(value=0, description='Params', max=50), HTML(value='')))


	[File Ortb_BB_LGBM.csv]: Process completed!
	Using best param...(200.0, 4e-06, 2)
	[File Team_07_LGBM_Ortb_bids.csv]: Process completed! 

	 Linear bid  (Started 20:32:01 )


HBox(children=(IntProgress(value=0, description='Params', max=45), HTML(value='')))


	[File Linear_BB_LGBM.csv]: Process completed!
	Using best param...1e-07
	[File Team_07_LGBM_Linear_bids.csv]: Process completed! 

	 NLogN bid  (Started 20:32:22 )


HBox(children=(IntProgress(value=0, description='Params', max=35), HTML(value='')))


	[File NLogN_BB_LGBM.csv]: Process completed!
	Using best param...5436.183620153837
	[File Team_07_LGBM_NLogN_bids.csv]: Process completed! 

pCTR estimator: LR
	 Ortb bid  (Started 20:33:02 )


HBox(children=(IntProgress(value=0, description='Params', max=50), HTML(value='')))


	[File Ortb_BB_LR.csv]: Process completed!
	Using best param...(142.5, 4.25e-06, 2)
	[File Team_07_LR_Ortb_bids.csv]: Process completed! 

	 Linear bid  (Started 20:34:33 )


HBox(children=(IntProgress(value=0, description='Params', max=45), HTML(value='')))


	[File Linear_BB_LR.csv]: Process completed!
	Using best param...1e-07
	[File Team_07_LR_Linear_bids.csv]: Process completed! 

	 NLogN bid  (Started 20:34:57 )


HBox(children=(IntProgress(value=0, description='Params', max=35), HTML(value='')))


	[File NLogN_BB_LR.csv]: Process completed!
	Using best param...5436.183620153837
	[File Team_07_LR_NLogN_bids.csv]: Process completed! 

pCTR estimator: NN
	 Ortb bid  (Started 20:35:33 )


HBox(children=(IntProgress(value=0, description='Params', max=50), HTML(value='')))


	[File Ortb_BB_NN.csv]: Process completed!
	Using best param...(200.0, 4.25e-06, 2)
	[File Team_07_NN_Ortb_bids.csv]: Process completed! 

	 Linear bid  (Started 20:36:51 )


HBox(children=(IntProgress(value=0, description='Params', max=45), HTML(value='')))


	[File Linear_BB_NN.csv]: Process completed!
	Using best param...0.1
	[File Team_07_NN_Linear_bids.csv]: Process completed! 

	 NLogN bid  (Started 20:37:09 )


HBox(children=(IntProgress(value=0, description='Params', max=35), HTML(value='')))


	[File NLogN_BB_NN.csv]: Process completed!
	Using best param...10000.0
	[File Team_07_NN_NLogN_bids.csv]: Process completed! 

pCTR estimator: ensemble_avg
	 Ortb bid  (Started 20:37:45 )


HBox(children=(IntProgress(value=0, description='Params', max=50), HTML(value='')))


	[File Ortb_BB_ensemble_avg.csv]: Process completed!
	Using best param...(171.25, 4.25e-06, 2)
	[File Team_07_ensemble_avg_Ortb_bids.csv]: Process completed! 

	 Linear bid  (Started 20:39:10 )


HBox(children=(IntProgress(value=0, description='Params', max=45), HTML(value='')))


	[File Linear_BB_ensemble_avg.csv]: Process completed!
	Using best param...1e-07
	[File Team_07_ensemble_avg_Linear_bids.csv]: Process completed! 

	 NLogN bid  (Started 20:39:39 )


HBox(children=(IntProgress(value=0, description='Params', max=35), HTML(value='')))


	[File NLogN_BB_ensemble_avg.csv]: Process completed!
	Using best param...1968.4194472866134
	[File Team_07_ensemble_avg_NLogN_bids.csv]: Process completed! 

pCTR estimator: ensemble_wavg
	 Ortb bid  (Started 20:40:36 )


HBox(children=(IntProgress(value=0, description='Params', max=50), HTML(value='')))


	[File Ortb_BB_ensemble_wavg.csv]: Process completed!
	Using best param...(171.25, 4.25e-06, 2)
	[File Team_07_ensemble_wavg_Ortb_bids.csv]: Process completed! 

	 Linear bid  (Started 20:41:58 )


HBox(children=(IntProgress(value=0, description='Params', max=45), HTML(value='')))


	[File Linear_BB_ensemble_wavg.csv]: Process completed!
	Using best param...1e-07
	[File Team_07_ensemble_wavg_Linear_bids.csv]: Process completed! 

	 NLogN bid  (Started 20:42:17 )


HBox(children=(IntProgress(value=0, description='Params', max=35), HTML(value='')))


	[File NLogN_BB_ensemble_wavg.csv]: Process completed!
	Using best param...1968.4194472866134
	[File Team_07_ensemble_wavg_NLogN_bids.csv]: Process completed! 


• Full process completed!


Unnamed: 0,BidStrat,Algorithm,Parameter,Budget,Spent,Bids,Imps,Click,eCTR,avgCPM,avgCPC
33,Ortb,ensemble_avg,"(171.25, 4.25e-06, 2)",6250,5745.87,303925,126205,164,0.00129947,45.5281,35.0358
33,Ortb,ensemble_wavg,"(171.25, 4.25e-06, 2)",6250,5742.03,303925,126420,164,0.00129726,45.4203,35.0124
43,Ortb,ensemble_avg,"(200.0, 4.25e-06, 2)",6250,5917.05,303925,127755,164,0.00128371,46.3156,36.0796
43,Ortb,ensemble_wavg,"(200.0, 4.25e-06, 2)",6250,5914.56,303925,128016,164,0.00128109,46.2017,36.0644
31,Ortb,ensemble_avg,"(171.25, 4e-06, 2)",6250,6149.22,303925,132409,164,0.00123859,46.4411,37.4953
31,Ortb,ensemble_wavg,"(171.25, 4e-06, 2)",6250,6143.86,303925,132619,164,0.00123663,46.3272,37.4626
41,Ortb,LGBM,"(200.0, 4e-06, 2)",6250,6209.1,303925,137766,160,0.00116139,45.0699,38.8068
31,Ortb,LGBM,"(171.25, 4e-06, 2)",6250,6037.75,303925,136250,159,0.00116697,44.3138,37.9733
45,Ortb,LGBM,"(200.0, 4.5e-06, 2)",6250,5493.59,303925,127016,157,0.00123606,43.2511,34.991
23,Ortb,LR,"(142.5, 4.25e-06, 2)",6250,6229.73,303925,128537,155,0.00120588,48.4664,40.1918
