# Abstract

This notebook evaluates every combination of the implemented pCTR models, base-bid models, and bidding strategies on the validation set, and ranks the combinations to find the best-performing one.

# Import data

In [1]:
import pandas as pd
import numpy as np
from functools import reduce
import matplotlib.pyplot as plt
import seaborn as sns
import time

%matplotlib inline
pd.set_option('display.max_columns', None)
pd.options.mode.chained_assignment = None  # To ignore SettingWithCopyWarning warning

In [2]:
# Load the training and validation sets from CSV files in the working directory.
# `train` supplies the constant base bid; `validation` is what the strategies are replayed on.
train = pd.read_csv("train.csv")
validation = pd.read_csv("validation.csv")

In [3]:
print(train.shape, validation.shape)

(2430981, 25) (303925, 25)


# Import Predictions

In [4]:
# pCTR model outputs: each frame is read downstream through its "click_proba" column.
lr_preds = pd.read_csv("Prediction_csv/lr_pCTR.csv", index_col  = "Unnamed: 0")
rf_preds = pd.read_csv("Prediction_csv/rf_pCTR.csv", index_col  = "Unnamed: 0")
xgb_preds = pd.read_csv("Prediction_csv/xgb_pCTR.csv", index_col  = "Unnamed: 0")
# NOTE: unlike the other files, this one is read without index_col, so it gets
# pandas' default RangeIndex.
dwane_preds = pd.read_csv("Prediction_csv/click_predictions_narrow.csv")
# Base-bid model outputs: each frame is read downstream through its
# "predicted_payprice" column.
lasso_preds = pd.read_csv("Prediction_csv/lasso_basebid.csv", index_col  = "Unnamed: 0")
ridge_preds = pd.read_csv("Prediction_csv/ridge_basebid.csv", index_col  = "Unnamed: 0")
elasticnet_preds = pd.read_csv("Prediction_csv/elasticnet_basebid.csv", index_col  = "Unnamed: 0")
rfg_preds = pd.read_csv("Prediction_csv/rfg_basebid.csv", index_col  = "Unnamed: 0")
xgbr_preds = pd.read_csv("Prediction_csv/xgb_basebid.csv", index_col  = "Unnamed: 0")

In [5]:
# Ensemble pCTR: element-wise product of the XGBoost and "narrow" click
# probabilities, paired positionally with the validation bid ids.
ensemble_prob = xgb_preds["click_proba"].mul(dwane_preds["click_proba"])
ensemble_pCTR_preds = pd.DataFrame(
    list(zip(validation["bidid"], ensemble_prob)),
    columns=["bidid", "click_proba"],
)

# Ensemble base bid: the five regressors' predicted pay prices summed in a
# fixed order and weighted by 0.2 (i.e. their unweighted average).
basebid_columns = [
    frame["predicted_payprice"]
    for frame in (lasso_preds, ridge_preds, elasticnet_preds, rfg_preds, xgbr_preds)
]
ensemble_basebid = 0.2 * reduce(lambda total, column: total + column, basebid_columns)
ensemble_basebid_preds = pd.DataFrame(
    list(zip(validation["bidid"], ensemble_basebid)),
    columns=["bidid", "predicted_payprice"],
)

In [6]:
ensemble_pCTR_preds.head()

Unnamed: 0,bidid,click_proba
0,bbcb813b6166538503d8b33a5602d7d72f6019dc,0.0
1,5a07316c49477cb5d9b4d5aa39c27d6c3be7f92d,0.0
2,f6ece71dae81d6b16bfb24ad6dd5611472d4c673,0.0
3,b4d5c57c9b38ff5a12954fa01e11931b4e6bfbbb,0.0
4,0899bf144249458ea9c89188473694bf44c7ca15,0.0


In [7]:
ensemble_basebid_preds.head()

Unnamed: 0,bidid,predicted_payprice
0,bbcb813b6166538503d8b33a5602d7d72f6019dc,67.205068
1,5a07316c49477cb5d9b4d5aa39c27d6c3be7f92d,77.476141
2,f6ece71dae81d6b16bfb24ad6dd5611472d4c673,78.09452
3,b4d5c57c9b38ff5a12954fa01e11931b4e6bfbbb,45.960159
4,0899bf144249458ea9c89188473694bf44c7ca15,49.426684


In [8]:
# Mean payprice over the training rows that were clicked (click == 1).
base_bid = train.loc[ train["click"] == 1, "payprice"].mean()
# Training click-through rate expressed as a percentage (clicks per 100 rows).
avgCTR = train["click"].sum()*100/train.shape[0]

In [9]:
print("Base bid: {}, avgCTR: {}, ratio: {}".format(base_bid, avgCTR, base_bid/avgCTR))

Base bid: 105.46402677077523, avgCTR: 0.07375623256619447, ratio: 1429.899862037066


# Strategies

In [10]:
def Strategy1(sub_pCTR, sub_basebid):
    """Linear bidding around a constant base bid.

    Every bid is ``click_proba * base_bid / mean(click_proba)``, where
    ``base_bid`` is the mean ``payprice`` of the clicked rows (``click == 1``)
    of the notebook-level ``train`` frame.

    Parameters
    ----------
    sub_pCTR : DataFrame
        Must contain a "click_proba" column.
    sub_basebid : DataFrame
        Not used; accepted so that all strategies share one signature.

    Returns
    -------
    Series
        Bid prices, indexed like ``sub_pCTR``.
    """
    predicted_ctr = sub_pCTR["click_proba"]
    was_clicked = train["click"] == 1
    constant_bid = train.loc[was_clicked, "payprice"].mean()
    # The normaliser is the mean *predicted* CTR of this submission, not the
    # empirical click rate of the training set.
    return predicted_ctr.mul(constant_bid).div(predicted_ctr.mean())

def Strategy2(sub_pCTR, sub_basebid):
    """Linear bidding around a per-row predicted base bid.

    Every bid is ``click_proba * predicted_payprice / mean(click_proba)``.

    Parameters
    ----------
    sub_pCTR : DataFrame
        Must contain a "click_proba" column.
    sub_basebid : DataFrame
        Must contain a "predicted_payprice" column; rows are matched to
        ``sub_pCTR`` by index.

    Returns
    -------
    Series
        Bid prices.
    """
    predicted_ctr = sub_pCTR["click_proba"]
    scaled_bid = predicted_ctr * sub_basebid["predicted_payprice"]
    return scaled_bid / predicted_ctr.mean()

def Strategy3(sub_pCTR, sub_basebid):
    """Thresholded exponential bidding.

    Rows whose "click_proba" is below 0.3 get a bid of zero. For the rest the
    probability is replaced by ``exp(click_proba)`` before it is multiplied
    by the predicted pay price and divided by the mean of the *original*
    probabilities.

    Parameters
    ----------
    sub_pCTR : DataFrame
        Must contain a "click_proba" column. It is not modified.
    sub_basebid : DataFrame
        Must contain a "predicted_payprice" column; rows are matched to
        ``sub_pCTR`` by index.

    Returns
    -------
    Series
        Bid prices.
    """
    predicted_ctr = sub_pCTR["click_proba"]
    mean_ctr = predicted_ctr.mean()
    # Vectorised form of "0 if x < 0.3 else exp(x)".
    boosted_ctr = np.exp(predicted_ctr).mask(predicted_ctr < 0.3, 0)
    return (boosted_ctr * sub_basebid["predicted_payprice"]) / mean_ctr

def Strategy4(sub_pCTR, sub_basebid):
    """Quadratic bidding with a floor at zero.

    Each probability ``p`` is reshaped to ``(p + 0.5)**2 - 0.5`` and negative
    results are raised to zero. The reshaped value is then multiplied by the
    predicted pay price and divided by the mean of the *original*
    probabilities.

    Parameters
    ----------
    sub_pCTR : DataFrame
        Must contain a "click_proba" column. It is not modified.
    sub_basebid : DataFrame
        Must contain a "predicted_payprice" column; rows are matched to
        ``sub_pCTR`` by index.

    Returns
    -------
    Series
        Bid prices.
    """
    predicted_ctr = sub_pCTR["click_proba"]
    mean_ctr = predicted_ctr.mean()
    reshaped_ctr = ((predicted_ctr + 0.5) ** 2 - 0.5).clip(lower=0)
    return (reshaped_ctr * sub_basebid["predicted_payprice"]) / mean_ctr

# Evaluate Results

In [11]:
# Total spend cap for one simulated campaign. ValidateStrategy compares it with
# the running sum of `payprice`, so it is presumably in the same units as
# `payprice` -- TODO confirm.
budget = 6250*1000

def ValidationDataFrame(submission_pCTR, submission_basebid, strategy):
    """Assemble the frame that ``ValidateStrategy`` replays.

    Takes the "bidid", "click", "bidprice" and "payprice" columns of the
    notebook-level ``validation`` frame and adds the model outputs and the
    bids produced by ``strategy``. New columns are matched to ``validation``
    by index.

    Parameters
    ----------
    submission_pCTR : DataFrame
        Must contain a "click_proba" column.
    submission_basebid : DataFrame
        Must contain a "predicted_payprice" column.
    strategy : callable
        Called as ``strategy(submission_pCTR, submission_basebid)``; must
        return the bid prices.

    Returns
    -------
    DataFrame
        A new frame with the four validation columns plus "click_proba",
        "basebid_predicted" and "bidprice_predicted". ``validation`` itself is
        left untouched.
    """
    # .assign() returns a fresh frame, so nothing is written onto a slice of
    # `validation` (the original relied on the SettingWithCopyWarning being
    # silenced globally).
    return validation[["bidid", "click", "bidprice", "payprice"]].assign(
        click_proba=submission_pCTR["click_proba"],
        basebid_predicted=submission_basebid["predicted_payprice"],
        bidprice_predicted=strategy(submission_pCTR, submission_basebid),
    )


def ValidateStrategy(df, total_budget=None):
    """Replay the auctions in ``df`` in row order and report campaign metrics.

    An auction is won when the predicted bid is at least ``payprice`` and the
    remaining balance is strictly greater than the bid. A win adds one
    impression, adds the row's ``click`` to the click count, and charges
    ``payprice`` (not the bid) against the budget.

    Parameters
    ----------
    df : DataFrame
        Must contain "bidprice_predicted", "payprice" and "click" columns.
    total_budget : number, optional
        Spend cap. When omitted, the notebook-level ``budget`` constant is
        used, which keeps existing ``ValidateStrategy(df)`` calls working.

    Returns
    -------
    tuple
        ``(impressions, clicks, cost, auctions_participated, ctr, cpm, cpc)``.
        ``ctr`` is a percentage, ``cpm`` is ``cost / impressions`` and ``cpc``
        is ``cost / clicks / 1000``. When nothing is won, ``ctr`` and ``cpm``
        are NaN; when there are no clicks, ``cpc`` is ``inf`` (the original
        raised ZeroDivisionError in both cases).
    """
    if total_budget is None:
        # Fall back to the notebook-level constant.
        total_budget = budget

    impressions = 0
    clicks = 0
    cost = 0
    auctions_participated = 0
    balance = total_budget

    # Walk the three needed columns in lockstep. DataFrame.iterrows() builds a
    # Series for every row and is far slower for the same result.
    auctions = zip(df["bidprice_predicted"], df["payprice"], df["click"])
    for bid, payprice, click in auctions:
        # Defensive stop: a win requires balance > bid >= payprice, so cost
        # stays below a positive budget and this only fires when the budget is
        # already exhausted at the start.
        if cost >= total_budget:
            break

        auctions_participated += 1

        if bid >= payprice and balance > bid:
            impressions += 1
            clicks += click
            cost += payprice
            balance -= payprice

    # Metrics, guarded against empty denominators.
    ctr = clicks * 100 / impressions if impressions else float("nan")
    cpm = cost / impressions if impressions else float("nan")
    cpc = cost / clicks / 1000 if clicks else float("inf")

    print("Strategy statistics:")
    print("Auctions participated: {} | Impressions: {} | Clicks: {} | Cost: {} | CTR: {} | CPM: {} | CPC: {}".format(
        auctions_participated, impressions, clicks, cost, ctr, cpm, cpc))
    print("\n")

    return impressions, clicks, cost, auctions_participated, ctr, cpm, cpc

# def StrategyResults(impressions, clicks, cost):
#     ctr = clicks*100/impressions
#     spend = cost
#     cpm = cost/(impressions*1000)
#     if clicks > 0:
#         cpc = cost/clicks
#     else:
#         cpc = np.inf
        
#     print("Strategy results")
#     print("CTR: {} | Clicks: {} | Spend: {} | CPM: {} | CPC: {}".format(ctr, clicks, spend, cpm, cpc))
#     print("\n")
        
#     return ctr, clicks, spend, cpm, cpc

In [12]:
# Evaluate every (pCTR model, base-bid model, strategy) combination on the
# validation set and collect one result row per combination.
pCTR_preds = [ensemble_pCTR_preds, lr_preds, rf_preds, xgb_preds, dwane_preds]
basebid_preds = [ensemble_basebid_preds, lasso_preds, ridge_preds, elasticnet_preds, rfg_preds, xgbr_preds]
pCTR_model_names = ["Ensemble", "Logistic Regression", "Random Forest", "XGBoost", "Forest of RF"]
basebid_model_names = ["Ensemble", "Lasso", "Ridge", "Elastic Net", "Random Forest Regressor", "XGBoost Regressor"]
strategies = [Strategy1, Strategy2, Strategy3, Strategy4]
result_columns = ["pCTR_model", "basebid_model", "Strategy", "Impressions", "Clicks", "Cost", "CTR", "CPM", "CPC"]

# zip() silently truncates, so make sure the name lists match the prediction lists.
assert len(pCTR_preds) == len(pCTR_model_names)
assert len(basebid_preds) == len(basebid_model_names)

result_rows = []

start = time.time()

for pCTR_name, pCTR_pred in zip(pCTR_model_names, pCTR_preds):
    for basebid_name, basebid_pred in zip(basebid_model_names, basebid_preds):
        # Strategy1 ignores the base-bid model, so its rows repeat across base-bid
        # models; it is still evaluated for each so the results table stays complete.
        for strategy_number, strategy in enumerate(strategies, start=1):
            validation_check = ValidationDataFrame(pCTR_pred, basebid_pred, strategy)
            print("\033[1m pCTR model: {} \033[0m, \033[1m basebid model: {} \033[0m, \033[1m Strategy {} \033[0m ".format(pCTR_name, basebid_name, strategy_number))
            impressions, clicks, cost, auctions_participated, ctr, cpm, cpc = ValidateStrategy(validation_check)
            result_rows.append([pCTR_name, basebid_name, "Strategy{}".format(strategy_number),
                                impressions, clicks, cost, ctr, cpm, cpc])

# Build the frame once: appending with results.loc[k] copies on every insert and
# leaves every column with object dtype.
results = pd.DataFrame(result_rows, columns=result_columns)

end = time.time()
print("Total time: {} mins".format((end-start)/60))

[1m pCTR model: Ensemble [0m, [1m basebid model: Ensemble [0m, [1m Strategy 1 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 53628 | Clicks: 151 | Cost: 3945401 | CTR: 0.28156932945476243 | CPM: 73.56979562914896 | CPC: 26.12848344370861


[1m pCTR model: Ensemble [0m, [1m basebid model: Ensemble [0m, [1m Strategy 2 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 49391 | Clicks: 146 | Cost: 3742856 | CTR: 0.2956004130307141 | CPM: 75.78012188455386 | CPC: 25.636


[1m pCTR model: Ensemble [0m, [1m basebid model: Ensemble [0m, [1m Strategy 3 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 6004 | Clicks: 101 | Cost: 498730 | CTR: 1.6822118587608261 | CPM: 83.06628914057295 | CPC: 4.937920792079208


[1m pCTR model: Ensemble [0m, [1m basebid model: Ensemble [0m, [1m Strategy 4 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 8921 | Clicks: 108 | Cost: 715533 | CTR: 1.2106266

[1m pCTR model: Logistic Regression [0m, [1m basebid model: Lasso [0m, [1m Strategy 3 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 82376 | Clicks: 53 | Cost: 6249967 | CTR: 0.06433912790133048 | CPM: 75.8712124890745 | CPC: 117.92390566037736


[1m pCTR model: Logistic Regression [0m, [1m basebid model: Lasso [0m, [1m Strategy 4 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 152951 | Clicks: 122 | Cost: 6202455 | CTR: 0.07976410745925165 | CPM: 40.55190878124367 | CPC: 50.83979508196721


[1m pCTR model: Logistic Regression [0m, [1m basebid model: Ridge [0m, [1m Strategy 1 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 122870 | Clicks: 68 | Cost: 6249925 | CTR: 0.05534304549523887 | CPM: 50.86615935541629 | CPC: 91.91066176470589


[1m pCTR model: Logistic Regression [0m, [1m basebid model: Ridge [0m, [1m Strategy 2 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 138

Strategy statistics:
Auctions participated: 303925 | Impressions: 16621 | Clicks: 116 | Cost: 1139839 | CTR: 0.6979122796462307 | CPM: 68.57824438962759 | CPC: 9.826198275862069


[1m pCTR model: Random Forest [0m, [1m basebid model: Elastic Net [0m, [1m Strategy 1 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 122112 | Clicks: 110 | Cost: 6249998 | CTR: 0.09008123689727464 | CPM: 51.18250458595388 | CPC: 56.818163636363636


[1m pCTR model: Random Forest [0m, [1m basebid model: Elastic Net [0m, [1m Strategy 2 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 126848 | Clicks: 134 | Cost: 6249996 | CTR: 0.10563824419778002 | CPM: 49.27153758829465 | CPC: 46.641761194029854


[1m pCTR model: Random Forest [0m, [1m basebid model: Elastic Net [0m, [1m Strategy 3 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 14132 | Clicks: 115 | Cost: 1162144 | CTR: 0.8137560147183697 | CPM: 82.23492782337956 | CPC: 10.

Strategy statistics:
Auctions participated: 303925 | Impressions: 99688 | Clicks: 160 | Cost: 6020706 | CTR: 0.1605007623786213 | CPM: 60.39549394109622 | CPC: 37.6294125


[1m pCTR model: XGBoost [0m, [1m basebid model: Random Forest Regressor [0m, [1m Strategy 3 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 6897 | Clicks: 101 | Cost: 581952 | CTR: 1.4644048136871104 | CPM: 84.37755545889517 | CPC: 5.76190099009901


[1m pCTR model: XGBoost [0m, [1m basebid model: Random Forest Regressor [0m, [1m Strategy 4 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 10026 | Clicks: 108 | Cost: 792043 | CTR: 1.0771992818671454 | CPM: 78.99890285258329 | CPC: 7.333731481481482


[1m pCTR model: XGBoost [0m, [1m basebid model: XGBoost Regressor [0m, [1m Strategy 1 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 115614 | Clicks: 144 | Cost: 6249999 | CTR: 0.12455238984898023 | CPM: 54.059188333592815 | CPC: 43.40

Strategy statistics:
Auctions participated: 303925 | Impressions: 55325 | Clicks: 148 | Cost: 4724848 | CTR: 0.2675101671938545 | CPM: 85.40168097605061 | CPC: 31.92464864864865


Total time: 50.37581826448441 mins


In [13]:
#results["CPM"] = results["CPM"]/1000
# results["CPC"] = results["CPC"]/1000

In [14]:
results.to_csv("ValidationSet_Evaluation_Results.csv")

In [17]:
# Raise the display limit to the number of result rows so the full ranking is shown.
pd.options.display.max_rows = results.shape[0]
# Rank combinations by clicks won, breaking ties by CTR (both descending).
results.sort_values(by = ["Clicks", "CTR"], ascending = False)

Unnamed: 0,pCTR_model,basebid_model,Strategy,Impressions,Clicks,Cost,CTR,CPM,CPC
113,Forest of RF,Random Forest Regressor,Strategy2,72696,160,5788614,0.220095,79.627682,36.178838
96,Forest of RF,Ensemble,Strategy1,76822,160,5990518,0.208274,77.979199,37.440738
100,Forest of RF,Lasso,Strategy1,76822,160,5990518,0.208274,77.979199,37.440738
104,Forest of RF,Ridge,Strategy1,76822,160,5990518,0.208274,77.979199,37.440738
108,Forest of RF,Elastic Net,Strategy1,76822,160,5990518,0.208274,77.979199,37.440738
112,Forest of RF,Random Forest Regressor,Strategy1,76822,160,5990518,0.208274,77.979199,37.440738
116,Forest of RF,XGBoost Regressor,Strategy1,76822,160,5990518,0.208274,77.979199,37.440738
89,XGBoost,Random Forest Regressor,Strategy2,99688,160,6020706,0.160501,60.395494,37.629413
73,XGBoost,Ensemble,Strategy2,103711,160,5942123,0.154275,57.295012,37.138269
85,XGBoost,Elastic Net,Strategy2,104562,160,5774405,0.153019,55.224699,36.090031


In [16]:
pd.options.display.max_rows = 60