In [1]:
import pandas as pd
import numpy as np
from functools import reduce
import matplotlib.pyplot as plt
import seaborn as sns
import time

%matplotlib inline
# Display every column when a DataFrame is rendered (no column truncation).
pd.set_option('display.max_columns', None)
# NOTE(review): this silences SettingWithCopyWarning for the whole notebook, which
# also hides genuine chained-assignment mistakes.
pd.options.mode.chained_assignment = None  # To ignore SettingWithCopyWarning warning

# Import data

In [2]:
# Read both CSV files from the working directory, in the order train, validation.
train, validation = (
    pd.read_csv(csv_path) for csv_path in ("train.csv", "validation.csv")
)

In [3]:
# Report the (rows, columns) shape of each frame.
print(train.shape, validation.shape)

(2430981, 25) (303925, 25)


# Import Predictions

In [4]:
# Click-probability predictions; the strategies read their "click_proba" column.
lr_preds = pd.read_csv("Prediction_csv/lr_pCTR.csv", index_col="Unnamed: 0")
rf_preds = pd.read_csv("Prediction_csv/rf_pCTR.csv", index_col="Unnamed: 0")
xgb_preds = pd.read_csv("Prediction_csv/xgb_pCTR.csv", index_col="Unnamed: 0")
# NOTE(review): unlike the other files this one is read without index_col, so it
# gets a default integer index -- confirm the file has no "Unnamed: 0" column.
dwane_preds = pd.read_csv("Prediction_csv/click_predictions_narrow.csv")

# Base-bid predictions; the strategies read their "predicted_payprice" column.
lasso_preds = pd.read_csv("Prediction_csv/lasso_basebid.csv", index_col="Unnamed: 0")
ridge_preds = pd.read_csv("Prediction_csv/ridge_basebid.csv", index_col="Unnamed: 0")
elasticnet_preds = pd.read_csv("Prediction_csv/elasticnet_basebid.csv", index_col="Unnamed: 0")
rfg_preds = pd.read_csv("Prediction_csv/rfg_basebid.csv", index_col="Unnamed: 0")
xgbr_preds = pd.read_csv("Prediction_csv/xgb_basebid.csv", index_col="Unnamed: 0")

In [5]:
# Preview the first rows of the predictions loaded from rf_pCTR.csv.
rf_preds.head()

Unnamed: 0,bidid,click_proba
0,bbcb813b6166538503d8b33a5602d7d72f6019dc,0.060008
1,5a07316c49477cb5d9b4d5aa39c27d6c3be7f92d,0.034608
2,f6ece71dae81d6b16bfb24ad6dd5611472d4c673,0.042802
3,b4d5c57c9b38ff5a12954fa01e11931b4e6bfbbb,0.015992
4,0899bf144249458ea9c89188473694bf44c7ca15,0.029209


In [6]:
# Mean pay price over the training rows that received a click.
base_bid = train.loc[train["click"].eq(1), "payprice"].mean()
# Share of clicked training rows, expressed in percent (hence the factor 100).
avgCTR = train["click"].sum() * 100 / len(train)

In [7]:
# Show both training statistics and the ratio base_bid / avgCTR.
print("Base bid: {}, avgCTR: {}, ratio: {}".format(base_bid, avgCTR, base_bid/avgCTR))

Base bid: 105.46402677077523, avgCTR: 0.07375623256619447, ratio: 1429.899862037066


# Strategies

In [8]:
def Strategy1(sub_pCTR, sub_basebid):
    """Bid proportionally to the predicted click probability.

    bid = click_proba * base_bid / avgCTR, where `base_bid` and `avgCTR` are the
    notebook-level training statistics.

    Parameters
    ----------
    sub_pCTR : pandas.DataFrame
        Must contain a "click_proba" column.
    sub_basebid : pandas.DataFrame
        Unused; accepted so every strategy shares the same signature.

    Returns
    -------
    pandas.Series
        One bid price per row of `sub_pCTR`.
    """
    # Reuse the notebook-level `base_bid` and `avgCTR` (computed once from `train`
    # with these same formulas) instead of re-scanning `train` on every call.
    # NOTE(review): `avgCTR` is in percent while "click_proba" looks like a
    # probability -- confirm the scaling is intended.
    return (sub_pCTR["click_proba"] * base_bid) / avgCTR

def Strategy2(sub_pCTR, sub_basebid):
    """Scale each row's predicted pay price by its click probability.

    bid = click_proba * predicted_payprice / 0.5

    Parameters
    ----------
    sub_pCTR : pandas.DataFrame
        Must contain a "click_proba" column.
    sub_basebid : pandas.DataFrame
        Must contain a "predicted_payprice" column.

    Returns
    -------
    pandas.Series
        One bid price per row, aligned on the shared index.
    """
    reference_ctr = 0.5
    weighted_price = sub_pCTR["click_proba"] * sub_basebid["predicted_payprice"]
    return weighted_price / reference_ctr

def Strategy3(sub_pCTR, sub_basebid):
    """Zero the bid for low click probabilities, weight the rest exponentially.

    Rows with click_proba below 0.3 get weight 0; all other rows get weight
    exp(click_proba). bid = weight * predicted_payprice / 0.5

    Parameters
    ----------
    sub_pCTR : pandas.DataFrame
        Must contain a "click_proba" column; it is not modified.
    sub_basebid : pandas.DataFrame
        Must contain a "predicted_payprice" column.

    Returns
    -------
    pandas.Series
        One bid price per row, aligned on the shared index.
    """
    def _weight(proba):
        # Below the cut-off the weight (and therefore the bid) is zero.
        if proba < 0.3:
            return 0
        return np.exp(proba)

    reference_ctr = 0.5
    weights = sub_pCTR["click_proba"].map(_weight)
    return (weights * sub_basebid["predicted_payprice"]) / reference_ctr

def Strategy4(sub_pCTR, sub_basebid):
    """Weight the predicted pay price by a shifted square of the click probability.

    weight = (click_proba + 0.5)**2 - 0.5, with negative weights replaced by 0.
    bid = weight * predicted_payprice / 0.5

    Parameters
    ----------
    sub_pCTR : pandas.DataFrame
        Must contain a "click_proba" column; it is not modified.
    sub_basebid : pandas.DataFrame
        Must contain a "predicted_payprice" column.

    Returns
    -------
    pandas.Series
        One bid price per row, aligned on the shared index.
    """
    reference_ctr = 0.5
    shifted = (sub_pCTR["click_proba"] + 0.5) ** 2 - 0.5
    # Only strictly negative weights are zeroed; NaN entries stay NaN.
    weights = shifted.mask(shifted < 0, 0)
    return (weights * sub_basebid["predicted_payprice"]) / reference_ctr

# Evaluate Results

In [9]:
# Spend limit for one replay in ValidateStrategy; it is compared against the
# running sum of `payprice`, so it is expressed in the same units as that column.
budget = 6250*1000

def ValidationDataFrame(submission_pCTR, submission_basebid, strategy):
    """Join the validation log with model predictions and the strategy's bids.

    Parameters
    ----------
    submission_pCTR : pandas.DataFrame
        Must contain a "click_proba" column.
    submission_basebid : pandas.DataFrame
        Must contain a "predicted_payprice" column.
    strategy : callable
        Called as strategy(submission_pCTR, submission_basebid); its result is
        stored as the bid price.

    Returns
    -------
    pandas.DataFrame
        A new frame with the "bidid", "click", "bidprice" and "payprice" columns
        of the notebook-level `validation` frame plus "click_proba",
        "basebid_predicted" and "bidprice_predicted". Added columns are aligned
        to `validation` by index label.
    """
    # .assign builds a new frame, so nothing is written onto a slice of
    # `validation` and the result does not depend on SettingWithCopyWarning
    # being silenced.
    return validation[["bidid", "click", "bidprice", "payprice"]].assign(
        click_proba=submission_pCTR["click_proba"],
        basebid_predicted=submission_basebid["predicted_payprice"],
        bidprice_predicted=strategy(submission_pCTR, submission_basebid),
    )


def ValidateStrategy(df, budget_limit=None):
    """Replay the auctions in `df` in row order and report what the bids win.

    A row is won when "bidprice_predicted" is at least "payprice" and the
    remaining balance is strictly greater than the bid; a won row is charged
    "payprice". The replay stops once the accumulated cost reaches the budget.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "bidprice_predicted", "payprice" and "click".
    budget_limit : number, optional
        Spend limit for this replay. Defaults to the notebook-level `budget`.

    Returns
    -------
    tuple
        (impressions, clicks, cost, auctions_participated, ctr, cpm, cpc).
        When nothing is won, ctr and cpm are 0.0; when no click is won, cpc is
        np.inf (previously both cases raised ZeroDivisionError).
    """
    limit = budget if budget_limit is None else budget_limit

    impressions = 0
    clicks = 0
    cost = 0
    auctions_participated = 0
    balance = limit

    # Iterate over the three needed columns directly; iterrows() would build a
    # full Series for every row.
    rows = zip(df["bidprice_predicted"], df["payprice"], df["click"])
    for bid, pay, click in rows:
        if cost >= limit:
            break

        auctions_participated += 1

        if bid >= pay and balance > bid:
            impressions += 1
            clicks += click
            cost += pay
            balance -= pay

    # Metrics
    # NOTE(review): cpm divides by impressions*1000 rather than multiplying the
    # per-impression cost by 1000; formula kept as is -- confirm the intended unit.
    ctr = clicks*100/impressions if impressions else 0.0
    cpm = cost/(impressions*1000) if impressions else 0.0
    cpc = cost/clicks if clicks else np.inf

    print("Strategy statistics:")
    print("Auctions participated: {} | Impressions: {} | Clicks: {} | Cost: {} | CTR: {} | CPM: {} | CPC: {}".format(
        auctions_participated, impressions, clicks, cost, ctr, cpm, cpc))
    print("\n")

    return impressions, clicks, cost, auctions_participated, ctr, cpm, cpc

# def StrategyResults(impressions, clicks, cost):
#     ctr = clicks*100/impressions
#     spend = cost
#     cpm = cost/(impressions*1000)
#     if clicks > 0:
#         cpc = cost/clicks
#     else:
#         cpc = np.inf
        
#     print("Strategy results")
#     print("CTR: {} | Clicks: {} | Spend: {} | CPM: {} | CPC: {}".format(ctr, clicks, spend, cpm, cpc))
#     print("\n")
        
#     return ctr, clicks, spend, cpm, cpc

In [10]:
# Evaluate every (pCTR model, base-bid model, strategy) combination on the
# validation set and collect one result row per combination.
pCTR_preds = [lr_preds, rf_preds, xgb_preds, dwane_preds]
basebid_preds = [lasso_preds, ridge_preds, elasticnet_preds, rfg_preds, xgbr_preds]
pCTR_model_names = ["Logistic Regression", "Random Forest", "XGBoost", "Dwane Model"]
basebid_model_names = ["Lasso", "Ridge", "Elastic Net", "Random Forest Regressor", "XGBoost Regressor"]
result_columns = ["pCTR_model", "basebid_model", "Strategy", "Impressions", "Clicks", "Cost", "CTR", "CPM", "CPC"]
strategies = [(1, Strategy1), (2, Strategy2), (3, Strategy3), (4, Strategy4)]

start = time.time()

# Rows are gathered in a list and turned into a DataFrame once, instead of
# growing `results` one .loc assignment at a time.
result_rows = []
for pCTR_name, pCTR_pred in zip(pCTR_model_names, pCTR_preds):
    for basebid_name, basebid_pred in zip(basebid_model_names, basebid_preds):
        # Strategy1 does not read the base-bid predictions, so its row repeats
        # for every base-bid model; kept so the table still has all combinations.
        for strategy_number, strategy in strategies:
            validation_check = ValidationDataFrame(pCTR_pred, basebid_pred, strategy)
            print("\033[1m pCTR model: {} \033[0m, \033[1m basebid model: {} \033[0m, \033[1m Strategy {} \033[0m ".format(pCTR_name, basebid_name, strategy_number))
            impressions, clicks, cost, auctions_participated, ctr, cpm, cpc = ValidateStrategy(validation_check)
            result_rows.append([pCTR_name, basebid_name, "Strategy{}".format(strategy_number), impressions, clicks, cost, ctr, cpm, cpc])

results = pd.DataFrame(result_rows, columns=result_columns)

end = time.time()
print("Total time: {} mins".format((end-start)/60))

[1m pCTR model: Logistic Regression [0m, [1m basebid model: Lasso [0m, [1m Strategy 1 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 79694 | Clicks: 51 | Cost: 6249493 | CTR: 0.06399478003362863 | CPM: 0.07841861369739253 | CPC: 122539.07843137255


[1m pCTR model: Logistic Regression [0m, [1m basebid model: Lasso [0m, [1m Strategy 2 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 152272 | Clicks: 109 | Cost: 5915338 | CTR: 0.0715824314384785 | CPM: 0.03884718135967217 | CPC: 54269.15596330275


[1m pCTR model: Logistic Regression [0m, [1m basebid model: Lasso [0m, [1m Strategy 3 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 83573 | Clicks: 53 | Cost: 6249940 | CTR: 0.0634176109509052 | CPM: 0.07478420063896235 | CPC: 117923.39622641509


[1m pCTR model: Logistic Regression [0m, [1m basebid model: Lasso [0m, [1m Strategy 4 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions

Strategy statistics:
Auctions participated: 303925 | Impressions: 22158 | Clicks: 59 | Cost: 452296 | CTR: 0.2662695189096489 | CPM: 0.02041231158046755 | CPC: 7666.033898305085


[1m pCTR model: Random Forest [0m, [1m basebid model: Elastic Net [0m, [1m Strategy 3 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 11761 | Clicks: 105 | Cost: 904867 | CTR: 0.8927812260862171 | CPM: 0.07693793044809115 | CPC: 8617.780952380952


[1m pCTR model: Random Forest [0m, [1m basebid model: Elastic Net [0m, [1m Strategy 4 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 6436 | Clicks: 54 | Cost: 273512 | CTR: 0.839030453697949 | CPM: 0.042497203231821004 | CPC: 5065.037037037037


[1m pCTR model: Random Forest [0m, [1m basebid model: Random Forest Regressor [0m, [1m Strategy 1 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 110570 | Clicks: 91 | Cost: 6249994 | CTR: 0.08230080491996021 | CPM: 0.056525223840101294 

Strategy statistics:
Auctions participated: 303925 | Impressions: 3745 | Clicks: 84 | Cost: 200891 | CTR: 2.2429906542056073 | CPM: 0.05364245660881175 | CPC: 2391.559523809524


[1m pCTR model: Dwane Model [0m, [1m basebid model: Lasso [0m, [1m Strategy 1 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 75369 | Clicks: 142 | Cost: 6249963 | CTR: 0.18840637397338428 | CPM: 0.08292484973928273 | CPC: 44013.82394366197


[1m pCTR model: Dwane Model [0m, [1m basebid model: Lasso [0m, [1m Strategy 2 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 45474 | Clicks: 118 | Cost: 2745549 | CTR: 0.25948893873422174 | CPM: 0.06037623697057659 | CPC: 23267.36440677966


[1m pCTR model: Dwane Model [0m, [1m basebid model: Lasso [0m, [1m Strategy 3 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 59180 | Clicks: 153 | Cost: 5160853 | CTR: 0.2585332882730652 | CPM: 0.08720603244339303 | CPC: 33731.06535947712


[1m p

In [11]:
# Display the collected evaluation table (one row per model/strategy combination).
results

Unnamed: 0,pCTR_model,basebid_model,Strategy,Impressions,Clicks,Cost,CTR,CPM,CPC
0,Logistic Regression,Lasso,Strategy1,79694,51,6249493,0.063995,0.078419,122539.078431
1,Logistic Regression,Lasso,Strategy2,152272,109,5915338,0.071582,0.038847,54269.155963
2,Logistic Regression,Lasso,Strategy3,83573,53,6249940,0.063418,0.074784,117923.396226
3,Logistic Regression,Lasso,Strategy4,134661,107,4850487,0.079459,0.036020,45331.654206
4,Logistic Regression,Ridge,Strategy1,79694,51,6249493,0.063995,0.078419,122539.078431
5,Logistic Regression,Ridge,Strategy2,152124,112,6075859,0.073624,0.039940,54248.741071
6,Logistic Regression,Ridge,Strategy3,83392,53,6249998,0.063555,0.074947,117924.490566
7,Logistic Regression,Ridge,Strategy4,134501,111,5060465,0.082527,0.037624,45589.774775
8,Logistic Regression,Elastic Net,Strategy1,79694,51,6249493,0.063995,0.078419,122539.078431
9,Logistic Regression,Elastic Net,Strategy2,151356,103,5863568,0.068051,0.038740,56927.844660


In [12]:
# Persist the evaluation table to the working directory.
# NOTE(review): "Eavaluation" in the file name looks like a typo for "Evaluation";
# left unchanged in case other code reads this path.
results.to_csv("ValidationSet_Eavaluation_Results.csv")

In [13]:
# Rank the combinations by number of clicks won, best first.
results.sort_values(by = "Clicks", ascending = False)

Unnamed: 0,pCTR_model,basebid_model,Strategy,Impressions,Clicks,Cost,CTR,CPM,CPC
66,Dwane Model,Ridge,Strategy3,59202,156,5182149,0.263505,0.087533,33218.903846
74,Dwane Model,Random Forest Regressor,Strategy3,59124,156,5172717,0.263852,0.087489,33158.442308
78,Dwane Model,XGBoost Regressor,Strategy3,59144,155,5187546,0.262072,0.087710,33468.038710
70,Dwane Model,Elastic Net,Strategy3,59306,154,5192201,0.259670,0.087549,33715.590909
62,Dwane Model,Lasso,Strategy3,59180,153,5160853,0.258533,0.087206,33731.065359
40,XGBoost,Lasso,Strategy1,92851,149,4504986,0.160472,0.048518,30234.805369
52,XGBoost,Random Forest Regressor,Strategy1,92851,149,4504986,0.160472,0.048518,30234.805369
56,XGBoost,XGBoost Regressor,Strategy1,92851,149,4504986,0.160472,0.048518,30234.805369
48,XGBoost,Elastic Net,Strategy1,92851,149,4504986,0.160472,0.048518,30234.805369
44,XGBoost,Ridge,Strategy1,92851,149,4504986,0.160472,0.048518,30234.805369
