In [1]:
import pandas as pd
import numpy as np
from functools import reduce
import matplotlib.pyplot as plt
import seaborn as sns
import time

%matplotlib inline
pd.set_option('display.max_columns', None)
pd.options.mode.chained_assignment = None  # To ignore SettingWithCopyWarning warning

# Import data

In [2]:
# Read the training and validation splits from the local Prediction_csv folder.
train, validation = (
    pd.read_csv(csv_path)
    for csv_path in ("Prediction_csv/train.csv", "Prediction_csv/validation.csv")
)

In [3]:
print(train.shape, validation.shape)

(2430981, 25) (303925, 25)


# Import Predictions

In [4]:
# Prediction files that carry their row index in an "Unnamed: 0" column.
# The first three hold click predictions (pCTR); the remaining five hold
# base-bid predictions.
(
    lr_preds,
    rf_preds,
    xgb_preds,
    lasso_preds,
    ridge_preds,
    elasticnet_preds,
    rfg_preds,
    xgbr_preds,
) = (
    pd.read_csv(csv_path, index_col="Unnamed: 0")
    for csv_path in (
        "Prediction_csv/lr_pCTR.csv",
        "Prediction_csv/rf_pCTR.csv",
        "Prediction_csv/xgb_pCTR.csv",
        "Prediction_csv/lasso_basebid.csv",
        "Prediction_csv/ridge_basebid.csv",
        "Prediction_csv/elasticnet_basebid.csv",
        "Prediction_csv/rfg_basebid.csv",
        "Prediction_csv/xgb_basebid.csv",
    )
)

# This file is read with the default integer index (no index_col argument).
dwane_preds = pd.read_csv("Prediction_csv/click_predictions_narrow.csv")

In [5]:
rf_preds.head()

Unnamed: 0,bidid,click_proba
0,bbcb813b6166538503d8b33a5602d7d72f6019dc,0.060008
1,5a07316c49477cb5d9b4d5aa39c27d6c3be7f92d,0.034608
2,f6ece71dae81d6b16bfb24ad6dd5611472d4c673,0.042802
3,b4d5c57c9b38ff5a12954fa01e11931b4e6bfbbb,0.015992
4,0899bf144249458ea9c89188473694bf44c7ca15,0.029209


In [6]:
# Mean payprice over the training rows that were clicked.
base_bid = train.loc[train["click"].eq(1), "payprice"].mean()
# Average click-through rate of the training set, expressed in percent.
avgCTR = 100 * train["click"].sum() / len(train)

In [7]:
print("Base bid: {}, avgCTR: {}, ratio: {}".format(base_bid, avgCTR, base_bid/avgCTR))

Base bid: 105.46402677077523, avgCTR: 0.07375623256619447, ratio: 1429.899862037066


# Strategies

In [8]:
def Strategy1(sub_pCTR, sub_basebid):
    """Linear bidding: scale one constant base bid by pCTR over the average CTR.

    Parameters
    ----------
    sub_pCTR : DataFrame with a "click_proba" column.
    sub_basebid : unused here; accepted so all strategies share one signature.

    Returns
    -------
    Series of bid prices, ``click_proba * base_bid / avgCTR``.
    """
    # Both constants are recomputed from the global `train` frame on every call.
    clicked_payprice = train.loc[train["click"] == 1, "payprice"]
    mean_clicked_payprice = clicked_payprice.mean()
    n_rows = train.shape[0]
    ctr_percent = train["click"].sum() * 100 / n_rows  # average CTR in percent

    scaled = sub_pCTR["click_proba"] * mean_clicked_payprice
    return scaled / ctr_percent

def Strategy2(sub_pCTR, sub_basebid):
    """Bid the per-row predicted payprice weighted by pCTR, divided by a fixed 0.5.

    Parameters
    ----------
    sub_pCTR : DataFrame with a "click_proba" column.
    sub_basebid : DataFrame with a "predicted_payprice" column.

    Returns
    -------
    Series of bid prices, ``click_proba * predicted_payprice / 0.5``.
    """
    ctr_divisor = 0.5
    weighted = sub_pCTR["click_proba"].mul(sub_basebid["predicted_payprice"])
    return weighted.div(ctr_divisor)

def Strategy3(sub_pCTR, sub_basebid):
    """Thresholded exponential bidding.

    Rows with ``click_proba`` below 0.3 get a zero weight (and so a zero bid);
    the remaining rows are weighted by ``exp(click_proba)``. The weight is then
    multiplied by the predicted payprice and divided by a fixed 0.5.

    Parameters
    ----------
    sub_pCTR : DataFrame with a "click_proba" column (left unmodified).
    sub_basebid : DataFrame with a "predicted_payprice" column.

    Returns
    -------
    Series of bid prices.
    """
    def _weight(proba):
        if proba < 0.3:
            return 0
        return np.exp(proba)

    ctr_divisor = 0.5
    weights = sub_pCTR["click_proba"].map(_weight)
    weighted = weights * sub_basebid["predicted_payprice"]
    return weighted / ctr_divisor

def Strategy4(sub_pCTR, sub_basebid):
    """Quadratic bidding with a floor at zero.

    The weight is ``(click_proba + 0.5)**2 - 0.5``, with negative weights
    replaced by 0. It is multiplied by the predicted payprice and divided by a
    fixed 0.5.

    Parameters
    ----------
    sub_pCTR : DataFrame with a "click_proba" column (left unmodified).
    sub_basebid : DataFrame with a "predicted_payprice" column.

    Returns
    -------
    Series of bid prices.
    """
    ctr_divisor = 0.5
    shifted = (sub_pCTR["click_proba"] + 0.5) ** 2 - 0.5
    # Negative weights would produce negative bids, so floor them at zero.
    weights = shifted.clip(lower=0)
    return (weights * sub_basebid["predicted_payprice"]) / ctr_divisor

# Evaluate Results

In [9]:
# Total spend limit for one replay of the validation auctions; ValidateStrategy
# compares it against the accumulated payprice.
budget = 6250 * 1000

def ValidationDataFrame(submission_pCTR, submission_basebid, strategy):
    """Build the per-auction frame that ValidateStrategy replays.

    Parameters
    ----------
    submission_pCTR : DataFrame with a "click_proba" column.
    submission_basebid : DataFrame with a "predicted_payprice" column.
    strategy : callable ``strategy(submission_pCTR, submission_basebid)``
        returning the bid price per row.

    Returns
    -------
    DataFrame with the "bidid", "click", "bidprice" and "payprice" columns of
    the global `validation` frame plus "click_proba", "basebid_predicted" and
    "bidprice_predicted". The added columns are aligned on the row index.
    """
    # Take an explicit copy so the column assignments below write to an
    # independent frame instead of relying on SettingWithCopyWarning being
    # silenced; the global `validation` frame is never modified.
    validation_check = validation[["bidid", "click", "bidprice", "payprice"]].copy()
    validation_check["click_proba"] = submission_pCTR["click_proba"]
    validation_check["basebid_predicted"] = submission_basebid["predicted_payprice"]
    validation_check["bidprice_predicted"] = strategy(submission_pCTR, submission_basebid)

    return validation_check


def ValidateStrategy(df, total_budget=None):
    """Replay the auctions in ``df`` in row order and report campaign metrics.

    An auction is won when the predicted bid is at least the payprice and the
    remaining balance is strictly greater than the bid. The winner pays the
    payprice, not the bid.

    Parameters
    ----------
    df : DataFrame with "bidprice_predicted", "payprice" and "click" columns.
    total_budget : optional spend limit. When omitted, the module-level
        `budget` is used, which keeps existing one-argument calls unchanged.

    Returns
    -------
    tuple ``(impressions, clicks, cost, auctions_participated, ctr, cpm, cpc)``
    where ``ctr`` is in percent, ``cpm`` is ``cost / (impressions * 1000)`` and
    ``cpc`` is ``cost / clicks``. With no impressions, ``ctr`` and ``cpm`` are
    0.0; with no clicks, ``cpc`` is ``inf`` (instead of raising
    ZeroDivisionError).
    """
    limit = budget if total_budget is None else total_budget

    impressions = 0
    clicks = 0
    cost = 0
    auctions_participated = 0
    balance = limit

    # Walk only the three columns that are needed, in lockstep; this avoids
    # building a full Series per row as DataFrame.iterrows() does.
    auctions = zip(df["bidprice_predicted"], df["payprice"], df["click"])
    for bid, payprice, click in auctions:
        # Defensive stop. A win requires balance > bid >= payprice, so the
        # accumulated cost stays within the limit.
        if cost > limit:
            break

        auctions_participated += 1

        if bid >= payprice and balance > bid:
            impressions += 1
            clicks += click
            cost += payprice
            balance -= payprice

    # Metrics, guarded against strategies that win nothing or earn no clicks.
    ctr = clicks * 100 / impressions if impressions else 0.0
    cpm = cost / (impressions * 1000) if impressions else 0.0
    cpc = cost / clicks if clicks else float("inf")

    print("Strategy statistics:")
    print("Auctions participated: {} | Impressions: {} | Clicks: {} | Cost: {} | CTR: {} | CPM: {} | CPC: {}".format(
        auctions_participated, impressions, clicks, cost, ctr, cpm, cpc))
    print("\n")

    return impressions, clicks, cost, auctions_participated, ctr, cpm, cpc

# def StrategyResults(impressions, clicks, cost):
#     ctr = clicks*100/impressions
#     spend = cost
#     cpm = cost/(impressions*1000)
#     if clicks > 0:
#         cpc = cost/clicks
#     else:
#         cpc = np.inf
        
#     print("Strategy results")
#     print("CTR: {} | Clicks: {} | Spend: {} | CPM: {} | CPC: {}".format(ctr, clicks, spend, cpm, cpc))
#     print("\n")
        
#     return ctr, clicks, spend, cpm, cpc

In [10]:
# Evaluate every (pCTR model, base-bid model, strategy) combination on the
# validation set and collect one result row per combination.
pCTR_preds = [lr_preds, rf_preds, xgb_preds, dwane_preds]
basebid_preds = [lasso_preds, ridge_preds, elasticnet_preds, rfg_preds, xgbr_preds]
pCTR_model_names = ["Logistic Regression", "Random Forest", "XGBoost", "Dwane Model"]
basebid_model_names = ["Lasso", "Ridge", "Elastic Net", "Random Forest Regressor", "XGBoost Regressor"]
strategies = [Strategy1, Strategy2, Strategy3, Strategy4]
result_columns = ["pCTR_model", "basebid_model", "Strategy", "Impressions", "Clicks", "Cost", "CTR", "CPM", "CPC"]

# Rows are gathered in a plain list and turned into a DataFrame once at the
# end, rather than growing the frame one `.loc[k]` assignment at a time.
records = []

start = time.time()

for pCTR_name, pCTR_pred in zip(pCTR_model_names, pCTR_preds):
    for basebid_name, basebid_pred in zip(basebid_model_names, basebid_preds):
        # Note: Strategy1 does not use the base-bid predictions, so its rows
        # are identical across base-bid models for a given pCTR model.
        for strategy_number, strategy in enumerate(strategies, start=1):
            validation_check = ValidationDataFrame(pCTR_pred, basebid_pred, strategy)
            print("\033[1m pCTR model: {} \033[0m, \033[1m basebid model: {} \033[0m, \033[1m Strategy {} \033[0m ".format(pCTR_name, basebid_name, strategy_number))
            impressions, clicks, cost, auctions_participated, ctr, cpm, cpc = ValidateStrategy(validation_check)
            records.append([pCTR_name, basebid_name, "Strategy{}".format(strategy_number),
                            impressions, clicks, cost, ctr, cpm, cpc])

# Default RangeIndex 0..N-1 matches the row numbering of the original loop.
results = pd.DataFrame(records, columns=result_columns)

end = time.time()
print("Total time: {} mins".format((end-start)/60))

[1m pCTR model: Logistic Regression [0m, [1m basebid model: Lasso [0m, [1m Strategy 1 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 79694 | Clicks: 51 | Cost: 6249493 | CTR: 0.06399478003362863 | CPM: 0.07841861369739253 | CPC: 122539.07843137255


[1m pCTR model: Logistic Regression [0m, [1m basebid model: Lasso [0m, [1m Strategy 2 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 151952 | Clicks: 109 | Cost: 5901069 | CTR: 0.07173317889859956 | CPM: 0.03883508607981468 | CPC: 54138.247706422015


[1m pCTR model: Logistic Regression [0m, [1m basebid model: Lasso [0m, [1m Strategy 3 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 83574 | Clicks: 53 | Cost: 6249942 | CTR: 0.06341685213104555 | CPM: 0.0747833297437002 | CPC: 117923.43396226416


[1m pCTR model: Logistic Regression [0m, [1m basebid model: Lasso [0m, [1m Strategy 4 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressio

Strategy statistics:
Auctions participated: 303925 | Impressions: 22074 | Clicks: 63 | Cost: 460390 | CTR: 0.28540364229410164 | CPM: 0.02085666394853674 | CPC: 7307.777777777777


[1m pCTR model: Random Forest [0m, [1m basebid model: Elastic Net [0m, [1m Strategy 3 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 11760 | Clicks: 107 | Cost: 906550 | CTR: 0.9098639455782312 | CPM: 0.07708758503401361 | CPC: 8472.429906542056


[1m pCTR model: Random Forest [0m, [1m basebid model: Elastic Net [0m, [1m Strategy 4 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 6473 | Clicks: 56 | Cost: 279776 | CTR: 0.8651320871311602 | CPM: 0.04322199907307277 | CPC: 4996.0


[1m pCTR model: Random Forest [0m, [1m basebid model: Random Forest Regressor [0m, [1m Strategy 1 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 110570 | Clicks: 91 | Cost: 6249994 | CTR: 0.08230080491996021 | CPM: 0.056525223840101294 | CPC: 686

Strategy statistics:
Auctions participated: 303925 | Impressions: 3724 | Clicks: 84 | Cost: 200943 | CTR: 2.255639097744361 | CPM: 0.05395891514500537 | CPC: 2392.1785714285716


[1m pCTR model: Dwane Model [0m, [1m basebid model: Lasso [0m, [1m Strategy 1 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 75369 | Clicks: 142 | Cost: 6249963 | CTR: 0.18840637397338428 | CPM: 0.08292484973928273 | CPC: 44013.82394366197


[1m pCTR model: Dwane Model [0m, [1m basebid model: Lasso [0m, [1m Strategy 2 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 45524 | Clicks: 118 | Cost: 2749174 | CTR: 0.2592039363852034 | CPM: 0.06038955276337756 | CPC: 23298.084745762713


[1m pCTR model: Dwane Model [0m, [1m basebid model: Lasso [0m, [1m Strategy 3 [0m 
Strategy statistics:
Auctions participated: 303925 | Impressions: 59182 | Clicks: 153 | Cost: 5161436 | CTR: 0.2585245513838667 | CPM: 0.08721293636578689 | CPC: 33734.875816993466


[1m 

In [11]:
results

Unnamed: 0,pCTR_model,basebid_model,Strategy,Impressions,Clicks,Cost,CTR,CPM,CPC
0,Logistic Regression,Lasso,Strategy1,79694,51,6249493,0.063995,0.078419,122539.078431
1,Logistic Regression,Lasso,Strategy2,151952,109,5901069,0.071733,0.038835,54138.247706
2,Logistic Regression,Lasso,Strategy3,83574,53,6249942,0.063417,0.074783,117923.433962
3,Logistic Regression,Lasso,Strategy4,134072,107,4821431,0.079808,0.035962,45060.102804
4,Logistic Regression,Ridge,Strategy1,79694,51,6249493,0.063995,0.078419,122539.078431
5,Logistic Regression,Ridge,Strategy2,152217,112,6083821,0.073579,0.039968,54319.830357
6,Logistic Regression,Ridge,Strategy3,83387,53,6249997,0.063559,0.074952,117924.471698
7,Logistic Regression,Ridge,Strategy4,134674,112,5074334,0.083164,0.037679,45306.553571
8,Logistic Regression,Elastic Net,Strategy1,79694,51,6249493,0.063995,0.078419,122539.078431
9,Logistic Regression,Elastic Net,Strategy2,152014,107,5919240,0.070388,0.038939,55320.000000


In [12]:
results.to_csv("ValidationSet_Eavaluation_Results.csv")

In [13]:
results.sort_values(by = "Clicks", ascending = False)

Unnamed: 0,pCTR_model,basebid_model,Strategy,Impressions,Clicks,Cost,CTR,CPM,CPC
66,Dwane Model,Ridge,Strategy3,59200,156,5182384,0.263514,0.087540,33220.410256
78,Dwane Model,XGBoost Regressor,Strategy3,59024,155,5175335,0.262605,0.087682,33389.258065
74,Dwane Model,Random Forest Regressor,Strategy3,59155,155,5176612,0.262023,0.087509,33397.496774
70,Dwane Model,Elastic Net,Strategy3,59326,155,5197643,0.261268,0.087612,33533.180645
62,Dwane Model,Lasso,Strategy3,59182,153,5161436,0.258525,0.087213,33734.875817
40,XGBoost,Lasso,Strategy1,92851,149,4504986,0.160472,0.048518,30234.805369
44,XGBoost,Ridge,Strategy1,92851,149,4504986,0.160472,0.048518,30234.805369
52,XGBoost,Random Forest Regressor,Strategy1,92851,149,4504986,0.160472,0.048518,30234.805369
56,XGBoost,XGBoost Regressor,Strategy1,92851,149,4504986,0.160472,0.048518,30234.805369
48,XGBoost,Elastic Net,Strategy1,92851,149,4504986,0.160472,0.048518,30234.805369


In [14]:
from sklearn.metrics import mean_squared_error, make_scorer

In [15]:
def neg_rmse(y, y_pred):
    """Return the negated root-mean-squared error between ``y`` and ``y_pred``.

    The sign is flipped so that larger values mean a better fit.
    """
    mse = mean_squared_error(y, y_pred)
    return -np.sqrt(mse)

# Constant baseline: predict the mean payprice of clicked training rows for
# every validation row and score it with the negated RMSE.
basebid = train.loc[train["click"].eq(1), "payprice"].mean()
y_preds = list(np.ones(len(validation)) * basebid)
neg_rmse(validation["payprice"], y_preds)

-65.91350731270695