# Web Economics

## Bidding Strategy
#### Maximilian Bartolo

Date: 9th March 2017

## Overview
The purpose of this project is to bid for stuff.

In [1]:
#Step1: Importing the libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns #seaborn makes plots cleaner and extends functionality
import itertools
pd.options.mode.chained_assignment = None  # default='warn'

#Import additional required libraries
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.svm import LinearSVC, SVC, OneClassSVM
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier, IsolationForest
from sklearn.naive_bayes import GaussianNB
from sklearn.feature_selection import RFE
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, precision_recall_fscore_support
from sklearn.metrics import confusion_matrix
from sklearn import preprocessing
from sklearn.decomposition import PCA
from mpl_toolkits.mplot3d import Axes3D

pd.options.mode.chained_assignment = None  # default='warn'

#magic to show the plots within the notebook
%matplotlib inline

import time
import random
import re

# Importing the Data

In [19]:
# Importing the dataset
df_val = pd.read_csv("data/validation.csv", low_memory=False)

In [2]:
#Create a sample submission dataset (from val)
df_val_submit = pd.read_csv("data-submissions/df_val_ensemble.csv", low_memory=False)
df_test_submit = pd.read_csv("data-submissions/df_test_ensemble.csv", low_memory=False)

In [7]:
cols_of_interest = ['bidid', 'strategy_ensemble_bid']
df_val_submit = df_val_submit[cols_of_interest]
# df_test_submit = df_test_submit[cols_of_interest]

#rename column to bidprice
col_names = ['bidid', 'bidprice']
df_val_submit.columns = col_names
# df_test_submit.columns = col_names

In [10]:
# Importing the submission data
df_test_submit = pd.read_csv("data-submissions/2017-03-17 testing_bidding_price.csv", low_memory=False)

### Validation Function

In [41]:
glob_cash_in_bank = 25000000
glob_cash_in_bank = glob_cash_in_bank * (1)

In [42]:
def calc_score(n_impressions, n_clicks, n_rows_in_budget, cash_in_bank):
    #alphas = [0.8, 0.15, 0, 0.05, 0.005, 0.005]
    alphas = [1, 0, 0, 0, 0, 0]
    normalizers = [1, 1, 1, glob_cash_in_bank, 1/500, 1/100]
    alphas = [x/y for x, y in zip(alphas, normalizers)]
    
    if n_impressions > 0:
        ctr = n_clicks/n_impressions
    else:
        ctr = 0
    conversions = n_clicks
    cvr = 0
    spend = glob_cash_in_bank - cash_in_bank
    
    if n_impressions > 0:
        avg_cpm = spend/(n_impressions*1000)
    else:
        avg_cpm = 1
    if n_clicks > 0:
        avg_cpc = spend/n_clicks
    else:
        avg_cpc = 1
    
    score_components = [
                        alphas[0]*ctr,
                        alphas[1]*conversions,
                        alphas[2]*cvr,
                        -1*alphas[3]*(spend),
                        -1*alphas[4]*(avg_cpm), 
                        -1*alphas[5]*(avg_cpc)
                       ]
    score = sum(score_components)
    return score, score_components

In [43]:
rand_seed = 27
random.seed(rand_seed)
np.random.seed(rand_seed)

def validate_results(df, cash_in_bank, random=True):
    col_name_validate = 'bidprice_validate'
    
    df_temp = df[df[col_name_validate] > 0]
    if random == True:
        df_temp = df_temp.sample(frac=1, random_state=rand_seed).reset_index(drop=True)
    
    strategy_impressions = 0
    strategy_clicks = 0
    n_rows_in_budget = 0
    
    for row in df_temp.iterrows():
        row = row[1]
        if cash_in_bank > 0:
            n_rows_in_budget += 1
            if row[col_name_validate] > row['payprice']: #was bidprice
                strategy_impressions += 1
                strategy_clicks += int(row['click'])
                cash_in_bank -= row['payprice'] #was bidprice but Jun Wang said payprice
        else:
            break

    score, score_components = calc_score(strategy_impressions, strategy_clicks, n_rows_in_budget, cash_in_bank)
    return score, score_components, cash_in_bank, strategy_impressions, strategy_clicks

In [52]:
def calc_results(budget_ratio):
    glob_cash_in_bank = 25000000
    glob_cash_in_bank = glob_cash_in_bank * budget_ratio
    
    cash_in_bank = glob_cash_in_bank
    df_validate = df_val.copy()
    df_validate['bidprice_validate'] = df_val_submit['bidprice'].copy()

    score, score_components, cash_in_bank, strategy_impressions, strategy_clicks = \
                    validate_results(df=df_validate, cash_in_bank=cash_in_bank, random=True)
    cost = (glob_cash_in_bank-cash_in_bank)/1000
    ctr = strategy_clicks/strategy_impressions
    cpc = cost/strategy_clicks
    
    return cost, strategy_impressions, strategy_clicks, ctr, cpc

In [54]:
budget_ratios = [1, 1/2, 1/4, 1/8, 1/16]
budget_ratio_names = ['Full', '1/2', '1/4', '1/8', '1/16']
for i, budget_ratio in enumerate(budget_ratios):
    cost, strategy_impressions, strategy_clicks, ctr, cpc = calc_results(budget_ratio)
    print ("{} Budget:".format(budget_ratio_names[i]))
    print ("Cost: ${:.2f}  |  Impressions: {:.0f}   |   Clicks: {:.0f}  |  CTR: {:.5f}%  |  CPC: ${:.2f}" \
       .format(cost, strategy_impressions, strategy_clicks, ctr, cpc))
    print ()

Full Budget:
Cost: $7689.70  |  Impressions: 88747   |   Clicks: 116  |  CTR: 0.00131%  |  CPC: $66.29

1/2 Budget:
Cost: $7689.70  |  Impressions: 88747   |   Clicks: 116  |  CTR: 0.00131%  |  CPC: $66.29

1/4 Budget:
Cost: $6250.05  |  Impressions: 72111   |   Clicks: 93  |  CTR: 0.00129%  |  CPC: $67.20

1/8 Budget:
Cost: $3125.00  |  Impressions: 35881   |   Clicks: 46  |  CTR: 0.00128%  |  CPC: $67.93

1/16 Budget:
Cost: $1562.54  |  Impressions: 17896   |   Clicks: 24  |  CTR: 0.00134%  |  CPC: $65.11



## Preliminary Submission Results

#### Preliminary submission results on Validation set:
```
Full Budget:
Cost: $7689.70  |  Impressions: 88747   |   Clicks: 116  |  CTR: 0.00131%  |  CPC: $66.29

1/2 Budget:
Cost: $7689.70  |  Impressions: 88747   |   Clicks: 116  |  CTR: 0.00131%  |  CPC: $66.29

1/4 Budget:
Cost: $6250.05  |  Impressions: 72111   |   Clicks: 93  |  CTR: 0.00129%  |  CPC: $67.20

1/8 Budget:
Cost: $3125.00  |  Impressions: 35881   |   Clicks: 46  |  CTR: 0.00128%  |  CPC: $67.93

1/16 Budget:
Cost: $1562.54  |  Impressions: 17896   |   Clicks: 24  |  CTR: 0.00134%  |  CPC: $65.11
```

#### Preliminary submission results on Test set:
```
Full Budget:
Cost: $7756.04  |  Impressions: 89606   |   Clicks: 109  |  CTR: 0.00122%  |  CPC: $71.16

1/2 Budget:
Cost: $7756.04  |  Impressions: 89606   |   Clicks: 109  |  CTR: 0.00122%  |  CPC: $71.16

1/4 Budget:
Cost: $6249.99  |  Impressions: 72154   |   Clicks: 87  |  CTR: 0.00121%  |  CPC: $71.84

1/8 Budget:
Cost: $3124.99  |  Impressions: 35961   |   Clicks: 42  |  CTR: 0.00117%  |  CPC: $74.40

1/16 Budget:
Cost: $1562.00  |  Impressions: 17918   |   Clicks: 20  |  CTR: 0.00111%  |  CPC: $78.10
```