In [1]:
import pandas as pd
import numpy as np
from __future__ import division
import matplotlib.pyplot as plt
%pylab inline
# Show at most 30 columns / 30 rows when a DataFrame is displayed.
pd.options.display.max_columns = 30
pd.options.display.max_rows = 30
# Training set (path is relative to the notebook's working directory):
filepath="train.csv" 
data=pd.read_csv(filepath)
# Validation set:
filepath_val="validation.csv"
data_val=pd.read_csv(filepath_val)
# Testing set:
filepath_test="test.csv"
data_test=pd.read_csv(filepath_test)

Populating the interactive namespace from numpy and matplotlib


## Constant bid
### Assumptions:
    - Based on pareto chart, 70  173 8.51% 8.51 % 0.33%
                             77  99  4.87% 13.37% 0.66%
                             80  57  2.80% 16.18% 1.00%
                             50  50  2.46% 18.63% 1.33%
                             88  44  2.16% 20.80% 1.66%
                             6   37  1.82% 22.62% 1.99%
                             73  36  1.77% 24.39% 2.33%
                             31  34  1.67% 26.06% 2.66%
                             20  29  1.43% 27.48% 2.99%
                             76  27  1.33% 28.81% 3.32%
                             84  23  1.13% 29.94% 3.65%
                             160 22  1.08% 31.02% 3.99%
                             201 19  0.93% 31.96% 4.32%
                             61  19  0.93% 32.89% 4.65%
                             
      These payprices cover 32.89% of the clicks in the training set. 
      Thus, I'll try every integer bid from 0 to 299 and choose the one with the lowest eCPC (cost per click). 
      It wasn't a surprise that, with the budget limitation, a constant bid of 6 gives the lowest eCPC.

In [2]:
# Constant-bid grid search: replay the validation auctions once for every
# candidate bid 0..N_BIDS-1 and record the campaign metrics of each replay.
START_BUDGET = 25000  # budget available to every replay
N_BIDS = 300          # candidate bids are 0 .. N_BIDS-1

# One row per candidate bid. Columns:
#   0 clicks, 1 money spent, 2 auctions won, 3 auctions lost,
#   4 win ratio (%), 5 CTR, 6 CPM, 7 eCPC
array = np.zeros((N_BIDS,8))

# Extract the (payprice, click) pairs once; they are the same for every bid.
auctions = data_val[['payprice','click']].values

for j in range(N_BIDS):
    constant = j
    loss = 0
    win = 0
    clicks = 0
    budget = START_BUDGET
    for i,c in auctions:
        # The auction is lost when the bid is below the pay price or when the
        # bid is larger than what is left of the budget.
        if constant < i or constant > budget:
            loss = loss+1
        else:
            # A won auction costs the pay price, not the bid.
            win = win+1
            clicks = clicks+c
            budget = budget - i

    spent = START_BUDGET-budget
    total = win+loss

    array[j][0]=clicks #number of clicks
    array[j][1]=spent #Money spent
    array[j][2]=win #Auction won
    array[j][3]=loss #Auction lose
    # Ratios are undefined (nan) when their denominator is zero; eCPC is set to
    # inf when no click was won so that the bid is skipped by the search below.
    array[j][4]=(win/total)*100 if total else np.nan #Ration of win
    array[j][5]=clicks/win if win else np.nan # CTR
    array[j][6]=(spent/win)*1000 if win else np.nan #CPM
    array[j][7]=spent/clicks if clicks else np.inf #eCPC

# Select the bid with the lowest finite eCPC (the first one wins on ties).
c = np.inf
place = 0

for k in range(len(array)):
    if array[k][7]!=np.inf and array[k][7]<c:
        c = array[k][7]
        place = k   

print ("The constant is:",place)
print("Number of clicks:",array[place][0])
print("Money spent:",array[place][1])
print("Win:",array[place][2])
print("Lose:",array[place][3])
print("Ration of win:",array[place][4])
print("CTR:",array[place][5])
print("CPM:",array[place][6])
print("eCPC:",array[place][7])



The constant is: 6
Number of clicks: 4.0
Money spent: 24995.0
Win: 5281.0
Lose: 294468.0
Ration of win: 1.76180737884
CTR: 0.000757432304488
CPM: 4733.00511267
eCPC: 6248.75


## Random bid
### Random values from this range of bids 
                         70  173 8.51% 8.51 % 0.33%
                         77  99  4.87% 13.37% 0.66%
                         80  57  2.80% 16.18% 1.00%
                         50  50  2.46% 18.63% 1.33%
                         88  44  2.16% 20.80% 1.66%
                         6   37  1.82% 22.62% 1.99%
                         73  36  1.77% 24.39% 2.33%
                         31  34  1.67% 26.06% 2.66%
                         20  29  1.43% 27.48% 2.99%
                         76  27  1.33% 28.81% 3.32%
                         84  23  1.13% 29.94% 3.65%
                         160 22  1.08% 31.02% 3.99%
                         201 19  0.93% 31.96% 4.32%
                         61  19  0.93% 32.89% 4.65%
                         
    *After first trying random picks from the list above, I switched to drawing random bids between two of the lowest listed payprices, 6 and 31 (`random.randrange(6, 31)`, so 31 itself is never bid).

In [3]:
import random
# Random-bid baseline: replay the validation auctions once, drawing a fresh
# random bid for every auction, and record the campaign metrics.
SEED = 42             # fixed seed so that a re-run reproduces the same numbers
START_BUDGET = 25000  # budget available to the replay
random.seed(SEED)

# Single row of metrics. Columns:
#   0 clicks, 1 money spent, 2 auctions won, 3 auctions lost,
#   4 win ratio (%), 5 CTR, 6 CPM, 7 eCPC
array2 = np.zeros((1,8))
# Pay prices taken from the table in the notes; only used by the alternative
# strategy that is commented out inside the loop.
rnd = [70,77,80,50,88,6,73,31,20,76,84,160,201,61]
loss = 0
win = 0
clicks = 0
budget = START_BUDGET
for i,c in data_val[['payprice','click']].values:
    #bid = random.choice(rnd)
    bid = random.randrange(6,31)  # upper bound is exclusive: bids are 6..30
    # The auction is lost when the bid is below the pay price or when the bid
    # is larger than what is left of the budget.
    if bid < i or bid > budget:
        loss = loss+1
    else:
        # A won auction costs the pay price, not the bid.
        win = win+1
        clicks = clicks+c
        budget = budget - i

spent = START_BUDGET-budget
total = win+loss

array2[0][0]=clicks #number of clicks
array2[0][1]=spent #Money spent
array2[0][2]=win #Auction won
array2[0][3]=loss #Auction lose
# Ratios are undefined (nan) when their denominator is zero; eCPC is reported
# as inf when no click was won.
array2[0][4]=(win/total)*100 if total else np.nan #Ration of win
array2[0][5]=clicks/win if win else np.nan # CTR
array2[0][6]=(spent/win)*1000 if win else np.nan #CPM
array2[0][7]=spent/clicks if clicks else np.inf #eCPC

print("Number of clicks:",array2[0][0])
print("Money spent:",array2[0][1])
print("Win:",array2[0][2])
print("Lose:",array2[0][3])
print("Ration of win:",array2[0][4])
print("CTR:",array2[0][5])
print("CPM:",array2[0][6])
print("eCPC:",array2[0][7])

Number of clicks: 3.0
Money spent: 24998.0
Win: 1903.0
Lose: 297846.0
Ration of win: 0.634864503301
CTR: 0.00157645822386
CPM: 13136.1008933
eCPC: 8332.66666667


In [4]:
from sklearn.linear_model import LogisticRegression
import numpy as np
from collections import defaultdict
from sklearn.feature_extraction import DictVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from __future__ import division
import string
import math
import csv
import random
import time
import pandas as pd

# Constant strategy
def _base_bid(filepath):
    """Return avg + |max - avg| / 4 over the positive pay prices in a CSV file.

    Only the "payprice" column of the file at `filepath` is read. Rows whose
    payprice is not greater than 0 are ignored.

    Raises:
        ValueError: if the file contains no positive payprice.
    """
    payprices = pd.read_csv(filepath, usecols=["payprice"])["payprice"]
    winning = payprices[payprices > 0]
    if winning.empty:
        # Without this guard the mean is NaN and the callers' int() conversion
        # fails with an opaque "cannot convert float NaN to integer".
        raise ValueError("no positive payprice values found in %s" % filepath)

    avg_winning_pp = winning.mean()
    max_winning_pp = winning.max()
    return avg_winning_pp + abs(max_winning_pp - avg_winning_pp) / 4


# Constant strategy
def get_constant_bid(filepath):
    """Compute the constant bid from the pay prices stored in `filepath`.

    The bid is the mean positive payprice plus a quarter of the distance
    between that mean and the maximum payprice. The untruncated value is
    printed with two decimals.

    Returns:
        int: the bid truncated to an integer.

    Raises:
        ValueError: if the file contains no positive payprice.
    """
    constant_bid = _base_bid(filepath)
    print ("constant bid: %0.2f" %constant_bid)
    return (int(constant_bid))

def get_random_bid(filepath):
    """Compute the bid range used by the random strategy.

    The lower bound is the same value get_constant_bid derives from
    `filepath`; the upper bound is that value plus 10.

    Returns:
        tuple[int, int]: (min_bid, max_bid), each truncated to an integer.

    Raises:
        ValueError: if the file contains no positive payprice.
    """
    min_pp = _base_bid(filepath)
    max_pp = min_pp + 10

    return int(min_pp),int(max_pp)


def process_event(row):
    """Pull the bid price and pay price out of one parsed CSV row.

    `row` is indexed positionally: element 21 is converted to the integer
    'bidprice' and element 22 to the integer 'payprice'.
    """
    bid_value = int(row[21])
    pay_value = int(row[22])
    return dict(bidprice=bid_value, payprice=pay_value)

def RTB_simulation(mode, validation_path, training_path, start_budget = 25000):
    """Replay the validation log with a simple bidding strategy and return the CTR.

    For every data row of the validation CSV a bid is produced. If the
    remaining budget is greater than the bid and the bid is greater than the
    row's pay price, the impression is won: the pay price is deducted from the
    budget and a click is counted when the first column of the row is "1".
    The impression and click totals are printed before returning.

    Args:
        mode: 'constant' bids get_constant_bid(training_path) on every row;
            'random' draws random.randrange(min_bid, max_bid) on every row,
            with the bounds taken from get_random_bid(training_path).
        validation_path: CSV file with a header line; the pay price is read
            through process_event(row).
        training_path: CSV file the bid (or bid range) is derived from.
        start_budget: budget available at the start of the replay.

    Returns:
        The click-through rate in percent (clicks / impressions * 100), or 0
        when no impression was won.

    Raises:
        ValueError: if `mode` is neither 'constant' nor 'random'.
    """
    # Resolve the strategy first so that an unsupported mode fails loudly
    # instead of silently reporting zero impressions.
    if mode == 'constant':
        constant_bid = get_constant_bid(training_path)

        def next_bid():
            return constant_bid
    elif mode == 'random':
        min_bid, max_bid = get_random_bid(training_path)

        def next_bid():
            return random.randrange(min_bid, max_bid)
    else:
        raise ValueError("unknown mode %r: expected 'constant' or 'random'" % (mode,))

    impressions = 0
    clicks = 0
    budget = start_budget

    # newline='' is what the csv module expects for files handed to csv.reader.
    with open(validation_path, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
        next(reader, None)  # skip the header line (tolerates an empty file)

        for row in reader:
            # The bid is drawn for every row, even when it cannot be afforded.
            current_bid = next_bid()
            if budget <= current_bid:
                continue

            payprice = process_event(row)['payprice']
            # Ties are lost: the bid has to be strictly above the pay price.
            if current_bid > payprice:
                impressions += 1
                budget -= payprice
                if row[0] == "1":
                    clicks += 1

    print("Impressions:{0}".format(impressions))
    print("Clicks:{0}".format(clicks))
    if impressions > 0:
        return (clicks / impressions) * 100
    else:
        return 0




if __name__=="__main__":
    # MAIN: run one random-strategy simulation and report the wall-clock time.
    st=time.time()  # reference timestamp for both timing prints below
    training_path = r"train.csv"
    validation_path = r"validation.csv"
    #training_events, labels = load_data(training_path)
    # No work happens between taking `st` and this print (the load above is
    # commented out), so it only shows the time elapsed since `st`.
    print (time.time()-st)

    # RTB_simulation prints the impression and click counts and returns the
    # click-through rate in percent.
    CTR=RTB_simulation('random', validation_path, training_path, 25000)

    print ("time: " +str(time.time()-st))

1.9073486328125e-06
Impressions:444
Clicks:1
time: 10.468076705932617
