# Evaluate On Evaluation Set

## Summary

This notebook performs the following steps:
 -  Pull validation data
 -  Load CTR predictions from CSV
 -  Load BaseBid Predictions from CSV
 -  Combine above using a formula
 -  Summarise Results

# Preparation

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from random import randint    
import seaborn as sns
from score_models import Scorer
import timeit

%matplotlib inline

# Pull Validation Data

In [2]:
# Read the validation impressions into memory.
# NOTE(review): `path` is an absolute, user-specific directory, so this cell
# only runs on the original author's machine — consider a configurable data dir.
path = "C:/Users/Akis/OneDrive/Masters/Web Economics/Data/"
validation_df = pd.read_csv(path+'validation.csv')

In [3]:
# Empirical click-through rate of the validation set:
# number of rows with click == 1 divided by the total number of rows.
avgCTR = int((validation_df["click"] == 1).sum()) / len(validation_df)
print(avgCTR)

0.0006646376573167722


# Load CTR Predictions

Need a function that will combine multiple dataframes into one

In [4]:
# Load the predicted click probabilities and keep them as a one-column frame
# named "pCTR". `index=str` is kept from the original: it converts the row
# labels to strings.
path='C:/Users/Akis/OneDrive/Masters/Web Economics/Data/WebEcon Team Drive/predictions/'
teamACTRData = pd.read_csv(path + 'lr_pCTR.csv')
teamACTR = (
    teamACTRData[['click_proba']]
    .rename(index=str, columns={"click_proba": "pCTR"})
)

# Load Basebid Predictions

Need a function that will combine multiple dataframes into one

In [6]:
# Load the base-bid predictions from the same predictions directory.
# NOTE: relies on `path` still pointing at the predictions folder set in the
# CTR-loading cell, so that cell must be run first.
teamABaseBidData = pd.read_csv(path+'ridge_basebid.csv')
# Keep only the bid column, as a one-column DataFrame.
teamABaseBid=teamABaseBidData[['base_bid']]
print(teamABaseBid.head(2))


    base_bid
0  80.350004
1  95.867239


# Build Bid Prices for all models

Need a function that will build a new dataframe containing the bids of all teams by combining the previous two dataframes.

In [7]:
def Function1(resBaseBid, resCTR, avCTR=0.5):
    """Linear bid formula: ``bid = base_bid * (pCTR / avCTR)``.

    Parameters
    ----------
    resBaseBid : pandas.DataFrame or pandas.Series
        Base bids. Only ``.values`` is used, so rows are matched to
        ``resCTR`` by position, not by index label.
    resCTR : pandas.DataFrame or pandas.Series
        Predicted click-through rates, one per base bid.
    avCTR : float, default 0.5
        Reference CTR the predictions are divided by. The default keeps the
        previously hard-coded value; pass another value (for example an
        empirical average CTR) to rescale the bids.

    Returns
    -------
    Same pandas type as ``resCTR``
        The bids, carrying ``resCTR``'s index and column labels.

    Raises
    ------
    ValueError
        If ``avCTR`` is not strictly positive.
    """
    if not avCTR > 0:
        raise ValueError("avCTR must be a positive number")
    Bids = resBaseBid.values * (resCTR / avCTR)
    return Bids

In [8]:
def Function2(reBaseBid, reCTR, avCTR=0.5):
    """Quadratic bid formula that zeroes out low click probabilities.

    The predicted CTR is transformed with ``(pCTR + 0.5) ** 2 - 0.5``,
    negative results are floored at 0, and the transformed value is then used
    in ``bid = base_bid * (transformed / avCTR)``.

    Parameters
    ----------
    reBaseBid : pandas.DataFrame or pandas.Series
        Base bids. Only ``.values`` is used, so rows are matched to
        ``reCTR`` by position, not by index label.
    reCTR : pandas.DataFrame or pandas.Series
        Predicted click-through rates. The input is not modified.
    avCTR : float, default 0.5
        Reference CTR the transformed predictions are divided by. The default
        keeps the previously hard-coded value.

    Returns
    -------
    Same pandas type as ``reCTR``
        The bids, carrying ``reCTR``'s index and column labels.

    Raises
    ------
    ValueError
        If ``avCTR`` is not strictly positive.
    """
    if not avCTR > 0:
        raise ValueError("avCTR must be a positive number")
    # The arithmetic already produces a new object, so no defensive copy of
    # the caller's data is needed.
    transformed = ((reCTR + 0.5) ** 2) - 0.5
    # Negative transformed values would yield negative bids; floor them at 0.
    transformed = transformed.clip(lower=0)
    Bids = reBaseBid.values * (transformed / avCTR)
    return Bids

In [9]:
def Function3(rBaseBid, rCTR, avCTR=0.5):
    """Thresholded bid formula: bid only when pCTR is at least 0.5.

    Predictions below 0.5 are replaced by 0 (so the bid is 0); the remaining
    ones are used in ``bid = base_bid * 1.2 * (pCTR / avCTR)``.

    Parameters
    ----------
    rBaseBid : pandas.DataFrame or pandas.Series
        Base bids. Only ``.values`` is used, so rows are matched to
        ``rCTR`` by position, not by index label.
    rCTR : pandas.DataFrame or pandas.Series
        Predicted click-through rates. The input is not modified.
    avCTR : float, default 0.5
        Reference CTR the gated predictions are divided by. The default keeps
        the previously hard-coded value. The 0.5 cut-off is a separate
        constant and does not change with this argument.

    Returns
    -------
    Same pandas type as ``rCTR``
        The bids, carrying ``rCTR``'s index and column labels.

    Raises
    ------
    ValueError
        If ``avCTR`` is not strictly positive.
    """
    if not avCTR > 0:
        raise ValueError("avCTR must be a positive number")
    min_ctr = 0.5        # predictions below this get a zero bid
    bid_multiplier = 1.2  # flat uplift applied to every base bid
    # mask() returns a new object, leaving the caller's data untouched.
    gated = rCTR.mask(rCTR < min_ctr, 0)
    Bids = rBaseBid.values * bid_multiplier * (gated / avCTR)
    return Bids

In [10]:
# Assemble the auction input: one row per validation impression, holding the
# click label, the pay price and the bid produced by Function1.
resultsBids = pd.DataFrame(index=range(len(validation_df))).assign(
    click=validation_df['click'],
    payprice=validation_df['payprice'],
)

# `.values` strips the index from the Function1 result, so the bids are
# attached to the rows by position.
resultsBids['TeamAFormula1'] = Function1(teamABaseBid, teamACTR).values

print(resultsBids.head(2))

   click  payprice  TeamAFormula1
0      0        23      75.681350
1      0        75      75.357042


In [11]:
# Show the first 20 rows to eyeball the bids against the pay prices.
print(resultsBids.head(20))

    click  payprice  TeamAFormula1
0       0        23      75.681350
1       0        75      75.357042
2       0        65      79.423925
3       0         6      36.382727
4       0         5      46.016489
5       0        22      37.358419
6       0        31      56.270605
7       0        20      83.579240
8       0        58      39.174854
9       0        55      47.593749
10      0        88      80.067262
11      0        55      42.885943
12      0        49      78.761406
13      0       110      49.670200
14      0       160     184.623743
15      0        16      60.494599
16      0        50      51.030263
17      0        76      99.243468
18      0        89     101.878999
19      0       136      58.977454


# Hold Auction for each

Need a function that will step through the impressions one by one checking who won the impression, whilst keeping track of each player's budget.

In [12]:
# Quick look at the bid table right before it is passed to the auction scorer.
print(resultsBids.head())

   click  payprice  TeamAFormula1
0      0        23      75.681350
1      0        75      75.357042
2      0        65      79.423925
3      0         6      36.382727
4      0         5      46.016489


In [24]:
# Time the auction run. timeit.timeit() called without arguments benchmarks a
# `pass` statement and returns that duration, so it cannot serve as a clock;
# timeit.default_timer() returns the current timer reading instead.
start = timeit.default_timer()
s = Scorer()
# NOTE(review): 6250000 and 20 are presumably the per-team budget and a
# minimum price — confirm against score_models.Scorer.set_df.
df_summary,df = s.set_df(resultsBids, ['payprice','TeamAFormula1'],6250000,20)
end = timeit.default_timer()
print(end - start,"seconds")

# Scores

Summarise the main metrics for each team.

In [25]:
# Summary frame returned by Scorer.set_df: one row per bidding column.
print(df_summary)

      budget  click      team_name     win
0  14.000000    148  TeamAFormula1  215626
1  -0.736412     54       payprice   88163


In [26]:
# Per-impression frame returned by Scorer.set_df; the output shows the input
# columns plus `winner` and `price`.
print(df)

        click  payprice  TeamAFormula1  winner       price
0           0        23      75.681350       1   23.000000
1           0        75      75.357042       1   75.000000
2           0        65      79.423925       1   65.000000
3           0         6      36.382727       1   20.000000
4           0         5      46.016489       1   20.000000
5           0        22      37.358419       1   22.000000
6           0        31      56.270605       1   31.000000
7           0        20      83.579240       1   20.000000
8           0        58      39.174854       0   39.174854
9           0        55      47.593749       0   47.593749
10          0        88      80.067262       0   80.067262
11          0        55      42.885943       0   42.885943
12          0        49      78.761406       1   49.000000
13          0       110      49.670200       0   49.670200
14          0       160     184.623743       1  160.000000
15          0        16      60.494599       1   20.0000