In [1]:
import numpy as np
import pandas as pd
#import xgboost
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import time
import collections
import itertools as it
from sklearn import ensemble, linear_model
import matplotlib.pyplot as plt

In [2]:
val = pd.read_csv('validation.csv')
test = pd.read_csv('test.csv')

In [3]:
CTR_models_val = pd.DataFrame(np.vstack([np.loadtxt('CTR_models/logr/r1_sams_logr_6.csv'),
                                        np.loadtxt('CTR_models/gbm_dam/pCTR_gbm_dam_val.csv'), 
                                        pd.read_csv('CTR_models/gbm2/pCTR_gbm2_val.csv', index_col=0).values.flatten(), 
                                        np.loadtxt('ensemble/ensemble_base_learners_set2/val/r7_mlp_nn_14.csv')]).T,
                            columns=['LogReg', 'Damian', 'Stefan2', 'NN'])

In [None]:
############################### HELPER FUNCTIONS FOR SUB C2 ###############################

def add_increments(factors, splits, addition, nsamp, start, end, current):
    
    split_ids = np.linspace(start*nsamp, end*nsamp, splits+1, dtype=np.int)
    #print(split_ids)
    #print(int(split_ids))
    
    for i in range(len(split_ids)-1):
        
        factors.iloc[split_ids[i]:split_ids[i+1]] = current + addition 
        current += addition
        
    return factors

def build_time_preference(nsamp):
    
    passive_time = pd.DataFrame(np.zeros(shape=(nsamp, 7)), columns=['t1','t2','t3','t4','t5', 't6', 't7'])

    passive_time.iloc[:np.int(nsamp*0.5), 0] = 0.5
    
    passive_time.iloc[:np.int(nsamp*0.75), 1] = 0.5
    passive_time.iloc[:np.int(nsamp*0.5), 2] = 0.75
    
    passive_time.iloc[:np.int(nsamp*0.75), 3] = 0.75
    
    passive_time.iloc[:np.int(nsamp*0.5), 4] = 0.5
    passive_time.iloc[np.int(nsamp*0.5):np.int(nsamp*0.75), 4] = 0.75
    
    passive_time.iloc[:np.int(nsamp*0.85), 5] = 0.5
    
    passive_time.iloc[:np.int(nsamp*0.85), 6] = 0.25
    
    time_factor = pd.DataFrame() 
    
    for col in passive_time.columns:
        if col == 't1':
            start = 0.5
            end = 1
            current = 0.5
            time_factor[col + '_step1'] = add_increments(passive_time['t1'], splits=5, addition=0.2, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step2'] = add_increments(passive_time['t1'], splits=5, addition=0.4, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step3'] = add_increments(passive_time['t1'], splits=10, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step4'] = add_increments(passive_time['t1'], splits=20, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step5'] = add_increments(passive_time['t1'], splits=2, addition=0.5, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step6'] = add_increments(passive_time['t1'], splits=2, addition=0.25, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step7'] = add_increments(passive_time['t1'], splits=5, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
        elif col == 't2':
            start = 0.75
            end = 1
            current = 0.5
            time_factor[col + '_step1'] = add_increments(passive_time['t2'], splits=5, addition=0.2, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step2'] = add_increments(passive_time['t2'], splits=5, addition=0.4, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step3'] = add_increments(passive_time['t2'], splits=10, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step4'] = add_increments(passive_time['t2'], splits=20, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step5'] = add_increments(passive_time['t2'], splits=2, addition=0.5, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step6'] = add_increments(passive_time['t2'], splits=2, addition=0.25, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step7'] = add_increments(passive_time['t2'], splits=5, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
        elif col == 't3':
            start = 0.5
            end = 1
            current = 0.75
            time_factor[col + '_step1'] = add_increments(passive_time['t3'], splits=5, addition=0.2, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step2'] = add_increments(passive_time['t3'], splits=5, addition=0.4, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step3'] = add_increments(passive_time['t3'], splits=10, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step4'] = add_increments(passive_time['t3'], splits=20, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step5'] = add_increments(passive_time['t3'], splits=2, addition=0.5, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step6'] = add_increments(passive_time['t3'], splits=2, addition=0.25, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step7'] = add_increments(passive_time['t3'], splits=5, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
        elif col == 't4':
            start = 0.75
            end = 1
            current = 0.75
            time_factor[col + '_step1'] = add_increments(passive_time['t4'], splits=5, addition=0.2, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step2'] = add_increments(passive_time['t4'], splits=5, addition=0.4, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step3'] = add_increments(passive_time['t4'], splits=10, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step4'] = add_increments(passive_time['t4'], splits=20, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step5'] = add_increments(passive_time['t4'], splits=2, addition=0.5, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step6'] = add_increments(passive_time['t4'], splits=2, addition=0.25, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step7'] = add_increments(passive_time['t4'], splits=5, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
        elif col == 't5':
            start = 0.75
            end = 1
            current = 0.75
            time_factor[col + '_step1'] = add_increments(passive_time['t5'], splits=5, addition=0.2, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step2'] = add_increments(passive_time['t5'], splits=5, addition=0.4, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step3'] = add_increments(passive_time['t5'], splits=10, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step4'] = add_increments(passive_time['t5'], splits=20, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step5'] = add_increments(passive_time['t5'], splits=2, addition=0.5, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step6'] = add_increments(passive_time['t5'], splits=2, addition=0.25, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step7'] = add_increments(passive_time['t5'], splits=5, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
        elif col == 't6':
            start = 0.85
            end = 1
            current = 0.5
            time_factor[col + '_step1'] = add_increments(passive_time['t6'], splits=5, addition=0.2, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step2'] = add_increments(passive_time['t6'], splits=5, addition=0.4, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step3'] = add_increments(passive_time['t6'], splits=10, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step4'] = add_increments(passive_time['t6'], splits=20, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step5'] = add_increments(passive_time['t6'], splits=2, addition=0.5, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step6'] = add_increments(passive_time['t6'], splits=2, addition=0.25, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step7'] = add_increments(passive_time['t6'], splits=5, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
        elif col == 't7':
            start = 0.85
            end = 1
            current = 0.25
            time_factor[col + '_step1'] = add_increments(passive_time['t7'], splits=5, addition=0.2, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step2'] = add_increments(passive_time['t7'], splits=5, addition=0.4, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step3'] = add_increments(passive_time['t7'], splits=10, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step4'] = add_increments(passive_time['t7'], splits=20, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step5'] = add_increments(passive_time['t7'], splits=2, addition=0.5, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step6'] = add_increments(passive_time['t7'], splits=2, addition=0.25, nsamp=nsamp, start=start, end=end, current=current)    
            time_factor[col + '_step7'] = add_increments(passive_time['t7'], splits=5, addition=0.1, nsamp=nsamp, start=start, end=end, current=current)    
               
    time_factor['regular'] = np.ones(nsamp)
        
    return time_factor

In [17]:
pCTR_test = np.loadtxt('ensemble/ensemble_test_probs/ens_set1_test_probs.csv')

CTR_frac = pCTR_test / np.mean(pCTR_test)

# 240.0 1000 24.5 26.700001 42372.238281 6249909.5 236.519852 0.147523

base_bid = 240
cap = 1000
time_setting = 't4_step2'

final_bids = base_bid*CTR_frac # * np.linspace(1.2,0.8,len(pCTR_test)))
final_bids = np.minimum(final_bids, cap)
final_bids = final_bids * build_time_preference(pCTR_test.shape[0])[time_setting]

final_bids = np.round(final_bids)

submission = np.array([test['bidid'], final_bids])
print(submission[:10])

tmp = pd.DataFrame(submission.T, columns=['bidid','bidprice'])
tmp.to_csv('subs/sub57/testing_bidding_price.csv', index=False)
print(tmp.head())
print(tmp.tail())
print(np.mean(final_bids))


[['366c563de7d90feb9d4dab53e795a93fb3157387'
  '29167d4caa719788b5a342dbaa25151d53121f80'
  'ff8bc3f4d44a3ea60c5f3a3a8fbe7cd98fb2966e' ...
  'edb83d1e92bab4113360a655b68c2e44ace6a81d'
  '5eca186cb78a6eaadf91aa7070c88f2c25e31464'
  'afcdff59c976336fbcecb98b2c7379d369496131']
 [352.0 692.0 52.0 ... 132.0 443.0 256.0]]
                                      bidid bidprice
0  366c563de7d90feb9d4dab53e795a93fb3157387      352
1  29167d4caa719788b5a342dbaa25151d53121f80      692
2  ff8bc3f4d44a3ea60c5f3a3a8fbe7cd98fb2966e       52
3  844c2da00d45315f20b748ec131c26ee99a7cbc7      242
4  c6017f0ad0c44d7d0c9b62583ea863f28941c0ca      219
                                           bidid bidprice
303370  c3bb9e4340efd0b7727e1340be7975856878e27c      101
303371  75a0dac04603fa79e811f1c3e7d1a309f5a81bbf       27
303372  edb83d1e92bab4113360a655b68c2e44ace6a81d      132
303373  5eca186cb78a6eaadf91aa7070c88f2c25e31464      443
303374  afcdff59c976336fbcecb98b2c7379d369496131      256
205.930755665430