# Abstract

This notebook deals with the preparation of the data so they can be used as input to the models. It includes __missing values imputation__, __feature engineering__, __encoding__ of categorical variables, __undersampling__ the majority class and __oversampling__ the minority class to create a balanced dataset.

# Import and Basic Info

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
import math
import random

%matplotlib inline
pd.set_option('display.max_columns', None)
pd.options.mode.chained_assignment = None  # To ignore SettingWithCopyWarning warning

# Set a random seed for repeatability
rand_seed = 123
random.seed(rand_seed)
np.random.seed(rand_seed)

### Define for which dataset the code will run

In [2]:
# This variable denotes we want features for the pCTR predictor or for the base_bid predictor.
name_helper = "pCTR___"

In [3]:
train = pd.read_csv("train.csv")
validation = pd.read_csv("validation.csv")
test = pd.read_csv("test.csv")

In [4]:
train.shape, validation.shape, test.shape

((2430981, 25), (303925, 25), (303375, 22))

In [5]:
train.columns

Index(['click', 'weekday', 'hour', 'bidid', 'userid', 'useragent', 'IP',
       'region', 'city', 'adexchange', 'domain', 'url', 'urlid', 'slotid',
       'slotwidth', 'slotheight', 'slotvisibility', 'slotformat', 'slotprice',
       'creative', 'bidprice', 'payprice', 'keypage', 'advertiser', 'usertag'],
      dtype='object')

In [6]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2430981 entries, 0 to 2430980
Data columns (total 25 columns):
click             int64
weekday           int64
hour              int64
bidid             object
userid            object
useragent         object
IP                object
region            int64
city              int64
adexchange        float64
domain            object
url               object
urlid             float64
slotid            object
slotwidth         int64
slotheight        int64
slotvisibility    object
slotformat        object
slotprice         int64
creative          object
bidprice          int64
payprice          int64
keypage           object
advertiser        int64
usertag           object
dtypes: float64(2), int64(11), object(12)
memory usage: 463.7+ MB


In [7]:
train.head(10)

Unnamed: 0,click,weekday,hour,bidid,userid,useragent,IP,region,city,adexchange,domain,url,urlid,slotid,slotwidth,slotheight,slotvisibility,slotformat,slotprice,creative,bidprice,payprice,keypage,advertiser,usertag
0,0,5,22,b7bea80521fdecd95d2d761a38c91c3f09618066,2e880fb7d690cf7377b2e42e701728e3f3c0e4c1,windows_ie,125.37.175.*,2,2,2.0,trqRTvKaXTKfgg24JKTI,6447a7dfa30fe6eb410c91860e7c9b45,,2015392487,200,200,2,0,5,a4f763f78ef3eedfe614263b94a8924e,238,5,0f951a030abdaedd733ee8d114ce2944,3427,
1,0,1,20,4f51205475678f5a124bc76b2c54163bf8eaa7eb,3a1fe01360ff8100e7d006b83b77a3e4c01d928c,windows_chrome,171.36.92.*,238,239,1.0,20fc675468712705dbf5d3eda94126da,3ddf173a94bd23c326683b6373c75dd4,,mm_10982364_973726_8930541,300,250,FourthView,Na,0,10722,294,23,,2821,
2,0,3,13,b604e3fd054a658ab7ced4285ebf2ef54d2bd890,801d18a056b6fe6b06a794aef17fb0d6daff2414,windows_ie,59.46.106.*,40,41,2.0,trqRTJn7O95I1mKYUV,625d1b5916ea925332c7b326c0574cfa,,1720123646,250,250,2,0,5,798b2d49952d77f1eace9f23c210d0b5,238,24,0f951a030abdaedd733ee8d114ce2944,3427,10052100061386610110
3,0,6,23,0348beeae93e561584c3b50fc9e7746a33048ad7,0d6eaf2259699990e38a1fc5116f112070b9ecdc,windows_ie,114.250.226.*,1,1,1.0,5F97t5E0BTK7XhNrUMpENpn,dedc488b98ca20707bc9a723957e7d1f,,mm_10027070_118039_10308280,160,600,2,1,0,cb7c76e7784031272e37af8e7e9b062c,300,25,bebefa5efe83beee17a3d245e7c5085b,1458,138661006310111
4,0,5,6,268149c1789bce2bc9798ffd97ec431219bafeb3,a239d9bb642460d974ba67f85e63b8d3e214da0e,windows_ie,183.63.192.*,216,233,2.0,13625cb070ffb306b425cd803c4b7ab4,4199d1227c511fc776b76594dabff9f8,,1120200150,728,90,OtherView,Na,133,7330,277,133,,2259,
5,0,4,17,1be2cf4a47a2a6aee4b0fa64d1b786d3897be4f0,8810fd690f88ddfb797867e3e08e7816f274bd46,windows_ie,119.45.182.*,276,277,1.0,trqRTuxoGQdcFNKbuKz,bdbc836b2f86c0391bd2f1e1e50739b3,,mm_10024662_3445902_11178345,336,280,2,1,0,77819d3e0b3467fe5c7b16d68ad923a1,300,123,bebefa5efe83beee17a3d245e7c5085b,1458,1000610077100631005710110
6,0,3,12,6b4f23dd215a5f11136c2f29214236cfee1f0122,fbd1bcaf2b33108d2c7f88ac41f0b0194b508a9b,windows_ie,112.82.93.*,80,89,3.0,3FKElpuEMusyJqKbuKz,6957deb8d83340086ef3cb2abd9c7198,,Fashion_F_Width1,1000,90,0,0,20,86c2543527c86a893d4d4f68810a0416,300,20,43f4a37f42a7c5e6219e2601b26c6976,3386,1008310024100631349610093100061005710110
7,0,3,16,f7c1d8ea177211249456c79e194617ce107bc077,9b539ba886146562a3991fd9f94bb9f9bad1f647,windows_ie,118.75.92.*,15,19,1.0,trqRTvp8gIc7gspy,4c70d1fc007d5349286f58da6d08fa9,,mm_33208362_3404787_11119183,160,600,2,1,0,cb7c76e7784031272e37af8e7e9b062c,300,134,bebefa5efe83beee17a3d245e7c5085b,1458,100631002413800138661000610111
8,0,5,14,9e97694096fe8692851048a893231a7f7ff87922,ddc9c85b6a5726aafaa4c42239e90fb7a44a88d7,windows_ie,61.185.128.*,333,342,3.0,ersbQv1RdoTy1m58uG,cbec43b96a01da4a0e06cc78d483a24f,,ALLINONE_F_Width1,1000,90,0,0,70,832b91d59d0cb5731431653204a76c0e,300,70,bebefa5efe83beee17a3d245e7c5085b,1458,10063100061008310110
9,0,5,0,d1b7c7fdb88ce3aa78a540bb29848dd77329a574,63b4f9a3638f81af361ab3c4190fd3e1b5e70fea,windows_chrome,117.9.211.*,2,2,3.0,ersbQv1RdoTy1m58uG,1b2f87d6f3b8b473afe7e9fbe597afca,,ALLINONE_F_Width1,1000,90,0,0,70,a10c31a8ff5f42930b4c34035e523886,241,77,d29e59bf0f7f8243858b8183f14d4412,3358,1340310031100631008310006100771007510110


## Missing values

In [8]:
def MissingValues(df):
    
    """
    Function to detect null values
    """
    
    obj_cols = list(df.select_dtypes(include=['object']).columns)
    nulls_list = [df[col].str.match("null").sum()*100/df.shape[0] for col in obj_cols]
    nulls_df = pd.DataFrame(index = obj_cols, columns = ["Nulls_Percentage"], data = nulls_list)
    
    return nulls_df

def ImputeMissingValues(df):
    
    """
    As will be seen, the only column that has null values and will be actually used as a feature
    in our models is "adexchange". All other columns with null values (i.e. domain, url, urlid, keypage)
    will be ignored because they have too many values to be encoded (feature space would be enormous)
    """
    
    ### Remember to impute slot_format and slot_visibility!!!
    
    #df.loc[ df["adexchange"] == "null", "adexchange"] = df["adexchange"].dropna().mode()[0]
    df["adexchange"] = df["adexchange"].fillna(df["adexchange"].dropna().mode()[0])
    df["usertag"] = df["usertag"].fillna("")  ## not optimal
    return df


In [9]:
train.isnull().sum()

click                   0
weekday                 0
hour                    0
bidid                   0
userid                  0
useragent               0
IP                      0
region                  0
city                    0
adexchange          49829
domain             137135
url                 86812
urlid             2430981
slotid                  0
slotwidth               0
slotheight              0
slotvisibility          0
slotformat              0
slotprice               0
creative                0
bidprice                0
payprice                0
keypage            504990
advertiser              0
usertag            497479
dtype: int64

In [10]:
validation.isnull().sum()

click                  0
weekday                0
hour                   0
bidid                  0
userid                 0
useragent              0
IP                     0
region                 0
city                   0
adexchange          6176
domain             16932
url                10850
urlid             303925
slotid                 0
slotwidth              0
slotheight             0
slotvisibility         0
slotformat             0
slotprice              0
creative               0
bidprice               0
payprice               0
keypage            62944
advertiser             0
usertag            62186
dtype: int64

In [11]:
train_2 = ImputeMissingValues(train)
validation_2 = ImputeMissingValues(validation)

In [12]:
train_2.shape, validation_2.shape

((2430981, 25), (303925, 25))

In [13]:
train_2.isnull().sum()

click                   0
weekday                 0
hour                    0
bidid                   0
userid                  0
useragent               0
IP                      0
region                  0
city                    0
adexchange              0
domain             137135
url                 86812
urlid             2430981
slotid                  0
slotwidth               0
slotheight              0
slotvisibility          0
slotformat              0
slotprice               0
creative                0
bidprice                0
payprice                0
keypage            504990
advertiser              0
usertag                 0
dtype: int64

In [14]:
validation_2.isnull().sum()

click                  0
weekday                0
hour                   0
bidid                  0
userid                 0
useragent              0
IP                     0
region                 0
city                   0
adexchange             0
domain             16932
url                10850
urlid             303925
slotid                 0
slotwidth              0
slotheight             0
slotvisibility         0
slotformat             0
slotprice              0
creative               0
bidprice               0
payprice               0
keypage            62944
advertiser             0
usertag                0
dtype: int64

## Feature Engineering

### Usertags

Because estimating the usertags is time consuming, we estimate it once before we proceed for further fearure engineering (so we don't need to recalculate usertags every time we run the Feature Engineering function)

In [15]:
def UsertagCategories(df):
    
    # Drop nan
    df = df["usertag"].dropna().reset_index(drop = True)
    
    # Find unique usertags
    usertags_list = [df[i].split(",") for i in range(df.shape[0])]
    
    # itertools.chain.from_iterable joins a list of lists into a single list
    usertags = np.unique(list(itertools.chain.from_iterable(usertags_list)))
    
    # Remove the empty string ""
    usertags = [tag for tag in usertags if len(tag) > 0]
    
    return usertags

In [16]:
usertags_train = UsertagCategories(train)
usertags_validation = UsertagCategories(validation)

In [17]:
# Check if train and validation/test have the same usertags
list(set(usertags_train) - set(usertags_validation)), list(set(usertags_validation) - set(usertags_train))

([], [])

In [18]:
def FeatureEngineering(df):
    
        # Convert numerical to categorical
        df["weekday_cat"] = df["weekday"].map(lambda x: str(x))
        df["hour_cat"] = df["hour"].map(lambda x: str(x))
        df["region_cat"] = df["region"].map(lambda x: str(x))
        df["city_cat"] = df["city"].map(lambda x: str(x))
        df["adexchange_cat"] = df["adexchange"].map(lambda x: str(x))
        df["advertiser_cat"] = df["advertiser"].map(lambda x: str(x))
        
        # Operating system
        df["os"] = df["useragent"].map(lambda x: x.split("_")[0])
        
        # Browser
        df["browser"] = df["useragent"].map(lambda x: x.split("_")[1])
        
        # Slotarea
        df["slotarea"] = df["slotwidth"]*df["slotheight"]   ####.astype("category")
        
        # Part of the day
        df["part_of_the_day"] = ""
        
        df.loc[(df["weekday"] == 0) & (df["hour"] > 0) & (df["hour"] <=8), "part_of_the_day"] = "Sunday_Night"
        df.loc[(df["weekday"] == 0) & (df["hour"] > 8) & (df["hour"] < 17), "part_of_the_day"] = "Sunday_Morning"
        df.loc[(df["weekday"] == 0) & (((df["hour"] >= 17) & (df["hour"] <= 23)) | ((df["hour"] == 0))), "part_of_the_day"] = "Sunday_Evening"
        
        df.loc[(df["weekday"] == 1) & (df["hour"] > 0) & (df["hour"] <=8), "part_of_the_day"] = "Monday_Night"
        df.loc[(df["weekday"] == 1) & (df["hour"] > 8) & (df["hour"] < 17), "part_of_the_day"] = "Monday_Morning"
        df.loc[(df["weekday"] == 1) & (((df["hour"] >= 17) & (df["hour"] <= 23)) | ((df["hour"] == 0))), "part_of_the_day"] = "Monday_Evening"

        
        df.loc[(df["weekday"] == 2) & (df["hour"] > 0) & (df["hour"] <=8), "part_of_the_day"] = "Tuesday_Night"
        df.loc[(df["weekday"] == 2) & (df["hour"] > 8) & (df["hour"] < 17), "part_of_the_day"] = "Tuesday_Morning"
        df.loc[(df["weekday"] == 2) & (((df["hour"] >= 17) & (df["hour"] <= 23)) | ((df["hour"] == 0))), "part_of_the_day"] = "Tuesday_Evening"

        df.loc[(df["weekday"] == 3) & (df["hour"] > 0) & (df["hour"] <=8), "part_of_the_day"] = "Wednesday_Night"
        df.loc[(df["weekday"] == 3) & (df["hour"] > 8) & (df["hour"] < 17), "part_of_the_day"] = "Wednesday_Morning"
        df.loc[(df["weekday"] == 3) & (((df["hour"] >= 17) & (df["hour"] <= 23)) | ((df["hour"] == 0))), "part_of_the_day"] = "Wednesday_Evening"

        df.loc[(df["weekday"] == 4) & (df["hour"] > 0) & (df["hour"] <=8), "part_of_the_day"] = "Tuesday_Night"
        df.loc[(df["weekday"] == 4) & (df["hour"] > 8) & (df["hour"] < 17), "part_of_the_day"] = "Tuesday_Morning"
        df.loc[(df["weekday"] == 4) & (((df["hour"] >= 17) & (df["hour"] <= 23)) | ((df["hour"] == 0))), "part_of_the_day"] = "Tuesday_Evening"

        df.loc[(df["weekday"] == 5) & (df["hour"] > 0) & (df["hour"] <=8), "part_of_the_day"] = "Friday_Night"
        df.loc[(df["weekday"] == 5) & (df["hour"] > 8) & (df["hour"] < 17), "part_of_the_day"] = "Friday_Morning"
        df.loc[(df["weekday"] == 5) & (((df["hour"] >= 17) & (df["hour"] <= 23)) | ((df["hour"] == 0))), "part_of_the_day"] = "Friday_Evening"

        
        df.loc[(df["weekday"] == 6) & (df["hour"] > 0) & (df["hour"] <=8), "part_of_the_day"] = "Saturday_Night"
        df.loc[(df["weekday"] == 6) & (df["hour"] > 8) & (df["hour"] < 17), "part_of_the_day"] = "Saturday_Morning"
        df.loc[(df["weekday"] == 6) & (((df["hour"] >= 17) & (df["hour"] <= 23)) | ((df["hour"] == 0))), "part_of_the_day"] = "Saturday_Evening"
        
        # Slotprice binning
        df["slotprice_cat"] = 0
        
        df.loc[ df["slotprice"] <= 10, "slotprice_cat"] = 0
        df.loc[ (df["slotprice"] > 10) & (df["slotprice"] <= 50), "slotprice_cat"] = 1
        df.loc[ (df["slotprice"] > 50) & (df["slotprice"] <= 100), "slotprice_cat"] = 2
        df.loc[ df["slotprice"] > 100, "slotprice_cat"] = 3

        
        # Usertags
        for tag in usertags_train:
            col_name = "usertag_" + tag
            df[col_name] = df["usertag"].map(lambda x: 1 if tag in x.split(",") else 0)
        
  
        return df


def DropColumns(df):
    
    columns = ["weekday", "hour", "bidid", "userid", "useragent", "IP", "domain", "url", "urlid", "slotid",
               "slotwidth", "slotheight", "slotprice", "keypage", "usertag", "region", "city", "adexchange", "advertiser"]
    df.drop(columns, axis = 1, inplace = True)
    
    return df


def GetDummies(df):
    
    df = pd.get_dummies(df)
    return df

### Combine train and validation sets

Before applying the feature engineering function and encode categorical variables we will concatenate tarin and validation sets. The reason to do that, is that if we encode train and validation categorical columns separately, there is the possibility that one category that exists in train data, does not exist in validation data. This situation would cause our code to crash, since train and validation sets would consist of different features. 

In [19]:
all_data = pd.concat((train_2, validation_2), axis = 0)

In [20]:
train_2.shape, validation_2.shape, all_data.shape

((2430981, 25), (303925, 25), (2734906, 25))

In [21]:
% time all_data = FeatureEngineering(all_data)

CPU times: user 2min 34s, sys: 5.99 s, total: 2min 40s
Wall time: 2min 40s


In [22]:
all_data.shape

(2734906, 104)

In [23]:
# Drop useless features
all_data = DropColumns(all_data)

In [24]:
all_data.shape

(2734906, 85)

In [25]:
all_data.columns

Index(['click', 'slotvisibility', 'slotformat', 'creative', 'bidprice',
       'payprice', 'weekday_cat', 'hour_cat', 'region_cat', 'city_cat',
       'adexchange_cat', 'advertiser_cat', 'os', 'browser', 'slotarea',
       'part_of_the_day', 'slotprice_cat', 'usertag_10006', 'usertag_10024',
       'usertag_10031', 'usertag_10048', 'usertag_10052', 'usertag_10057',
       'usertag_10059', 'usertag_10063', 'usertag_10067', 'usertag_10074',
       'usertag_10075', 'usertag_10076', 'usertag_10077', 'usertag_10079',
       'usertag_10083', 'usertag_10093', 'usertag_10102', 'usertag_10110',
       'usertag_10111', 'usertag_10114', 'usertag_10115', 'usertag_10116',
       'usertag_10117', 'usertag_10118', 'usertag_10120', 'usertag_10123',
       'usertag_10125', 'usertag_10126', 'usertag_10127', 'usertag_10129',
       'usertag_10130', 'usertag_10131', 'usertag_10133', 'usertag_10138',
       'usertag_10140', 'usertag_10142', 'usertag_10145', 'usertag_10146',
       'usertag_10147', 'usertag

In [26]:
# Convert categorical features to numerical using dummy variables
all_data_dummy = GetDummies(all_data)

In [27]:
c = all_data_dummy.columns
c

Index(['click', 'bidprice', 'payprice', 'slotarea', 'slotprice_cat',
       'usertag_10006', 'usertag_10024', 'usertag_10031', 'usertag_10048',
       'usertag_10052',
       ...
       'part_of_the_day_Saturday_Night', 'part_of_the_day_Sunday_Evening',
       'part_of_the_day_Sunday_Morning', 'part_of_the_day_Sunday_Night',
       'part_of_the_day_Tuesday_Evening', 'part_of_the_day_Tuesday_Morning',
       'part_of_the_day_Tuesday_Night', 'part_of_the_day_Wednesday_Evening',
       'part_of_the_day_Wednesday_Morning', 'part_of_the_day_Wednesday_Night'],
      dtype='object', length=701)

In [28]:
all_data_dummy.head()

Unnamed: 0,click,bidprice,payprice,slotarea,slotprice_cat,usertag_10006,usertag_10024,usertag_10031,usertag_10048,usertag_10052,usertag_10057,usertag_10059,usertag_10063,usertag_10067,usertag_10074,usertag_10075,usertag_10076,usertag_10077,usertag_10079,usertag_10083,usertag_10093,usertag_10102,usertag_10110,usertag_10111,usertag_10114,usertag_10115,usertag_10116,usertag_10117,usertag_10118,usertag_10120,usertag_10123,usertag_10125,usertag_10126,usertag_10127,usertag_10129,usertag_10130,usertag_10131,usertag_10133,usertag_10138,usertag_10140,usertag_10142,usertag_10145,usertag_10146,usertag_10147,usertag_10148,usertag_10149,usertag_10684,usertag_11092,usertag_11278,usertag_11379,usertag_11423,usertag_11512,usertag_11576,usertag_11632,usertag_11680,usertag_11724,usertag_11944,usertag_13042,usertag_13403,usertag_13496,usertag_13678,usertag_13776,usertag_13800,usertag_13866,usertag_13874,usertag_14273,usertag_15398,usertag_16593,usertag_16617,usertag_16661,usertag_16706,usertag_16751,usertag_16753,slotvisibility_0,slotvisibility_1,slotvisibility_2,slotvisibility_255,slotvisibility_FifthView,slotvisibility_FirstView,slotvisibility_FourthView,slotvisibility_Na,slotvisibility_OtherView,slotvisibility_SecondView,slotvisibility_ThirdView,slotformat_0,slotformat_1,slotformat_5,slotformat_Na,creative_0055e8503dc053435b3599fe44af118b,creative_00fccc64a1ee2809348509b7ac2a97a5,creative_011c1a3d4d3f089a54f9b70a4c0a6eb3,creative_0cd33fcb336655841d3e1441b915748d,creative_10717,creative_10718,creative_10719,creative_10720,creative_10721,creative_10722,creative_10723,creative_10733,creative_10734,creative_11908,creative_12610,creative_12611,creative_12612,creative_12613,creative_12614,creative_12615,creative_12616,creative_12618,creative_12619,creative_12620,creative_12621,creative_12622,creative_12623,creative_12624,creative_12625,creative_12626,creative_12627,creative_12628,creative_12629,creative_12630,creative_12631,creative_12632,creative_12633,creative_12782,creative_12987,creative_13606a7c541dcd9ca1948875a760bb31,creative_16ff51916f0c796ec72eefc633c4a8ed,creative_1a43f1ff53f48573803d4a3c31ebc163,creative_23485fcd23122d755d38f8c89d46ca56,creative_23d6dade7ed21cea308205b37594003e,creative_266f17dcc3a2280172a48fec62be7d02,creative_2abc9eaf57d17a96195af3f63c45dc72,creative_2f88fc9cf0141b5bbaf251cab07f4ce7,creative_3b805a00d99d5ee2493c8fb0063e30e9,creative_3d8f1161832704a1a34e1ccdda11a81e,creative_4400bf8dea968a0068824792fd336c4c,creative_44966cc8da1ed40c95d59e863c8c75f0,creative_449a22cd91d9042eda3d3a1b89a22ea8,creative_47905feeb59223468fb898b3c9ac024d,creative_48c6405fa5bdecc850ea6ef5e6cf4da5,creative_48f2e9ba15708c0146bda5e1dd653caa,creative_4ad7e35171a3d8de73bb862791575f2e,creative_4b724cd63dfb905ebcd54e64572c646d,creative_54c6e2569e059cf336f7e7742b44c3ba,creative_59f065a795a663140e36eec106464524,creative_5c4e0bb0db45e2d1b3a14f817196ebd6,creative_612599432d200b093719dd1f372f7a30,creative_62f7f9a6dca2f80cc00f17dcda730bc1,creative_6455a6f45433aef94075f34dafd8d3bf,creative_672fd361820651d4d99b47d7bf48d715,creative_6b9331e0f0dbbfef42c308333681f0a3,creative_6cdf8fdd3e01122b09b5b411510a2385,creative_6fb071c11134e5078c1184ef1370a110,creative_7097e4210dea4d69f07f0f5e4343529c,creative_7184c9560e68e977187e67e45a4f3198,creative_7314,creative_7315,creative_7316,creative_7317,creative_7318,creative_7319,creative_7320,creative_7321,creative_7322,creative_7323,creative_7324,creative_7325,creative_7326,creative_7327,creative_7328,creative_7330,creative_7331,creative_7332,creative_7333,creative_7334,creative_7335,creative_7336,creative_77819d3e0b3467fe5c7b16d68ad923a1,creative_798b2d49952d77f1eace9f23c210d0b5,creative_7eb0065067225fa5f511f7ffb9895f24,creative_80446133fda50388678789e3ebd9fdb1,creative_80a776343079ed94d424f4607b35fd39,creative_82f125e356439d73902ae85e2be96777,creative_832b91d59d0cb5731431653204a76c0e,creative_86c2543527c86a893d4d4f68810a0416,creative_87945ed58e806dbdc291b3662f581354,creative_8dff45ed862a740986dbe688aafee7e5,creative_911b2d84826786018761e8c0b0a3a60c,creative_a10c31a8ff5f42930b4c34035e523886,creative_a499988a822facd86dd0e8e4ffef8532,creative_a4f763f78ef3eedfe614263b94a8924e,creative_b90c12ed2bd7950c6027bf9c6937c48a,creative_bac72fe483c04a0ea2490539b4d06e6a,creative_bc27493ad2351e2577bc8664172544f8,creative_c46090c887c257b61ab1fa11baee91d8,creative_c936045d792f6ea3aa22d86d93f5cf23,creative_c938195f9e404b4f38c7e71bf50263e5,creative_cb7c76e7784031272e37af8e7e9b062c,creative_cc9b344e950b4f8c2b96537174a343b7,creative_cd6f20e0b2a699c352f2e55a7a2091fd,creative_d01411218cc79bc49d2a4078c4093b76,creative_d5cecca9a6cbd7a0a48110f1306b26d1,creative_d881a6c788e76c2c27ed1ef04f119544,creative_dba9860463419dcd6c1b0152dfe7314c,creative_dc0998c10f8f0b623b5d949e8272e4c7,creative_e049ebe262e20bed5f9b975208db375b,creative_e1096a0a4f557814cb226fa5ba2339b2,creative_e1af08818a6cd6bbba118bb54a651961,creative_e1b0b6fb39abeb138c0b1e37c5f6d777,creative_e87d7633d474589c2e2e3ba4eda53f6c,creative_f1691b7571803d5a46adcb1f39f94d44,creative_f49541b4e0999d0c934ee3eea142a60e,creative_f65c8bdb41e9015970bac52baa813239,creative_fa8f0532dd5144b5fa748459e8d90b49,creative_fb5afa9dba1274beaf3dad86baf97e89,creative_fe222c13e927077ad3ea087a92c0935c,creative_ff5123fb9333ca095034c62fdaaf51aa,weekday_cat_0,weekday_cat_1,weekday_cat_2,weekday_cat_3,weekday_cat_4,weekday_cat_5,weekday_cat_6,hour_cat_0,hour_cat_1,hour_cat_10,hour_cat_11,hour_cat_12,hour_cat_13,hour_cat_14,hour_cat_15,hour_cat_16,hour_cat_17,hour_cat_18,hour_cat_19,hour_cat_2,hour_cat_20,hour_cat_21,hour_cat_22,hour_cat_23,hour_cat_3,hour_cat_4,hour_cat_5,hour_cat_6,hour_cat_7,hour_cat_8,hour_cat_9,region_cat_0,region_cat_1,region_cat_106,region_cat_124,region_cat_134,region_cat_146,region_cat_15,region_cat_164,region_cat_183,region_cat_2,region_cat_201,region_cat_216,region_cat_238,region_cat_253,region_cat_27,region_cat_275,region_cat_276,region_cat_298,region_cat_3,region_cat_308,region_cat_325,region_cat_333,region_cat_344,region_cat_359,region_cat_368,region_cat_374,region_cat_393,region_cat_394,region_cat_395,region_cat_40,region_cat_55,region_cat_65,region_cat_79,region_cat_80,region_cat_94,city_cat_0,city_cat_1,city_cat_10,city_cat_100,city_cat_101,city_cat_102,city_cat_103,city_cat_104,city_cat_105,city_cat_106,city_cat_107,city_cat_108,city_cat_109,city_cat_11,city_cat_110,city_cat_111,city_cat_112,city_cat_113,city_cat_114,city_cat_115,city_cat_116,city_cat_117,city_cat_118,city_cat_119,city_cat_12,city_cat_120,city_cat_121,city_cat_122,city_cat_123,city_cat_124,city_cat_125,city_cat_126,city_cat_127,city_cat_128,city_cat_129,city_cat_13,city_cat_130,city_cat_131,city_cat_132,city_cat_133,city_cat_134,city_cat_135,city_cat_136,city_cat_137,city_cat_138,city_cat_139,city_cat_14,city_cat_140,city_cat_141,city_cat_142,city_cat_143,city_cat_144,city_cat_145,city_cat_146,city_cat_147,city_cat_148,city_cat_149,city_cat_15,city_cat_150,city_cat_151,city_cat_152,city_cat_153,city_cat_154,city_cat_155,city_cat_156,city_cat_157,city_cat_158,city_cat_159,city_cat_16,city_cat_160,city_cat_161,city_cat_162,city_cat_163,city_cat_164,city_cat_165,city_cat_166,city_cat_167,city_cat_168,city_cat_169,city_cat_17,city_cat_170,city_cat_171,city_cat_172,city_cat_173,city_cat_174,city_cat_175,city_cat_176,city_cat_177,city_cat_178,city_cat_179,city_cat_18,city_cat_180,city_cat_181,city_cat_183,city_cat_184,city_cat_185,city_cat_186,city_cat_187,city_cat_188,city_cat_189,city_cat_19,city_cat_190,city_cat_191,city_cat_192,city_cat_193,city_cat_194,city_cat_195,city_cat_196,city_cat_2,city_cat_20,city_cat_201,city_cat_202,city_cat_203,city_cat_204,city_cat_205,city_cat_206,city_cat_207,city_cat_208,city_cat_209,city_cat_21,city_cat_210,city_cat_211,city_cat_212,city_cat_213,city_cat_214,city_cat_215,city_cat_216,city_cat_217,city_cat_218,city_cat_219,city_cat_22,city_cat_220,city_cat_221,city_cat_222,city_cat_223,city_cat_224,city_cat_225,city_cat_226,city_cat_227,city_cat_228,city_cat_229,city_cat_23,city_cat_230,city_cat_231,city_cat_232,city_cat_233,city_cat_234,city_cat_235,city_cat_236,city_cat_237,city_cat_238,city_cat_239,city_cat_24,city_cat_240,city_cat_241,city_cat_242,city_cat_243,city_cat_244,city_cat_245,city_cat_246,city_cat_247,city_cat_248,city_cat_249,city_cat_25,city_cat_250,city_cat_251,city_cat_252,city_cat_253,city_cat_254,city_cat_255,city_cat_26,city_cat_27,city_cat_275,city_cat_276,city_cat_277,city_cat_278,city_cat_279,city_cat_28,city_cat_280,city_cat_281,city_cat_282,city_cat_283,city_cat_284,city_cat_285,city_cat_286,city_cat_287,city_cat_288,city_cat_289,city_cat_29,city_cat_290,city_cat_291,city_cat_292,city_cat_293,city_cat_294,city_cat_295,city_cat_296,city_cat_297,city_cat_299,city_cat_3,city_cat_30,city_cat_300,city_cat_301,city_cat_302,city_cat_303,city_cat_304,city_cat_305,city_cat_306,city_cat_307,city_cat_308,city_cat_309,city_cat_31,city_cat_310,city_cat_311,city_cat_312,city_cat_313,city_cat_314,city_cat_315,city_cat_316,city_cat_317,city_cat_318,city_cat_319,city_cat_32,city_cat_320,city_cat_321,city_cat_322,city_cat_323,city_cat_324,city_cat_325,city_cat_326,city_cat_327,city_cat_328,city_cat_329,city_cat_33,city_cat_330,city_cat_331,city_cat_332,city_cat_333,city_cat_334,city_cat_335,city_cat_336,city_cat_337,city_cat_338,city_cat_339,city_cat_34,city_cat_340,city_cat_341,city_cat_342,city_cat_343,city_cat_344,city_cat_345,city_cat_346,city_cat_347,city_cat_348,city_cat_349,city_cat_35,city_cat_350,city_cat_351,city_cat_352,city_cat_353,city_cat_354,city_cat_355,city_cat_356,city_cat_357,city_cat_358,city_cat_359,city_cat_36,city_cat_360,city_cat_361,city_cat_362,city_cat_363,city_cat_364,city_cat_365,city_cat_366,city_cat_367,city_cat_368,city_cat_369,city_cat_37,city_cat_370,city_cat_371,city_cat_372,city_cat_373,city_cat_374,city_cat_375,city_cat_376,city_cat_377,city_cat_378,city_cat_379,city_cat_38,city_cat_380,city_cat_381,city_cat_382,city_cat_383,city_cat_384,city_cat_385,city_cat_386,city_cat_387,city_cat_388,city_cat_39,city_cat_393,city_cat_394,city_cat_395,city_cat_396,city_cat_397,city_cat_398,city_cat_399,city_cat_4,city_cat_40,city_cat_41,city_cat_42,city_cat_43,city_cat_44,city_cat_45,city_cat_46,city_cat_47,city_cat_48,city_cat_49,city_cat_5,city_cat_50,city_cat_51,city_cat_52,city_cat_53,city_cat_54,city_cat_56,city_cat_57,city_cat_58,city_cat_59,city_cat_6,city_cat_60,city_cat_61,city_cat_62,city_cat_63,city_cat_64,city_cat_65,city_cat_66,city_cat_67,city_cat_68,city_cat_69,city_cat_7,city_cat_70,city_cat_71,city_cat_72,city_cat_73,city_cat_74,city_cat_75,city_cat_76,city_cat_77,city_cat_78,city_cat_79,city_cat_8,city_cat_80,city_cat_81,city_cat_82,city_cat_83,city_cat_84,city_cat_85,city_cat_86,city_cat_87,city_cat_88,city_cat_89,city_cat_9,city_cat_90,city_cat_91,city_cat_92,city_cat_93,city_cat_94,city_cat_95,city_cat_96,city_cat_97,city_cat_98,city_cat_99,adexchange_cat_1.0,adexchange_cat_2.0,adexchange_cat_3.0,adexchange_cat_4.0,advertiser_cat_1458,advertiser_cat_2259,advertiser_cat_2261,advertiser_cat_2821,advertiser_cat_2997,advertiser_cat_3358,advertiser_cat_3386,advertiser_cat_3427,advertiser_cat_3476,os_android,os_ios,os_linux,os_mac,os_other,os_windows,browser_chrome,browser_firefox,browser_ie,browser_maxthon,browser_opera,browser_other,browser_safari,browser_sogou,browser_theworld,part_of_the_day_Friday_Evening,part_of_the_day_Friday_Morning,part_of_the_day_Friday_Night,part_of_the_day_Monday_Evening,part_of_the_day_Monday_Morning,part_of_the_day_Monday_Night,part_of_the_day_Saturday_Evening,part_of_the_day_Saturday_Morning,part_of_the_day_Saturday_Night,part_of_the_day_Sunday_Evening,part_of_the_day_Sunday_Morning,part_of_the_day_Sunday_Night,part_of_the_day_Tuesday_Evening,part_of_the_day_Tuesday_Morning,part_of_the_day_Tuesday_Night,part_of_the_day_Wednesday_Evening,part_of_the_day_Wednesday_Morning,part_of_the_day_Wednesday_Night
0,0,238,5,40000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,294,23,75000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,238,24,62500,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
3,0,300,25,96000,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
4,0,277,133,65520,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### Split back to train and validation

In [29]:
train_dummy = all_data_dummy[:train_2.shape[0]]
validation_dummy = all_data_dummy[train_2.shape[0]:]

In [30]:
train_dummy.shape, validation_dummy.shape

((2430981, 701), (303925, 701))

In [31]:
train_dummy.head()

Unnamed: 0,click,bidprice,payprice,slotarea,slotprice_cat,usertag_10006,usertag_10024,usertag_10031,usertag_10048,usertag_10052,usertag_10057,usertag_10059,usertag_10063,usertag_10067,usertag_10074,usertag_10075,usertag_10076,usertag_10077,usertag_10079,usertag_10083,usertag_10093,usertag_10102,usertag_10110,usertag_10111,usertag_10114,usertag_10115,usertag_10116,usertag_10117,usertag_10118,usertag_10120,usertag_10123,usertag_10125,usertag_10126,usertag_10127,usertag_10129,usertag_10130,usertag_10131,usertag_10133,usertag_10138,usertag_10140,usertag_10142,usertag_10145,usertag_10146,usertag_10147,usertag_10148,usertag_10149,usertag_10684,usertag_11092,usertag_11278,usertag_11379,usertag_11423,usertag_11512,usertag_11576,usertag_11632,usertag_11680,usertag_11724,usertag_11944,usertag_13042,usertag_13403,usertag_13496,usertag_13678,usertag_13776,usertag_13800,usertag_13866,usertag_13874,usertag_14273,usertag_15398,usertag_16593,usertag_16617,usertag_16661,usertag_16706,usertag_16751,usertag_16753,slotvisibility_0,slotvisibility_1,slotvisibility_2,slotvisibility_255,slotvisibility_FifthView,slotvisibility_FirstView,slotvisibility_FourthView,slotvisibility_Na,slotvisibility_OtherView,slotvisibility_SecondView,slotvisibility_ThirdView,slotformat_0,slotformat_1,slotformat_5,slotformat_Na,creative_0055e8503dc053435b3599fe44af118b,creative_00fccc64a1ee2809348509b7ac2a97a5,creative_011c1a3d4d3f089a54f9b70a4c0a6eb3,creative_0cd33fcb336655841d3e1441b915748d,creative_10717,creative_10718,creative_10719,creative_10720,creative_10721,creative_10722,creative_10723,creative_10733,creative_10734,creative_11908,creative_12610,creative_12611,creative_12612,creative_12613,creative_12614,creative_12615,creative_12616,creative_12618,creative_12619,creative_12620,creative_12621,creative_12622,creative_12623,creative_12624,creative_12625,creative_12626,creative_12627,creative_12628,creative_12629,creative_12630,creative_12631,creative_12632,creative_12633,creative_12782,creative_12987,creative_13606a7c541dcd9ca1948875a760bb31,creative_16ff51916f0c796ec72eefc633c4a8ed,creative_1a43f1ff53f48573803d4a3c31ebc163,creative_23485fcd23122d755d38f8c89d46ca56,creative_23d6dade7ed21cea308205b37594003e,creative_266f17dcc3a2280172a48fec62be7d02,creative_2abc9eaf57d17a96195af3f63c45dc72,creative_2f88fc9cf0141b5bbaf251cab07f4ce7,creative_3b805a00d99d5ee2493c8fb0063e30e9,creative_3d8f1161832704a1a34e1ccdda11a81e,creative_4400bf8dea968a0068824792fd336c4c,creative_44966cc8da1ed40c95d59e863c8c75f0,creative_449a22cd91d9042eda3d3a1b89a22ea8,creative_47905feeb59223468fb898b3c9ac024d,creative_48c6405fa5bdecc850ea6ef5e6cf4da5,creative_48f2e9ba15708c0146bda5e1dd653caa,creative_4ad7e35171a3d8de73bb862791575f2e,creative_4b724cd63dfb905ebcd54e64572c646d,creative_54c6e2569e059cf336f7e7742b44c3ba,creative_59f065a795a663140e36eec106464524,creative_5c4e0bb0db45e2d1b3a14f817196ebd6,creative_612599432d200b093719dd1f372f7a30,creative_62f7f9a6dca2f80cc00f17dcda730bc1,creative_6455a6f45433aef94075f34dafd8d3bf,creative_672fd361820651d4d99b47d7bf48d715,creative_6b9331e0f0dbbfef42c308333681f0a3,creative_6cdf8fdd3e01122b09b5b411510a2385,creative_6fb071c11134e5078c1184ef1370a110,creative_7097e4210dea4d69f07f0f5e4343529c,creative_7184c9560e68e977187e67e45a4f3198,creative_7314,creative_7315,creative_7316,creative_7317,creative_7318,creative_7319,creative_7320,creative_7321,creative_7322,creative_7323,creative_7324,creative_7325,creative_7326,creative_7327,creative_7328,creative_7330,creative_7331,creative_7332,creative_7333,creative_7334,creative_7335,creative_7336,creative_77819d3e0b3467fe5c7b16d68ad923a1,creative_798b2d49952d77f1eace9f23c210d0b5,creative_7eb0065067225fa5f511f7ffb9895f24,creative_80446133fda50388678789e3ebd9fdb1,creative_80a776343079ed94d424f4607b35fd39,creative_82f125e356439d73902ae85e2be96777,creative_832b91d59d0cb5731431653204a76c0e,creative_86c2543527c86a893d4d4f68810a0416,creative_87945ed58e806dbdc291b3662f581354,creative_8dff45ed862a740986dbe688aafee7e5,creative_911b2d84826786018761e8c0b0a3a60c,creative_a10c31a8ff5f42930b4c34035e523886,creative_a499988a822facd86dd0e8e4ffef8532,creative_a4f763f78ef3eedfe614263b94a8924e,creative_b90c12ed2bd7950c6027bf9c6937c48a,creative_bac72fe483c04a0ea2490539b4d06e6a,creative_bc27493ad2351e2577bc8664172544f8,creative_c46090c887c257b61ab1fa11baee91d8,creative_c936045d792f6ea3aa22d86d93f5cf23,creative_c938195f9e404b4f38c7e71bf50263e5,creative_cb7c76e7784031272e37af8e7e9b062c,creative_cc9b344e950b4f8c2b96537174a343b7,creative_cd6f20e0b2a699c352f2e55a7a2091fd,creative_d01411218cc79bc49d2a4078c4093b76,creative_d5cecca9a6cbd7a0a48110f1306b26d1,creative_d881a6c788e76c2c27ed1ef04f119544,creative_dba9860463419dcd6c1b0152dfe7314c,creative_dc0998c10f8f0b623b5d949e8272e4c7,creative_e049ebe262e20bed5f9b975208db375b,creative_e1096a0a4f557814cb226fa5ba2339b2,creative_e1af08818a6cd6bbba118bb54a651961,creative_e1b0b6fb39abeb138c0b1e37c5f6d777,creative_e87d7633d474589c2e2e3ba4eda53f6c,creative_f1691b7571803d5a46adcb1f39f94d44,creative_f49541b4e0999d0c934ee3eea142a60e,creative_f65c8bdb41e9015970bac52baa813239,creative_fa8f0532dd5144b5fa748459e8d90b49,creative_fb5afa9dba1274beaf3dad86baf97e89,creative_fe222c13e927077ad3ea087a92c0935c,creative_ff5123fb9333ca095034c62fdaaf51aa,weekday_cat_0,weekday_cat_1,weekday_cat_2,weekday_cat_3,weekday_cat_4,weekday_cat_5,weekday_cat_6,hour_cat_0,hour_cat_1,hour_cat_10,hour_cat_11,hour_cat_12,hour_cat_13,hour_cat_14,hour_cat_15,hour_cat_16,hour_cat_17,hour_cat_18,hour_cat_19,hour_cat_2,hour_cat_20,hour_cat_21,hour_cat_22,hour_cat_23,hour_cat_3,hour_cat_4,hour_cat_5,hour_cat_6,hour_cat_7,hour_cat_8,hour_cat_9,region_cat_0,region_cat_1,region_cat_106,region_cat_124,region_cat_134,region_cat_146,region_cat_15,region_cat_164,region_cat_183,region_cat_2,region_cat_201,region_cat_216,region_cat_238,region_cat_253,region_cat_27,region_cat_275,region_cat_276,region_cat_298,region_cat_3,region_cat_308,region_cat_325,region_cat_333,region_cat_344,region_cat_359,region_cat_368,region_cat_374,region_cat_393,region_cat_394,region_cat_395,region_cat_40,region_cat_55,region_cat_65,region_cat_79,region_cat_80,region_cat_94,city_cat_0,city_cat_1,city_cat_10,city_cat_100,city_cat_101,city_cat_102,city_cat_103,city_cat_104,city_cat_105,city_cat_106,city_cat_107,city_cat_108,city_cat_109,city_cat_11,city_cat_110,city_cat_111,city_cat_112,city_cat_113,city_cat_114,city_cat_115,city_cat_116,city_cat_117,city_cat_118,city_cat_119,city_cat_12,city_cat_120,city_cat_121,city_cat_122,city_cat_123,city_cat_124,city_cat_125,city_cat_126,city_cat_127,city_cat_128,city_cat_129,city_cat_13,city_cat_130,city_cat_131,city_cat_132,city_cat_133,city_cat_134,city_cat_135,city_cat_136,city_cat_137,city_cat_138,city_cat_139,city_cat_14,city_cat_140,city_cat_141,city_cat_142,city_cat_143,city_cat_144,city_cat_145,city_cat_146,city_cat_147,city_cat_148,city_cat_149,city_cat_15,city_cat_150,city_cat_151,city_cat_152,city_cat_153,city_cat_154,city_cat_155,city_cat_156,city_cat_157,city_cat_158,city_cat_159,city_cat_16,city_cat_160,city_cat_161,city_cat_162,city_cat_163,city_cat_164,city_cat_165,city_cat_166,city_cat_167,city_cat_168,city_cat_169,city_cat_17,city_cat_170,city_cat_171,city_cat_172,city_cat_173,city_cat_174,city_cat_175,city_cat_176,city_cat_177,city_cat_178,city_cat_179,city_cat_18,city_cat_180,city_cat_181,city_cat_183,city_cat_184,city_cat_185,city_cat_186,city_cat_187,city_cat_188,city_cat_189,city_cat_19,city_cat_190,city_cat_191,city_cat_192,city_cat_193,city_cat_194,city_cat_195,city_cat_196,city_cat_2,city_cat_20,city_cat_201,city_cat_202,city_cat_203,city_cat_204,city_cat_205,city_cat_206,city_cat_207,city_cat_208,city_cat_209,city_cat_21,city_cat_210,city_cat_211,city_cat_212,city_cat_213,city_cat_214,city_cat_215,city_cat_216,city_cat_217,city_cat_218,city_cat_219,city_cat_22,city_cat_220,city_cat_221,city_cat_222,city_cat_223,city_cat_224,city_cat_225,city_cat_226,city_cat_227,city_cat_228,city_cat_229,city_cat_23,city_cat_230,city_cat_231,city_cat_232,city_cat_233,city_cat_234,city_cat_235,city_cat_236,city_cat_237,city_cat_238,city_cat_239,city_cat_24,city_cat_240,city_cat_241,city_cat_242,city_cat_243,city_cat_244,city_cat_245,city_cat_246,city_cat_247,city_cat_248,city_cat_249,city_cat_25,city_cat_250,city_cat_251,city_cat_252,city_cat_253,city_cat_254,city_cat_255,city_cat_26,city_cat_27,city_cat_275,city_cat_276,city_cat_277,city_cat_278,city_cat_279,city_cat_28,city_cat_280,city_cat_281,city_cat_282,city_cat_283,city_cat_284,city_cat_285,city_cat_286,city_cat_287,city_cat_288,city_cat_289,city_cat_29,city_cat_290,city_cat_291,city_cat_292,city_cat_293,city_cat_294,city_cat_295,city_cat_296,city_cat_297,city_cat_299,city_cat_3,city_cat_30,city_cat_300,city_cat_301,city_cat_302,city_cat_303,city_cat_304,city_cat_305,city_cat_306,city_cat_307,city_cat_308,city_cat_309,city_cat_31,city_cat_310,city_cat_311,city_cat_312,city_cat_313,city_cat_314,city_cat_315,city_cat_316,city_cat_317,city_cat_318,city_cat_319,city_cat_32,city_cat_320,city_cat_321,city_cat_322,city_cat_323,city_cat_324,city_cat_325,city_cat_326,city_cat_327,city_cat_328,city_cat_329,city_cat_33,city_cat_330,city_cat_331,city_cat_332,city_cat_333,city_cat_334,city_cat_335,city_cat_336,city_cat_337,city_cat_338,city_cat_339,city_cat_34,city_cat_340,city_cat_341,city_cat_342,city_cat_343,city_cat_344,city_cat_345,city_cat_346,city_cat_347,city_cat_348,city_cat_349,city_cat_35,city_cat_350,city_cat_351,city_cat_352,city_cat_353,city_cat_354,city_cat_355,city_cat_356,city_cat_357,city_cat_358,city_cat_359,city_cat_36,city_cat_360,city_cat_361,city_cat_362,city_cat_363,city_cat_364,city_cat_365,city_cat_366,city_cat_367,city_cat_368,city_cat_369,city_cat_37,city_cat_370,city_cat_371,city_cat_372,city_cat_373,city_cat_374,city_cat_375,city_cat_376,city_cat_377,city_cat_378,city_cat_379,city_cat_38,city_cat_380,city_cat_381,city_cat_382,city_cat_383,city_cat_384,city_cat_385,city_cat_386,city_cat_387,city_cat_388,city_cat_39,city_cat_393,city_cat_394,city_cat_395,city_cat_396,city_cat_397,city_cat_398,city_cat_399,city_cat_4,city_cat_40,city_cat_41,city_cat_42,city_cat_43,city_cat_44,city_cat_45,city_cat_46,city_cat_47,city_cat_48,city_cat_49,city_cat_5,city_cat_50,city_cat_51,city_cat_52,city_cat_53,city_cat_54,city_cat_56,city_cat_57,city_cat_58,city_cat_59,city_cat_6,city_cat_60,city_cat_61,city_cat_62,city_cat_63,city_cat_64,city_cat_65,city_cat_66,city_cat_67,city_cat_68,city_cat_69,city_cat_7,city_cat_70,city_cat_71,city_cat_72,city_cat_73,city_cat_74,city_cat_75,city_cat_76,city_cat_77,city_cat_78,city_cat_79,city_cat_8,city_cat_80,city_cat_81,city_cat_82,city_cat_83,city_cat_84,city_cat_85,city_cat_86,city_cat_87,city_cat_88,city_cat_89,city_cat_9,city_cat_90,city_cat_91,city_cat_92,city_cat_93,city_cat_94,city_cat_95,city_cat_96,city_cat_97,city_cat_98,city_cat_99,adexchange_cat_1.0,adexchange_cat_2.0,adexchange_cat_3.0,adexchange_cat_4.0,advertiser_cat_1458,advertiser_cat_2259,advertiser_cat_2261,advertiser_cat_2821,advertiser_cat_2997,advertiser_cat_3358,advertiser_cat_3386,advertiser_cat_3427,advertiser_cat_3476,os_android,os_ios,os_linux,os_mac,os_other,os_windows,browser_chrome,browser_firefox,browser_ie,browser_maxthon,browser_opera,browser_other,browser_safari,browser_sogou,browser_theworld,part_of_the_day_Friday_Evening,part_of_the_day_Friday_Morning,part_of_the_day_Friday_Night,part_of_the_day_Monday_Evening,part_of_the_day_Monday_Morning,part_of_the_day_Monday_Night,part_of_the_day_Saturday_Evening,part_of_the_day_Saturday_Morning,part_of_the_day_Saturday_Night,part_of_the_day_Sunday_Evening,part_of_the_day_Sunday_Morning,part_of_the_day_Sunday_Night,part_of_the_day_Tuesday_Evening,part_of_the_day_Tuesday_Morning,part_of_the_day_Tuesday_Night,part_of_the_day_Wednesday_Evening,part_of_the_day_Wednesday_Morning,part_of_the_day_Wednesday_Night
0,0,238,5,40000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,294,23,75000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,238,24,62500,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
3,0,300,25,96000,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
4,0,277,133,65520,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [32]:
train_dummy.columns

Index(['click', 'bidprice', 'payprice', 'slotarea', 'slotprice_cat',
       'usertag_10006', 'usertag_10024', 'usertag_10031', 'usertag_10048',
       'usertag_10052',
       ...
       'part_of_the_day_Saturday_Night', 'part_of_the_day_Sunday_Evening',
       'part_of_the_day_Sunday_Morning', 'part_of_the_day_Sunday_Night',
       'part_of_the_day_Tuesday_Evening', 'part_of_the_day_Tuesday_Morning',
       'part_of_the_day_Tuesday_Night', 'part_of_the_day_Wednesday_Evening',
       'part_of_the_day_Wednesday_Morning', 'part_of_the_day_Wednesday_Night'],
      dtype='object', length=701)

## Target variables

In [33]:
train["click"].value_counts()

0    2429188
1       1793
Name: click, dtype: int64

In [34]:
validation["click"].value_counts()

0    303723
1       202
Name: click, dtype: int64

## Downsample Training Set

It is obvious that we have to deal with extremely imbalanced datasets. In the training set, the "negative" class (e.g. impressions) corresponds to 99.92% of total data-points and "positive" class corresponds to 0.08%. In absolute values, we have 2429188 impressions and only 1793 clicks. That means that we have a class-imbalance ratio in the order of 1:1000. For more information on the class imbalance problem, check [here](http://www.ele.uri.edu/faculty/he/PDFfiles/ImbalancedLearning.pdf). 

In [35]:
from imblearn.under_sampling import TomekLinks, RandomUnderSampler, NearMiss
from imblearn.over_sampling import SMOTE
from imblearn.combine import SMOTETomek

In [36]:
# Random downsampling of the dataset
if name_helper == "pCTR":
    train_dummy_outliers = train_dummy.loc[train_dummy["click"] == 1]
    train_dummy_inliers = train_dummy.loc[train_dummy["click"] == 0].sample(n = 20000, random_state = rand_seed).reset_index(drop = True)
else:
    train_dummy_outliers = train_dummy.loc[train_dummy["click"] == 1]
    train_dummy_inliers = train_dummy.loc[train_dummy["click"] == 0].sample(n = 40000, random_state = rand_seed).reset_index(drop = True)

In [37]:
train_dummy_inliers.shape, train_dummy_outliers.shape

((40000, 701), (1793, 701))

In [38]:
train_dummy_2 = pd.concat([train_dummy_inliers, train_dummy_outliers], axis = 0)

In [39]:
train_dummy_2.shape

(41793, 701)

In [40]:
if name_helper == "pCTR":
    X = train_dummy_2.loc[:, ((train_dummy_2.columns != "click") & 
                          (train_dummy_2.columns != "payprice") &
                          (train_dummy_2.columns != "bidprice"))].values
    y = train_dummy_2["click"].values

    X_validation = validation_dummy.loc[:, ((validation_dummy.columns != "click") & 
                                            (validation_dummy.columns != "payprice") & 
                                            (validation_dummy.columns != "bidprice"))]
    y_validation = validation_dummy["click"]
    
else:
    X = train_dummy_2.loc[:, ((train_dummy_2.columns != "click") & 
                          (train_dummy_2.columns != "payprice") &
                          (train_dummy_2.columns != "bidprice"))].values
    y = train_dummy_2["payprice"].values

    X_validation = validation_dummy.loc[:, ((validation_dummy.columns != "click") & 
                                            (validation_dummy.columns != "payprice") & 
                                            (validation_dummy.columns != "bidprice"))]
    y_validation = validation_dummy["payprice"]

In [41]:
X.shape, y.shape, X_validation.shape, y_validation.shape

((41793, 698), (41793,), (303925, 698), (303925,))

In [42]:
clicks = train.loc[train["click"] == 1].shape[0]
impressions = train.loc[train["click"] == 0].shape[0]

In [43]:
# SMOTE oversampling
if name_helper == "pCTR":
    sm = SMOTE(ratio = dict({0: train_dummy_inliers.shape[0], 1: train_dummy_inliers.shape[0]}), random_state = rand_seed)
    % time X_resampled, y_resampled = sm.fit_sample(X, y)
else:
    X_resampled, y_resampled = X, y

In [44]:
X_resampled.shape, y_resampled.shape

((41793, 698), (41793,))

### Store results

In [45]:
from sklearn.externals import joblib

In [46]:
if name_helper == "pCTR":
    joblib.dump(X_resampled, "X_train_resampled_pCTR.pkl")
    joblib.dump(y_resampled, "y_train_resampled_pCTR.pkl")
    joblib.dump(X_validation, "X_validation_pCTR.pkl")
    joblib.dump(y_validation, "y_validation_pCTR.pkl")
    
else:
    joblib.dump(X_resampled, "X_train_resampled_basebid.pkl")
    joblib.dump(y_resampled, "y_train_resampled_basebid.pkl")
    joblib.dump(X_validation, "X_validation_basebid.pkl")
    joblib.dump(y_validation, "y_validation_basebid.pkl")