In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import warnings as wn
import sklearn.preprocessing as skpre
import category_encoders as ce

# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so library diagnostics (deprecations, pandas
# assignment warnings, ...) are hidden too — consider narrowing it.
wn.simplefilter( "ignore" )

In [2]:
# Directory that receives every feature CSV written by the cells of this notebook.
# NOTE(review): absolute, machine-specific Windows path — adjust before running elsewhere.
loc_ftr = r"D:\FacundoTorraca\Documents\TP2_ML\FeaturesLocal\FeaturesSC"

## <span style="color:yellow"> **Preparamos el set de entrenamiento para sacar features** </span> 

In [3]:
# Directory the auction CSVs are read from.
# NOTE(review): absolute, machine-specific Windows path — adjust before running elsewhere.
loc_ts = r"D:\FacundoTorraca\Documents\TP2_ML\Training Sets"

In [4]:
# Auction logs of the two observation windows (file names suggest days 18-20 and 21-23;
# the cells below use 2019-04-18 and 2019-04-21 as the respective window starts).
auc_trn, auc_tst = map(
    pd.read_csv,
    ( loc_ts + "\\auc_18_20.csv", loc_ts + "\\auc_21_23.csv" ),
)

## <span style="color:yellow"> **Preparamos el ref_hash de cada ventana** </span> 

In [5]:
# Directory the label CSVs are read from.
# NOTE(review): absolute, machine-specific Windows path — adjust before running elsewhere.
loc_lb = r"D:\FacundoTorraca\Documents\TP2_ML\Labels"

In [6]:
# Label files paired with each auction window (file names suggest days 21-23 and 24-26,
# presumably the three days that follow each observation window — TODO confirm).
lb_ins_trn, lb_ins_tst = map(
    pd.read_csv,
    ( loc_lb + "\\label_ins_21_23.csv", loc_lb + "\\label_ins_24_26.csv" ),
)

In [7]:
# Single-column frames holding the ref_hash keys; every feature table below starts
# from a copy of one of them and is filled through a left merge.
rh_trn = lb_ins_trn.loc[ :, ["ref_hash"] ]
rh_tst = lb_ins_tst.loc[ :, ["ref_hash"] ]

# Column "21_23_sc" of the training label file, kept as a one-column frame.
tg_trn = lb_ins_trn.loc[ :, ["21_23_sc"] ]

## <span style="color:yellow"> **=======================================================================================================** </span> 

## <span style="color:green"> **Hora mas popular del usuario en las subastas** </span> 

Le aplicamos la transformacion sin( (hora * pi)/24 ) para agregarle periodicidad. 

In [8]:
# Feature "main_hour": the hour of day with most auctions for each device, encoded as
# sin(hour * pi / 24).
# NOTE(review): sin(pi * h / 24) gives the same value for h and 24 - h (e.g. 6 and 18),
# so the encoding is not a full 24-hour cycle — confirm this is intended.
main_ahr_trn = rh_trn.copy()
main_ahr_tst = rh_tst.copy()

# One row per auction: the device and the hour of day of that auction.
hour_mode_trn = auc_trn[ ["device_id"] ].assign( hour = pd.to_datetime( auc_trn["date"] ).dt.hour )
hour_mode_tst = auc_tst[ ["device_id"] ].assign( hour = pd.to_datetime( auc_tst["date"] ).dt.hour )

# Number of auctions per (device, hour) pair.
hour_mode_trn = hour_mode_trn.groupby( ["device_id", "hour"] ).size().reset_index( name = "count" )
hour_mode_tst = hour_mode_tst.groupby( ["device_id", "hour"] ).size().reset_index( name = "count" )

# Sort so that each device's most frequent hour comes first, then keep only that row.
hour_mode_trn = (
    hour_mode_trn
    .sort_values( ["device_id", "count"], ascending = False )
    .drop_duplicates( subset = ["device_id"], keep = "first" )
    .drop( columns = "count" )
    .rename( columns = {"device_id": "ref_hash"} )
)
hour_mode_tst = (
    hour_mode_tst
    .sort_values( ["device_id", "count"], ascending = False )
    .drop_duplicates( subset = ["device_id"], keep = "first" )
    .drop( columns = "count" )
    .rename( columns = {"device_id": "ref_hash"} )
)

# Left merge keeps every ref_hash; devices without auctions keep NaN as main_hour.
main_ahr_trn = main_ahr_trn.merge( hour_mode_trn, how = "left", on = "ref_hash" ).rename( columns = {"hour": "main_hour"} )
main_ahr_tst = main_ahr_tst.merge( hour_mode_tst, how = "left", on = "ref_hash" ).rename( columns = {"hour": "main_hour"} )

main_ahr_trn["main_hour"] = main_ahr_trn["main_hour"].map( lambda hour: np.sin( (hour * np.pi) / 24 ) )
main_ahr_tst["main_hour"] = main_ahr_tst["main_hour"].map( lambda hour: np.sin( (hour * np.pi) / 24 ) )

main_ahr_trn.to_csv( loc_ftr + "\\main_ahr_trn.csv", index = False )
main_ahr_tst.to_csv( loc_ftr + "\\main_ahr_tst.csv", index = False )

## <span style="color:green"> **Hora de la primera subasta en esa ventana** </span> 

In [9]:
# Feature "hour_frt_auc": hour of day (0-23) of each device's first auction in its window.
hr_f_auc_trn = rh_trn.copy()
hr_f_auc_tst = rh_tst.copy()

# Earliest auction per device: sort by date and keep the first row of every device.
first_auc_hour_trn = auc_trn[ ["device_id","date"] ].sort_values( "date" ).drop_duplicates( subset = "device_id", keep = "first" ).rename( columns = {"device_id":"ref_hash"} )
first_auc_hour_tst = auc_tst[ ["device_id","date"] ].sort_values( "date" ).drop_duplicates( subset = "device_id", keep = "first" ).rename( columns = {"device_id":"ref_hash"} )

# Only the hour is exported. The elapsed-seconds column that used to be computed here
# was dropped again before the merge, so that dead computation has been removed.
first_auc_hour_trn["hour_frt_auc"] = pd.to_datetime( first_auc_hour_trn["date"] ).dt.hour
first_auc_hour_tst["hour_frt_auc"] = pd.to_datetime( first_auc_hour_tst["date"] ).dt.hour

first_auc_hour_trn = first_auc_hour_trn.drop( columns = "date" )
first_auc_hour_tst = first_auc_hour_tst.drop( columns = "date" )

# Left merge keeps every ref_hash; devices without auctions keep NaN.
hr_f_auc_trn = hr_f_auc_trn.merge( first_auc_hour_trn, how = "left", on = "ref_hash" )
hr_f_auc_tst = hr_f_auc_tst.merge( first_auc_hour_tst, how = "left", on = "ref_hash" )

hr_f_auc_trn.to_csv( loc_ftr + "\\hr_f_auc_trn.csv", index = False )
hr_f_auc_tst.to_csv( loc_ftr + "\\hr_f_auc_tst.csv", index = False )

## <span style="color:green"> **Cantidad de Subastas por dispositivo en la ventana previa a la conversion** </span>

In [10]:
# Feature "cant_auc": number of auctions each device received in its window.
cant_auc_trn = rh_trn.copy()
cant_auc_tst = rh_tst.copy()

# Count rows per device with groupby().size() and name both columns explicitly.
# The previous value_counts().to_frame().reset_index().rename(...) relied on the
# reset index column being called "index" and the counts column "device_id", which is
# no longer true on pandas >= 2.0 and left the frame without a "ref_hash" column.
auc_per_dev_trn = auc_trn.groupby( "device_id" ).size().rename_axis( "ref_hash" ).reset_index( name = "cant_auc" )
auc_per_dev_tst = auc_tst.groupby( "device_id" ).size().rename_axis( "ref_hash" ).reset_index( name = "cant_auc" )

# Devices absent from the auction log come out of the left merge as NaN -> 0 auctions.
cant_auc_trn = cant_auc_trn.merge( auc_per_dev_trn, how = "left", on = "ref_hash" ).fillna( 0 )
cant_auc_tst = cant_auc_tst.merge( auc_per_dev_tst, how = "left", on = "ref_hash" ).fillna( 0 )

cant_auc_trn.to_csv( loc_ftr + "\\cant_auc_trn.csv", index = False )
cant_auc_tst.to_csv( loc_ftr + "\\cant_auc_tst.csv", index = False )

## <span style="color:green"> **Tiempo hasta la primera auction en esa ventana** </span> 

Le asignamos cuánto tiempo (en segundos), contado desde el inicio de su ventana (18-20 para train, 21-23 para test), tardó cada dispositivo en realizar su primera auction

In [11]:
# Feature "time_to_frt_auc": seconds between the start of the window and the device's
# first auction (window starts: 2019-04-18 for train, 2019-04-21 for test).
frst_auc_trn = rh_trn.copy()
frst_auc_tst = rh_tst.copy()

# Earliest auction per device.
first_auctions_rh_ins_trn = (
    auc_trn[ ["device_id", "date"] ]
    .sort_values( "date" )
    .drop_duplicates( subset = "device_id", keep = "first" )
    .rename( columns = {"device_id": "ref_hash"} )
)
first_auctions_rh_ins_tst = (
    auc_tst[ ["device_id", "date"] ]
    .sort_values( "date" )
    .drop_duplicates( subset = "device_id", keep = "first" )
    .rename( columns = {"device_id": "ref_hash"} )
)

# Elapsed seconds since the first instant of each window.
first_auctions_rh_ins_trn["time_to_frt_auc"] = ( pd.to_datetime( first_auctions_rh_ins_trn["date"] ) - dt.datetime( 2019, 4, 18 ) ).dt.total_seconds()
first_auctions_rh_ins_tst["time_to_frt_auc"] = ( pd.to_datetime( first_auctions_rh_ins_tst["date"] ) - dt.datetime( 2019, 4, 21 ) ).dt.total_seconds()

first_auctions_rh_ins_trn = first_auctions_rh_ins_trn.drop( columns = "date" )
first_auctions_rh_ins_tst = first_auctions_rh_ins_tst.drop( columns = "date" )

# NaN after the left merge means the device has no auction in the window; those rows
# get the maximum possible value, i.e. the window length of 3 days in seconds.
frst_auc_trn = frst_auc_trn.merge( first_auctions_rh_ins_trn, how = "left", on = "ref_hash" ).fillna( 3 * 24 * 3600 )
frst_auc_tst = frst_auc_tst.merge( first_auctions_rh_ins_tst, how = "left", on = "ref_hash" ).fillna( 3 * 24 * 3600 )

frst_auc_trn.to_csv( loc_ftr + "\\frst_auc_trn.csv", index = False )
frst_auc_tst.to_csv( loc_ftr + "\\frst_auc_tst.csv", index = False )

## <span style="color:green"> **Source que recibió más subastas por dispositivo** </span> 

#### <span style="color:orange"> **OneHotEncoding** </span> 

In [12]:
# Feature: one-hot encoding of the source_id with most auctions for each device.
srce_auc_trn = rh_trn.copy()
srce_auc_tst = rh_tst.copy()

# Number of auctions per (device, source) pair.
main_source_trn = auc_trn.groupby( ["device_id", "source_id"] ).size().reset_index( name = "cant_auc" )
main_source_tst = auc_tst.groupby( ["device_id", "source_id"] ).size().reset_index( name = "cant_auc" )

# Ascending sort + keep="last" leaves, for each device, the source with the highest count.
main_source_trn = (
    main_source_trn
    .sort_values( by = ["device_id", "cant_auc"], ascending = True )
    .drop_duplicates( subset = ["device_id"], keep = "last" )
    .drop( columns = "cant_auc" )
)
main_source_tst = (
    main_source_tst
    .sort_values( by = ["device_id", "cant_auc"], ascending = True )
    .drop_duplicates( subset = ["device_id"], keep = "last" )
    .drop( columns = "cant_auc" )
)

# Devices without auctions have no source; they get the placeholder category "no_font".
srce_auc_trn = srce_auc_trn.merge( main_source_trn.rename( columns = {"device_id": "ref_hash"} ), how = "left", on = "ref_hash" ).fillna( "no_font" )
srce_auc_tst = srce_auc_tst.merge( main_source_tst.rename( columns = {"device_id": "ref_hash"} ), how = "left", on = "ref_hash" ).fillna( "no_font" )

# The author verified by hand that both windows contain the same sources, so the two
# encodings end up with the same columns; nothing in this cell enforces it.
srce_auc_ohe_trn = pd.get_dummies( srce_auc_trn[ ["source_id"] ].astype( "category" ) )
srce_auc_ohe_tst = pd.get_dummies( srce_auc_tst[ ["source_id"] ].astype( "category" ) )

# Swap the raw source_id column for its dummy columns.
srce_auc_trn = pd.concat( [srce_auc_trn, srce_auc_ohe_trn], axis = 1 ).drop( columns = "source_id" )
srce_auc_tst = pd.concat( [srce_auc_tst, srce_auc_ohe_tst], axis = 1 ).drop( columns = "source_id" )

srce_auc_trn.to_csv( loc_ftr + "\\srce_auc_trn.csv", index = False )
srce_auc_tst.to_csv( loc_ftr + "\\srce_auc_tst.csv", index = False )

## <span style="color:green"> **Tiempo hasta la ultima auction en esa ventana** </span> 

In [13]:
# Feature "time_to_lst_auc": seconds between the start of the window and the device's
# last auction (window starts: 2019-04-18 for train, 2019-04-21 for test).
last_auc_trn = rh_trn.copy()
last_auc_tst = rh_tst.copy()

# Latest auction per device: descending sort by date, keep the first row of each device.
last_auctions_rh_ins_trn = (
    auc_trn[ ["device_id", "date"] ]
    .sort_values( "date", ascending = False )
    .drop_duplicates( subset = "device_id", keep = "first" )
    .rename( columns = {"device_id": "ref_hash"} )
)
last_auctions_rh_ins_tst = (
    auc_tst[ ["device_id", "date"] ]
    .sort_values( "date", ascending = False )
    .drop_duplicates( subset = "device_id", keep = "first" )
    .rename( columns = {"device_id": "ref_hash"} )
)

# Elapsed seconds since the first instant of each window.
last_auctions_rh_ins_trn["time_to_lst_auc"] = ( pd.to_datetime( last_auctions_rh_ins_trn["date"] ) - dt.datetime( 2019, 4, 18 ) ).dt.total_seconds()
last_auctions_rh_ins_tst["time_to_lst_auc"] = ( pd.to_datetime( last_auctions_rh_ins_tst["date"] ) - dt.datetime( 2019, 4, 21 ) ).dt.total_seconds()

last_auctions_rh_ins_trn = last_auctions_rh_ins_trn.drop( columns = "date" )
last_auctions_rh_ins_tst = last_auctions_rh_ins_tst.drop( columns = "date" )

# NaN after the left merge means the device has no auction in the window; those rows
# get the window length (3 days in seconds).
# NOTE(review): for a "last auction" feature this makes devices without auctions look
# like devices whose last auction was at the very end of the window — confirm intended.
last_auc_trn = last_auc_trn.merge( last_auctions_rh_ins_trn, how = "left", on = "ref_hash" ).fillna( 3 * 24 * 3600 )
last_auc_tst = last_auc_tst.merge( last_auctions_rh_ins_tst, how = "left", on = "ref_hash" ).fillna( 3 * 24 * 3600 )

last_auc_trn.to_csv( loc_ftr + "\\last_auc_trn.csv", index = False )
last_auc_tst.to_csv( loc_ftr + "\\last_auc_tst.csv", index = False )

## <span style="color:green"> **Participo en mas de una subasta** </span> 

#### <span style="color:Orange"> **One-hot Encoding** </span> 

In [14]:
# Feature "mt_1_ins": 1 when the device took part in more than one auction, else 0.
cant_auc_trn = rh_trn.copy()
cant_auc_tst = rh_tst.copy()

# Count rows per device with groupby().size() and name both columns explicitly.
# The previous value_counts().to_frame().reset_index().rename(...) relied on the
# reset index column being called "index" and the counts column "device_id", which is
# no longer true on pandas >= 2.0 and left the frame without a "ref_hash" column.
auc_per_dev_trn = auc_trn.groupby( "device_id" ).size().rename_axis( "ref_hash" ).reset_index( name = "mt_1_ins" )
auc_per_dev_tst = auc_tst.groupby( "device_id" ).size().rename_axis( "ref_hash" ).reset_index( name = "mt_1_ins" )

# Devices absent from the auction log come out of the left merge as NaN -> 0 auctions.
cant_auc_trn = cant_auc_trn.merge( auc_per_dev_trn, how = "left", on = "ref_hash" ).fillna( 0 )
cant_auc_tst = cant_auc_tst.merge( auc_per_dev_tst, how = "left", on = "ref_hash" ).fillna( 0 )

# Collapse the count into the binary "more than one auction" flag.
cant_auc_trn['mt_1_ins'] = (cant_auc_trn['mt_1_ins'] > 1).astype('int8')
cant_auc_tst['mt_1_ins'] = (cant_auc_tst['mt_1_ins'] > 1).astype('int8')

cant_auc_trn.to_csv( loc_ftr + "\\mas1_auc_trn.csv", index = False )
cant_auc_tst.to_csv( loc_ftr + "\\mas1_auc_tst.csv", index = False )

## <span style="color:green"> **Recibio subasta entre 21 hs y 3 hs (Noche)** </span> 

#### <span style="color:Orange"> **One-hot Encoding** </span> 

In [15]:
# Feature "auc_21_3": 1 when the device received at least one auction at night
# (hour >= 21 or hour <= 3), else 0.
auc_night_trn = rh_trn.copy()
auc_night_tst = rh_tst.copy()

# Parse the timestamps once per window instead of once per comparison.
hour_trn = pd.to_datetime( auc_trn['date'] ).dt.hour
hour_tst = pd.to_datetime( auc_tst['date'] ).dt.hour

# Per-auction night indicator kept as a standalone Series, so auc_trn / auc_tst are not
# given a temporary column that has to be deleted afterwards.
night_trn = ( (hour_trn < 4) | (hour_trn > 20) ).rename( "auc_21_3" )
night_tst = ( (hour_tst < 4) | (hour_tst > 20) ).rename( "auc_21_3" )

# Number of night auctions per device.
hour_mode_trn = night_trn.groupby( auc_trn["device_id"] ).sum().rename_axis( "ref_hash" ).reset_index()
hour_mode_tst = night_tst.groupby( auc_tst["device_id"] ).sum().rename_axis( "ref_hash" ).reset_index()

auc_night_trn = auc_night_trn.merge( hour_mode_trn, how = "left", on = "ref_hash" )
auc_night_tst = auc_night_tst.merge( hour_mode_tst, how = "left", on = "ref_hash" )

# "Received an auction" means one or more. The earlier `> 1` test reported 0 for devices
# with exactly one auction in the slot. NaN (no auctions at all) compares False -> 0.
auc_night_trn["auc_21_3"] = ( auc_night_trn["auc_21_3"] > 0 ).astype( 'int8' )
auc_night_tst["auc_21_3"] = ( auc_night_tst["auc_21_3"] > 0 ).astype( 'int8' )

auc_night_trn.to_csv( loc_ftr + "\\auc_nght_trn.csv", index = False )
auc_night_tst.to_csv( loc_ftr + "\\auc_nght_tst.csv", index = False )

## <span style="color:green"> **Recibio subasta entre 4 hs y 10 hs (Mañana)** </span> 

#### <span style="color:Orange"> **One-hot Encoding** </span> 

In [16]:
# Feature "auc_4_10": 1 when the device received at least one auction in the morning
# (4 <= hour <= 10), else 0.
auc_morn_trn = rh_trn.copy()
auc_morn_tst = rh_tst.copy()

# Parse the timestamps once per window instead of once per comparison.
hour_trn = pd.to_datetime( auc_trn['date'] ).dt.hour
hour_tst = pd.to_datetime( auc_tst['date'] ).dt.hour

# Per-auction morning indicator kept as a standalone Series, so auc_trn / auc_tst are
# not given a temporary column that has to be deleted afterwards.
morning_trn = ( (hour_trn > 3) & (hour_trn < 11) ).rename( "auc_4_10" )
morning_tst = ( (hour_tst > 3) & (hour_tst < 11) ).rename( "auc_4_10" )

# Number of morning auctions per device.
hour_mode_trn = morning_trn.groupby( auc_trn["device_id"] ).sum().rename_axis( "ref_hash" ).reset_index()
hour_mode_tst = morning_tst.groupby( auc_tst["device_id"] ).sum().rename_axis( "ref_hash" ).reset_index()

auc_morn_trn = auc_morn_trn.merge( hour_mode_trn, how = "left", on = "ref_hash" )
auc_morn_tst = auc_morn_tst.merge( hour_mode_tst, how = "left", on = "ref_hash" )

# "Received an auction" means one or more. The earlier `> 1` test reported 0 for devices
# with exactly one auction in the slot. NaN (no auctions at all) compares False -> 0.
auc_morn_trn["auc_4_10"] = ( auc_morn_trn["auc_4_10"] > 0 ).astype( 'int8' )
auc_morn_tst["auc_4_10"] = ( auc_morn_tst["auc_4_10"] > 0 ).astype( 'int8' )

auc_morn_trn.to_csv( loc_ftr + "\\auc_morn_trn.csv", index = False )
auc_morn_tst.to_csv( loc_ftr + "\\auc_morn_tst.csv", index = False )

## <span style="color:green"> **Recibio subasta entre 11 hs y 15 hs (Mediodia)** </span> 

#### <span style="color:Orange"> **One-hot Encoding** </span> 

In [17]:
# Feature "auc_11_15": 1 when the device received at least one auction around midday
# (11 <= hour <= 15), else 0.
auc_midday_trn = rh_trn.copy()
auc_midday_tst = rh_tst.copy()

# Parse the timestamps once per window instead of once per comparison.
hour_trn = pd.to_datetime( auc_trn['date'] ).dt.hour
hour_tst = pd.to_datetime( auc_tst['date'] ).dt.hour

# Per-auction midday indicator kept as a standalone Series, so auc_trn / auc_tst are
# not given a temporary column that has to be deleted afterwards.
midday_trn = ( (hour_trn > 10) & (hour_trn < 16) ).rename( "auc_11_15" )
midday_tst = ( (hour_tst > 10) & (hour_tst < 16) ).rename( "auc_11_15" )

# Number of midday auctions per device.
hour_mode_trn = midday_trn.groupby( auc_trn["device_id"] ).sum().rename_axis( "ref_hash" ).reset_index()
hour_mode_tst = midday_tst.groupby( auc_tst["device_id"] ).sum().rename_axis( "ref_hash" ).reset_index()

auc_midday_trn = auc_midday_trn.merge( hour_mode_trn, how = "left", on = "ref_hash" )
auc_midday_tst = auc_midday_tst.merge( hour_mode_tst, how = "left", on = "ref_hash" )

# "Received an auction" means one or more. The earlier `> 1` test reported 0 for devices
# with exactly one auction in the slot. NaN (no auctions at all) compares False -> 0.
auc_midday_trn["auc_11_15"] = ( auc_midday_trn["auc_11_15"] > 0 ).astype( 'int8' )
auc_midday_tst["auc_11_15"] = ( auc_midday_tst["auc_11_15"] > 0 ).astype( 'int8' )

auc_midday_trn.to_csv( loc_ftr + "\\auc_mday_trn.csv", index = False )
auc_midday_tst.to_csv( loc_ftr + "\\auc_mday_tst.csv", index = False )

## <span style="color:green"> **Recibio subasta entre 16 hs y 20 hs (Tarde)** </span> 

#### <span style="color:Orange"> **One-hot Encoding** </span> 

In [20]:
# Feature "auc_16_20": 1 when the device received at least one auction in the afternoon
# (16 <= hour <= 20), else 0.
auc_after_trn = rh_trn.copy()
auc_after_tst = rh_tst.copy()

# Parse the timestamps once per window instead of once per comparison.
hour_trn = pd.to_datetime( auc_trn['date'] ).dt.hour
hour_tst = pd.to_datetime( auc_tst['date'] ).dt.hour

# Per-auction afternoon indicator kept as a standalone Series, so auc_trn / auc_tst are
# not given a temporary column that has to be deleted afterwards.
afternoon_trn = ( (hour_trn > 15) & (hour_trn < 21) ).rename( "auc_16_20" )
afternoon_tst = ( (hour_tst > 15) & (hour_tst < 21) ).rename( "auc_16_20" )

# Number of afternoon auctions per device.
hour_mode_trn = afternoon_trn.groupby( auc_trn["device_id"] ).sum().rename_axis( "ref_hash" ).reset_index()
hour_mode_tst = afternoon_tst.groupby( auc_tst["device_id"] ).sum().rename_axis( "ref_hash" ).reset_index()

auc_after_trn = auc_after_trn.merge( hour_mode_trn, how = "left", on = "ref_hash" )
auc_after_tst = auc_after_tst.merge( hour_mode_tst, how = "left", on = "ref_hash" )

# "Received an auction" means one or more. The earlier `> 1` test reported 0 for devices
# with exactly one auction in the slot. NaN (no auctions at all) compares False -> 0.
auc_after_trn["auc_16_20"] = ( auc_after_trn["auc_16_20"] > 0 ).astype( 'int8' )
auc_after_tst["auc_16_20"] = ( auc_after_tst["auc_16_20"] > 0 ).astype( 'int8' )

auc_after_trn.to_csv( loc_ftr + "\\auc_aftr_trn.csv", index = False )
auc_after_tst.to_csv( loc_ftr + "\\auc_aftr_tst.csv", index = False )

## <span style="color:green"> **Día de la semana de la primera subasta** </span> 

In [10]:
# Feature "wday_frt_auc": weekday (Monday=0 ... Sunday=6) of each device's first
# auction, encoded as sin(weekday * pi / 6).
# NOTE(review): sin(pi * d / 6) gives the same value for d and 6 - d, and Monday (0) and
# Sunday (6) both map to ~0 — confirm this encoding is intended.
sdia_auc_trn = rh_trn.copy()
sdia_auc_tst = rh_tst.copy()

# Earliest auction per device: sort by date and keep the first row of every device.
first_auc_wday_trn = auc_trn[ ["device_id","date"] ].sort_values( "date" ).drop_duplicates( subset = "device_id", keep = "first" ).rename( columns = {"device_id":"ref_hash"} )
first_auc_wday_tst = auc_tst[ ["device_id","date"] ].sort_values( "date" ).drop_duplicates( subset = "device_id", keep = "first" ).rename( columns = {"device_id":"ref_hash"} )

# Only the weekday is exported. The elapsed-seconds column that used to be computed
# here was dropped again before the merge, so that dead computation has been removed.
first_auc_wday_trn["wday_frt_auc"] = pd.to_datetime( first_auc_wday_trn["date"] ).dt.weekday
first_auc_wday_tst["wday_frt_auc"] = pd.to_datetime( first_auc_wday_tst["date"] ).dt.weekday

first_auc_wday_trn = first_auc_wday_trn.drop( columns = "date" )
first_auc_wday_tst = first_auc_wday_tst.drop( columns = "date" )

# Left merge keeps every ref_hash; devices without auctions keep NaN.
sdia_auc_trn = sdia_auc_trn.merge( first_auc_wday_trn, how = "left", on = "ref_hash" )
sdia_auc_tst = sdia_auc_tst.merge( first_auc_wday_tst, how = "left", on = "ref_hash" )

sdia_auc_trn["wday_frt_auc"] = sdia_auc_trn["wday_frt_auc"].apply( lambda x: np.sin( (x*np.pi)/6 ) )
sdia_auc_tst["wday_frt_auc"] = sdia_auc_tst["wday_frt_auc"].apply( lambda x: np.sin( (x*np.pi)/6 ) )

sdia_auc_trn.to_csv( loc_ftr + "\\sdia_auc_trn.csv", index = False )
sdia_auc_tst.to_csv( loc_ftr + "\\sdia_auc_tst.csv", index = False )

## <span style="color:green"> **Cantidad de auctions entre 21 hs y 3 hs (Noche)** </span>

In [8]:
# Feature "auc_21_3" (count): number of night auctions (hour >= 21 or hour <= 3) per device.

# Convert the date column in place; it stays converted for the cells that run afterwards.
auc_trn['date'] = pd.to_datetime( auc_trn['date'] )
auc_tst['date'] = pd.to_datetime( auc_tst['date'] )

# Temporary per-auction indicator column, removed again at the end of the cell.
auc_trn['auc_21_3'] = auc_trn['date'].dt.hour.lt( 4 ) | auc_trn['date'].dt.hour.gt( 20 )
auc_tst['auc_21_3'] = auc_tst['date'].dt.hour.lt( 4 ) | auc_tst['date'].dt.hour.gt( 20 )

auc_night_trn = rh_trn.copy()
auc_night_tst = rh_tst.copy()

# Summing the boolean indicator per device yields the number of night auctions.
hour_mode_trn = auc_trn.groupby( 'device_id' )['auc_21_3'].sum().rename_axis( 'ref_hash' ).reset_index()
hour_mode_tst = auc_tst.groupby( 'device_id' )['auc_21_3'].sum().rename_axis( 'ref_hash' ).reset_index()

# Devices missing from the auction log come out of the left merge as NaN -> 0 auctions.
auc_night_trn = auc_night_trn.merge( hour_mode_trn, how = "left", on = "ref_hash" ).fillna( 0.0 )
auc_night_tst = auc_night_tst.merge( hour_mode_tst, how = "left", on = "ref_hash" ).fillna( 0.0 )

auc_night_trn.to_csv( loc_ftr + "\\cauc_nig_trn.csv", index = False )
auc_night_tst.to_csv( loc_ftr + "\\cauc_nig_tst.csv", index = False )

auc_trn.drop( columns = 'auc_21_3', inplace = True )
auc_tst.drop( columns = 'auc_21_3', inplace = True )

## <span style="color:green"> **Cantidad de auctions entre 4 hs y 10 hs (Mañana)** </span>

In [9]:
# Feature "auc_4_10" (count): number of morning auctions (4 <= hour <= 10) per device.

# Convert the date column in place; it stays converted for the cells that run afterwards.
auc_trn['date'] = pd.to_datetime( auc_trn['date'] )
auc_tst['date'] = pd.to_datetime( auc_tst['date'] )

# Temporary per-auction indicator column, removed again at the end of the cell.
auc_trn['auc_4_10'] = auc_trn['date'].dt.hour.gt( 3 ) & auc_trn['date'].dt.hour.lt( 11 )
auc_tst['auc_4_10'] = auc_tst['date'].dt.hour.gt( 3 ) & auc_tst['date'].dt.hour.lt( 11 )

auc_morn_trn = rh_trn.copy()
auc_morn_tst = rh_tst.copy()

# Summing the boolean indicator per device yields the number of morning auctions.
hour_mode_trn = auc_trn.groupby( 'device_id' )['auc_4_10'].sum().rename_axis( 'ref_hash' ).reset_index()
hour_mode_tst = auc_tst.groupby( 'device_id' )['auc_4_10'].sum().rename_axis( 'ref_hash' ).reset_index()

# Devices missing from the auction log come out of the left merge as NaN -> 0 auctions.
auc_morn_trn = auc_morn_trn.merge( hour_mode_trn, how = "left", on = "ref_hash" ).fillna( 0.0 )
auc_morn_tst = auc_morn_tst.merge( hour_mode_tst, how = "left", on = "ref_hash" ).fillna( 0.0 )

auc_morn_trn.to_csv( loc_ftr + "\\cauc_mor_trn.csv", index = False )
auc_morn_tst.to_csv( loc_ftr + "\\cauc_mor_tst.csv", index = False )

auc_trn.drop( columns = 'auc_4_10', inplace = True )
auc_tst.drop( columns = 'auc_4_10', inplace = True )

## <span style="color:green"> **Cantidad de auctions entre 11 hs y 15 hs (Mediodia)** </span>

In [10]:
# Feature "auc_11_15" (count): number of midday auctions (11 <= hour <= 15) per device.

# Convert the date column in place; it stays converted for the cells that run afterwards.
auc_trn['date'] = pd.to_datetime( auc_trn['date'] )
auc_tst['date'] = pd.to_datetime( auc_tst['date'] )

# Temporary per-auction indicator column, removed again at the end of the cell.
auc_trn['auc_11_15'] = auc_trn['date'].dt.hour.gt( 10 ) & auc_trn['date'].dt.hour.lt( 16 )
auc_tst['auc_11_15'] = auc_tst['date'].dt.hour.gt( 10 ) & auc_tst['date'].dt.hour.lt( 16 )

auc_midday_trn = rh_trn.copy()
auc_midday_tst = rh_tst.copy()

# Summing the boolean indicator per device yields the number of midday auctions.
hour_mode_trn = auc_trn.groupby( 'device_id' )['auc_11_15'].sum().rename_axis( 'ref_hash' ).reset_index()
hour_mode_tst = auc_tst.groupby( 'device_id' )['auc_11_15'].sum().rename_axis( 'ref_hash' ).reset_index()

# Devices missing from the auction log come out of the left merge as NaN -> 0 auctions.
auc_midday_trn = auc_midday_trn.merge( hour_mode_trn, how = "left", on = "ref_hash" ).fillna( 0.0 )
auc_midday_tst = auc_midday_tst.merge( hour_mode_tst, how = "left", on = "ref_hash" ).fillna( 0.0 )

auc_midday_trn.to_csv( loc_ftr + "\\cauc_mid_trn.csv", index = False )
auc_midday_tst.to_csv( loc_ftr + "\\cauc_mid_tst.csv", index = False )

auc_trn.drop( columns = 'auc_11_15', inplace = True )
auc_tst.drop( columns = 'auc_11_15', inplace = True )

## <span style="color:green"> **Cantidad de auctions entre 16 hs y 20 hs (Tarde)** </span>

In [12]:
# Feature "auc_16_20" (count): number of afternoon auctions (16 <= hour <= 20) per device.

# Convert the date column in place; it stays converted for the cells that run afterwards.
auc_trn['date'] = pd.to_datetime( auc_trn['date'] )
auc_tst['date'] = pd.to_datetime( auc_tst['date'] )

# Temporary per-auction indicator column, removed again at the end of the cell.
auc_trn['auc_16_20'] = auc_trn['date'].dt.hour.gt( 15 ) & auc_trn['date'].dt.hour.lt( 21 )
auc_tst['auc_16_20'] = auc_tst['date'].dt.hour.gt( 15 ) & auc_tst['date'].dt.hour.lt( 21 )

auc_after_trn = rh_trn.copy()
auc_after_tst = rh_tst.copy()

# Summing the boolean indicator per device yields the number of afternoon auctions.
hour_mode_trn = auc_trn.groupby( 'device_id' )['auc_16_20'].sum().rename_axis( 'ref_hash' ).reset_index()
hour_mode_tst = auc_tst.groupby( 'device_id' )['auc_16_20'].sum().rename_axis( 'ref_hash' ).reset_index()

# Devices missing from the auction log come out of the left merge as NaN -> 0 auctions.
auc_after_trn = auc_after_trn.merge( hour_mode_trn, how = "left", on = "ref_hash" ).fillna( 0.0 )
auc_after_tst = auc_after_tst.merge( hour_mode_tst, how = "left", on = "ref_hash" ).fillna( 0.0 )

auc_after_trn.to_csv( loc_ftr + "\\cauc_aft_trn.csv", index = False )
auc_after_tst.to_csv( loc_ftr + "\\cauc_aft_tst.csv", index = False )

auc_trn.drop( columns = 'auc_16_20', inplace = True )
auc_tst.drop( columns = 'auc_16_20', inplace = True )