# Features generation

## Package imports

In [1]:
import pandas as pd
import numpy as np
import time
import datetime
import calendar

## Loading the original dataframes for testing

In [23]:
# Snapshot period used by the feature functions in this notebook.
year = 2018
mes_snapshot = 9

# Raw event logs; FEC_EVENT is parsed as a datetime column while reading.
df_pageviews = pd.read_csv("data/pageviews.csv", parse_dates=["FEC_EVENT"])
df_devicedata = pd.read_csv("./data/device_data.csv", parse_dates=["FEC_EVENT"])

def universo_train(df, snapshot_mes=None):
    """Build the training universe: one row per distinct user.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``USER_ID`` column.
    snapshot_mes : int, optional
        Snapshot month written on every row. When omitted, the notebook-level
        ``mes_snapshot`` variable is used, which is the previous behaviour.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``user_id`` (the distinct ``USER_ID`` values of ``df``)
        and ``snapshot_mes`` (the same value on every row).
    """
    if snapshot_mes is None:
        # Backward-compatible default: fall back to the notebook global.
        snapshot_mes = mes_snapshot
    usuarios = df['USER_ID'].unique()
    return pd.DataFrame({'user_id': usuarios}).assign(snapshot_mes=snapshot_mes)

# Universe of users: every distinct USER_ID seen in the pageviews log.
universo = universo_train(df=df_pageviews)

In [24]:
# Conversions table with exact duplicate rows removed and a fresh 0..n-1 index.
df_conversiones = (
    pd.read_csv("./data/conversiones.csv")
    .drop_duplicates()
    .reset_index(drop=True)
)

## Feature generation functions

Método que calcula la cantidad de adopciones que tuvo cada cliente en las siguientes ventanas temporales:
 - último mes (qty_1)
 - últimos tres meses (qty_3)
 - últimos seis meses (qty_6)
 - últimos nueve meses (qty_9)

In [4]:
def qty_adoptions(universo, df_conversiones):
    """Count each user's adoptions over trailing 1, 3, 6 and 9 month windows.

    Parameters
    ----------
    universo : pandas.DataFrame
        Frame with a ``user_id`` column and a ``snapshot_mes`` column. The
        snapshot month is read from the first row.
    df_conversiones : pandas.DataFrame
        Adoption records with ``USER_ID`` and ``mes`` (month number) columns.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the columns of ``universo`` plus ``qty_1``,
        ``qty_3``, ``qty_6`` and ``qty_9``. Rows keep the order of
        ``universo`` and receive a fresh 0..n-1 index. Every missing value in
        the result is replaced by 0, so users without adoptions get 0.

    Notes
    -----
    The windows are built from the month number only; the year of each record
    is not taken into account.
    NOTE(review): this presumably assumes all records belong to the snapshot
    year - confirm against the data.
    """
    # Positional access: a label lookup ([0]) raises KeyError whenever the
    # index of `universo` does not contain the label 0.
    mes_snapshot = universo['snapshot_mes'].iloc[0]

    # Keep only the nine months ending at the snapshot month (inclusive).
    meses = df_conversiones['mes']
    en_ventana = (meses <= mes_snapshot) & (meses > mes_snapshot - 9)
    conversiones = df_conversiones[en_ventana].astype({'mes': 'int32', 'USER_ID': 'int64'})

    # Work on a re-indexed copy so the caller's frame is never modified.
    resultado = universo.reset_index(drop=True)
    for columna, n_meses in (('qty_1', 1), ('qty_3', 3), ('qty_6', 6), ('qty_9', 9)):
        # Rows are already capped at the snapshot month, so a lower bound is
        # enough to select the last `n_meses` months.
        recientes = conversiones[conversiones['mes'] > mes_snapshot - n_meses]
        conteo = recientes.groupby('USER_ID')['mes'].count()
        # Users absent from `conteo` map to NaN and are zero-filled below.
        resultado[columna] = resultado['user_id'].map(conteo)

    return resultado.fillna(0)

Método que calcula la cantidad de días desde la última adopción

¡Ojo! Las adopciones las tenemos a nivel mes, por lo que se tomó el día de cierre de dicho mes.

In [29]:
def qty_days_last_adoption(universo, df_conversiones, today=None):
    """Compute, per user, the days elapsed since their most recent adoption.

    Adoptions are recorded per month, so each one is dated on the last
    calendar day of its month.

    Parameters
    ----------
    universo : pandas.DataFrame
        Frame with a ``user_id`` column and a ``snapshot_mes`` column. The
        snapshot month is read from the first row.
    df_conversiones : pandas.DataFrame
        Adoption records with ``USER_ID``, ``mes`` (month number) and ``anio``
        (year) columns.
    today : datetime-like, optional
        Reference date the day count is measured against. Defaults to the
        current date and time, which is the previous behaviour.
        NOTE(review): the original code asked whether this should be the first
        day of the month after the snapshot - still undecided.

    Returns
    -------
    pandas.DataFrame
        ``universo`` left-joined with one row per user holding ``USER_ID``,
        the remaining conversion columns (``mes``, ``anio``) taken from the
        user's most recent adoption, and ``qty_days_last_adoption``. Every
        missing value is replaced by 0, so a user without adoptions in the
        window gets 0 in all of those columns, including the day count.

    Notes
    -----
    The window filter uses the month number only; ``anio`` is used for dating
    the adoption but not for selecting rows.
    """
    if today is None:
        today = datetime.datetime.today()
    today = pd.Timestamp(today)

    # Read the snapshot month from the universe itself rather than from a
    # notebook-level variable, matching qty_adoptions.
    mes_snapshot = universo['snapshot_mes'].iloc[0]

    # Keep only the nine months ending at the snapshot month (inclusive).
    meses = df_conversiones['mes']
    en_ventana = (meses <= mes_snapshot) & (meses > mes_snapshot - 9)
    conversiones = df_conversiones[en_ventana].astype(
        {'mes': 'int32', 'anio': 'int32', 'USER_ID': 'int64'}
    )

    # Date every adoption on the closing day of its month: build the first day
    # of the month, then roll forward to the month end.
    primer_dia = pd.to_datetime(
        pd.DataFrame({'year': conversiones['anio'], 'month': conversiones['mes'], 'day': 1})
    )
    fecha_cierre = primer_dia + pd.offsets.MonthEnd(0)

    # After sorting by date, the last row of each user is the latest adoption,
    # so `mes` and `anio` come from the same record.
    ultimas = (
        conversiones.assign(date=fecha_cierre)
        .sort_values('date')
        .groupby('USER_ID', as_index=False)
        .last()
    )
    ultimas['qty_days_last_adoption'] = (today - ultimas['date']).dt.days
    ultimas = ultimas.drop(columns='date')

    resultado = pd.merge(universo, ultimas, how='left', left_on=['user_id'], right_on=['USER_ID'])
    return resultado.fillna(0)

In [30]:
# Scratch run of the days-since-last-adoption feature over the full universe.
hola = qty_days_last_adoption(universo=universo, df_conversiones=df_conversiones)

In [33]:
# Show only the rows whose day count is strictly positive.
hola[hola['qty_days_last_adoption'] > 0]

Unnamed: 0,user_id,snapshot_mes,USER_ID,mes,anio,qty_days_last_adoption
13,13,9,13.0,8.0,2018.0,359.0
40,40,9,40.0,1.0,2018.0,571.0
57,57,9,57.0,6.0,2018.0,421.0
92,92,9,92.0,3.0,2018.0,512.0
100,100,9,100.0,6.0,2018.0,421.0
204,204,9,204.0,2.0,2018.0,543.0
265,265,9,265.0,6.0,2018.0,421.0
290,290,9,290.0,6.0,2018.0,421.0
300,300,9,300.0,4.0,2018.0,482.0
310,310,9,310.0,8.0,2018.0,359.0
