### =======================================================================
### IMPORTACIÓN DE DATOS.
### =======================================================================

In [1]:
## IMPORTACIÓN GENERAL DE LIBRERIAS Y VISUALIZACIÓN DE DATOS (matplotlib y seaborn)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as DT
import warnings
%matplotlib inline
warnings.filterwarnings('ignore')
plt.style.use('default') 
sns.set(style="whitegrid") 
plt.rcParams['figure.figsize'] = (15, 10)
# Load scikit's random forest classifier library
from sklearn.ensemble import RandomForestClassifier

In [2]:
training = ''
eventos = ''
dummies = ''
test = ''

In [3]:
## OBTENEMOS TODA LA INFORMACIÓN DEL SET DE ENTRENAMIENTO.
training = pd.read_csv('labels_training_set.csv', encoding = 'utf-8')
## OBTENEMOS TODA LA INFORMACIÓN DE LOS DIFERENTES EVENTOS.
eventos = pd.read_csv('events_up_to_01062018.csv', encoding = 'utf-8')
## OBTENEMOS TODA LA INFORMACIÓN A TESTEAR.
test = pd.read_csv('trocafone_kaggle_test.csv', encoding = 'utf-8')

In [4]:
## PASAMOS LAS COLUMNAS QUE TIENEN UNA CANTIDAD DE VALORES LIMITADA A UN TIPO CATEGORY
eventos['person'] = eventos['person'].astype('category')
eventos['event'] = eventos['event'].astype('category')
eventos['condition'] = eventos['condition'].astype('category')
eventos['storage'] = eventos['storage'].astype('category')
eventos['search_engine'] = eventos['search_engine'].astype('category')
eventos['channel'] = eventos['channel'].astype('category')
eventos['new_vs_returning'] = eventos['new_vs_returning'].astype('category')
eventos['device_type'] = eventos['device_type'].astype('category')
eventos['color'] = eventos['color'].astype('category')
eventos['region'] = eventos['region'].astype('category')
eventos['country'] = eventos['country'].astype('category')
eventos['operating_system_version'] = eventos['operating_system_version'].astype('category')
eventos['city'] = eventos['city'].astype('category')
eventos['browser_version'] = eventos['browser_version'].astype('category')
eventos['screen_resolution'] = eventos['screen_resolution'].astype('category')
eventos['model'] = eventos['model'].astype('category')
eventos['timestamp'] = pd.to_datetime(eventos['timestamp'], infer_datetime_format=True)

### =======================================================================
### ARMADO DE FEATURES.
### =======================================================================

In [5]:
## ORDENAMOS LOS DATOS ṔOR PERSONAS EN PRIMER LUGAR Y TIEMPO EN SEGUNDO.
eventos.sort_values(['person', 'timestamp'], ascending=[True, True], inplace=True)
## DIFERENCIAMOS EN TRES COLUMNAS DIFERENTES EL DIA, MES Y AÑO.
eventos['mes'] = eventos['timestamp'].dt.month
eventos['dia'] = eventos['timestamp'].dt.day
eventos['hora'] = eventos['timestamp'].dt.hour
## ARMAMOS UNA COLUMNA PARA EL DÍA DE LA SEMANA COMO NOMBRE.
eventos['diasemana'] = eventos['timestamp'].dt.weekday_name
## PONEMOS LOS NOMBRES DE MANERA MÁS PROLIJA PARA LOS GRÁFICOS.
eventos.loc[eventos.diasemana.str.contains('Monday', na=False), 'diasemana'] = 'lunes'
eventos.loc[eventos.diasemana.str.contains('Tuesday', na=False), 'diasemana'] = 'martes'
eventos.loc[eventos.diasemana.str.contains('Wednesday', na=False), 'diasemana'] = 'miercoles'
eventos.loc[eventos.diasemana.str.contains('Thursday', na=False), 'diasemana'] = 'jueves'
eventos.loc[eventos.diasemana.str.contains('Friday', na=False), 'diasemana'] = 'viernes'
eventos.loc[eventos.diasemana.str.contains('Saturday', na=False), 'diasemana'] = 'sabado'
eventos.loc[eventos.diasemana.str.contains('Sunday', na=False), 'diasemana'] = 'domingo'
# DEFINIMOS UNA LÓGICA PARA INDICAR SI EL DÍA EN QUE SE EJECUTA EL EVENTO ES FIN DE SEMANA.
eventos['finde'] = 0
eventos.loc[(eventos.diasemana.str.contains('DOM', na=False) | eventos.diasemana.str.contains('SAB', na=False)), 'finde'] = 1
# DEFINIMOS EL MES COMO NOMBRE PARA FACILITAR LAS COLUMNAS
eventos['mesMayus'] = ''
eventos.loc[eventos.mes == 1, 'mesMayus'] = 'enero'
eventos.loc[eventos.mes == 2, 'mesMayus'] = 'febrero'
eventos.loc[eventos.mes == 3, 'mesMayus'] = 'marzo'
eventos.loc[eventos.mes == 4, 'mesMayus'] = 'abril'
eventos.loc[eventos.mes == 5, 'mesMayus'] = 'mayo'
eventos.loc[eventos.mes == 6, 'mesMayus'] = 'junio'
eventos.loc[eventos.mes == 7, 'mesMayus'] = 'julio'
eventos.loc[eventos.mes == 8, 'mesMayus'] = 'agosto'
eventos.loc[eventos.mes == 9, 'mesMayus'] = 'septiembre'
eventos.loc[eventos.mes == 10, 'mesMayus'] = 'octubre'
eventos.loc[eventos.mes == 11, 'mesMayus'] = 'noviembre'
eventos.loc[eventos.mes == 12, 'mesMayus'] = 'diciembre'
# ARMAMOS UNA LÓGICA PARA SEGMENTAR LAS FRANJAS HORARIAS.
# MADRUGADA de 00 a 06
eventos['hora_madrugada'] = 0
eventos.loc[((eventos.hora > -1) & (eventos.hora < 7)), 'hora_madrugada'] = 1
# MAÑANA de 07 a 11
eventos['hora_mañana'] = 0
eventos.loc[((eventos.hora > 6) & (eventos.hora < 12)), 'hora_mañana'] = 1
# ALMUERZO de 12 a 13
eventos['hora_almuerzo'] = 0
eventos.loc[((eventos.hora > 11) & (eventos.hora < 14)), 'hora_almuerzo'] = 1
# TARDE de 14 a 18
eventos['hora_tarde'] = 0
eventos.loc[((eventos.hora > 13) & (eventos.hora < 19)), 'hora_tarde'] = 1
# NOCHE de 19 a 23
eventos['hora_noche'] = 0
eventos.loc[((eventos.hora > 18) & (eventos.hora < 24)), 'hora_noche'] = 1
# TRANSFORMAMOS EN CATEGÓRICAS EL DÍA DE LA SEMANA Y EL MES.
eventos['diasemana'] = eventos['diasemana'].astype('category')
eventos['mesMayus'] = eventos['mesMayus'].astype('category')

In [6]:
eventos.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2341681 entries, 1507286 to 1504503
Data columns (total 34 columns):
timestamp                   datetime64[ns]
event                       category
person                      category
url                         object
sku                         float64
model                       category
condition                   category
storage                     category
color                       category
skus                        object
search_term                 object
staticpage                  object
campaign_source             object
search_engine               category
channel                     category
new_vs_returning            category
city                        category
region                      category
country                     category
device_type                 category
screen_resolution           category
operating_system_version    category
browser_version             category
mes                         int64
d

In [7]:
eventos.rename(columns={'staticpage': 'Genstatpage'}, inplace=True)

In [8]:
# COLUMNAS DONDE POR CADA REGISTRO SABEMOS QUE TENEMOS UN VALOR (SIEMPRE PRESENTES)
dummies = pd.get_dummies(eventos['diasemana'], drop_first=False)
eventos = pd.concat([eventos, dummies], axis=1)
dummies = pd.get_dummies(eventos['mesMayus'], drop_first=False)
eventos = pd.concat([eventos, dummies], axis=1)
dummies = pd.get_dummies(eventos['event'], drop_first=False)
eventos = pd.concat([eventos, dummies], axis=1)

In [9]:
## ESTAS COLUMNAS NO APARECEN SIEMPRE, PUESTO QUE SOLO APARECEN SEGÚN EL TIPO DE EVENTO.
dummies = pd.get_dummies(eventos['color'], drop_first=False)
eventos = pd.concat([eventos, dummies], axis=1)
dummies = pd.get_dummies(eventos['model'], drop_first=False)
eventos = pd.concat([eventos, dummies], axis=1)
dummies = pd.get_dummies(eventos['condition'], drop_first=False)
eventos = pd.concat([eventos, dummies], axis=1)
dummies = pd.get_dummies(eventos['storage'], drop_first=False)
eventos = pd.concat([eventos, dummies], axis=1)

In [10]:
dummies = ''
eventos.rename(columns={'generic listing': 'geneList', 'staticpage': 'statpage', 'staticpage': 'SP'}, inplace=True)

In [23]:
eventos_filtrados = ''

# VOY A TRABAJAR CON UN CASO  TESTIGO.
pd.options.display.max_columns = 350

#eventos = eventos[(eventos.person == 'db2c4d27')]
eventos_filtrados = eventos.iloc[0:1000, 0:150] 
eventos_filtrados.head(3)

#eventos_filtrados.info()

Unnamed: 0,timestamp,event,person,url,sku,model,condition,storage,color,skus,search_term,Genstatpage,campaign_source,search_engine,channel,new_vs_returning,city,region,country,device_type,screen_resolution,operating_system_version,browser_version,mes,dia,hora,diasemana,finde,mesMayus,hora_madrugada,hora_mañana,hora_almuerzo,hora_tarde,hora_noche,domingo,jueves,lunes,martes,miercoles,sabado,viernes,abril,enero,febrero,marzo,mayo,ad campaign hit,brand listing,checkout,conversion,geneList,lead,search engine hit,searched products,SP,viewed product,visited site,Amarelo,Ametista,Azul,Azul Escuro,Azul Safira,Azul Topázio,Bambu,Black Piano,Branco,Branco Azul,Branco Azul Navy,Branco Bambu,Branco Cabernet,Branco Dourado,Branco Framboesa,Branco Pink,Branco Verde,Branco Vermelho,Cabernet,Cinza,Cinza espacial,Cobre,Coral,Couro Marrom,Couro Navy,Couro Vinho,Couro Vintage,Cromo,Dourado,Framboesa,Indigo,Iuna,Olympic Edition,Ouro,Ouro Rosa,Platinum,Prata,Prateado,Preto,Preto Asfalto,Preto Azul,Preto Azul Navy,Preto Bambu,Preto Branco,Preto Brilhante,Preto Cabernet,Preto Matte,Preto Pink,Preto Tabaco,Preto Verde,Preto Vermelho,Rosa,Rose,Rouge,Roxo,Silver,Titânio,Turquesa,Verde,Verde Petroleo,Verde Água,Vermelho,Ônix,Asus Live,Asus Zenfone 2,Asus Zenfone 2 Deluxe,Asus Zenfone 2 Laser,"Asus Zenfone 2 Laser 6"""
1507286,2018-05-17 12:27:47,checkout,0008ed71,,3372.0,Samsung Galaxy S6 Flat,Muito Bom,32GB,Dourado,,,,,,,,,,,,,,,5,17,12,jueves,0,mayo,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2336760,2018-05-17 13:44:59,visited site,0008ed71,,,,,,,,,,,,Referral,New,Unknown,Unknown,Brazil,Computer,1920x1080,Windows 10,Chrome 66.0,5,17,13,jueves,0,mayo,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1507716,2018-05-17 13:45:00,checkout,0008ed71,,8247.0,iPhone SE,Bom,64GB,Cinza espacial,,,,,,,,,,,,,,,5,17,13,jueves,0,mayo,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [12]:
#eventos_agrupados = eventos_agrupados[(eventos_agrupados.person == 'db2c4d27')]
#eventos_agrupados.head()

In [24]:
columnas_filtrar = list(eventos_filtrados.select_dtypes(include=['int','float64','uint8']).columns)

eventos_agrupados = ''
asd = ''

columnas_filtrar.remove('sku')
columnas_filtrar.append('person')
asd = eventos_filtrados.loc[:, eventos_filtrados.columns.isin(columnas_filtrar)]
columnas_filtrar.remove('person')

#asd.head(5)
#asd.info()

eventos_agrupados = asd.groupby('person')[columnas_filtrar].mean().reset_index()
#eventos_agrupados = eventos_agrupados[(eventos_agrupados.dia > 0)]
#eventos_agrupados.head(15)

#asd.head()
#grouped_df = eventos.groupby('person').mean().reset_index()
#entrenar = pd.](grouped_df)

### =======================================================================
### ENTRENAMIENTO Y PREDICCIÓN.
### =======================================================================

In [16]:
## OBTENEMOS TODA LA INFORMACIÓN DEL SET DE ENTRENAMIENTO.
training = ''
train_completo = ''
training = pd.read_csv('labels_training_set.csv', encoding = 'utf-8')

train_completo = pd.merge(training, eventos_agrupados, on='person', how='left')
train_completo.dropna(subset=['person']) 
train_completo.head(20)

Unnamed: 0,person,label,mes,dia,hora,finde,hora_madrugada,hora_mañana,hora_almuerzo,hora_tarde,hora_noche,domingo,jueves,lunes,martes,miercoles,sabado,viernes,abril,enero,febrero,marzo,mayo,ad campaign hit,brand listing,checkout,conversion,geneList,lead,search engine hit,searched products,SP,viewed product,visited site,Amarelo,Ametista,Azul,Azul Escuro,Azul Safira,Azul Topázio,Bambu,Black Piano,Branco,Branco Azul,Branco Azul Navy,Branco Bambu,Branco Cabernet,Branco Dourado,Branco Framboesa,Branco Pink,Branco Verde,Branco Vermelho,Cabernet,Cinza,Cinza espacial,Cobre,Coral,Couro Marrom,Couro Navy,Couro Vinho,Couro Vintage,Cromo,Dourado,Framboesa,Indigo,Iuna,Olympic Edition,Ouro,Ouro Rosa,Platinum,Prata,Prateado,Preto,Preto Asfalto,Preto Azul,Preto Azul Navy,Preto Bambu,Preto Branco,Preto Brilhante,Preto Cabernet,Preto Matte,Preto Pink,Preto Tabaco,Preto Verde,Preto Vermelho,Rosa,Rose,Rouge,Roxo,Silver,Titânio,Turquesa,Verde,Verde Petroleo,Verde Água,Vermelho,Ônix,Asus Live,Asus Zenfone 2,Asus Zenfone 2 Deluxe,Asus Zenfone 2 Laser,"Asus Zenfone 2 Laser 6""",Asus Zenfone 3 Max 32 GB,Asus Zenfone 3 Max 16 GB,Asus Zenfone 5,Asus Zenfone 6,Asus Zenfone Go,Asus Zenfone Selfie,LG X Screen,LG G2 Mini D618,LG G3 Beat D724,LG G3 D855,LG G3 Stylus D690,LG G4 Beat H736,LG G4 H815P,LG G4 H818P,LG G4 Stylus H630,LG G4 Stylus HDTV H540T,LG G5 SE,LG K10,LG K10 Novo,LG K10 TV,LG K4,LG K8,LG L Prime D337,LG L80 Dual,LG Nexus 4,LG Nexus 5 D821,LG Prime Plus H522,LG X Power,Lenovo Vibe A7010 Dual Chip,Lenovo Vibe K5,Motorola Moto E1,Motorola Moto E2 3G Dual,Motorola Moto E2 4G Dual,Motorola Moto E4 Plus,Motorola Moto G1 3G,Motorola Moto G1 4G,Motorola Moto G2 3G Dual,Motorola Moto G2 4G Dual,Motorola Moto G3 4G,Motorola Moto G3 HDTV,Motorola Moto G4 DTV,Motorola Moto G4 Play,Motorola Moto G4 Play DTV,Motorola Moto G4 Plus,Motorola Moto G5,Motorola Moto G5 Plus,Motorola Moto G5S,Motorola Moto G5S Plus,Motorola Moto MAXX,Motorola Moto X Force,Motorola Moto X Play 4G Dual,Motorola Moto X Style,Motorola Moto X2,Motorola Moto Z,Motorola Moto Z Play,Motorola Moto Z Power Edition,Motorola Moto Z2 Force,Motorola Moto Z2 Play,Outros TV LED 15,Quantum GO 3G,Quantum GO 4G,Quantum Muv,Quantum Muv Pro,Quantum Muv Up,Quantum YOU,Samsung Galaxy A3 2016,Samsung Galaxy A3 Duos,Samsung Galaxy A5,Samsung Galaxy A5 2016,Samsung Galaxy A5 2017,Samsung Galaxy A7,Samsung Galaxy A7 2016,Samsung Galaxy A7 2017,Samsung Galaxy A9 Pro 2016,Samsung Galaxy Core 2 Duos,Samsung Galaxy Core Plus Duos TV,Samsung Galaxy E5 4G Duos,Samsung Galaxy E7,Samsung Galaxy Gran 2 Duos TV,Samsung Galaxy Gran Neo Duos,Samsung Galaxy Gran Neo Plus Duos,Samsung Galaxy Gran Prime 3G Duos,Samsung Galaxy Gran Prime Duos,Samsung Galaxy Gran Prime Duos TV,Samsung Galaxy Grand Duos i9082,Samsung Galaxy J1 2016,Samsung Galaxy J1 Mini,Samsung Galaxy J2 4G Duos,Samsung Galaxy J2 4G Duos TV,Samsung Galaxy J2 Prime TV,Samsung Galaxy J3,Samsung Galaxy J5,Samsung Galaxy J5 2016 Metal,Samsung Galaxy J5 PRO,Samsung Galaxy J5 Prime,Samsung Galaxy J7,Samsung Galaxy J7 2016 Metal,Samsung Galaxy J7 Neo,Samsung Galaxy J7 PRO,Samsung Galaxy J7 Prime,Samsung Galaxy Mega Duos,Samsung Galaxy Note 2 N7100,Samsung Galaxy Note 3,Samsung Galaxy Note 3 Neo Duos,Samsung Galaxy Note 4,Samsung Galaxy Note 5,Samsung Galaxy Note 8,Samsung Galaxy Note Edge,Samsung Galaxy On 7,Samsung Galaxy Pocket 2 Duos,Samsung Galaxy S Duos 2,Samsung Galaxy S3 Duos,Samsung Galaxy S3 Mini,Samsung Galaxy S3 Neo Duos i9300i,Samsung Galaxy S3 Slim Duos,Samsung Galaxy S3 i9300,Samsung Galaxy S4 Mini,Samsung Galaxy S4 Mini Duos,Samsung Galaxy S4 i9500,Samsung Galaxy S4 i9505,Samsung Galaxy S4 i9515,Samsung Galaxy S5,Samsung Galaxy S5 Duos,Samsung Galaxy S5 Mini,Samsung Galaxy S5 Mini Duos,Samsung Galaxy S5 New Edition,Samsung Galaxy S5 New Edition Duos,Samsung Galaxy S6 Edge,Samsung Galaxy S6 Edge Plus,Samsung Galaxy S6 Flat,Samsung Galaxy S7,Samsung Galaxy S7 Edge,Samsung Galaxy S8,Samsung Galaxy S8 Plus,Samsung Galaxy Tab 3 10.1 Wi-Fi + 3G,Samsung Galaxy Tab 4 10.1 Wi-Fi + 3G,Samsung Galaxy Tab 4 10.1 Wi-Fi,Samsung Galaxy Tab A 2016 10.1 W-Fi + 4G,Samsung Galaxy Tab A com S Pen 8 Wi-Fi + 4G,Samsung Galaxy Tab E 7 Wi-Fi,Samsung Galaxy Tab E 7 Wi-Fi + 3G,Samsung Galaxy Tab E 9.6 Wi-Fi,Samsung Galaxy Tab E 9.6 Wi-Fi + 3G,Samsung Galaxy Tab Pro 10.1 Wi-Fi,Samsung Galaxy Tab S 10.5 Wi-Fi,Samsung Galaxy Tab S 10.5 Wi-Fi + 4G,Samsung Galaxy Tab S 8.4 Wi-Fi + 4G,Samsung Galaxy Tab S2 8 Wi-Fi + 4G,Samsung Galaxy Tab S2 9.7 Wi-Fi + 4G,Samsung Galaxy Win 2 Duos TV,Samsung Galaxy Win Duos,Samsung Galaxy Y Duos,Samsung Galaxy Young 2 Duos TV,Samsung Gear Fit 2 Grande,Samsung Gear Fit 2 Pequeno,Samsung Gear S2,Samsung Gear S3 Classic,Samsung Gear S3 Frontier,Sony Xperia M4 Aqua,Sony Xperia M4 Aqua Dual,Sony Xperia Z ULTRA,Sony Xperia Z2,Sony Xperia Z3,Sony Xperia Z3 Compact,Sony Xperia Z3 Dual,Sony Xperia Z3 Plus,Sony Xperia Z3 TV,Sony Xperia Z5,Sony Xperia Z5 Premium,Xiaomi Redmi 2,iPad 2 Wi-Fi,iPad 2 Wi-Fi + 3G,iPad 3 Wi-Fi,iPad 3 Wi-Fi + 4G,iPad 4 Wi-Fi
0,0566e9c1,0,5.0,23.75,15.147059,0.0,0.088235,0.117647,0.014706,0.705882,0.073529,0.044118,0.235294,0.014706,0.397059,0.25,0.029412,0.029412,0.0,0.0,0.0,0.0,1.0,0.088235,0.044118,0.014706,0.014706,0.220588,0.0,0.014706,0.0,0.014706,0.338235,0.25,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.147059,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.044118,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.044118,0.102941,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.029412,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,6ec7ee77,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,abe7a2fb,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,34728364,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,87ed62de,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,db2c4d27,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,cde431db,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,be65035b,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,a4178891,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,d066f64c,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [14]:
training = ''

In [15]:
y = pd.factorize(train_completo['label'])[0]
train_completo = train_completo[columnas_filtrar]
# VER COMO OBTENER LAS COLUMNAS ESPECÍFICAS QUE QUEREMOS TRABAJAR.
features = train_completo.columns

UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 7: ordinal not in range(128)

In [15]:
y

array([0, 0, 0, ..., 0, 0, 0])

In [16]:
# Creamos un clasificador con Random Forest..
clf = RandomForestClassifier(n_jobs=2, random_state=0)
# Entrenamos.
clf.fit(train_completo[features], y)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=2,
            oob_score=False, random_state=0, verbose=0, warm_start=False)

In [17]:
# Predecimos.
eventos_agrupados['label'] = 0
clf.predict(eventos_agrupados[features])

array([0, 0, 0, ..., 0, 0, 0])

In [18]:
# REVISAR. todos dan iguales.
prueba = clf.predict_proba(eventos_agrupados[features])[0:10000]

In [19]:
y = np.unique(prueba)

In [20]:
y

array([0.        , 0.00909091, 0.01111111, 0.01666667, 0.02159091,
       0.025     , 0.02857143, 0.03      , 0.03333333, 0.04      ,
       0.04242424, 0.05      , 0.05975758, 0.06      , 0.06042769,
       0.06333333, 0.06611111, 0.06666667, 0.075     , 0.08      ,
       0.08147547, 0.08333333, 0.09166667, 0.09666667, 0.1       ,
       0.10833333, 0.11      , 0.11666667, 0.11666667, 0.125     ,
       0.12666667, 0.13      , 0.13333333, 0.135     , 0.14      ,
       0.15      , 0.15714286, 0.16      , 0.16666667, 0.175     ,
       0.1752381 , 0.18      , 0.18333333, 0.18333333, 0.18557692,
       0.18666667, 0.18897547, 0.19333333, 0.195     , 0.2       ,
       0.2       , 0.20833333, 0.21      , 0.22111111, 0.2275    ,
       0.22857143, 0.23047619, 0.23214286, 0.25      , 0.25166667,
       0.275     , 0.28833333, 0.29666667, 0.3       , 0.32      ,
       0.33333333, 0.335     , 0.34      , 0.35      , 0.35666667,
       0.3572619 , 0.35833333, 0.36666667, 0.36857143, 0.4    

In [21]:
prueba

array([[1. , 0. ],
       [0.9, 0.1],
       [0.8, 0.2],
       ...,
       [0.9, 0.1],
       [1. , 0. ],
       [1. , 0. ]])