### ÍNDICE
* [**0) Notebook**](#section_notebook)
* [**1) Import data**](#section_import)
* [**2) Validation**](#section_val)

# 0) Notebook <a id='section_notebook'></a>

In [4]:
#### Librerías y modulos ####
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn
from pandas.io import gbq
from datetime import datetime
import warnings
import missingno as msno

In [5]:
#### Notebook display settings ####
# Wider-display options, kept for reference (currently disabled):
#pd.set_option('display.max_columns', 500)
#pd.set_option('display.width', 1000)

# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")

# Render floats with exactly two decimals in DataFrame/Series output.
pd.set_option('display.float_format', '{:.2f}'.format)

In [6]:
#### Notebook parameters ####
# Path prefix (trailing slash included) prepended to saved file names.
path_save = '../datos/'
# Date string substituted into the BigQuery `WHERE date > ...` filter.
filter_date = "2022-04-01"

# 1) Import data <a id='section_import'></a>

In [7]:
# Import the BigQuery table: every row of match_meli_ok whose date is
# strictly greater than filter_date.
query = (
    "SELECT * FROM `data-team-k.pricing_data.match_meli_ok` "
    f"WHERE date > '{filter_date}' "
)
df_meli = gbq.read_gbq(query, project_id="data-team-k")

In [8]:
# Save the downloaded frame as a pickle whose file name carries today's date.
fecha = datetime.today().strftime('%Y-%m-%d')
df_meli.to_pickle(f'{path_save}df_meli_{fecha}.pkl')

In [9]:
# Load the pickle back into df_meli (disabled; uncomment to use).
# The file name is rebuilt from path_save and fecha, so it only matches a
# snapshot saved under that same date string.
# NOTE(review): presumably meant as an alternative to re-running the BigQuery query — confirm.
#df_meli = pd.read_pickle('{}df_meli_{}.pkl'.format(path_save,fecha))

In [10]:
# Latest value of the `date` column in the downloaded data.
df_meli["date"].max()

Timestamp('2022-04-18 00:00:00')

# 2) Validation <a id='section_val'></a>

In [11]:
# Dimensions of the downloaded frame as (rows, columns).
df_meli.shape

(822639, 42)

In [12]:
# Number of missing values in each column.
df_meli.isna().sum()

runtime                  0
date                     0
timestamp                0
date_hour           822639
price_symbol             0
price_amount_txt         0
price_amount             0
car_year               475
car_kms_txt              0
car_kms                475
km                       0
car_title                0
car_location             0
car_location_0           0
car_location_1         831
car_link                 0
car_id                   0
year                     0
brand                    0
model                    0
version                  0
dealer                   0
match_marca_a         1011
score_marca_a         1011
match_modelo_a        1011
score_modelo_a        1011
match_v1_a            1011
score_v1_a            1011
total_score_a         1011
Seg_a                 1011
Subseg_a              1011
gama_a                1011
match_marca_c         1011
score_marca_c         1011
match_modelo_c        1011
score_modelo_c        1011
match_v1_c            1011
s

In [13]:
######### benchmark ########## (do not run this chunk)
# Summary statistics of the numeric columns with extra percentiles (10/25/50/75/90/99).
# Presumably the stored output of this cell is the baseline the current run is
# compared against, which is why it must not be re-executed — confirm.
# NOTE(review): the stored output here is identical to the current-run output
# further down; verify the baseline was not overwritten by a re-run.
df_meli.describe(percentiles=[0.1,0.25,0.5,0.75,0.90,0.99])

Unnamed: 0,price_amount,car_year,car_kms,year,score_marca_a,score_modelo_a,score_v1_a,total_score_a,score_marca_c,score_modelo_c,score_v1_c,total_score_c
count,822639.0,822164.0,822164.0,822639.0,821628.0,821628.0,821628.0,821628.0,821628.0,821628.0,821628.0,821628.0
mean,2736724.3,2015.27,87288.13,2015.27,99.6,97.98,85.88,84.07,99.6,96.6,88.85,85.73
std,2767996.07,4.26,124844.88,4.26,2.12,9.12,16.71,18.48,1.93,11.31,17.98,20.3
min,1850.0,2006.0,0.0,2006.0,33.0,0.0,0.0,0.0,50.0,0.0,0.0,0.0
10%,59800.0,2009.0,0.0,2009.0,100.0,95.0,61.0,60.0,100.0,90.0,67.0,55.8
25%,1350000.0,2012.0,27600.0,2012.0,100.0,100.0,77.0,75.0,100.0,100.0,82.0,80.0
50%,2230000.0,2016.0,80000.0,2016.0,100.0,100.0,90.0,89.24,100.0,100.0,100.0,92.0
75%,3550000.0,2019.0,126000.0,2019.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
90%,5489900.0,2021.0,172000.0,2021.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
99%,10500000.0,2021.0,297000.0,2021.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0


In [14]:
########## current run #########
# Summary statistics of the numeric columns, reported at the
# 10/25/50/75/90/99 percentiles.
summary_percentiles = [0.10, 0.25, 0.50, 0.75, 0.90, 0.99]
df_meli.describe(percentiles=summary_percentiles)

Unnamed: 0,price_amount,car_year,car_kms,year,score_marca_a,score_modelo_a,score_v1_a,total_score_a,score_marca_c,score_modelo_c,score_v1_c,total_score_c
count,822639.0,822164.0,822164.0,822639.0,821628.0,821628.0,821628.0,821628.0,821628.0,821628.0,821628.0,821628.0
mean,2736724.3,2015.27,87288.13,2015.27,99.6,97.98,85.88,84.07,99.6,96.6,88.85,85.73
std,2767996.07,4.26,124844.88,4.26,2.12,9.12,16.71,18.48,1.93,11.31,17.98,20.3
min,1850.0,2006.0,0.0,2006.0,33.0,0.0,0.0,0.0,50.0,0.0,0.0,0.0
10%,59800.0,2009.0,0.0,2009.0,100.0,95.0,61.0,60.0,100.0,90.0,67.0,55.8
25%,1350000.0,2012.0,27600.0,2012.0,100.0,100.0,77.0,75.0,100.0,100.0,82.0,80.0
50%,2230000.0,2016.0,80000.0,2016.0,100.0,100.0,90.0,89.24,100.0,100.0,100.0,92.0
75%,3550000.0,2019.0,126000.0,2019.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
90%,5489900.0,2021.0,172000.0,2021.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
99%,10500000.0,2021.0,297000.0,2021.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
