In [1]:
# Auto-reload imported modules before each cell runs, so edits to the local
# `experiments` package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from experiments.extract import extract
from experiments.experiment import Experiment

import pandas as pd
import altair as alt
from lightgbm import LGBMRegressor

from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.preprocessing import KBinsDiscretizer, OneHotEncoder
from category_encoders import TargetEncoder, QuantileEncoder

# Show floats in displayed frames with thousands separators and two decimals.
pd.options.display.float_format = '{:,.2f}'.format

# Data Sample

In [3]:
# Load the 'RENTAL' extract into the working frame.
X = extract('RENTAL')
# Truncate each creation date to the first day of its month by formatting it
# as 'YYYY-MM-01' and parsing the string back into a datetime.
X['month'] = pd.to_datetime(X.created_date.dt.strftime('%Y-%m-01'))
print(X.shape)
# Fixed seed so the previewed rows are identical on every Restart & Run All;
# transposed so each sampled listing reads as one column.
X.sample(5, random_state=42).T

(32846, 14)


title,Cobertura Duplex na beira da Lagoa da Tijuca e vista para a Pedra da Gávea,Rio de Janeiro - Apartamento Padrão - Barra da Tijuca,Recreio dos Bandeirantes | Casa 4 quartos,Apartamento - Rua Clóvis Beviláqua - Aluguel - Tijuca,RIO DE JANEIRO - Apartamento Padrão - RECREIO DOS BANDEIRANTES
url,/imovel/venda-cobertura-2-quartos-com-terraco-barra-da-tijuca-zona-oeste-rio-de-janeiro-rj-90m2-id-2601106922/,/imovel/aluguel-apartamento-3-quartos-barra-da-tijuca-zona-oeste-rio-de-janeiro-rj-300m2-id-2593785179/,/imovel/casa-4-quartos-recreio-dos-bandeirantes-zona-oeste-rio-de-janeiro-com-garagem-350m2-aluguel-RS24000-id-2620224730/,/imovel/apartamento-2-quartos-tijuca-zona-norte-rio-de-janeiro-com-garagem-107m2-aluguel-RS2400-id-2631308487/,/imovel/apartamento-3-quartos-recreio-dos-bandeirantes-zona-oeste-rio-de-janeiro-com-garagem-112m2-aluguel-RS3200-id-2621434561/
origin,zapimoveis,zapimoveis,vivareal,vivareal,vivareal
neighborhood,Barra da Tijuca,Barra da Tijuca,Recreio Dos Bandeirantes,Tijuca,Recreio Dos Bandeirantes
usable_area,90,300,350,107,112
unit_types,APARTMENT,APARTMENT,HOME,APARTMENT,APARTMENT
floors,3,0,1,0,0
bedrooms,2,3,4,2,3
bathrooms,2,3,3,2,3
suites,2,3,0,1,1
parking_spaces,0,2,1,1,2
amenities,DECK|SECURITY_CAMERA|BICYCLES_PLACE|ELECTRONIC...,,BALCONY|CLOSET|GARAGE|HOME_OFFICE|PETS_ALLOWED,KITCHEN|SERVICE_AREA,ELEVATOR|BALCONY
lat,0.00,0.00,-23.02,0.00,0.00


In [4]:
# Distribution of `total` per neighborhood, keeping the ten neighborhoods
# with the largest `count`.
(
    X.groupby('neighborhood')['total']
    .describe(percentiles=[0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95])
    .sort_values('count', ascending=False)
    .head(10)
)

Unnamed: 0_level_0,count,mean,std,min,5%,10%,25%,50%,75%,90%,95%,max
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Tijuca,7426.0,3568.35,8176.26,300.0,1700.0,1963.0,2430.0,3050.5,3987.0,5250.0,6304.0,684880.0
Barra da Tijuca,6905.0,19145.27,48331.31,150.0,3573.0,4322.4,6473.0,12886.0,25000.0,42000.0,52500.0,2660000.0
Recreio Dos Bandeirantes,4753.0,8425.81,7250.34,199.0,2300.0,3250.0,4000.0,5823.0,10800.0,17000.0,22000.0,137320.0
Centro,2471.0,2220.06,2105.61,100.0,970.0,1060.0,1330.0,1770.0,2380.0,3350.0,4181.0,28000.0
Copacabana,2198.0,6980.32,6797.77,130.0,1744.25,2148.8,3300.0,5283.5,8375.0,13900.0,18000.0,95000.0
Taquara,2080.0,2193.41,2055.99,450.0,700.0,970.0,1300.0,1650.0,2150.75,3600.0,5500.0,25000.0
Praça Seca,1172.0,1634.5,737.37,500.0,900.0,990.0,1200.0,1516.0,1841.0,2154.0,2983.0,10000.0
Icaraí,1154.0,4613.91,3486.07,1445.0,1945.0,2238.0,2950.0,3783.5,4812.25,7500.0,10340.0,36000.0
Flamengo,1085.0,11122.65,85755.66,500.0,1657.0,1945.2,2862.0,4727.0,10740.0,17772.0,21213.6,2000000.0
Freguesia- Jacarepaguá,820.0,3595.32,7178.23,709.0,1125.65,1300.0,1710.0,2701.0,3890.0,5650.0,7297.15,140350.0


In [5]:
# Median coordinates and median `total` per neighborhood, ignoring rows whose
# latitude is exactly 0.
aux = (
    X.query('lat != 0')
    .groupby('neighborhood')[['lat', 'lon', 'total']]
    .median()
    .reset_index()
)

# Let both axes fit the data instead of being forced to include zero.
lon_axis = alt.X('lon', scale=alt.Scale(zero=False))
lat_axis = alt.Y('lat', scale=alt.Scale(zero=False))

(
    alt.Chart(aux)
    .mark_point()
    .encode(
        x=lon_axis,
        y=lat_axis,
        size='total',
        tooltip=['neighborhood', 'total'],
    )
    .properties(width=900, height=600)
    .interactive()
)

In [6]:
# Count and median of `total` for every (month, neighborhood) pair.
aux = (
    X.groupby(['month', 'neighborhood'])['total']
    .agg(['count', 'median'])
    .reset_index()
)

# Only pairs whose count is above 10 are plotted; points and lines share the
# same data and x/y/color encoding, the points additionally carry a tooltip.
monthly_base = alt.Chart(aux.query('count > 10')).encode(
    x='month',
    y='median',
    color='neighborhood',
)
monthly_points = monthly_base.mark_point().encode(
    tooltip=['month', 'neighborhood', 'count', 'median'],
)
monthly_lines = monthly_base.mark_line()

(monthly_points + monthly_lines).properties(width=900, height=600).interactive()

In [7]:
# Number of rows per (month, neighborhood) pair, stored in column `qtd`.
aux = (
    X.groupby(['month', 'neighborhood'])
    .size()
    .reset_index(name='qtd')
)

(
    alt.Chart(aux)
    .mark_line()
    .encode(
        x='month',
        y='qtd',
        color='neighborhood',
        tooltip=['month', 'neighborhood'],
    )
    .properties(width=900, height=600)
    .interactive()
)

lat   -22.90
lon   -43.35
dtype: float64

In [26]:
# Rows of the "Praça Seca" neighborhood with a non-zero latitude, with both
# coordinates shifted by their median so the median point sits at (0, 0).
aux = (
    X.query('neighborhood == "Praça Seca" and lat != 0')
    .reset_index()
    .assign(
        lat=lambda frame: frame['lat'] - frame['lat'].median(),
        lon=lambda frame: frame['lon'] - frame['lon'].median(),
    )
)

# Both axes are forced to include zero, i.e. the median point.
centered_lon = alt.X('lon', scale=alt.Scale(zero=True))
centered_lat = alt.Y('lat', scale=alt.Scale(zero=True))

(
    alt.Chart(aux)
    .mark_point()
    .encode(x=centered_lon, y=centered_lat)
    .properties(width=900, height=600)
    .interactive()
)

In [32]:
import numpy as np

def cart_to_pol(x, y, x_c=0, y_c=0, deg=True):
    """Convert Cartesian coordinates to polar form around a centre point.

    The offset from the centre is encoded as the complex number
    ``(x - x_c) + 1j * (y - y_c)``; its modulus is the radius and its
    argument is the angle.

    Args:
        x: Horizontal coordinate(s).
        y: Vertical coordinate(s).
        x_c: Horizontal coordinate of the centre (default 0).
        y_c: Vertical coordinate of the centre (default 0).
        deg: If True (default) the angle is in degrees, otherwise radians.

    Returns:
        A ``(radius, angle)`` tuple as produced by ``np.abs`` and
        ``np.angle`` on the complex offset.
    """
    dx = x - x_c
    dy = y - y_c
    offset = dx + 1j * dy
    radius = np.abs(offset)
    theta = np.angle(offset, deg=deg)
    return radius, theta

# Quick check: polar form of the point (0.14, 0.10) about the origin, with the
# angle in degrees (the default).
print(cart_to_pol(0.14, 0.10))

(0.17204650534085256, 35.53767779197438)
