In [59]:
import plotly.express as px
import pandas as pd
import numpy as np

In [60]:
# Load the listings table (first CSV column becomes the index) and
# discard every row that has at least one missing value.
df_train = (
    pd.read_csv('./data/data_app.csv', index_col=[0])
    .dropna()
)


In [61]:
# List the distinct property categories present in the data
pd.unique(df_train['house_category'])

array(['Apartamentos', 'Casas'], dtype=object)

In [62]:
# Selector for the subset to analyse: 'all' keeps every listing,
# 'Apartments' keeps apartments, any other value keeps houses.
show_data = 'all'

if show_data == 'all':
    df = df_train
else:
    # Map the selector onto the category label stored in the data
    selected_category = 'Apartamentos' if show_data == 'Apartments' else 'Casas'
    df = df_train[df_train['house_category'] == selected_category]

## GRÁFICOS GERAIS

### Mapa

In [63]:
# Keep only the listings that have both coordinates filled in
df_sample = df.dropna(subset=['latitude', 'longitude'])

# Columns needed for the map, and the keys that identify one map point
cols = ['house_category', 'latitude', 'longitude', 'house_price']
group_keys = ['house_category', 'latitude', 'longitude']

# Group once by category and coordinates; both aggregations below reuse it
grouped = df_sample[cols].groupby(group_keys)

# Mean price per (category, latitude, longitude), stored in house_price
df_search = grouped.mean().reset_index()

# Number of listings per (category, latitude, longitude), stored under the column label 0
df_search[0] = grouped.count().reset_index()['house_price']

fig = px.scatter_mapbox(
    df_search,
    lon=df_search['longitude'],
    lat=df_search['latitude'],
    center={"lat": -15.878191211624543, "lon": -48.10520207922798},
    zoom=12.5,
    color=df_search['house_price'],
    # size=df_search[0],
    color_continuous_scale=px.colors.diverging.Portland,
    labels={'house_price': 'Preço médio<br>dos imóveis'},
)

# Base map style, background colours and zero margins in a single layout update
fig.update_layout(
    mapbox_style="carto-positron",
    plot_bgcolor='#f8f8f8',
    paper_bgcolor='#f8f8f8',
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.update_traces(marker={'size': 13})

fig.show()

In [68]:
# Mean listing price rounded to an integer, using '.' as the thousands separator
value = f"{df['house_price'].mean():,.0f}".replace(',', '.')
print(value)

229.012


In [51]:
# Highest listing price, printed with ',' as the thousands separator
max_value = df['house_price'].max()
print('R$ ' + format(max_value, ',.0f'))

R$ 1,199,999


In [52]:
# Lowest listing price, printed with ',' as the thousands separator
min_value = df['house_price'].min()
print('R$ {:,.0f}'.format(min_value))

R$ 29,000


In [71]:
# First and third quartiles of the listing prices, computed in one call
q1_price, q3_price = np.quantile(df['house_price'], [0.25, 0.75])

# Q1 / Q3 hold the quartiles as display strings with '.' as the thousands separator
Q1 = f'{q1_price:,.0f}'.replace(',', '.')
Q3 = f'{q3_price:,.0f}'.replace(',', '.')

# Human-readable interquartile price range
range_values = f'de {Q1} à {Q3} reais'

165.000


In [54]:
# Compute the numeric quartiles and their spread inside this cell: the Q1/Q3
# names are bound to formatted strings by the quartile cell above, and IQR is
# not assigned anywhere in the visible notebook, so printing them directly
# fails with NameError on a fresh kernel.
q1_num, q3_num = np.quantile(df['house_price'], [0.25, 0.75])
IQR = q3_num - q1_num
print(f'Q1={q1_num} Q3={q3_num} IRQ={IQR}')

Q1=165000.0 Q3=279900.0 IRQ=114900.0


In [55]:
title = '<b>Distribuição de preços</b>'

# Histogram of listing prices, coloured by property category, with a marginal box plot
fig = px.histogram(data_frame=df, x='house_price', color='house_category', marginal='box',
                   labels = {'house_category': ""})

fig.update_layout(
    # layout.title.font is the supported spelling of the deprecated layout.titlefont
    title = {'text': title, 'font': {'size': 18}},
    template = 'simple_white',
    paper_bgcolor = '#f8f8f8',
    plot_bgcolor = '#f8f8f8',
    # Horizontal legend anchored just above the top-right corner of the plot
    legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1)
)

fig.update_yaxes(
    showgrid = True,
    gridwidth = .3
)

# The x axis carries the price and the y axis the number of listings per bin
# (the previous titles were copied from the bathrooms box plot).
fig['layout']['xaxis']['title'] = 'Preço (R$)'
fig['layout']['yaxis']['title'] = 'Quantidade de imóveis'

fig.show()

In [56]:
title = '<b>Preço vs Tamanho</b>'

# Price against property size, one colour per category.
# trendline='ols' adds a fitted regression line per category
# (plotly needs the statsmodels package for this option).
fig = px.scatter(df, x = 'house_size', y = 'house_price', color = 'house_category', trendline = 'ols',
                labels = {'house_category': ""}, color_discrete_sequence=['#548aaa', '#E6A65C'])

fig.update_layout(
    # layout.title.font is the supported spelling of the deprecated layout.titlefont
    title = {'text': title, 'font': {'size': 18}},
    template = 'simple_white',
    paper_bgcolor = '#f8f8f8',
    plot_bgcolor = '#f8f8f8',
    # Horizontal legend placed near the top-right corner of the plot
    legend=dict(
            orientation="h",
            yanchor="bottom",
            y=0.97,
            xanchor="right",
            x=1.044)
)

# Thin dark outline around each marker
fig.update_traces(marker_line_width=.3, marker_size = 9, marker_line_color = '#282828')

fig.update_xaxes(
    showgrid = True,
    gridcolor = '#f0f0f0',
    gridwidth = .4
)

fig.update_yaxes(
    showgrid = True,
    gridcolor = '#e9e9e9',
    gridwidth = .4
)

# Axis titles are assigned once; the earlier English titles were always
# overwritten by these before the figure was shown.
fig['layout']['xaxis']['title'] = 'Tamanho do imóvel (m²)'
fig['layout']['yaxis']['title'] = 'Preço (R$)'

fig.show()

In [57]:
title = '<b>Preço vs N° de banheiros</b>'

# Display order requested for the bathroom categories on the x axis
# NOTE(review): the list mixes ints with the string '5 ou mais' — confirm it
# matches how n_bathrooms is actually stored in the data.
list_order = [0, 1, 2, 3, 4, '5 ou mais']

# Price distribution per number of bathrooms, one box colour per property category
fig = px.box(df, x = 'n_bathrooms', y = 'house_price', title = title, color = 'house_category',
             category_orders={'n_bathrooms': list_order}, labels = {'house_category': ""},
             color_discrete_sequence=['#5380aa', '#E6A65C'])


fig.update_layout(
    # layout.title.font is the supported spelling of the deprecated layout.titlefont
    title = {'text': title, 'font': {'size': 18}},
    template = 'simple_white',
    paper_bgcolor = '#f8f8f8',
    plot_bgcolor = '#f8f8f8',
    # Horizontal legend anchored just above the top-right corner of the plot
    legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1)
)

fig.update_yaxes(
    showgrid = True,
    gridcolor = '#c1c1c1',
    gridwidth = .4
)

fig['layout']['xaxis']['title'] = 'Número de banheiros'
fig['layout']['yaxis']['title'] = 'Preço (R$)'

fig.show()

In [58]:
title = '<b>Preço vs N° de vagas</b>'

# Price distribution per number of garage spaces, one box colour per property category
fig = px.box(df, x = 'n_garage', y = 'house_price', title = title, color = 'house_category',
             labels = {'house_category': ""}, color_discrete_sequence=['#5380aa', '#E6A65C'])

fig.update_layout(
    # layout.title.font is the supported spelling of the deprecated layout.titlefont
    title = {'text': title, 'font': {'size': 18}},
    template = 'simple_white',
    paper_bgcolor = '#f8f8f8',
    plot_bgcolor = '#f8f8f8',
    # Horizontal legend anchored just above the top-right corner of the plot
    legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1)
)

fig.update_yaxes(
    showgrid = True,
    gridcolor = '#c1c1c1',
    gridwidth = .4
)

fig['layout']['xaxis']['title'] = 'Vagas de garagem'
fig['layout']['yaxis']['title'] = 'Preço (R$)'

fig.show()