In [88]:
import pandas               as pd
import numpy                as np
import plotly.express       as px
import plotly.graph_objects as go
import ipywidgets           as widgets
from ipywidgets      import fixed, interact, interact_manual
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

from geopy.geocoders import Nominatim

# Render floats with two decimal places in pandas output
pd.set_option('display.float_format', '{:.2f}'.format)
# Default matplotlib figure size as [width, height]
plt.rcParams.update({'figure.figsize': [ 20, 10 ]})

## 0. Carga

In [99]:
# Load the processed housing dataset; the 'date' column is parsed as datetime
df_raw = pd.read_csv('../data/processed/kc_house_processed.csv', parse_dates=['date'])
# DataFrame.info() prints its report and returns None, so it is called directly
# (wrapping it in print() would add a stray "None" line to the output)
df_raw.info()
df_raw.head()

OSError: [Errno 22] Invalid argument: '*/kc_house_processed.csv'

## 1. Engenharia de atributos

#### 1.1. Criando variáveis

In [90]:
# New attributes derived from the sale date and the condition grade
data = df_raw.copy()
data['month'] = data['date'].dt.month
data['month_name'] = data['date'].dt.month_name()

# Month -> season lookup; any month not listed here falls back to 'fall'
season_by_month = {12: 'winter', 1: 'winter', 2: 'winter',
                   3: 'spring', 4: 'spring', 5: 'spring',
                   6: 'summer', 7: 'summer', 8: 'summer'}
data['season'] = data['month'].apply(lambda m: season_by_month.get(m, 'fall'))

# Condition grade -> label: 5 is 'good', 3 and 4 are 'regular', anything else is 'bad'
condition_labels = {5: 'good', 4: 'regular', 3: 'regular'}
data['condition_type'] = data['condition'].apply(lambda c: condition_labels.get(c, 'bad'))

# Median price per region (zipcode)
regional_median = (
    data.groupby('zipcode')['price']
        .median()
        .rename('regional_median')
        .reset_index()
)

# Median price per region and season. The price column is selected explicitly:
# the first positional parameter of GroupBy.median is `numeric_only`, not a column name.
season_region_median = (
    data.groupby(['zipcode', 'season'])['price']
        .median()
        .rename('season_median')
        .reset_index()
)

#### 1.2. Definição dos imóveis para compra

In [None]:
# Attach each region's median price. A 'regional_median' column left over from a
# previous run of this cell is dropped first, so re-running does not create
# 'regional_median_x' / 'regional_median_y' duplicates.
data = pd.merge(data.drop(columns='regional_median', errors='ignore'),
                regional_median, how='left', on='zipcode')

# Flag a house for purchase when it is priced below its regional median
# and its condition label is 'good'
is_below_regional_median = data['price'] < data['regional_median']
is_good_condition = data['condition_type'] == 'good'
data['buy'] = np.where(is_below_regional_median & is_good_condition, 'Yes', 'No')

#### 1.3. Dados de geolocalização dos imóveis elegíveis

In [None]:
# Geolocation data (this whole cell is commented out and does not run).
# When enabled, it reverse-geocodes the lat/long of every house flagged with
# buy == 'Yes' through Nominatim, one request per row, and writes the result to
# '../data/processed/geoloc.csv'.
# NOTE(review): the `geolocator` instance created below is never used; the loop
# builds a new Nominatim client on every iteration instead.
# NOTE(review): no later cell visible in this notebook reads geoloc.csv - confirm
# whether this cell is still needed.
# geolocation = data.loc[data['buy'] == 'Yes'][['id', 'lat', 'long']].copy().reset_index( drop=True )
# geolocator = Nominatim( user_agent='geoapiExercises' )
# geolocation['street'] = 'NA'
# geolocation['house_num'] = 'NA'
# geolocation['city'] = 'NA'
# geolocation['neighbourhood'] = 'NA'
# geolocation['county'] = 'NA'
# geolocation['state'] = 'NA'
# for i in range( len( geolocation ) ):
#     query = str( geolocation.loc[i, 'lat'] ) + ',' + str( geolocation.loc[i, 'long'] )
#     response = Nominatim( user_agent=f'geoapiExercises{i}' ).reverse( query )
#     response = pd.json_normalize( response.raw['address'] )
#     geolocation.iloc[i, 3] = response.apply( lambda x: x['road']          if 'road'          in response.columns else 'NA', axis = 1 )    
#     geolocation.iloc[i, 4] = response.apply( lambda x: x['house_number']  if 'house_number'  in response.columns else 'NA', axis = 1 )
#     geolocation.iloc[i, 5] = response.apply( lambda x: x['city']          if 'city'          in response.columns else 'NA', axis = 1 )
#     geolocation.iloc[i, 6] = response.apply( lambda x: x['neighbourhood'] if 'neighbourhood' in response.columns else 'NA', axis = 1 )
#     geolocation.iloc[i, 7] = response.apply( lambda x: x['county']        if 'county'        in response.columns else 'NA', axis = 1 )
#     geolocation.iloc[i, 8] = response.apply( lambda x: x['state']         if 'state'         in response.columns else 'NA', axis = 1 )
# geolocation['address'] = geolocation['street'] + ', ' + geolocation['house_num']
# geolocation.to_csv('../data/processed/geoloc.csv', index=False)

#### 1.4. Definição do preço de revenda

In [None]:
# Attach the median price of each (zipcode, season) pair
data = pd.merge(data, season_region_median, how='left', on=['zipcode', 'season'])

# Resale price for houses flagged to buy: purchase price +30% when it is below the
# seasonal regional median, +10% when it is at or above it; 0 for every other row
is_buy = data['buy'] == 'Yes'
below_season_median = data['price'] < data['season_median']
at_or_above_season_median = data['price'] >= data['season_median']
data['sell_price'] = np.select(
    [is_buy & below_season_median, is_buy & at_or_above_season_median],
    [data['price'] * 1.3, data['price'] * 1.1],
    default=0.0,
)

# Absolute gap between the resale price and the seasonal median
# (kept at 0 for rows without a resale price)
data['diff_price'] = (
    (data['sell_price'] - data['season_median']).abs().where(data['sell_price'] != 0, 0)
)

# Profit is the resale price minus the purchase price, only for houses flagged to buy
data['profit'] = (data['sell_price'] - data['price']).where(is_buy, 0)

# Drop the helper date columns.
# NOTE(review): 'year' is not created anywhere in this notebook; presumably it
# comes from the processed CSV - confirm, otherwise this drop raises KeyError.
data = data.drop(['year', 'month', 'month_name'], axis=1)

## 2. Métricas

In [None]:
# number of rows in the dataset
len(data)

In [None]:
# total price of every house in the dataset, scaled to billions
data.loc[:, 'price'].sum() * 1e-9

In [None]:
# number of houses suggested for purchase
data.loc[data['buy'].eq("Yes"), 'id'].count()

In [None]:
# total planned investment (purchase price of the suggested houses), scaled to thousands
data[data['buy'] == "Yes"]['price'].sum() * 1e-3

In [None]:
# total expected revenue (resale price of the suggested houses)
data[data['buy'] == "Yes"]['sell_price'].sum()

In [None]:
# total expected profit over the suggested houses
data[data['buy'] == "Yes"]['profit'].sum()

In [None]:
# total expected profit as a percentage of the total investment
# (computed as a direct ratio; the previous pct_change()+1 detour produced the same
# profit/price ratio indirectly and raised IndexError when no house was flagged)
buy_totals = data.loc[data['buy'] == "Yes", ['price', 'profit']].sum()
val = buy_totals['profit'] / buy_totals['price']
val * 100

In [None]:
# number of houses per condition label
data.groupby('condition_type')['id'].count().reset_index()

In [None]:
# total profit per season of the year
data.groupby('season')['profit'].sum().reset_index()

In [None]:
# mean price per unit of lot area, per region
# NOTE(review): the column is named 'price_m2' but the divisor is 'sqft_lot';
# confirm whether the processed data already stores the lot area in m2.
aux = data[['zipcode','sqft_lot','price']].assign(
    price_m2=lambda frame: frame['price'] / frame['sqft_lot']
)
aux2 = aux.groupby('zipcode', as_index=False)['price_m2'].mean()
aux2

In [None]:
# Pareto table: profit per region, sorted from the most to the least profitable,
# with the running total and its share of the overall profit
profit_by_region = data.groupby('zipcode', as_index=False)['profit'].sum()
pareto = (
    profit_by_region
    .sort_values('profit', ascending=False)
    .reset_index(drop=True)
    .assign(
        acumulado=lambda frame: frame['profit'].cumsum(),
        total=data['profit'].sum(),
        perc_acumulado=lambda frame: frame['acumulado'] / frame['total'],
        # zipcodes become strings so that matplotlib treats them as categories
        zipcode=lambda frame: frame['zipcode'].astype(str),
    )
)

## 3. Visualizações

#### 3.1. Pareto

In [None]:
def millions(x, pos):
    """Format a tick value as millions of dollars, e.g. 1500000 -> '$ 1.5M'.

    `x` is the tick value and `pos` the tick position; `pos` is accepted because
    matplotlib's FuncFormatter passes both arguments, but it is not used.
    """
    in_millions = x * 1e-6
    return f"$ {in_millions:1.1f}M"

def addlabels(x,y):
    """Write each value of `y` as a centred text label at half its height.

    One label is drawn per element of `x`; only the length of `x` is used, and
    the i-th label is placed at horizontal position i on the current axes.
    """
    for idx in range(len(x)):
        height = y[idx]
        plt.text(idx, height/2, height, ha = 'center')

# bmh style
plt.style.use("bmh")
fig, ax = plt.subplots()
# second axes sharing the X axis but with an independent Y axis
ax2 = ax.twinx()

# bars: profit per region; line: cumulative share of the total profit
ax.bar(pareto['zipcode'], pareto['profit'], color="C0")
ax2.plot(pareto['zipcode'], pareto['perc_acumulado'], color='C4', marker="o")

# left Y axis (bars): label, tick colour and "$ x.xM" tick format
ax.set_ylabel("Lucro por região", color="C0")
ax.tick_params(axis="y", labelcolor="C0")
ax.yaxis.set_major_formatter(mtick.FuncFormatter(millions))
# rotate the X tick labels
ax.tick_params(axis="x", rotation=90)

# right Y axis (line): label, tick colour and percent tick format
ax2.set_ylabel("Percentual", color="C4")
ax2.tick_params(axis="y", labelcolor="C4")
ax2.yaxis.set_major_formatter(mtick.PercentFormatter(1))

# Cumulative-share labels: the first 25 points are labelled above the marker;
# after that, odd and even positions use different offsets.
# NOTE(review): 25 looks hand-tuned for this dataset - confirm.
for pos, share in enumerate(pareto['perc_acumulado']):
    if pos < 25:
        offset = (0, 20)
    elif pos % 2 == 1:
        offset = (10, -20)
    else:
        offset = (0, 10)
    ax2.annotate(f"{share:.0%}", (pos, share), textcoords="offset points", xytext=offset, ha="center", color="C4", fontweight="bold")

# Profit labels: white text shifted down from the bar top for the first 38 bars,
# bar-coloured text shifted up for the remaining ones.
# NOTE(review): 38 looks hand-tuned for this dataset - confirm.
for pos, value in enumerate(pareto['profit']):
    label_below_top = pos < 38
    ax.annotate(
        f"${value*1e-6:1.1f}M",
        (pos, value),
        textcoords="offset points",
        xytext=(0, -40) if label_below_top else (0, 10),
        ha="center",
        rotation=90,
        color="white" if label_below_top else "C0",
        fontweight="bold",
    )

ax.grid(False)
ax2.grid(True)

plt.show()

#### 3.2. Filtros

In [None]:
# "Select all" checkbox; it is passed to make_map as filter_clear
f_filters = widgets.Checkbox(
    value=True,
    description='Select all',
    disabled=False,
    indent=True
)

# Region selector listing the sorted unique zipcodes; passed to make_map as filter_region
f_zipcode = widgets.Dropdown(
    options = data['zipcode'].sort_values().unique().tolist(),
    description = 'Region',
    # the trait is named `disabled`; `disable` is not a Dropdown argument
    disabled = False
)

#### 3.3. Imóveis para compra/revenda

In [None]:
# Columns shown on the map, renamed for display (e.g. 'sell_price' -> 'Sell price')
mapdata = data[['id','lat','long','zipcode','buy','season','price','sell_price','profit']].copy()
mapdata.columns = [col.capitalize().replace('_',' ') for col in mapdata.columns]
mapdata['Season'] = mapdata['Season'].apply(lambda x: x.capitalize())
mapdata['Price'] = mapdata['Price'].apply(lambda x: f"${x*1e-3:1.1f}K")
mapdata['Sell price'] = mapdata['Sell price'].apply(lambda x: f"${x*1e-3:1.1f}K" if x != 0 else f"${x}")

# Marker size: the profit itself, or 70% of the smallest non-zero profit for rows
# with zero profit. The smallest non-zero profit is computed once here instead of
# once per zero-profit row; when no row has a non-zero profit the size falls back to 1.
nonzero_profit = data.loc[data['profit'] != 0, 'profit']
zero_profit_size = int(nonzero_profit.min() * .7) if not nonzero_profit.empty else 1
# 'size' must be derived before 'Profit' is turned into a display string below
mapdata['size'] = mapdata['Profit'].apply(lambda x: zero_profit_size if x == 0 else int(x))
mapdata['Profit'] = mapdata['Profit'].apply(lambda x: f"${x*1e-3:1.1f}K" if x != 0 else f"${x}")

In [None]:
def make_map( mapdata: pd.DataFrame = None, filter_region = None, filter_clear: bool = True ) -> None:
    """Show the houses on an interactive map, optionally restricted to one region.

    Parameters
    ----------
    mapdata : pd.DataFrame
        Frame with the columns read below: 'Id', 'Lat', 'Long', 'Zipcode', 'Buy',
        'Season', 'Price', 'Sell price', 'Profit' and 'size'.
    filter_region
        Value compared against the 'Zipcode' column; only used when
        `filter_clear` is falsy.
    filter_clear : bool
        When truthy every row is plotted; otherwise only the rows whose
        'Zipcode' equals `filter_region`.

    Raises
    ------
    ValueError
        If `mapdata` is None.
    """
    if mapdata is None:
        raise ValueError("mapdata must be a DataFrame, got None")

    if not filter_clear:
        mapdata = mapdata.loc[mapdata['Zipcode']==filter_region]

    _map = px.scatter_mapbox( mapdata,
                              lat='Lat',
                              lon='Long',
                              color='Buy',
                              # explicit value -> colour mapping, so the colours do not
                              # depend on which 'Buy' value appears first in the
                              # (possibly filtered) frame
                              color_discrete_map={'No': '#999999', 'Yes': '#0F3D6E'},
                              zoom=10,
                              size='size',
                              hover_data={'Id': True,
                                          'Lat': False,
                                          'Long': False,
                                          'Zipcode': True,
                                          'Buy': False,
                                          'Season': True,
                                          'Price': True,
                                          'Sell price': True,
                                          'Profit': True}
                            )
    _map.update_layout(mapbox_style='carto-positron')
    _map.update_layout(height=600, margin = {'r':0,'t':0,'l':0,'b':0})
    _map.show()

In [None]:
# Wire the widgets to make_map: the frame is passed as a fixed (non-widget) argument,
# the dropdown drives filter_region and the checkbox drives filter_clear.
# The trailing ';' suppresses the cell's text output.
interact(
    make_map,
    mapdata=fixed(mapdata),
    filter_region=f_zipcode,
    filter_clear=f_filters,
);