In [2]:
import pandas               as pd
import seaborn              as sns
import plotly.express       as px
import plotly.graph_objects as go
import ipywidgets           as widgets
from ipywidgets      import fixed, interact, interact_manual
from matplotlib      import pyplot as plt

def _two_decimals(x):
    """Format a float with exactly two decimal places for DataFrame display."""
    return '%.2f' % x


# render every float in pandas output with two decimals
pd.set_option('display.float_format', _two_decimals)
# default matplotlib figure size (width, height) for the whole notebook
plt.rcParams['figure.figsize'] = [ 20, 10 ]

## 0. Carga

In [8]:
# Load the processed dataset from kc_data.csv.
data = pd.read_csv('../data/processed/kc_data.csv')
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() only appended a stray "None" to the output.
data.info()
data.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21436 entries, 0 to 21435
Data columns (total 24 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   id              21436 non-null  int64  
 1   date            21436 non-null  object 
 2   price           21436 non-null  float64
 3   bedrooms        21436 non-null  int64  
 4   bathrooms       21436 non-null  int64  
 5   sqft_living     21436 non-null  int64  
 6   sqft_lot        21436 non-null  int64  
 7   floors          21436 non-null  int64  
 8   waterfront      21436 non-null  int64  
 9   view            21436 non-null  int64  
 10  condition       21436 non-null  int64  
 11  grade           21436 non-null  int64  
 12  sqft_above      21436 non-null  int64  
 13  sqft_basement   21436 non-null  int64  
 14  yr_built        21436 non-null  int64  
 15  yr_renovated    21436 non-null  int64  
 16  zipcode         21436 non-null  int64  
 17  lat             21436 non-null 

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,yr_built,yr_renovated,zipcode,lat,long,year,month,month_name,season,condition_type
0,7129300520,2014-10-13,221900.0,3,1,1180,5650,1,0,0,...,1955,0,98178,47.51,-122.26,2014,10,October,fall,regular
1,6414100192,2014-12-09,538000.0,3,2,2570,7242,2,0,0,...,1951,1991,98125,47.72,-122.32,2014,12,December,winter,regular
2,5631500400,2015-02-25,180000.0,2,1,770,10000,1,0,0,...,1933,0,98028,47.74,-122.23,2015,2,February,winter,regular
3,2487200875,2014-12-09,604000.0,4,3,1960,5000,1,0,0,...,1965,0,98136,47.52,-122.39,2014,12,December,winter,good
4,1954400510,2015-02-18,510000.0,3,2,1680,8080,1,0,0,...,1987,0,98074,47.62,-122.05,2015,2,February,winter,regular


In [5]:
# Load the list of houses to sell (includes sell_price and profit columns).
sell_list = pd.read_csv('../data/processed/sell_list.csv')
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() only appended a stray "None" to the output.
sell_list.info()
sell_list.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 698 entries, 0 to 697
Data columns (total 13 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             698 non-null    int64  
 1   price          698 non-null    float64
 2   zipcode        698 non-null    int64  
 3   lat            698 non-null    float64
 4   long           698 non-null    float64
 5   year           698 non-null    int64  
 6   month_name     698 non-null    object 
 7   season         698 non-null    object 
 8   sell_price     698 non-null    float64
 9   profit         698 non-null    float64
 10  address        698 non-null    object 
 11  neighbourhood  698 non-null    object 
 12  city           269 non-null    object 
dtypes: float64(5), int64(3), object(5)
memory usage: 71.0+ KB
None


Unnamed: 0,id,price,zipcode,lat,long,year,month_name,season,sell_price,profit,address,neighbourhood,city
0,1202000200,233000.0,98002,47.3,-122.22,2014,November,summer,256300.0,23300.0,"I Street Southeast, 316",King County,
1,3303700376,667000.0,98112,47.62,-122.31,2014,December,fall,867100.0,200100.0,"East Harrison Street, 1414",King County,Seattle
2,3253500160,317625.0,98144,47.57,-122.3,2014,November,spring,412912.5,95287.5,"22nd Avenue South, 3214",King County,Seattle
3,2599001200,305000.0,98092,47.29,-122.19,2014,November,summer,335500.0,30500.0,"Fir Street Southeast, 2020",King County,
4,722079104,314000.0,98038,47.41,-121.96,2014,July,spring,408200.0,94200.0,"290th Avenue Southeast, 21315",King County,


In [7]:
# Load the purchase list, parsing the 'date' column as datetime on read.
purchase_list = pd.read_csv('../data/processed/purchase_list.csv',parse_dates=['date'])
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() only appended a stray "None" to the output.
purchase_list.info()
purchase_list.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21436 entries, 0 to 21435
Data columns (total 12 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   id               21436 non-null  int64         
 1   date             21436 non-null  datetime64[ns]
 2   price            21436 non-null  float64       
 3   zipcode          21436 non-null  int64         
 4   lat              21436 non-null  float64       
 5   long             21436 non-null  float64       
 6   year             21436 non-null  int64         
 7   month_name       21436 non-null  object        
 8   season           21436 non-null  object        
 9   condition_type   21436 non-null  object        
 10  regional_median  21436 non-null  float64       
 11  buy              21436 non-null  object        
dtypes: datetime64[ns](1), float64(4), int64(3), object(4)
memory usage: 2.0+ MB
None


Unnamed: 0,id,date,price,zipcode,lat,long,year,month_name,season,condition_type,regional_median,buy
0,7129300520,2014-10-13,221900.0,98178,47.51,-122.26,2014,October,fall,regular,279500.0,n
1,6414100192,2014-12-09,538000.0,98125,47.72,-122.32,2014,December,winter,regular,425000.0,n
2,5631500400,2015-02-25,180000.0,98028,47.74,-122.23,2015,February,winter,regular,442500.0,n
3,2487200875,2014-12-09,604000.0,98136,47.52,-122.39,2014,December,winter,good,489950.0,n
4,1954400510,2015-02-18,510000.0,98074,47.62,-122.05,2015,February,winter,regular,645000.0,n


## 1. Dash

In [31]:
# total profit summed over every row of the sell list
total_profit = sell_list['profit'].sum()
total_profit

71884784.6

In [32]:
# top 3 zipcodes ranked by how many sell-list houses they contain
houses_per_zipcode = sell_list.groupby('zipcode')[['id']].count()
(
    houses_per_zipcode
    .sort_values('id', ascending=False)
    .head(3)
    .reset_index()
)

Unnamed: 0,zipcode,id
0,98042,49
1,98056,48
2,98006,41


In [33]:
# top 3 zipcodes ranked by total profit of their sell-list houses
profit_per_zipcode = sell_list.groupby('zipcode')[['profit']].sum()
(
    profit_per_zipcode
    .sort_values('profit', ascending=False)
    .head(3)
    .reset_index()
)

Unnamed: 0,zipcode,profit
0,98006,6945070.0
1,98056,3364235.0
2,98042,3133960.0


In [34]:
# most valuable region per season: the zipcode(s) whose summed profit is the
# highest within each season
profit_by_season_zip = (
    sell_list
    .groupby(['season','zipcode'], as_index=False)['profit']
    .sum()
)
# Broadcast each season's maximum back onto its rows, then keep the rows that
# reach it. This aggregates once (instead of twice followed by a merge on the
# float 'profit' column) and still returns every zipcode tied for the maximum.
season_max_profit = profit_by_season_zip.groupby('season')['profit'].transform('max')
(
    profit_by_season_zip
    .loc[profit_by_season_zip['profit'] == season_max_profit]
    .reset_index(drop=True)
)

Unnamed: 0,season,zipcode,profit
0,fall,98006,3153030.0
1,spring,98004,2408160.0
2,summer,98059,1614750.0
3,winter,98006,3002190.0


#### 3.1. Filtros

In [35]:
# Zipcode selector; its options are the sorted distinct zipcodes found in `data`.
f_zipcode = widgets.Dropdown(
    options = data['zipcode'].sort_values().unique().tolist(),
    description = 'Zipcode',
    # The trait is named `disabled`; the previous spelling `disable` is not a
    # Dropdown trait, so the argument was never applied to the widget.
    disabled = False
)

# Checkbox that, when ticked, turns the zipcode filter off.
f_filters = widgets.Checkbox(
    value=False,
    description='Disable filter',
    disabled=False,
    indent=False
)

#### 3.2. Preço dos imóveis

In [36]:
def data_viz( df, region ):
    """Plot the house prices of one zipcode against its regional median.

    Builds a Plotly figure with two traces and displays it with ``fig.show()``:
    one marker per house at its ``price`` (light gray when ``buy == 'n'``,
    blue otherwise) and a line drawn from ``regional_median``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``id``, ``price``, ``zipcode``,
        ``regional_median``, ``condition_type`` and ``buy``.
    region
        Value compared with ``==`` against ``df['zipcode']`` to select rows.

    Returns
    -------
    None
    """
    columns = ['id','price','zipcode','regional_median','condition_type','buy']

    # Take an explicit copy of the selection: the 'color' column added below
    # must be written to an independent frame, not to a slice of `df`
    # (chained indexing here triggered SettingWithCopyWarning).
    plot = df.loc[df['zipcode']==region, columns].copy()

    # Vectorized equivalent of the per-row lambda: 'n' -> lightgray, else blue.
    plot['color'] = plot['buy'].eq('n').map({True: 'lightgray', False: 'blue'})

    # Hover labels, one string per row.
    house_text = (
        'ID: '+plot['id'].astype('string')
        +' | Region: '+plot['zipcode'].astype('string')
        +' | Condition: ' + plot['condition_type']
        + ' | Price: US$' + plot['price'].astype('string')
    )
    median_text = (
        'Region: '+plot['zipcode'].astype('string')
        +' | Median: US$' + plot['regional_median'].astype('string')
    )

    fig = go.Figure()

    # One marker per house; no x is passed, only the y values.
    fig.add_trace(go.Scatter(y=plot['price'],
                        marker_color=plot['color'],
                        mode='markers',
                        text=house_text,
                        name='Imovel'))

    # Reference line built from the regional_median column.
    fig.add_trace(go.Scatter(y=plot['regional_median'],
                        mode='lines',
                        text=median_text,
                        name='Mediana Regional'))

    fig.update_layout(
        xaxis_title="Houses",
        yaxis_title="Acquisition cost",  # spelling fixed (was "Aquisition")
        font=dict( color="#000000" )
    )

    fig.show()

    return None

In [37]:
# Wire the zipcode dropdown to data_viz; the purchase list is passed as a
# fixed (non-interactive) argument. Trailing ';' suppresses the cell repr.
interact(
    data_viz,
    df=fixed(purchase_list),
    region=f_zipcode,
);

interactive(children=(Dropdown(description='Zipcode', options=(98001, 98002, 98003, 98004, 98005, 98006, 98007…

#### 3.3. Imóveis para compra/revenda

In [38]:
def make_map( house_list, region, filters ):
    """Show the houses of ``house_list`` on an OpenStreetMap scatter map.

    Points are colored by ``season``, labeled with the house id and use the
    ``address`` column as hover title. The figure is displayed with
    ``_map.show()``.

    Parameters
    ----------
    house_list : pandas.DataFrame
        Must contain the columns ``id``, ``zipcode``, ``lat``, ``long``,
        ``address`` and ``season``.
    region
        Value compared with ``==`` against ``house_list['zipcode']``; only
        used when ``filters`` is falsy.
    filters : bool
        When truthy the zipcode filter is skipped and every row is plotted;
        otherwise only rows whose zipcode equals ``region`` are plotted.

    Returns
    -------
    None
    """
    # Single conditional selection; replaces an if-branch whose body was a
    # bare `map_data` expression with no effect.
    map_data = house_list if filters else house_list.loc[house_list['zipcode']==region]

    _map = px.scatter_mapbox( map_data,
                              lat='lat',
                              lon='long',
                              hover_name='address',
                              color='season',
                              color_discrete_sequence=px.colors.qualitative.Dark2,
                              zoom=10,
                              text='ID: '+map_data['id'].astype('string'),
                              # hide columns already conveyed by position/color
                              hover_data=dict( lat=False, long=False, season=False)
                            )

    _map.update_layout(mapbox_style='open-street-map')
    _map.update_layout(height=600, margin = {'r':0,'t':0,'l':0,'b':0})
    _map.show()

    return None

In [39]:
# Wire the zipcode dropdown and the "Disable filter" checkbox to make_map; the
# sell list is passed as a fixed (non-interactive) argument.
interact(
    make_map,
    house_list=fixed( sell_list ),
    region=f_zipcode,
    filters=f_filters,
);

interactive(children=(Dropdown(description='Zipcode', options=(98001, 98002, 98003, 98004, 98005, 98006, 98007…