In [1]:
import pandas as pd

# Read the house data CSV into the working DataFrame.
csv_path = '../datasets/kc_house_data.csv'
df = pd.read_csv(csv_path)

# Show the column labels that the following cells refer to by name.
df.columns

Index(['id', 'date', 'price', 'bedrooms', 'bathrooms', 'sqft_living',
       'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',
       'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',
       'lat', 'long', 'sqft_living15', 'sqft_lot15'],
      dtype='object')

In [2]:
# 1. How many properties are there per price level?
#   - Level 0: price between R$ 0.00 and R$ 321,950
#   - Level 1: price between R$ 321,950 and R$ 450,000
#   - Level 2: price between R$ 450,000 and R$ 645,000
#   - Level 3: price above R$ 645,000


def _price_to_level(price):
    """Return the price level (0-3) for one price; each upper bound is inclusive."""
    for level, upper_bound in enumerate((321950, 450000, 645000)):
        if price <= upper_bound:
            return level
    # Anything that is not <= the last bound falls into the top level.
    return 3


df['level'] = df['price'].apply(_price_to_level)

# Number of properties in each level, largest group first.
level_counts = df['level'].value_counts()
level_counts.sort_values(ascending=False)

1    5460
0    5404
2    5376
3    5373
Name: level, dtype: int64

In [3]:
# Spot-check five random rows; the fixed seed makes the same rows appear on every re-run.
df[['id', 'price', 'level']].sample(5, random_state=42)

Unnamed: 0,id,price,level
2827,7805450750,864000.0,3
11303,6127010890,663000.0,3
15833,7199330130,474000.0,2
954,8691300860,851000.0,3
12732,3241600150,287000.0,0


In [4]:
# 2. What is the average living-room size of the properties per "size" class?
#  - Size 0: size between 0 and 1427 sqft
#  - Size 1: size between 1427 and 1910 sqft
#  - Size 2: size between 1910 and 2550 sqft
#  - Size 3: size above 2550 sqft


def _living_area_to_size(sqft):
    """Return the size class (0-3) for one living area; each upper bound is exclusive."""
    for size_class, upper_bound in enumerate((1427, 1910, 2550)):
        if sqft < upper_bound:
            return size_class
    # Anything that is not < the last bound falls into the top class.
    return 3


df['size_sqft_living'] = df['sqft_living'].apply(_living_area_to_size)


print('Sala de estar:')
# Mean living-room size for each size class
for size_class in range(4):
    in_class = df['size_sqft_living'] == size_class
    mean_sqft = df.loc[in_class, 'sqft_living'].mean()
    print('\tSize {}: {:.2f}'.format(size_class, mean_sqft))

# Alternative:
# df[['sqft_living', 'size_sqft_living']].groupby('size_sqft_living').mean().reset_index()

Sala de estar:
	Size 0: 1123.78
	Size 1: 1661.20
	Size 2: 2202.70
	Size 3: 3318.99


In [5]:
# 3. Add the following information to the original dataset:
#  - Place ID: identifier of the location
#  - OSM type: Open Street Map type
#  - Country: country name
#  - Country CODE: country code

#! pip install geopy

import pandas as pd
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

# NOTE: re-reading the CSV rebinds `df`, so the `level` and `size_sqft_living`
# columns added by the earlier cells are not present from this point on.
df = pd.read_csv('../datasets/kc_house_data.csv')
df10 = df[:10].copy()

# Only the first N_GEOCODE_ROWS rows are reverse-geocoded; every other row keeps
# the 'undefined' placeholder.
N_GEOCODE_ROWS = 10

# Create the new columns, pre-filled with a placeholder
#response.raw
df['place_id'] = 'undefined' # response.raw['place_id']
df['osm_type'] = 'undefined' # response.raw['osm_type']
df['country'] = 'undefined' # response.raw['address']['country']
df['country_code'] = 'undefined' # response.raw['address']['country_code']
geolocator = Nominatim(user_agent='geoapiExercises')

# Throttle the calls to one request per second. With its default settings the
# limiter retries on errors and finally yields None instead of raising.
reverse_geocode = RateLimiter(geolocator.reverse, min_delay_seconds=1)


# Iterate over index labels rather than range(n) so the .loc lookups do not
# depend on the index being 0..n-1.
for row_label in df.index[:N_GEOCODE_ROWS]:
    # Build the "lat,long" query string
    query = str(df.loc[row_label, 'lat']) + ',' + str(df.loc[row_label, 'long'])

    # Run the reverse-geocoding query
    response = reverse_geocode(query)

    # No result for these coordinates (or the request failed): keep the placeholders
    if response is None:
        continue

    raw = response.raw
    # 'address' may be missing from the payload; treat that as "no address fields"
    address = raw.get('address', {})

    # Populate data
    if 'place_id' in raw:
        df.loc[row_label, 'place_id'] = raw['place_id']

    if 'osm_type' in raw:
        df.loc[row_label, 'osm_type'] = raw['osm_type']

    if 'country' in address:
        df.loc[row_label, 'country'] = address['country']

    if 'country_code' in address:
        df.loc[row_label, 'country_code'] = address['country_code']


# displaying the result
df[['id', 'lat', 'long', 'place_id', 'osm_type', 'country', 'country_code']].head()


Unnamed: 0,id,lat,long,place_id,osm_type,country,country_code
0,7129300520,47.5112,-122.257,148421265,way,United States,us
1,6414100192,47.721,-122.319,148033904,way,United States,us
2,5631500400,47.7379,-122.233,76736995,node,United States,us
3,2487200875,47.5208,-122.393,145042219,way,United States,us
4,1954400510,47.6168,-122.045,292831047,way,United States,us


In [6]:
#df['bathrooms'].sort_values().unique().tolist()

In [7]:
# 4. Add the following filters to the map:
#  a Minimum size of the living-room area
#  b Minimum number of bathrooms
#  c Maximum price
#  d Maximum size of the basement area
#  e Filter by property condition
#  f Filter by year built


# https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20List.html
import ipywidgets as widgets
from ipywidgets import fixed # informa que os dados ñ podem variar, mas o filtro
import plotly.express as px


# Columns needed by the map and its filters
map_columns = ['id', 'lat', 'long',
               'sqft_living', 'bathrooms', 'price',
               'sqft_basement', 'condition', 'yr_built']

# New frame holding only those columns, with three of them cast to int
houses = df[map_columns].astype({'condition': int,
                                 'yr_built': int,
                                 'sqft_living': int})

# Style shared by every widget below
style = dict(description_width='initial')

# a. Minimum size of living room area (slider bounds are hard-coded)
sqft_living_limit = widgets.IntSlider(
    value=290,
    min=290,
    max=13540,
    description='Living Room',
    disabled=False,
    orientation='horizontal',
    readout=True,
    style=style
)

# b. minimum number of bathrooms (options are the distinct values in the data;
#    the first, smallest option is selected by default)
bathrooms_min = widgets.Dropdown(
    options=df['bathrooms'].sort_values().unique().tolist(),
    description='Bathrooms',
    disabled=False,
    style=style
)

# c. maximum price (slider bounds are hard-coded)
price_max = widgets.FloatSlider(
    value=7700000.0,
    min=75000.0,
    max=7700000.0,
    description='Price Max',
    disabled=False,
    orientation='horizontal',
    readout=True,
    style=style)

# d. Maximum size of the basement area
# Bounds are read from the column itself instead of being hard-coded, so the
# slider always spans exactly the basement sizes present in the data.
basement_sqft_lowest = int(df['sqft_basement'].min())
basement_sqft_highest = int(df['sqft_basement'].max())

basement_max = widgets.IntSlider(
    value=basement_sqft_highest,
    min=basement_sqft_lowest,
    max=basement_sqft_highest,
    description='Basement',
    disabled=False,
    orientation='horizontal',
    readout=True,   # was misspelled `redout`, so the option never reached the slider
    style=style)


# e. Property Conditions Iterative Button
property_conditions_limit = widgets.RadioButtons(
    options=df['condition'].sort_values().unique().tolist(),
    value=3,
    description='Property Condition',
    #orientation='horizontal',
    disabled=False,
    style=style,
)

# f. Year Built Iterative Button
year_built_limit = widgets.IntSlider(
    value=1970,
    min = 1900, # hard-coded; alternatively df['yr_built'].min()
    max = 2015, # hard-coded; alternatively df['yr_built'].max()
    step = 1,
    description = 'Property Year Built',
    disabled=False,   # was misspelled `disable`
    style = style
)


# Graph Function
def update_map(df, sqft_living, bathrooms, price, basement, property_conditions, year_built):
    """Draw the houses that pass every filter on an OpenStreetMap scatter map.

    Parameters
    ----------
    df : DataFrame with the columns 'id', 'lat', 'long', 'sqft_living',
        'bathrooms', 'price', 'sqft_basement', 'condition' and 'yr_built'.
    sqft_living : minimum living area; rows below it are dropped.
    bathrooms : minimum number of bathrooms; rows below it are dropped.
    price : maximum price; rows above it are dropped.
    basement : maximum basement area; rows above it are dropped.
    property_conditions : upper bound on 'condition' (rows with a higher value are dropped).
    year_built : upper bound on 'yr_built' (rows built later are dropped).

    Returns
    -------
    None. The figure is displayed with ``fig.show()``; when no row matches,
    a message is printed instead and nothing is plotted.
    """
    map_columns = ['id', 'lat', 'long', 'sqft_living', 'bathrooms',
                   'price', 'sqft_basement', 'condition', 'yr_built']

    # Minimum filters keep rows at or above the threshold, maximum filters keep
    # rows at or below it.
    keep = (
        (df['sqft_living'] >= sqft_living) &
        (df['bathrooms'] >= bathrooms) &
        (df['price'] <= price) &
        (df['sqft_basement'] <= basement) &
        (df['condition'] <= property_conditions) &
        (df['yr_built'] <= year_built)
    )
    houses = df.loc[keep, map_columns]

    # Nothing to draw: say so instead of rendering a map with no points.
    if houses.empty:
        print('No houses match the selected filters.')
        return

    fig = px.scatter_mapbox( houses,
                       lat = 'lat',
                       lon = 'long',
                       color = 'condition',
                       size = 'price',
                       color_continuous_scale=px.colors.sequential.Viridis_r,
                       size_max=15,
                       zoom=10)

    fig.update_layout( mapbox_style = 'open-street-map' )
    fig.update_layout( height=600, margin={'r':0, 't':0, 'l':0, 'b':0} )
    fig.show()
    
# Bind each filter widget to the matching update_map argument. The prepared
# `houses` frame (needed columns only, int-cast) is passed as the fixed data
# argument so that preparation is actually used instead of the raw `df`.
widgets.interactive( update_map, df=fixed(houses), sqft_living=sqft_living_limit,
                                                price=price_max,
                                                basement=basement_max,
                                                bathrooms=bathrooms_min,
                                                property_conditions=property_conditions_limit,
                                                year_built=year_built_limit,
                                               )

interactive(children=(IntSlider(value=290, description='Living Room', max=13540, min=290, style=SliderStyle(de…

In [8]:
# 5. Add the following filters to the dashboard:
#  - Filter by date available for purchase
#  - Filter by year of renovation
#  - Filter by whether the property has a water view or not



#libs
import ipywidgets as widgets
from matplotlib import gridspec
from matplotlib import pyplot as plt
import datetime as dt
import pandas as pd
from ipywidgets import fixed # informa que os dados ñ podem variar, mas o filtro
import numpy as np


# prepare dataset
data = pd.read_csv('../datasets/kc_house_data.csv')

# Parse the raw date column once and derive every date-based column from it
parsed_date = pd.to_datetime(data['date'])
data['year'] = parsed_date.dt.strftime('%Y')
data['date'] = parsed_date.dt.strftime('%Y-%m-%d')
# Year + week-of-year key. The month is deliberately left out: with it in the
# key, a week that spans two months would be split into two separate groups.
data['year_week'] = parsed_date.dt.strftime('%Y-%U')


# Widgets to control data

# to control date
# The options are the distinct 'YYYY-MM-DD' strings in the data; the initial
# value must be one of them.
date_limit = widgets.SelectionSlider(
            options = data['date'].sort_values().unique().tolist(),
            value = '2014-12-01',
            description = 'Disponível',
            disabled=False,
            continuous_update=False,
            orientation='horizontal',
            readout=True)

# to control waterfront
# Map the 'waterfront' column (1 -> 'yes', anything else -> 'no') to a label
data['is_waterfront'] = data['waterfront'].apply(lambda x:'yes' if x == 1 else 'no')

waterfront_bar = widgets.Dropdown(options= data['is_waterfront'].unique().tolist(), value='no',
                                  description='Water View', disabled=False)  # was misspelled `disable`

# to control year renovated
renovated_yr = widgets.BoundedIntText(
    value=2010,
    min=1930,
    max=2015,
    step=1,
    description='Ano de Renovação',
    disabled=False)


# NOTE: this definition rebinds the name `update_map`, which the map cell
# above also defines; after this cell runs, the name refers to the dashboard.
def update_map(data, limit, option, year):
    """Draw the average-price dashboard for the rows that pass the filters.

    Parameters
    ----------
    data : DataFrame with the columns 'date' ('YYYY-MM-DD' strings), 'year',
        'year_week', 'is_waterfront', 'yr_renovated' and 'price'.
    limit : earliest date to keep, compared as a string against 'date'.
    option : value that 'is_waterfront' must equal.
    year : minimum 'yr_renovated' to keep.

    Returns
    -------
    None. A figure with three panels (by year, by day, by week) is shown.
    """
    #Filter data
    selected = data[(data['date'] >= limit) & (data['is_waterfront'] == option) &
                    (data['yr_renovated'] >= year)]

    fig = plt.figure(figsize=(21,12))

    specs = gridspec.GridSpec(ncols=2, nrows=2, figure=fig)

    ax1 = fig.add_subplot(specs[0, :]) # First row, full width
    ax2 = fig.add_subplot(specs[1, 0]) # Second row, first column
    ax3 = fig.add_subplot(specs[1, 1]) # Second row, second column

    # Every panel aggregates the price column; the titles announce average
    # prices, and aggregating the 'id' column would plot meaningless numbers.
    by_year = selected[['price', 'year']].groupby('year').mean().reset_index()
    ax1.bar(by_year['year'], by_year['price'])
    ax1.set_title('title: Avg Price by Year')

    by_day = selected[['price', 'date']].groupby('date').mean().reset_index()
    ax2.plot(by_day['date'], by_day['price'])
    ax2.set_title('title: Avg Price by Day')

    by_week_of_year = selected[['price', 'year_week']].groupby('year_week').mean().reset_index()
    ax3.bar(by_week_of_year['year_week'], by_week_of_year['price'])
    ax3.set_title('title: Avg Price by Week Of Year')

    # Rotate the labels on both bottom panels; plt.xticks would only touch the
    # axes that happen to be current.
    ax2.tick_params(axis='x', rotation=60)
    ax3.tick_params(axis='x', rotation=60)

    # Render explicitly so each widget change draws the figure in the output area.
    plt.show()

widgets.interactive(update_map, data=fixed(data), limit=date_limit, option=waterfront_bar, year=renovated_yr)

interactive(children=(SelectionSlider(continuous_update=False, description='Disponível', index=212, options=('…