# Contexto
*Empresa*: House Rocket
*Problema de negocio*: Comprar casas com preco baixo, e revender a preco mais alto possivel
*Desafio* Encontrar bons negocios dentro do portolio disponivel, isto 'e, encontrar casas com precos mais baixos, em optima localizacao e com optimo potencial de revenda com preco mais alto;

## Novas perguuntas de negocios

* Visualizar no mapa todas as casas
* Acessibilidade, de qualquer lugar;


## Planeamento da solucao
* porduto a ser entregue: Link online com acesso ao dashboard
* podera cessar o mapa e o dashboard;

## Ferramentas por utilizar
* Python, Jupyter Notebook, VSCode;

In [38]:
import pandas as pd
import numpy as np
pd.set_option('display.float_format', lambda x: '%.3f' % x)



In [39]:
# load datasets
df = pd.read_csv("kc_house_data.csv")

In [40]:
#print dataframe dimmensions
print("number of rows {}".format(df.shape[0]))
print("number of cols {}".format(df.shape[1]))

number of rows 21613
number of cols 21


In [41]:
# print types
df.dtypes

id                 int64
date              object
price            float64
bedrooms           int64
bathrooms        float64
sqft_living        int64
sqft_lot           int64
floors           float64
waterfront         int64
view               int64
condition          int64
grade              int64
sqft_above         int64
sqft_basement      int64
yr_built           int64
yr_renovated       int64
zipcode            int64
lat              float64
long             float64
sqft_living15      int64
sqft_lot15         int64
dtype: object

In [42]:
# convert objects to date
df["date"] = pd.to_datetime(df['date'])

# Descirptive Statistic



In [43]:
num_attributes  = df.select_dtypes( include= ['int64', 'float64'])

In [44]:
# Central tendency - Mean, Median
mean = pd.DataFrame(num_attributes.apply( np.mean, axis=0))
median = num_attributes.apply( np.median, axis=0)

# Dispersion data  - min, max, std
std = pd.DataFrame(num_attributes.apply( np.std, axis=0))
min_ = pd.DataFrame(num_attributes.apply(np.min, axis = 0))
max_ = pd.DataFrame(num_attributes.apply(np.max, axis = 0))

df1 = pd.concat([max_, min_, mean, median, std], axis  = 1 ).reset_index()
df1.columns = ['attributes', 'max_', 'min_', 'mean', 'median', 'std']
# df1.columns= ['max', 'min','mean','median','std']

In [45]:
#print dataframe dimmensions
print("number of rows {}".format(df1.shape[0]))
print("number of cols {}".format(df1.shape[1]))

number of rows 20
number of cols 6


### New columns


*Dortmitory type*

In [46]:
# Create new column calls "dormitory_type"
# if bedroom = 1, 'studio',
# if bedroom = 2 'apartment'
# if bedroom > 2 'house'

df['dormitory_type'] = ""

for i in range( len(df)):
    if df.loc[i, 'bedrooms'] == 1:
        df.loc[i, 'dormitory_type'] = 'studio'
    elif df.loc[i, 'bedrooms'] == 2:
        df.loc[i, 'dormitory_type'] = 'apartment'
    else:
         df.loc[i, 'dormitory_type'] = 'house'

*Levels*

In [49]:
# Define levels of prices
# 0 to 321.950                level 0
# 0 to 321.950 to 450.000     level 1
# 0 to 450.000 to 649.000     level 2
# above  649.000              level 3
df['level'] = ""

for i in range(len(df)):
    if df.loc[i, 'price'] <= 321950:
        df.loc[i, 'level'] = 0
    elif  321950 > df.loc[i, 'price'] <= 450000: 
        df.loc[i, 'level'] = 1  
    elif  45000 > df.loc[i, 'price'] < 649000: 
        df.loc[i, 'level'] = 2
    else: 
        df.loc[i, 'level'] = 3     

In [50]:
display(df)

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,dormitory_type,level
0,7129300520,2014-10-13,221900.000,3,1.000,1180,5650,1.000,0,0,...,0,1955,0,98178,47.511,-122.257,1340,5650,house,0
1,6414100192,2014-12-09,538000.000,3,2.250,2570,7242,2.000,0,0,...,400,1951,1991,98125,47.721,-122.319,1690,7639,house,3
2,5631500400,2015-02-25,180000.000,2,1.000,770,10000,1.000,0,0,...,0,1933,0,98028,47.738,-122.233,2720,8062,apartment,0
3,2487200875,2014-12-09,604000.000,4,3.000,1960,5000,1.000,0,0,...,910,1965,0,98136,47.521,-122.393,1360,5000,house,3
4,1954400510,2015-02-18,510000.000,3,2.000,1680,8080,1.000,0,0,...,0,1987,0,98074,47.617,-122.045,1800,7503,house,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21608,263000018,2014-05-21,360000.000,3,2.500,1530,1131,3.000,0,0,...,0,2009,0,98103,47.699,-122.346,1530,1509,house,3
21609,6600060120,2015-02-23,400000.000,4,2.500,2310,5813,2.000,0,0,...,0,2014,0,98146,47.511,-122.362,1830,7200,house,3
21610,1523300141,2014-06-23,402101.000,2,0.750,1020,1350,2.000,0,0,...,0,2009,0,98144,47.594,-122.299,1020,2007,apartment,3
21611,291310100,2015-01-16,400000.000,3,2.500,1600,2388,2.000,0,0,...,0,2004,0,98027,47.535,-122.069,1410,1287,house,3
