# Aluguel de casas Brasil

In [444]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

Nesta análise trabalharemos com um dataframe que contém informações sobre imóveis para alugar. As colunas nos fornecem as seguintes informações:
 - city: Contém a cidade onde está localizado o imóvel.
 - area: Corresponde à área do imóvel.
 - rooms: Corresponde à quantidade de quartos.
 - bathroom: Corresponde à quantidade de banheiros.
 - parking spaces: Corresponde à quantidade de vagas de garagem.
 - floor: Corresponde ao andar do imóvel.
 - animal: Indica se o imóvel aceita animais ou não.
 - furniture: Indica se o imóvel está mobiliado ou não.
 - hoa: Corresponde ao valor do condomínio.
 - rent amount: Corresponde ao valor do aluguel.
 - property tax: Corresponde ao valor do IPTU.
 - fire insurance: Corresponde ao valor do seguro contra incêndio.
 - total: Corresponde ao valor total das despesas mensais do imóvel.

In [445]:
# Load the rental listings from the CSV file, then print the frame summary
# (column names, non-null counts and dtypes).
data = pd.read_csv(filepath_or_buffer="../src/houses_to_rent.csv")
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10692 entries, 0 to 10691
Data columns (total 13 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   city                 10692 non-null  object
 1   area                 10692 non-null  int64 
 2   rooms                10692 non-null  int64 
 3   bathroom             10692 non-null  int64 
 4   parking spaces       10692 non-null  int64 
 5   floor                10692 non-null  object
 6   animal               10692 non-null  object
 7   furniture            10692 non-null  object
 8   hoa (R$)             10692 non-null  int64 
 9   rent amount (R$)     10692 non-null  int64 
 10  property tax (R$)    10692 non-null  int64 
 11  fire insurance (R$)  10692 non-null  int64 
 12  total (R$)           10692 non-null  int64 
dtypes: int64(9), object(4)
memory usage: 1.1+ MB


In [446]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

Unnamed: 0,city,area,rooms,bathroom,parking spaces,floor,animal,furniture,hoa (R$),rent amount (R$),property tax (R$),fire insurance (R$),total (R$)
0,São Paulo,70,2,1,1,7,acept,furnished,2065,3300,211,42,5618
1,São Paulo,320,4,4,0,20,acept,not furnished,1200,4960,1750,63,7973
2,Porto Alegre,80,1,1,1,6,acept,not furnished,1000,2800,0,41,3841
3,Porto Alegre,51,2,1,0,2,acept,not furnished,270,1112,22,17,1421
4,São Paulo,25,1,1,0,1,not acept,not furnished,0,800,25,11,836


## Tradução de colunas e limpeza de dados

In [447]:
# Column translation and data cleaning.
# This cell reassigns `data` and expects the frame exactly as loaded from the
# CSV; re-run the loading cell before re-running this one.

# Translate the column names to Portuguese ("area" and "animal" keep their names).
data = data.rename(columns={
    "city": "cidade",
    "rooms": "quartos",
    "bathroom": "banheiro",
    "parking spaces": "garagens",
    "floor": "andar",
    "furniture": "mobilia",
    "hoa (R$)": "condominio_valor",
    "rent amount (R$)": "aluguel",
    "property tax (R$)": "iptu",
    "fire insurance (R$)": "seguro_incendio",
    "total (R$)": "total_reais"
})

data = (
    data
    .assign(
        # Encode the yes/no columns as booleans, one column at a time. A
        # frame-wide `replace` only yields bool columns through pandas' silent
        # object downcasting, which is deprecated since pandas 2.2.
        animal=lambda df: df["animal"].map({"acept": True, "not acept": False}),
        mobilia=lambda df: df["mobilia"].map({"furnished": True, "not furnished": False}),
        # "-" is the placeholder used when there is no floor number.
        # `np.nan` is the supported spelling: the `np.NAN` alias was removed
        # in NumPy 2.0.
        andar=lambda df: df["andar"].replace("-", np.nan),
    )
    .drop_duplicates()
    .assign(
        # Monthly total cost per unit of area.
        preco_por_m2=lambda df: df["total_reais"] / df["area"],
        # Floor numbers arrive as strings; NaN requires a float column.
        andar=lambda df: df["andar"].astype(float),
    )
)

# Keep only floors below 40. A NaN floor (the former "-") fails this comparison,
# so rows without a floor number are dropped here as well.
data = data[data["andar"] < 40]

# Quantile bounds used to remove outliers: trim the extreme 0.1% on each side
# of the price-per-m2 distribution.
low, high = data["preco_por_m2"].quantile([0.001, 0.999])
data = (
    data[data["preco_por_m2"].between(low, high)]
    .sort_values(by="preco_por_m2", ascending=True)
    # The previous row labels are kept as an "index" column.
    # NOTE(review): pass drop=True if no later cell needs that column.
    .reset_index()
)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7944 entries, 0 to 7943
Data columns (total 15 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   index             7944 non-null   int64  
 1   cidade            7944 non-null   object 
 2   area              7944 non-null   int64  
 3   quartos           7944 non-null   int64  
 4   banheiro          7944 non-null   int64  
 5   garagens          7944 non-null   int64  
 6   andar             7944 non-null   float64
 7   animal            7944 non-null   bool   
 8   mobilia           7944 non-null   bool   
 9   condominio_valor  7944 non-null   int64  
 10  aluguel           7944 non-null   int64  
 11  iptu              7944 non-null   int64  
 12  seguro_incendio   7944 non-null   int64  
 13  total_reais       7944 non-null   int64  
 14  preco_por_m2      7944 non-null   float64
dtypes: bool(2), float64(2), int64(10), object(1)
memory usage: 822.5+ KB


In [448]:
# Kendall rank correlation between the numeric and boolean columns
# (the "index" and "cidade" columns are left out of the selection).
data.loc[:, [
    "area",
    "quartos",
    "banheiro",
    "garagens",
    "andar",
    "animal",
    "mobilia",
    "condominio_valor",
    "aluguel",
    "iptu",
    "seguro_incendio",
    "total_reais",
    "preco_por_m2",
]].corr(method="kendall")

Unnamed: 0,area,quartos,banheiro,garagens,andar,animal,mobilia,condominio_valor,aluguel,iptu,seguro_incendio,total_reais,preco_por_m2
area,1.0,0.711304,0.685971,0.534352,0.089911,0.105385,-0.017864,0.553038,0.484853,0.518827,0.488665,0.546182,-0.116819
quartos,0.711304,1.0,0.669942,0.53992,0.053386,0.120674,-0.083328,0.455294,0.392959,0.455036,0.397545,0.442577,-0.160434
banheiro,0.685971,0.669942,1.0,0.61835,0.142527,0.081915,0.012769,0.557305,0.530838,0.541067,0.53293,0.583063,0.016002
garagens,0.534352,0.53992,0.61835,1.0,0.152642,0.070568,0.00872,0.481745,0.439626,0.46686,0.437298,0.49127,0.04386
andar,0.089911,0.053386,0.142527,0.152642,1.0,-0.007995,0.076823,0.209984,0.203863,0.133547,0.195772,0.217356,0.186227
animal,0.105385,0.120674,0.081915,0.070568,-0.007995,1.0,-0.104183,0.038408,0.005735,0.050238,0.012288,0.016324,-0.101883
mobilia,-0.017864,-0.083328,0.012769,0.00872,0.076823,-0.104183,1.0,0.107779,0.216196,0.069841,0.213283,0.194389,0.285781
condominio_valor,0.553038,0.455294,0.557305,0.481745,0.209984,0.038408,0.107779,1.0,0.543954,0.557896,0.530809,0.676757,0.208049
aluguel,0.484853,0.392959,0.530838,0.439626,0.203863,0.005735,0.216196,0.543954,1.0,0.470197,0.95722,0.863977,0.382316
iptu,0.518827,0.455036,0.541067,0.46686,0.133547,0.050238,0.069841,0.557896,0.470197,1.0,0.471118,0.55779,0.119616
