---
# Análise Exploratória dos Preços de Aluguéis e Vendas de Apartamentos na Cidade de São Paulo
---

In [1]:
# Data manipulation and plotting libraries
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go


# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation notices from pandas/plotly.
warnings.filterwarnings('ignore')
# Render matplotlib figures inline in the cell output
%matplotlib inline
# Default matplotlib figure size as (width, height) and global plot style
plt.rcParams["figure.figsize"] = 20, 10
plt.style.use('fivethirtyeight')
# Make DataFrame.plot / Series.plot use plotly instead of matplotlib
pd.options.plotting.backend = "plotly"

In [4]:
# Read the São Paulo apartment listings from the raw data folder
df = pd.read_csv(filepath_or_buffer='../data/raw/sao-paulo-properties.csv')

# Preview the first five rows
df.head(n=5)

Unnamed: 0,Price,Condo,Size,Rooms,Toilets,Suites,Parking,Elevator,Furnished,Swimming Pool,New,District,Negotiation Type,Property Type,Latitude,Longitude
0,930,220,47,2,2,1,1,0,0,0,0,Artur Alvim/São Paulo,rent,apartment,-23.543138,-46.479486
1,1000,148,45,2,2,1,1,0,0,0,0,Artur Alvim/São Paulo,rent,apartment,-23.550239,-46.480718
2,1000,100,48,2,2,1,1,0,0,0,0,Artur Alvim/São Paulo,rent,apartment,-23.542818,-46.485665
3,1000,200,48,2,2,1,1,0,0,0,0,Artur Alvim/São Paulo,rent,apartment,-23.547171,-46.483014
4,1300,410,55,2,2,1,1,1,0,0,0,Artur Alvim/São Paulo,rent,apartment,-23.525025,-46.482436


In [5]:
# Show column dtypes and non-null counts
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13640 entries, 0 to 13639
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Price             13640 non-null  int64  
 1   Condo             13640 non-null  int64  
 2   Size              13640 non-null  int64  
 3   Rooms             13640 non-null  int64  
 4   Toilets           13640 non-null  int64  
 5   Suites            13640 non-null  int64  
 6   Parking           13640 non-null  int64  
 7   Elevator          13640 non-null  int64  
 8   Furnished         13640 non-null  int64  
 9   Swimming Pool     13640 non-null  int64  
 10  New               13640 non-null  int64  
 11  District          13640 non-null  object 
 12  Negotiation Type  13640 non-null  object 
 13  Property Type     13640 non-null  object 
 14  Latitude          13640 non-null  float64
 15  Longitude         13640 non-null  float64
dtypes: float64(2), int64(11), object(3)
memo

In [6]:
# Normalise column labels: spaces become underscores and the text is lower-cased
df.columns = [label.replace(' ', '_').lower() for label in df.columns]

In [7]:
# Columns to be stored with the 'category' dtype: integer flag columns
# (0/1 in the preview rows) plus the text label columns.
cat_var = [
    'elevator',
    'furnished',
    'swimming_pool',
    'new',
    'district',
    'negotiation_type',
    'property_type',
]

# Cast all listed columns in a single assignment. The selection df[cat_var]
# already covers every target column, so looping over the frame's columns
# would only repeat the identical conversion once per column.
df[cat_var] = df[cat_var].astype('category')

In [8]:
# Count missing entries per column
df.isna().sum()

price               0
condo               0
size                0
rooms               0
toilets             0
suites              0
parking             0
elevator            0
furnished           0
swimming_pool       0
new                 0
district            0
negotiation_type    0
property_type       0
latitude            0
longitude           0
dtype: int64

In [11]:
# Count how many rows fall under each value of "negotiation_type"
df['negotiation_type'].value_counts()

negotiation_type
rent    7228
sale    6412
Name: count, dtype: int64

In [10]:
# Count how many rows fall under each value of "property_type"
df['property_type'].value_counts()

property_type
apartment    13640
Name: count, dtype: int64