<a href="https://colab.research.google.com/github/guilherme-2003/Programming/blob/main/analise_modelos_imobiliarios.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
pip install scikit-fuzzy



Import de bibliotecas necessárias

In [23]:
import pandas as pd # Manipulação dos dados
import numpy as np # Matemática aplicada a dados
from sklearn.datasets import fetch_openml # Import da base de dados
from sklearn.model_selection import train_test_split # Dividir os dados de treino e de teste
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score # Métricas medidas
from sklearn.preprocessing import StandardScaler # Normalização dos dados
from sklearn.neural_network import MLPRegressor # Rede neural
from sklearn.tree import DecisionTreeRegressor # Árvore de decisão
import skfuzzy as fuzz # Algoritmo Fuzzy

Leitura da base

In [24]:
data = fetch_openml(data_id=42165, as_frame=True)  # Exporta a base de casas nos EUA (Ames Housing Dataset)
df = data.frame # Dataframe

Prévia dos dados

In [25]:
display(df)

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,60,RL,62.0,7917,Pave,,Reg,Lvl,AllPub,...,0,,,,0,8,2007,WD,Normal,175000
1456,1457,20,RL,85.0,13175,Pave,,Reg,Lvl,AllPub,...,0,,MnPrv,,0,2,2010,WD,Normal,210000
1457,1458,70,RL,66.0,9042,Pave,,Reg,Lvl,AllPub,...,0,,GdPrv,Shed,2500,5,2010,WD,Normal,266500
1458,1459,20,RL,68.0,9717,Pave,,Reg,Lvl,AllPub,...,0,,,,0,4,2010,WD,Normal,142125


In [26]:
df = df.select_dtypes(include=[np.number]) # Seleciona apenas dados númericos para análise dos modelos
df = df.dropna() # Trata valores null

Prévia dos dados tratados

In [27]:
display(df)

Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,...,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SalePrice
0,1,60,65.0,8450,7,5,2003,2003,196.0,706,...,0,61,0,0,0,0,0,2,2008,208500
1,2,20,80.0,9600,6,8,1976,1976,0.0,978,...,298,0,0,0,0,0,0,5,2007,181500
2,3,60,68.0,11250,7,5,2001,2002,162.0,486,...,0,42,0,0,0,0,0,9,2008,223500
3,4,70,60.0,9550,7,5,1915,1970,0.0,216,...,0,35,272,0,0,0,0,2,2006,140000
4,5,60,84.0,14260,8,5,2000,2000,350.0,655,...,192,84,0,0,0,0,0,12,2008,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,60,62.0,7917,6,5,1999,2000,0.0,0,...,0,40,0,0,0,0,0,8,2007,175000
1456,1457,20,85.0,13175,6,6,1978,1988,119.0,790,...,349,0,0,0,0,0,0,2,2010,210000
1457,1458,70,66.0,9042,7,9,1941,2006,0.0,275,...,0,60,0,0,0,0,2500,5,2010,266500
1458,1459,20,68.0,9717,5,6,1950,1996,0.0,49,...,366,0,112,0,0,0,0,4,2010,142125


Definindo os testes e objetivos

In [28]:
x = df.drop("SalePrice", axis=1) # Dados de entrada, temos todas as características da casa (Exceto o valor).
y = df["SalePrice"] # Dados de saída, nosso alvo, modelo deve prever o valor da casa o mais próximo possível do real.

Normalização dos dados

In [29]:
scaler = StandardScaler() # Biblioteca de transformação da escala dos dados
x = scaler.fit_transform(x) # Transforma os dados de entrada

In [30]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42) # Dividimos os dados em 70% para treino e 30% para teste

Rede Neural Artificial

In [31]:
mrn = MLPRegressor(hidden_layer_sizes=(100, 100), max_iter=1000, random_state=42) # Define a rede neural com duas camadas de 100 neurônios
mrn.fit(x_train, y_train) # Treina a rede neural
y_pred_mrn = mrn.predict(x_test) # Preve um valor no conjunto de teste

df_mrn = pd.DataFrame({'Real': y_test.values, 'Previsão': np.round(y_pred_mrn, 2)}) # Dataframe de comparação

# Avaliação
print("Rede Neural Artificial")
print("R2 Score:", r2_score(y_test, y_pred_mrn))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred_mrn)))
print("MAE:", mean_absolute_error(y_test, y_pred_mrn))
display(df_mrn.head())  # Mostrar a comparação

Rede Neural Artificial
R2 Score: 0.7604155955817615
RMSE: 43756.49535385754
MAE: 29752.523051800952




Unnamed: 0,Real,Previsão
0,140000,153087.35
1,150750,114411.09
2,157000,121881.83
3,138000,117905.84
4,144000,148630.47


Árvore de decisão

In [33]:
mad = DecisionTreeRegressor(max_depth=10, random_state=42) # Máximo 5 de profundidade
mad.fit(x_train, y_train) # Treina a árvore de decisão
y_pred_mad = mad.predict(x_test) # Preve o preço no conjunto de teste

df_mad = pd.DataFrame({'Real': y_test.values, 'Previsão': np.round(y_pred_mad, 2)})

# Avaliação
print("\nÁrvore de Decisão")
print("R2 Score:", r2_score(y_test, y_pred_mad))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred_mad)))
print("MAE:", mean_absolute_error(y_test, y_pred_mad))
display(df_mad.head()) # Mostrar a comparação


Árvore de Decisão
R2 Score: 0.7696466258893065
RMSE: 42905.26087378544
MAE: 28702.800593255568


Unnamed: 0,Real,Previsão
0,140000,143020.83
1,150750,140740.22
2,157000,140740.22
3,138000,143020.83
4,144000,126283.93


Fuzzy

In [35]:
cntr, u, u0, d, jm, p, fpc = fuzz.cluster.cmeans(x_train.T, 6, 3, error=0.002, maxiter=2000, init=None) # Agrupa os dados em 3 clusters

labels = np.argmax(u, axis=0)
cluster_means = [y_train[labels == i].mean() for i in range(6)] # Cada casa associada a um cluster
y_pred_fuzzy = [cluster_means[np.argmax(u[:, i])] for i in range(len(y_test))] # A predição é uma média dos preços da casa do mesmo cluster

df_fuzzy = pd.DataFrame({'Real': y_test.values, 'Previsão': np.round(y_pred_fuzzy, 2)})

# Avaliação
print("\nFuzzy")
print("R2 Score:", r2_score(y_test, y_pred_fuzzy))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred_fuzzy)))
print("MAE:", mean_absolute_error(y_test, y_pred_fuzzy))
display(df_fuzzy.head()) # Mostrar a comparação


Fuzzy
R2 Score: -0.47159488787515436
RMSE: 108444.46565600664
MAE: 79541.44782451102


Unnamed: 0,Real,Previsão
0,140000,237024.41
1,150750,237024.41
2,157000,131011.62
3,138000,131011.62
4,144000,131011.62
