# Modelo Preditivo

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [2]:
# Load the housing dataset from the project-relative CSV (comma-delimited).
caminho_csv = 'data/USA_Housing.csv'
df = pd.read_csv(caminho_csv, sep=',')

# Preview the first rows to sanity-check the parsed columns.
df.head()

Unnamed: 0,Avg. Area Income,Avg. Area House Age,Avg. Area Number of Rooms,Avg. Area Number of Bedrooms,Area Population,Price,Address
0,79545.458574,5.682861,7.009188,4.09,23086.800503,1059034.0,"208 Michael Ferry Apt. 674\nLaurabury, NE 3701..."
1,79248.642455,6.0029,6.730821,3.09,40173.072174,1505891.0,"188 Johnson Views Suite 079\nLake Kathleen, CA..."
2,61287.067179,5.86589,8.512727,5.13,36882.1594,1058988.0,"9127 Elizabeth Stravenue\nDanieltown, WI 06482..."
3,63345.240046,7.188236,5.586729,3.26,34310.242831,1260617.0,USS Barnett\nFPO AP 44820
4,59982.197226,5.040555,7.839388,4.23,26354.109472,630943.5,USNS Raymond\nFPO AE 09386


In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,Avg. Area Income,Avg. Area House Age,Avg. Area Number of Rooms,Avg. Area Number of Bedrooms,Area Population,Price
count,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0
mean,68583.108984,5.977222,6.987792,3.98133,36163.516039,1232073.0
std,10657.991214,0.991456,1.005833,1.234137,9925.650114,353117.6
min,17796.63119,2.644304,3.236194,2.0,172.610686,15938.66
25%,61480.562388,5.322283,6.29925,3.14,29403.928702,997577.1
50%,68804.286404,5.970429,7.002902,4.05,36199.406689,1232669.0
75%,75783.338666,6.650808,7.665871,4.49,42861.290769,1471210.0
max,107701.748378,9.519088,10.759588,6.5,69621.713378,2469066.0


In [12]:
# Predictor columns used by the regression, in the order the model will see them.
# NOTE(review): 'Avg. Area Number of Bedrooms' is not selected — confirm this is intentional.
colunas_preditoras = [
    'Avg. Area Income',
    'Avg. Area House Age',
    'Area Population',
    'Avg. Area Number of Rooms',
]
X = df[colunas_preditoras]

# Target variable: the 'Price' column.
y = df['Price']

In [13]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the split reproducible.
particoes = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = particoes

In [14]:
# Ordinary least-squares linear regression, fitted on the training partition only.
modelo = LinearRegression()
modelo.fit(X=X_train, y=y_train)

In [15]:
# Predict the target for the held-out test features.
y_pred = modelo.predict(X_test)

In [16]:
# Score the test-set predictions: mean absolute error, root mean squared error and R².
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

# Report each metric rounded to two decimal places.
print(f'MAE: {mae:.2f}')
print(f'RMSE: {rmse:.2f}')
print(f'R²: {r2:.2f}')

MAE: 80857.79
RMSE: 100367.93
R²: 0.92


In [17]:
# Fitted parameters: the intercept (independent term) and the coefficients (feature weights).
intercepto = modelo.intercept_
coeficientes = modelo.coef_

# Build the model equation: the intercept followed by one " + (coef * feature)" term
# for each column of X paired with its coefficient.
termos = [f' + ({coef:.2f} * {nome})' for nome, coef in zip(X.columns, coeficientes)]
equacao = f'Preço = {intercepto:.2f}' + ''.join(termos)

print("Equação do modelo:")
print(equacao)

Equação do modelo:
Preço = -2635543.37 + (21.66 * Avg. Area Income) + (164702.18 * Avg. Area House Age) + (15.27 * Area Population) + (121003.91 * Avg. Area Number of Rooms)
