# Regressão Linear 
***

## Importando Bibliotecas

In [None]:
pip install plotly

In [1]:
# Bibliotecas de manipulação e visualização de dados
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

# Classes dos modelos
from sklearn.linear_model import LinearRegression

# Funções de avaliação dos modelos e de divisão dos dados
from sklearn.metrics import  mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

## Análise do dataset

**House**: A tarefa é prever o preço mediano das casas de uma região com base na composição demográfica e no estado do mercado imobiliário local. Descrição [link](https://sci2s.ugr.es/keel/dataset.php?cod=95#sub1).

In [3]:
# Remote location of the House dataset CSV.
DATASET_URL = "https://raw.githubusercontent.com/cynthiamaia/Monitoria-DeepLearning-CIN-AI/main/Datasets/house.csv"

# Read the CSV straight from the URL into a DataFrame.
# NOTE(review): most column labels in this file carry a leading space
# (e.g. ' P5p1'), as the `dataset.columns` output in the next cell shows.
dataset = pd.read_csv(DATASET_URL)

In [8]:
# Show the column labels of the loaded dataset.
dataset.columns

Index(['Price', 'P1', ' P5p1', ' P6p2', ' P11p4', ' P14p9', ' P15p1', ' P15p3',
       ' P16p2', ' P18p2', ' P27p4', ' H2p2', ' H8p2', ' H10p1', ' H13p1',
       ' H18pA', ' H40p4'],
      dtype='object')

In [9]:
# Take a look at the first few rows of the dataset
dataset.head()

Unnamed: 0,Price,P1,P5p1,P6p2,P11p4,P14p9,P15p1,P15p3,P16p2,P18p2,P27p4,H2p2,H8p2,H10p1,H13p1,H18pA,H40p4
0,14999,219,0.506849,0.031963,0.146119,0.101852,0.876712,0.0,0.746988,0.012048,0.060241,0.231482,0.024096,0.987952,0.351852,0.0,0.8
1,106200,2273,0.495381,0.018918,0.067312,0.045336,0.794545,0.016718,0.641053,0.002105,0.076842,0.064961,0.017895,0.997895,0.170276,0.054217,0.258064
2,14999,564,0.457447,0.058511,0.299645,0.238562,0.764184,0.010638,0.567273,0.003636,0.014545,0.140625,0.054545,0.996364,0.38125,0.05618,1.0
3,29900,620,0.495161,0.003226,0.104839,0.086262,0.909677,0.0,0.792793,0.009009,0.027027,0.051282,0.004504,0.995495,0.183761,0.162791,0.0
4,85900,3809,0.49173,0.205303,0.107115,0.085744,0.899449,0.0,0.766566,0.008283,0.03991,0.017024,0.200301,0.985693,0.198372,0.106557,0.666667


### Separando o conjunto de dados 

In [10]:
# Features are every column except the target; the target is the "Price" column.
X = dataset.drop(columns=["Price"])
y = dataset["Price"]

# Fix the seed so the split, and therefore every metric computed from it, is
# identical on each Restart & Run All. test_size=0.25 is scikit-learn's default
# and is spelled out here only to make the proportion visible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

## Treinamento do modelo

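A regressão linear é um modelo de regressão. Lembrando da fórmula da equação linear: $y = a \times x + b$. Com várias variáveis de entrada (como as 16 deste dataset), a fórmula se generaliza para $\hat{y} = b + a_1 x_1 + a_2 x_2 + \dots + a_p x_p$.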

In [15]:
# Fit a linear regression model on the training split.
regression_model = LinearRegression().fit(X_train, y_train)

# Predict the target for the held-out test rows.
y_pred = regression_model.predict(X_test)

# Compute both metrics on the test split, then report them.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("MSE:", mse)
print("R2_score:", r2)

MSE: 2116584664.558962
R2_score: 0.2686138878775711


$\mathrm{MSE} = \frac{1}{n} \sum\limits_{i = 1}^{n}(y_i-\hat{y}_i)^2$

In [21]:
!pip install --upgrade nbformat


Collecting nbformat
  Downloading nbformat-5.10.4-py3-none-any.whl.metadata (3.6 kB)
Collecting fastjsonschema>=2.15 (from nbformat)
  Downloading fastjsonschema-2.21.1-py3-none-any.whl.metadata (2.2 kB)
Collecting jsonschema>=2.6 (from nbformat)
  Downloading jsonschema-4.24.0-py3-none-any.whl.metadata (7.8 kB)
Collecting jupyter-core!=5.0.*,>=4.12 (from nbformat)
  Downloading jupyter_core-5.8.1-py3-none-any.whl.metadata (1.6 kB)
Collecting traitlets>=5.1 (from nbformat)
  Using cached traitlets-5.14.3-py3-none-any.whl.metadata (10 kB)
Collecting attrs>=22.2.0 (from jsonschema>=2.6->nbformat)
  Using cached attrs-25.3.0-py3-none-any.whl.metadata (10 kB)
Collecting jsonschema-specifications>=2023.03.6 (from jsonschema>=2.6->nbformat)
  Using cached jsonschema_specifications-2025.4.1-py3-none-any.whl.metadata (2.9 kB)
Collecting referencing>=0.28.4 (from jsonschema>=2.6->nbformat)
  Using cached referencing-0.36.2-py3-none-any.whl.metadata (2.8 kB)
Collecting rpds-py>=0.7.1 (from jsons

In [None]:
# Scatter plot of actual vs. predicted values with an OLS trend line.
scatter_options = dict(
    x=y_test,
    y=y_pred,
    title="Real x Previsto",
    trendline="ols",
    labels={"x": "Real", "y": "Previsto"},
)
fig = px.scatter(**scatter_options)
fig.show()


In [14]:
# Per-row error: actual value minus the model's prediction.
residuals = y_test - y_pred

# Comparison table of actual value, prediction and their difference,
# rounded to two decimals.
pred_y_df = pd.DataFrame(
    {
        "True Value": y_test,
        "Predicted value": y_pred,
        "Difference": residuals,
    }
).round(2)
pred_y_df

Unnamed: 0,True Value,Predicted value,Difference
15501,24000,45932.72,-21932.72
12765,15800,30399.05,-14599.05
18417,44900,77985.28,-33085.28
14805,101800,51878.24,49921.76
20573,37500,35320.73,2179.27
...,...,...,...
3260,196400,45055.18,151344.82
22126,62300,21871.39,40428.61
22180,14999,17350.63,-2351.63
9300,34900,50535.49,-15635.49
