## Classe de Regressão Linear

In [2]:
import numpy as np
import pandas as pd

In [12]:
# Scratch check: plain scalar multiplication, for comparison with np.dot on scalars.
3*4

12

In [13]:
# Scratch check: np.dot applied to two scalars gives their ordinary product.
np.dot(3, 4)

12

In [1]:
class RegressaoLinear:
    """Ordinary least-squares linear regression with an intercept term.

    The model is ``y ~ b0 + b1*x1 + ... + bp*xp``. After ``treinar`` the
    fitted weights are stored in ``coeficientes`` with the intercept first.
    """

    def __init__(self):
        # Fitted weights (intercept first); stays None until treinar() is called.
        self.coeficientes = None

    @staticmethod
    def _adicionar_vies(X):
        """Return X as a float array with a leading column of 1s (bias term)."""
        # Casting to float keeps the later products out of integer arithmetic,
        # which can overflow when the default integer type is 32-bit.
        X = np.asarray(X, dtype=float)
        return np.insert(X, 0, 1.0, axis=1)

    def treinar(self, X, y):
        """Fit the coefficients by least squares.

        Parameters
        ----------
        X : array-like, 2-D (one row per sample, one column per feature)
            Feature matrix, without the bias column.
        y : array-like
            Target values, one per row of X.

        Notes
        -----
        Mathematically this is the normal-equation solution
        ``(X^T X)^-1 X^T y``. It is computed with ``np.linalg.lstsq`` instead
        of an explicit matrix inverse, so collinear (rank-deficient) features
        yield the minimum-norm solution rather than a ``LinAlgError`` or
        numerically meaningless coefficients.
        """
        X_b = self._adicionar_vies(X)
        y = np.asarray(y, dtype=float)

        # lstsq returns (solution, residuals, rank, singular_values).
        self.coeficientes = np.linalg.lstsq(X_b, y, rcond=None)[0]

        #self.coeficientes = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)

    def prever(self, X):
        """Return predictions for X (same column layout as used in treinar).

        Raises
        ------
        RuntimeError
            If the model has not been trained yet.
        """
        if self.coeficientes is None:
            raise RuntimeError(
                "O modelo ainda não foi treinado; chame treinar(X, y) antes de prever."
            )

        # Prediction is the bias-augmented features times the coefficients.
        return self._adicionar_vies(X).dot(self.coeficientes)

    def avaliar(self, X, y):
        """Return ``(mse, r2)`` for the model's predictions on X against y.

        ``mse`` is the mean squared error and ``r2`` the coefficient of
        determination ``1 - SS_res / SS_tot``. When y is constant, SS_tot is
        zero and r2 comes out as nan or -inf.
        """
        y = np.asarray(y, dtype=float)
        y_pred = self.prever(X)

        residuos = y - y_pred

        # Mean squared error (MSE)
        mse = np.mean(residuos ** 2)

        # Coefficient of determination (R²)
        ss_total = np.sum((y - np.mean(y)) ** 2)
        ss_residual = np.sum(residuos ** 2)
        r2 = 1 - (ss_residual / ss_total)

        return mse, r2

## Utilização

In [7]:
# Load the dataset (path is relative to the notebook's working directory)
data = pd.read_csv(filepath_or_buffer='house_prices_train.csv')
# Preview the first five rows
data.head(n=5)

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [8]:
# Keep only the two predictor columns and the target column
data = data.loc[:, ['TotalBsmtSF', 'GrLivArea', 'SalePrice']]

In [9]:
# Split into the target vector (y) and the feature matrix (X), both as NumPy arrays
y = data['SalePrice'].values
X = data.drop(columns=['SalePrice']).values

In [6]:
# Instantiate the linear regression class (coefficients are None until trained)
regressao = RegressaoLinear()

In [7]:
# Train the model; the fitted weights are stored on regressao.coeficientes
regressao.treinar(X, y)

In [8]:
# Predict on the same X that was used for training (in-sample predictions)
y_pred = regressao.prever(X)

In [9]:
# Evaluate the model on the training data: returns (MSE, R²)
mse, r2 = regressao.avaliar(X, y)

In [10]:
# Show the metrics (mean squared error and coefficient of determination)
print('Erro quadrático médio (MSE):', mse)
print('Coeficiente de determinação (R²):', r2)

Erro quadrático médio (MSE): 3139843209.666527
Coeficiente de determinação (R²): 0.5021486502718042


In [None]:
# One-line normal equation: (X^T X)^-1 X^T y
np.linalg.inv(X.T @ X) @ X.T @ y

In [12]:
# Inspect X. NOTE(review): the saved output already shows a leading column of 1s,
# so this cell was executed after the bias-column cell that follows it.
X

array([[   1,  856, 1710],
       [   1, 1262, 1262],
       [   1,  920, 1786],
       ...,
       [   1, 1152, 2340],
       [   1, 1078, 1078],
       [   1, 1256, 1256]])

In [11]:
# Add a leading column of 1s for the bias (intercept) term.
# The matrix is rebuilt from `data` rather than from the current `X`, so
# re-running this cell never stacks a second column of 1s (which would make
# X^T X singular).
X = np.insert(data.drop('SalePrice', axis=1).values, 0, 1, axis=1)

In [13]:
# Gram matrix of the bias-augmented features: X^T X
XT_X = X.T @ X

In [14]:
# Display X^T X
XT_X

array([[      1460,    1543847,    2212577],
       [   1543847, 1913311873, 2492636635],
       [   2212577, 2492636635, 3755953259]])

In [15]:
# Compute the inverse of XT_X.
# np.linalg.inv raises LinAlgError when the matrix is singular (e.g. collinear columns).
XT_X_inv = np.linalg.inv(XT_X)

In [16]:
# Display the inverse of X^T X
XT_X_inv

array([[ 7.42844303e-03, -2.16398670e-06, -2.93985811e-06],
       [-2.16398670e-06,  4.49028318e-09, -1.70520150e-09],
       [-2.93985811e-06, -1.70520150e-09,  3.12972749e-09]])

In [17]:
# Project the targets onto the feature columns: X^T y
XT_y = X.T @ y

In [18]:
# Display X^T y
XT_y

array([   264144946, 310514509527, 443462020418])

In [19]:
# Normal-equation solution: coefficients = (X^T X)^-1 (X^T y), intercept first
XT_X_inv @ XT_y

array([-13479.00523843,     66.49982391,     81.876807  ])

In [None]:
# Compute the coefficients with the linear-regression (normal equation) formula.
# This cell runs at notebook top level, where no `self` exists, so the result is
# bound to a plain variable instead of an instance attribute.
coeficientes = np.dot(XT_X_inv, XT_y)