## Importação das bibliotecas

In [16]:
import functools
from math import sqrt
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
import scipy.stats as sct
import seaborn as sns
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.decomposition import PCA
from sklearn.metrics import  mean_squared_error, median_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [4]:
# Algumas configurações para o matplotlib.
%matplotlib inline

from IPython.core.pylabtools import figsize


figsize(12, 12)

sns.set()

In [5]:
np.random.seed(1000)

In [8]:
boston_dataset = load_boston()

In [11]:
boston_features = pd.DataFrame(boston_dataset.data, columns=boston_dataset.feature_names)
boston_target = pd.DataFrame(boston_dataset.target, columns=["Price"])

boston = pd.concat([boston_features, boston_target], axis=1)

boston.head(5)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Price
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


Vamos começar separando o nosso conjunto de dados em dois grupos: treinamento (80%) e teste (20%).

In [12]:
boston_features_train, boston_features_test, boston_target_train, boston_target_test = train_test_split(boston_features,
                                                                                                        boston_target,
                                                                                                        test_size=0.2,
                                                                                                        random_state=42,)

## Regressão Linear

In [13]:
linear_regression = LinearRegression()

linear_regression.fit(boston_features_train, boston_target_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

## Testando

In [14]:
predicted = linear_regression.predict(boston_features_test)
predicted[:3]

array([[28.99672362],
       [36.02556534],
       [14.81694405]])

## Comparando com valores corretos

In [15]:
boston_target_test.values[:3]

array([[23.6],
       [32.4],
       [13.6]])

In [23]:
linear_regression.coef_.round(2)

array([[-1.10e-01,  3.00e-02,  4.00e-02,  2.78e+00, -1.72e+01,  4.44e+00,
        -1.00e-02, -1.45e+00,  2.60e-01, -1.00e-02, -9.20e-01,  1.00e-02,
        -5.10e-01]])

## Ridge

In [25]:
scaler = StandardScaler()

boston_features_train_scaled = scaler.fit_transform(boston_features_train)

boston_features_train_scaled[:3]

array([[ 1.28770177, -0.50032012,  1.03323679, -0.27808871,  0.48925206,
        -1.42806858,  1.02801516, -0.80217296,  1.70689143,  1.57843444,
         0.84534281, -0.07433689,  1.75350503],
       [-0.33638447, -0.50032012, -0.41315956, -0.27808871, -0.15723342,
        -0.68008655, -0.43119908,  0.32434893, -0.62435988, -0.58464788,
         1.20474139,  0.4301838 , -0.5614742 ],
       [-0.40325332,  1.01327135, -0.71521823, -0.27808871, -1.00872286,
        -0.40206304, -1.6185989 ,  1.3306972 , -0.97404758, -0.60272378,
        -0.63717631,  0.06529747, -0.65159505]])

In [27]:
ridge_regression = Ridge(alpha=1,solver="cholesky")

In [28]:
ridge_regression.fit(boston_features_train_scaled, boston_target_train)

ridge_regression.intercept_, ridge_regression.coef_

(array([22.79653465]),
 array([[-0.99218679,  0.6777488 ,  0.2522143 ,  0.72248078, -1.99083465,
          3.15157218, -0.17726162, -3.04502895,  2.17324941, -1.69555879,
         -2.02783351,  1.127197  , -3.59897667]]))

## R2

In [29]:
r2_score(boston_target_test, predicted)

0.6687594935356307

In [30]:
pred2 = ridge_regression.predict(boston_features_test)

In [31]:
r2_score(boston_target_test, pred2)

-3761.5433705953624

### Mean Squared Error

In [32]:
mean_squared_error(boston_target_test, predicted)

24.291119474973616

### Median Absolute Error

In [33]:
median_absolute_error(boston_target_test, predicted)

2.3243319064124535