In [44]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.metrics import r2_score, mean_squared_error
import pandas as pd
import numpy as np

In [15]:
# Location of the whitespace-separated Boston housing text file.
DATA_URL = "http://lib.stat.cmu.edu/datasets/boston"

# The separator is a regex, so it is written as a raw string: "\s" inside a
# normal string literal is an invalid escape sequence and produces a warning
# on recent Python versions. The first 22 lines of the file are skipped and
# no header row is read.
raw_df = pd.read_csv(DATA_URL, sep=r"\s+", skiprows=22, header=None)

# Each record is spread over two consecutive rows of raw_df: every column of
# the even rows plus the first two values of the odd rows form the feature
# matrix, and the third value of each odd row is the regression target.
X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
y = raw_df.values[1::2, 2]

In [16]:
# Hold out a test split. random_state=0 pins the shuffle so the split is
# reproducible; the split proportions are left at the library default.
X_tn, X_te, y_tn, y_te = train_test_split(
    X,
    y,
    random_state=0,
)

In [17]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so the test data never influences the scaling.
std_scale = StandardScaler().fit(X_tn)
X_tn_std, X_te_std = (std_scale.transform(split) for split in (X_tn, X_te))

In [21]:
# Unpenalized linear regression fitted on the standardized training features.
clf_lr = LinearRegression().fit(X_tn_std, y_tn)
clf_lr  # echo the fitted estimator as the cell output

In [23]:
# Display the fitted coefficients of the plain linear regression model.
clf_lr.coef_ # regression coefficients

array([-0.97100092,  1.04667838, -0.04044753,  0.59408776, -1.80876877,
        2.60991991, -0.19823317, -3.00216551,  2.08021582, -1.93289037,
       -2.15743759,  0.75199122, -3.59027047])

In [24]:
# Display the fitted intercept of the plain linear regression model.
clf_lr.intercept_ # constant (intercept) term

np.float64(22.6087071240106)

In [26]:
# Ridge regression with regularization strength alpha=1, fitted on the
# standardized training features.
clf_ridge = Ridge(alpha=1).fit(X_tn_std, y_tn)
clf_ridge  # echo the fitted estimator as the cell output

In [27]:
# Display the fitted ridge coefficients.
clf_ridge.coef_

array([-0.96187481,  1.02775462, -0.06861144,  0.59814087, -1.77318401,
        2.6205672 , -0.20466821, -2.96504904,  2.00091047, -1.85840697,
       -2.14955893,  0.75175979, -3.57350065])

In [28]:
# Display the fitted ridge intercept.
clf_ridge.intercept_

np.float64(22.6087071240106)

In [30]:
# Lasso regression with regularization strength alpha=0.01, fitted on the
# standardized training features.
clf_lasso = Lasso(alpha=0.01).fit(X_tn_std, y_tn)
clf_lasso  # echo the fitted estimator as the cell output

In [31]:
# Display the fitted lasso coefficients.
clf_lasso.coef_

array([-0.93949205,  1.01037722, -0.05747479,  0.59232437, -1.76160385,
        2.62290366, -0.17911018, -2.92328686,  1.93398258, -1.81118512,
       -2.14705184,  0.73964238, -3.59732302])

In [32]:
# Display the fitted lasso intercept.
clf_lasso.intercept_

np.float64(22.6087071240106)

In [35]:
# Elastic net with alpha=0.01 and l1_ratio=0.01 (the L1 share of the combined
# penalty), fitted on the standardized training features.
clf_elastic = ElasticNet(alpha=0.01, l1_ratio=0.01).fit(X_tn_std, y_tn)
clf_elastic  # echo the fitted estimator as the cell output

In [36]:
# Display the fitted elastic net coefficients.
clf_elastic.coef_

array([-0.93905353,  0.98057259, -0.13487248,  0.60767135, -1.68277217,
        2.64642801, -0.21978011, -2.86727682,  1.80930385, -1.68204979,
       -2.12935659,  0.75122826, -3.52939748])

In [37]:
# Display the fitted elastic net intercept.
clf_elastic.intercept_

np.float64(22.6087071240106)

In [38]:
# Predict on the standardized test features with each fitted model, in the
# order: linear, ridge, lasso, elastic net.
pred_lr, pred_ridge, pred_lasso, pred_elastic = (
    model.predict(X_te_std)
    for model in (clf_lr, clf_ridge, clf_lasso, clf_elastic)
)

In [43]:
# Test-set R^2 for each model, one value per line in the order:
# linear, ridge, lasso, elastic net.
print(
    *(
        r2_score(y_te, pred)
        for pred in (pred_lr, pred_ridge, pred_lasso, pred_elastic)
    ),
    sep="\n",
)

0.6354638433202132
0.6345884564889053
0.6343061000666704
0.6322273400977834


In [45]:
# Test-set mean squared error for each model, one value per line in the
# order: linear, ridge, lasso, elastic net.
print(
    *(
        mean_squared_error(y_te, pred)
        for pred in (pred_lr, pred_ridge, pred_lasso, pred_elastic)
    ),
    sep="\n",
)

29.78224509230234
29.853763334547615
29.876831576246808
30.046664219036877
