In [0]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [0]:
# Load the UCI housing data set. The file is whitespace-delimited and has no
# header row, so the column names are assigned explicitly afterwards.
# The separator is written as a raw string: in a plain literal '\s' is an
# invalid escape sequence, which Python reports with a warning.
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data',
    header=None,
    sep=r'\s+',
)

# 13 feature columns followed by the target column MEDV.
df.columns = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV',
]
print('df shape', df.shape)

In [0]:
# Features are the first 13 columns; the target is the house price column (MEDV).
X = df.iloc[:, :13].to_numpy()
y = df['MEDV'].to_numpy()

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
print(
    'X_train shape：', X_train.shape,
    ' y_train shape：', y_train.shape,
    ' X_test shape：', X_test.shape,
    ' y_test shape：', y_test.shape,
)

In [0]:
# Expand the features into degree-2 polynomial terms (no constant bias column).
POLY = PolynomialFeatures(degree=2, include_bias=False)

# Fit the expansion on the training split, then apply the same mapping to the test split.
X_train_pol, X_test_pol = POLY.fit_transform(X_train), POLY.transform(X_test)

# Show the expanded shapes as the cell output.
X_train_pol.shape, X_test_pol.shape

In [0]:
# Standardize the polynomial features.
# The scaler is fitted on the training split only and then reused for the
# test split, so both are scaled with the training statistics.
sc = StandardScaler().fit(X_train_pol)
X_train_std = sc.transform(X_train_pol)
X_test_std = sc.transform(X_test_pol)

In [0]:
# Build and train two models on the standardized training data:
# one without regularization and one with L1 regularization (Lasso).

# Plain least-squares linear regression.
model = LinearRegression()
model.fit(X_train_std, y_train)

# L1-regularized regression with penalty strength alpha=0.1.
model2 = Lasso(alpha=0.1)
model2.fit(X_train_std, y_train)

In [0]:
# Intercept, coefficient-vector shape and coefficients of the unregularized model,
# printed one per line.
print(model.intercept_, model.coef_.shape, model.coef_, sep='\n')

In [0]:
# Intercept, coefficient-vector shape and coefficients of the L1-regularized model,
# printed one per line.
print(model2.intercept_, model2.coef_.shape, model2.coef_, sep='\n')

In [0]:
# Mean squared error of the unregularized model on the training and test splits.
y_train_pred, y_test_pred = model.predict(X_train_std), model.predict(X_test_std)

print('MSE train: %.2f, test: %.2f' % (
    mean_squared_error(y_true=y_train, y_pred=y_train_pred),
    mean_squared_error(y_true=y_test, y_pred=y_test_pred),
))

In [0]:
# Mean squared error of the L1-regularized model on the training and test splits.
y_train_pred, y_test_pred = model2.predict(X_train_std), model2.predict(X_test_std)

print('MSE train: %.2f, test: %.2f' % (
    mean_squared_error(y_true=y_train, y_pred=y_train_pred),
    mean_squared_error(y_true=y_test, y_pred=y_test_pred),
))

In [0]:
# Build the combined L1 + L2 regularized model (ElasticNet) with penalty
# strength alpha=0.1 and an L1 mixing ratio of 0.6.
model3 = ElasticNet(l1_ratio=0.6, alpha=0.1)

# Train it on the standardized training data.
model3.fit(X_train_std, y_train)

In [0]:
# Intercept, coefficient-vector shape and coefficients of the L1 + L2 regularized
# model, printed one per line.
print(model3.intercept_, model3.coef_.shape, model3.coef_, sep='\n')

In [0]:
# Mean squared error of the L1 + L2 regularized model on the training and test splits.
y_train_pred, y_test_pred = model3.predict(X_train_std), model3.predict(X_test_std)

print('MSE train: %.2f, test: %.2f' % (
    mean_squared_error(y_true=y_train, y_pred=y_train_pred),
    mean_squared_error(y_true=y_test, y_pred=y_test_pred),
))