# Linear Regression

In [28]:
from sklearn.datasets import load_wine
import pandas as pd
from sklearn.preprocessing import minmax_scale
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

wine = load_wine()
wine_df = pd.DataFrame(data=wine.data, columns=wine.feature_names)
target_df = pd.DataFrame(data=wine.target, columns=['Y'])
print(len(wine_df.columns))

X_train, X_test, y_train, y_test = train_test_split(wine_df, target_df, test_size=0.3, random_state=101)

model = LinearRegression()
model.fit(X_train, y_train)

pred = model.predict(X_test)

print("result of linear regression")
print('Mean Absolute Error:', mean_absolute_error(y_test, pred))
print('Mean Squared Error:', mean_squared_error(y_test, pred))
print('Mean Root Squared Error:', np.sqrt(mean_squared_error(y_test, pred)))

print("\n\n coefficient linear regression")
print(model.coef_)

13
result of linear regression
Mean Absolute Error: 0.2512897393972267
Mean Squared Error: 0.10624587409525586
Mean Root Squared Error: 0.3259537913497186


 coefficient linear regression
[[-8.09017190e-02  4.34817880e-02 -1.18857931e-01  3.65705449e-02
  -4.68014203e-04  1.41423581e-01 -4.54107854e-01 -5.13172664e-01
   9.69318443e-02  5.34311136e-02 -1.27626604e-01 -2.91381844e-01
  -5.72238959e-04]]


# L1 regression

In [22]:
from sklearn.linear_model import Lasso

L1 = Lasso(alpha=0.05)
L1.fit(X_train, y_train)
w, b = L1.coef_, L1.intercept_
print(w, b)

pred = model.predict(X_test)
print("result of linear regression")
print('Mean Absolute Error:', mean_absolute_error(y_test, pred))
print('Mean Squared Error:', mean_squared_error(y_test, pred))
print('Mean Root Squared Error:', np.sqrt(mean_squared_error(y_test, pred)))

[-0.          0.01373795 -0.          0.03065716  0.00154719 -0.
 -0.34143614 -0.          0.          0.06755943 -0.         -0.14558153
 -0.00089635] [1.56790619]
result of linear regression
Mean Absolute Error: 0.2512897393972267
Mean Squared Error: 0.10624587409525586
Mean Root Squared Error: 0.3259537913497186


# L2


In [29]:
from sklearn.linear_model import Ridge

L2 = Ridge(alpha=0.05)
L2.fit(X_train, y_train)
w, b = L2.coef_, L2.intercept_
print(w, b)

pred = model.predict(X_test)
print("result of linear regression")
print('Mean Absolute Error:', mean_absolute_error(y_test, pred))
print('Mean Squared Error:', mean_squared_error(y_test, pred))
print('Mean Root Squared Error:', np.sqrt(mean_squared_error(y_test, pred)))

[[-8.12456257e-02  4.35541496e-02 -1.21661565e-01  3.65979773e-02
  -3.94014013e-04  1.39168707e-01 -4.50691113e-01 -4.87216747e-01
   9.54111059e-02  5.37077039e-02 -1.28602933e-01 -2.89832790e-01
  -5.73136185e-04]] [3.17415274]
result of linear regression
Mean Absolute Error: 0.2512897393972267
Mean Squared Error: 0.10624587409525586
Mean Root Squared Error: 0.3259537913497186


# 결론
Linear Regression, Lasso Regression, Ridge Regression은 모두 에러값이 비슷하다. 그런데 L1 같은 경우 13개의 feature중 6개가 0값에 해당한다.
니는 다른 7개의 feature만 있어도 모델이 예측하는데 문제가 없음을 의미한다.