## 다중회귀 - 당뇨병 사례

In [1]:
import pandas as pd
from sklearn.datasets import load_diabetes
# Load the diabetes regression dataset bundled with scikit-learn.
# The loader defaults are spelled out: the result is a single container object
# (not an (X, y) tuple) whose `.data` and `.target` attributes are read later.
diab = load_diabetes(return_X_y=False, as_frame=False)

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from lightgbm import LGBMRegressor
from sklearn.linear_model import Lasso, Ridge

- dataset 분리

In [3]:
# Hold out 20% of the samples for evaluation.
# The fixed seed keeps the split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    diab.data,
    diab.target,
    test_size=0.2,
    random_state=2023,
)

- Linear Regression

In [4]:
# Baseline: ordinary least-squares regression fitted on the training split.
# `fit` returns the estimator itself, so construction and fitting can be chained.
lr = LinearRegression().fit(X_train, y_train)
y_hat_lr = lr.predict(X_test)

# Evaluate on the held-out split (R2 and mean squared error).
r2_lr, mse_lr = r2_score(y_test, y_hat_lr), mean_squared_error(y_test, y_hat_lr)

- Support Vector Regressor

In [5]:
# Support vector regression with scikit-learn's default hyperparameters.
# NOTE(review): the recorded R2 for this model is far below the other models';
# the defaults may be a poor fit for this target's scale - consider tuning.
svr = SVR()
y_hat_svr = svr.fit(X_train, y_train).predict(X_test)

# Evaluate on the held-out split.
r2_svr = r2_score(y_true=y_test, y_pred=y_hat_svr)
mse_svr = mean_squared_error(y_true=y_test, y_pred=y_hat_svr)

- Random Forest Regressor

In [8]:
# Random forest with a fixed seed so the fitted ensemble is reproducible.
rfr = RandomForestRegressor(random_state=2023).fit(X_train, y_train)
y_hat_rfr = rfr.predict(X_test)

# Apply both metrics to the same (truth, prediction) pair; order is R2 then MSE.
r2_rfr, mse_rfr = (
    metric(y_test, y_hat_rfr) for metric in (r2_score, mean_squared_error)
)

- LightGBM Regressor

In [9]:
# Gradient-boosted trees via LightGBM.
# random_state is pinned to the same seed used by the other models in this
# notebook, so the result stays reproducible even if sampling options
# (e.g. row/feature subsampling) are enabled later.
# verbose=-1 silences LightGBM's training log output.
lgbr = LGBMRegressor(random_state=2023, verbose=-1)
lgbr.fit(X_train, y_train)
y_hat_lgbr = lgbr.predict(X_test)

# Evaluate on the held-out split.
r2_lgbr = r2_score(y_test, y_hat_lgbr)
mse_lgbr = mean_squared_error(y_test, y_hat_lgbr)

- 모델 비교

In [10]:
# Performance summary: one column per model, one row per metric.
pf = pd.DataFrame(
    {
        'LR': [r2_lr, mse_lr],
        'SVR': [r2_svr, mse_svr],
        'RFR': [r2_rfr, mse_rfr],
        'LGBR': [r2_lgbr, mse_lgbr],
    },
    index=['R2 score', 'MSE'],
)
pf

Unnamed: 0,LR,SVR,RFR,LGBR
R2 score,0.501135,0.170325,0.405444,0.346808
MSE,3490.250913,5804.724664,4159.742055,4569.985042


In [11]:
# Side-by-side view of the true targets and each model's test-set predictions.
rf = pd.DataFrame(
    dict(
        y=y_test,
        LR=y_hat_lr,
        SVR=y_hat_svr,
        RFR=y_hat_rfr,
        LGBR=y_hat_lgbr,
    )
)
rf.head(10)

Unnamed: 0,y,LR,SVR,RFR,LGBR
0,265.0,164.553734,143.763042,193.64,205.767454
1,261.0,230.858516,155.672919,263.64,273.751148
2,160.0,113.389079,127.947072,81.45,46.181044
3,249.0,204.741168,154.991682,151.02,177.173406
4,102.0,105.520336,130.356541,97.71,58.710724
5,200.0,181.943438,144.978071,186.34,145.990366
6,42.0,82.00807,122.08572,91.85,121.436647
7,55.0,80.442006,132.165408,127.36,110.201718
8,178.0,131.113347,132.977523,161.08,149.377221
9,317.0,229.231799,150.77368,193.56,177.968251


#### 규제 선형회귀
- Lasso: L1 규제

In [12]:
# L1-regularised linear regression. alpha is written out at its default value
# (1.0, as shown in the parameter dump) to make the regularisation strength visible.
# NOTE(review): per the scikit-learn docs, random_state is only consulted when
# selection='random'; the dump shows 'cyclic', so the seed is likely inert here.
lasso = Lasso(alpha=1.0, random_state=2023)
lasso.get_params()

{'alpha': 1.0,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': 1000,
 'positive': False,
 'precompute': False,
 'random_state': 2023,
 'selection': 'cyclic',
 'tol': 0.0001,
 'warm_start': False}

In [13]:
# Fit the Lasso model and predict the held-out split in one chained call
# (`fit` returns the estimator, so `lasso` itself ends up fitted).
y_hat_lasso = lasso.fit(X_train, y_train).predict(X_test)

# Evaluate on the held-out split.
r2_lasso = r2_score(y_true=y_test, y_pred=y_hat_lasso)
mse_lasso = mean_squared_error(y_true=y_test, y_pred=y_hat_lasso)

- Ridge: L2 규제

In [14]:
# L2-regularised linear regression. alpha is written out at its default value
# (1.0, as shown in the parameter dump) to make the regularisation strength visible.
# NOTE(review): the seed presumably only matters for stochastic solvers;
# the dump shows solver='auto' - confirm whether it has any effect here.
ridge = Ridge(alpha=1.0, random_state=2023)
ridge.get_params()

{'alpha': 1.0,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': None,
 'positive': False,
 'random_state': 2023,
 'solver': 'auto',
 'tol': 0.0001}

In [15]:
# Fit the Ridge model and predict the held-out split in one chained call
# (`fit` returns the estimator, so `ridge` itself ends up fitted).
y_hat_ridge = ridge.fit(X_train, y_train).predict(X_test)

# Evaluate on the held-out split.
r2_ridge, mse_ridge = (
    r2_score(y_test, y_hat_ridge),
    mean_squared_error(y_test, y_hat_ridge),
)

- 종합 비교

In [16]:
# Append the regularised models to the metric summary.
# Row order matches the existing index: R2 score first, then MSE.
pf = pf.assign(
    Lasso=[r2_lasso, mse_lasso],
    Ridge=[r2_ridge, mse_ridge],
)
pf

Unnamed: 0,LR,SVR,RFR,LGBR,Lasso,Ridge
R2 score,0.501135,0.170325,0.405444,0.346808,0.3132,0.428734
MSE,3490.250913,5804.724664,4159.742055,4569.985042,4805.117215,3996.798175


In [17]:
# Append the regularised models' predictions to the comparison table.
rf = rf.assign(Lasso=y_hat_lasso, Ridge=y_hat_ridge)
rf.head(10)

Unnamed: 0,y,LR,SVR,RFR,LGBR,Lasso,Ridge
0,265.0,164.553734,143.763042,193.64,205.767454,163.80803,160.002767
1,261.0,230.858516,155.672919,263.64,273.751148,185.863613,199.355943
2,160.0,113.389079,127.947072,81.45,46.181044,137.521623,133.198975
3,249.0,204.741168,154.991682,151.02,177.173406,172.979468,192.171298
4,102.0,105.520336,130.356541,97.71,58.710724,126.882774,130.853284
5,200.0,181.943438,144.978071,186.34,145.990366,171.59005,168.120517
6,42.0,82.00807,122.08572,91.85,121.436647,111.078479,107.399013
7,55.0,80.442006,132.165408,127.36,110.201718,113.071589,98.703859
8,178.0,131.113347,132.977523,161.08,149.377221,139.611205,133.656656
9,317.0,229.231799,150.77368,193.56,177.968251,182.738338,208.104324
