In [112]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [113]:
from sklearn.svm import SVR
from sklearn.metrics import r2_score,mean_squared_error

In [114]:
from sklearn.datasets import load_diabetes
# Load the scikit-learn diabetes dataset and assemble a single table:
# one column per feature, with the regression target appended as the last column.
diabetes = load_diabetes()
df = pd.DataFrame(
    data=diabetes.data,
    columns=diabetes.feature_names,
).assign(target=diabetes.target)
df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


#### 다중 선형회귀

In [115]:
# Feature matrix: every column except 'target' (which is the last column of df).
X = df.drop(columns="target").to_numpy()
# Regression target as a NumPy array.
y = df["target"].to_numpy()

In [116]:
from sklearn.model_selection  import train_test_split
from sklearn.linear_model import LinearRegression
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2023,
)

In [117]:
# Baseline model: ordinary least-squares linear regression on all features.
lr = LinearRegression()
lr.fit(X_train, y_train)

# Evaluate on the held-out test set.
pred_lr = lr.predict(X_test)
# R^2 via r2_score; a bare lr.score(X_test, y_test) in the middle of a cell is
# neither displayed nor stored, so it is not called here.
r2_lr = r2_score(y_test, pred_lr)
mse_lr = mean_squared_error(y_test, pred_lr)

In [118]:
# Support-vector regression with the library's default settings.
svr = SVR().fit(X_train, y_train)

# Test-set predictions and metrics.
pred_sv = svr.predict(X_test)
r2_sv = r2_score(y_true=y_test, y_pred=pred_sv)
mse_sv = mean_squared_error(y_true=y_test, y_pred=pred_sv)

In [119]:
from sklearn.ensemble import RandomForestRegressor
# Random-forest regressor; the seed is fixed so the fitted forest is reproducible.
rfr = RandomForestRegressor(random_state=2023).fit(X_train, y_train)

# Test-set predictions and metrics.
pred_rf = rfr.predict(X_test)
r2_rf = r2_score(y_true=y_test, y_pred=pred_rf)
mse_rf = mean_squared_error(y_true=y_test, y_pred=pred_rf)

In [120]:
from xgboost import XGBRegressor
# Gradient-boosted trees (XGBoost) with default hyper-parameters.
# The seed is pinned explicitly, matching the other seeded models in this
# notebook (random_state=2023), so the run stays reproducible even if
# stochastic options such as subsampling are enabled later.
# NOTE(review): with the defaults no row/column subsampling should be active,
# so this is not expected to change the fitted model -- confirm on re-run.
xgr = XGBRegressor(random_state=2023)
xgr.fit(X_train, y_train)

# Test-set predictions and metrics.
pred_xg = xgr.predict(X_test)
r2_xg = r2_score(y_test, pred_xg)
mse_xg = mean_squared_error(y_test, pred_xg)

In [121]:
# Tab-separated comparison of the four models on the test set:
# header row, R^2 row, MSE row (each value rounded to 4 decimals).
print(
    '\t\tLR\tSV\tRF\tXG',
    f'R squared\t{r2_lr:.4f}\t{r2_sv:.4f}\t{r2_rf:.4f}\t{r2_xg:.4f}',
    f'Mean squared E\t{mse_lr:.4f}\t{mse_sv:.4f}\t{mse_rf:.4f}\t{mse_xg:.4f}',
    sep='\n',
)

		LR	SV	RF	XG
R squared	0.5011	0.1703	0.4054	0.3062
Mean squared E	3490.2509	5804.7247	4159.7421	4853.7496


In [122]:
# Put the true test targets next to each model's predictions, one column per model.
# NOTE: this rebinds `df`, which previously held the diabetes feature table, so
# cells that read the original `df` should not be re-run after this one.
df = pd.DataFrame(
    {
        'y': y_test,
        'LR': pred_lr,
        'SV': pred_sv,
        'RF': pred_rf,
        'XG': pred_xg,
    }
)
df.head()

Unnamed: 0,y,LR,SV,RF,XG
0,265.0,164.553734,143.763042,193.64,189.963928
1,261.0,230.858516,155.672919,263.64,291.990021
2,160.0,113.389079,127.947072,81.45,58.978519
3,249.0,204.741168,154.991682,151.02,131.112106
4,102.0,105.520336,130.356541,97.71,74.594643


- 규제 선형회귀
- Ridge - L2 규제

In [123]:
from sklearn.linear_model import Ridge
# Ridge regression with the library's default settings.
ridge = Ridge().fit(X_train, y_train)

# Test-set predictions and metrics:
#   r2_rg  -- coefficient of determination (R^2)
#   mse_rg -- mean squared error
pred_rg = ridge.predict(X_test)
r2_rg = r2_score(y_true=y_test, y_pred=pred_rg)
mse_rg = mean_squared_error(y_true=y_test, y_pred=pred_rg)
r2_rg, mse_rg

(0.4287339832377136, 3996.7981745424554)

In [124]:
from sklearn.linear_model import Lasso
# Lasso regression with default settings and a fixed seed.
lasso = Lasso(random_state=2023).fit(X_train, y_train)

# Test-set predictions and metrics (R^2, mean squared error).
pred_ls = lasso.predict(X_test)
r2_ls = r2_score(y_true=y_test, y_pred=pred_ls)
mse_ls = mean_squared_error(y_true=y_test, y_pred=pred_ls)
r2_ls, mse_ls

(0.3132002038489573, 4805.117215076456)

In [125]:
# Tab-separated comparison of all six models on the test set:
# header row, R^2 row, MSE row (each value rounded to 4 decimals).
print(
    '\t\tLR\tSV\tRF\tXG\tRidge\tLasso',
    f'R squared\t{r2_lr:.4f}\t{r2_sv:.4f}\t{r2_rf:.4f}\t{r2_xg:.4f}\t{r2_rg:.4f}\t{r2_ls:.4f}',
    f'Mean squared E\t{mse_lr:.4f}\t{mse_sv:.4f}\t{mse_rf:.4f}\t{mse_xg:.4f}\t{mse_rg:.4f}\t{mse_ls:.4f}',
    sep='\n',
)

		LR	SV	RF	XG	Ridge	Lasso
R squared	0.5011	0.1703	0.4054	0.3062	0.4287	0.3132
Mean squared E	3490.2509	5804.7247	4159.7421	4853.7496	3996.7982	4805.1172


In [126]:
# Append the Ridge and Lasso test-set predictions as two new columns
# (in that order) to the predictions table, then display it.
df = df.assign(Ridge=pred_rg, Lasso=pred_ls)
df

Unnamed: 0,y,LR,SV,RF,XG,Ridge,Lasso
0,265.0,164.553734,143.763042,193.64,189.963928,160.002767,163.808030
1,261.0,230.858516,155.672919,263.64,291.990021,199.355943,185.863613
2,160.0,113.389079,127.947072,81.45,58.978519,133.198975,137.521623
3,249.0,204.741168,154.991682,151.02,131.112106,192.171298,172.979468
4,102.0,105.520336,130.356541,97.71,74.594643,130.853284,126.882774
...,...,...,...,...,...,...,...
84,143.0,171.604988,145.073347,179.10,165.103058,163.756299,163.954723
85,258.0,173.617811,137.874301,170.47,195.641418,150.313060,161.744051
86,199.0,107.913507,132.585325,107.25,98.236061,132.358882,131.177829
87,182.0,118.190436,128.137722,88.28,68.233879,129.944404,129.297907
