In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
from sklearn.datasets import load_diabetes
# Load the scikit-learn diabetes dataset and assemble one frame: the feature
# columns first, then a trailing `target` column.
diabetes = load_diabetes()
df = pd.DataFrame(
    data=diabetes.data,
    columns=diabetes.feature_names,
).assign(target=diabetes.target)
df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


In [3]:
from sklearn.model_selection import train_test_split
# Every column except the last one is a feature; the `target` column is the label.
features = df.iloc[:, :-1].values
labels = df['target'].values

# Hold out 10% of the rows for testing; the fixed seed pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.1,
    random_state=2023,
)

- 다중 선형회귀

In [4]:
from sklearn.linear_model import LinearRegression
# Fit a linear regression model on the training split.
lr = LinearRegression().fit(X_train, y_train)
lr  # show the fitted estimator, as the bare fit() call did

In [5]:
# Coefficient of determination on the test split, plus the learned weights and bias.
test_r2 = lr.score(X_test, y_test)
weights, bias = lr.coef_, lr.intercept_
print(f'결정계수 : {test_r2}\n가중치: {weights}\n편향: {bias}')

결정계수 : 0.5473984855686955
가중치: [ -33.11628307 -237.6032915   530.85406309  314.76599611 -576.10499117
  292.39957612   11.04058171  126.857235    689.82482851   66.14794604]
편향: 151.74476373360375


In [6]:
# Reproduce the prediction for the first test row by hand:
# dot the weights with the row's features, then add the bias.
first_row = X_test[0]
np.dot(lr.coef_, first_row) + lr.intercept_

164.4388651998387

In [7]:
# Same row through predict(); a one-row slice passes it as a (1, n_features) array.
lr.predict(X_test[:1])[0]

164.4388651998387

In [8]:
# Print actual vs. predicted targets for the first N_PREVIEW test rows.
# Slicing both arrays (instead of indexing 0..9) means a test split with fewer
# than N_PREVIEW rows prints what is available rather than raising IndexError.
N_PREVIEW = 10
pred = lr.predict(X_test)
for actual, predicted in zip(y_test[:N_PREVIEW], pred[:N_PREVIEW]):
    print(f'실제값: {actual},\t 예측값: {predicted:.4f}')

실제값: 265.0,	 예측값: 164.4389
실제값: 261.0,	 예측값: 230.1419
실제값: 160.0,	 예측값: 112.8615
실제값: 249.0,	 예측값: 205.7712
실제값: 102.0,	 예측값: 105.6943
실제값: 200.0,	 예측값: 181.0511
실제값: 42.0,	 예측값: 82.0679
실제값: 55.0,	 예측값: 77.2532
실제값: 178.0,	 예측값: 128.8918
실제값: 317.0,	 예측값: 226.2350


- 서포트 벡터 머신

In [9]:
from sklearn.svm import SVR
from sklearn.metrics import r2_score, mean_squared_error
# Support-vector regressor with default hyperparameters, scored on the test split.
svr = SVR().fit(X_train, y_train)
pred_sv = svr.predict(X_test)
r2_sv, mse_sv = r2_score(y_test, pred_sv), mean_squared_error(y_test, pred_sv)

- Random Forest Regressor

In [10]:
from sklearn.ensemble import RandomForestRegressor
# Random forest regressor (seeded), scored on the test split.
rfr = RandomForestRegressor(random_state=2023).fit(X_train, y_train)
pred_rf = rfr.predict(X_test)
r2_rf, mse_rf = r2_score(y_test, pred_rf), mean_squared_error(y_test, pred_rf)

- XGBoost

In [11]:
from xgboost import XGBRegressor
# XGBoost regressor with default hyperparameters, scored on the test split.
xgr = XGBRegressor().fit(X_train, y_train)
pred_xg = xgr.predict(X_test)
r2_xg, mse_xg = r2_score(y_test, pred_xg), mean_squared_error(y_test, pred_xg)

- Linear Regression

In [12]:
# Refit the existing linear model and record its test-set predictions and
# metrics under the same naming scheme as the other models.
pred_lr = lr.fit(X_train, y_train).predict(X_test)
r2_lr, mse_lr = r2_score(y_test, pred_lr), mean_squared_error(y_test, pred_lr)

- Ridge : L2 규제

In [13]:
from sklearn.linear_model import Ridge
# Ridge regression (seeded), scored on the test split.
ridge = Ridge(random_state=2023).fit(X_train, y_train)
pred_rg = ridge.predict(X_test)
r2_rg, mse_rg = r2_score(y_test, pred_rg), mean_squared_error(y_test, pred_rg)

- Lasso : L1 규제

In [14]:
from sklearn.linear_model import Lasso
# Lasso regression (seeded), scored on the test split.
lasso = Lasso(random_state=2023).fit(X_train, y_train)
pred_ls = lasso.predict(X_test)
r2_ls, mse_ls = r2_score(y_test, pred_ls), mean_squared_error(y_test, pred_ls)

- 종합 비교

In [15]:
# Tab-separated comparison table: one row per metric, one column per model,
# in the order LR, SV, RF, XG, Ridge, Lasso.
r2_by_model = (r2_lr, r2_sv, r2_rf, r2_xg, r2_rg, r2_ls)
mse_by_model = (mse_lr, mse_sv, mse_rf, mse_xg, mse_rg, mse_ls)

print('\t\tLR\tSV\tRF\tXG\tRidge\tLasso')
print('\t'.join(['R squared', *(f'{score:.4f}' for score in r2_by_model)]))
print('\t'.join(['Mean squared E', *(f'{err:.4f}' for err in mse_by_model)]))

		LR	SV	RF	XG	Ridge	Lasso
R squared	0.5474	0.1736	0.4374	0.3298	0.4885	0.3414
Mean squared E	2934.2784	5357.8379	3647.4062	4345.2940	3316.0657	4269.5669


In [16]:
# Side-by-side table of the true test targets and each model's predictions.
# NOTE(review): this rebinds `df`, which until here held the diabetes feature
# table, so the train/test split cell cannot be re-run afterwards without
# reloading the data. Consider a distinct name once downstream use is confirmed.
column_labels = ['y', 'LR', 'SV', 'RF', 'XG', 'Ridge', 'Lasso']
column_values = [y_test, pred_lr, pred_sv, pred_rf, pred_xg, pred_rg, pred_ls]
df = pd.DataFrame(dict(zip(column_labels, column_values)))
df.head(10)

Unnamed: 0,y,LR,SV,RF,XG,Ridge,Lasso
0,265.0,164.438865,144.190589,196.66,240.086166,160.151435,163.228322
1,261.0,230.141881,157.247371,257.69,294.357269,202.152287,185.709959
2,160.0,112.861523,126.542081,82.91,69.451904,130.893089,136.437283
3,249.0,205.771219,155.329271,162.19,135.287415,193.404176,172.572808
4,102.0,105.694296,129.916467,89.69,63.781631,130.014978,125.594654
5,200.0,181.051091,144.85424,164.64,148.613083,168.462653,171.165806
6,42.0,82.067894,119.731117,96.46,97.405067,104.536203,109.488925
7,55.0,77.253222,129.977129,141.39,97.169762,94.52976,111.518618
8,178.0,128.891822,131.056091,145.14,157.598083,131.573181,138.565391
9,317.0,226.235038,150.626285,193.59,207.042252,208.643264,182.521455
