# 다양한 회귀 모델 - 보스턴 주택 가격
- 다중 선형회귀
- 규제 선형회귀
- 선형회귀 이외의 회귀

In [2]:
import warnings
# Suppress every warning for the rest of the session. This also hides library
# deprecation notices, so re-enable warnings when upgrading dependencies.
warnings.filterwarnings('ignore')

In [3]:
# Load the Boston housing data as an object exposing `.data` and `.target`.
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, so fall
# back to rebuilding the same arrays from the original StatLib file, following
# the recipe given in scikit-learn's deprecation notice.
try:
    from sklearn.datasets import load_boston
    boston = load_boston()
except ImportError:
    import numpy as np
    import pandas as pd
    from sklearn.utils import Bunch

    # Each record spans two physical lines in the raw file: 11 values on the
    # first line, then the last 2 features and the target on the second.
    raw_df = pd.read_csv(
        'http://lib.stat.cmu.edu/datasets/boston',
        sep=r'\s+', skiprows=22, header=None,
    )
    boston = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
    )

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the raw (unscaled) samples for evaluation; the fixed seed
# keeps the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    boston.data,
    boston.target,
    test_size=0.2,
    random_state=2022,
)

### 1. 다중 선형회귀

In [6]:
from sklearn.linear_model import LinearRegression
# Multiple linear regression on the raw (unscaled) training features.
lr = LinearRegression()
# Kept as the last expression so the notebook echoes the fitted estimator.
lr.fit(X_train, y_train)

LinearRegression()

In [7]:
from sklearn.metrics import r2_score
# Keep the predictions; they are reused later in the comparison table.
pred_lr = lr.predict(X_test)
# R^2 of the linear model on the held-out split.
r2_score(y_test, pred_lr)

0.6225687597000796

### 2. 규제 선형회귀

- Ridge

In [8]:
from sklearn.linear_model import Ridge
ridge = Ridge()
# Inspect the default hyperparameters before tuning.
ridge.get_params()

{'alpha': 1.0,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': None,
 'normalize': 'deprecated',
 'positive': False,
 'random_state': None,
 'solver': 'auto',
 'tol': 0.001}

In [9]:
# Baseline Ridge with default settings on the raw features. `fit` returns the
# estimator itself, so the test-set R^2 is read off the same expression.
ridge.fit(X_train, y_train).score(X_test, y_test)

0.6120292876368371

- Lasso

In [10]:
from sklearn.linear_model import Lasso
lasso = Lasso()
# Inspect the default hyperparameters before tuning.
lasso.get_params()

{'alpha': 1.0,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': 1000,
 'normalize': 'deprecated',
 'positive': False,
 'precompute': False,
 'random_state': None,
 'selection': 'cyclic',
 'tol': 0.0001,
 'warm_start': False}

In [11]:
# Baseline Lasso with default settings on the raw features. `fit` returns the
# estimator itself, so the test-set R^2 is read off the same expression.
lasso.fit(X_train, y_train).score(X_test, y_test)

0.6471885919727083

- 데이터를 스케일링한 후에 계산해 주어야 함

In [12]:
from sklearn.preprocessing import MinMaxScaler
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split below, so the test rows' min/max influence the training features.
# Consider fitting the scaler on the training split only.
boston_scaled = MinMaxScaler().fit_transform(boston.data)

In [13]:
# Same 80/20 partition and seed as the raw split, applied to the scaled
# features.
X_train2, X_test2, y_train2, y_test2 = train_test_split(
    boston_scaled,
    boston.target,
    test_size=0.2,
    random_state=2022,
)

In [14]:
# Ridge (L2 regularization): coarse sweep over alpha in powers of ten.
# NOTE(review): alpha is compared on the test split; a validation split or
# cross-validation would avoid tuning on the evaluation data.
for alpha in (0.1, 1, 10, 100):
    ridge = Ridge(alpha=alpha).fit(X_train2, y_train2)
    score = ridge.score(X_test2, y_test2)
    print(f'{alpha}: {score:.4f}')

0.1: 0.6258
1: 0.6480
10: 0.6626
100: 0.3889


In [15]:
# Finer Ridge sweep over a narrower range of alpha values.
# NOTE(review): still scored on the test split; see cross-validation for an
# unbiased choice of alpha.
for alpha in (5, 10, 20, 50):
    ridge = Ridge(alpha=alpha).fit(X_train2, y_train2)
    score = ridge.score(X_test2, y_test2)
    print(f'{alpha}: {score:.4f}')

5: 0.6764
10: 0.6626
20: 0.6124
50: 0.4973


In [16]:
# Refit Ridge with alpha=5 (the highest score in the sweeps) and keep its
# predictions for the comparison table.
ridge = Ridge(alpha=5).fit(X_train2, y_train2)
pred_ridge = ridge.predict(X_test2)

In [17]:
# Lasso (L1 regularization): coarse sweep over alpha in powers of ten.
# NOTE(review): alpha is compared on the test split; a validation split or
# cross-validation would avoid tuning on the evaluation data.
for alpha in (0.1, 1, 10, 100):
    lasso = Lasso(alpha=alpha).fit(X_train2, y_train2)
    score = lasso.score(X_test2, y_test2)
    print(f'{alpha}: {score:.4f}')

0.1: 0.6124
1: 0.2158
10: -0.0937
100: -0.0937


In [18]:
# Finer Lasso sweep over small alpha values.
# NOTE(review): still scored on the test split; see cross-validation for an
# unbiased choice of alpha.
for alpha in (0.01, 0.05, 0.1, 0.3):
    lasso = Lasso(alpha=alpha).fit(X_train2, y_train2)
    score = lasso.score(X_test2, y_test2)
    print(f'{alpha}: {score:.4f}')

0.01: 0.6272
0.05: 0.6193
0.1: 0.6124
0.3: 0.6250


In [19]:
# Refit Lasso with alpha=0.01 (the highest score in the sweeps) and keep its
# predictions for the comparison table.
lasso = Lasso(alpha=0.01).fit(X_train2, y_train2)
pred_lasso = lasso.predict(X_test2)

In [20]:
import pandas as pd

# Side-by-side table of targets and linear-model predictions, one row per test
# sample. LR was trained on the raw split; Ridge and Lasso on the scaled split.
df = pd.DataFrame(
    {
        'y_test': y_test,
        'LR': pred_lr,
        'y_test2': y_test2,
        'Ridge': pred_ridge,
        'Lasso': pred_lasso,
    }
)
df.head(10)

Unnamed: 0,y_test,LR,y_test2,Ridge,Lasso
0,20.1,21.052395,20.1,20.997831,21.258673
1,11.9,6.309071,11.9,9.55801,6.328281
2,20.6,21.526247,20.6,20.315095,21.110774
3,33.2,36.158114,33.2,34.056721,35.792194
4,20.8,18.031103,20.8,17.579987,17.837889
5,13.5,13.179806,13.5,15.213495,13.565227
6,24.7,24.951738,24.7,25.467278,24.859535
7,13.9,12.625334,13.9,15.47389,13.052384
8,10.4,6.181062,10.4,8.887716,6.440885
9,19.7,13.386348,19.7,15.806759,13.59417


### 3. 회귀는 선형회귀만 있나?

In [21]:
# Decision tree regressor on the raw features (seeded for reproducibility).
from sklearn.tree import DecisionTreeRegressor

dtr = DecisionTreeRegressor(random_state=2022).fit(X_train, y_train)
pred_dt = dtr.predict(X_test)
r2_score(y_test, pred_dt)

0.7353132347724971

In [22]:
# Random forest regressor on the raw features (seeded for reproducibility).
from sklearn.ensemble import RandomForestRegressor

rfr = RandomForestRegressor(random_state=2022).fit(X_train, y_train)
pred_rf = rfr.predict(X_test)
r2_score(y_test, pred_rf)

0.8017061249172668

In [23]:
# Support vector regression with default settings, trained on the scaled split.
from sklearn.svm import SVR

svr = SVR().fit(X_train2, y_train2)
pred_sv = svr.predict(X_test2)
r2_score(y_test2, pred_sv)

0.655552865955936

### 4. 예측값 비교

In [24]:
# Append the non-linear models' predictions, rounded to 4 decimals.
# NOTE(review): the LR/Ridge/Lasso columns were stored unrounded, so the
# precision differs between columns.
df['DT'] = pred_dt.round(4)
df['RF'] = pred_rf.round(4)
df['SV'] = pred_sv.round(4)

In [25]:
# Show the first 10 test rows with every model's prediction side by side.
df.head(10)

Unnamed: 0,y_test,LR,y_test2,Ridge,Lasso,DT,RF,SV
0,20.1,21.052395,20.1,20.997831,21.258673,20.4,20.128,19.2864
1,11.9,6.309071,11.9,9.55801,6.328281,13.8,13.19,12.8522
2,20.6,21.526247,20.6,20.315095,21.110774,23.0,21.152,20.6842
3,33.2,36.158114,33.2,34.056721,35.792194,34.6,34.464,28.8451
4,20.8,18.031103,20.8,17.579987,17.837889,23.1,19.411,16.0579
5,13.5,13.179806,13.5,15.213495,13.565227,15.2,15.187,15.5334
6,24.7,24.951738,24.7,25.467278,24.859535,25.3,24.576,24.3536
7,13.9,12.625334,13.9,15.47389,13.052384,17.5,15.887,15.5062
8,10.4,6.181062,10.4,8.887716,6.440885,5.0,8.429,10.8791
9,19.7,13.386348,19.7,15.806759,13.59417,18.5,17.792,15.5834
