# 선형 회귀식의 계수를 찾는 법 - OLS VS. SGD
- 보스턴 집값 데이터 활용(RM VS Price)

### 필요한 모듈 import

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# 필요한 라이브러리 import 
from sklearn.datasets import load_boston
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split


# 1. LinearRegression 모델을 사용한 경우

In [2]:
# Cell-local imports so this cell can be run on its own.
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Load the data.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell presumably requires scikit-learn < 1.2 -- confirm the
# pinned version before running.
boston = load_boston()

df = pd.DataFrame(boston.data, columns=boston.feature_names)

# Use the single feature RM, kept as a one-column DataFrame so X stays 2-D.
X = df[['RM']]
y = boston.target

# Hold out 30% of the rows for evaluation and train on the remaining 70%.
# random_state fixes the split so the numbers are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1
)

# Create the model object.
reg = LinearRegression()

# Fit the model on the training split.
reg.fit(X_train, y_train)

# Coefficient and intercept: attributes ending in "_" are set by fit().
print(reg.coef_, reg.intercept_)

# Fitted regression equation (both terms printed with 3 decimals).
print("y = {:.3f}X + {:.3f}".format(reg.coef_[0], reg.intercept_))

# Predict on the held-out split.
y_pred = reg.predict(X_test)

# MSE, RMSE and R^2 on the held-out split.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("MSE:", np.round(mse, 3))
print("RMSE: ", np.round(rmse, 3))
print("R2: ", np.round(r2, 3))

[8.46109164] -30.571032410898315
y = 8.461092X + -30.571
MSE: 36.517
RMSE:  6.043
R2:  0.602


# 2. SGDRegressor with hyperparameter

In [6]:
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Default hyperparameters. random_state pins the data shuffling used by SGD so
# the fitted coefficients are identical on every run and can be compared
# fairly with the other cells.
reg = SGDRegressor(random_state=1)

# Fit the model on the training split.
reg.fit(X_train, y_train)

# Coefficient and intercept: attributes ending in "_" are set by fit().
print(reg.coef_, reg.intercept_)

# Fitted regression equation (both terms printed with 3 decimals).
# SGDRegressor stores the intercept as a 1-element array, hence the [0].
print("y = {:.3f}X + {:.3f}".format(reg.coef_[0], reg.intercept_[0]))

# Predict on the held-out split.
y_pred = reg.predict(X_test)

# MSE, RMSE and R^2 on the held-out split.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("MSE:", np.round(mse, 3))
print("RMSE: ", np.round(rmse, 3))
print("R2: ", np.round(r2, 3))

[4.17510729] [-3.27733761]
y = 4.175107X + -3.277
MSE: 55.223
RMSE:  7.431
R2:  0.397


In [9]:
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Constant learning rate of 0.001 with at most 1000 passes over the data.
# random_state pins the data shuffling used by SGD so that differences from
# the other cells come from the hyperparameters, not from run-to-run noise.
reg = SGDRegressor(
    max_iter=1000,
    eta0=0.001,
    learning_rate='constant',
    random_state=1,
)

# Fit the model on the training split.
reg.fit(X_train, y_train)

# Coefficient and intercept: attributes ending in "_" are set by fit().
print(reg.coef_, reg.intercept_)

# Fitted regression equation (both terms printed with 3 decimals).
# SGDRegressor stores the intercept as a 1-element array, hence the [0].
print("y = {:.3f}X + {:.3f}".format(reg.coef_[0], reg.intercept_[0]))

# Predict on the held-out split.
y_pred = reg.predict(X_test)

# MSE, RMSE and R^2 on the held-out split.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("MSE:", np.round(mse, 3))
print("RMSE: ", np.round(rmse, 3))
print("R2: ", np.round(r2, 3))

[3.92970862] [-1.45832261]
y = 3.929709X + -1.458
MSE: 57.083
RMSE:  7.555
R2:  0.377


In [None]:
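# Learning rate (eta0, learning_rate) and number of epochs (max_iter):
# uncomment the next line to read the full parameter documentation.
#help(SGDRegressor)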


In [10]:
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Constant learning rate of 0.01 with at most 1000 passes over the data.
# random_state pins the data shuffling used by SGD so that differences from
# the other cells come from the hyperparameters, not from run-to-run noise.
reg = SGDRegressor(
    max_iter=1000,
    eta0=0.01,
    learning_rate='constant',
    random_state=1,
)

# Fit the model on the training split.
reg.fit(X_train, y_train)

# Coefficient and intercept: attributes ending in "_" are set by fit().
print(reg.coef_, reg.intercept_)

# Fitted regression equation (both terms printed with 3 decimals).
# SGDRegressor stores the intercept as a 1-element array, hence the [0].
print("y = {:.3f}X + {:.3f}".format(reg.coef_[0], reg.intercept_[0]))

# Predict on the held-out split.
y_pred = reg.predict(X_test)

# MSE, RMSE and R^2 on the held-out split.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("MSE:", np.round(mse, 3))
print("RMSE: ", np.round(rmse, 3))
print("R2: ", np.round(r2, 3))




[6.48581788] [-19.27301732]
y = 6.485818X + -19.273
MSE: 43.752
RMSE:  6.615
R2:  0.523


# 3. SGDRegressor with scaling

In [13]:
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Standardize by hand. The mean and standard deviation are computed from the
# training split only and then reused for the test split, so no information
# from the test rows leaks into the preprocessing.
# NOTE(review): this rebinds X_train / X_test in place, so any cell executed
# after this one sees the scaled values -- re-run the split cell before
# re-running the unscaled experiments.
train_mean = np.mean(X_train, axis=0)
train_std = np.std(X_train, axis=0)
X_train = (X_train - train_mean) / train_std
X_test = (X_test - train_mean) / train_std

# Default hyperparameters. random_state pins the data shuffling used by SGD so
# the fitted coefficients are identical on every run.
reg = SGDRegressor(random_state=1)

# Fit the model on the (scaled) training split.
reg.fit(X_train, y_train)

# Coefficient and intercept: attributes ending in "_" are set by fit().
print(reg.coef_, reg.intercept_)

# Fitted regression equation (both terms printed with 3 decimals).
# SGDRegressor stores the intercept as a 1-element array, hence the [0].
print("y = {:.3f}X + {:.3f}".format(reg.coef_[0], reg.intercept_[0]))

# Predict on the (scaled) held-out split.
y_pred = reg.predict(X_test)

# MSE, RMSE and R^2 on the held-out split.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("MSE:", np.round(mse, 3))
print("RMSE: ", np.round(rmse, 3))
print("R2: ", np.round(r2, 3))


[5.83014279] [22.3749631]
y = 5.830143X + 22.375
MSE: 36.603
RMSE:  6.05
R2:  0.601


# 4. SGDRegressor with StandardScaler()

In [14]:
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# Standardize with StandardScaler.
# fit_transform learns the scaling statistics from the training split and
# applies them; transform reuses those same statistics on the test split.
# The target y is left untransformed.
# NOTE(review): this rebinds X_train / X_test to the scaler's output, so any
# cell executed after this one sees the scaled values -- re-run the split cell
# before re-running the unscaled experiments.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Default hyperparameters. random_state pins the data shuffling used by SGD so
# the fitted coefficients are identical on every run.
reg = SGDRegressor(random_state=1)

# Fit the model on the (scaled) training split.
reg.fit(X_train, y_train)

# Coefficient and intercept: attributes ending in "_" are set by fit().
print(reg.coef_, reg.intercept_)

# Fitted regression equation (both terms printed with 3 decimals).
# SGDRegressor stores the intercept as a 1-element array, hence the [0].
print("y = {:.3f}X + {:.3f}".format(reg.coef_[0], reg.intercept_[0]))

# Predict on the (scaled) held-out split.
y_pred = reg.predict(X_test)

# MSE, RMSE and R^2 on the held-out split.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("MSE:", np.round(mse, 3))
print("RMSE: ", np.round(rmse, 3))
print("R2: ", np.round(r2, 3))



[5.88396585] [22.33251378]
y = 5.883966X + 22.333
MSE: 36.42
RMSE:  6.035
R2:  0.603
