In [13]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from scipy import stats
from sklearn.datasets import load_boston
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

%matplotlib inline

In [11]:
# Load the Boston housing dataset through scikit-learn's loader.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so this
# call (and its import) fails on current releases — confirm the pinned scikit-learn version
# or switch to another data source before re-running.
boston = load_boston()

In [12]:
# Wrap the feature matrix in a DataFrame labelled with the dataset's feature names,
# and append the regression target as a trailing PRICE column.
df = pd.DataFrame(data=boston.data, columns=boston.feature_names).assign(
    PRICE=boston.target
)
print(df.shape)

(506, 14)


In [16]:
# Separate the feature columns from the PRICE target; df itself is left unmodified.
x_data = df.drop(columns=['PRICE'])
y_target = df['PRICE']

# Hold out 30% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x_data,
    y_target,
    test_size=0.3,
    random_state=156,
)

In [18]:
# Fit a linear regression on the training split (fit() returns the estimator itself).
lr = LinearRegression().fit(X_train, y_train)

# Predict on the held-out split and derive the error metrics from those predictions.
y_preds = lr.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_preds)
rmse = np.sqrt(mse)

print("MSE :", mse)
print("RMSE : ", rmse)
print("R2 : ", r2_score(y_true=y_test, y_pred=y_preds))

MSE : 17.29691590790215
RMSE :  4.158956107955715
R2 :  0.7572263323138921


In [23]:
# Report the fitted intercept ("절편 값") and the regression coefficients ("회귀계수"),
# the latter rounded to one decimal place for readability.
print("절편 값 : ", lr.intercept_)
print("회귀계수 : ", np.round(lr.coef_, decimals=1))

절편 값 :  40.995595172164336
회귀계수 :  [ -0.1   0.1   0.    3.  -19.8   3.4   0.   -1.7   0.4  -0.   -0.9   0.
  -0.6]


In [25]:
# Display the fitted coefficient array at full precision (no rounding applied).
lr.coef_

array([-1.12979614e-01,  6.55124002e-02,  3.44366694e-02,  3.04589777e+00,
       -1.97958320e+01,  3.35496880e+00,  5.93713290e-03, -1.74185354e+00,
        3.55884364e-01, -1.42954516e-02, -9.20180066e-01,  1.03966156e-02,
       -5.66182106e-01])

In [27]:
# Label each coefficient (rounded to one decimal) with its feature name,
# then display them sorted from largest to smallest.
coeff = pd.Series(np.round(lr.coef_, decimals=1), index=x_data.columns)
coeff.sort_values(ascending=False)

RM          3.4
CHAS        3.0
RAD         0.4
ZN          0.1
B           0.0
TAX        -0.0
AGE         0.0
INDUS       0.0
CRIM       -0.1
LSTAT      -0.6
PTRATIO    -0.9
DIS        -1.7
NOX       -19.8
dtype: float64

In [30]:
# 5-fold cross-validation of the linear model on the full dataset.
# cross_val_score is imported in the notebook's top import cell with the other
# sklearn.model_selection names, so the cell does not carry its own import.
neg_mse_scores = cross_val_score(
    lr,
    x_data,
    y_target,
    scoring="neg_mean_squared_error",
    cv=5,
)

# Scores returned by cross_val_score(scoring="neg_mean_squared_error") are all negative
# (the MSE is negated), so flip the sign before taking the square root to get RMSE.
rmse_scores = np.sqrt(-neg_mse_scores)
avg_rmse = np.mean(rmse_scores)

print("5 folds 의 개별 Negative MSE scores :", np.round(neg_mse_scores, 2))
print("5 folds 의 개별 RMSE scores :", np.round(rmse_scores, 2))
print("5 folds 의 평균 RMSE :", avg_rmse)

5 folds 의 개별 Negative MSE scores : [-12.46 -26.05 -33.07 -80.76 -33.31]
5 folds 의 개별 RMSE scores : [3.53 5.1  5.75 8.99 5.77]
5 folds 의 평균 RMSE : 5.828658946215808
