In [18]:
from sklearn.linear_model import LinearRegression, SGDRegressor, Ridge, RidgeCV
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error  # 均方误差

In [19]:
# Load the Boston housing data and report its (n_samples, n_features) shape.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the environment pins a version that still ships it.
boston_data = load_boston()
print("特征数量为:(样本数,特征数)", boston_data.data.shape)

# Hold out a test set; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    boston_data.data,
    boston_data.target,
    random_state=22,
)

特征数量为:(样本数,特征数) (506, 13)


In [20]:
# Standardize the features: the scaler is fitted on the training split only and
# the same fitted transform is then applied to the test split.
# x_train / x_test are rebound to their scaled versions because the later
# SGDRegressor and Ridge cells fit on these same names.
transfer = StandardScaler()
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)

# Ordinary least-squares linear regression (labelled "正规方程", i.e. the
# normal-equation approach, in the printed output).
estimator = LinearRegression()
estimator.fit(x_train, y_train)

print("正规方程_权重系数为: ", estimator.coef_)
print("正规方程_偏置为:", estimator.intercept_)

# Predict on the held-out split and score with mean squared error.
y_predict = estimator.predict(x_test)
error = mean_squared_error(y_test, y_predict)
print("正规方程_房价预测:", y_predict)
# Label corrected: "均分误差" was a typo for "均方误差" (mean squared error).
print("正规方程_均方误差:", error)

正规方程_权重系数为:  [-0.64817766  1.14673408 -0.05949444  0.74216553 -1.95515269  2.70902585
 -0.07737374 -3.29889391  2.50267196 -1.85679269 -1.75044624  0.87341624
 -3.91336869]
正规方程_偏置为: 22.62137203166228
正规方程_房价预测: [28.22944896 31.5122308  21.11612841 32.6663189  20.0023467  19.07315705
 21.09772798 19.61400153 19.61907059 32.87611987 20.97911561 27.52898011
 15.54701758 19.78630176 36.88641203 18.81202132  9.35912225 18.49452615
 30.66499315 24.30184448 19.08220837 34.11391208 29.81386585 17.51775647
 34.91026707 26.54967053 34.71035391 27.4268996  19.09095832 14.92742976
 30.86877936 15.88271775 37.17548808  7.72101675 16.24074861 17.19211608
  7.42140081 20.0098852  40.58481466 28.93190595 25.25404307 17.74970308
 38.76446932  6.87996052 21.80450956 25.29110265 20.427491   20.4698034
 17.25330064 26.12442519  8.48268143 27.50871869 30.58284841 16.56039764
  9.38919181 35.54434377 32.29801978 21.81298945 17.60263689 22.0804256
 23.49262401 24.10617033 20.1346492  38.5268066  24.58319594

In [21]:
# Linear regression trained by stochastic gradient descent, using a constant
# learning rate (eta0=0.01) and at most 10000 passes over the training data.
# random_state is pinned because SGD shuffles the training data; without a
# seed the coefficients and the reported error differ on every run.
estimator = SGDRegressor(learning_rate="constant", eta0=0.01, max_iter=10000,
                         random_state=22)
# Alternative: SGDRegressor(penalty='l2', loss="squared_loss") is equivalent to
# ridge regression, but using the Ridge estimator directly is recommended.
# (From scikit-learn 1.0 on, this loss is named "squared_error".)
estimator.fit(x_train, y_train)

print("梯度下降_权重系数为: ", estimator.coef_)
print("梯度下降_偏置为:", estimator.intercept_)

# Predict on the held-out split and score with mean squared error.
y_predict = estimator.predict(x_test)
error = mean_squared_error(y_test, y_predict)
print("梯度下降_房价预测:", y_predict)
# Label corrected: "均分误差" was a typo for "均方误差" (mean squared error).
print("梯度下降_均方误差:", error)

梯度下降_权重系数为:  [-0.90659046  1.37943928 -0.08934696  1.06801961 -2.11049693  2.927141
 -0.12625752 -3.24481511  2.41326401 -1.73151084 -1.55599116  1.00598295
 -4.29476823]
梯度下降_偏置为: [22.74845844]
梯度下降_房价预测: [29.02630522 31.94372411 21.12172372 34.36512724 20.34494967 18.67397087
 21.1020583  19.91206936 19.88942235 33.02561675 20.84420671 28.00444779
 15.30877039 19.91707597 37.37476585 18.36883893  7.3220989  18.63966301
 31.56501496 25.01014484 18.63922008 34.05239308 31.15405461 16.73647422
 35.6626479  26.53484851 36.60244272 28.38359715 18.17431108 15.23452087
 30.53161265 14.92665548 39.0656448   4.01792559 16.15312256 16.56250072
  4.82552491 19.83938281 43.01107315 29.97812081 25.88913321 17.13018496
 41.12456966  4.93133278 21.81449747 25.32920854 20.61309537 20.71350519
 17.9108394  25.70782248  7.83335955 28.29492087 32.24991043 15.17698977
  7.46218131 35.61335997 34.03426175 22.2304244  17.2532475  23.03992984
 23.91917196 24.36185869 20.62572603 39.55162693 25.16899518 19.

In [22]:
# Ridge regression (L2-regularized linear regression) with alpha=0.5.
estimator = Ridge(max_iter=10000, alpha=0.5)
# Alternative: ridge regression with built-in cross-validation over alpha:
# estimator = RidgeCV(alphas=[0.1, 0.2, 0.3, 0.5])
estimator.fit(x_train, y_train)

print("岭回归_权重系数为: ", estimator.coef_)
print("岭回归_偏置为:", estimator.intercept_)

# Predict on the held-out split and score with mean squared error.
y_predict = estimator.predict(x_test)
error = mean_squared_error(y_test, y_predict)
print("岭回归_房价预测:", y_predict)
# Label corrected: "均分误差" was a typo for "均方误差" (mean squared error).
print("岭回归_均方误差:", error)

岭回归_权重系数为:  [-0.64193209  1.13369189 -0.07675643  0.74427624 -1.93681163  2.71424838
 -0.08171268 -3.27871121  2.45697934 -1.81200596 -1.74659067  0.87272606
 -3.90544403]
岭回归_偏置为: 22.62137203166228
岭回归_房价预测: [28.22536271 31.50554479 21.13191715 32.65799504 20.02127243 19.07245621
 21.10832868 19.61646071 19.63294981 32.85629282 20.99521805 27.5039205
 15.55295503 19.79534148 36.87534254 18.80312973  9.39151837 18.50769876
 30.66823994 24.3042416  19.08011554 34.10075629 29.79356171 17.51074566
 34.89376386 26.53739131 34.68266415 27.42811508 19.08866098 14.98888119
 30.85920064 15.82430706 37.18223651  7.77072879 16.25978968 17.17327251
  7.44393003 19.99708381 40.57013125 28.94670553 25.25487557 17.75476957
 38.77349313  6.87948646 21.78603146 25.27475292 20.4507104  20.47911411
 17.25121804 26.12109499  8.54773286 27.48936704 30.58050833 16.56570322
  9.40627771 35.52573005 32.2505845  21.8734037  17.61137983 22.08222631
 23.49713296 24.09419259 20.15174912 38.49803353 24.63926151 1