#平方误差损失函数最小，通过普通最小二乘法求解系数

In [2]:
#sklearn包中线性回归模型，通过最小二乘法求解回归系数
#损失函数为误差平方和
#下面例子用来拟合y=x+1方程
from sklearn import linear_model
lm1 = linear_model.LinearRegression()
#用训练集训练模型参数
lm1.fit([[0],[1],[2],[3],[4]],[1,2,3,4,5])
print(lm1.coef_)
print(lm1.intercept_)
print(lm1.predict([[6],[7],[8],[9]]))

[1.]
0.9999999999999996
[ 7.  8.  9. 10.]


In [3]:
# sklearn中岭回归，由于最小二乘线性回归中的损失函数只有误差平方和项，即只有经验风险最小化项
#没有结构风险最小化项，可能导致模型泛化性很差，岭回归误差函数中引入正则化项


In [4]:
#sklearn包中岭回归模型，
#下面例子用来拟合y=x+1方程，岭回归中惩罚参数越大模型中的回归系数越小，下面分别以
#0.1,1,100,1000来说明

from sklearn import linear_model
Rm1 = linear_model.Ridge(alpha=0.1)
Rm2 = linear_model.Ridge(alpha=1)
Rm3 = linear_model.Ridge(alpha=100)
Rm4 = linear_model.Ridge(alpha=1000)

Rm1.fit([[0],[1],[2],[3],[4]],[1,2,3,4,5])
Rm2.fit([[0],[1],[2],[3],[4]],[1,2,3,4,5])
Rm3.fit([[0],[1],[2],[3],[4]],[1,2,3,4,5])
Rm4.fit([[0],[1],[2],[3],[4]],[1,2,3,4,5])

print(Rm1.coef_)
print(Rm1.intercept_)

print(Rm2.coef_)
print(Rm2.intercept_)

print(Rm3.coef_)
print(Rm3.intercept_)

print(Rm4.coef_)
print(Rm4.intercept_)

print(Rm1.predict([[6],[7],[8],[9]]))
print(Rm2.predict([[6],[7],[8],[9]]))
print(Rm3.predict([[6],[7],[8],[9]]))
print(Rm4.predict([[6],[7],[8],[9]]))


[0.99009901]
1.0198019801980198
[0.90909091]
1.1818181818181819
[0.09090909]
2.8181818181818183
[0.00990099]
2.98019801980198
[6.96039604 7.95049505 8.94059406 9.93069307]
[6.63636364 7.54545455 8.45454545 9.36363636]
[3.36363636 3.45454545 3.54545455 3.63636364]
[3.03960396 3.04950495 3.05940594 3.06930693]


从上可见随着alpha增大,coef_的值越来越小，下面以数据有噪声为例，比较岭回归和最小二乘线性回归的优缺点，待拟合的方程仍然为y=x+1,训练数据加入随机噪声


In [30]:
from sklearn import linear_model
import numpy as np
#生产训练集合测试集
noise = np.random.rand(4)
X_train=[[1],[2],[3],[4]]
Y_train=[2,3,4,5]+noise
X_test=[[5],[10],[20],[30]]
Y_test=[5,11,21,31]
#训练模型
lm1 = linear_model.LinearRegression()
lm1.fit(X_train,Y_train)
#预测，并计算预测值和测试值之间的误差平方和
lm1_predict=lm1.predict(X_test)
error = np.sum(np.square(lm1_predict-Y_test))
#输出结果
print(lm1.coef_)
print(lm1.intercept_)
print(lm1_predict)
print(error)
#训练岭回归模型，alpha=0.1
rm1 = linear_model.Ridge(alpha=0.1)
rm1.fit(X_train,Y_train)

#预测，并计算预测值和测试值之间的误差平方和
rm1_predict=rm1.predict(X_test)
error = np.sum(np.square(rm1_predict-Y_test))

#输出岭回归预测结果和误差
print(rm1.coef_)
print(rm1.intercept_)
print(rm1_predict)
print(error)

#训练岭回归模型，alpha=0.5
rm2 = linear_model.Ridge(alpha=0.5)
rm2.fit(X_train,Y_train)

#预测，并计算预测值和测试值之间的误差平方和
rm2_predict=rm2.predict(X_test)
error = np.sum(np.square(rm1_predict-Y_test))


#输出岭回归模型预测结果和误差
print(rm2.coef_)
print(rm2.intercept_)
print(rm2_predict)
print(error)

#通过交叉验证选择alpha的值
alpha = linear_model.RidgeCV(alphas=[0.1,0.3,0.5,0.8,1.0])
alpha.fit(X_train,Y_train)
print(alpha.alpha_)




[1.16702484]
0.8700535294882079
[ 6.70517771 12.54030189 24.21055024 35.8807986 ]
39.40998876678873
[1.144142]
0.9272606292779426
[ 6.64797061 12.36868059 23.81010055 35.2515205 ]
30.56118533791461
[1.06093167]
1.1352864466951575
[ 6.43994479 11.74460313 22.35391982 32.96323651]
30.56118533791461
0.1


从上可以看出,噪声较小时，线性回归效果最好，加大数据噪声，noise = np.random.rand(4)，再次进行预测，发现预测结果如下：

[1.16702484]
0.8700535294882079
[ 6.70517771 12.54030189 24.21055024 35.8807986 ]
39.40998876678873
[1.144142]
0.9272606292779426
[ 6.64797061 12.36868059 23.81010055 35.2515205 ]
30.56118533791461
[1.06093167]
1.1352864466951575
[ 6.43994479 11.74460313 22.35391982 32.96323651]
30.56118533791461
0.1
alpha等于0.1时，岭回归预测效果最好
