# 多项式回归与正则化

生成训练数据与测试数据


In [None]:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Number of samples in each split.
train_size = 90
test_size = 30


def _noisy_sine(x, count):
    """Return sin(2*pi*x) plus `count` Gaussian noise samples (mean 0, std 0.2)."""
    return np.sin(x * 2 * np.pi) + np.random.normal(0, 0.2, count)


# Inputs are drawn uniformly; the test interval [0.1, 1.3) is shifted by 0.1
# relative to the training interval [0, 1.2).
train_X = np.random.uniform(0, 1.2, train_size)
test_X = np.random.uniform(0.1, 1.3, test_size)

# Targets: y = sin(2*pi*x) + Gaussian noise.
train_y = _noisy_sine(train_X, train_size)
test_y = _noisy_sine(test_X, test_size)

# Overlay both splits in one scatter plot so their ranges can be compared.
for split_name, xs, ys in (("train", train_X, train_y), ("test", test_X, test_y)):
    plt.scatter(xs, ys, label=split_name)
plt.legend()



## 线性回归


In [None]:
from sklearn.linear_model import LinearRegression

# scikit-learn estimators take a 2-D (n_samples, n_features) input, so the 1-D
# sample vector is turned into a single-column matrix.
train_X_2D = np.array(train_X).reshape(-1, 1)

# Fit an ordinary least-squares straight line to the training split.
lr = LinearRegression()
lr.fit(train_X_2D, train_y)

# Draw the fitted line first, then overlay both data splits.
fitted_line = lr.predict(train_X_2D)
plt.plot(train_X, fitted_line, color='blue')
for split_name, xs, ys in (("train", train_X, train_y), ("test", test_X, test_y)):
    plt.scatter(xs, ys, label=split_name)
plt.legend()
plt.show()

## 多项式回归

In [None]:

# Fit unregularized least-squares polynomial regressions of increasing degree
# and report training / test MSE for each, plotting the fitted curves.
polynomialList = [2, 3, 4, 5, 6, 7, 8]

# The sort order of the inputs does not depend on the degree, so compute it
# once. Curves are drawn over sorted x so each one appears as a single line.
train_order = np.argsort(train_X)
test_order = np.argsort(test_X)

for degree in polynomialList:
    # Expand x into the feature columns [1, x, x^2, ..., x^degree].
    poly = PolynomialFeatures(degree)
    train_poly_X = poly.fit_transform(train_X.reshape(train_size, 1))
    # The transformer is fitted on the training split only; the test split is
    # merely transformed with it and never used for fitting.
    test_poly_X = poly.transform(test_X.reshape(test_size, 1))

    lrPoly = LinearRegression()
    lrPoly.fit(train_poly_X, train_y)

    train_pred_y = lrPoly.predict(train_poly_X)
    test_pred_y = lrPoly.predict(test_poly_X)

    # mean_squared_error expects (y_true, y_pred).
    training_mse = mean_squared_error(train_y, train_pred_y)
    test_mse = mean_squared_error(test_y, test_pred_y)
    print("%d阶多项式回归"%degree,"训练mse： %.3f"%training_mse,"测试mse： %.3f"%test_mse )

    plt.scatter(train_X, train_y, label="train")
    plt.plot(train_X[train_order], train_pred_y[train_order], label=f"train {degree}")

    plt.scatter(test_X, test_y, label="test")
    plt.plot(test_X[test_order], test_pred_y[test_order], label=f"test {degree} ")

    plt.title('Polynomial Regression')
    plt.legend()
    plt.show()






## 数据维度


In [None]:
# Shapes of the raw inputs and of the polynomial-expanded test matrix.
for arr in (train_X, test_X, test_poly_X):
    print(arr.shape)

# Raw test inputs, followed by the expanded feature rows of the first two
# test samples for comparison.
print(test_X)
for row_index in (0, 1):
    print(test_poly_X[row_index])

## ridge回归 - 正则化

In [None]:
# Ridge regression: keep the polynomial degree fixed and sweep the L2 penalty
# strength alpha to see how regularization changes the fitted curve.
degree = 6
poly = PolynomialFeatures(degree)
train_poly_X = poly.fit_transform(train_X.reshape(train_size, 1))
# The transformer is fitted on the training split only; the test split is
# merely transformed with it and never used for fitting.
test_poly_X = poly.transform(test_X.reshape(test_size, 1))

# Exponent of x used by each generated feature column.
print(np.transpose(poly.powers_))

plt.scatter(train_X, train_y, label="data")

# The sort order is the same for every alpha; curves are drawn over sorted x
# so each one appears as a single line.
train_order = np.argsort(train_X)

alphaList = [1, 3, 5, 9, 15, 50, 100]
for alpha in alphaList:
    model = Ridge(alpha=alpha)
    model.fit(train_poly_X, train_y)
    train_pred_y = model.predict(train_poly_X)
    test_pred_y = model.predict(test_poly_X)
    # mean_squared_error expects (y_true, y_pred).
    training_mse = mean_squared_error(train_y, train_pred_y)
    test_mse = mean_squared_error(test_y, test_pred_y)
    print("alpha%d"%alpha,"训练mse： %.3f"%training_mse,"测试mse： %.3f"%test_mse )
    plt.plot(train_X[train_order], train_pred_y[train_order], label=f"alpha{alpha}")

plt.legend()
plt.show()

