In [141]:
from sklearn.svm import LinearSVR # 线性支持向量机回归
from sklearn.svm import SVR # Epsilon-Support Vector Regression(可以使用核函数)
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge # 岭回归
from sklearn.preprocessing import PolynomialFeatures

In [142]:
# Boston housing-price dataset.
# NOTE(review): `load_boston` is deprecated/removed in newer scikit-learn
# releases; this cell needs the older release the notebook was run with.
boston = datasets.load_boston()
# Keep only samples whose target is below 50.0: points sitting at the upper
# cap are treated as outliers and dropped from both X and y.
X, y = boston.data[boston.target < 50.0], boston.target[boston.target < 50.0]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

In [143]:
def StandardLinearSVR(epsilon=0.1, C=1.0, degree=4, random_state=None, max_iter=1000):
    """Build a LinearSVR pipeline with polynomial features.

    The pipeline expands the inputs with ``PolynomialFeatures``, standardizes
    them with ``StandardScaler`` and fits a ``LinearSVR`` on the result.

    Parameters
    ----------
    epsilon : float, default=0.1
        Epsilon of the epsilon-insensitive loss in the SVR primal problem.
    C : float, default=1.0
        Penalty parameter C of the error term in the SVR primal problem.
    degree : int, default=4
        Degree passed to ``PolynomialFeatures``.
    random_state : int, RandomState instance or None, default=None
        Forwarded to ``LinearSVR``. Pass an int to make repeated fits
        reproducible; ``None`` keeps the estimator's default behavior.
    max_iter : int, default=1000
        Forwarded to ``LinearSVR``; raise it if the solver reports that it
        did not converge on the (large) polynomial feature set.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Unfitted pipeline with steps ``poly``, ``std_scaler``, ``linearSVR``.
    """
    return Pipeline([
        ("poly", PolynomialFeatures(degree=degree)),
        ('std_scaler', StandardScaler()),
        ('linearSVR', LinearSVR(epsilon=epsilon,
                                C=C,
                                # Defaults match LinearSVR's own defaults, so
                                # existing callers get identical estimators.
                                random_state=random_state,
                                max_iter=max_iter)),
    ])

In [144]:
'''

C : float, optional (default=1.0)
    Penalty parameter C of the error term.

epsilon : float, optional (default=0.1)
     Epsilon in the epsilon-SVR model. It specifies the epsilon-tube
     within which no penalty is associated in the training loss function
     with points predicted within a distance epsilon from the actual
     value.
'''
# The larger epsilon is, the wider the epsilon-tube (margin band) becomes.
linesvr = StandardLinearSVR(epsilon=0.01, C=1)
linesvr.fit(X_train, y_train)



Pipeline(memory=None,
         steps=[('poly',
                 PolynomialFeatures(degree=4, include_bias=True,
                                    interaction_only=False, order='C')),
                ('std_scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('linearSVR',
                 LinearSVR(C=1, dual=True, epsilon=0.01, fit_intercept=True,
                           intercept_scaling=1.0, loss='epsilon_insensitive',
                           max_iter=1000, random_state=None, tol=0.0001,
                           verbose=0))],
         verbose=False)

In [145]:
# Score the fitted pipeline on the held-out test split
# (for scikit-learn regressors `score` reports R^2).
linesvr.score(X_test, y_test)

0.8729853510425635

In [146]:
linesvr.named_steps['linearSVR'].coef_  # coefficients of the explanatory variables (LinearSVR)

array([ 0.        , -0.07575425,  0.03204083, ...,  0.40969914,
        0.26020901,  0.73326864])

In [147]:
linesvr.named_steps['linearSVR'].intercept_  # intercept term (LinearSVR)

array([21.27934443])

In [148]:
def StandardSVR(epsilon=0.1, C=1.0, kernel='linear'):
    """Build a pipeline that standardizes the inputs and fits an SVR.

    Unlike the LinearSVR variant, the regressor here accepts a kernel.

    Parameters
    ----------
    epsilon : float, default=0.1
        Epsilon of the epsilon-insensitive loss in the SVR primal problem.
    C : float, default=1.0
        Penalty parameter C of the error term in the SVR primal problem.
    kernel : str, default='linear'
        Kernel passed straight through to ``SVR``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Unfitted pipeline with steps ``std_scaler`` and ``linearSVR``.
    """
    # The step keeps the name 'linearSVR' whatever kernel is chosen.
    regressor = SVR(kernel=kernel, C=C, epsilon=epsilon)
    return Pipeline(steps=[
        ('std_scaler', StandardScaler()),
        ('linearSVR', regressor),
    ])


# The larger epsilon is, the wider the epsilon-tube (margin band) becomes.
svr = StandardSVR(epsilon=2, C=10)
svr.fit(X_train, y_train)

Pipeline(memory=None,
         steps=[('std_scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('linearSVR',
                 SVR(C=10, cache_size=200, coef0=0.0, degree=3, epsilon=2,
                     gamma='auto_deprecated', kernel='linear', max_iter=-1,
                     shrinking=True, tol=0.001, verbose=False))],
         verbose=False)

In [149]:
# Score the fitted pipeline on the held-out test split
# (for scikit-learn regressors `score` reports R^2).
svr.score(X_test, y_test)

0.7972726685252344

In [150]:
svr.named_steps['linearSVR'].coef_  # coefficients of the explanatory variables (SVR; only available when kernel='linear')

array([[-1.02437958,  0.5874283 , -0.32280905,  0.07288721, -0.98528787,
         2.75412821, -0.73408035, -1.99085341,  1.3507517 , -1.38006587,
        -1.60248949,  1.18159994, -2.30680415]])

In [151]:
svr.named_steps['linearSVR'].intercept_  # intercept term of the SVR step

array([21.3028462])

In [152]:
def RidgeRegression(degree, alpha):
    """Build a ridge-regression pipeline with polynomial features.

    Parameters
    ----------
    degree : int
        Degree passed to ``PolynomialFeatures``.
    alpha : float
        Regularization strength passed to ``Ridge``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Unfitted pipeline with steps ``poly``, ``std_scaler``, ``ridge_reg``.
    """
    stages = [
        ("poly", PolynomialFeatures(degree=degree)),
        ("std_scaler", StandardScaler()),
        ("ridge_reg", Ridge(alpha=alpha)),
    ]
    return Pipeline(stages)


In [153]:
# Degree-4 polynomial features with ridge penalty alpha=10.
ridge_reg = RidgeRegression(degree=4, alpha=10)
ridge_reg.fit(X_train, y_train)

Pipeline(memory=None,
         steps=[('poly',
                 PolynomialFeatures(degree=4, include_bias=True,
                                    interaction_only=False, order='C')),
                ('std_scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('ridge_reg',
                 Ridge(alpha=10, copy_X=True, fit_intercept=True, max_iter=None,
                       normalize=False, random_state=None, solver='auto',
                       tol=0.001))],
         verbose=False)

In [154]:
# Score the fitted pipeline on the held-out test split
# (for scikit-learn regressors `score` reports R^2).
ridge_reg.score(X_test, y_test)

0.8901188813636589