# 使用SVM 解决回归问题

### 在线性回归中，目标是使样本点到拟合直线的均方误差（MSE）最小。

### 借鉴这个思想，SVM解决回归问题的思路是：在拟合直线上下各偏移一个margin（由超参数epsilon指定）所形成的区间内，包含的样本点越多越好（区间的两条边界相当于分类问题中支撑向量所在的直线）。

### 这与SVM解决分类问题的思路恰好相反：在解决分类问题时，希望决策边界的margin区间内包含的点越少越好（从一个点也不包含的hard margin SVM，到可以容忍区间内存在少量点的soft margin SVM）。

In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from sklearn.datasets import load_boston

# Load the Boston housing dataset through scikit-learn's loader.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the installed version still provides it.
boston = load_boston()

# Feature matrix and regression target used by the cells that follow.
X, y = boston.data, boston.target

In [3]:
# Show the dimensions of the feature matrix as (rows, columns).
X.shape

(506, 13)

In [4]:
# Preview the first two rows of the feature matrix.
X[:2]

array([[  6.32000000e-03,   1.80000000e+01,   2.31000000e+00,
          0.00000000e+00,   5.38000000e-01,   6.57500000e+00,
          6.52000000e+01,   4.09000000e+00,   1.00000000e+00,
          2.96000000e+02,   1.53000000e+01,   3.96900000e+02,
          4.98000000e+00],
       [  2.73100000e-02,   0.00000000e+00,   7.07000000e+00,
          0.00000000e+00,   4.69000000e-01,   6.42100000e+00,
          7.89000000e+01,   4.96710000e+00,   2.00000000e+00,
          2.42000000e+02,   1.78000000e+01,   3.96900000e+02,
          9.14000000e+00]])

In [5]:
# Preview the first two target values.
y[:2]

array([ 24. ,  21.6])

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
# Split the data into training and test sets; random_state is fixed so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=666)

In [8]:
# Show the dimensions of the training split as (rows, columns).
X_train.shape

(379, 13)

In [9]:
# Show the dimensions of the test split as (rows, columns).
X_test.shape

(127, 13)

## 使用sklearn中的SVR--support vector regression

In [10]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVR, SVR
from sklearn.pipeline import Pipeline

In [11]:
def standardLinearSVR(epsilon=0.1, **svr_params):
    """Build an unfitted pipeline: feature standardization, then LinearSVR.

    Args:
        epsilon: Value forwarded to ``LinearSVR(epsilon=...)``.
        **svr_params: Optional extra keyword arguments forwarded unchanged to
            ``LinearSVR`` (for example ``C``, ``max_iter`` or
            ``random_state``). When omitted, LinearSVR's own defaults apply,
            so existing calls behave exactly as before.

    Returns:
        A ``Pipeline`` whose steps are ``'std'`` (``StandardScaler``) and
        ``'svr'`` (``LinearSVR``).
    """
    scaler = StandardScaler()
    regressor = LinearSVR(epsilon=epsilon, **svr_params)
    return Pipeline([
        ('std', scaler),
        ('svr', regressor),
    ])

In [12]:
# Build the scaler + LinearSVR pipeline with the default epsilon.
svr=standardLinearSVR()
# Fit on the training split; kept as the cell's last expression so the
# fitted pipeline is echoed below.
svr.fit(X_train, y_train)

Pipeline(memory=None,
     steps=[('std', StandardScaler(copy=True, with_mean=True, with_std=True)), ('svr', LinearSVR(C=1.0, dual=True, epsilon=0.1, fit_intercept=True,
     intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000,
     random_state=None, tol=0.0001, verbose=0))])

In [13]:
# Score the fitted pipeline on the held-out test split
# (scikit-learn regressors report R^2 from score()).
svr.score(X_test, y_test)

0.63650692862802183