# 支持向量机
+ 可以做线性或者非线性的分类、回归、甚至异常值检测
+ 适合应用于复杂但中小规模数据集的分类
+ SVM对特征缩放非常敏感

# 线性支持向量机

In [5]:
# If the SVM model overfits, try reducing the hyperparameter C to regularize it.
import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

iris = datasets.load_iris()
# Keep only feature columns 2 and 3 of the data matrix.
x = iris.data[:, [2, 3]]
# Build a binary target as float64: 1.0 where the class label equals 2, else 0.0.
y = np.equal(iris.target, 2).astype(np.float64)

In [6]:
# Linear SVM classifier: LinearSVC with hyperparameter C and the hinge loss.
# Features are standardized first because SVMs are sensitive to feature scale.
# NOTE: Pipeline expects `steps` to be a *list* of (name, estimator) tuples;
# a tuple of steps is not part of the documented contract.
svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("linear_svc", LinearSVC(C=1, loss="hinge")),
])

svm_clf.fit(x, y)
# Predict the class of a single sample given as [feature 2, feature 3].
svm_clf.predict([[5.5, 1.7]])

array([1.])

# 非线性支持向量机

In [10]:
from sklearn.datasets import make_moons
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

# Two-class "moons" toy dataset; random_state fixes the generated samples.
X, y = make_moons(n_samples=200, noise=0.15, random_state=0)

# Non-linear classification with a linear SVM: expand the inputs with
# degree-3 polynomial features, standardize them, then fit LinearSVC.
# NOTE: Pipeline expects `steps` to be a *list* of (name, estimator) tuples.
Polynomial_clf = Pipeline([
    ('poly_features', PolynomialFeatures(degree=3)),
    ('scaler', StandardScaler()),
    ('svm_clf', LinearSVC(C=10, loss='hinge')),
])
Polynomial_clf.fit(X, y)



Pipeline(steps=[('poly_features', PolynomialFeatures(degree=3)),
                ('scaler', StandardScaler()),
                ('svm_clf', LinearSVC(C=10, loss='hinge'))])

## 多项式核

In [12]:
from sklearn.svm import SVC

# Train an SVM classifier with a degree-3 polynomial kernel. coef0 controls
# how much the model is influenced by high-degree versus low-degree terms.
# The hyperparameter values below are hand-picked; a grid search can be used
# to find better ones.
# NOTE: Pipeline expects `steps` to be a *list* of (name, estimator) tuples.
poly_feature_clf = Pipeline([
    ('scaler', StandardScaler()),
    ('svm_clf', SVC(kernel='poly', degree=3, coef0=1, C=5)),
])

poly_feature_clf.fit(X, y)

Pipeline(steps=[('scaler', StandardScaler()),
                ('svm_clf', SVC(C=5, coef0=1, kernel='poly'))])

## 高斯RBF核 

In [13]:
# Gaussian RBF kernel: aims for results as good as those obtained by adding
# many extra features, without adding them explicitly.
# NOTE: Pipeline expects `steps` to be a *list* of (name, estimator) tuples.
rbf_kernel_svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("svm_clf", SVC(kernel='rbf', gamma=5, C=0.001)),
])

rbf_kernel_svm_clf.fit(X, y)

Pipeline(steps=[('scaler', StandardScaler()),
                ('svm_clf', SVC(C=0.001, gamma=5))])

**核函数选择顺序：线性核函数(LinearSVC快于SVC(kernel="linear"))->RBF->交叉验证+网格搜索**