# [线性SVM分类](http://coldjune.com/2018/05/22/%E6%94%AF%E6%8C%81%E5%90%91%E9%87%8F%E6%9C%BA-SVM/)

## 软间隔分类

In [1]:
import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# Binary task on the iris data: feature columns 2 and 3 are the inputs, and
# the target is 1.0 where the class label equals 2 and 0.0 everywhere else.
iris = datasets.load_iris()
X = iris["data"][:, (2, 3)]
y = (iris["target"] == 2).astype(np.float64)

# Standardize the features, then fit a linear SVM with hinge loss.
# The steps are passed as a list, which is the type Pipeline documents for
# `steps`; passing a tuple relies on undocumented behavior.
svm_clf = Pipeline([
        ("scaler", StandardScaler()),
        ("linear_svc", LinearSVC(C=1, loss="hinge")),
])

svm_clf.fit(X, y)

Pipeline(memory=None,
     steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('linear_svc', LinearSVC(C=1, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='hinge', max_iter=1000, multi_class='ovr',
     penalty='l2', random_state=None, tol=0.0001, verbose=0))])

In [2]:
# Classify a single sample given as [column-2 value, column-3 value];
# the result is 1.0 when the model assigns it to the positive class.
sample = [[5.5, 1.7]]
svm_clf.predict(sample)

array([1.])

# 非线性SVM分类

In [5]:
from sklearn.datasets import make_moons
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

# NOTE(review): make_moons is imported but never called; the model below is
# fit on whatever X, y are currently bound (the iris subset when the notebook
# is run top to bottom) -- confirm whether a moons dataset was intended.

# Degree-3 polynomial feature expansion, standardization, then a linear SVM
# with hinge loss.  The steps are passed as a list, which is the documented
# type for Pipeline's `steps` argument.  The step name "poly_featrure" is
# left unchanged because it is the key used to address that step.
polynomial_svm_clf = Pipeline([
            ("poly_featrure", PolynomialFeatures(degree=3)),
            ("scaler", StandardScaler()),
            ("svm_clf", LinearSVC(C=10, loss="hinge")),
])

polynomial_svm_clf.fit(X, y)

Pipeline(memory=None,
     steps=[('poly_featrure', PolynomialFeatures(degree=3, include_bias=True, interaction_only=False)), ('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('svm_clf', LinearSVC(C=10, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='hinge', max_iter=1000, multi_class='ovr',
     penalty='l2', random_state=None, tol=0.0001, verbose=0))])

## 多项式核

In [7]:
from sklearn.svm import SVC

# Standardize, then fit an SVC with a degree-3 polynomial kernel
# (coef0=1, C=5).  The steps are passed as a list, which is the documented
# type for Pipeline's `steps` argument.
polu_kernel_svm_clf = Pipeline([
        ("scaler", StandardScaler()),
        ("svm_clf", SVC(kernel="poly", degree=3, coef0=1, C=5)),
])

polu_kernel_svm_clf.fit(X, y)

Pipeline(memory=None,
     steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('svm_clf', SVC(C=5, cache_size=200, class_weight=None, coef0=1,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='poly', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False))])

## 高斯径向基函数核

In [8]:
# Standardize, then fit an SVC with a Gaussian RBF kernel (gamma=5, C=0.001).
# The steps are passed as a list, which is the documented type for
# Pipeline's `steps` argument.
rbf_kernel_svm_clf = Pipeline([
        ("scaler", StandardScaler()),
        ("svm_clf", SVC(kernel="rbf", gamma=5, C=0.001)),
])

rbf_kernel_svm_clf.fit(X, y)

Pipeline(memory=None,
     steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('svm_clf', SVC(C=0.001, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=5, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))])

# SVM回归

In [9]:
from sklearn.svm import LinearSVR

# Linear support-vector regressor with epsilon=1.5, fit on the current X, y.
svm_reg = LinearSVR(epsilon=1.5)
svm_reg.fit(X=X, y=y)

LinearSVR(C=1.0, dual=True, epsilon=1.5, fit_intercept=True,
     intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000,
     random_state=None, tol=0.0001, verbose=0)

In [11]:
from sklearn.svm import SVR

# Kernelized support-vector regressor: quadratic polynomial kernel with a
# large C and a narrow epsilon, fit on the current X, y.
svm_poly_reg = SVR(
    kernel="poly",
    degree=2,
    C=100,
    epsilon=0.1,
    gamma='auto',
)
svm_poly_reg.fit(X=X, y=y)

SVR(C=100, cache_size=200, coef0=0.0, degree=2, epsilon=0.1, gamma='auto',
  kernel='poly', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

# 练习

## 练习 8

In [16]:
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler

# Three linear classifiers to be trained on the same data and compared.
lin_svc = LinearSVC(loss='hinge', C=5)
# gamma is a coefficient of the 'rbf', 'poly' and 'sigmoid' kernels only; it
# is ignored by the linear kernel, so it is not passed here.
svc = SVC(kernel='linear', C=5)
# NOTE(review): alpha is left at its default, so the SGD model's
# regularization strength is presumably not matched to C=5 used by the two
# models above (an equivalent setting would be alpha = 1 / (C * n_samples)
# -- confirm before comparing coefficients closely).
sgd = SGDClassifier(loss='hinge', learning_rate='constant', eta0=0.01, max_iter=10000, tol=1e-3)

# Shared scaler; it is fitted later, when the training data is prepared.
scaler = StandardScaler()


In [19]:
from sklearn import datasets

# Reload iris and keep feature columns 2 and 3 only.
iris = datasets.load_iris()
X = iris['data'][:, (2, 3)]
y = iris['target']

# Keep only the samples labelled 0 or 1, making the problem binary.
index = (y == 0) | (y == 1)
X, y = X[index], y[index]

# Standardize the features, then train all three classifiers on the same
# data.  The last fit is left as a bare expression so its repr is displayed.
X = scaler.fit_transform(X)
for model in (lin_svc, svc):
    model.fit(X, y)
sgd.fit(X, y)

SGDClassifier(alpha=0.0001, average=False, class_weight=None,
       early_stopping=False, epsilon=0.1, eta0=0.01, fit_intercept=True,
       l1_ratio=0.15, learning_rate='constant', loss='hinge',
       max_iter=10000, n_iter=None, n_iter_no_change=5, n_jobs=None,
       penalty='l2', power_t=0.5, random_state=None, shuffle=True,
       tol=0.001, validation_fraction=0.1, verbose=0, warm_start=False)

In [20]:
# Show each fitted model's intercept and weight vector on one line, so the
# three decision boundaries can be compared.
for label, model in (('lin_svc:', lin_svc), ('svc:', svc), ('sgd:', sgd)):
    print(label, model.intercept_, model.coef_)

lin_svc: [0.28475357] [[1.05364315 1.09903432]]
svc: [0.31896852] [[1.1203284  1.02625193]]
sgd: [0.32] [[1.06407189 0.99023675]]


## 练习 9

In [21]:
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.datasets import fetch_openml
import numpy as np

# fetch_mldata was deprecated in scikit-learn 0.20 and later removed (the
# mldata.org service it downloaded from is no longer available), so MNIST is
# loaded from OpenML instead.
mnist = fetch_openml("mnist_784", version=1, data_home='./datasets/')

# np.asarray normalizes the containers to NumPy arrays regardless of whether
# this scikit-learn version returns arrays or pandas objects.  OpenML serves
# the labels as strings, so they are cast to float64 to keep y numeric.
X = np.asarray(mnist['data'])
y = np.asarray(mnist['target']).astype(np.float64)

# The first 60,000 rows are used for training, the remaining rows for testing.
X_train = X[:60000]
y_train = y[:60000]

X_test = X[60000:]
y_test = y[60000:]

# Shuffle the training set, applying the same permutation to samples and labels.
index = np.random.permutation(60000)
X_train = X_train[index]
y_train = y_train[index]

In [22]:
# Baseline: a default LinearSVC trained directly on the unscaled training set.
lin_svc = LinearSVC()
lin_svc.fit(X=X_train, y=y_train)



LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)

In [23]:
from sklearn.metrics import accuracy_score

# Score the baseline model on the held-out test set.
y_pred = lin_svc.predict(X=X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.8395

In [25]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training set only (cast to float32),
# then apply that same fitted scaler to both the training and the test set.
scaler = StandardScaler().fit(X_train.astype(np.float32))
X_train_scaled, X_test_scaled = (
    scaler.transform(split.astype(np.float32)) for split in (X_train, X_test)
)

In [26]:
# Retrain the same LinearSVC on the standardized features and re-score it.
lin_svc.fit(X=X_train_scaled, y=y_train)
y_pred = lin_svc.predict(X=X_test_scaled)
accuracy_score(y_true=y_test, y_pred=y_pred)



0.9143

In [28]:
# Kernel SVC (all defaults except gamma='auto') trained on the full
# standardized training set, then scored on the test set.
svm_clf = SVC(gamma='auto')
svm_clf.fit(X=X_train_scaled, y=y_train)
y_pred = svm_clf.predict(X=X_test_scaled)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9664

In [29]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

# Sampling distributions for the search: gamma is log-uniform on
# [0.001, 0.1]; C is uniform with loc=1 and scale=10, i.e. on [1, 11].
param_grid = {
    "gamma": reciprocal(0.001, 0.1),
    'C': uniform(1, 10),
}

# 10 random candidates, 3-fold cross-validation, all available cores.
rnd_search_cv = RandomizedSearchCV(
    estimator=svm_clf,
    param_distributions=param_grid,
    n_iter=10,
    cv=3,
    n_jobs=-1,
)

# The search runs on the first 10,000 training samples only.
rnd_search_cv.fit(X_train_scaled[:10000], y_train[:10000])

RandomizedSearchCV(cv=3, error_score='raise-deprecating',
          estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
          fit_params=None, iid='warn', n_iter=10, n_jobs=-1,
          param_distributions={'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x12299f978>, 'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x12299ae48>},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score='warn', scoring=None, verbose=0)

In [30]:
# Display the estimator configured with the best hyperparameters found by the search.
rnd_search_cv.best_estimator_

SVC(C=3.989711022016336, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.0015797502977847602,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [31]:
# Display the mean cross-validated score achieved by the best candidate.
rnd_search_cv.best_score_

0.9413

In [32]:
# Retrain the best estimator on the complete standardized training set
# (the search itself was fitted on a 10,000-sample subset).
rnd_search_cv.best_estimator_.fit(X=X_train_scaled, y=y_train)

SVC(C=3.989711022016336, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.0015797502977847602,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [34]:
# Test-set accuracy of the retrained best estimator.
y_pred = rnd_search_cv.best_estimator_.predict(X=X_test_scaled)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9721

## 练习 10

In [68]:
from sklearn.datasets import fetch_california_housing

# Load the California housing dataset; it is downloaded on first use.
housing = fetch_california_housing()

Downloading Cal. housing from https://ndownloader.figshare.com/files/5976036 to /Users/dengxiaojun/scikit_learn_data


In [70]:
# Split the loaded dataset into its feature matrix and regression target.
X, y = housing['data'], housing['target']

In [75]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as a test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Fit the scaler on the training split only, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = map(scaler.transform, (X_train, X_test))

In [76]:
from sklearn.svm import LinearSVR
from sklearn.metrics import mean_squared_error

# Baseline regressor: default LinearSVR on the standardized training split,
# evaluated by mean squared error on the test split.
lin_svr = LinearSVR()
lin_svr.fit(X=X_train_scaled, y=y_train)
y_pred = lin_svr.predict(X=X_test_scaled)
mean_squared_error(y_true=y_test, y_pred=y_pred)



7.707205711841997

In [78]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVR
from scipy.stats import reciprocal, uniform

# Sampling distributions: gamma is log-uniform on [0.001, 0.1]; C is uniform
# with loc=1 and scale=10, i.e. on [1, 11].
param = {
    'gamma': reciprocal(0.001, 0.1),
    'C': uniform(1, 10),
}

# 10 random candidates for a default SVR, 3-fold cross-validation, all
# available cores, with progress output enabled.
rnd_search_cv = RandomizedSearchCV(
    estimator=SVR(),
    param_distributions=param,
    n_iter=10,
    cv=3,
    n_jobs=-1,
    verbose=True,
)
rnd_search_cv.fit(X_train_scaled, y_train)

Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:  1.2min finished


RandomizedSearchCV(cv=3, error_score='raise-deprecating',
          estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma='auto_deprecated', kernel='rbf', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False),
          fit_params=None, iid='warn', n_iter=10, n_jobs=-1,
          param_distributions={'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x11e364ba8>, 'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x11f2d3898>},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score='warn', scoring=None, verbose=True)

In [79]:
# Display the SVR configured with the best hyperparameters found by the search.
rnd_search_cv.best_estimator_

SVR(C=5.589545100235242, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma=0.0843364120036159, kernel='rbf', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False)

In [80]:
# Display the mean cross-validated score of the best candidate.  No `scoring`
# was passed to the search, so this is the estimator's own default score
# (R^2 for regressors), not an error measure.
rnd_search_cv.best_score_

0.7445064798868384

In [81]:
# Fit the best SVR on the standardized training split.
# NOTE(review): the search was run on this same data and refits the best
# candidate by default, so this call looks redundant -- confirm.
rnd_search_cv.best_estimator_.fit(X=X_train_scaled, y=y_train)

SVR(C=5.589545100235242, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma=0.0843364120036159, kernel='rbf', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False)

In [83]:
# Mean squared error of the best SVR on the held-out test split.
pred = rnd_search_cv.best_estimator_.predict(X=X_test_scaled)
mse = mean_squared_error(y_true=y_test, y_pred=pred)

In [84]:
# Root mean squared error: the square root puts the error back in the
# target's own units.
rmse = np.sqrt(mse)
rmse

0.5543707334462389