# 比较LinearSVC SVC SDGC

In [18]:
from sklearn import datasets

iris = datasets.load_iris()
X = iris["data"][:, (2, 3)]
y = iris["target"]

setosa_or_versicolor = (y == 0) | (y == 1)
X = X[setosa_or_versicolor]
y = y[setosa_or_versicolor]

## LinearSVC

In [20]:
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler

lin_clf = LinearSVC(loss='hinge', C=5, random_state=42)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

lin_clf.fit(X_scaled, y)
print(lin_clf.intercept_, lin_clf.coef_)

[0.28475098] [[1.05364854 1.09903804]]


## SVC

In [24]:
from sklearn.svm import SVC
svm_clf = SVC(kernel="linear", C=5)
svm_clf.fit(X_scaled, y)
print(svm_clf.intercept_, svm_clf.coef_)

[0.31896852] [[1.1203284  1.02625193]]


## SGDClassifier

In [40]:
from sklearn.linear_model import SGDClassifier
sgd_clf = SGDClassifier(learning_rate="constant", eta0= 0.1, random_state=42)
sgd_clf.fit(X_scaled, y)
print(sgd_clf.intercept_, sgd_clf.coef_)

[0.3] [[1.13059932 1.19084486]]


---

In [2]:
import numpy as np
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1, cache=True, as_frame=False)
mnist.target = mnist.target.astype(np.int8)

In [3]:
x, y = mnist['data'], mnist['target']
x_train, x_test = x[:60000], x[60000:]
y_train, y_test = y[:60000], y[60000:]

In [4]:
np.random.seed(42)
shuffle_index = np.random.permutation(60000)
x_train, y_train = x_train[shuffle_index], y_train[shuffle_index]

In [6]:
from sklearn.svm import LinearSVC
lin_clf = LinearSVC(random_state=42)
lin_clf.fit(x_train, y_train)



LinearSVC(random_state=42)

In [7]:
from sklearn.metrics import accuracy_score

y_pred_lin = lin_clf.predict(x_train)
accuracy_score(y_train, y_pred_lin)

0.8656166666666667

In [9]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [11]:
lin_clf.fit(x_train_scaled, y_train)
y_pred_lin_scaled = lin_clf.predict(x_train_scaled)
accuracy_score(y_train, y_pred_lin_scaled)



0.92025

In [12]:
from sklearn.svm import SVR
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

para_dic={
    'gamma':reciprocal(0.001,0.1),'C': uniform(5,9)   
}
search_cv = RandomizedSearchCV(SVR(), para_dic, n_iter=20, n_jobs=1, verbose=2,cv=3, random_state=42)
search_cv.fit(x_train_scaled[:2000], y_train[:2000])


Fitting 3 folds for each of 20 candidates, totalling 60 fits
[CV] END .....C=8.370861069626262, gamma=0.07969454818643928; total time=   1.3s
[CV] END .....C=8.370861069626262, gamma=0.07969454818643928; total time=   1.1s
[CV] END .....C=8.370861069626262, gamma=0.07969454818643928; total time=   1.1s
[CV] END ...C=11.587945476302647, gamma=0.015751320499779724; total time=   1.0s
[CV] END ...C=11.587945476302647, gamma=0.015751320499779724; total time=   1.0s
[CV] END ...C=11.587945476302647, gamma=0.015751320499779724; total time=   1.0s
[CV] END ...C=6.4041677639819286, gamma=0.002051110418843397; total time=   0.9s
[CV] END ...C=6.4041677639819286, gamma=0.002051110418843397; total time=   0.9s
[CV] END ...C=6.4041677639819286, gamma=0.002051110418843397; total time=   1.1s
[CV] END .....C=5.522752509513795, gamma=0.05399484409787431; total time=   1.1s
[CV] END .....C=5.522752509513795, gamma=0.05399484409787431; total time=   1.1s
[CV] END .....C=5.522752509513795, gamma=0.05399

RandomizedSearchCV(cv=3, estimator=SVR(), n_iter=20, n_jobs=1,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7ff2ed4376a0>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7ff2ed4322e0>},
                   random_state=42, verbose=2)

In [13]:
search_cv.best_estimator_

SVR(C=10.506676052501415, gamma=0.0019010245319870352)

In [None]:
search_cv.best_estimator_.fit(x_train_scaled, y_train)

In [None]:
y_pred_search_cv = search_cv.best_estimator_.predict(x_train_scaled)
accuracy_score(y_train, y_pred_search_cv)