In [67]:
import numpy as np

from sklearn.datasets import load_iris
from sklearn.pipeline import make_pipeline
from sklearn.discriminant_analysis import StandardScaler
from sklearn.svm import LinearSVC, SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import SGDClassifier


# Using SVMs on a linearly separable dataset

In [49]:
# Load the iris data as pandas objects and keep only the two sepal measurements as features.
# NOTE(review): all three iris classes are kept here; if a linearly separable problem is
# intended (as the notebook title says), confirm whether the target should be restricted
# to two classes.
iris = load_iris(as_frame=True)
y = iris.target
X = iris.data.loc[:, ["sepal length (cm)", "sepal width (cm)"]]
X.head()

Unnamed: 0,sepal length (cm),sepal width (cm)
0,5.1,3.5
1,4.9,3.0
2,4.7,3.2
3,4.6,3.1
4,5.0,3.6


In [50]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
  X,
  y,
  test_size=0.2,
  random_state=42,
)

In [51]:
def get_accuracy(clf, X, y_true):
  """Return the accuracy of ``clf``'s predictions on ``X`` measured against ``y_true``.

  Args:
    clf: Object exposing ``predict`` (in this notebook, a fitted pipeline).
    X: Features handed to ``clf.predict``.
    y_true: Reference labels the predictions are compared with.

  Returns:
    The result of ``accuracy_score(y_true, clf.predict(X))``.
  """
  return accuracy_score(y_true, clf.predict(X))

### Linear SVC

In [52]:
# Scale the features, then fit a LinearSVC (C=10) on the training split.
# Pipeline.fit returns the pipeline itself, so the chained call binds the fitted pipeline.
lin_svc = make_pipeline(StandardScaler(), LinearSVC(C=10, random_state=42)).fit(
  X_train, y_train
)

# Report accuracy on the training split and on the held-out split.
print("Train accuracy: ", get_accuracy(lin_svc, X_train, y_train))
print("Test accuracy: ", get_accuracy(lin_svc, X_test, y_test))


Train accuracy:  0.7666666666666667
Test accuracy:  0.9333333333333333




### SVC

In [66]:
# Scale the features, then fit an SVC with a linear kernel (C=10) on the training split.
# Pipeline.fit returns the pipeline itself, so the chained call binds the fitted pipeline.
svc = make_pipeline(StandardScaler(), SVC(C=10, kernel="linear", random_state=42)).fit(
  X_train, y_train
)

# Report accuracy on the training split and on the held-out split.
print("Train accuracy: ", get_accuracy(svc, X_train, y_train))
print("Test accuracy: ", get_accuracy(svc, X_test, y_test))

Train accuracy:  0.7833333333333333
Test accuracy:  0.9


### SGD Classifier

In [72]:
# Scale the features, then fit a linear classifier trained with stochastic gradient descent.
# random_state is fixed (as in the LinearSVC and SVC cells) because SGDClassifier shuffles
# the training data between epochs; without a seed the printed accuracies change from run
# to run.
# NOTE(review): loss="log_loss" trains logistic regression rather than a linear SVM;
# loss="hinge" would be the SVM counterpart — confirm which one is intended.
sgd = make_pipeline(
  StandardScaler(),
  SGDClassifier(loss="log_loss", random_state=42)
)

sgd.fit(X_train, y_train)
print("Train accuracy: ", get_accuracy(sgd, X_train, y_train))
print("Test accuracy: ", get_accuracy(sgd, X_test, y_test))

Train accuracy:  0.7833333333333333
Test accuracy:  0.9333333333333333
