In [1]:
import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

In [2]:
from sklearn.model_selection import train_test_split

In [22]:
from sklearn.metrics import accuracy_score

In [33]:
import pickle

# ***Cancer dataset***

_Data preparation_

In [3]:
# Load scikit-learn's bundled breast cancer dataset; as_frame=True asks for
# pandas objects, which the column-label selection further down relies on.
data_breast_cancer = datasets.load_breast_cancer(as_frame=True)

In [5]:
# Separate the feature table from the class labels of the loaded dataset.
X_cancer, y_cancer = data_breast_cancer.data, data_breast_cancer.target

In [7]:
# NOTE: .size is the total number of cells, not the number of samples:
# the feature table reports rows x columns (17070 = 569 x 30), the labels report 569.
print(X_cancer.size, y_cancer.size)

17070 569


In [8]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
(
    X_cancer_train,
    X_cancer_test,
    y_cancer_train,
    y_cancer_test,
) = train_test_split(
    X_cancer,
    y_cancer,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Cell counts (rows x columns) of the train and test feature tables, not sample counts.
print(X_cancer_train.size, X_cancer_test.size)

13650 3420


_SVM Classification_

In [12]:
# Restrict both splits to the same two columns so every classifier sees identical features.
cancer_feature_columns = ["mean area", "mean smoothness"]
X_cancer_train_area_smooth = X_cancer_train[cancer_feature_columns]
X_cancer_test_area_smooth = X_cancer_test[cancer_feature_columns]

In [13]:
# Baseline: a pipeline whose only step is the linear SVM, i.e. no feature scaling.
svm_clf = Pipeline(
    steps=[
        ("linear_svc", LinearSVC(loss="hinge", C=1, random_state=42)),
    ]
)

In [17]:
# Same classifier settings as the baseline, but with a StandardScaler step in front of the SVM.
svm_clf_scaler = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("linear_svc", LinearSVC(loss="hinge", C=1, random_state=42)),
    ]
)

In [24]:
# Fit both pipelines on the same two-feature training data.
svm_clf.fit(X_cancer_train_area_smooth, y_cancer_train)
# This is the cell's last expression, so its return value (the fitted pipeline)
# is what the notebook displays as the cell output.
svm_clf_scaler.fit(X_cancer_train_area_smooth, y_cancer_train)



Pipeline(steps=[('scaler', StandardScaler()),
                ('linear_svc', LinearSVC(C=1, loss='hinge', random_state=42))])

In [25]:
# Predictions on the training features: unscaled pipeline first, scaled pipeline second.
y_cancer_train_svm_pred = svm_clf.predict(X_cancer_train_area_smooth)
y_cancer_train_svm_scal_pred = svm_clf_scaler.predict(X_cancer_train_area_smooth)

In [26]:
# Predictions on the held-out test features, in the same order (unscaled, then scaled).
y_cancer_test_svm_pred = svm_clf.predict(X_cancer_test_area_smooth)
y_cancer_test_svm_scal_pred = svm_clf_scaler.predict(X_cancer_test_area_smooth)

In [29]:
# Training-set accuracy of the unscaled and the scaled pipeline.
acc_svm_clf_train = accuracy_score(
    y_true=y_cancer_train,
    y_pred=y_cancer_train_svm_pred,
)
acc_svm_clf_scaler_train = accuracy_score(
    y_true=y_cancer_train,
    y_pred=y_cancer_train_svm_scal_pred,
)

In [30]:
# Test-set accuracy of the unscaled and the scaled pipeline.
acc_svm_clf_test = accuracy_score(
    y_true=y_cancer_test,
    y_pred=y_cancer_test_svm_pred,
)
acc_svm_clf_scaler_test = accuracy_score(
    y_true=y_cancer_test,
    y_pred=y_cancer_test_svm_scal_pred,
)

In [31]:
# Result order: unscaled train, unscaled test, scaled train, scaled test.
bc_acc = [
    acc_svm_clf_train,
    acc_svm_clf_test,
    acc_svm_clf_scaler_train,
    acc_svm_clf_scaler_test,
]

In [32]:
# Show the four accuracies: unscaled train, unscaled test, scaled train, scaled test.
print(bc_acc)

[0.6285714285714286, 0.6228070175438597, 0.8923076923076924, 0.9298245614035088]


In [34]:
# Persist the list of accuracies as a pickle file (relative path, so it lands
# in the current working directory).
with open("bc_acc.pkl", mode="wb") as pickle_file:
    pickle.dump(bc_acc, pickle_file)

# ***Iris dataset***

_Data preparation_

In [35]:
# Load scikit-learn's bundled iris dataset; as_frame=True asks for pandas
# objects, which the column-label selection further down relies on.
data_iris = datasets.load_iris(as_frame=True)

In [36]:
# Separate the feature table from the class labels of the loaded dataset.
X_iris, y_iris = data_iris.data, data_iris.target

In [37]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
(
    X_iris_train,
    X_iris_test,
    y_iris_train,
    y_iris_test,
) = train_test_split(
    X_iris,
    y_iris,
    test_size=0.2,
    random_state=42,
)

In [38]:
# NOTE: .size is the total number of cells (rows x columns), not the number of
# samples: 480 = 120 x 4 for the training table and 120 = 30 x 4 for the test table.
print(X_iris_train.size, X_iris_test.size)

480 120


_SVM Classification_

In [41]:
# Print the dataset's bundled description to look up the exact feature names.
print(data_iris.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [42]:
# Restrict both splits to the two petal measurements so every classifier sees identical features.
iris_feature_columns = ["petal length (cm)", "petal width (cm)"]
X_iris_train_length_width = X_iris_train[iris_feature_columns]
X_iris_test_length_width = X_iris_test[iris_feature_columns]

In [43]:
# Baseline: a pipeline whose only step is the linear SVM, i.e. no feature scaling.
svm_clf_iris = Pipeline(
    steps=[
        ("linear_svc", LinearSVC(loss="hinge", C=1, random_state=42)),
    ]
)

In [44]:
# Same classifier settings as the baseline, but with a StandardScaler step in front of the SVM.
svm_clf_scaler_iris = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("linear_svc", LinearSVC(loss="hinge", C=1, random_state=42)),
    ]
)

In [46]:
# Fit both pipelines on the same two-feature training data.
svm_clf_iris.fit(X_iris_train_length_width, y_iris_train)
# This is the cell's last expression, so its return value (the fitted pipeline)
# is what the notebook displays as the cell output.
svm_clf_scaler_iris.fit(X_iris_train_length_width, y_iris_train)



Pipeline(steps=[('scaler', StandardScaler()),
                ('linear_svc', LinearSVC(C=1, loss='hinge', random_state=42))])

In [48]:
# Predictions on the training features: unscaled pipeline first, scaled pipeline second.
y_iris_train_svm_pred = svm_clf_iris.predict(X_iris_train_length_width)
y_iris_train_svm_scal_pred = svm_clf_scaler_iris.predict(X_iris_train_length_width)

In [50]:
# Predictions on the held-out test features, in the same order (unscaled, then scaled).
y_iris_test_svm_pred = svm_clf_iris.predict(X_iris_test_length_width)
y_iris_test_svm_scal_pred = svm_clf_scaler_iris.predict(X_iris_test_length_width)

In [52]:
# Training-set accuracy of the unscaled and the scaled pipeline.
acc_svm_clf_iris_train = accuracy_score(
    y_true=y_iris_train,
    y_pred=y_iris_train_svm_pred,
)
acc_svm_clf_scaler_iris_train = accuracy_score(
    y_true=y_iris_train,
    y_pred=y_iris_train_svm_scal_pred,
)

In [53]:
# Test-set accuracy of the unscaled and the scaled pipeline.
acc_svm_clf_iris_test = accuracy_score(
    y_true=y_iris_test,
    y_pred=y_iris_test_svm_pred,
)
acc_svm_clf_scaler_iris_test = accuracy_score(
    y_true=y_iris_test,
    y_pred=y_iris_test_svm_scal_pred,
)

In [54]:
# Result order: unscaled train, unscaled test, scaled train, scaled test.
iris_acc = [
    acc_svm_clf_iris_train,
    acc_svm_clf_iris_test,
    acc_svm_clf_scaler_iris_train,
    acc_svm_clf_scaler_iris_test,
]

In [55]:
# Show the four accuracies: unscaled train, unscaled test, scaled train, scaled test.
print(iris_acc)

[0.7583333333333333, 0.8333333333333334, 0.8583333333333333, 0.8666666666666667]


In [57]:
# Persist the list of accuracies as a pickle file (relative path, so it lands
# in the current working directory).
with open("iris_acc.pkl", mode="wb") as pickle_file:
    pickle.dump(iris_acc, pickle_file)