In [10]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

In [2]:
data = datasets.load_breast_cancer()
print(data['DESCR'])

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        worst/largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 0 is Mean Radi

In [24]:
X, y = data.data[:, 3:5], data.target

In [25]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [38]:
svm_clf_noscaling = Pipeline([("linear_svc", LinearSVC(C=1, loss="hinge", random_state=42, max_iter=1000000))])
svm_clf_scaling = Pipeline([("scaler", StandardScaler()), ("linear_svc", LinearSVC(C=0.1, loss="hinge", random_state=42))])
svm_clf_noscaling.fit(X_train, y_train)
svm_clf_scaling.fit(X_train, y_train)

svm_clf_noscaling_test = svm_clf_noscaling.predict(X_test)
svm_clf_noscaling_train = svm_clf_noscaling.predict(X_train)
svm_clf_scaling_test = svm_clf_scaling.predict(X_test)
svm_clf_scaling_train = svm_clf_scaling.predict(X_train)



In [42]:
train_noscaling = test_noscaling = train_scaling = test_scaling = 0

for i in range(len(y_train)):
    if y_train[i] == svm_clf_noscaling_train[i]:
        train_noscaling += 1
    if y_train[i] == svm_clf_scaling_train[i]:
        train_scaling += 1

for i in range(len(y_test)):
    if y_test[i] == svm_clf_noscaling_test[i]:
        test_noscaling += 1
    if y_test[i] == svm_clf_scaling_test[i]:
        test_scaling += 1

In [44]:
bc_acc_train_noscaling = train_noscaling/len(svm_clf_train_noscaling)
bc_acc_test_noscaling = test_noscaling/len(svm_clf_test_noscaling)
bc_acc_train_scaling = train_scaling/len(svm_clf_train_scaling)
bc_acc_test_scaling = test_scaling/len(svm_clf_test_scaling)
print(bc_acc_train_noscaling)
print(bc_acc_test_noscaling)
print(bc_acc_train_scaling)
print(bc_acc_test_scaling)

NameError: name 'svm_clf_train_noscaling' is not defined