In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import normalize as skl_normalize

In [2]:
# Load the breast-cancer data as a (features, labels) pair and report the
# array shapes together with the distinct label values.
X, y = load_breast_cancer(return_X_y=True)
print("data shapes:", np.shape(X), np.shape(y), np.unique(y))

data shapes: (569, 30) (569,) [0 1]


In [3]:
# Select train/test indices: shuffle all sample indices with a fixed seed,
# cut the shuffled order after the first 250 entries, and sort each part so
# both index arrays are ascending.
np.random.seed(42)
order = np.random.permutation(len(y))
tr, tst = (np.sort(part) for part in np.split(order, [250]))

In [4]:
def center(X):
    """Return ``X`` with the mean over axis 0 subtracted.

    For a 2-D array this removes each column's mean, so every column of
    the result averages to (approximately) zero.
    """
    column_means = np.mean(X, axis=0, keepdims=False)
    return X - column_means

In [5]:
def standardize(X):
    """Scale ``X`` along axis 0 to zero mean and unit standard deviation.

    Parameters
    ----------
    X : array-like
        Data whose axis-0 statistics are used (columns, for a 2-D array).

    Returns
    -------
    numpy.ndarray
        ``(X - mean) / std`` computed per column. A column whose standard
        deviation is exactly zero is only centered (it becomes all zeros)
        instead of being divided by zero.
    """
    centered = X - np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    # A constant column has sigma == 0 and would yield 0/0 = NaN. Dividing by
    # 1 instead leaves the already-zero centered column untouched, which is
    # how scikit-learn's StandardScaler documents its zero-variance handling.
    safe_sigma = np.where(sigma == 0, 1.0, sigma)
    return centered / safe_sigma

In [6]:
def unit_range(X):
    """Rescale ``X`` along axis 0 so each column spans the interval [0, 1].

    Parameters
    ----------
    X : numpy.ndarray
        Data to rescale; the minimum and maximum are taken over axis 0.

    Returns
    -------
    numpy.ndarray
        ``(X - min) / (max - min)`` per column. A column whose maximum equals
        its minimum maps to all zeros instead of dividing by zero.
    """
    lowest = X.min(axis=0)
    span = X.max(axis=0) - lowest
    # A constant column has span == 0 and would yield 0/0 = NaN; dividing by 1
    # keeps the (already zero) shifted values at the bottom of the range.
    safe_span = np.where(span == 0, 1, span)
    return (X - lowest) / safe_span

In [7]:
def normalized(X):
    """Divide each column of the 2-D array ``X`` by its Euclidean (L2) norm.

    Parameters
    ----------
    X : numpy.ndarray
        2-D data; norms are computed over axis 0, i.e. one norm per column.

    Returns
    -------
    numpy.ndarray
        Array of the same shape whose non-zero columns have unit L2 norm.
        A column that is entirely zero is returned unchanged instead of
        being divided by zero.
    """
    col_norms = np.linalg.norm(X, axis=0)
    # An all-zero column has norm 0 and would yield 0/0 = NaN; dividing by 1
    # leaves it as zeros.
    safe_norms = np.where(col_norms == 0, 1.0, col_norms)
    return X / safe_norms[np.newaxis, :]

In [8]:
# Mean-center the full feature matrix; the transform must not change the
# number of samples or features.
X_centered = center(X)
assert X_centered.shape == X.shape

In [9]:
# Standardize the full feature matrix and validate it against scikit-learn.
X_standardized = standardize(X)
assert X_standardized.shape == X.shape

# StandardScaler is not guaranteed to use the same sequence of floating-point
# operations as standardize(), so compare with a tolerance rather than
# demanding bit-for-bit equality. atol covers entries that are close to zero.
sdscaler = StandardScaler()
np.testing.assert_allclose(
    X_standardized, sdscaler.fit_transform(X), rtol=1e-7, atol=1e-12
)

In [10]:
# Rescale every feature to [0, 1] and validate it against scikit-learn,
# mirroring the checks done for the standardized and normalized variants.
X_ur = unit_range(X)
assert X_ur.shape == X.shape

# MinMaxScaler with its default feature_range=(0, 1) is the reference
# implementation; compare with a tolerance because the two computations need
# not round identically.
np.testing.assert_allclose(
    X_ur, MinMaxScaler().fit_transform(X), rtol=1e-7, atol=1e-12
)

In [11]:
# Scale every feature column to unit L2 norm and validate it against
# scikit-learn's normalize() applied along axis 0.
X_normalized = normalized(X)
assert X_normalized.shape == X.shape

# Compare with a tolerance: two independent floating-point implementations
# are not guaranteed to agree bit-for-bit.
np.testing.assert_allclose(
    X_normalized, skl_normalize(X, axis=0), rtol=1e-7, atol=1e-12
)

In [12]:
# Shared linear SVM used by every experiment below. It is built without an
# intercept term and with a fixed random_state so reruns are comparable.
svm = LinearSVC(
    fit_intercept=False,
    random_state=2,
)

In [13]:
def fit_svm(data, condition):
    """Fit the shared ``svm`` on the training rows and print test accuracy.

    Uses the module-level ``svm``, ``tr``, ``tst`` and ``y``: the model is
    fitted on ``data[tr]`` with labels ``y[tr]``, evaluated on ``data[tst]``
    against ``y[tst]``, and the accuracy is printed as a percentage rounded
    to one decimal place. Nothing is returned.

    Parameters
    ----------
    data : numpy.ndarray
        Feature matrix, indexed by row with ``tr`` and ``tst``.
    condition : str
        Label describing the preprocessing of ``data``; used in the printout.
    """
    svm.fit(data[tr, :], y[tr])
    predicted = svm.predict(data[tst, :])
    accuracy_pct = np.round(100*accuracy_score(y[tst], predicted), 1)
    print("SVM accuracy on " + condition + ":", accuracy_pct, "%")

In [14]:
# Baseline: the untransformed features.
fit_svm(data=X, condition="raw")

SVM accuracy on raw: 89.7 %




In [15]:
# Mean-centered features.
fit_svm(data=X_centered, condition="centered")

SVM accuracy on centered: 90.9 %




In [16]:
# Features scaled to zero mean and unit standard deviation.
fit_svm(data=X_standardized, condition="standardized")

SVM accuracy on standardized: 94.7 %


In [17]:
# Features rescaled to the [0, 1] range.
fit_svm(data=X_ur, condition="unit-ranged")

SVM accuracy on unit-ranged: 93.7 %


In [18]:
# Feature columns scaled to unit L2 norm.
fit_svm(data=X_normalized, condition="normalized")

SVM accuracy on normalized: 89.3 %
