In [32]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import scale, StandardScaler
data = load_breast_cancer()
X, y = data.data, data.target

In [34]:
X_trainval, X_test, y_trainval, y_test = train_test_split(X, y)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval)
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
print("Train: {:.3f}".format(knn.score(X_train, y_train)))
print("Validation: {:.3f}".format(knn.score(X_val, y_val)))
print("Test: {:.3f}".format(knn.score(X_test, y_test)))

Train: 0.953
Validation: 0.897
Test: 0.944


In [35]:
X = scale(X)
X_trainval, X_test, y_trainval, y_test = train_test_split(X, y)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval)
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
print("Train: {:.3f}".format(knn.score(X_train, y_train)))
print("Validation: {:.3f}".format(knn.score(X_val, y_val)))
print("Test: {:.3f}".format(knn.score(X_test, y_test)))

Train: 0.972
Validation: 0.972
Test: 0.951


In [19]:
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline

In [29]:

knn = KNeighborsClassifier(n_neighbors=5)
steps = [('scaler', StandardScaler()), ('knn', knn)]
pipe = Pipeline(steps)
pipe.fit(X_train,y_train)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('knn',
                 KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                      metric='minkowski', metric_params=None,
                                      n_jobs=None, n_neighbors=5, p=2,
                                      weights='uniform'))],
         verbose=False)

In [27]:
pipe.score(X_val, y_val)

0.9719626168224299

In [30]:
pipe

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('knn',
                 KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                      metric='minkowski', metric_params=None,
                                      n_jobs=None, n_neighbors=5, p=2,
                                      weights='uniform'))],
         verbose=False)

In [38]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [41]:
steps = [('scaler', StandardScaler()), ('SVM', SVC())]
pipeline = Pipeline(steps)
parameters = {'SVM__C':[0.001,0.1,10,100,10e5], 'SVM__gamma':[0.1,0.01]}

grid = GridSearchCV(pipeline, param_grid=parameters, cv=5)

grid.fit(X_train, y_train)

print("score = %3.2f" %(grid.score(X_test,y_test)))
print("best params: ", grid.best_params_)

score = 0.99
best params:  {'SVM__C': 10, 'SVM__gamma': 0.01}
