In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

In [6]:
# load the data and split it into training and test sets
X_digits, y_digits = load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X_digits, y_digits)

In [7]:
# create model, train, predict, and evaluate
model = LogisticRegression()
model.fit(X_train, y_train)
model.score(X_test, y_test)



0.9555555555555556

In [25]:
# Although it is tempting to define the number of clusters to 10,
# since there are 10 different digits, it is unlikely to perform well,
# because there are several different ways to write each digit.

pipeline = Pipeline([
    ("kmeans",KMeans(n_clusters=50)),
    ("log_reg",LogisticRegression() )
])

pipeline.fit(X_train, y_train)



Pipeline(memory=None,
         steps=[('kmeans',
                 KMeans(algorithm='auto', copy_x=True, init='k-means++',
                        max_iter=300, n_clusters=50, n_init=10, n_jobs=None,
                        precompute_distances='auto', random_state=None,
                        tol=0.0001, verbose=0)),
                ('log_reg',
                 LogisticRegression(C=1.0, class_weight=None, dual=False,
                                    fit_intercept=True, intercept_scaling=1,
                                    l1_ratio=None, max_iter=100,
                                    multi_class='warn', n_jobs=None,
                                    penalty='l2', random_state=None,
                                    solver='warn', tol=0.0001, verbose=0,
                                    warm_start=False))],
         verbose=False)

In [26]:
pipeline.score(X_test, y_test)

0.98

In [27]:
# we have a better accuracy score when we used an arbitary value for k. 
# We can still improve on the score by finding the optimum value for k using GridSearchCV

In [32]:
param_grid = dict(kmeans__n_clusters=range(2,100))
grid_clf = GridSearchCV(pipeline, param_grid, cv=3, verbose=3)
grid_clf.fit(X_train, y_train)

Fitting 3 folds for each of 98 candidates, totalling 294 fits
[CV] kmeans__n_clusters=2 ............................................
[CV] ................ kmeans__n_clusters=2, score=0.413, total=   0.1s
[CV] kmeans__n_clusters=2 ............................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s


[CV] ................ kmeans__n_clusters=2, score=0.396, total=   0.1s
[CV] kmeans__n_clusters=2 ............................................
[CV] ................ kmeans__n_clusters=2, score=0.290, total=   0.1s
[CV] kmeans__n_clusters=3 ............................................




[CV] ................ kmeans__n_clusters=3, score=0.528, total=   0.2s
[CV] kmeans__n_clusters=3 ............................................




[CV] ................ kmeans__n_clusters=3, score=0.581, total=   0.3s
[CV] kmeans__n_clusters=3 ............................................
[CV] ................ kmeans__n_clusters=3, score=0.544, total=   0.2s
[CV] kmeans__n_clusters=4 ............................................




[CV] ................ kmeans__n_clusters=4, score=0.660, total=   0.2s
[CV] kmeans__n_clusters=4 ............................................
[CV] ................ kmeans__n_clusters=4, score=0.690, total=   0.2s




[CV] kmeans__n_clusters=4 ............................................
[CV] ................ kmeans__n_clusters=4, score=0.672, total=   0.2s
[CV] kmeans__n_clusters=5 ............................................




[CV] ................ kmeans__n_clusters=5, score=0.720, total=   0.2s
[CV] kmeans__n_clusters=5 ............................................
[CV] ................ kmeans__n_clusters=5, score=0.753, total=   0.2s
[CV] kmeans__n_clusters=5 ............................................




[CV] ................ kmeans__n_clusters=5, score=0.742, total=   0.1s
[CV] kmeans__n_clusters=6 ............................................
[CV] ................ kmeans__n_clusters=6, score=0.804, total=   0.2s




[CV] kmeans__n_clusters=6 ............................................
[CV] ................ kmeans__n_clusters=6, score=0.811, total=   0.2s
[CV] kmeans__n_clusters=6 ............................................




[CV] ................ kmeans__n_clusters=6, score=0.796, total=   0.2s
[CV] kmeans__n_clusters=7 ............................................




[CV] ................ kmeans__n_clusters=7, score=0.815, total=   0.2s
[CV] kmeans__n_clusters=7 ............................................
[CV] ................ kmeans__n_clusters=7, score=0.864, total=   0.2s
[CV] kmeans__n_clusters=7 ............................................




[CV] ................ kmeans__n_clusters=7, score=0.831, total=   0.2s
[CV] kmeans__n_clusters=8 ............................................
[CV] ................ kmeans__n_clusters=8, score=0.879, total=   0.2s
[CV] kmeans__n_clusters=8 ............................................




[CV] ................ kmeans__n_clusters=8, score=0.884, total=   0.2s
[CV] kmeans__n_clusters=8 ............................................
[CV] ................ kmeans__n_clusters=8, score=0.879, total=   0.2s
[CV] kmeans__n_clusters=9 ............................................




[CV] ................ kmeans__n_clusters=9, score=0.898, total=   0.2s
[CV] kmeans__n_clusters=9 ............................................




[CV] ................ kmeans__n_clusters=9, score=0.898, total=   0.2s
[CV] kmeans__n_clusters=9 ............................................




[CV] ................ kmeans__n_clusters=9, score=0.872, total=   0.3s
[CV] kmeans__n_clusters=10 ...........................................




[CV] ............... kmeans__n_clusters=10, score=0.914, total=   0.3s
[CV] kmeans__n_clusters=10 ...........................................




[CV] ............... kmeans__n_clusters=10, score=0.922, total=   0.2s
[CV] kmeans__n_clusters=10 ...........................................




[CV] ............... kmeans__n_clusters=10, score=0.894, total=   0.2s
[CV] kmeans__n_clusters=11 ...........................................




[CV] ............... kmeans__n_clusters=11, score=0.925, total=   0.2s
[CV] kmeans__n_clusters=11 ...........................................




[CV] ............... kmeans__n_clusters=11, score=0.929, total=   0.2s
[CV] kmeans__n_clusters=11 ...........................................




[CV] ............... kmeans__n_clusters=11, score=0.921, total=   0.2s
[CV] kmeans__n_clusters=12 ...........................................




[CV] ............... kmeans__n_clusters=12, score=0.940, total=   0.3s
[CV] kmeans__n_clusters=12 ...........................................




[CV] ............... kmeans__n_clusters=12, score=0.933, total=   0.3s
[CV] kmeans__n_clusters=12 ...........................................




[CV] ............... kmeans__n_clusters=12, score=0.921, total=   0.3s
[CV] kmeans__n_clusters=13 ...........................................




[CV] ............... kmeans__n_clusters=13, score=0.943, total=   0.2s
[CV] kmeans__n_clusters=13 ...........................................




[CV] ............... kmeans__n_clusters=13, score=0.944, total=   0.3s
[CV] kmeans__n_clusters=13 ...........................................




[CV] ............... kmeans__n_clusters=13, score=0.926, total=   0.3s
[CV] kmeans__n_clusters=14 ...........................................




[CV] ............... kmeans__n_clusters=14, score=0.951, total=   0.3s
[CV] kmeans__n_clusters=14 ...........................................




[CV] ............... kmeans__n_clusters=14, score=0.953, total=   0.3s
[CV] kmeans__n_clusters=14 ...........................................




[CV] ............... kmeans__n_clusters=14, score=0.942, total=   0.3s
[CV] kmeans__n_clusters=15 ...........................................




[CV] ............... kmeans__n_clusters=15, score=0.951, total=   0.3s
[CV] kmeans__n_clusters=15 ...........................................




[CV] ............... kmeans__n_clusters=15, score=0.938, total=   0.3s
[CV] kmeans__n_clusters=15 ...........................................




[CV] ............... kmeans__n_clusters=15, score=0.933, total=   0.3s
[CV] kmeans__n_clusters=16 ...........................................




[CV] ............... kmeans__n_clusters=16, score=0.958, total=   0.3s
[CV] kmeans__n_clusters=16 ...........................................




[CV] ............... kmeans__n_clusters=16, score=0.958, total=   0.3s
[CV] kmeans__n_clusters=16 ...........................................




[CV] ............... kmeans__n_clusters=16, score=0.942, total=   0.3s
[CV] kmeans__n_clusters=17 ...........................................




[CV] ............... kmeans__n_clusters=17, score=0.954, total=   0.3s
[CV] kmeans__n_clusters=17 ...........................................




[CV] ............... kmeans__n_clusters=17, score=0.947, total=   0.3s
[CV] kmeans__n_clusters=17 ...........................................




[CV] ............... kmeans__n_clusters=17, score=0.953, total=   0.4s
[CV] kmeans__n_clusters=18 ...........................................




[CV] ............... kmeans__n_clusters=18, score=0.958, total=   0.4s
[CV] kmeans__n_clusters=18 ...........................................




[CV] ............... kmeans__n_clusters=18, score=0.938, total=   0.3s
[CV] kmeans__n_clusters=18 ...........................................




[CV] ............... kmeans__n_clusters=18, score=0.957, total=   0.4s
[CV] kmeans__n_clusters=19 ...........................................




[CV] ............... kmeans__n_clusters=19, score=0.967, total=   0.4s
[CV] kmeans__n_clusters=19 ...........................................




[CV] ............... kmeans__n_clusters=19, score=0.947, total=   0.4s
[CV] kmeans__n_clusters=19 ...........................................




[CV] ............... kmeans__n_clusters=19, score=0.944, total=   0.4s
[CV] kmeans__n_clusters=20 ...........................................




[CV] ............... kmeans__n_clusters=20, score=0.969, total=   0.3s
[CV] kmeans__n_clusters=20 ...........................................




[CV] ............... kmeans__n_clusters=20, score=0.940, total=   0.3s
[CV] kmeans__n_clusters=20 ...........................................




[CV] ............... kmeans__n_clusters=20, score=0.953, total=   0.4s
[CV] kmeans__n_clusters=21 ...........................................




[CV] ............... kmeans__n_clusters=21, score=0.967, total=   0.4s
[CV] kmeans__n_clusters=21 ...........................................




[CV] ............... kmeans__n_clusters=21, score=0.953, total=   0.4s
[CV] kmeans__n_clusters=21 ...........................................




[CV] ............... kmeans__n_clusters=21, score=0.953, total=   0.4s
[CV] kmeans__n_clusters=22 ...........................................




[CV] ............... kmeans__n_clusters=22, score=0.960, total=   0.4s
[CV] kmeans__n_clusters=22 ...........................................




[CV] ............... kmeans__n_clusters=22, score=0.953, total=   0.4s
[CV] kmeans__n_clusters=22 ...........................................




[CV] ............... kmeans__n_clusters=22, score=0.953, total=   0.4s
[CV] kmeans__n_clusters=23 ...........................................




[CV] ............... kmeans__n_clusters=23, score=0.967, total=   0.4s
[CV] kmeans__n_clusters=23 ...........................................




[CV] ............... kmeans__n_clusters=23, score=0.949, total=   0.4s
[CV] kmeans__n_clusters=23 ...........................................




[CV] ............... kmeans__n_clusters=23, score=0.964, total=   0.4s
[CV] kmeans__n_clusters=24 ...........................................




[CV] ............... kmeans__n_clusters=24, score=0.969, total=   0.4s
[CV] kmeans__n_clusters=24 ...........................................




[CV] ............... kmeans__n_clusters=24, score=0.944, total=   0.4s
[CV] kmeans__n_clusters=24 ...........................................




[CV] ............... kmeans__n_clusters=24, score=0.960, total=   0.4s
[CV] kmeans__n_clusters=25 ...........................................




[CV] ............... kmeans__n_clusters=25, score=0.967, total=   0.5s
[CV] kmeans__n_clusters=25 ...........................................




[CV] ............... kmeans__n_clusters=25, score=0.949, total=   0.4s
[CV] kmeans__n_clusters=25 ...........................................




[CV] ............... kmeans__n_clusters=25, score=0.953, total=   0.4s
[CV] kmeans__n_clusters=26 ...........................................




[CV] ............... kmeans__n_clusters=26, score=0.971, total=   0.4s
[CV] kmeans__n_clusters=26 ...........................................




[CV] ............... kmeans__n_clusters=26, score=0.958, total=   0.4s
[CV] kmeans__n_clusters=26 ...........................................




[CV] ............... kmeans__n_clusters=26, score=0.962, total=   0.5s
[CV] kmeans__n_clusters=27 ...........................................




[CV] ............... kmeans__n_clusters=27, score=0.974, total=   0.4s
[CV] kmeans__n_clusters=27 ...........................................




[CV] ............... kmeans__n_clusters=27, score=0.951, total=   0.4s
[CV] kmeans__n_clusters=27 ...........................................




[CV] ............... kmeans__n_clusters=27, score=0.964, total=   0.5s
[CV] kmeans__n_clusters=28 ...........................................




[CV] ............... kmeans__n_clusters=28, score=0.969, total=   0.5s
[CV] kmeans__n_clusters=28 ...........................................




[CV] ............... kmeans__n_clusters=28, score=0.951, total=   0.5s
[CV] kmeans__n_clusters=28 ...........................................




[CV] ............... kmeans__n_clusters=28, score=0.964, total=   0.5s
[CV] kmeans__n_clusters=29 ...........................................




[CV] ............... kmeans__n_clusters=29, score=0.976, total=   0.5s
[CV] kmeans__n_clusters=29 ...........................................




[CV] ............... kmeans__n_clusters=29, score=0.964, total=   0.5s
[CV] kmeans__n_clusters=29 ...........................................




[CV] ............... kmeans__n_clusters=29, score=0.969, total=   0.5s
[CV] kmeans__n_clusters=30 ...........................................




[CV] ............... kmeans__n_clusters=30, score=0.978, total=   0.5s
[CV] kmeans__n_clusters=30 ...........................................




[CV] ............... kmeans__n_clusters=30, score=0.960, total=   0.5s
[CV] kmeans__n_clusters=30 ...........................................




[CV] ............... kmeans__n_clusters=30, score=0.969, total=   0.5s
[CV] kmeans__n_clusters=31 ...........................................




[CV] ............... kmeans__n_clusters=31, score=0.976, total=   0.5s
[CV] kmeans__n_clusters=31 ...........................................




[CV] ............... kmeans__n_clusters=31, score=0.964, total=   0.6s
[CV] kmeans__n_clusters=31 ...........................................




[CV] ............... kmeans__n_clusters=31, score=0.964, total=   0.6s
[CV] kmeans__n_clusters=32 ...........................................




[CV] ............... kmeans__n_clusters=32, score=0.974, total=   0.5s
[CV] kmeans__n_clusters=32 ...........................................




[CV] ............... kmeans__n_clusters=32, score=0.949, total=   0.6s
[CV] kmeans__n_clusters=32 ...........................................




[CV] ............... kmeans__n_clusters=32, score=0.964, total=   0.6s
[CV] kmeans__n_clusters=33 ...........................................




[CV] ............... kmeans__n_clusters=33, score=0.978, total=   0.5s
[CV] kmeans__n_clusters=33 ...........................................




[CV] ............... kmeans__n_clusters=33, score=0.958, total=   0.5s
[CV] kmeans__n_clusters=33 ...........................................




[CV] ............... kmeans__n_clusters=33, score=0.964, total=   0.6s
[CV] kmeans__n_clusters=34 ...........................................




[CV] ............... kmeans__n_clusters=34, score=0.978, total=   0.6s
[CV] kmeans__n_clusters=34 ...........................................




[CV] ............... kmeans__n_clusters=34, score=0.962, total=   0.6s
[CV] kmeans__n_clusters=34 ...........................................




[CV] ............... kmeans__n_clusters=34, score=0.962, total=   0.6s
[CV] kmeans__n_clusters=35 ...........................................




[CV] ............... kmeans__n_clusters=35, score=0.974, total=   0.7s
[CV] kmeans__n_clusters=35 ...........................................




[CV] ............... kmeans__n_clusters=35, score=0.967, total=   0.6s
[CV] kmeans__n_clusters=35 ...........................................




[CV] ............... kmeans__n_clusters=35, score=0.966, total=   0.7s
[CV] kmeans__n_clusters=36 ...........................................




[CV] ............... kmeans__n_clusters=36, score=0.978, total=   0.6s
[CV] kmeans__n_clusters=36 ...........................................




[CV] ............... kmeans__n_clusters=36, score=0.958, total=   0.6s
[CV] kmeans__n_clusters=36 ...........................................




[CV] ............... kmeans__n_clusters=36, score=0.966, total=   0.6s
[CV] kmeans__n_clusters=37 ...........................................




[CV] ............... kmeans__n_clusters=37, score=0.978, total=   0.6s
[CV] kmeans__n_clusters=37 ...........................................




[CV] ............... kmeans__n_clusters=37, score=0.962, total=   0.6s
[CV] kmeans__n_clusters=37 ...........................................




[CV] ............... kmeans__n_clusters=37, score=0.971, total=   0.7s
[CV] kmeans__n_clusters=38 ...........................................




[CV] ............... kmeans__n_clusters=38, score=0.976, total=   0.6s
[CV] kmeans__n_clusters=38 ...........................................




[CV] ............... kmeans__n_clusters=38, score=0.962, total=   0.6s
[CV] kmeans__n_clusters=38 ...........................................




[CV] ............... kmeans__n_clusters=38, score=0.962, total=   0.6s
[CV] kmeans__n_clusters=39 ...........................................




[CV] ............... kmeans__n_clusters=39, score=0.978, total=   0.6s
[CV] kmeans__n_clusters=39 ...........................................




[CV] ............... kmeans__n_clusters=39, score=0.955, total=   0.7s
[CV] kmeans__n_clusters=39 ...........................................




[CV] ............... kmeans__n_clusters=39, score=0.969, total=   0.6s
[CV] kmeans__n_clusters=40 ...........................................




[CV] ............... kmeans__n_clusters=40, score=0.976, total=   0.7s
[CV] kmeans__n_clusters=40 ...........................................




[CV] ............... kmeans__n_clusters=40, score=0.960, total=   0.6s
[CV] kmeans__n_clusters=40 ...........................................




[CV] ............... kmeans__n_clusters=40, score=0.969, total=   0.6s
[CV] kmeans__n_clusters=41 ...........................................




[CV] ............... kmeans__n_clusters=41, score=0.978, total=   0.6s
[CV] kmeans__n_clusters=41 ...........................................




[CV] ............... kmeans__n_clusters=41, score=0.960, total=   0.7s
[CV] kmeans__n_clusters=41 ...........................................




[CV] ............... kmeans__n_clusters=41, score=0.973, total=   0.7s
[CV] kmeans__n_clusters=42 ...........................................




[CV] ............... kmeans__n_clusters=42, score=0.976, total=   0.7s
[CV] kmeans__n_clusters=42 ...........................................




[CV] ............... kmeans__n_clusters=42, score=0.964, total=   0.6s
[CV] kmeans__n_clusters=42 ...........................................




[CV] ............... kmeans__n_clusters=42, score=0.966, total=   0.7s
[CV] kmeans__n_clusters=43 ...........................................




[CV] ............... kmeans__n_clusters=43, score=0.976, total=   0.6s
[CV] kmeans__n_clusters=43 ...........................................




[CV] ............... kmeans__n_clusters=43, score=0.962, total=   0.6s
[CV] kmeans__n_clusters=43 ...........................................




[CV] ............... kmeans__n_clusters=43, score=0.962, total=   0.6s
[CV] kmeans__n_clusters=44 ...........................................




[CV] ............... kmeans__n_clusters=44, score=0.976, total=   0.7s
[CV] kmeans__n_clusters=44 ...........................................




[CV] ............... kmeans__n_clusters=44, score=0.964, total=   0.7s
[CV] kmeans__n_clusters=44 ...........................................




[CV] ............... kmeans__n_clusters=44, score=0.969, total=   0.8s
[CV] kmeans__n_clusters=45 ...........................................




[CV] ............... kmeans__n_clusters=45, score=0.980, total=   0.7s
[CV] kmeans__n_clusters=45 ...........................................




[CV] ............... kmeans__n_clusters=45, score=0.955, total=   0.6s
[CV] kmeans__n_clusters=45 ...........................................




[CV] ............... kmeans__n_clusters=45, score=0.969, total=   0.7s
[CV] kmeans__n_clusters=46 ...........................................




[CV] ............... kmeans__n_clusters=46, score=0.971, total=   0.7s
[CV] kmeans__n_clusters=46 ...........................................




[CV] ............... kmeans__n_clusters=46, score=0.958, total=   0.7s
[CV] kmeans__n_clusters=46 ...........................................




[CV] ............... kmeans__n_clusters=46, score=0.962, total=   0.7s
[CV] kmeans__n_clusters=47 ...........................................




[CV] ............... kmeans__n_clusters=47, score=0.978, total=   0.7s
[CV] kmeans__n_clusters=47 ...........................................




[CV] ............... kmeans__n_clusters=47, score=0.964, total=   0.7s
[CV] kmeans__n_clusters=47 ...........................................




[CV] ............... kmeans__n_clusters=47, score=0.969, total=   0.7s
[CV] kmeans__n_clusters=48 ...........................................




[CV] ............... kmeans__n_clusters=48, score=0.978, total=   0.7s
[CV] kmeans__n_clusters=48 ...........................................




[CV] ............... kmeans__n_clusters=48, score=0.964, total=   0.7s
[CV] kmeans__n_clusters=48 ...........................................




[CV] ............... kmeans__n_clusters=48, score=0.971, total=   0.8s
[CV] kmeans__n_clusters=49 ...........................................




[CV] ............... kmeans__n_clusters=49, score=0.980, total=   0.7s
[CV] kmeans__n_clusters=49 ...........................................




[CV] ............... kmeans__n_clusters=49, score=0.958, total=   0.7s
[CV] kmeans__n_clusters=49 ...........................................




[CV] ............... kmeans__n_clusters=49, score=0.962, total=   0.8s
[CV] kmeans__n_clusters=50 ...........................................




[CV] ............... kmeans__n_clusters=50, score=0.976, total=   0.7s
[CV] kmeans__n_clusters=50 ...........................................




[CV] ............... kmeans__n_clusters=50, score=0.960, total=   0.7s
[CV] kmeans__n_clusters=50 ...........................................




[CV] ............... kmeans__n_clusters=50, score=0.966, total=   0.9s
[CV] kmeans__n_clusters=51 ...........................................




[CV] ............... kmeans__n_clusters=51, score=0.978, total=   1.1s
[CV] kmeans__n_clusters=51 ...........................................




[CV] ............... kmeans__n_clusters=51, score=0.958, total=   0.8s
[CV] kmeans__n_clusters=51 ...........................................




[CV] ............... kmeans__n_clusters=51, score=0.971, total=   0.8s
[CV] kmeans__n_clusters=52 ...........................................




[CV] ............... kmeans__n_clusters=52, score=0.982, total=   0.8s
[CV] kmeans__n_clusters=52 ...........................................




[CV] ............... kmeans__n_clusters=52, score=0.962, total=   0.8s
[CV] kmeans__n_clusters=52 ...........................................




[CV] ............... kmeans__n_clusters=52, score=0.969, total=   0.7s
[CV] kmeans__n_clusters=53 ...........................................




[CV] ............... kmeans__n_clusters=53, score=0.978, total=   0.9s
[CV] kmeans__n_clusters=53 ...........................................




[CV] ............... kmeans__n_clusters=53, score=0.958, total=   0.7s
[CV] kmeans__n_clusters=53 ...........................................




[CV] ............... kmeans__n_clusters=53, score=0.973, total=   0.8s
[CV] kmeans__n_clusters=54 ...........................................




[CV] ............... kmeans__n_clusters=54, score=0.978, total=   0.8s
[CV] kmeans__n_clusters=54 ...........................................




[CV] ............... kmeans__n_clusters=54, score=0.958, total=   0.7s
[CV] kmeans__n_clusters=54 ...........................................




[CV] ............... kmeans__n_clusters=54, score=0.975, total=   0.7s
[CV] kmeans__n_clusters=55 ...........................................




[CV] ............... kmeans__n_clusters=55, score=0.980, total=   0.8s
[CV] kmeans__n_clusters=55 ...........................................




[CV] ............... kmeans__n_clusters=55, score=0.962, total=   0.7s
[CV] kmeans__n_clusters=55 ...........................................




[CV] ............... kmeans__n_clusters=55, score=0.971, total=   0.8s
[CV] kmeans__n_clusters=56 ...........................................




[CV] ............... kmeans__n_clusters=56, score=0.980, total=   0.8s
[CV] kmeans__n_clusters=56 ...........................................




[CV] ............... kmeans__n_clusters=56, score=0.958, total=   0.8s
[CV] kmeans__n_clusters=56 ...........................................




[CV] ............... kmeans__n_clusters=56, score=0.969, total=   0.8s
[CV] kmeans__n_clusters=57 ...........................................




[CV] ............... kmeans__n_clusters=57, score=0.982, total=   0.8s
[CV] kmeans__n_clusters=57 ...........................................




[CV] ............... kmeans__n_clusters=57, score=0.962, total=   0.8s
[CV] kmeans__n_clusters=57 ...........................................




[CV] ............... kmeans__n_clusters=57, score=0.975, total=   0.8s
[CV] kmeans__n_clusters=58 ...........................................




[CV] ............... kmeans__n_clusters=58, score=0.982, total=   0.8s
[CV] kmeans__n_clusters=58 ...........................................




[CV] ............... kmeans__n_clusters=58, score=0.960, total=   0.8s
[CV] kmeans__n_clusters=58 ...........................................




[CV] ............... kmeans__n_clusters=58, score=0.971, total=   0.8s
[CV] kmeans__n_clusters=59 ...........................................




[CV] ............... kmeans__n_clusters=59, score=0.974, total=   0.8s
[CV] kmeans__n_clusters=59 ...........................................




[CV] ............... kmeans__n_clusters=59, score=0.958, total=   0.8s
[CV] kmeans__n_clusters=59 ...........................................




[CV] ............... kmeans__n_clusters=59, score=0.978, total=   0.8s
[CV] kmeans__n_clusters=60 ...........................................




[CV] ............... kmeans__n_clusters=60, score=0.982, total=   0.8s
[CV] kmeans__n_clusters=60 ...........................................




[CV] ............... kmeans__n_clusters=60, score=0.962, total=   0.8s
[CV] kmeans__n_clusters=60 ...........................................




[CV] ............... kmeans__n_clusters=60, score=0.971, total=   0.8s
[CV] kmeans__n_clusters=61 ...........................................




[CV] ............... kmeans__n_clusters=61, score=0.976, total=   0.9s
[CV] kmeans__n_clusters=61 ...........................................




[CV] ............... kmeans__n_clusters=61, score=0.962, total=   0.8s
[CV] kmeans__n_clusters=61 ...........................................




[CV] ............... kmeans__n_clusters=61, score=0.969, total=   0.9s
[CV] kmeans__n_clusters=62 ...........................................




[CV] ............... kmeans__n_clusters=62, score=0.978, total=   0.8s
[CV] kmeans__n_clusters=62 ...........................................




[CV] ............... kmeans__n_clusters=62, score=0.962, total=   1.0s
[CV] kmeans__n_clusters=62 ...........................................




[CV] ............... kmeans__n_clusters=62, score=0.975, total=   1.1s
[CV] kmeans__n_clusters=63 ...........................................




[CV] ............... kmeans__n_clusters=63, score=0.982, total=   0.9s
[CV] kmeans__n_clusters=63 ...........................................




[CV] ............... kmeans__n_clusters=63, score=0.964, total=   0.9s
[CV] kmeans__n_clusters=63 ...........................................




[CV] ............... kmeans__n_clusters=63, score=0.964, total=   0.9s
[CV] kmeans__n_clusters=64 ...........................................




[CV] ............... kmeans__n_clusters=64, score=0.978, total=   0.8s
[CV] kmeans__n_clusters=64 ...........................................




[CV] ............... kmeans__n_clusters=64, score=0.962, total=   0.9s
[CV] kmeans__n_clusters=64 ...........................................




[CV] ............... kmeans__n_clusters=64, score=0.971, total=   0.9s
[CV] kmeans__n_clusters=65 ...........................................




[CV] ............... kmeans__n_clusters=65, score=0.982, total=   0.9s
[CV] kmeans__n_clusters=65 ...........................................




[CV] ............... kmeans__n_clusters=65, score=0.967, total=   0.9s
[CV] kmeans__n_clusters=65 ...........................................




[CV] ............... kmeans__n_clusters=65, score=0.980, total=   0.9s
[CV] kmeans__n_clusters=66 ...........................................




[CV] ............... kmeans__n_clusters=66, score=0.978, total=   0.8s
[CV] kmeans__n_clusters=66 ...........................................




[CV] ............... kmeans__n_clusters=66, score=0.962, total=   0.9s
[CV] kmeans__n_clusters=66 ...........................................




[CV] ............... kmeans__n_clusters=66, score=0.973, total=   1.0s
[CV] kmeans__n_clusters=67 ...........................................




[CV] ............... kmeans__n_clusters=67, score=0.978, total=   0.9s
[CV] kmeans__n_clusters=67 ...........................................




[CV] ............... kmeans__n_clusters=67, score=0.964, total=   0.9s
[CV] kmeans__n_clusters=67 ...........................................




[CV] ............... kmeans__n_clusters=67, score=0.966, total=   1.0s
[CV] kmeans__n_clusters=68 ...........................................




[CV] ............... kmeans__n_clusters=68, score=0.978, total=   0.9s
[CV] kmeans__n_clusters=68 ...........................................




[CV] ............... kmeans__n_clusters=68, score=0.962, total=   0.9s
[CV] kmeans__n_clusters=68 ...........................................




[CV] ............... kmeans__n_clusters=68, score=0.969, total=   1.0s
[CV] kmeans__n_clusters=69 ...........................................




[CV] ............... kmeans__n_clusters=69, score=0.980, total=   0.9s
[CV] kmeans__n_clusters=69 ...........................................




[CV] ............... kmeans__n_clusters=69, score=0.964, total=   0.9s
[CV] kmeans__n_clusters=69 ...........................................




[CV] ............... kmeans__n_clusters=69, score=0.971, total=   1.2s
[CV] kmeans__n_clusters=70 ...........................................




[CV] ............... kmeans__n_clusters=70, score=0.982, total=   1.0s
[CV] kmeans__n_clusters=70 ...........................................




[CV] ............... kmeans__n_clusters=70, score=0.960, total=   1.0s
[CV] kmeans__n_clusters=70 ...........................................




[CV] ............... kmeans__n_clusters=70, score=0.964, total=   1.0s
[CV] kmeans__n_clusters=71 ...........................................




[CV] ............... kmeans__n_clusters=71, score=0.976, total=   1.0s
[CV] kmeans__n_clusters=71 ...........................................




[CV] ............... kmeans__n_clusters=71, score=0.964, total=   1.4s
[CV] kmeans__n_clusters=71 ...........................................




[CV] ............... kmeans__n_clusters=71, score=0.969, total=   1.4s
[CV] kmeans__n_clusters=72 ...........................................




[CV] ............... kmeans__n_clusters=72, score=0.978, total=   1.4s
[CV] kmeans__n_clusters=72 ...........................................




[CV] ............... kmeans__n_clusters=72, score=0.964, total=   1.3s
[CV] kmeans__n_clusters=72 ...........................................




[CV] ............... kmeans__n_clusters=72, score=0.973, total=   1.3s
[CV] kmeans__n_clusters=73 ...........................................




[CV] ............... kmeans__n_clusters=73, score=0.980, total=   1.2s
[CV] kmeans__n_clusters=73 ...........................................




[CV] ............... kmeans__n_clusters=73, score=0.962, total=   1.3s
[CV] kmeans__n_clusters=73 ...........................................




[CV] ............... kmeans__n_clusters=73, score=0.978, total=   1.4s
[CV] kmeans__n_clusters=74 ...........................................




[CV] ............... kmeans__n_clusters=74, score=0.978, total=   0.9s
[CV] kmeans__n_clusters=74 ...........................................




[CV] ............... kmeans__n_clusters=74, score=0.964, total=   0.9s
[CV] kmeans__n_clusters=74 ...........................................




[CV] ............... kmeans__n_clusters=74, score=0.975, total=   1.0s
[CV] kmeans__n_clusters=75 ...........................................




[CV] ............... kmeans__n_clusters=75, score=0.980, total=   0.9s
[CV] kmeans__n_clusters=75 ...........................................




[CV] ............... kmeans__n_clusters=75, score=0.962, total=   0.9s
[CV] kmeans__n_clusters=75 ...........................................




[CV] ............... kmeans__n_clusters=75, score=0.973, total=   1.0s
[CV] kmeans__n_clusters=76 ...........................................




[CV] ............... kmeans__n_clusters=76, score=0.978, total=   1.0s
[CV] kmeans__n_clusters=76 ...........................................




[CV] ............... kmeans__n_clusters=76, score=0.960, total=   1.0s
[CV] kmeans__n_clusters=76 ...........................................




[CV] ............... kmeans__n_clusters=76, score=0.971, total=   1.1s
[CV] kmeans__n_clusters=77 ...........................................




[CV] ............... kmeans__n_clusters=77, score=0.980, total=   1.0s
[CV] kmeans__n_clusters=77 ...........................................




[CV] ............... kmeans__n_clusters=77, score=0.964, total=   0.9s
[CV] kmeans__n_clusters=77 ...........................................




[CV] ............... kmeans__n_clusters=77, score=0.971, total=   1.0s
[CV] kmeans__n_clusters=78 ...........................................




[CV] ............... kmeans__n_clusters=78, score=0.976, total=   1.0s
[CV] kmeans__n_clusters=78 ...........................................




[CV] ............... kmeans__n_clusters=78, score=0.964, total=   1.0s
[CV] kmeans__n_clusters=78 ...........................................




[CV] ............... kmeans__n_clusters=78, score=0.973, total=   1.1s
[CV] kmeans__n_clusters=79 ...........................................




[CV] ............... kmeans__n_clusters=79, score=0.976, total=   1.0s
[CV] kmeans__n_clusters=79 ...........................................




[CV] ............... kmeans__n_clusters=79, score=0.962, total=   1.2s
[CV] kmeans__n_clusters=79 ...........................................




[CV] ............... kmeans__n_clusters=79, score=0.973, total=   1.1s
[CV] kmeans__n_clusters=80 ...........................................




[CV] ............... kmeans__n_clusters=80, score=0.978, total=   1.2s
[CV] kmeans__n_clusters=80 ...........................................




[CV] ............... kmeans__n_clusters=80, score=0.960, total=   1.1s
[CV] kmeans__n_clusters=80 ...........................................




[CV] ............... kmeans__n_clusters=80, score=0.969, total=   1.0s
[CV] kmeans__n_clusters=81 ...........................................




[CV] ............... kmeans__n_clusters=81, score=0.980, total=   1.0s
[CV] kmeans__n_clusters=81 ...........................................




[CV] ............... kmeans__n_clusters=81, score=0.958, total=   1.0s
[CV] kmeans__n_clusters=81 ...........................................




[CV] ............... kmeans__n_clusters=81, score=0.971, total=   2.0s
[CV] kmeans__n_clusters=82 ...........................................




[CV] ............... kmeans__n_clusters=82, score=0.978, total=   1.1s
[CV] kmeans__n_clusters=82 ...........................................




[CV] ............... kmeans__n_clusters=82, score=0.960, total=   0.9s
[CV] kmeans__n_clusters=82 ...........................................




[CV] ............... kmeans__n_clusters=82, score=0.969, total=   1.0s
[CV] kmeans__n_clusters=83 ...........................................




[CV] ............... kmeans__n_clusters=83, score=0.976, total=   1.0s
[CV] kmeans__n_clusters=83 ...........................................




[CV] ............... kmeans__n_clusters=83, score=0.960, total=   1.0s
[CV] kmeans__n_clusters=83 ...........................................




[CV] ............... kmeans__n_clusters=83, score=0.973, total=   1.1s
[CV] kmeans__n_clusters=84 ...........................................




[CV] ............... kmeans__n_clusters=84, score=0.980, total=   1.0s
[CV] kmeans__n_clusters=84 ...........................................




[CV] ............... kmeans__n_clusters=84, score=0.962, total=   1.0s
[CV] kmeans__n_clusters=84 ...........................................




[CV] ............... kmeans__n_clusters=84, score=0.975, total=   1.0s
[CV] kmeans__n_clusters=85 ...........................................




[CV] ............... kmeans__n_clusters=85, score=0.980, total=   1.0s
[CV] kmeans__n_clusters=85 ...........................................




[CV] ............... kmeans__n_clusters=85, score=0.964, total=   1.0s
[CV] kmeans__n_clusters=85 ...........................................




[CV] ............... kmeans__n_clusters=85, score=0.978, total=   1.0s
[CV] kmeans__n_clusters=86 ...........................................




[CV] ............... kmeans__n_clusters=86, score=0.980, total=   0.9s
[CV] kmeans__n_clusters=86 ...........................................




[CV] ............... kmeans__n_clusters=86, score=0.960, total=   1.0s
[CV] kmeans__n_clusters=86 ...........................................




[CV] ............... kmeans__n_clusters=86, score=0.969, total=   1.0s
[CV] kmeans__n_clusters=87 ...........................................




[CV] ............... kmeans__n_clusters=87, score=0.976, total=   1.1s
[CV] kmeans__n_clusters=87 ...........................................




[CV] ............... kmeans__n_clusters=87, score=0.960, total=   1.0s
[CV] kmeans__n_clusters=87 ...........................................




[CV] ............... kmeans__n_clusters=87, score=0.962, total=   1.0s
[CV] kmeans__n_clusters=88 ...........................................




[CV] ............... kmeans__n_clusters=88, score=0.982, total=   1.0s
[CV] kmeans__n_clusters=88 ...........................................




[CV] ............... kmeans__n_clusters=88, score=0.962, total=   1.0s
[CV] kmeans__n_clusters=88 ...........................................




[CV] ............... kmeans__n_clusters=88, score=0.971, total=   1.0s
[CV] kmeans__n_clusters=89 ...........................................




[CV] ............... kmeans__n_clusters=89, score=0.974, total=   1.0s
[CV] kmeans__n_clusters=89 ...........................................




[CV] ............... kmeans__n_clusters=89, score=0.964, total=   1.0s
[CV] kmeans__n_clusters=89 ...........................................




[CV] ............... kmeans__n_clusters=89, score=0.973, total=   1.1s
[CV] kmeans__n_clusters=90 ...........................................




[CV] ............... kmeans__n_clusters=90, score=0.982, total=   1.0s
[CV] kmeans__n_clusters=90 ...........................................




[CV] ............... kmeans__n_clusters=90, score=0.960, total=   1.0s
[CV] kmeans__n_clusters=90 ...........................................




[CV] ............... kmeans__n_clusters=90, score=0.971, total=   1.1s
[CV] kmeans__n_clusters=91 ...........................................




[CV] ............... kmeans__n_clusters=91, score=0.976, total=   1.1s
[CV] kmeans__n_clusters=91 ...........................................




[CV] ............... kmeans__n_clusters=91, score=0.962, total=   1.0s
[CV] kmeans__n_clusters=91 ...........................................




[CV] ............... kmeans__n_clusters=91, score=0.962, total=   1.5s
[CV] kmeans__n_clusters=92 ...........................................




[CV] ............... kmeans__n_clusters=92, score=0.976, total=   1.1s
[CV] kmeans__n_clusters=92 ...........................................




[CV] ............... kmeans__n_clusters=92, score=0.960, total=   1.0s
[CV] kmeans__n_clusters=92 ...........................................




[CV] ............... kmeans__n_clusters=92, score=0.980, total=   1.1s
[CV] kmeans__n_clusters=93 ...........................................




[CV] ............... kmeans__n_clusters=93, score=0.978, total=   1.0s
[CV] kmeans__n_clusters=93 ...........................................




[CV] ............... kmeans__n_clusters=93, score=0.962, total=   1.0s
[CV] kmeans__n_clusters=93 ...........................................




[CV] ............... kmeans__n_clusters=93, score=0.969, total=   1.1s
[CV] kmeans__n_clusters=94 ...........................................




[CV] ............... kmeans__n_clusters=94, score=0.982, total=   1.1s
[CV] kmeans__n_clusters=94 ...........................................




[CV] ............... kmeans__n_clusters=94, score=0.962, total=   1.1s
[CV] kmeans__n_clusters=94 ...........................................




[CV] ............... kmeans__n_clusters=94, score=0.980, total=   1.2s
[CV] kmeans__n_clusters=95 ...........................................




[CV] ............... kmeans__n_clusters=95, score=0.982, total=   1.1s
[CV] kmeans__n_clusters=95 ...........................................




[CV] ............... kmeans__n_clusters=95, score=0.960, total=   1.1s
[CV] kmeans__n_clusters=95 ...........................................




[CV] ............... kmeans__n_clusters=95, score=0.980, total=   1.1s
[CV] kmeans__n_clusters=96 ...........................................




[CV] ............... kmeans__n_clusters=96, score=0.980, total=   1.1s
[CV] kmeans__n_clusters=96 ...........................................




[CV] ............... kmeans__n_clusters=96, score=0.964, total=   1.1s
[CV] kmeans__n_clusters=96 ...........................................




[CV] ............... kmeans__n_clusters=96, score=0.978, total=   1.1s
[CV] kmeans__n_clusters=97 ...........................................




[CV] ............... kmeans__n_clusters=97, score=0.976, total=   1.1s
[CV] kmeans__n_clusters=97 ...........................................




[CV] ............... kmeans__n_clusters=97, score=0.964, total=   1.1s
[CV] kmeans__n_clusters=97 ...........................................




[CV] ............... kmeans__n_clusters=97, score=0.975, total=   1.2s
[CV] kmeans__n_clusters=98 ...........................................




[CV] ............... kmeans__n_clusters=98, score=0.976, total=   1.1s
[CV] kmeans__n_clusters=98 ...........................................




[CV] ............... kmeans__n_clusters=98, score=0.962, total=   1.0s
[CV] kmeans__n_clusters=98 ...........................................




[CV] ............... kmeans__n_clusters=98, score=0.975, total=   1.1s
[CV] kmeans__n_clusters=99 ...........................................




[CV] ............... kmeans__n_clusters=99, score=0.982, total=   1.1s
[CV] kmeans__n_clusters=99 ...........................................




[CV] ............... kmeans__n_clusters=99, score=0.962, total=   1.1s
[CV] kmeans__n_clusters=99 ...........................................




[CV] ............... kmeans__n_clusters=99, score=0.975, total=   1.1s


[Parallel(n_jobs=1)]: Done 294 out of 294 | elapsed:  3.5min finished


GridSearchCV(cv=3, error_score='raise-deprecating',
             estimator=Pipeline(memory=None,
                                steps=[('kmeans',
                                        KMeans(algorithm='auto', copy_x=True,
                                               init='k-means++', max_iter=300,
                                               n_clusters=50, n_init=10,
                                               n_jobs=None,
                                               precompute_distances='auto',
                                               random_state=None, tol=0.0001,
                                               verbose=0)),
                                       ('log_reg',
                                        LogisticRegression(C=1.0,
                                                           class_weight=None,
                                                           dual=False,
                                                           fit_intercept=True,
    

In [33]:
grid_clf.best_params_

{'kmeans__n_clusters': 65}

In [34]:
grid_clf.score(X_test, y_test)

0.9866666666666667

In [None]:
# The accuracy has bumped up to 98.6%