## Implementing KNN on the Breast Cancer Dataset and Implementing Grid Search with Cross-Validation

#### Importing the required libraries and loading the dataset

In [2]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [3]:
# Load the breast cancer dataset that ships with scikit-learn; the `data` and
# `target` attributes of the returned object are read in a later cell.
data = load_breast_cancer()

#### Preparing the data

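We will split the data into two parts: a training set, used for cross-validation and for fitting the final model, and a test set, held out for the final evaluation.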

In [4]:
# Feature matrix and target labels, pulled out of the loaded dataset object.
X = data.data
y = data.target

In [6]:
# Hold out a test set for the final evaluation. `random_state` pins the shuffle
# so that Restart & Run All always produces the same partition; without it the
# cross-validation scores and the selected n_neighbors change on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Mean cross-validation score per candidate, filled in by the search cell below.
cross_val_scores = []

# Candidate n_neighbors values to search over: the odd numbers 1, 3, ..., 13.
neighbors = np.arange(1, 15, 2)

#### Performing Cross-Validation to Find the Best Value for n_neighbors

In [14]:
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

# Start from an empty list every time this cell runs. Appending to a list that
# was created in an earlier cell means a re-run adds a second set of scores, and
# np.argmax can then return an index that no longer lines up with `neighbors`.
cross_val_scores = []

for i in neighbors:
    knn = KNeighborsClassifier(n_neighbors=i)
    # 10-fold cross-validation on the training split only: one score per fold.
    scores = cross_val_score(knn, X_train, y_train, cv=10)
    # Keep the mean fold score for this candidate value of n_neighbors.
    cross_val_scores.append(np.mean(scores))

# Invariant relied on by the argmax lookup: exactly one score per candidate.
assert len(cross_val_scores) == len(neighbors)

print("Best Cross-Validation Score: {:.3f}".format(np.max(cross_val_scores)))
print("Best n-neighbors Value: {}".format(neighbors[np.argmax(cross_val_scores)]))

Best Cross-Validation Score: 0.930
Best n-neighbors Value: 9


#### Building the Model

In [16]:
# Pick the candidate whose mean cross-validation score was highest.
best_n = neighbors[np.argmax(cross_val_scores)]

# Refit on the whole training split with the selected n_neighbors.
# `fit` returns the estimator itself, so construction and fitting can be chained.
knn = KNeighborsClassifier(n_neighbors=best_n).fit(X_train, y_train)
knn  # last expression: show the fitted estimator as the cell output

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=9, p=2,
           weights='uniform')

#### Evaluating the Model on the Test Set

In [17]:
# Score the fitted classifier on the held-out test split, which was not used
# during the cross-validation search or the final fit.
test_accuracy = knn.score(X_test, y_test)
print('Test-Set Score: {:.3f}'.format(test_accuracy))

Test-Set Score: 0.937
