In [40]:
import matplotlib.pyplot as plt

from sklearn import metrics
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import scale

from sklearn.neighbors import KNeighborsClassifier as kNN

In [41]:
# Load the breast-cancer dataset, standardise the features and shuffle the rows.
data = load_breast_cancer()
# NOTE: `scale` standardises using statistics of the *whole* dataset, i.e.
# before the train/test split done in a later cell, so test-set statistics
# leak into the training features. Kept as-is so the recorded results below
# remain reproducible; prefer fitting a StandardScaler on the training split.
X = scale(data.data)
y = data.target
# Fixed seed keeps the row order (and therefore every later result) reproducible.
X, y = shuffle(X, y, random_state=512)

In [42]:
# Hold out 25% of the samples for testing; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=512,
)

In [43]:
# Number of training samples (rows of X_train); later used to bound the k sweep.
m_train = len(X_train)
print('m_train', m_train)

m_train 426


In [44]:
# Baseline classifier: 10 nearest neighbours, all other settings left at defaults.
model = kNN(n_neighbors=10)
# Kept as the last expression so the notebook displays the fitted estimator.
model.fit(X=X_train, y=y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=10, p=2,
           weights='uniform')

In [45]:
# Score the baseline model on the held-out test split.
y_pred = model.predict(X_test)
for label, scorer in (
    ('Accuracy:', metrics.accuracy_score),
    ('Precision:', metrics.precision_score),
    ('Recall:', metrics.recall_score),
    ('F1-score:', metrics.f1_score),
):
    print(label, scorer(y_test, y_pred))

Accuracy: 0.958041958041958
Precision: 0.9560439560439561
Recall: 0.9775280898876404
F1-score: 0.9666666666666667


In [46]:
def experiment(k_max, p, weight):
    """Sweep k = 1..k_max for a kNN classifier and report the best test accuracy.

    For every k a fresh classifier is fitted on the notebook-level
    ``X_train``/``y_train`` and scored on ``X_test``/``y_test``. The
    accuracy-vs-k curve is drawn on the current matplotlib axes and the best
    k together with its accuracy is printed.

    Note that the best k is picked using the test split itself, so the
    printed accuracy is an optimistic estimate.

    Parameters
    ----------
    k_max : int
        Largest number of neighbours to try (inclusive); must be >= 1.
    p : float
        Forwarded to the classifier as its ``p`` argument.
    weight : str
        Forwarded to the classifier as its ``weights`` argument.

    Raises
    ------
    ValueError
        If ``k_max`` is smaller than 1 (there would be nothing to evaluate).
    """
    if k_max < 1:
        raise ValueError('k_max must be at least 1, got %r' % (k_max,))

    ks = list(range(1, k_max + 1))
    acc = []
    for k in ks:
        model = kNN(n_neighbors=k, weights=weight, p=p)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        acc.append(metrics.accuracy_score(y_test, y_pred))

    # Plot against the real k values; plotting `acc` alone would put k=1 at x=0.
    plt.plot(ks, acc)
    plt.xlabel('k')
    plt.ylabel('accuracy')

    best_acc = max(acc)
    # index() returns the first maximum, i.e. the smallest k reaching best_acc.
    print('best k:', ks[acc.index(best_acc)])
    print('best acc:', best_acc)

In [None]:
# Manual sweep over the weighting scheme and the Minkowski exponent p.
weights = ['uniform', 'distance']
# p = 1.0, 1.3, ..., 2.8 (built from integer tenths).
ps = [tenths / 10 for tenths in range(10, 31, 3)]
# Try k from 1 up to a tenth of the training-set size.
k_limit = m_train // 10
for weight in weights:
    for p in ps:
        print('weights:', weight)
        print('p value:', p)
        experiment(k_limit, p, weight)
        print()

weights: uniform
p value: 1.0
best k: 3
best acc: 0.965034965034965

weights: uniform
p value: 1.3


In [None]:
# Cross-validated search over the same hyper-parameter families as the manual
# sweep. Model selection uses only the training split (5-fold CV).
# p is restricted to 1 and 2 to keep the search fast.
param_grid = {
    'n_neighbors': list(range(1, m_train // 10 + 1)),
    'weights': ['uniform', 'distance'],
    'p': [1, 2],
}
grid_search = GridSearchCV(kNN(), param_grid, scoring='accuracy', cv=5)
grid_search.fit(X_train, y_train)
print('best params:', grid_search.best_params_)
print('best CV acc:', grid_search.best_score_)