In [69]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.utils.random import sample_without_replacement

In [8]:
# Open the baseline training archive; its arrays are extracted in the next cell.
train_data = np.load(file="./train_base.npz")

In [9]:
# Pull the feature array ("data") and the label array ("targets") out of the archive.
X_train, y_train = train_data["data"], train_data["targets"]

In [15]:
# Baseline: 10-fold cross-validation of a 10-nearest-neighbours classifier
# on the base training set. The cell displays one score per fold.
KNN = KNeighborsClassifier(n_neighbors=10)
cross_val_score(estimator=KNN, X=X_train, y=y_train, cv=10)

array([0.96716667, 0.9705    , 0.97266667, 0.9675    , 0.966     ,
       0.97283333, 0.96083333, 0.9695    , 0.9655    , 0.9655    ])

In [51]:
#Grid Search
parameters = {"n_neighbors":[1,3,5,7,10], "weights":("uniform","distance")}
clf = GridSearchCV( KNeighborsClassifier(), parameters, cv=3 )

In [52]:
# Run the grid search on the base training set.
clf.fit(X=X_train, y=y_train)

In [53]:
# Tabulate the grid-search results: one row per parameter combination, with
# fit/score timings, per-split test scores, their mean/std and the rank.
cv_results = pd.DataFrame(data=clf.cv_results_)
cv_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_neighbors,param_weights,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,0.186366,0.150014,4.557515,0.115721,1,uniform,"{'n_neighbors': 1, 'weights': 'uniform'}",0.96995,0.9685,0.96615,0.9682,0.001566,4
1,0.167656,0.007845,4.646178,0.051587,1,distance,"{'n_neighbors': 1, 'weights': 'distance'}",0.96995,0.9685,0.96615,0.9682,0.001566,4
2,0.082381,0.018652,4.872846,0.408615,3,uniform,"{'n_neighbors': 3, 'weights': 'uniform'}",0.971,0.9686,0.9673,0.968967,0.001533,3
3,0.155495,0.046031,4.627106,0.086338,3,distance,"{'n_neighbors': 3, 'weights': 'distance'}",0.9722,0.9701,0.9679,0.970067,0.001756,1
4,0.075937,0.036884,4.59059,0.092269,5,uniform,"{'n_neighbors': 5, 'weights': 'uniform'}",0.9696,0.9677,0.96705,0.968117,0.001082,6
5,0.151202,0.061104,5.071986,0.368383,5,distance,"{'n_neighbors': 5, 'weights': 'distance'}",0.9712,0.96935,0.9683,0.969617,0.001199,2
6,0.257581,0.062755,5.517101,0.118866,7,uniform,"{'n_neighbors': 7, 'weights': 'uniform'}",0.9676,0.96655,0.9646,0.96625,0.001243,9
7,0.180514,0.039423,4.907107,0.190722,7,distance,"{'n_neighbors': 7, 'weights': 'distance'}",0.96885,0.9677,0.9661,0.96755,0.001128,7
8,0.069957,0.021892,4.659718,0.101651,10,uniform,"{'n_neighbors': 10, 'weights': 'uniform'}",0.96505,0.9645,0.9638,0.96445,0.000512,10
9,0.04937,0.001583,4.61098,0.034292,10,distance,"{'n_neighbors': 10, 'weights': 'distance'}",0.968,0.967,0.96575,0.966917,0.00092,8


In [42]:
# Display the estimator that the grid search selected as best.
clf.best_estimator_

In [57]:
# Best mean cross-validated score over all grid candidates, shown as a percentage.
# Series.max() skips NaN entries (GridSearchCV records a failed fit as NaN by
# default), whereas the builtin max() returns an order-dependent result as soon
# as a NaN is present in the column.
cv_test_score = cv_results["mean_test_score"].max()
cv_test_score*100

97.00666666666665

# KNN on augmented data

In [91]:
# Load the augmented training set. The context manager closes the .npz archive
# as soon as both arrays have been read; indexing an NpzFile returns a fully
# loaded ndarray, so X_train / y_train stay valid after the file is closed.
# NOTE: train_data itself is a closed archive after this cell and must not be
# indexed again.
with np.load("./train_aug.npz") as train_data:
    X_train = train_data["data"]
    y_train = train_data["targets"]

In [92]:
# Draw a random subset of the augmented set so the KNN experiments below stay
# tractable. 60000 presumably matches the size of the base training set --
# TODO confirm.
N_SUBSAMPLE = 60000
# Fixed seed: without it every run draws a different subset and the scores
# reported below cannot be reproduced.
SUBSAMPLE_SEED = 0

# Never request more rows than are available; sample_without_replacement
# raises ValueError when n_samples > n_population.
n_keep = min(N_SUBSAMPLE, X_train.shape[0])
idxs = sample_without_replacement(X_train.shape[0], n_keep, random_state=SUBSAMPLE_SEED)

# Apply the same row indices to features and labels so they stay aligned.
X_train = X_train[idxs]
y_train = y_train[idxs]

In [95]:
#Cross Validation
KNN = KNeighborsClassifier(n_neighbors=10)
cross_val_score(KNN, X_train, y_train, cv=5)

array([0.96925   , 0.96425   , 0.96833333, 0.9645    , 0.96308333])

In [96]:
#Grid Search
parameters = {"n_neighbors":[1,3,5,7,10], "weights":("uniform","distance")}
clf = GridSearchCV( KNeighborsClassifier(), parameters, cv=3 )

In [97]:
# Run the grid search on the subsampled augmented training set.
clf.fit(X=X_train, y=y_train)

In [98]:
# Tabulate the grid-search results and report the best mean cross-validated
# score as a percentage.
cv_results = pd.DataFrame(clf.cv_results_)
# Series.max() skips NaN entries (GridSearchCV records a failed fit as NaN by
# default); the builtin max() is order-dependent when a NaN is present.
cv_test_score = cv_results["mean_test_score"].max()
cv_test_score*100

97.18

In [99]:
# Display the full per-candidate results table of the grid search on the augmented subset.
cv_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_neighbors,param_weights,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,0.244054,0.122765,10.221014,2.57037,1,uniform,"{'n_neighbors': 1, 'weights': 'uniform'}",0.97195,0.9733,0.97015,0.9718,0.00129,1
1,0.424208,0.159432,13.321423,0.760568,1,distance,"{'n_neighbors': 1, 'weights': 'distance'}",0.97195,0.9733,0.97015,0.9718,0.00129,1
2,0.227543,0.110522,11.026658,0.30368,3,uniform,"{'n_neighbors': 3, 'weights': 'uniform'}",0.96945,0.96995,0.9689,0.969433,0.000429,4
3,0.169726,0.036248,10.411677,0.157113,3,distance,"{'n_neighbors': 3, 'weights': 'distance'}",0.97085,0.9716,0.97095,0.971133,0.000332,3
4,0.216224,0.105784,10.789688,0.132063,5,uniform,"{'n_neighbors': 5, 'weights': 'uniform'}",0.96775,0.96915,0.96585,0.967583,0.001352,7
5,0.264488,0.119505,10.774188,0.571404,5,distance,"{'n_neighbors': 5, 'weights': 'distance'}",0.9697,0.97075,0.96785,0.969433,0.001199,4
6,0.144962,0.020863,10.886639,0.212549,7,uniform,"{'n_neighbors': 7, 'weights': 'uniform'}",0.9662,0.96755,0.9643,0.966017,0.001333,8
7,0.386164,0.060057,12.375182,0.841636,7,distance,"{'n_neighbors': 7, 'weights': 'distance'}",0.9677,0.96975,0.9658,0.96775,0.001613,6
8,0.296119,0.128426,10.977389,0.49836,10,uniform,"{'n_neighbors': 10, 'weights': 'uniform'}",0.96365,0.96395,0.9608,0.9628,0.00142,10
9,0.365533,0.134849,25.909632,22.302752,10,distance,"{'n_neighbors': 10, 'weights': 'distance'}",0.9667,0.967,0.9634,0.9657,0.001631,9
