In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.metrics import classification_report, zero_one_loss, confusion_matrix, accuracy_score

In [2]:
from sklearn.datasets import fetch_openml

# Download the 'mnist_784' dataset (version 1) from OpenML as a
# (features, labels) pair.
# NOTE(review): on recent scikit-learn releases this call may hand back a
# pandas DataFrame/Series rather than NumPy arrays; pass as_frame=False if
# plain arrays are required downstream — confirm for the installed version.
X, y = fetch_openml(name='mnist_784', version=1, return_X_y=True)

# Rescale the features by 1/255 (assumes raw pixel intensities are 0-255,
# which would map them into [0, 1] — TODO confirm).
X = X / 255.0

# Display the feature-matrix dimensions as the cell output.
X.shape

(70000, 784)

In [3]:
# Positional hold-out split: the first 60000 rows are used for training and
# every remaining row for testing. No shuffling is performed, so the split
# depends entirely on the row order returned by the loader.
X_train, y_train = X[:60000], y[:60000]
X_test, y_test = X[60000:], y[60000:]

In [4]:
# 3-nearest-neighbour baseline: only n_neighbors is tuned here; the other
# arguments are spelled out explicitly (they appear to match the library
# defaults — verify against the installed scikit-learn version).
# leaf_size is the leaf capacity handed to a tree-based index, if one is
# chosen by algorithm='auto'; it is not a count of leaf nodes.
K31 = KNeighborsClassifier(
    n_neighbors=3,
    weights='uniform',
    algorithm='auto',
    leaf_size=30,
    metric='minkowski',
)
K31.fit(X_train, y_train)

# Predict the held-out samples and keep the overall accuracy in `r`.
y_pred = K31.predict(X_test)
r = accuracy_score(y_test, y_pred)

# Bare expression so the notebook displays the accuracy as the cell output.
r

0.9705

In [6]:
# 3-NN with distance-weighted voting, a KD-tree index and the Manhattan metric.
# leaf_size=10 is the leaf capacity passed to the tree (a speed/memory knob
# per the scikit-learn docs), not the number of leaf nodes.
K32 = KNeighborsClassifier(
    n_neighbors=3,
    weights='distance',
    algorithm='kd_tree',
    leaf_size=10,
    metric='manhattan',
)
K32.fit(X_train, y_train)

y_pred32 = K32.predict(X_test)

# Misclassification rate on the test set; the recorded output shows it is
# the complement of the accuracy printed just before it.
error32 = zero_one_loss(y_test, y_pred32)

# Report order: per-class table, overall accuracy, then the error rate.
print(classification_report(y_test, y_pred32))
print(accuracy_score(y_test, y_pred32))
print(error32)

              precision    recall  f1-score   support

           0       0.97      0.99      0.98       980
           1       0.94      1.00      0.97      1135
           2       0.98      0.95      0.97      1032
           3       0.96      0.96      0.96      1010
           4       0.97      0.95      0.96       982
           5       0.96      0.96      0.96       892
           6       0.98      0.98      0.98       958
           7       0.95      0.96      0.95      1028
           8       0.99      0.92      0.95       974
           9       0.95      0.95      0.95      1009

    accuracy                           0.96     10000
   macro avg       0.96      0.96      0.96     10000
weighted avg       0.96      0.96      0.96     10000

0.964
0.03600000000000003


In [8]:
# 3-NN with distance-weighted voting, a ball-tree index and the Euclidean metric.
# leaf_size=10 is the leaf capacity passed to the tree (a speed/memory knob
# per the scikit-learn docs), not the number of leaf nodes.
K33 = KNeighborsClassifier(
    n_neighbors=3,
    weights='distance',
    algorithm='ball_tree',
    leaf_size=10,
    metric='euclidean',
)
K33.fit(X_train, y_train)

y_pred33 = K33.predict(X_test)

# Misclassification rate on the test set; the recorded output shows it is
# the complement of the accuracy printed just before it.
error33 = zero_one_loss(y_test, y_pred33)

# Report order: per-class table, overall accuracy, then the error rate.
print(classification_report(y_test, y_pred33))
print(accuracy_score(y_test, y_pred33))
print(error33)

              precision    recall  f1-score   support

           0       0.97      0.99      0.98       980
           1       0.97      1.00      0.98      1135
           2       0.98      0.97      0.98      1032
           3       0.97      0.97      0.97      1010
           4       0.98      0.97      0.97       982
           5       0.96      0.96      0.96       892
           6       0.98      0.99      0.98       958
           7       0.96      0.97      0.96      1028
           8       0.99      0.95      0.97       974
           9       0.96      0.96      0.96      1009

    accuracy                           0.97     10000
   macro avg       0.97      0.97      0.97     10000
weighted avg       0.97      0.97      0.97     10000

0.9717
0.028299999999999992


In [4]:
# 5-NN with uniform voting, automatic choice of search structure and the
# Minkowski metric (Euclidean when p is left at its default of 2, per the
# scikit-learn docs).
# leaf_size=30 is the leaf capacity used if a tree index is selected; it is
# not the number of leaf nodes.
K51 = KNeighborsClassifier(
    n_neighbors=5,
    weights='uniform',
    algorithm='auto',
    leaf_size=30,
    metric='minkowski',
)
K51.fit(X_train, y_train)

y_pred51 = K51.predict(X_test)

# Misclassification rate on the test set; the recorded output shows it is
# the complement of the accuracy printed just before it.
error51 = zero_one_loss(y_test, y_pred51)

# Report order: per-class table, overall accuracy, then the error rate.
print(classification_report(y_test, y_pred51))
print(accuracy_score(y_test, y_pred51))
print(error51)

              precision    recall  f1-score   support

           0       0.96      0.99      0.98       980
           1       0.95      1.00      0.98      1135
           2       0.98      0.96      0.97      1032
           3       0.96      0.97      0.97      1010
           4       0.98      0.96      0.97       982
           5       0.97      0.97      0.97       892
           6       0.98      0.99      0.98       958
           7       0.96      0.96      0.96      1028
           8       0.99      0.94      0.96       974
           9       0.96      0.95      0.95      1009

    accuracy                           0.97     10000
   macro avg       0.97      0.97      0.97     10000
weighted avg       0.97      0.97      0.97     10000

0.9688
0.031200000000000006


In [12]:
# 5-NN with distance-weighted voting, a KD-tree index and the Manhattan metric.
# leaf_size=10 is the leaf capacity passed to the tree (a speed/memory knob
# per the scikit-learn docs), not the number of leaf nodes.
K52 = KNeighborsClassifier(
    n_neighbors=5,
    weights='distance',
    algorithm='kd_tree',
    leaf_size=10,
    metric='manhattan',
)
K52.fit(X_train, y_train)

y_pred52 = K52.predict(X_test)

# Misclassification rate on the test set; the recorded output shows it is
# the complement of the accuracy printed just before it.
error52 = zero_one_loss(y_test, y_pred52)

# Report order: per-class table, overall accuracy, then the error rate.
print(classification_report(y_test, y_pred52))
print(accuracy_score(y_test, y_pred52))
print(error52)

              precision    recall  f1-score   support

           0       0.96      0.99      0.98       980
           1       0.94      1.00      0.97      1135
           2       0.98      0.94      0.96      1032
           3       0.97      0.96      0.96      1010
           4       0.97      0.95      0.96       982
           5       0.96      0.97      0.96       892
           6       0.98      0.98      0.98       958
           7       0.95      0.96      0.95      1028
           8       0.99      0.92      0.96       974
           9       0.95      0.95      0.95      1009

    accuracy                           0.96     10000
   macro avg       0.96      0.96      0.96     10000
weighted avg       0.96      0.96      0.96     10000

0.9629
0.03710000000000002


In [13]:
# 5-NN with distance-weighted voting, a ball-tree index and the Euclidean metric.
# leaf_size=10 is the leaf capacity passed to the tree (a speed/memory knob
# per the scikit-learn docs), not the number of leaf nodes.
K53 = KNeighborsClassifier(
    n_neighbors=5,
    weights='distance',
    algorithm='ball_tree',
    leaf_size=10,
    metric='euclidean',
)
K53.fit(X_train, y_train)

y_pred53 = K53.predict(X_test)

# Misclassification rate on the test set; the recorded output shows it is
# the complement of the accuracy printed just before it.
error53 = zero_one_loss(y_test, y_pred53)

# Report order: per-class table, overall accuracy, then the error rate.
print(classification_report(y_test, y_pred53))
print(accuracy_score(y_test, y_pred53))
print(error53)

              precision    recall  f1-score   support

           0       0.97      0.99      0.98       980
           1       0.96      1.00      0.98      1135
           2       0.98      0.96      0.97      1032
           3       0.97      0.96      0.97      1010
           4       0.98      0.96      0.97       982
           5       0.97      0.97      0.97       892
           6       0.98      0.99      0.98       958
           7       0.96      0.96      0.96      1028
           8       0.98      0.94      0.96       974
           9       0.95      0.96      0.95      1009

    accuracy                           0.97     10000
   macro avg       0.97      0.97      0.97     10000
weighted avg       0.97      0.97      0.97     10000

0.9691
0.03090000000000004


In [14]:
# 7-NN with uniform voting, automatic choice of search structure and the
# Manhattan metric.
# leaf_size=30 is the leaf capacity used if a tree index is selected; it is
# not the number of leaf nodes.
K71 = KNeighborsClassifier(
    n_neighbors=7,
    weights='uniform',
    algorithm='auto',
    leaf_size=30,
    metric='manhattan',
)
K71.fit(X_train, y_train)

y_pred71 = K71.predict(X_test)

# Misclassification rate on the test set; the recorded output shows it is
# the complement of the accuracy printed just before it.
error71 = zero_one_loss(y_test, y_pred71)

# Report order: per-class table, overall accuracy, then the error rate.
print(classification_report(y_test, y_pred71))
print(accuracy_score(y_test, y_pred71))
print(error71)

              precision    recall  f1-score   support

           0       0.96      0.99      0.98       980
           1       0.93      1.00      0.96      1135
           2       0.98      0.94      0.96      1032
           3       0.96      0.96      0.96      1010
           4       0.97      0.95      0.96       982
           5       0.95      0.97      0.96       892
           6       0.98      0.98      0.98       958
           7       0.95      0.96      0.95      1028
           8       0.99      0.91      0.95       974
           9       0.95      0.95      0.95      1009

    accuracy                           0.96     10000
   macro avg       0.96      0.96      0.96     10000
weighted avg       0.96      0.96      0.96     10000

0.9615
0.03849999999999998


In [16]:
# 7-NN with distance-weighted voting, a KD-tree index and the Minkowski metric
# (Euclidean when p is left at its default of 2, per the scikit-learn docs).
# leaf_size=10 is the leaf capacity passed to the tree (a speed/memory knob
# per the scikit-learn docs), not the number of leaf nodes.
#
# NOTE(review): since Minkowski with the default p should be Euclidean, and
# KD-tree vs. ball-tree should only change how neighbours are searched, this
# model would be expected to score like the 7-NN ball_tree/euclidean model
# (recorded accuracy 0.97). The output recorded for this cell (0.9623) looks
# like it came from a different setting — possibly metric='manhattan'.
# Re-run on a fresh kernel to confirm which configuration was intended.
K72 = KNeighborsClassifier(
    n_neighbors=7,
    weights='distance',
    algorithm='kd_tree',
    leaf_size=10,
    metric='minkowski',
)
K72.fit(X_train, y_train)

y_pred72 = K72.predict(X_test)

# Misclassification rate on the test set (expected to be the complement of
# the accuracy printed just before it).
error72 = zero_one_loss(y_test, y_pred72)

# Report order: per-class table, overall accuracy, then the error rate.
print(classification_report(y_test, y_pred72))
print(accuracy_score(y_test, y_pred72))
print(error72)

              precision    recall  f1-score   support

           0       0.96      0.99      0.98       980
           1       0.93      1.00      0.96      1135
           2       0.99      0.94      0.96      1032
           3       0.97      0.96      0.97      1010
           4       0.98      0.95      0.96       982
           5       0.96      0.96      0.96       892
           6       0.97      0.98      0.98       958
           7       0.94      0.96      0.95      1028
           8       0.99      0.92      0.95       974
           9       0.95      0.95      0.95      1009

    accuracy                           0.96     10000
   macro avg       0.96      0.96      0.96     10000
weighted avg       0.96      0.96      0.96     10000

0.9623
0.037699999999999956


In [17]:
# 7-NN with distance-weighted voting, a ball-tree index and the Euclidean metric.
# leaf_size=10 is the leaf capacity passed to the tree (a speed/memory knob
# per the scikit-learn docs), not the number of leaf nodes.
K73 = KNeighborsClassifier(
    n_neighbors=7,
    weights='distance',
    algorithm='ball_tree',
    leaf_size=10,
    metric='euclidean',
)
K73.fit(X_train, y_train)

y_pred73 = K73.predict(X_test)

# Misclassification rate on the test set; the recorded output shows it is
# the complement of the accuracy printed just before it.
error73 = zero_one_loss(y_test, y_pred73)

# Report order: per-class table, overall accuracy, then the error rate.
print(classification_report(y_test, y_pred73))
print(accuracy_score(y_test, y_pred73))
print(error73)

              precision    recall  f1-score   support

           0       0.97      0.99      0.98       980
           1       0.96      1.00      0.98      1135
           2       0.99      0.96      0.97      1032
           3       0.97      0.96      0.97      1010
           4       0.98      0.96      0.97       982
           5       0.97      0.97      0.97       892
           6       0.98      0.99      0.98       958
           7       0.96      0.96      0.96      1028
           8       0.99      0.95      0.96       974
           9       0.95      0.96      0.96      1009

    accuracy                           0.97     10000
   macro avg       0.97      0.97      0.97     10000
weighted avg       0.97      0.97      0.97     10000

0.97
0.030000000000000027
