In [1]:
# Dataset source: https://archive.ics.uci.edu/ml/datasets/arrhythmia

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import roc_auc_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt

In [3]:
dataset = pd.read_csv('arrhythmia.data', header=None)
print(dataset.shape)

(452, 280)


In [4]:
dataset = dataset.replace(to_replace='?', value= np.nan)
imp = SimpleImputer(missing_values=np.nan, strategy='median')
imp.fit(dataset)
new1 = imp.transform(dataset)

In [5]:
X = new1[:,:-1]
Y = new1[:,-1]
x_train, x_test, y_train, y_test = train_test_split(X,Y)

In [6]:
k_values = [1,7,16,30]
metrics = ['euclidean', 'manhattan', 'cosine']

for k in k_values:
    for mtr in metrics:
        neigh = KNeighborsClassifier(n_neighbors=k, metric=mtr)
        neigh.fit(x_train, y_train)
        y_pred = neigh.predict(x_train)
#         roc = roc_auc_score(y_train, y_pred, average=None)
        conf_mtx = confusion_matrix(y_train, y_pred)
        f1 = f1_score(y_train, y_pred, average=None)
        print("K value: " + str(k) + "\nMetric: " + mtr + "\nConfusion Matrix: \n" + str(conf_mtx) + "\nF1-Score: " + str(f1) +"\n")

K value: 1
Metric: euclidean
Confusion Matrix: 
[[181   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0  31   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0  12   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0  11   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0  12   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0  19   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   3   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   1   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   8   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0  39   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   3   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   4   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0  15]]
F1-Score: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]

K value: 1
Metric: manhattan
Confusion Matrix: 
[[181   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0  31   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0  12   0   0   0 

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


K value: 16
Metric: euclidean
Confusion Matrix: 
[[180   1   0   0   0   0   0   0   0   0   0   0   0]
 [ 28   3   0   0   0   0   0   0   0   0   0   0   0]
 [  9   0   3   0   0   0   0   0   0   0   0   0   0]
 [ 11   0   0   0   0   0   0   0   0   0   0   0   0]
 [ 11   0   0   0   0   0   0   0   0   1   0   0   0]
 [ 19   0   0   0   0   0   0   0   0   0   0   0   0]
 [  3   0   0   0   0   0   0   0   0   0   0   0   0]
 [  1   0   0   0   0   0   0   0   0   0   0   0   0]
 [  6   1   0   0   0   0   0   0   1   0   0   0   0]
 [ 37   0   0   0   0   0   0   0   0   2   0   0   0]
 [  3   0   0   0   0   0   0   0   0   0   0   0   0]
 [  4   0   0   0   0   0   0   0   0   0   0   0   0]
 [ 15   0   0   0   0   0   0   0   0   0   0   0   0]]
F1-Score: [0.70866142 0.16666667 0.4        0.         0.         0.
 0.         0.         0.22222222 0.0952381  0.         0.
 0.        ]

K value: 16
Metric: manhattan
Confusion Matrix: 
[[181   0   0   0   0   0   0   0   0   0   

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


K value: 30
Metric: manhattan
Confusion Matrix: 
[[181   0   0   0   0   0   0   0   0   0   0   0   0]
 [ 31   0   0   0   0   0   0   0   0   0   0   0   0]
 [ 12   0   0   0   0   0   0   0   0   0   0   0   0]
 [ 11   0   0   0   0   0   0   0   0   0   0   0   0]
 [ 12   0   0   0   0   0   0   0   0   0   0   0   0]
 [ 19   0   0   0   0   0   0   0   0   0   0   0   0]
 [  3   0   0   0   0   0   0   0   0   0   0   0   0]
 [  1   0   0   0   0   0   0   0   0   0   0   0   0]
 [  8   0   0   0   0   0   0   0   0   0   0   0   0]
 [ 38   0   0   0   0   0   0   0   0   1   0   0   0]
 [  3   0   0   0   0   0   0   0   0   0   0   0   0]
 [  4   0   0   0   0   0   0   0   0   0   0   0   0]
 [ 15   0   0   0   0   0   0   0   0   0   0   0   0]]
F1-Score: [0.69749518 0.         0.         0.         0.         0.
 0.         0.         0.         0.05       0.         0.
 0.        ]

K value: 30
Metric: cosine
Confusion Matrix: 
[[181   0   0   0   0   0   0   0   0   0   0  

  'precision', 'predicted', average, warn_for)
