# Step I: Importing required libraries

In [53]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Step II: Importing training and testing dataset for each heartbeat

In [54]:
train_heartbeat = pd.read_csv('ECG200_TRAIN.txt', delimiter='\t', header=None, names=['electrical_activity'])
train_heartbeat = train_heartbeat['electrical_activity'].str.split(expand=True)
train_heartbeat.rename(columns={0:'Heart_condition'}, inplace=True)
for i in train_heartbeat.columns:
    train_heartbeat[i] = pd.to_numeric(train_heartbeat[i])
train_heartbeat.head()

Unnamed: 0,Heart_condition,1,2,3,4,5,6,7,8,9,...,87,88,89,90,91,92,93,94,95,96
0,-1.0,0.502055,0.542163,0.722383,1.428885,2.136516,2.281149,1.936274,1.46889,1.008845,...,0.931043,0.610298,0.638894,0.684679,0.583238,0.640522,0.708585,0.705011,0.713815,0.433765
1,1.0,0.147647,0.804668,0.367771,0.243894,0.026614,-0.274402,0.096731,-0.747731,-1.609777,...,-0.533503,-0.400228,0.176084,1.111768,2.438428,2.734889,1.736054,0.036857,-1.265074,-0.208024
2,-1.0,0.316646,0.243199,0.370471,1.063738,1.678187,1.759558,1.697717,1.612159,1.168188,...,0.764229,0.610621,0.5529,0.566786,0.604002,0.777068,0.812345,0.748848,0.818042,0.539347
3,-1.0,1.168874,2.075901,1.76014,1.606446,1.949046,1.302842,0.459332,0.516412,0.85218,...,0.419006,0.723888,1.323947,2.136488,1.746597,1.47022,1.893512,1.256949,0.800407,0.73154
4,1.0,0.648658,0.752026,2.636231,3.455716,2.118157,0.52062,-0.188627,0.780818,0.933775,...,-0.097869,-0.136787,-0.340237,-0.089441,-0.080297,-0.192584,-0.304704,-0.454556,0.31459,0.58219


In [55]:
test_heartbeat = pd.read_csv('ECG200_TEST.txt', delimiter='\t', header=None, names=['electrical_activity'])
test_heartbeat = test_heartbeat['electrical_activity'].str.split(expand=True)
test_heartbeat.rename(columns={0:'Heart_condition'}, inplace=True)
for i in train_heartbeat.columns:
    test_heartbeat[i] = pd.to_numeric(test_heartbeat[i])
test_heartbeat.head()

Unnamed: 0,Heart_condition,1,2,3,4,5,6,7,8,9,...,87,88,89,90,91,92,93,94,95,96
0,1.0,0.425189,1.418599,2.668791,3.298629,2.264554,0.165179,-0.959727,0.282668,0.84285,...,-0.018196,-0.220493,-0.152557,-0.117011,-0.14516,-0.116668,-0.199262,-0.109837,-0.142488,0.098758
1,1.0,0.653929,2.17729,3.644783,2.279203,0.978367,-0.388296,-0.911911,-0.14833,0.305439,...,0.099883,0.141395,-0.239092,0.247424,-0.08515,0.0078,0.407868,-0.194915,0.171107,0.197027
2,1.0,0.404953,0.553996,0.724097,1.449039,2.012616,1.624477,1.204726,1.117511,0.562099,...,1.297938,1.243332,1.463616,1.694916,1.456115,1.570372,1.934584,1.497556,0.81257,0.390639
3,1.0,1.088088,2.011949,2.301733,1.630199,0.636768,-0.140502,0.547954,1.075071,0.896861,...,0.125239,0.071349,0.138197,0.116399,0.131198,0.119412,0.082968,0.151348,0.273915,0.349405
4,-1.0,0.443621,0.947285,1.924084,2.15979,1.499447,0.964508,0.223256,-0.424946,0.121279,...,0.900511,1.278688,2.317878,2.916254,2.211947,2.034753,2.071302,1.254178,1.037879,1.009615


# Step III: EDA

In [56]:
train_heartbeat['Heart_condition'].value_counts()

 1.0    69
-1.0    31
Name: Heart_condition, dtype: int64

***There is no class imbalance in the dataset, therefore we can move forward for applying the model for classifying into normal heatbeat and myocrdial infraction heartbeat.***

# Step IV: Building algorithm of KNN Model

In [57]:
def knn_model(p_minkowski, k_value):
    
    #Initializing counter variables
    count_normal = 0
    count_abnormal = 0
    predicted_heart_condition = 0
    count_true = 0
    count_false = 0
    count_TP = 0 
    count_FN = 0
    count_FP = 0
    count_TN = 0
    
    # To calculate minowski distance
    def minkowski_dist(train_heart, test_heart, p_minkowski):
        basic_term = sum(pow(abs(m-n), p_minkowski) for m, n in zip(train_heart, test_heart))
        if p_minkowski < 1:
            return basic_term
        else:
            return pow(basic_term, (1/p_minkowski))
    
    # To calculate number of right and wrong prediction from KNN model
    for i in range(0, len(test_heartbeat)):
        test_temp = test_heartbeat[test_heartbeat.drop('Heart_condition', axis=1).columns].values.tolist()[i]
        # To store the combination of heart_condition of training set and its distance
        validation_temp = pd.DataFrame(columns=[''])
        # To predict target class for each entry in testing set
        for j in range(0, len(train_heartbeat)):
            train_temp = train_heartbeat[train_heartbeat.drop('Heart_condition', axis=1).columns].values.tolist()[j]
            validation_temp = validation_temp.append(pd.Series([train_heartbeat['Heart_condition'][j], 
                                    minkowski_dist(train_temp, test_temp, p_minkowski)]),                                     
                                    ignore_index=True)
        
        # To sort the minkowski distance
        validation_temp.sort_values(by=[1], inplace=True)
        
        # To fetch minimum distance based on k-value
        for m in validation_temp[0][:k_value]:
            if m == 1:
                count_normal = count_normal + 1
            else:
                count_abnormal = count_abnormal + 1
         
        # To predict heart condition of each entry in testing set
        if count_normal>count_abnormal:
            predicted_heart_condition = 1
        else:
            predicted_heart_condition = -1                                            
        
        count_normal = 0
        count_abnormal = 0
            
        #Checking heart condition for accuracy and confusion matrix
        if (predicted_heart_condition == 1) and (predicted_heart_condition == test_heartbeat['Heart_condition'].loc[i]):
            count_TP = count_TP + 1
        elif(predicted_heart_condition == -1) and (predicted_heart_condition == test_heartbeat['Heart_condition'].loc[i]):
            count_TN = count_TN + 1
        elif(predicted_heart_condition == 1) and (predicted_heart_condition != test_heartbeat['Heart_condition'].loc[i]):
            count_FP = count_FP + 1
        else:
            count_FN = count_FN + 1
        
        # Dropping exisinng values from table to run for next entry of testing set
        validation_temp.drop([0,1], axis=1)
        
    accuracy = ((count_TP + count_TN)/(count_TP + count_TN + count_FP + count_FN))*100
    print('Accuracy of model with p as', p_minkowski, 'and k as ', k_value, ' is: ', accuracy)
    print('Confusion Matrix: ')
    confusion_mat = [[]]
    confusion_mat = [[count_TP, count_FN], [count_FP, count_TN]]
    print(confusion_mat)

# Step V: Evaluating algorithm of KNN Model

In [49]:
P = [0.5,1,2,4]
K = [3,5,11]
for p in P:
    for k in K:
        knn_model(p,k)

Accuracy of model with p as 0.5 and k as  3  is:  94.0
Confusion Matrix: 
[[63, 1], [5, 31]]
Accuracy of model with p as 0.5 and k as  5  is:  91.0
Confusion Matrix: 
[[62, 2], [7, 29]]
Accuracy of model with p as 0.5 and k as  11  is:  82.0
Confusion Matrix: 
[[57, 7], [11, 25]]
Accuracy of model with p as 1 and k as  3  is:  91.0
Confusion Matrix: 
[[62, 2], [7, 29]]
Accuracy of model with p as 1 and k as  5  is:  90.0
Confusion Matrix: 
[[62, 2], [8, 28]]
Accuracy of model with p as 1 and k as  11  is:  84.0
Confusion Matrix: 
[[57, 7], [9, 27]]
Accuracy of model with p as 2 and k as  3  is:  90.0
Confusion Matrix: 
[[61, 3], [7, 29]]
Accuracy of model with p as 2 and k as  5  is:  90.0
Confusion Matrix: 
[[61, 3], [7, 29]]
Accuracy of model with p as 2 and k as  11  is:  88.0
Confusion Matrix: 
[[61, 3], [9, 27]]
Accuracy of model with p as 4 and k as  3  is:  93.0
Confusion Matrix: 
[[62, 2], [5, 31]]
Accuracy of model with p as 4 and k as  5  is:  86.0
Confusion Matrix: 
[[59, 5]

# Step VI: Applying KNN model using library of python for k=3 and p=1

In [52]:
#Importing required libraries
knn= KNeighborsClassifier(n_neighbors=3, p=1)
knn.fit(train_heartbeat.drop('Heart_condition', axis=1),train_heartbeat['Heart_condition'])
predict_knn = knn.predict(test_heartbeat.drop('Heart_condition', axis=1))
from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(predict_knn, test_heartbeat['Heart_condition']))
print(confusion_matrix(predict_knn, test_heartbeat['Heart_condition']))

              precision    recall  f1-score   support

        -1.0       0.81      0.94      0.87        31
         1.0       0.97      0.90      0.93        69

    accuracy                           0.91       100
   macro avg       0.89      0.92      0.90       100
weighted avg       0.92      0.91      0.91       100

[[29  2]
 [ 7 62]]


***CONCLUSION: Accuracy of KNN model is same by building the whole algorithm and by using library of python for k=3 and p=1. Hence, it can be concluded that the algorithm has been correctly build to classify the heartbeat condition of noraml or abnormal.***