In [24]:
import statistics
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

  # Building the KNN classifier

In [25]:
class KNN_classifier():
    """K-nearest-neighbours classifier written from scratch.

    There is no fitting step. Every call receives the training rows directly,
    and each training row must carry its class label as its LAST entry.
    Query points contain the feature values only.

    Parameters
    ----------
    distance_metric : str
        Name of the distance to use: ``"euclidean"`` or ``"manhattan"``.

    Raises
    ------
    ValueError
        If ``distance_metric`` is not one of the supported names.
    """

    _SUPPORTED_METRICS = ("euclidean", "manhattan")

    def __init__(self, distance_metric):
        if distance_metric not in self._SUPPORTED_METRICS:
            raise ValueError(
                "distance_metric must be one of {}, got {!r}".format(
                    self._SUPPORTED_METRICS, distance_metric
                )
            )
        # Stored under its own attribute name. Assigning the string to
        # ``self.get_distance_metric`` would shadow the method of the same
        # name on the instance and make it uncallable.
        self.distance_metric = distance_metric

    def get_distance_metric(self, training_point, testing_point):
        """Return the distance between one training row and one query point.

        Parameters
        ----------
        training_point : sequence of numbers
            Feature values followed by the class label (the label is ignored).
        testing_point : sequence of numbers
            Feature values of the query point. Only the first
            ``len(training_point) - 1`` entries are used.

        Returns
        -------
        float
            Euclidean or Manhattan distance, depending on the configured metric.

        Raises
        ------
        ValueError
            If the query point has fewer features than the training row, or
            if the configured metric is not supported.
        """
        # The last entry of a training row is the target class, not a feature.
        n_features = len(training_point) - 1
        if len(testing_point) < n_features:
            raise ValueError(
                "testing_point has {} features but training rows have {}".format(
                    len(testing_point), n_features
                )
            )

        train_features = np.asarray(training_point[:n_features], dtype=float)
        test_features = np.asarray(testing_point[:n_features], dtype=float)
        difference = train_features - test_features

        if self.distance_metric == "euclidean":
            return float(np.sqrt(np.sum(difference ** 2)))
        if self.distance_metric == "manhattan":
            return float(np.sum(np.abs(difference)))

        # Reached only if the attribute was changed after construction.
        raise ValueError(
            "unsupported distance metric: {!r}".format(self.distance_metric)
        )

    def nearest_neighbour(self, X_train, test_data, k):
        """Return the ``k`` training rows closest to ``test_data``.

        Parameters
        ----------
        X_train : sequence of rows
            Training rows, each ending with its class label.
        test_data : sequence of numbers
            Feature values of the query point.
        k : int
            Number of neighbours to return; must satisfy
            ``1 <= k <= len(X_train)``.

        Returns
        -------
        list
            The ``k`` nearest training rows (labels included), closest first.

        Raises
        ------
        ValueError
            If ``k`` is outside ``1 .. len(X_train)``.
        """
        n_rows = len(X_train)
        if not 1 <= k <= n_rows:
            raise ValueError(
                "k must be between 1 and {}, got {}".format(n_rows, k)
            )

        # Pair every training row with its distance to the query point.
        distance_list = [
            (training_data, self.get_distance_metric(training_data, test_data))
            for training_data in X_train
        ]
        # Ascending by distance; the sort is stable, so ties keep input order.
        distance_list.sort(key=lambda pair: pair[1])

        # Keep only the rows; the distances are no longer needed.
        return [training_data for training_data, _ in distance_list[:k]]

    def predict(self, X_train, test_data, k):
        """Predict the class of ``test_data`` by majority vote of ``k`` neighbours.

        Parameters
        ----------
        X_train : sequence of rows
            Training rows, each ending with its class label.
        test_data : sequence of numbers
            Feature values of the query point.
        k : int
            Number of neighbours that take part in the vote.

        Returns
        -------
        The most common label among the ``k`` nearest training rows, as
        computed by ``statistics.mode``.
        """
        neighbours = self.nearest_neighbour(X_train, test_data, k)

        # Collect the label (last entry) of EVERY neighbour before voting.
        labels = [neighbour[-1] for neighbour in neighbours]

        return statistics.mode(labels)

In [26]:
# KNN classifier instance configured with the Euclidean distance metric.
classifier1 = KNN_classifier(distance_metric="euclidean")

In [27]:
# KNN classifier instance configured with the Manhattan distance metric.
classifier2 = KNN_classifier(distance_metric="manhattan")

In [28]:
# Load the heart dataset (heart.csv) on which the model will be used.
df = pd.read_csv("heart.csv")

In [29]:
# Preview the first five rows of the loaded frame.
df.head(5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [30]:
# Separate the label column (Y) from the remaining feature columns (X).
Y = df["target"]
X = df.drop(columns=["target"])

In [31]:
# Preview the feature frame (the "target" column has been dropped).
X.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2


In [32]:
# Preview the target series.
Y.head()

0    1
1    1
2    1
3    1
4    1
Name: target, dtype: int64

In [33]:
# X is a DataFrame and Y is a Series; convert both to NumPy arrays for easier traversal.
# np.asarray returns an existing ndarray unchanged, so this cell can be re-run safely
# (a second call to .to_numpy() would fail because ndarrays have no such method).

X = np.asarray(X)
Y = np.asarray(Y)

In [34]:
# Show the feature matrix followed by the label vector, one per line.
print(X, Y, sep="\n")

[[63.  1.  3. ...  0.  0.  1.]
 [37.  1.  2. ...  0.  0.  2.]
 [41.  0.  1. ...  2.  0.  2.]
 ...
 [68.  1.  0. ...  1.  2.  3.]
 [57.  1.  0. ...  1.  1.  3.]
 [57.  0.  1. ...  1.  1.  2.]]
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0]


In [35]:
# Train/test split: 30% of the rows go to the test set, with a fixed random_state.
# stratify=Y is used so that the classes in Y keep the same proportions in the train and test data.
X_train , X_test ,Y_train , Y_test = train_test_split(X,Y,test_size=0.3,stratify=Y, random_state=2)

In [36]:
# Compare the shapes of the full data and the training portion (features, then labels).
print(X.shape,X_train.shape,Y.shape,Y_train.shape)

(303, 13) (212, 13) (303,) (212,)


In [37]:
# The predict function has no separate fitting step, so X_train must also contain the target variable (as its last column).
# The test data contains only the features.

In [38]:
# Append the labels as the last column of the training matrix; the classifier
# reads each training row's class from its final entry.
# The guard keeps this cell idempotent: X_test holds features only, so once
# X_train is wider than X_test the labels are already attached and a re-run
# must not append a second label column.
if X_train.shape[1] == X_test.shape[1]:
    X_train = np.insert(X_train, X_train.shape[1], Y_train, axis=1)

In [39]:
# Show the training matrix after the label column was appended.
print(X_train)

[[41.  0.  1. ...  1.  2.  1.]
 [47.  1.  0. ...  0.  2.  1.]
 [46.  1.  0. ...  0.  3.  0.]
 ...
 [68.  1.  0. ...  2.  3.  0.]
 [60.  1.  2. ...  0.  2.  0.]
 [48.  0.  2. ...  0.  2.  1.]]


In [40]:
# Shape of the test matrix, which was left without a label column.
X_test.shape

(91, 13)

In [42]:
# X_train = training data with features and target
# X_test = test data without the target

# Column 13 is the label column that was inserted into X_train.
print(X_train[:,13])


[1. 1. 0. 1. 1. 0. 1. 1. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 1. 1. 1. 0. 1. 1.
 1. 0. 1. 1. 1. 1. 1. 1. 0. 1. 0. 1. 1. 1. 0. 0. 1. 1. 1. 1. 0. 1. 0. 0.
 1. 0. 1. 0. 0. 1. 1. 1. 1. 0. 1. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0. 1. 1. 0.
 0. 1. 1. 1. 1. 1. 0. 1. 1. 0. 0. 0. 0. 1. 1. 0. 0. 1. 1. 0. 0. 1. 1. 0.
 1. 1. 1. 0. 1. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 1. 1. 0. 0.
 0. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0.
 0. 1. 0. 1. 1. 0. 0. 1. 1. 0. 1. 0. 1. 1. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0.
 0. 1. 0. 1. 0. 1. 0. 1. 1. 1. 1. 0. 1. 1. 0. 1. 1. 1. 0. 1. 0. 1. 0. 0.
 1. 1. 1. 0. 0. 0. 1. 0. 1. 1. 0. 0. 0. 1. 1. 1. 1. 0. 0. 1.]
