In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from ucimlrepo import fetch_ucirepo 
  
# Download dataset id 45 from the UCI ML repository
# (presumably the Heart Disease dataset, going by the variable name).
heart_disease = fetch_ucirepo(id=45)

# Feature table converted to a plain NumPy array.
X = heart_disease.data.features.values

# First column of the targets table, as a 1-D NumPy array of labels.
y = heart_disease.data.targets.iloc[:, 0].values



### Define KNN Class

In [2]:
def calculate_distance(test, train, method="euclidean"):
    """Return the distance between two numeric feature vectors.

    Parameters
    ----------
    test, train : array-like
        Numeric vectors of identical (or broadcast-compatible) shape.
        Lists, tuples and NumPy arrays of any numeric dtype are accepted.
    method : str, default "euclidean"
        ``"manhattan"`` selects the L1 distance (sum of absolute differences).
        Any other value falls back to the Euclidean (L2) distance.

    Returns
    -------
    numpy.float64
        The distance, summed over every element of the difference.
    """
    # Subtract in float64: unsigned integer inputs would otherwise wrap around
    # (e.g. uint8 1 - 2 == 255) and plain Python lists do not support `-`.
    diff = np.asarray(test, dtype=np.float64) - np.asarray(train, dtype=np.float64)
    if method == "manhattan":
        return np.sum(np.abs(diff))
    return np.sqrt(np.sum(diff ** 2))

def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted labels. Both are flattened to 1-D, so a column
        vector of shape (n, 1) and a flat vector of shape (n,) are compatible.

    Returns
    -------
    float
        Value in [0, 1]; ``nan`` when both inputs are empty.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of labels.
    """
    # Convert first: `list == list` yields a single bool, not an element-wise mask.
    true_labels = np.ravel(np.asarray(y_true))
    predicted_labels = np.ravel(np.asarray(y_pred))
    if true_labels.size != predicted_labels.size:
        raise ValueError(
            "y_true and y_pred must contain the same number of labels, got "
            f"{true_labels.size} and {predicted_labels.size}"
        )
    if true_labels.size == 0:
        # No samples: the ratio is undefined.
        return np.nan
    return np.mean(true_labels == predicted_labels)


In [3]:
class KNN:
    """Brute-force k-nearest-neighbours classifier.

    Parameters
    ----------
    k : int, default 3
        Number of nearest training samples that vote on each prediction.
    metric : str, default "euclidean"
        Distance name forwarded to ``calculate_distance``.
    """

    def __init__(self, k=3, metric="euclidean"):
        self.k = k
        self.metric = metric

    @staticmethod
    def _l2_normalize(X):
        """Scale each row of ``X`` to unit Euclidean length; all-zero rows stay zero."""
        X = np.asarray(X, dtype=float)
        norms = np.linalg.norm(X, axis=1, keepdims=True)
        # Avoid 0/0 for all-zero rows. Rows containing NaN simply stay NaN.
        norms[norms == 0] = 1.0
        return X / norms

    def fit(self, X, y, preprocess_=False):
        """Store the training data used for neighbour lookup.

        Parameters
        ----------
        X : array-like, 2-D
            Training features, one row per sample.
        y : array-like
            Training labels, one per row of ``X``.
        preprocess_ : bool, default False
            When true, rows with a missing value in any feature or in the
            label are dropped and each remaining feature row is scaled to unit
            L2 norm. The same scaling is then applied to the samples passed to
            ``predict``.
        """
        # Remembered so that predict() transforms queries exactly like the training rows.
        self._preprocess = bool(preprocess_)
        if self._preprocess:
            # Join features and labels so dropna() removes them together.
            df = pd.concat([pd.DataFrame(X), pd.DataFrame(y)], axis=1).dropna()
            X = self._l2_normalize(df.iloc[:, :-1].values)
            y = df.iloc[:, -1].values
        # Plain arrays: positional row iteration / indexing is what prediction relies on.
        self.X_train = np.asarray(X)
        self.y_train = np.asarray(y)

    def predict(self, X):
        """Predict a label for every row of ``X``.

        Rows are scaled like the training rows when ``fit`` was called with
        ``preprocess_=True``. Rows containing NaN are not dropped (the output
        keeps one entry per input row), but their prediction is not meaningful
        because every distance is NaN.

        Returns
        -------
        numpy.ndarray
            One predicted label per input row.
        """
        X = np.asarray(X)
        if getattr(self, "_preprocess", False):
            X = self._l2_normalize(X)
        return np.array([self.make_predictions(x) for x in X])

    def make_predictions(self, x):
        """Return the majority label among the ``k`` training samples nearest to ``x``.

        Ties are resolved in favour of the smallest label.
        """
        distances = [calculate_distance(x, x_train, self.metric) for x_train in self.X_train]
        nearest = np.argsort(distances)[:self.k]
        nearest_labels = np.asarray(self.y_train)[nearest]
        # np.unique returns sorted labels, so argmax picks the smallest label on a tie
        # and, unlike np.bincount, labels need not be non-negative integers.
        labels, counts = np.unique(nearest_labels, return_counts=True)
        return labels[np.argmax(counts)]

In [4]:
# Hold out 20% of the samples for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Classifier that votes over the 2 nearest training samples using Manhattan distance.
# NOTE(review): with an even k a 1-1 vote is possible; the tie goes to the smaller label.
knn = KNN(metric="manhattan", k=2)

In [6]:
# preprocess_=True: fit drops training rows with missing values and normalises the features
# before storing them.
knn.fit(X=X_train, y=y_train, preprocess_=True)

In [7]:
# One predicted label per row of the held-out feature matrix.
y_pred = knn.predict(X=X_test)

In [9]:
# Share of held-out labels that the classifier predicted correctly.
accuracy_score = accuracy(y_true=y_test, y_pred=y_pred)