In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neighbors import KNeighborsClassifier

# Read the CSV from the working directory and preview its first five rows.
# NOTE(review): the preview includes an "Unnamed: 0" column that looks like a
# saved row index rather than a measurement - confirm before modelling with it.
raw_df = pd.read_csv(filepath_or_buffer="processed_cleveland.csv")
raw_df.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,2
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0


In [6]:
# Build the modelling frame from the raw table:
#   * discard "Unnamed: 0" - in the preview it holds 0, 1, 2, ... and appears
#     to be the row index written out with the CSV, so it must not reach the
#     model as a feature (errors='ignore' keeps this cell working if the
#     column is absent);
#   * turn '?' placeholders into NaN, drop incomplete rows, cast to float.
df = (
    raw_df
    .drop(columns=['Unnamed: 0'], errors='ignore')
    .replace('?', np.nan)
    .dropna()
    .astype(float)
)

# Collapse `num` into a binary label: 1 when num > 0, otherwise 0.
df['target'] = (df['num'] > 0).astype(int)

# Features are every remaining column except the raw and binarised labels.
X = df.drop(columns=['num', 'target'])
y = df['target']

# 80/20 split with a fixed seed; stratify=y keeps the class ratio the same in
# both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Fit the scaler on the training rows only and reuse those statistics for the
# test rows, so nothing about the test set influences the transformation.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Show the class proportions of the training labels.
y_train.value_counts(normalize=True)

target
0    0.540084
1    0.459916
Name: proportion, dtype: float64

In [7]:
def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between ``a`` and ``b``.

    The element-wise difference is squared, summed over all elements and
    square-rooted, so the result is a single scalar.
    """
    delta = a - b
    squared_total = np.sum(np.square(delta))
    return np.sqrt(squared_total)


def knn_predict(X_train, y_train, X_query, k = 5):
    """Predict binary labels with a brute-force k-nearest-neighbours vote.

    For every query sample the Euclidean distance to each training sample is
    computed, the ``k`` closest training samples are selected, and the
    prediction is 1 when the mean of their labels is at least 0.5, otherwise 0
    (an even split therefore counts as 1).

    Parameters
    ----------
    X_train : array-like
        Training samples, one per row (first axis). A 1-D input is read as
        samples with a single feature each.
    y_train : array-like
        Numeric labels, one per training sample. Labels are read by position;
        any pandas index is ignored. The thresholded-mean vote is only
        meaningful for labels coded as 0/1.
    X_query : array-like
        Samples to classify, laid out like ``X_train``.
    k : int, default 5
        Number of neighbours that vote. When ``k`` exceeds the number of
        training samples, every training sample votes.

    Returns
    -------
    numpy.ndarray
        Integer array of 0/1 predictions, one per query sample.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1, ``X_train`` is empty, ``y_train`` and
        ``X_train`` differ in length, or training and query samples have a
        different number of features.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    # Convert once so lists, ndarrays, Series and DataFrames are all accepted
    # and rows are always addressed by position.
    train_points = np.asarray(X_train, dtype=float)
    train_labels = np.asarray(y_train)
    query_points = np.asarray(X_query, dtype=float)

    if train_points.size == 0:
        raise ValueError("X_train must contain at least one sample")
    if len(train_labels) != len(train_points):
        raise ValueError(
            f"X_train has {len(train_points)} samples but y_train has "
            f"{len(train_labels)} labels"
        )
    if query_points.size == 0:
        return np.empty(0, dtype=int)

    # Flatten each sample to one feature vector so that (n,) and (n, d) inputs
    # are handled by the same distance expression below.
    train_points = train_points.reshape(len(train_points), -1)
    query_points = query_points.reshape(len(query_points), -1)
    if train_points.shape[1] != query_points.shape[1]:
        raise ValueError(
            f"X_train has {train_points.shape[1]} features per sample but "
            f"X_query has {query_points.shape[1]}"
        )

    predictions = np.empty(len(query_points), dtype=int)
    for row, x in enumerate(query_points):
        # Distance from this query to every training sample in one vectorised step.
        distances = np.sqrt(np.sum((train_points - x) ** 2, axis=1))
        # A stable sort makes equidistant neighbours resolve to the earlier
        # training row, so repeated runs give the same answer.
        nearest_indices = np.argsort(distances, kind="stable")[:k]
        predictions[row] = int(train_labels[nearest_indices].mean() >= 0.5)
    return predictions


# Classify the scaled test rows with the hand-written KNN (k=5). The training
# labels are passed with a fresh 0..n-1 index.
simple_knn_preds = knn_predict(
    X_train_scaled,
    y_train.reset_index(drop=True),
    X_test_scaled,
    k=5,
)

# Report overall accuracy, then the per-class precision/recall/F1 table.
print(
    "Simple KNN Accuracy:",
    accuracy_score(y_test, simple_knn_preds),
)
print(
    classification_report(y_test, simple_knn_preds)
)

Simple KNN Accuracy: 0.8833333333333333
              precision    recall  f1-score   support

           0       0.86      0.94      0.90        32
           1       0.92      0.82      0.87        28

    accuracy                           0.88        60
   macro avg       0.89      0.88      0.88        60
weighted avg       0.89      0.88      0.88        60



In [8]:
# Reference run: scikit-learn's KNN with the same k=5 on the same scaled
# split. fit() returns the estimator itself, so construction and fitting are
# chained.
sklearn_knn = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)
sklearn_preds = sklearn_knn.predict(X_test_scaled)

# Report overall accuracy, then the per-class precision/recall/F1 table.
print(
    "scikit-learn KNN Accuracy:",
    accuracy_score(y_test, sklearn_preds),
)
print(
    classification_report(y_test, sklearn_preds)
)

scikit-learn KNN Accuracy: 0.8833333333333333
              precision    recall  f1-score   support

           0       0.86      0.94      0.90        32
           1       0.92      0.82      0.87        28

    accuracy                           0.88        60
   macro avg       0.89      0.88      0.88        60
weighted avg       0.89      0.88      0.88        60

