# Imports

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
import math

# Creating DataFrame

In [3]:
# Toy heart-disease dataset: 15 patient records, stored as one list per column.
# Exercise_Induced_Angina and Heart_Disease are 0/1 flags. The target column
# (Heart_Disease) holds 8 positives and 7 negatives, i.e. it is roughly balanced.
heart_data = dict(
    Age=[29, 54, 37, 45, 63, 50, 48, 40, 58, 42, 55, 47, 35, 62, 53],
    Cholesterol=[200, 230, 180, 195, 240, 210, 190, 220, 210, 205, 250, 215, 185, 245, 225],
    Max_Heart_Rate=[150, 120, 165, 140, 110, 155, 145, 135, 125, 145, 115, 130, 160, 118, 135],
    Exercise_Induced_Angina=[1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0],
    Heart_Disease=[1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1],
)

In [4]:
# Build the working DataFrame: each key of heart_data becomes a column.
df = pd.DataFrame(data=heart_data)

# Handling Missing Values

In [5]:
# Impute missing values column by column, each with its own strategy.
# Every fill value is derived from the column it is applied to.
age_mode = df['Age'].mode()[0]                            # most frequent value
cholesterol_mean = math.floor(df['Cholesterol'].mean())   # mean, rounded down
heart_rate_median = df['Max_Heart_Rate'].median()

df['Age'] = df['Age'].fillna(age_mode)
df['Cholesterol'] = df['Cholesterol'].fillna(cholesterol_mean)
df['Max_Heart_Rate'] = df['Max_Heart_Rate'].fillna(heart_rate_median)
# Binary flag: carry the previous row's value forward into any gap.
df['Exercise_Induced_Angina'] = df['Exercise_Induced_Angina'].ffill()

# Encoding

In [6]:
# Collect the names of every object-dtype column so they can be converted next.
object_cols = df.select_dtypes(include=['object']).columns

In [7]:
# Cast the selected object-dtype columns to integers, then write them back
# into the same columns of df.
converted = df[object_cols].astype(int)
df[object_cols] = converted

In [8]:
# Display the DataFrame after imputation and type conversion.
df

Unnamed: 0,Age,Cholesterol,Max_Heart_Rate,Exercise_Induced_Angina,Heart_Disease
0,29,200,150,1,1
1,54,230,120,0,1
2,37,180,165,0,0
3,45,195,140,1,0
4,63,240,110,1,1
5,50,210,155,0,0
6,48,190,145,1,1
7,40,220,135,0,0
8,58,210,125,1,1
9,42,205,145,0,0


# Splitting

In [9]:
# Separate the feature matrix (every column except the target) from the
# target vector.
target_column = 'Heart_Disease'
x = df.drop(target_column, axis=1)
y = df[target_column]

In [10]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Normalisation

In [11]:
# Min-max scaler with its default target range spelled out explicitly.
normalizer = MinMaxScaler(feature_range=(0, 1))

In [12]:
# Scale the features with the min-max scaler. The scaler is fitted on the
# training split only and then reused for the test split, so no test-set
# statistics leak into training.
#
# fit_transform / transform return plain NumPy arrays, so the frames are
# rebuilt with the original column names AND the original row index. Without
# index=..., the rows would be renumbered 0..n-1 and would no longer line up
# by label with y_train / y_test, which keep the shuffled index produced by
# the split.
x_train = pd.DataFrame(
    normalizer.fit_transform(x_train),
    columns=x_train.columns,
    index=x_train.index,
)
x_test = pd.DataFrame(
    normalizer.transform(x_test),
    columns=x_test.columns,
    index=x_test.index,
)

# Training

In [13]:
# k-nearest-neighbours classifier that uses the 3 closest training points.
model = KNeighborsClassifier(
    n_neighbors=3,
)

In [14]:
# Fit the classifier on the scaled training features and their labels.
model.fit(X=x_train, y=y_train)

# Testing

In [15]:
# Predict a label for every row of the scaled test features.
y_pred = model.predict(X=x_test)

In [16]:
# Fraction of test rows whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [17]:
# Show the accuracy computed on the held-out test split.
accuracy

0.3333333333333333

In [19]:
# Confusion matrix of the test predictions: rows are the true classes,
# columns are the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

array([[1, 1],
       [1, 0]])