### Importing libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

### Load the dataset

In [7]:
# Labels for the 13 columns, in file order.
Column = [
    'age', 'anaemia', 'creatinine_phosphokinase', 'diabetes', 'ejection_fraction',
    'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium',
    'sex', 'smoking', 'time', 'DEATH_EVENT',
]

# header=None makes pandas treat every line of the file as data, so the
# labels above are attached afterwards.
# NOTE(review): the file name carries a doubled ".csv" extension - confirm it
# matches the file on disk.
df = pd.read_csv("hf.csv.csv", header=None).set_axis(Column, axis="columns")

df.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


### Define features (X) and target variable (y)

In [9]:
# Feature columns, in the order the scaler and the classifier receive them.
feature_columns = [
    'age', 'anaemia', 'creatinine_phosphokinase', 'diabetes', 'ejection_fraction',
    'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium',
    'sex', 'smoking', 'time',
]

# Keep the features RAW here. Scaling the whole table before the train-test
# split would leak test-row statistics into training, and because the split
# cell fits `standardise` again, the scaler would end up fitted on data that
# was already standardized - making it useless for the raw user input that
# the prediction cell passes to `standardise.transform`.
x_values = df[feature_columns]

y_values = df[['DEATH_EVENT']]

# Created unfitted on purpose: the train-test split cell fits it on the
# training rows only.
standardise = StandardScaler()

# Standardized preview, for display only. A separate throwaway scaler is used
# so that `standardise` stays unfitted until the split cell runs.
x_values_df = pd.DataFrame(
    StandardScaler().fit_transform(x_values),
    columns=feature_columns,
)

x_values_df.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time
0,1.192945,-0.871105,0.000166,-0.847579,-1.53056,1.359272,0.01681648,0.490057,-1.504036,0.735688,-0.687682,-1.629502
1,-0.491279,-0.871105,7.51464,-0.847579,-0.007077,-0.735688,7.53566e-09,-0.284552,-0.141976,0.735688,-0.687682,-1.603691
2,0.350833,-0.871105,-0.449939,-0.847579,-1.53056,-0.735688,-1.038073,-0.0909,-1.731046,0.735688,1.454161,-1.590785
3,-0.912335,1.147968,-0.486071,-0.847579,-1.53056,-0.735688,-0.5464741,0.490057,0.085034,0.735688,-0.687682,-1.590785
4,0.350833,1.147968,-0.435486,1.17983,-1.53056,-0.735688,0.6517986,1.264666,-4.682176,-1.359272,-0.687682,-1.577879


### Train-test split

In [10]:
# Hold out a quarter of the rows for evaluation; the fixed seed makes the
# split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x_values,
    y_values,
    test_size=0.25,
    random_state=10,
)

# Fit the scaler on the training rows, then apply those same parameters to
# both the training rows and the held-out rows.
standardise.fit(x_train)
x_train = standardise.transform(x_train)
x_test = standardise.transform(x_test)

### Train KNN model

In [11]:
# k-nearest-neighbours classifier that votes over the 6 closest training rows.
KNN = KNeighborsClassifier(n_neighbors=6)

# y_train is a single-column DataFrame; flatten it to a 1-D label array for fit().
KNN.fit(x_train, y_train.to_numpy().ravel())

# Mean accuracy on the held-out rows.
accuracy = KNN.score(x_test, y_test)
print(f"KNN Model Accuracy: {accuracy:.2f}")

KNN Model Accuracy: 0.75


### User Input & Prediction

In [None]:
def _ask_number(prompt, binary=False):
    """Prompt until the user enters a usable number and return it as a float.

    Parameters
    ----------
    prompt : str
        Text shown to the user.
    binary : bool, default False
        When True, only the values 0 and 1 are accepted.

    Returns
    -------
    float
        The finite number the user typed.
    """
    while True:
        try:
            value = float(input(prompt))
        except ValueError:
            # Re-ask instead of aborting the cell on a typo.
            print("Please enter a numeric value.")
            continue
        if not np.isfinite(value):
            # float() accepts "nan" and "inf", which the model cannot use.
            print("Please enter a finite number.")
            continue
        if binary and value not in (0.0, 1.0):
            print("Please enter 1 or 0.")
            continue
        return value


# (prompt, is_binary) pairs, listed in the same order as x_values_df.columns.
questions = [
    ("Enter your age: ", False),
    ("Do you have anaemia? (1 or 0): ", True),
    ("Creatinine Phosphokinase: ", False),
    ("Do you have diabetes? (1 or 0): ", True),
    ("Ejection Fraction: ", False),
    ("Suffering from high blood pressure? (1 or 0): ", True),
    ("Platelets Level: ", False),
    ("Serum Creatinine: ", False),
    ("Serum Sodium: ", False),
    ("Sex? (1=Male, 0=Female): ", True),
    ("Do you smoke? (1 or 0): ", True),
    ("Time (Follow-up period in days): ", False),
]
answers = [_ask_number(text, binary=flag) for text, flag in questions]

# Build the single-row frame in one step (row label 0), with the same column
# labels as the feature table.
test = pd.DataFrame([answers], columns=x_values_df.columns)

# Scale the answers with the `standardise` object fitted in the split cell.
test_scaled = standardise.transform(test)

# Make prediction
Prediction = KNN.predict(test_scaled)

# Display result. predict() returns a one-element array for the single row,
# so check that element directly.
# NOTE(review): the model's target column is DEATH_EVENT, so "risk of heart
# disease" may not describe what is being predicted - confirm the wording.
if Prediction[0] == 0:
    print("You don't have a risk of heart disease.")
else:
    print("You have a risk of heart disease.")