In [1]:

# KNN PROJECT
# Heart Disease Risk Prediction (Fitness App)



import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Step 2: Load Dataset
# Read the records from a CSV expected in the current working directory.
df = pd.read_csv("Fitness_app.csv")

# Preview the first rows of the frame.
print("===== DATASET =====")
print(df.head())

# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
print("\n===== DATA INFO =====")
df.info()

# Per-column count of missing values.
print("\n===== MISSING VALUES =====")
print(df.isnull().sum())

===== DATASET =====
   Exercise  Diet  Stress  AtRisk
0         5     4       2       0
1         3     3       4       1
2         2     2       5       1
3         6     5       1       0
4         4     2       4       1

===== DATA INFO =====
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   Exercise  10 non-null     int64
 1   Diet      10 non-null     int64
 2   Stress    10 non-null     int64
 3   AtRisk    10 non-null     int64
dtypes: int64(4)
memory usage: 452.0 bytes
None

===== MISSING VALUES =====
Exercise    0
Diet        0
Stress      0
AtRisk      0
dtype: int64


In [3]:

# Step 3: Prepare Data

# Predictor columns fed to the classifier.
feature_columns = ["Exercise", "Diet", "Stress"]
X = df[feature_columns]

# Target column: 1 = At Risk, 0 = Not at Risk
y = df["AtRisk"]

# Hold out 30% of the rows for testing; the fixed random_state makes the
# split reproducible. No stratify argument is passed, so the class balance of
# each split is left to the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [5]:
# Step 4: Feature Scaling (VERY IMPORTANT)

# Learn the scaling statistics from the training rows only, then apply the
# same fitted scaler to both splits so the test rows never influence the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


# Step 5: Train Models with Different K Values

# Candidate neighbour counts, and a K -> accuracy lookup filled by the loop.
k_values = [1, 3, 5]
accuracies = {}

for k in k_values:
    # Fit on the scaled training rows, then predict the held-out rows.
    model = KNeighborsClassifier(n_neighbors=k).fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    # Record the score under its K and report it.
    acc = accuracies[k] = accuracy_score(y_test, y_pred)
    print(f"Accuracy for K={k}: {acc}")


# Step 6: Select Best Model

# Take the K whose recorded accuracy is highest. On a tie max() keeps the
# first maximal entry, i.e. the K that was inserted into `accuracies` first.
# NOTE(review): these accuracies were measured on the test split, so the
# winning score is an optimistic estimate of real-world performance.
best_k = max(accuracies.items(), key=lambda item: item[1])[0]
print("\nBest K:", best_k)

# Refit a fresh classifier with the chosen K on the scaled training rows.
best_model = KNeighborsClassifier(n_neighbors=best_k).fit(X_train_scaled, y_train)


# Step 7: Predict New User

# New user: Exercise=4, Diet=3, Stress=4
new_user = [[4, 3, 4]]

# The scaler was fitted on a DataFrame, so it remembers the column names.
# Passing a bare list makes scikit-learn (>= 1.0) warn that the input has no
# valid feature names, and nothing checks that the values are in the right
# order. Labelling the row with the training columns avoids both problems.
new_user_frame = pd.DataFrame(new_user, columns=X_train.columns)
new_user_scaled = scaler.transform(new_user_frame)

# Classify the scaled row with the selected model (1 = At Risk, 0 = Not at Risk).
prediction = best_model.predict(new_user_scaled)

print(prediction)



Accuracy for K=1: 1.0
Accuracy for K=3: 1.0
Accuracy for K=5: 1.0

Best K: 1
[1]


