In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

# End-to-end KNN baseline: load the CSV, hold out a test split, standardise the
# features, fit a k-nearest-neighbours classifier and report basic metrics.

# --- Configuration: every value a reader might want to tune lives here ---
DATA_PATH = "diabetes.csv"
TARGET_COL = "Outcome"   # 1 = diabetic, 0 = non-diabetic
TEST_SIZE = 0.2          # fraction of rows held out for evaluation
RANDOM_STATE = 42        # fixed seed so the split is reproducible
N_NEIGHBORS = 5          # k for the KNN classifier

# 1. Load the dataset
df = pd.read_csv(DATA_PATH)

# Show first few rows
print(df.head())

# Fail early with an actionable message (still a KeyError, as df.drop would
# raise) if the file does not have the expected target column.
if TARGET_COL not in df.columns:
    raise KeyError(
        f"Target column {TARGET_COL!r} not found in {DATA_PATH}; "
        f"available columns: {list(df.columns)}"
    )

# 2. Separate features and target
# NOTE(review): the preview shows 0 for Insulin / SkinThickness in several rows;
# these look like placeholders for missing measurements rather than real
# readings — confirm, and consider imputing them before scaling.
X = df.drop(columns=[TARGET_COL])   # Features
y = df[TARGET_COL]                  # Target

# 3. Split into train and test sets
# NOTE(review): the split is not stratified; if the classes are imbalanced,
# passing stratify=y would keep the class ratio equal in both splits — confirm
# whether that is wanted (it changes the reported numbers).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# 4. Normalize (scale) features
# The scaler is fitted on the training rows only and then applied to the test
# rows, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Train KNN model
knn = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)
knn.fit(X_train_scaled, y_train)

# 6. Make predictions
y_pred = knn.predict(X_test_scaled)

# 7. Evaluate performance
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
error_rate = 1 - accuracy
# zero_division=0 makes the degenerate case explicit: if the model never
# predicts (or the test split never contains) the positive class, the metric is
# reported as 0 instead of relying on the implicit "warn" default.
precision = precision_score(y_test, y_pred, zero_division=0)
recall = recall_score(y_test, y_pred, zero_division=0)

# 8. Print results
print("\n K-Nearest Neighbors Results:")
print("Confusion Matrix:\n", cm)
print(f"Accuracy: {accuracy:.3f}")
print(f"Error Rate: {error_rate:.3f}")
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")


   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   Pedigree  Age  Outcome  
0     0.627   50        1  
1     0.351   31        0  
2     0.672   32        1  
3     0.167   21        0  
4     2.288   33        1  

✅ K-Nearest Neighbors Results:
Confusion Matrix:
 [[79 20]
 [27 28]]
Accuracy: 0.695
Error Rate: 0.305
Precision: 0.583
Recall: 0.509
