In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
import matplotlib.pyplot as plt
import seaborn as sns

# --- 1. Data Loading and Preparation ---
try:
    df = pd.read_csv('diabetes.csv')
except FileNotFoundError:
    # Abort via SystemExit instead of the interactive-only `exit()` helper:
    # the message is written to stderr and the process ends with a non-zero
    # status, so a missing dataset is reported as a failure, not a success.
    raise SystemExit(
        "Error: 'diabetes.csv' not found. Please ensure the file is in the correct directory."
    )

# Features (X): every column except the 'Outcome' label column (dropped by name).
X = df.drop(columns='Outcome').to_numpy()
# Target (y): the 'Outcome' column (expected to hold 0 or 1).
y = df['Outcome'].to_numpy()

# Split data into training (70%) and testing (30%) sets; the fixed
# random_state keeps the split reproducible between runs.
# NOTE(review): the split is not stratified on y -- confirm that is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# --- 2. Scaling (Essential for KNN) ---
# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so no test-set statistics influence scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (scaler.transform(split) for split in (X_train, X_test))

# --- 3. Model Training (K-Nearest Neighbors) ---
# Number of neighbours consulted per prediction (typically odd; usually tuned).
K = 11
# fit() returns the estimator itself, so construction and training are chained.
knn = KNeighborsClassifier(n_neighbors=K).fit(X_train_scaled, y_train)

# --- 4. Prediction and Evaluation ---
# Predict on the scaled test features (the model was trained on scaled data).
y_pred = knn.predict(X_test_scaled)

# Compute Metrics
accuracy = accuracy_score(y_test, y_pred)
error_rate = 1 - accuracy  # fraction of misclassified test samples
# Precision and recall are computed with scikit-learn's default binary
# settings, i.e. label 1 is treated as the positive class.
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
# Pin the label order so the matrix is always 2x2 with rows/columns ordered
# [0, 1], even if a class happens to be absent from y_test and y_pred; the
# plot of this matrix hard-codes exactly those two tick labels.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# --- 5. Output Results ---
# Assemble the textual report first and emit it in one go.
report_lines = [
    f"--- K-Nearest Neighbors (K={K}) Performance ---",
    f"Accuracy: {accuracy:.4f}",
    f"Error Rate: {error_rate:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall (Sensitivity): {recall:.4f}",
]
print("\n".join(report_lines))

# Header and raw matrix on separate lines, preceded by a blank line.
print("\nConfusion Matrix:", cm, sep="\n")

# Plot Confusion Matrix for better visualization
class_names = ['No Diabetes (0)', 'Diabetes (1)']  # shared by both axes
fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(
    cm,
    annot=True,   # write the count inside each cell
    fmt='d',      # counts are integers
    cmap='Blues',
    cbar=False,
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title(f'Confusion Matrix (K={K})')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
plt.show()