In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import ConfusionMatrixDisplay, classification_report


In [None]:
# Read the CSV into a DataFrame; the path is relative to the notebook's working directory.
df = pd.read_csv('4_diabetes.csv')

print(df.columns)
# Only the last expression of a cell is auto-rendered, so a bare `df.head()` in the
# middle of the cell is evaluated and thrown away. Render the preview explicitly
# (`display` is provided by the notebook kernel without an import).
display(df.head())

# Bar chart of the number of rows in each Outcome class.
sns.countplot(x='Outcome', data=df)
plt.title('Class Distribution: Outcome')
plt.show()

# Exact per-class row counts for the same column.
print(df['Outcome'].value_counts())

In [3]:
# Separate the label from the predictors:
#   y -> the 'Outcome' column (target)
#   X -> every remaining column of df (features)
y = df['Outcome']
X = df.drop(columns=['Outcome'])

In [4]:
# Learn each feature's min/max from X, then rescale X with that mapping
# (MinMaxScaler's default output range is [0, 1]).
# NOTE(review): the scaler is fit on all rows of X, so X_scaled carries statistics
# from rows that may later be held out for testing -- confirm this is intended.
scaler = MinMaxScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Hold out 25% of the rows for testing; the fixed random_state makes the split repeatable.
# The raw features `X` are split instead of the pre-scaled `X_scaled`: that array was
# scaled with min/max values learned from every row, which leaks test-set information
# into the training data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Fit the scaler on the training rows only, then apply the same mapping to the test rows.
# Test values outside the training min/max will fall slightly outside [0, 1]; that is expected.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print(f'Training data shape: {X_train.shape}')
print(f'Testing data shape: {X_test.shape}')

In [None]:
# Build a 5-nearest-neighbours classifier and train it on the training split.
# `fit` returns the estimator itself, so the trailing bare `knn` renders the same
# object the cell would show if it ended with the `fit(...)` call.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
knn

In [None]:
# Predict labels for the held-out rows with the fitted classifier.
y_pred = knn.predict(X_test)

# Confusion matrix of true vs. predicted labels; the title is set directly on the
# axes the display object drew into.
cm_display = ConfusionMatrixDisplay.from_predictions(y_test, y_pred)
cm_display.ax_.set_title('Confusion Matrix')
plt.show()

# Text summary of per-class metrics for the same predictions.
print(classification_report(y_test, y_pred))


In [None]:
# Sweep k = 1..19 and record the misclassification rate on the test split for each k.
# The loop uses its own names (`candidate`, `candidate_pred`) so it does not rebind the
# notebook-level `knn` / `y_pred`; otherwise the k=5 model evaluated earlier would be
# silently replaced by the k=19 one after this cell runs.
# NOTE(review): the error is measured on the test split, so a k picked from this plot
# is tuned to the test data -- consider cross-validating on the training split instead.
k_values = range(1, 20)
error = []

for k in k_values:
    candidate = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
    candidate_pred = candidate.predict(X_test)
    # Fraction of test rows whose predicted label differs from the true label.
    error.append(np.mean(candidate_pred != y_test))

plt.figure(figsize=(16,9))
plt.plot(k_values, error, marker='.')
plt.xlabel('Value of k')
plt.ylabel('Error Rate')
plt.grid()
plt.xticks(k_values)  # one tick per evaluated k
plt.title('Error Rate vs. k-value')
plt.show()