In [1]:
import numpy as np
import pandas as pd

In [7]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

In [35]:
# Load the scikit-learn diabetes dataset and pull out the feature matrix
# and the target vector under the conventional names X and y.
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target
print(diabetes.feature_names)
X

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']


array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990749, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06833155, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286131, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04688253,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452873, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00422151,  0.00306441]])

In [17]:
# Z-score normalization: rescale every feature column to zero mean and
# unit variance. The scaler is fit on all rows of X.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)
X_scaled

array([[ 0.80050009,  1.06548848,  1.29708846, ..., -0.05449919,
         0.41853093, -0.37098854],
       [-0.03956713, -0.93853666, -1.08218016, ..., -0.83030083,
        -1.43658851, -1.93847913],
       [ 1.79330681,  1.06548848,  0.93453324, ..., -0.05449919,
         0.06015558, -0.54515416],
       ...,
       [ 0.87686984,  1.06548848, -0.33441002, ..., -0.23293356,
        -0.98564884,  0.32567395],
       [-0.9560041 , -0.93853666,  0.82123474, ...,  0.55838411,
         0.93616291, -0.54515416],
       [-0.9560041 , -0.93853666, -1.53537419, ..., -0.83030083,
        -0.08875225,  0.06442552]])

In [19]:
# Binning: convert the continuous target into three ordinal classes using
# one standard deviation either side of the mean as the cut points:
#   0 -> y <  mean - std
#   1 -> mean - std <= y < mean + std
#   2 -> y >= mean + std
y_mean, y_std = y.mean(), y.std()
bin_edges = [y_mean - y_std, y_mean + y_std]
y_binned = np.digitize(y, bins=bin_edges)
y_binned

array([1, 0, 1, 1, 1, 1, 1, 0, 1, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 2, 1, 1, 1, 1, 1, 2, 1, 0, 2, 1, 0, 1, 2, 2, 2, 1, 1, 0, 0, 1,
       2, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1,
       1, 1, 1, 1, 0, 2, 1, 1, 1, 0, 1, 1, 2, 1, 1, 0, 0, 1, 0, 1, 0, 1,
       0, 1, 1, 1, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 0, 1, 1, 2, 1,
       1, 0, 2, 2, 2, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1,
       1, 1, 1, 2, 1, 2, 2, 2, 1, 2, 2, 0, 1, 2, 1, 1, 1, 1, 2, 1, 2, 0,
       1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 2, 1, 0, 0, 2, 1, 1, 1,
       2, 1, 1, 1, 1, 0, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 0, 1,
       1, 1, 1, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 1, 1,
       1, 1, 2, 1, 1, 1, 1, 1, 1, 0, 1, 1, 2, 1, 2, 1, 0, 0, 2, 2, 2, 1,
       0, 0, 1, 1, 1, 0, 2, 1, 2, 2, 1, 1, 2, 1, 2, 0, 1, 0, 0, 1, 2, 1,
       1, 0, 0, 1, 2, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 1, 2, 1, 2, 1, 1, 2,
       0, 1, 1, 0, 2, 2, 1, 1, 0, 1, 1, 0, 1, 1, 2,

In [21]:
# Hold out 20% of the rows for testing. The split is taken on the *unscaled*
# features so the scaling statistics can be learned from the training rows
# only; fitting the scaler on all rows before splitting leaks information
# about the test set into the training features.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_binned, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows, then apply that same transform to
# both partitions so the test rows are scaled with training statistics.
train_scaler = StandardScaler().fit(X_train_raw)
X_train = train_scaler.transform(X_train_raw)
X_test = train_scaler.transform(X_test_raw)

In [23]:
# Train a k-nearest-neighbours classifier; each prediction is a vote among
# the N_NEIGHBORS closest training rows.
N_NEIGHBORS = 5
classifier = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)
classifier.fit(X=X_train, y=y_train)

In [39]:
# Predict the binned class label for every held-out row.
y_pred = classifier.predict(X=X_test)

In [41]:
# Fraction of held-out rows whose predicted class equals the true class.
# accuracy_score takes (y_true, y_pred) in that order; accuracy itself is
# symmetric, but keeping the documented order avoids wrong results if this
# call is later adapted to an asymmetric metric (precision, recall, ...).
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {(accuracy * 100):.2f}%")

Accuracy: 61.80%
