# Importing the necessary libraries 

In [18]:
import pandas as pd
from pickle import *
import os 
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

# Ignoring the Warnings

In [32]:
import warnings
warnings.filterwarnings("ignore")

# Loading the Dataset

In [3]:
# Read the raw dataset from the working directory and print a (truncated)
# text view of it to confirm the columns and row count.
csv_path = "diabetes.csv"
data = pd.read_csv(csv_path)
print(data)

      FS   FU Diabetes
0     81  YES       NO
1    120   NO       NO
2    150  YES      YES
3     91  YES       NO
4    150   NO      YES
..   ...  ...      ...
97    76   NO       NO
98   131  YES      YES
99   126   NO       NO
100   61  YES       NO
101  156   NO      YES

[102 rows x 3 columns]


# Check for Null Data

In [4]:
# Count the missing values in each column; a column of zeros means no
# imputation step is required.
null_counts = data.isna().sum()
print(null_counts)

FS          0
FU          0
Diabetes    0
dtype: int64


# Handling Duplicated Data

In [6]:
# Number of rows that exactly repeat an earlier row.
duplicate_count = data.duplicated().sum()
print(duplicate_count)

38


In [7]:
# Keep only the first occurrence of each repeated row. The result is
# reassigned rather than mutated with inplace=True, so the frame object that
# earlier cells displayed is not modified behind the reader's back. The
# original row labels are kept (the index is not reset).
data = data.drop_duplicates(keep="first")

In [8]:
# Sanity check after de-duplication: this should now report zero.
remaining_duplicates = data.duplicated().sum()
print(remaining_duplicates)

0


# Features and Target

In [12]:
# Separate the label column from the predictors: "Diabetes" is the target,
# every remaining column is a feature.
target = data["Diabetes"]
features = data.drop(columns=["Diabetes"])

In [35]:
# Preview the predictor columns (everything except "Diabetes").
features

Unnamed: 0,FS,FU
0,81,YES
1,120,NO
2,150,YES
3,91,YES
4,150,NO
...,...,...
80,77,NO
81,132,YES
82,127,NO
83,62,YES


# Handling the Categorical Data

In [13]:
# One-hot encode the YES/NO "FU" column into FU_NO / FU_YES indicators;
# the numeric "FS" column is passed through unchanged.
cfeatures = pd.get_dummies(features, columns=["FU"])

In [14]:
# Preview the encoded frame: FU is expanded into FU_NO / FU_YES indicator columns.
cfeatures

Unnamed: 0,FS,FU_NO,FU_YES
0,81,False,True
1,120,True,False
2,150,False,True
3,91,False,True
4,150,True,False
...,...,...,...
80,77,True,False
81,132,False,True
82,127,True,False
83,62,False,True


# Feature Scaling

In [37]:
# Rescale every column of the encoded matrix to the [0, 1] range. The scaler
# is learned from the raw array (no column names) and then applied to that
# same array; the fitted `mms` is reused later to scale new inputs.
raw_matrix = cfeatures.values
mms = MinMaxScaler().fit(raw_matrix)
nfeatures = mms.transform(raw_matrix)
print(nfeatures)

[[0.15384615 0.         1.        ]
 [0.42657343 1.         0.        ]
 [0.63636364 0.         1.        ]
 [0.22377622 0.         1.        ]
 [0.63636364 1.         0.        ]
 [0.13286713 0.         1.        ]
 [0.21678322 0.         1.        ]
 [0.56643357 0.         1.        ]
 [0.22377622 1.         0.        ]
 [0.98601399 1.         0.        ]
 [0.95104895 0.         1.        ]
 [0.07692308 0.         1.        ]
 [0.11188811 1.         0.        ]
 [0.4965035  0.         1.        ]
 [0.46153846 1.         0.        ]
 [0.00699301 0.         1.        ]
 [0.67132867 1.         0.        ]
 [0.16083916 0.         1.        ]
 [0.43356643 1.         0.        ]
 [0.64335664 0.         1.        ]
 [0.23076923 0.         1.        ]
 [0.64335664 1.         0.        ]
 [0.13986014 0.         1.        ]
 [0.57342657 0.         1.        ]
 [0.23076923 1.         0.        ]
 [0.99300699 1.         0.        ]
 [0.95804196 0.         1.        ]
 [0.08391608 0.         1.  

# Train/Test Split

In [21]:
# Hold out a stratified test set so both classes keep their proportions in
# the train and test partitions. A fixed random_state makes the split, and
# therefore every metric reported below, reproducible across kernel restarts.
# NOTE(review): nfeatures was min-max scaled on the full dataset before this
# split, so the scaler has already seen the test rows; consider fitting the
# scaler on x_train only.
x_train, x_test, y_train, y_test = train_test_split(
    nfeatures,
    target,
    stratify=target,
    random_state=42,
)

# Model

In [23]:
# k-nearest-neighbours classifier: each prediction is a vote among the 21
# closest training points, with closeness measured by Euclidean distance.
model = KNeighborsClassifier(
    metric="euclidean",
    n_neighbors=21,
)

In [24]:
# Train the classifier on the training partition only; the test partition is
# kept aside for the evaluation cells below.
model.fit(x_train, y_train)

# Confusion Matrix

In [26]:
# Compare the true test labels with the model's predictions for the same rows.
y_pred = model.predict(x_test)
cm = confusion_matrix(y_test, y_pred)

In [27]:
# Rows are true classes and columns are predicted classes, in label order (NO, YES).
print(cm)

[[9 0]
 [4 3]]


# Classification Report

In [30]:
# Build the per-class precision / recall / F1 summary for the test partition.
test_predictions = model.predict(x_test)
cr = classification_report(y_test, test_predictions)

In [31]:
# Show per-class precision, recall and F1 for the held-out test set.
print(cr)

              precision    recall  f1-score   support

          NO       0.69      1.00      0.82         9
         YES       1.00      0.43      0.60         7

    accuracy                           0.75        16
   macro avg       0.85      0.71      0.71        16
weighted avg       0.83      0.75      0.72        16



# Prediction Using One-Hot Encoding

In [44]:
# Interactive prediction for a single patient.
#
# The new sample must be encoded exactly like the training matrix: the scaler
# and the model were fitted on a bare array whose columns are, in order,
# FS, FU_NO, FU_YES. The model was trained on the string labels "YES"/"NO",
# so the prediction is compared against "YES" (comparing against the integer
# 1 can never match and always reports "no diabetes").
fs = float(input("Enter the Fasting Sugar value\n"))
fu = int(input("Enter the Fasting Urine\nEnter 1 for Yes\n2 for No\n"))

# Reject anything other than the two offered menu choices instead of silently
# encoding it as "neither Yes nor No".
if fu not in (1, 2):
    raise ValueError("Fasting Urine must be 1 (Yes) or 2 (No)")

input_data = {
    "FS": [fs],
    "FU_NO": [1 if fu == 2 else 0],
    "FU_YES": [1 if fu == 1 else 0],
}
# Select by the training frame's column labels so the positional order fed to
# the scaler is guaranteed to match; a KeyError here means the encodings drifted.
df_encoded = pd.DataFrame(input_data)[list(cfeatures.columns)]

# The scaler was fitted on a plain array, so pass plain values here as well.
nd = mms.transform(df_encoded.values)

result = model.predict(nd)

if result[0] == "YES":
    print("You have a chance of Diabetes")
else:
    print("You don't have Diabetes")

Enter the Fasting Sugar value
 120
Enter the Fasting Urine
Enter 1 for Yes
2 for No
 2


You don't have Diabetes
