Let's start by importing the required packages and libraries.

In [37]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

In [38]:
# Read the dataset from the CSV file
dataset = pd.read_csv("KNN_Dataset.csv")

# Report how many rows were loaded
row_count = dataset.shape[0]
print(row_count)

# Preview the first few rows
preview = dataset.head()
print(preview)

768
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  


Values in columns like 'Glucose' and 'BloodPressure' cannot be accepted as zeros because they would affect the outcome, so we replace such values with the mean of the respective column.

In [39]:
# Replace zeros with the column mean.
# A zero is not an acceptable value in any of the columns listed below, so
# zeros are treated as missing data and imputed.
zero_not_accepted = ["Glucose", "BloodPressure", "SkinThickness", "BMI", "Insulin"]

for column in zero_not_accepted:
    # Mark zeros as missing first so they do not pull the mean down.
    # `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
    dataset[column] = dataset[column].replace(0, np.nan)
    # The mean of the remaining values is truncated to a whole number
    # before it is used as the fill value.
    mean = int(dataset[column].mean(skipna=True))
    dataset[column] = dataset[column].fillna(mean)

# NOTE(review): the mean is computed over the whole dataset, i.e. before the
# train/test split performed later in this notebook; confirm this is intended.

Let's check the change in one column of our dataset.

In [40]:
# Inspect the "Glucose" column after the zero replacement
glucose_column = dataset["Glucose"]
print(glucose_column)

0      148.0
1       85.0
2      183.0
3       89.0
4      137.0
       ...  
763    101.0
764    122.0
765    121.0
766    126.0
767     93.0
Name: Glucose, Length: 768, dtype: float64


Before proceeding any further, let us split the dataset into training and test sets:

In [41]:
# Split the dataset: the first eight columns become X, the ninth becomes y
X = dataset.iloc[:, :8]
y = dataset.iloc[:, 8]

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split the same on every run
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [42]:
# Feature scaling: fit the scaler on the training set only, then apply the
# same fitted scaler to both the training and the test set
sc_x = StandardScaler()
sc_x.fit(x_train)
x_train = sc_x.transform(x_train)
x_test = sc_x.transform(x_test)

That's great, we have scaled our data. Now let's define the model using KNeighborsClassifier and fit it to the training data.

In [43]:
# Set up the K-NN classifier
knn_settings = {"n_neighbors": 11, "p": 2, "metric": "euclidean"}
classifier = KNeighborsClassifier(**knn_settings)

# Train the classifier on the scaled training data
# (left as the last expression so the cell displays the fitted estimator)
classifier.fit(x_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='euclidean',
                     metric_params=None, n_jobs=None, n_neighbors=11, p=2,
                     weights='uniform')

It's important to evaluate our model; let's use a confusion matrix to do that.

In [44]:
# The model can only be evaluated once it has produced predictions,
# so predict the labels of the test set first
y_pred = classifier.predict(X=x_test)
y_pred

array([1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,
       1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1,
       0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
      dtype=int64)

In [45]:
# Evaluate the model: confusion matrix of the true test labels against
# the predicted labels
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[94 13]
 [15 32]]


In [46]:
# F1 score of the test-set predictions
f1 = f1_score(y_true=y_test, y_pred=y_pred)
print(f1)

0.6956521739130436


In [47]:
# Accuracy of the test-set predictions
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(accuracy)

0.8181818181818182


## yeeeaaaah!!!
So, we have created a model using KNN that can predict whether a person will have diabetes or not.
With an accuracy of nearly 82% (and an F1 score of about 0.70), we can say that the model is a great fit for our data.