In [40]:
#lets import dependencies
import numpy as np
import statistics

### K Nearest Neighbours Classifiers

In [75]:
class KNN_Classifiers():
    
    #initiating the parameters
    def __init__(self,distance_metric):
        
        self.distance_metric = distance_metric
        
    # getting the distance metric
    def get_distance_metric(self,training_datapoint,test_datapoint):
        
        if (self.distance_metric == 'euclidean'):
            
            dist = 0
            
            for i in range(len(training_datapoint)-1):
                dist = dist + (training_datapoint[i] - test_datapoint[i]**2)
                
            euclidean_dist = np.sqrt(dist)
            return euclidean_dist
        
        elif (self.distance_metric == 'manhattan'):
            
            dist = 0
            
            for i in range(len(training_datapoint)-1):
                dist = dist + abs(training_datapoint[i] - test_datapoint[i])
                
            manhattan_dist = dist
            return manhattan_dist
        
    # getting the nearest neighbours
    def nearest_neighbours(self,X_train, test_data, k):
        
        distance_list =[]
        
        for training_data in X_train:
            
            distance = self.get_distance_metric(training_data,test_data)
            distance_list.append((training_data, distance))
        
        distance_list.sort(key = lambda x:x[1])
        
        neighbours_list =[]
            
        for j in range(k):
            neighbours_list.append(distance_list[j][0])
                
        return neighbours_list
        
    #predict the new class of datapoint
    def predict(self,X_train, test_data,k):
        
        neighbours = self.nearest_neighbours(X_train, test_data, k)
        
        for data in neighbours:
            label = []
            label.append(data[-1])
            
        predicted_class = statistics.mode(label)
        
        return predicted_class

### Diabetics Prediction

In [76]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [77]:
#loading the dataset
diabetics = pd.read_csv(r"C:\Users\bharathi.v04\Downloads\DS\diabetes.csv")

In [78]:
diabetics.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [79]:
#no of rows & columns in dataset
diabetics.shape

(768, 9)

In [80]:
#separating X and Y values
X = diabetics.drop(columns='Outcome',axis=1)
Y = diabetics['Outcome']

In [81]:
# converting the data into numpy array
X = X.to_numpy()
Y = Y.to_numpy()

In [82]:
print(X)
print(Y)

[[  6.    148.     72.    ...  33.6     0.627  50.   ]
 [  1.     85.     66.    ...  26.6     0.351  31.   ]
 [  8.    183.     64.    ...  23.3     0.672  32.   ]
 ...
 [  5.    121.     72.    ...  26.2     0.245  30.   ]
 [  1.    126.     60.    ...  30.1     0.349  47.   ]
 [  1.     93.     70.    ...  30.4     0.315  23.   ]]
[1 0 1 0 1 0 1 0 1 1 0 1 0 1 1 1 1 1 0 1 0 0 1 1 1 1 1 0 0 0 0 1 0 0 0 0 0
 1 1 1 0 0 0 1 0 1 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 1 0 1 0 0 0 1 0 1 0
 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 1
 1 0 0 1 1 1 0 0 0 1 0 0 0 1 1 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 1 0 1 1 0 0 0 1 0 0 0 0 1 1 0 0 0 0 1 1 0 0 0 1 0 1 0 1 0 0 0 0 0
 1 1 1 1 1 0 0 1 1 0 1 0 1 1 1 0 0 0 0 0 0 1 1 0 1 0 0 0 1 1 1 1 0 1 1 1 1
 0 0 0 0 0 1 0 0 1 1 0 0 0 1 1 1 1 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0
 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 0 0 1 0 0 0 1 0 0 1 1 0 0 1 0 0 0 1 1 1 0 0
 1 0 1 0 1 1 0 1 0 0 1 0 1 1 0 0 1 0 1 0 0 1 0 1 0 1 1 1 0 0 1 0

### Train Test & Split

In [83]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2,random_state=2)

In [84]:
print(X.shape,X_train.shape, X_test.shape)

(768, 8) (614, 8) (154, 8)


In [85]:
print(X_train)

[[1.000e+00 1.240e+02 7.400e+01 ... 2.780e+01 1.000e-01 3.000e+01]
 [1.000e+00 9.500e+01 8.200e+01 ... 3.500e+01 2.330e-01 4.300e+01]
 [0.000e+00 9.400e+01 7.000e+01 ... 4.350e+01 3.470e-01 2.100e+01]
 ...
 [4.000e+00 1.250e+02 7.000e+01 ... 2.890e+01 1.144e+00 4.500e+01]
 [3.000e+00 1.160e+02 7.400e+01 ... 2.630e+01 1.070e-01 2.400e+01]
 [4.000e+00 1.100e+02 6.600e+01 ... 3.190e+01 4.710e-01 2.900e+01]]


In [86]:
X_train = np.insert(X_train, 8, Y_train, axis=1)

In [87]:
print(X_train)

[[1.000e+00 1.240e+02 7.400e+01 ... 1.000e-01 3.000e+01 0.000e+00]
 [1.000e+00 9.500e+01 8.200e+01 ... 2.330e-01 4.300e+01 1.000e+00]
 [0.000e+00 9.400e+01 7.000e+01 ... 3.470e-01 2.100e+01 0.000e+00]
 ...
 [4.000e+00 1.250e+02 7.000e+01 ... 1.144e+00 4.500e+01 1.000e+00]
 [3.000e+00 1.160e+02 7.400e+01 ... 1.070e-01 2.400e+01 0.000e+00]
 [4.000e+00 1.100e+02 6.600e+01 ... 4.710e-01 2.900e+01 0.000e+00]]


In [88]:
X_train.shape

(614, 9)

In [89]:
print(X_train[:,8])

[0. 1. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 1. 0. 1. 0. 0. 1. 1.
 0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 1. 0. 0. 0. 1. 1. 1. 1. 1. 0. 0. 1. 1.
 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 1.
 0. 1. 1. 0. 0. 0. 0. 0. 1. 0. 1. 1. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1. 0. 1. 1.
 1. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0.
 1. 1. 0. 1. 1. 1. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0.
 1. 0. 1. 0. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1. 1.
 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 1. 1. 0.
 0. 0. 1. 1. 0. 0. 1. 1. 1. 0. 0. 1. 1. 1. 1. 0. 0. 0. 1. 0. 0. 1. 1. 0.
 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 1. 0. 0.
 1. 1. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 1. 1. 0. 0. 1. 0.
 1. 1. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0.
 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 1. 1.

X_train ---> training data with features and target


X_test -----> test data without target

### Model Training KNN Classifiers

In [90]:
classifiers = KNN_Classifiers(distance_metric='euclidean')

NOTE: KNN classifiers can predict the label for only one data point at a time

In [91]:
prediction = classifiers.predict(X_train,X_test[0],k=5)

  euclidean_dist = np.sqrt(dist)


In [96]:
print(X_test[2])

[  0.    102.     78.     40.     90.     34.5     0.238  24.   ]


In [97]:
print(Y_test[2])

0


In [98]:
print(prediction)

0.0


In [99]:
X_test.shape

(154, 8)

In [100]:
X_test_size = X_test.shape[0]
print(X_test_size)

154


In [102]:
y_pred =[]

for i in range(X_test_size):
    prediction = classifiers.predict(X_train,X_test[i],k=5)
    y_pred.append(prediction)

  euclidean_dist = np.sqrt(dist)


In [103]:
print(y_pred)

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


In [104]:
y_true = Y_test

### Model Evaluation

In [105]:
accuracy = accuracy_score(y_true,y_pred)

In [106]:
print(accuracy*100) # accuracy score in %

70.77922077922078
