## KNN Model Training

In [20]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

### Import data

In [None]:
df = pd.read_csv("../TAIPEI_diabetes.csv",sep=",",index_col="PatientID")


### Data Preparation

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
df_train, df_test = train_test_split(df,test_size = 0.2)

In [6]:
df_train.columns

Index(['Pregnancies', 'PlasmaGlucose', 'DiastolicBloodPressure',
       'TricepsThickness', 'SerumInsulin', 'BMI', 'DiabetesPedigree', 'Age',
       'Diabetic'],
      dtype='object')

In [7]:
# get the values of the columns for the training data
X_train = df_train.loc[:,['Pregnancies', 'PlasmaGlucose', 'DiastolicBloodPressure',
       'TricepsThickness', 'SerumInsulin', 'BMI', 'DiabetesPedigree', 'Age']].values
y_train = df_train.Diabetic.values

In [8]:
# get the values of the columns for the test data
X_test = df_test.loc[:,['Pregnancies', 'PlasmaGlucose', 'DiastolicBloodPressure',
       'TricepsThickness', 'SerumInsulin', 'BMI', 'DiabetesPedigree', 'Age']].values
y_test = df_test.Diabetic.values

### Model training

In [9]:
from sklearn.neighbors import KNeighborsRegressor

knn_regressor = KNeighborsRegressor(n_neighbors=3)
knn_regressor.fit(X=X_train, y=y_train)
y_test_predicted = knn_regressor.predict(X_test)
y_test_predicted = y_test_predicted.astype(int)


In [10]:
print((y_test_predicted == y_test).sum()/len(y_test))

0.803


### Metrics Calculation

In [11]:
from sklearn.metrics import confusion_matrix

In [12]:
cf = pd.DataFrame(
    columns=["y_test_0","y_test_1"],index=["y_pred_0","y_pred_1"]
)

In [13]:
cf.loc[:,:] = confusion_matrix(y_true= y_test,y_pred= y_test_predicted)

In [14]:
from sklearn.metrics import recall_score, precision_score

In [15]:
recall_score(y_true=y_test, y_pred=y_test_predicted)

0.466

In [16]:
precision_score(y_true=y_test, y_pred=y_test_predicted)

0.8910133843212237

In [17]:
from sklearn.metrics import classification_report

In [18]:
report =classification_report(y_true=y_test, y_pred=y_test_predicted)

In [19]:
print(report)

              precision    recall  f1-score   support

           0       0.78      0.97      0.87      2000
           1       0.89      0.47      0.61      1000

    accuracy                           0.80      3000
   macro avg       0.84      0.72      0.74      3000
weighted avg       0.82      0.80      0.78      3000



In [21]:
# Saving the model
with open("knn_model.pkl", "wb") as file:
    pickle.dump(knn_regressor, file)