# IMPORTING LIBRARIES

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Suppress every warning category for the rest of the session. This also hides
# library deprecation notices, so re-enable warnings when upgrading dependencies.
warnings.filterwarnings('ignore')

# DATA COLLECTION

In [None]:
# Read the diabetes CSV from the Kaggle input mount into a DataFrame and
# preview its first rows.
dataset = pd.read_csv(
    '/kaggle/input/pima-indians-diabetes-database/diabetes.csv'
)
dataset.head()

In [None]:
# Show the (rows, columns) dimensions of the loaded frame.
dataset.shape

In [None]:
# Summary statistics for each column of the frame.
dataset.describe()

In [None]:
# Bar chart of how many rows fall into each Outcome value.
sns.countplot(data=dataset, x='Outcome')

In [None]:
# Tabulate the number of rows per Outcome value (the numbers behind the count plot).
dataset['Outcome'].value_counts()

In [None]:
# Pairwise correlation between all columns, drawn as a heatmap with each
# coefficient printed in its cell.
corr_mat = dataset.corr()
sns.heatmap(data=corr_mat, annot=True)

# DATA CLEANING

In [None]:
# Count NaN entries per column.
# NOTE(review): only NaN is counted here; placeholder values such as 0 would
# not show up — confirm how missing readings are encoded in this CSV.
dataset.isna().sum()

In [None]:
# Split the frame into a feature matrix (every column except the last) and a
# target vector (the last column only), both as NumPy arrays.
x = dataset.iloc[:, :-1].values
# The target needs the scalar index -1; the slice :-1 would copy the feature
# columns into y instead of selecting the final column.
y = dataset.iloc[:, -1].values

In [None]:
# Show the dimensions of the feature array.
x.shape

In [None]:
# Show the first row of x as a quick sanity check.
x[0]

In [None]:
# Display y as assigned in the preceding cell.
y

In [None]:
fig = plt.figure(figsize=(16, 6))

# Distribution of Glucose for rows with Outcome == 1.
# sns.histplot(kde=True, stat="density") is the supported replacement for the
# deprecated sns.distplot and keeps the density histogram with a KDE overlay.
sns.histplot(
    dataset.loc[dataset["Outcome"] == 1, "Glucose"],
    kde=True,
    stat="density",
)
plt.xticks(list(range(0, 201, 15)), rotation=45)
# The histogram is density-normalised, so the axis is a density, not a count.
plt.ylabel("Density")
plt.title("Glucose", fontsize=20)

In [None]:
fig = plt.figure(figsize=(16, 6))
# Distribution of Insulin for rows with Outcome == 1.
# sns.histplot(kde=True, stat="density") is the supported replacement for the
# deprecated sns.distplot and keeps the density histogram with a KDE overlay.
sns.histplot(
    dataset.loc[dataset["Outcome"] == 1, "Insulin"],
    kde=True,
    stat="density",
)
plt.title("Insulin", fontsize=20)

In [None]:
fig = plt.figure(figsize=(16, 6))
# Distribution of BMI for rows with Outcome == 1.
# sns.histplot(kde=True, stat="density") is the supported replacement for the
# deprecated sns.distplot and keeps the density histogram with a KDE overlay.
sns.histplot(
    dataset.loc[dataset["Outcome"] == 1, "BMI"],
    kde=True,
    stat="density",
)
plt.title("BMI", fontsize=20)

In [None]:
fig = plt.figure(figsize=(16, 6))
# Distribution of DiabetesPedigreeFunction for rows with Outcome == 1.
# sns.histplot(kde=True, stat="density") is the supported replacement for the
# deprecated sns.distplot and keeps the density histogram with a KDE overlay.
sns.histplot(
    dataset.loc[dataset["Outcome"] == 1, "DiabetesPedigreeFunction"],
    kde=True,
    stat="density",
)
# Tick marks every 0.15 from 0.15 up to 1.65.
plt.xticks([i * 0.15 for i in range(1, 12)])
plt.title("DiabetesPedigreeFunction", fontsize=20)

In [None]:
# Predictors: every column except the three discarded measurements and Outcome.
# Target: the last column of the frame.
x = dataset.drop(
    columns=["Pregnancies", "BloodPressure", "SkinThickness", "Outcome"]
)
y = dataset.iloc[:, -1]

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)

In [None]:
# Dimensions of the training portion of the features.
x_train.shape

In [None]:
# Dimensions of the held-out portion of the features.
x_test.shape

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, then apply that
# same transformation to both splits.
sc = StandardScaler()
sc.fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

In [None]:
# Inspect the scaled training features.
x_train

# K-Nearest Neighbor Algorithm

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# 25-neighbour classifier with the Minkowski metric, trained on the training split.
knn = KNeighborsClassifier(metric='minkowski', n_neighbors=25)
knn.fit(X=x_train, y=y_train)

In [None]:
# Predict labels for the held-out rows with the fitted KNN model.
knn_y_pred = knn.predict(X=x_test)

In [None]:
# Show the KNN predictions for the test rows.
knn_y_pred

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of true test labels against KNN predictions, drawn as an
# annotated heatmap.
knn_cm = confusion_matrix(y_true=y_test, y_pred=knn_y_pred)
sns.heatmap(data=knn_cm, annot=True)

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of test rows the KNN model labelled correctly.
accuracy_score(y_true=y_test, y_pred=knn_y_pred)

In [None]:
from sklearn.metrics import precision_recall_fscore_support

# Per-class precision, recall, F-score and support for the KNN predictions.
precision_recall_fscore_support(y_true=y_test, y_pred=knn_y_pred)

In [None]:
from sklearn.metrics import classification_report

# Text summary of the per-class metrics for the KNN predictions.
print(classification_report(y_true=y_test, y_pred=knn_y_pred))

# Support Vector Machine Algorithm

In [None]:
from sklearn.svm import SVC

# Linear-kernel support vector classifier, seeded and trained on the training split.
svc = SVC(random_state=0, kernel="linear")
svc.fit(X=x_train, y=y_train)

In [None]:
# Predict labels for the held-out rows with the fitted SVC.
svc_y_pred = svc.predict(X=x_test)

In [None]:
# Confusion matrix of true test labels against SVC predictions, printed as raw counts.
svc_cm = confusion_matrix(y_true=y_test, y_pred=svc_y_pred)
print(svc_cm)

In [None]:
# Annotated heatmap of the SVC confusion matrix.
sns.heatmap(data=svc_cm, annot=True)

In [None]:
from sklearn.metrics import classification_report

# Text summary of the per-class metrics for the SVC predictions.
print(classification_report(y_true=y_test, y_pred=svc_y_pred))

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of test rows the SVC labelled correctly.
accuracy_score(y_true=y_test, y_pred=svc_y_pred)

# Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

In [None]:
# Logistic regression classifier with the library's default hyperparameters.
model = LogisticRegression()

In [None]:
# Train the current model on the training split.
model.fit(X=x_train, y=y_train)

In [None]:
# Predict labels for the held-out rows with the current model.
y_pred = model.predict(X=x_test)

In [None]:
# Confusion matrix of true test labels against the current predictions; the
# bare name displays it as the cell output.
confusion = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred)
confusion

In [None]:
# Annotated heatmap of the confusion matrix computed above.
sns.heatmap(data=confusion, annot=True)

In [None]:
# Report the test-set accuracy as a percentage.
print(
    'Accuracy of Logistic Regression is: ',
    model.score(X=x_test, y=y_test) * 100,
    '%',
)

In [None]:
# Text summary of the per-class metrics for the current predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))

In [None]:
# Unpack the 2x2 matrix row by row: row 0 holds [TN, FP], row 1 holds [FN, TP].
TN, FP, FN, TP = confusion.ravel()

In [None]:
# Precision: share of positive predictions that were actually positive.
predicted_positive = TP + FP
Precision = TP / predicted_positive
print('Precision: ', Precision)

In [None]:
# Recall: share of actual positives that the model identified.
actual_positive = TP + FN
Recall = TP / actual_positive
print('Recall: ', Recall)

In [None]:
# F1 score for the current predictions.
metrics.f1_score(y_true=y_test, y_pred=y_pred)

# Gradient Boost Algorithm

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score

In [None]:
# Gradient boosting classifier trained on the training split. The seed is fixed
# so repeated runs build the same ensemble, consistent with the seeded
# train/test split and SVC elsewhere in this notebook.
model = GradientBoostingClassifier(random_state=0)
model.fit(x_train, y_train)

In [None]:
# Predict labels for the held-out rows with the current model.
y_pred = model.predict(X=x_test)

In [None]:
# confusion_matrix expects (y_true, y_pred). Passing the ground truth first
# keeps rows as actual classes and columns as predictions, the same
# orientation as the other models' matrices in this notebook.
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True)

In [None]:
# Report the test-set accuracy as a percentage.
print(
    'Accuracy of Gradient Boost is: ',
    model.score(X=x_test, y=y_test) * 100,
    '%',
)

In [None]:
# Text summary of the per-class metrics for the current predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))

In [None]:
# Precision for the current predictions.
print(
    'Precision of Gradient Boost is: ',
    precision_score(y_true=y_test, y_pred=y_pred),
)

In [None]:
# Recall for the current predictions.
print(
    'Recall of Gradient Boost is: ',
    recall_score(y_true=y_test, y_pred=y_pred),
)

In [None]:
# F1 score for the current predictions.
metrics.f1_score(y_true=y_test, y_pred=y_pred)