# IMPORTING LIBRARIES

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# DATA COLLECTION

In [None]:
dataset = pd.read_csv('/kaggle/input/pima-indians-diabetes-database/diabetes.csv')
dataset.head()

In [None]:
dataset.shape

In [None]:
dataset.describe()

In [None]:
sns.countplot(x = 'Outcome', data = dataset)

In [None]:
dataset['Outcome'].value_counts()

In [None]:
corr_mat = dataset.corr()
sns.heatmap(corr_mat,annot = True)

# DATA CLEANING

In [None]:
dataset.isna().sum()

In [None]:
x = dataset.iloc[:,:-1].values
y = dataset.iloc[:,:-1].values

In [None]:
x.shape

In [None]:
y

In [None]:
fig = plt.figure(figsize=(16,6))

sns.distplot(dataset["Glucose"][dataset["Outcome"] ==1])
plt.xticks([i for i in range(0,201,15)],rotation = 45)
plt.ylabel("Glucose count")
plt.title("Glucose",fontsize = 20)

In [None]:
fig = plt.figure(figsize=(16,6))
sns.distplot(dataset["Insulin"][dataset["Outcome"]==1])
plt.xticks()
plt.title("Insulin",fontsize=20)

In [None]:
fig = plt.figure(figsize=(16,6))
sns.distplot(dataset["BMI"][dataset["Outcome"]==1])
plt.xticks()
plt.title("BMI",fontsize = 20)

In [None]:
y = dataset.iloc[:,-1]

In [None]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.13,random_state=0)

In [None]:
x_train.shape

In [None]:
x_test.shape

In [None]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

In [None]:
x_train

# RANDOM FOREST

In [None]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(n_estimators=60)
clf.fit(x_train,y_train)

In [None]:
clf_y_pred = clf.predict(x_test)

In [None]:
from sklearn.metrics import confusion_matrix
clf_cm = confusion_matrix(y_test,clf_y_pred)
sns.heatmap(clf_cm,annot = True)

In [None]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,clf_y_pred)

# Linear Discriminant Analysis

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
lda = LinearDiscriminantAnalysis(n_components=1)
lda.fit(x_train,y_train)

In [None]:
lda_y_pred = lda.predict(x_test)

In [None]:
from sklearn.metrics import confusion_matrix
lda_cm = confusion_matrix(y_test,lda_y_pred)
sns.heatmap(lda_cm,annot = True)

In [None]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,lda_y_pred)

In [None]:
from sklearn.metrics import classification_report
print(classification_report(y_test,lda_y_pred))

# Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(x_train,y_train)

In [None]:
gnb_y_pred = gnb.predict(x_test)

In [None]:
from sklearn.metrics import confusion_matrix
gnb_cm = confusion_matrix(y_test,gnb_y_pred)
sns.heatmap(gnb_cm,annot = True)

In [None]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,gnb_y_pred)

# XGBoost

In [None]:
import xgboost as xgb
model = xgb.XGBClassifier(n_estimators=100,learning_rate=0.01,max_depth=9,min_child_weight=2,gamma=0.3,subsample=0.8,colsample_bytree=0.7,objective='binary:logistic',nthread=-1,scale_pos_weight=1)
model.fit(x_train,y_train)

In [None]:
xgb_y_pred = model.predict(x_test)

In [None]:
from sklearn.metrics import confusion_matrix
xgb_cm = confusion_matrix(y_test,xgb_y_pred)
sns.heatmap(xgb_cm,annot = True)

In [None]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,xgb_y_pred)

# NEURAL NETWORKS 

In [None]:
from sklearn.neural_network import MLPClassifier
classifier = MLPClassifier()
classifier.fit(x_train, y_train)
predictions = classifier.predict(x_test)

In [None]:
from sklearn.metrics import confusion_matrix
nn_cm = confusion_matrix(y_test,predictions)
sns.heatmap(nn_cm,annot = True)

In [None]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,predictions)

# BAGGED DECISION TREES

In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
bagging = BaggingClassifier(DecisionTreeClassifier(random_state=42),n_estimators=500,max_samples=100,bootstrap=True,n_jobs=-1,random_state=42,oob_score=True)
bagging.fit(x_train,y_train)

In [None]:
BaggingClassifier(base_estimator=DecisionTreeClassifier(random_state=42),max_samples=100, n_estimators=500, n_jobs=-1, oob_score=True,random_state=42)

In [None]:
from sklearn.metrics import confusion_matrix
dt_cm = confusion_matrix(bagging.predict(x_test),y_test)
sns.heatmap(dt_cm,annot = True)

In [None]:
from sklearn.metrics import accuracy_score
accuracy_score(bagging.predict(x_test),y_test)