<h2 align='center'>Pima Indians Diabetes</h2>
This dataset is originally from the National Institute of Diabetes and Digestive and Kidney Diseases. The objective of the dataset is to diagnostically predict whether or not a patient has diabetes, based on certain diagnostic measurements included in the dataset. Several constraints were placed on the selection of these instances from a larger database. In particular, all patients here are females at least 21 years old of Pima Indian heritage.

In [None]:
# Third-party libraries used throughout the notebook.
# seaborn is imported here so that every plotting cell works on a fresh
# kernel (Restart & Run All), regardless of cell order further down.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_profiling
import seaborn as sns

In [None]:
# Read the Pima Indians diabetes CSV (path relative to the notebook) into a DataFrame.
csv_path = "Dataset/pima-indians-diabetes.csv"
pima = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows to sanity-check the load.
pima.head(n=5)

In [None]:
# Preview the last five rows of the frame.
pima.tail(n=5)

In [None]:
# List the column labels of the loaded frame.
pima.columns

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
pima.describe()

In [None]:
# Print the index range, per-column non-null counts and dtypes, and memory usage.
pima.info()

In [None]:
# Count missing values in each column (isnull is an alias of isna).
pima.isnull().sum()

In [None]:
# Split the dataset into features and target variable: this cell builds the
# feature matrix X from an explicit list of predictor columns.
# NOTE(review): if the CSV also carries a 'SkinThickness' column (the usual
# Pima release does), it is not selected here — confirm the omission is intended.
feature_cols = ['Pregnancies', 'Glucose', 'BloodPressure','Insulin','BMI', 'DiabetesPedigreeFunction', 'Age']
X = pima[feature_cols]  # Features
# Show only a preview instead of dumping the whole frame into the cell output.
X.head()

In [None]:
# Target variable: the 'Outcome' column, taken as a Series.
y = pima.loc[:, 'Outcome']

In [None]:
# Visualise the class balance of the target variable.
# The Series is passed by keyword (x=...) because newer seaborn releases treat
# the first positional argument as `data`, not as the variable to count.
ax = sns.countplot(x=y)
ax.set(xlabel="Outcome", ylabel="Count", title="Class distribution of Outcome");

In [None]:
# Hold out 25% of the rows for testing; the fixed random_state makes the
# partition reproducible across runs.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [None]:
# import the class
from sklearn.linear_model import LogisticRegression

# Instantiate the model. All parameters are left at their defaults except
# max_iter: the features are not scaled, and with the default limit of 100
# iterations the lbfgs solver typically stops early with a ConvergenceWarning.
# A higher cap lets the optimiser actually reach its optimum.
logreg = LogisticRegression(max_iter=1000)

In [None]:
# Train the classifier on the training partition.
logreg.fit(X=X_train, y=y_train)

In [None]:
# Predict class labels for the held-out test rows and display them.
y_pred = logreg.predict(X=X_test)
y_pred

In [None]:
# Build the confusion matrix of actual vs. predicted labels on the test set.
from sklearn import metrics

cnf_matrix = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred)
cnf_matrix

In [None]:
import seaborn as sns

In [None]:
# Quick look at the confusion matrix as a heatmap.
# annot=True writes each count into its cell; fmt='d' renders the counts as
# integers. (annot_kws expects a dict of text keyword arguments, so the target
# column must not be passed there.)
sns.heatmap(cnf_matrix, annot=True, fmt='d');

In [None]:
# Annotated confusion-matrix figure.
# class_names labels the rows (actual) and columns (predicted) of the matrix;
# assumes the target is binary 0/1 in that order — TODO confirm.
class_names = [0, 1]  # name of classes
fig, ax = plt.subplots()

# Labelling the DataFrame with class_names makes the heatmap tick labels come
# from the class list itself; ticks set before the heatmap call would be
# overwritten by seaborn, so none are set manually.
cnf_frame = pd.DataFrame(cnf_matrix, index=class_names, columns=class_names)

# create heatmap on the explicit axes rather than the implicit current one
sns.heatmap(cnf_frame, annot=True, cmap="YlGnBu", fmt='g', ax=ax)

ax.xaxis.set_label_position("top")
ax.set_title('Confusion matrix', y=1.1)
ax.set_ylabel('Actual label')
ax.set_xlabel('Predicted label')

# Run the layout pass last so it accounts for the title and axis labels.
fig.tight_layout()

In [None]:
# Report the headline classification metrics on the test set, one per line.
for caption, scorer in (
    ("Accuracy:", metrics.accuracy_score),
    ("Precision:", metrics.precision_score),
    ("Recall:", metrics.recall_score),
):
    print(caption, scorer(y_test, y_pred))

In [None]:
# ROC curve and AUC on the held-out test set.
# Column 1 of predict_proba is the probability of the second entry in
# logreg.classes_ (assumed to be the positive label 1 — TODO confirm).
y_pred_proba = logreg.predict_proba(X_test)[:, 1]
fpr, tpr, _ = metrics.roc_curve(y_test, y_pred_proba)
auc = metrics.roc_auc_score(y_test, y_pred_proba)

fig, ax = plt.subplots(figsize=(16, 9))
ax.plot(fpr, tpr, label="data 1, auc="+str(auc))
# Diagonal reference: the curve a random classifier would produce.
ax.plot([0, 1], [0, 1], linestyle="--", color="grey", label="chance")
ax.set_xlabel("False positive rate")
ax.set_ylabel("True positive rate")
ax.set_title("ROC curve")
ax.legend(loc="lower right")  # same position as loc=4
plt.show()