In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the diabetes dataset from the notebook's working directory
# and preview its first ten rows.
df = pd.read_csv('./diabetes.csv')
df.head(n=10)

In [None]:
# Structural overview of the frame: column names, dtypes and non-null counts.
df.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max);
# by default pandas reports these for the numeric columns.
df.describe()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.linear_model import LogisticRegression

# Target is the "Outcome" column; every other column is used as a feature.
y = df["Outcome"]
X = df.drop(columns=["Outcome"])

# Hold out 25% of the rows for evaluation. random_state fixes the shuffle
# so the same rows land in train/test on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [None]:
# Logistic regression baseline.
# max_iter=1000 allows the solver more iterations than scikit-learn's default.
LR = LogisticRegression(random_state=42, max_iter=1000)

# Fit on the training split, then predict labels for the held-out split.
LR.fit(X_train, y_train)
y_pred = LR.predict(X_test)

# Compute every metric first, then report them in one place.
lr_train_acc = LR.score(X_train, y_train)
lr_test_acc = accuracy_score(y_test, y_pred)
lr_precision = precision_score(y_test, y_pred)
lr_recall = recall_score(y_test, y_pred)
lr_f1 = f1_score(y_test, y_pred)

print('train_accuracy: {:.4f}'.format(lr_train_acc))
print('accuracy: {:.4f}'.format(lr_test_acc))
print('precision: {:.4f}'.format(lr_precision))
print('recall: {:.4f}'.format(lr_recall))
print('f1-score: {:.4f}'.format(lr_f1))

# Confusion matrix on the test split (rows: true label, columns: predicted label).
conf_matrix = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots(figsize=(3, 2))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title('Confusion Matrix')
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')
plt.show()

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Decision tree classifier.
# random_state is fixed because scikit-learn randomly permutes the candidate
# features at each split; without a seed the fitted tree, and therefore every
# metric printed below, can change from one run to the next.
dt_clf = DecisionTreeClassifier(random_state=42)
dt_clf.fit(X_train, y_train)

# Predicted labels for the held-out split.
dt_pred = dt_clf.predict(X_test)

# NOTE(review): the tree is grown with no depth limit, so train accuracy is
# likely to be far higher than test accuracy -- compare the first two lines.
print('train_accuracy: {:.4f}'.format(dt_clf.score(X_train, y_train)))
print('accuracy: {:.4f}'.format(accuracy_score(y_test, dt_pred)))
print('precision: {:.4f}'.format(precision_score(y_test, dt_pred)))
print('recall: {:.4f}'.format(recall_score(y_test, dt_pred)))
print('f1-score: {:.4f}'.format(f1_score(y_test, dt_pred)))

In [None]:
# Confusion matrix for the decision tree's test-set predictions
# (rows: true label, columns: predicted label).
conf_matrix = confusion_matrix(y_test, dt_pred)

fig, ax = plt.subplots(figsize=(3, 2))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title('Confusion Matrix')
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')
plt.show()

In [None]:
from sklearn import tree
# Draw the decision tree that was already fitted and evaluated earlier.
# The estimator is passed as-is: calling dt_clf.fit() here would train a new
# tree, so the picture might not match the model whose metrics were reported.
fig, ax = plt.subplots(figsize=(12, 8))
tree.plot_tree(
    dt_clf,
    feature_names=list(X_train.columns),     # label split nodes with column names
    class_names=["No Diabetes", "Diabetes"],  # ascending class order: Outcome 0, then 1
    filled=True,
    rounded=True,
    ax=ax,
)
plt.show()  # also suppresses the list-of-annotations repr

In [None]:
!pip install --upgrade pip
!pip install graphviz

from sklearn.tree import export_graphviz
import graphviz

dot_data = tree.export_graphviz(dt_clf, out_file=None,
                                feature_names=X_train.columns,
                                class_names=["No Diabetes", "Diabetes"],
                                filled=True, rounded=True,
                                special_characters=True)

# Render the visualization
graph = graphviz.Source(dot_data)
graph