<a href="https://colab.research.google.com/github/Anny8910/Decision-Tree-Classification-on-Diabetes-Dataset/blob/master/Diabetes_set_(Decision_tree).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# =========================
# 1. Import Libraries
# =========================
import pandas as pd
import numpy as np

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import confusion_matrix, classification_report

from sklearn.tree import export_graphviz
from IPython.display import Image
import pydotplus
from io import StringIO


# =========================
# 2. Load Dataset
# =========================
# Read the CSV (path is relative to the current working directory) into a
# DataFrame.
df = pd.read_csv("diabetes_dataset.csv")

# Preview the first rows. A bare `df.head()` is only rendered by a notebook
# when it is the last expression of its cell, and never by a plain script, so
# print the preview explicitly.
print(df.head())


# =========================
# 3. Feature & Target Split
# =========================
# The 'Outcome' column is the label; every remaining column is a feature.
target_column = 'Outcome'
y = df[target_column]
X = df.drop(columns=target_column)


# =========================
# 4. Train-Test Split
# =========================
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
split_options = {"test_size": 0.2, "random_state": 1}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


# =========================
# 5. Decision Tree (Default)
# =========================
# Train a tree with library-default hyperparameters (only the seed is fixed).
# fit() returns the estimator itself, so construction and training are chained.
model = DecisionTreeClassifier(random_state=1).fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = model.predict(X_test)
accuracy_pct = metrics.accuracy_score(y_test, y_pred) * 100
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy_pct)
print("Confusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", report)


# =========================
# 6. Sample Prediction
# =========================
# One hand-written record; the values must be listed in the same order as the
# feature columns of X. The model was fitted on a DataFrame, so the sample is
# wrapped in a DataFrame carrying the same column names. This avoids
# scikit-learn's "X does not have valid feature names" warning and raises
# immediately if the number of values does not match the number of features.
sample = pd.DataFrame(
    [[6, 148, 72, 35, 0, 33.6, 0.627, 50]],
    columns=X.columns,
)
print("Prediction for sample:", model.predict(sample))


# =========================
# 7. Decision Tree Visualization (Default)
# =========================
# Imported here so this section is self-contained; display() renders an object
# even when the statement is not the last expression of a notebook cell.
from IPython.display import display

# With out_file=None, export_graphviz returns the DOT source as a string, so no
# intermediate StringIO buffer is required.
dot_data = export_graphviz(
    model,
    out_file=None,
    filled=True,
    rounded=True,
    special_characters=True,
    # A plain list is the documented type for feature_names; converting the
    # pandas Index avoids depending on how strictly the installed
    # scikit-learn release validates this argument.
    feature_names=list(X.columns),
    # Labels are matched to the target classes in ascending order.
    # NOTE(review): presumably Outcome is encoded 0 = no diabetes,
    # 1 = diabetes - confirm against the dataset.
    class_names=['No Diabetes', 'Diabetes'],
)

# Render the DOT source to PNG via pydotplus and show it inline.
graph = pydotplus.graph_from_dot_data(dot_data)
display(Image(graph.create_png()))


# =========================
# 8. Optimized Decision Tree
# =========================
# Same seed as the default tree, but splits are chosen by entropy and the
# depth is capped at 3.
entropy_params = {"criterion": "entropy", "max_depth": 3, "random_state": 1}
model_entropy = DecisionTreeClassifier(**entropy_params).fit(X_train, y_train)

# Evaluate on the same held-out rows used for the default tree.
y_pred_entropy = model_entropy.predict(X_test)
entropy_accuracy_pct = metrics.accuracy_score(y_test, y_pred_entropy) * 100
entropy_report = classification_report(y_test, y_pred_entropy)

print("Optimized Accuracy:", entropy_accuracy_pct)
print("\nOptimized Classification Report:\n", entropy_report)


# =========================
# 9. Optimized Tree Visualization
# =========================
# Imported here so this section is self-contained; display() renders an object
# regardless of where the statement sits inside a notebook cell.
from IPython.display import display

# With out_file=None, export_graphviz returns the DOT source as a string, so no
# intermediate StringIO buffer is required.
dot_data = export_graphviz(
    model_entropy,
    out_file=None,
    filled=True,
    rounded=True,
    special_characters=True,
    # A plain list is the documented type for feature_names; converting the
    # pandas Index avoids depending on how strictly the installed
    # scikit-learn release validates this argument.
    feature_names=list(X.columns),
    # Labels are matched to the target classes in ascending order.
    # NOTE(review): presumably Outcome is encoded 0 = no diabetes,
    # 1 = diabetes - confirm against the dataset.
    class_names=['No Diabetes', 'Diabetes'],
)

# Render the DOT source to PNG via pydotplus and show it inline.
graph = pydotplus.graph_from_dot_data(dot_data)
display(Image(graph.create_png()))
