<a href="https://colab.research.google.com/github/g-e-mm/SupervisedLearning/blob/main/DecisionTree.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Heart Disease Prediction
---

- This project aims to predict heart disease from patient medical data using Decision Tree classification, helping in early diagnosis and prevention.

- A **Decision Tree** is a flowchart-like structure where internal nodes represent features (attributes), branches represent decision rules, and each leaf node represents the outcome (target). It is one of the simplest and most interpretable classification algorithms.

In this project, a **Decision Tree** will:
- Learn patterns from the heart disease dataset.
- Use those patterns to classify whether a person is likely to have heart disease (`target=1`) or not (`target=0`).
- Provide visual and logical reasoning for predictions.

# Loading dataset and necessary libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn import tree

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read by path.
# NOTE(review): `google.colab` is presumably only importable inside Colab — confirm before running elsewhere.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Location of the heart-disease CSV inside the mounted Drive folder.
path = '/content/drive/MyDrive/Data Science/Decision Tree/HeartDisease.csv'

# Exploratory Data Analysis

In [None]:
def load_dataset(file_path):
  """
  Read a CSV file into a pandas DataFrame.

  Args:
    file_path: Location of the CSV file to read.

  Returns:
    The parsed DataFrame, or None when no file exists at `file_path`
    (an error message is printed in that case).
  """
  try:
    dataset = pd.read_csv(file_path)
  except FileNotFoundError:
    # Report the missing file and signal the failure with None instead of raising.
    print(f"Error: File not found at {file_path}")
    dataset = None
  return dataset

# Read the CSV into `df`; load_dataset returns None (after printing an error) when the file is missing.
df = load_dataset(path)

In [None]:
# Preview the first rows to sanity-check what was loaded.
df.head()

In [None]:
def explore_data(data):
  """
  Print a quick overview of a dataset: its shape, the count of missing
  values per column, and summary statistics.

  Args:
    data: A pandas DataFrame.
  """
  divider = "______________________________________________________________________________"

  print(f"Data Shape: {data.shape}")
  print(divider)
  print(f"Null values: {data.isnull().sum()}")
  # The divider and the (blank-line-prefixed) section title go out in one call.
  print(divider, "\nData Description:", sep="\n")
  print(data.describe())


# Summarize the loaded DataFrame, which is bound to `df` at notebook scope.
explore_data(df)

# Preprocessing and Train-Test Split

In [None]:
# The label is the 'target' column; every other column is a feature.
y = df['target']
X = df.drop(columns=['target'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Training and Evaluating the Decision Tree Classifier

In [None]:

# Fit a decision tree on the training split (fixed seed for a reproducible tree)
# and predict the held-out test rows.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)


# Headline metrics on the test split.
print("Accuracy Score:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

print("____________________________________________________________________________________________________________________________________________________________")
print("")

# Confusion matrix: rows are the actual classes, columns the predicted ones.
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt="d", cmap="Blues")
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

print("____________________________________________________________________________________________________________________________________________________________")
print("")

# Draw the fitted tree. The feature names are passed as a plain list because
# some scikit-learn releases validate `feature_names` as a list and reject a
# pandas Index; a list is accepted everywhere.
plt.figure(figsize=(20,10))
tree.plot_tree(clf, filled=True, feature_names=list(X.columns), class_names=["No Disease", "Disease"])
plt.show()


In [None]:
def predict_heart_disease(input_data):
    """
    Classify a single patient record with the trained decision tree.

    Args:
        input_data: One patient's feature values, either as a dict keyed by
            feature name or as a sequence of values in the order of
            `X.columns`.

    Returns:
        A human-readable message: the "no disease" text when the model
        predicts class 0, otherwise the "disease likely" text.

    Raises:
        ValueError: If `input_data` is a dict that lacks one or more of the
            feature columns in `X`.
    """
    feature_names = list(X.columns)

    # A dict with a missing or misspelled key would otherwise turn into a NaN
    # cell in the frame below instead of being reported to the caller.
    if isinstance(input_data, dict):
        missing = [name for name in feature_names if name not in input_data]
        if missing:
            raise ValueError(f"Missing feature(s) for prediction: {missing}")

    # Single-row frame with the columns in the same order as the training features.
    input_df = pd.DataFrame([input_data], columns=feature_names)
    prediction = clf.predict(input_df)[0]
    return "🟢 No Heart Disease Detected" if prediction == 0 else "🔴 Heart Disease Likely"

# Sample patient record; each key is expected to name one of the feature columns.
example_input = dict(
    age=54,
    gender=1,
    chest_pain=2,
    rest_bps=130,
    cholesterol=250,
    fasting_blood_sugar=0,
    rest_ecg=1,
    thalach=150,
    exer_angina=0,
    old_peak=1.2,
    slope=2,
    ca=0,
    thalassemia=2,
)

# Last expression of the cell, so the returned message is displayed.
predict_heart_disease(example_input)
