# Exploratory Data Analysis (EDA) - Heart Disease Dataset

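This notebook explores the heart disease dataset: it loads the raw data, checks for missing values, inspects the distribution of the `target` column, and produces basic visualizations (a correlation heatmap and per-column histograms).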

In [None]:
from pathlib import Path
import sys

# Put the parent of the current working directory on the import path so that
# the `src` package imported below can be resolved.
# NOTE(review): assumes the notebook is launched from a directory that sits
# directly under the project root - confirm.
ROOT = Path.cwd().parent
# Re-running this cell would otherwise append the same entry to sys.path on
# every execution, so only add it when it is not already present.
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from src.config import RAW_DIR
from src.preprocessing import load_data

# Default size (in inches) for figures that do not set their own figsize.
plt.rc('figure', figsize=(6, 4))

# Load `heart.csv` from the raw-data directory with the project's loader,
# then preview the first rows as the cell output.
data_path = RAW_DIR / 'heart.csv'
df = load_data(data_path)
df.head()


In [None]:
# Print a concise summary of the frame (columns, non-null counts, dtypes).
df.info()

In [None]:
# Number of missing values per column, columns with the most gaps first.
df.isnull().sum().sort_values(ascending=False)

In [None]:
# Relative frequency (fractions summing to 1) of each value in `target`.
df.loc[:, 'target'].value_counts(normalize=True)

In [None]:
# Pairwise correlation between the numeric columns only.
corr = df.corr(numeric_only=True)

# Draw the matrix on an explicit 8x6 inch figure; the diverging colormap is
# centred on zero so positive and negative correlations get opposite hues.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr, cmap='coolwarm', center=0, ax=ax)
ax.set_title('Correlation Heatmap')
fig.tight_layout()
plt.show()


In [None]:
# One 20-bin histogram per column that pandas can plot, laid out on a single
# 10x8 inch figure; tighten the layout so subplot titles do not overlap.
df.hist(bins=20, figsize=(10, 8))
plt.gcf().tight_layout()
plt.show()
