#  <h1 style="font-family: Trebuchet MS; padding: 30px; font-size: 40px; color: #0000f0; text-align: center; line-height: 1.5;background-color: #faafbe"><b>Heart Disease Prediction</b><br></h1>

In [None]:
# Imports: stdlib first, then third-party. matplotlib.pyplot is needed because
# later cells call plt.figure() / plt.title() / plt.show().
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Load the heart-failure CSV from the Kaggle input directory and preview it.
df = pd.read_csv('/kaggle/input/heart-failure-prediction/heart.csv')
df.head()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Column labels available in the frame.
df.columns

In [None]:
# Per-column dtype and non-null count summary.
df.info()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, ...) per column.
df.describe()

In [None]:
# Visual missing-value check: one heatmap cell per frame cell, coloured by
# whether the value is null. The trailing ';' hides the Axes repr.
sns.heatmap(data=df.isna(), cbar=False, cmap='magma');


In [None]:
# Histogram of patient age.
sns.displot(data=df, x='Age')

In [None]:
# Histogram of patient age with a KDE curve overlaid.
sns.displot(data=df, x='Age', kde=True)

In [None]:
# sns.distplot is deprecated (scheduled for removal in seaborn 0.14); displot
# with the kde/rug overlays is its replacement. stat='density' keeps the
# density-normalised y-axis that distplot drew when a KDE was requested.
sns.displot(data=df, x='Age', kde=True, rug=True, stat='density')

In [None]:
# Histogram + KDE of resting blood pressure.
sns.displot(data=df, x='RestingBP', color='blue', kde=True)

In [None]:
# Histogram + KDE of the FastingBS column.
sns.displot(data=df, x='FastingBS', color='pink', kde=True)

In [None]:
# Record count per value of the Sex column.
sns.displot(data=df, x='Sex')

## Pie Chart Distribution

In [None]:
# Share of records per FastingBS value; wedge labels are whole-number percentages.
df.groupby('FastingBS').size().plot.pie(autopct='%.0f')

In [None]:
# Share of records per ChestPainType value; wedge labels are whole-number percentages.
df.groupby('ChestPainType').size().plot.pie(autopct='%.0f')

In [None]:
# Share of records per ST_Slope value; wedge labels are whole-number percentages.
df.groupby('ST_Slope').size().plot.pie(autopct='%.0f')

In [None]:
# Share of records per HeartDisease value; wedge labels are whole-number percentages.
df.groupby('HeartDisease').size().plot.pie(autopct='%.0f')

## Violin Plot ##

In [None]:
# Violin plot with HeartDisease on the x-axis and Sex on the y-axis.
sns.violinplot(data=df, x='HeartDisease', y='Sex')

In [None]:
# Age distribution for each HeartDisease value.
sns.violinplot(data=df, x='HeartDisease', y='Age')

In [None]:
# Cholesterol distribution for each HeartDisease value.
sns.violinplot(data=df, x='HeartDisease', y='Cholesterol')

In [None]:
# Preview the first rows of the frame again before the correlation section.
df.head()

## Correlation (Heatmap)

In [None]:
# Correlate only the numeric columns. The frame still holds string columns
# (e.g. Sex, ChestPainType), and DataFrame.corr() raises on those in
# pandas >= 2.0 because numeric_only now defaults to False.
df.select_dtypes(include='number').corr()

In [None]:
# Heatmap of the numeric-column correlation matrix. Non-numeric columns are
# filtered out first because DataFrame.corr() raises on string data in
# pandas >= 2.0 (numeric_only defaults to False).
sns.heatmap(df.select_dtypes(include='number').corr())

## JointPlot

In [None]:
# Joint distribution of Age against the HeartDisease label (default scatter).
sns.jointplot(data=df, x='HeartDisease', y='Age')

In [None]:
# Joint distribution of Oldpeak against the HeartDisease label (default scatter).
sns.jointplot(data=df, x='HeartDisease', y='Oldpeak')

In [None]:
# Hex-binned joint plot of Cholesterol against the HeartDisease label.
sns.jointplot(data=df, x='HeartDisease', y='Cholesterol', kind='hex')

In [None]:
# Hex-binned joint plot of Cholesterol against Age.
sns.jointplot(data=df, x='Age', y='Cholesterol', kind='hex')

In [None]:
# Hex-binned joint plot of MaxHR against Age, drawn in yellow.
sns.jointplot(data=df, x='Age', y='MaxHR', kind='hex', color='yellow')

In [None]:
# MaxHR against Age with a fitted regression line.
sns.jointplot(data=df, x='Age', y='MaxHR', kind='reg')

In [None]:
# MaxHR against the HeartDisease label with a fitted regression line, in green.
sns.jointplot(data=df, x='HeartDisease', y='MaxHR', kind='reg', color='green')

In [None]:
# MaxHR against Cholesterol with a fitted regression line, in black.
sns.jointplot(data=df, x='Cholesterol', y='MaxHR', kind='reg', color='black')

## PairPlot

In [None]:
# Grid of pairwise relationships between the frame's columns.
sns.pairplot(data=df)

In [None]:
# Columns to store with pandas' category dtype.
categorical_cols = [
    'Sex',
    'ChestPainType',
    'RestingECG',
    'ExerciseAngina',
    'ST_Slope',
]
# Cast each listed column in place on df; values are unchanged, only the dtype.
for col in categorical_cols:
    df[col] = df[col].astype(pd.CategoricalDtype())

In [None]:
# List the distinct values found in each categorical column.
print("\n🔹 Unique values per categorical column:")
for col in categorical_cols:
    distinct = df[col].unique()
    print(f"{col}: {distinct}")

In [None]:
# Class balance of the target column
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='HeartDisease', palette='Set2', ax=ax)
ax.set_title("Heart Disease Distribution (1 = Disease, 0 = No Disease)")
plt.show()

In [None]:
# Record counts per chest pain type, split by the HeartDisease label
fig, ax = plt.subplots(figsize=(7, 4))
sns.countplot(data=df, x='ChestPainType', hue='HeartDisease', palette='husl', ax=ax)
ax.set_title("Heart Disease by Chest Pain Type")
plt.show()

In [None]:
# Pairwise correlation of the int64/float64 columns, annotated to 2 decimals
fig, ax = plt.subplots(figsize=(10, 6))
numeric_df = df.select_dtypes(include=['int64', 'float64'])
sns.heatmap(numeric_df.corr(), ax=ax, annot=True, fmt=".2f", cmap='coolwarm')
ax.set_title("Correlation Heatmap")
plt.show()

In [None]:
# Histogram of Age (20 bins) with a KDE overlay
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(data=df, x='Age', bins=20, kde=True, color='teal', ax=ax)
ax.set_title("Age Distribution of Patients")
plt.show()

In [None]:
# Box plot of Age for each HeartDisease value
fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(data=df, x='HeartDisease', y='Age', palette='Set3', ax=ax)
ax.set_title("Age vs Heart Disease")
plt.show()

In [None]:
# Histogram of Cholesterol (25 bins) with a KDE overlay
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(data=df, x='Cholesterol', bins=25, kde=True, color='orange', ax=ax)
ax.set_title("Cholesterol Level Distribution")
plt.show()

In [None]:
# Box plot of Oldpeak for each HeartDisease value
fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(data=df, x='HeartDisease', y='Oldpeak', palette='coolwarm', ax=ax)
ax.set_title("ST Depression (Oldpeak) vs Heart Disease")
plt.show()

In [None]:
# Box plot of MaxHR for each HeartDisease value
fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(data=df, x='HeartDisease', y='MaxHR', palette='coolwarm', ax=ax)
ax.set_title("Maximum Heart Rate vs Heart Disease")
plt.show()

In [None]:
# Record counts per ST_Slope value, split by the HeartDisease label
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='ST_Slope', hue='HeartDisease', palette='Set2', ax=ax)
ax.set_title("ST Slope vs Heart Disease")
plt.show()


<center>
    <img src="https://fldscc.com/wp-content/uploads/2020/07/shutterstock_30528475.jpg" alt="Heart Failure Prediction" width="25%">
</center>

### Problem Statement :

With a plethora of medical data available and the rise of Data Science, a host of startups are taking up the challenge of creating indicators for foreseeable diseases that a person might contract. Cardiovascular diseases (CVDs) are the number 1 cause of death globally, taking an estimated 17.9 million lives each year, which accounts for 31% of all deaths worldwide. Heart failure is a common event caused by CVDs. People with cardiovascular disease or who are at high cardiovascular risk (due to the presence of one or more risk factors such as hypertension, diabetes, hyperlipidaemia or already established disease) need early detection and management, wherein a machine learning model can be of great help. In this way, we try to automate the solution to another problem that occurs in nature, with a view to countering it and moving on to the next problem with the help of AI techniques!

### Aim :
- To classify / predict whether a patient is prone to heart failure depending on multiple attributes.
- It is a **binary classification** with multiple numerical and categorical features.

### <center>Dataset Attributes</center>
    
- **Age** : age of the patient [years]
- **Sex** : sex of the patient [M: Male, F: Female]
- **ChestPainType** : chest pain type [TA: Typical Angina, ATA: Atypical Angina, NAP: Non-Anginal Pain, ASY: Asymptomatic]
- **RestingBP** : resting blood pressure [mm Hg]
- **Cholesterol** : serum cholesterol [mg/dl]
- **FastingBS** : fasting blood sugar [1: if FastingBS > 120 mg/dl, 0: otherwise]
- **RestingECG** : resting electrocardiogram results [Normal: Normal, ST: having ST-T wave abnormality (T wave inversions and/or ST elevation or depression of > 0.05 mV), LVH: showing probable or definite left ventricular hypertrophy by Estes' criteria]
- **MaxHR** : maximum heart rate achieved [Numeric value between 60 and 202]
- **ExerciseAngina** : exercise-induced angina [Y: Yes, N: No]
- **Oldpeak** : oldpeak = ST [Numeric value measured in depression]
- **ST_Slope** : the slope of the peak exercise ST segment [Up: upsloping, Flat: flat, Down: downsloping]
- **HeartDisease** : output class [1: heart disease, 0: Normal]