In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import warnings
import plotly.express as px
from plotly.offline import offline, iplot
import plotly.graph_objects as go
from datetime import datetime
from wordcloud import WordCloud, ImageColorGenerator
warnings.filterwarnings("ignore")

import dash
from dash import dcc, html, Input, Output
import dash_bootstrap_components as dbc

In [3]:
# Load the dataset from a CSV file in the current working directory.
# Alternative path for running on Kaggle (kept for reference):
# heart = pd.read_csv('/kaggle/input/stroke-detection-dataset/heart.csv')
heart = pd.read_csv('heart.csv')
# Bare expression: renders the (truncated) frame as the cell output.
heart

Unnamed: 0,age,sex,chest_pain_type,resting_bp,cholestoral,fasting_blood_sugar,restecg,max_hr,exang,oldpeak,slope,num_major_vessels,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0


In [4]:
# Print column names, non-null counts and dtypes of the loaded frame.
heart.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   age                  303 non-null    int64  
 1   sex                  303 non-null    int64  
 2   chest_pain_type      303 non-null    int64  
 3   resting_bp           303 non-null    int64  
 4   cholestoral          303 non-null    int64  
 5   fasting_blood_sugar  303 non-null    int64  
 6   restecg              303 non-null    int64  
 7   max_hr               303 non-null    int64  
 8   exang                303 non-null    int64  
 9   oldpeak              303 non-null    float64
 10  slope                303 non-null    int64  
 11  num_major_vessels    303 non-null    int64  
 12  thal                 303 non-null    int64  
 13  target               303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 33.3 KB


In [5]:
# Count missing values per column.
heart.isnull().sum()

age                    0
sex                    0
chest_pain_type        0
resting_bp             0
cholestoral            0
fasting_blood_sugar    0
restecg                0
max_hr                 0
exang                  0
oldpeak                0
slope                  0
num_major_vessels      0
thal                   0
target                 0
dtype: int64

In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
heart.describe()

Unnamed: 0,age,sex,chest_pain_type,resting_bp,cholestoral,fasting_blood_sugar,restecg,max_hr,exang,oldpeak,slope,num_major_vessels,thal,target
count,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0
mean,54.366337,0.683168,0.966997,131.623762,246.264026,0.148515,0.528053,149.646865,0.326733,1.039604,1.39934,0.729373,2.313531,0.544554
std,9.082101,0.466011,1.032052,17.538143,51.830751,0.356198,0.52586,22.905161,0.469794,1.161075,0.616226,1.022606,0.612277,0.498835
min,29.0,0.0,0.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,47.5,0.0,0.0,120.0,211.0,0.0,0.0,133.5,0.0,0.0,1.0,0.0,2.0,0.0
50%,55.0,1.0,1.0,130.0,240.0,0.0,1.0,153.0,0.0,0.8,1.0,0.0,2.0,1.0
75%,61.0,1.0,2.0,140.0,274.5,0.0,1.0,166.0,1.0,1.6,2.0,1.0,3.0,1.0
max,77.0,1.0,3.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,2.0,4.0,3.0,1.0


In [26]:
# Number of distinct values in each column; low counts point to
# categorical / flag-like columns, high counts to continuous measurements.
heart.nunique()

age                     41
sex                      2
chest_pain_type          4
resting_bp              49
cholestoral            152
fasting_blood_sugar      2
restecg                  3
max_hr                  91
exang                    2
oldpeak                 40
slope                    3
num_major_vessels        5
thal                     4
target                   2
dtype: int64

In [7]:
# Optional: automated profiling report for the frame. Left disabled;
# uncomment the three lines below to generate it (needs ydata_profiling installed).
# from ydata_profiling import ProfileReport
# heart_report = ProfileReport(heart)
# heart_report

In [8]:
# Remove fully duplicated rows. `heart` is rebound here, so every later
# cell works on the de-duplicated frame.
heart = heart.drop_duplicates()
# (rows, columns) after de-duplication.
heart.shape

(302, 14)

In [9]:
def custome_layout(title_size = 28, showlegend = False, fig = None):
    """Apply the notebook's shared title and hover-label styling to a figure.

    Parameters
    ----------
    title_size : int
        Font size of the figure title.
    showlegend : bool
        Whether the legend is displayed.
    fig : object with an ``update_layout`` method, optional
        Figure to style. When omitted, the notebook-level variable ``fig``
        is used, so existing ``custome_layout()`` calls keep working.
        Passing the figure explicitly avoids relying on hidden global state.

    Returns
    -------
    None
        The figure is modified in place.

    Raises
    ------
    NameError
        If ``fig`` is not passed and no global ``fig`` has been defined.
    """
    if fig is None:
        # Backward-compatible fallback: style whatever figure the notebook
        # most recently bound to the global name `fig`.
        try:
            fig = globals()["fig"]
        except KeyError:
            raise NameError("name 'fig' is not defined") from None

    fig.update_layout(
        showlegend = showlegend,
        # Centre the title and set its typeface.
        title = {
            "x": 0.5,
            "font": {
                "size": title_size,
                "family": "tahoma"
            }
        },
        # Dark hover box with a larger font.
        hoverlabel = {
            "bgcolor": "#111",
            "font_size": 16,
            "font_family": "arial"
        }
    )

What is the age distribution of patients❓

In [10]:
# Central tendency of patient age; both values are reused by the histogram cell.
ages = heart['age'].to_list()
age_mean = np.mean(ages)
age_median = np.median(ages)

print(
    f"Average Age: {age_mean:0.2f}",
    f"Median Age: {age_median}",
    sep="\n",
)

Average Age: 54.42
Median Age: 55.5


In [11]:
# Histogram of patient ages with dashed reference lines at the mean and median.
fig = px.histogram(
    heart["age"],
    title="The Distribution of Age",
    template="plotly_white",
    nbins=48,
)

# Shared title / hover styling; operates on the notebook-level `fig`.
custome_layout()

# One entry per reference line: (x position, annotation text, colour, annotation anchor).
reference_lines = [
    (age_mean, f"Mean: {age_mean:.1f}", "green", "top left"),
    (age_median, f"Median: {age_median:.1f}", "red", "top"),
]
for x_value, caption, colour, anchor in reference_lines:
    fig.add_vline(
        x=x_value,
        line_dash="dash",
        line_color=colour,
        annotation_text=caption,
        annotation_position=anchor,
        annotation_font_size=14,
        annotation_font_color=colour,
    )

# Thin dark outline around each bar plus a custom hover text.
# This call is the cell's last expression, so its return value is what gets displayed.
bar_outline = {"line": {"color": "#111", "width": 1}}
fig.update_traces(
    marker=bar_outline,
    hovertemplate="Age: %{x}<br>Frequency: %{y}",
)

In [12]:
# Overlaid, semi-transparent age histograms, one per value of `sex`.
fig = px.histogram(
    heart,
    x="age",
    color="sex",
    nbins=30,
    barmode="overlay",
    opacity=0.7,
    color_discrete_sequence=["blue", "red"],
    title="Age Distribution by Gender",
)

# Centre the title and enlarge all fonts.
fig.update_layout(title_x=0.5, font_size=18)

# Outline the bars and set the hover text.
bar_outline = {"line": {"color": "#111", "width": 1}}
fig.update_traces(
    marker=bar_outline,
    hovertemplate="Age: %{x}<br>Frequency: %{y}",
)
fig.show()


Is there a significant difference in heart disease occurrence between males and females❓

In [13]:
# Number of rows for each value of `sex`.
gender = heart["sex"].value_counts()
gender

sex
1    206
0     96
Name: count, dtype: int64

In [14]:
# Donut chart of the share of rows per value of `sex`.
fig = px.pie(
    heart,
    names="sex",
    color_discrete_sequence=["blue", "red"],
    title="Gender Distribution of Patients",
)

# Centred title in the same typeface/size used by the other figures.
title_style = {"x": 0.5, "font": {"size": 28, "family": "tahoma"}}
fig.update_layout(title=title_style)

# hole -> donut shape; pull -> first slice is offset from the centre.
fig.update_traces(
    hole=0.4,
    pull=[0.1, 0],
    textinfo="percent+label",
    hovertemplate="%{label}: %{value} <br>Percentage: %{percent}",
)
fig

How do cholesterol levels vary among heart disease patients❓

In [15]:
# Box plot of cholesterol, one box per value of `target`.
axis_labels = {"target": "Heart disease", "cholestoral": "Cholesterol"}
fig = px.box(
    heart,
    x="target",
    y="cholestoral",
    color="target",
    labels=axis_labels,
    color_discrete_sequence=["blue", "red"],
    title="Cholesterol Levels in Patients",
)

fig.update_layout(title_x=0.5)  # centre the title
fig.show()


Is there a correlation between resting blood pressure and heart disease❓

In [16]:
# Scatter of resting blood pressure (x) against age (y), coloured by `target`
# on the Viridis scale, with an OLS trendline requested from Plotly Express.
axis_labels = {
    "resting_bp": "Resting Blood Pressure",
    "age": "Age",
    "target": "Heart Disease",
}
fig = px.scatter(
    heart,
    x="resting_bp",
    y="age",
    color="target",
    labels=axis_labels,
    trendline="ols",
    color_continuous_scale=px.colors.sequential.Viridis,
    title="Resting Blood Pressure vs Age",
)

# Shared title / hover styling; operates on the notebook-level `fig`.
custome_layout()
fig.update_layout(title_x=0.5)
fig.show()


In [17]:
# Box plot of resting blood pressure, one box per value of `target`.
axis_labels = {"target": "Heart disease", "resting_bp": "Blood Pressure"}
fig = px.box(
    heart,
    x="target",
    y="resting_bp",
    color="target",
    labels=axis_labels,
    color_discrete_sequence=px.colors.qualitative.Set2,
    title="Resting Blood Pressure in Patients",
)

fig.update_layout(title_x=0.5)  # centre the title
fig.show()

Is fasting blood sugar a significant indicator of heart disease❓

In [18]:
# Number of rows for each value of `fasting_blood_sugar`.
blood_sugar = heart['fasting_blood_sugar'].value_counts()
blood_sugar

fasting_blood_sugar
0    257
1     45
Name: count, dtype: int64

In [19]:
# Grouped bar counts of the fasting-blood-sugar flag, split by `target`.
#
# Palette note: px.colors.sequential.Viridis is an ordered list meant for
# continuous scales; used directly as a discrete palette, the two groups got
# its first two entries, which are both dark purples and hard to tell apart.
# The two ends of the scale are used instead.
# (`plotly.express` is already imported as `px` in the notebook's import cell,
# so the duplicate import that used to open this cell was removed.)
viridis = px.colors.sequential.Viridis
fig = px.histogram(heart,
                   x="fasting_blood_sugar",
                   color="target",
                   barmode="group",
                   text_auto=True,
                   labels={"fasting_blood_sugar": "Fasting Blood Sugar",
                           "target": "Heart Disease (0 = No, 1 = Yes)"},
                   title="Heart Disease vs. Fasting Blood Sugar",
                   color_discrete_sequence=[viridis[0], viridis[-1]])


# Centre the title and enlarge fonts.
fig.update_layout(title_x=0.5, font_size=18)
# Outline the bars and set the hover text; as the cell's last expression,
# the returned figure is what gets displayed.
fig.update_traces(
    hovertemplate="Fasting Blood Sugar: %{x}<br>Frequency: %{y}",
    marker=dict(line=dict(color='#111', width=1))
)


How does the maximum heart rate achieved vary between patients with and without heart disease❓

In [20]:
# Box plot of maximum heart rate, one box per value of `target`.
axis_labels = {"target": "Heart disease", "max_hr": "Max Heart Rate"}
fig = px.box(
    heart,
    x="target",
    y="max_hr",
    color="target",
    labels=axis_labels,
    color_discrete_sequence=px.colors.qualitative.Vivid,
    title="Max Heart Rate in Patients",
)

# Centre the title and enlarge fonts.
fig.update_layout(title_x=0.5, font_size=18)

# Hover text (applies to outlier points as well) and a thin dark marker outline.
# Last expression of the cell, so the returned figure is displayed.
marker_outline = {"line": {"color": "#111", "width": 1}}
fig.update_traces(
    marker=marker_outline,
    hovertemplate="Max Heart Rate: %{y}",
)

In [21]:
# Grouped bar counts of the exercise-induced-angina flag, split by `target`.
#
# Palette note: px.colors.sequential.Cividis is an ordered list meant for
# continuous scales; used directly as a discrete palette, the two groups got
# its first two entries, which are both dark blues and hard to tell apart.
# The two ends of the scale are used instead.
cividis = px.colors.sequential.Cividis
fig = px.histogram(
    heart,
    x='exang',
    color='target',
    barmode='group',
    title='Impact of Exercise-Induced Angina on Heart Disease',
    labels={'exang':'Exercise Angina', 'target':'Heart Disease'},
    text_auto=True,
    color_discrete_sequence=[cividis[0], cividis[-1]]
)

# Centre the title and enlarge fonts.
fig.update_layout(title_x=0.5, font_size=18)
# Outline the bars and set the hover text; as the cell's last expression,
# the returned figure is what gets displayed.
fig.update_traces(
    hovertemplate='Exercise Angina: %{x}<br>Frequency: %{y}',
    marker=dict(line=dict(color='#111', width=1))
)

In [22]:
# Cholesterol by chest-pain type; marker size encodes age, colour encodes `target`.
#
# `target` is stored as an integer column, and Plotly Express maps numeric
# colour columns onto a continuous colour scale, silently ignoring
# `color_discrete_sequence`. Casting it to string (on a copy, `heart` itself
# is untouched) makes it categorical so the two-colour palette is applied.
# Colours are assigned to categories in order of first appearance in the data.
fig = px.scatter(
        heart.assign(target=heart["target"].astype(str)),
        x='chest_pain_type',
        y='cholestoral',
        color="target",
        size="age",
        hover_name="sex",
        color_discrete_sequence=["#FFA7A7", "#A8E6CF"],
        labels={
            "target": "Heart Disease",
        },
        # Every other figure in the notebook has a title; this one had none.
        title="Cholesterol by Chest Pain Type",
    )

fig

What are the most correlated factors in heart disease presence❓

In [23]:
# Absolute correlation of every feature with `target`, strongest first.
# The sign is discarded, so the chart shows strength only, not direction.
target_corr = heart.corr()["target"].drop("target")
correlation = target_corr.abs().sort_values(ascending=False)

# Two-column frame (feature name, |correlation|) for plotting.
correlation_df = correlation.reset_index()
correlation_df.columns = ["Feature", "Correlation"]


def _three_decimals(value):
    """Format a number with three decimal places for the bar labels."""
    return f"{value:.3f}"


bar_labels = correlation_df["Correlation"].apply(_three_decimals)

fig = px.bar(
    correlation_df,
    x="Feature",
    y="Correlation",
    text=bar_labels,
    color="Correlation",
    color_continuous_scale="reds",  # darker red = stronger correlation
    title="Most Correlated Factors with Heart Disease",
)

fig.update_layout(
    title_x=0.5,
    font_size=14,
    xaxis_title="Feature",
    yaxis_title="Correlation Strength",
)

# Outline the bars and set the hover text.
bar_outline = {"line": {"color": "#111", "width": 1}}
fig.update_traces(
    marker=bar_outline,
    hovertemplate="Feature: %{x}<br>Correlation: %{y}",
)

fig.show()
