In [28]:
import streamlit as st
import pandas as pd
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt

In [18]:
# Location of the stroke dataset CSV, relative to the working directory.
DATA_PATH = "healthcare-dataset-stroke-data.csv"

# Load the raw records; the bare `df` below makes the notebook render the frame.
df = pd.read_csv(DATA_PATH)
df

Unnamed: 0,id,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,"stroke (1-diagnosed w/ stroke, 0-no stroke)",Unnamed: 12,Unnamed: 13,Unnamed: 14
0,9046,Male,67.0,0,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1,,,
1,51676,Female,61.0,0,0,Yes,Self-employed,Rural,202.21,,never smoked,1,,,
2,31112,Male,80.0,0,1,Yes,Private,Rural,105.92,32.5,never smoked,1,,,
3,60182,Female,49.0,0,0,Yes,Private,Urban,171.23,34.4,smokes,1,,,
4,1665,Female,79.0,1,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5105,18234,Female,80.0,1,0,Yes,Private,Urban,83.75,,never smoked,0,,,
5106,44873,Female,81.0,0,0,Yes,Self-employed,Urban,125.20,40.0,never smoked,0,,,
5107,19723,Female,35.0,0,0,Yes,Self-employed,Rural,82.99,30.6,never smoked,0,,,
5108,37544,Male,51.0,0,0,Yes,Private,Rural,166.29,25.6,formerly smoked,0,,,


In [22]:
# --- Preprocessing ---
# Coerce the measurement columns to numeric in place; entries that cannot be
# parsed become NaN (errors='coerce'). The glucose result is written back to
# 'avg_glucose_level' -- the column the correlation heatmap reads -- instead of
# the misspelled 'avg_glucos', which only added a stray duplicate column.
for numeric_col in ('bmi', 'avg_glucose_level'):
    df[numeric_col] = pd.to_numeric(df[numeric_col], errors='coerce')

In [30]:
# --- Sidebar Filters ---
st.sidebar.title("🔍 Filter Data")

# Distinct values present in the data; each list is used both as the choices
# and as the default selection, so every value starts out selected.
gender_options = df["gender"].unique()
residence_options = df["Residence_type"].unique()
stroke_options = ["All", "Diagnosed", "Not Diagnosed"]

gender = st.sidebar.multiselect("Gender", gender_options, default=gender_options)
residence = st.sidebar.multiselect("Residence_type", residence_options, default=residence_options)
stroke_filter = st.sidebar.selectbox("Stroke Status", options=stroke_options)

In [34]:
# --- Filter Logic ---
# Exact header of the stroke outcome column as loaded from the CSV. Note the
# hyphen in "0-no stroke": the earlier "0=no stroke" spelling matched no column
# and raised KeyError as soon as a specific stroke status was selected.
stroke_col = "stroke (1-diagnosed w/ stroke, 0-no stroke)"

# Keep only the rows matching the sidebar's gender and residence selections.
filtered_df = df[df["gender"].isin(gender) & df["Residence_type"].isin(residence)]

# Optionally narrow to a single outcome; "All" leaves the frame untouched.
if stroke_filter == "Diagnosed":
    filtered_df = filtered_df[filtered_df[stroke_col] == 1]
elif stroke_filter == "Not Diagnosed":
    filtered_df = filtered_df[filtered_df[stroke_col] == 0]

In [36]:
# --- Dashboard Header ---
# Page title followed by a one-line description of what the dashboard shows.
st.title(body="Stroke Risk Insights Dashboard")
st.markdown(
    body="Analyze patterns and risk factors associated with stroke occurrences."
)

DeltaGenerator()

In [68]:
# --- KPIs ---
# Three headline numbers for the current filter selection.
stroke_col = "stroke (1-diagnosed w/ stroke, 0-no stroke)"

# Convert the aggregate to a built-in int so the metric receives a plain
# Python number rather than a NumPy scalar.
stroke_cases = int(filtered_df[stroke_col].sum())

# The mean is NaN when the selection has no rows (or no BMI values); show a
# readable placeholder instead of the literal text "nan".
avg_bmi = filtered_df["bmi"].mean()
avg_bmi_label = "N/A" if pd.isna(avg_bmi) else f"{avg_bmi:.2f}"

col1, col2, col3 = st.columns(3)
col1.metric("Total Patients", len(filtered_df))
col2.metric("Stroke Cases", stroke_cases)
col3.metric("Avg BMI", avg_bmi_label)

DeltaGenerator()

In [74]:
# --- Stroke Cases by Age Group ---
# Grouped histogram of patients per age band, split by stroke outcome.
#
# No cell shown in this notebook creates an `age_group` column, so on a fresh
# kernel the chart would fail. If the column is already present it is used
# as-is; otherwise it is derived from `age` on a chart-local copy so that
# `filtered_df` itself is left unchanged for the cells that follow.
if "age_group" in filtered_df.columns:
    age_chart_df = filtered_df
else:
    # NOTE(review): these band edges are an assumption -- adjust them to the
    # age bands the analysis intends. right=False makes each band include its
    # lower edge and exclude its upper edge (e.g. "18-34" covers 18 <= age < 35).
    age_chart_df = filtered_df.assign(
        age_group=pd.cut(
            filtered_df["age"],
            bins=[0, 18, 35, 50, 65, float("inf")],
            labels=["0-17", "18-34", "35-49", "50-64", "65+"],
            right=False,
        )
    )

age_chart = px.histogram(
    age_chart_df,
    x="age_group",
    color="stroke (1-diagnosed w/ stroke, 0-no stroke)",
    barmode="group",
    title="Stroke Cases by Age Group"
)
st.plotly_chart(age_chart, use_container_width=True)

DeltaGenerator()

In [82]:
# --- Correlation Heatmap ---
st.subheader("📈 Correlation Heatmap (Numerical Features)")

# Columns that take part in the correlation matrix.
heatmap_cols = [
    'age',
    'hypertension',
    'heart_disease',
    'avg_glucose_level',
    'bmi',
    'stroke (1-diagnosed w/ stroke, 0-no stroke)',
]

# Rows with a missing value in any of these columns are dropped before the
# pairwise correlations are computed.
corr_df = filtered_df[heatmap_cols].dropna()
corr_matrix = corr_df.corr()

# Draw onto an explicit figure/axes pair and hand the figure to Streamlit.
fig, ax = plt.subplots()
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", ax=ax)
st.pyplot(fig)

DeltaGenerator()

In [84]:
# --- Raw Data Viewer ---
# Show the filtered rows inside an expander section of the page.
raw_data_label = "📄 View Raw Data"
with st.expander(raw_data_label):
    st.dataframe(data=filtered_df)