In [None]:
# -----------------------------------------------
# 1. Import Libraries and Load Dataset
# -----------------------------------------------
import pandas as pd
import numpy as np

# NOTE: Replace 'patient_data.csv' with your actual CSV file name
df = pd.read_csv("patient_data.csv")

# Quick look at the first rows to sanity-check the parse.
print("📌 First 5 rows:")
print(df.head())

print("\n📌 Dataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would emit a stray "None" line after the report.
df.info()

# include='all' makes describe() cover non-numeric columns as well.
print("\n📌 Summary Statistics:")
print(df.describe(include='all'))

In [None]:
# -----------------------------------------------
# 2. Explore and Summarize Demographics
# -----------------------------------------------

# Numeric summary of the age column
print("📌 Age Distribution:")
age_summary = df['age'].describe()
print(age_summary)

# Frequency tables for the remaining demographic columns, each printed
# under its own heading (gender first, then department admissions).
for heading, column in (
    ("\n📌 Gender Distribution:", 'gender'),
    ("\n📌 Admissions per Department:", 'department'),
):
    print(heading)
    print(df[column].value_counts())

In [None]:
# -----------------------------------------------
# 3. Aggregate Key Statistics
# -----------------------------------------------

# Admissions by department: one row per department with its row count
# in the 'admissions_count' column.
dept_admissions = df.groupby('department').size().reset_index(name='admissions_count')

# Average stay length across all rows of the dataset
avg_stay = df['length_of_stay'].mean()

# The heading string must stay on one physical line; the line break in the
# output comes from the "\n" escape (a literal newline inside a
# single-quoted string is a SyntaxError).
print("📌 Department Admissions:\n", dept_admissions)
print(f"\n📌 Average Length of Stay: {avg_stay:.2f} days")

In [None]:
# -----------------------------------------------
# 4. Data Cleaning
# -----------------------------------------------

# Check for missing values (per-column count of nulls)
print("📌 Missing Values:")
print(df.isnull().sum())

# Fill missing stay lengths with the column mean.
# Assign the result back to the column instead of calling
# fillna(..., inplace=True) on df['length_of_stay']: that form operates on
# an intermediate object, triggers a chained-assignment FutureWarning in
# pandas 2.x, and leaves df unchanged under Copy-on-Write.
df['length_of_stay'] = df['length_of_stay'].fillna(df['length_of_stay'].mean())

# Convert categorical columns
df['gender'] = df['gender'].astype('category')
df['department'] = df['department'].astype('category')

print("\n📌 Updated Data Types:")
print(df.dtypes)

In [None]:
# -----------------------------------------------
# 5. Document Findings (Markdown)
# -----------------------------------------------
from IPython.display import display, Markdown

def _show_md(text):
    """Render a single Markdown snippet via IPython's display machinery."""
    display(Markdown(text))


# Headline followed by one bullet per finding; each snippet is displayed
# separately, in this order.
_show_md("## 🏥 Hospital Data Summary")
_show_md(f"- Total patients: **{len(df)}**")
_show_md(f"- Unique departments: **{df['department'].nunique()}**")
_show_md(f"- Average stay duration: **{df['length_of_stay'].mean():.2f}** days")
_show_md("- Gender breakdown and age distribution analyzed.")
_show_md("- Missing values handled and data cleaned.")