# Public Health Policy & Reporting
## Philippine Health Indicators

**Purpose**
Translate analytical results into policy-relevant insights, benchmarks,
and executive-ready outputs to support evidence-based decision-making.

**Dataset Source**
https://www.kaggle.com/datasets/thedevastator/philippine-health-indicators

**Global Benchmarks**
- Sustainable Development Goals (SDGs)
- WHO / UNICEF health indicators



In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide display defaults: seaborn grid theme, and show up to 100
# DataFrame columns before pandas truncates the output.
sns.set(style="whitegrid")
pd.set_option("display.max_columns", 100)

# Load datasets
df = pd.read_csv("/content/cleaned_philippine_health_indicators.csv")
# `read_csv` has no `errors` keyword (passing one raises TypeError). The
# option that controls how undecodable bytes are treated is
# `encoding_errors`; "ignore" drops them instead of failing the load.
trends = pd.read_csv(
    "/content/national_health_indicator_trends.csv",
    encoding_errors="ignore",
)
forecast = pd.read_csv(
    "/content/health_indicator_forecast.csv",
    encoding_errors="ignore",
)

df.head()


In [None]:
# Illustrative global reference values (WHO / SDG style targets), keyed by
# indicator name. Built from (name, target) pairs; insertion order is kept.
benchmark_targets = dict(
    [
        ("Under-Five Mortality Rate", 25),  # per 1,000 live births (SDG 3.2)
        ("Maternal Mortality Ratio", 70),   # per 100,000 live births (SDG 3.1)
        ("Life Expectancy", 75),            # years
    ]
)

benchmark_targets

In [None]:
# Keep only the benchmarks whose indicator name is an actual column of `df`;
# targets without a matching column are dropped from the comparison.
available_targets = {
    name: goal
    for name, goal in benchmark_targets.items()
    if name in df.columns
}

available_targets

In [None]:
# Compare the most recent national value of each benchmarked indicator with
# its global target.
#
# Direction matters when deciding the status: mortality indicators are on
# track when they are at or below the target, whereas life expectancy is on
# track when it is at or above the target.
higher_is_better = {"Life Expectancy"}

benchmark_summary = []

for indicator, target in available_targets.items():
    # Mean across all rows per year, in chronological order. Years whose mean
    # is NaN are dropped so "latest" is the latest value actually observed.
    yearly_mean = (
        df.groupby("Year")[indicator]
        .mean()
        .sort_index()
        .dropna()
    )
    if yearly_mean.empty:
        # No usable observations: nothing to benchmark for this indicator.
        continue

    latest_value = yearly_mean.iloc[-1]

    if indicator in higher_is_better:
        on_track = latest_value >= target
    else:
        on_track = latest_value <= target

    benchmark_summary.append({
        "Indicator": indicator,
        "Latest National Value": round(latest_value, 2),
        "Global Target": target,
        # Gap is always latest minus target; for higher-is-better indicators
        # a positive gap is therefore favourable.
        "Gap": round(latest_value - target, 2),
        "Status": "On Track" if on_track else "Off Track",
    })

# Explicit columns keep the frame's schema stable even when no indicator could
# be benchmarked, so later merges and plots still find the expected columns.
benchmark_df = pd.DataFrame(
    benchmark_summary,
    columns=[
        "Indicator",
        "Latest National Value",
        "Global Target",
        "Gap",
        "Status",
    ],
)
benchmark_df

In [None]:
# Identify indicators with worsening trends
#
# For every numeric column except "Year", average the indicator per year and
# take the mean of the last three year-over-year changes as its recent trend.
trend_flags = []

numeric_cols = [
    c for c in df.select_dtypes(include=np.number).columns.tolist()
    if c != "Year"
]

for indicator in numeric_cols:
    yearly = (
        df.groupby("Year")[indicator]
        .mean()
        .sort_index()
    )

    # Fewer than three yearly points is too short to describe a recent trend.
    if len(yearly) < 3:
        continue

    recent_trend = yearly.diff().iloc[-3:].mean()

    if pd.isna(recent_trend):
        # No computable change in the window; do not report it as improving.
        flag = "Insufficient Data"
    elif recent_trend > 0:
        # NOTE(review): treats any increase as worsening, which presumes
        # lower-is-better for every numeric column -- confirm for indicators
        # where a rise is desirable (e.g. life expectancy, coverage rates).
        flag = "Worsening"
    else:
        flag = "Improving / Stable"

    trend_flags.append({
        "Indicator": indicator,
        "Recent Trend": round(recent_trend, 3),
        "Flag": flag,
    })

# Explicit columns so the sort below (and later merges on "Indicator") still
# work when no indicator had enough yearly data.
bottleneck_df = pd.DataFrame(
    trend_flags,
    columns=["Indicator", "Recent Trend", "Flag"],
)
bottleneck_df.sort_values("Recent Trend", ascending=False).head(10)

In [None]:
# Check for geographic dimension
#
# Defaults are assigned first so that `regional_risk` and `risk_indicator`
# always exist, even when the dataset has no "Region" column or no numeric
# indicator to stratify on.
risk_indicator = None
regional_risk = pd.DataFrame(columns=["Region", "Risk Tier"])

if "Region" in df.columns and numeric_cols:
    # The first numeric indicator is used as the stratification measure.
    risk_indicator = numeric_cols[0]

    # Mean of that indicator per region, highest first.
    regional_risk = (
        df.groupby("Region")[risk_indicator]
        .mean()
        .sort_values(ascending=False)
        .reset_index()
    )

    # Define risk tiers: terciles of the regional mean, lowest third = "Low".
    regional_risk["Risk Tier"] = pd.qcut(
        regional_risk[risk_indicator],
        q=3,
        labels=["Low", "Medium", "High"]
    )

regional_risk.head()

In [None]:
# Visualize risk tiers
#
# Same guard as the stratification step: without a "Region" column there is
# nothing regional to plot, so the chart is skipped instead of failing.
if "Region" in df.columns:
    plt.figure(figsize=(10, 6))
    # One horizontal bar per region, coloured by its risk tier.
    sns.barplot(
        data=regional_risk,
        x=risk_indicator,
        y="Region",
        hue="Risk Tier"
    )
    plt.title(f"Regional Risk Stratification: {risk_indicator}")
    plt.show()

In [None]:
# Attach each benchmarked indicator's recent trend. The left join keeps every
# benchmark row even when no trend row exists for that indicator.
priority_df = pd.merge(benchmark_df, bottleneck_df, how="left", on="Indicator")

# Priority score = |gap to target| + |recent trend|; a missing component
# contributes 0 to the sum.
gap_magnitude = priority_df["Gap"].abs().fillna(0)
trend_magnitude = priority_df["Recent Trend"].abs().fillna(0)
priority_df["Priority Score"] = gap_magnitude + trend_magnitude

priority_df.sort_values(by="Priority Score", ascending=False)

In [None]:
# Executive dashboard: a 2x2 grid of summary charts.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 10))

# Top-left: number of benchmarked indicators per status
status_ax = axes[0, 0]
sns.countplot(data=benchmark_df, x="Status", ax=status_ax)
status_ax.set_title("Indicator Benchmark Status")

# Top-right: gap to the global target, largest gap first
gap_ax = axes[0, 1]
gaps_descending = benchmark_df.sort_values("Gap", ascending=False)
sns.barplot(data=gaps_descending, x="Gap", y="Indicator", ax=gap_ax)
gap_ax.set_title("Gap vs Global Targets")

# Bottom-left: the five indicators with the largest recent trend value
trend_ax = axes[1, 0]
steepest_trends = bottleneck_df.sort_values(
    "Recent Trend", ascending=False
).head(5)
sns.barplot(data=steepest_trends, x="Recent Trend", y="Indicator", ax=trend_ax)
trend_ax.set_title("Worsening Indicators")

# Bottom-right: regions per risk tier (only when the data has regions)
if "Region" in df.columns:
    region_ax = axes[1, 1]
    sns.countplot(data=regional_risk, x="Risk Tier", ax=region_ax)
    region_ax.set_title("Regional Risk Distribution")

plt.tight_layout()
plt.show()

In [None]:
# Build one generic recommendation row for every indicator whose benchmark
# status is "Off Track".
recommendations = [
    {
        "Indicator": row["Indicator"],
        "Policy Recommendation": (
            "Prioritize funding, service coverage expansion, "
            "and monitoring aligned with SDG targets."
        ),
    }
    for _, row in priority_df.iterrows()
    if row.get("Status") == "Off Track"
]

# Explicit columns keep the schema (and the exported CSV header) intact even
# when no indicator is off track.
policy_recommendations = pd.DataFrame(
    recommendations,
    columns=["Indicator", "Policy Recommendation"],
)
policy_recommendations

In [None]:
# Export the policy outputs as CSV files (row index omitted).
benchmark_df.to_csv(
    "/content/policy_benchmark_summary.csv",
    index=False
)

# The regional stratification only exists for datasets with a "Region"
# column, so its export uses the same guard as the regional charts.
if "Region" in df.columns:
    regional_risk.to_csv(
        "/content/regional_risk_stratification.csv",
        index=False
    )

policy_recommendations.to_csv(
    "/content/policy_recommendations.csv",
    index=False
)

## Executive Summary

- Several national health indicators remain off-track relative to SDG targets
- Trend analysis highlights persistent bottlenecks requiring intervention
- Regional risk stratification supports geographically targeted policies
- Priority scoring enables evidence-based allocation of limited resources

This notebook translates analytics into actionable intelligence for health sector leaders and policymakers.