# Comparative Analysis
## Philippine Health Indicators

**Purpose**
Identify disparities, inter-indicator relationships, and equity gaps across
health outcomes, service coverage, and demographic or geographic dimensions.

**Dataset Source**
https://www.kaggle.com/datasets/thedevastator/philippine-health-indicators



In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide plot styling and a wider column limit for DataFrame display.
sns.set(style="whitegrid")
pd.options.display.max_columns = 100

# Load the cleaned dataset from its CSV file.
DATA_PATH = "/content/cleaned_philippine_health_indicators.csv"
df = pd.read_csv(DATA_PATH)

# Preview the first rows.
df.head()


In [None]:
# Split the column names by dtype: numeric vs. categorical/object.
numeric_cols = list(df.select_dtypes(include="number").columns)
categorical_cols = list(
    df.select_dtypes(include=["object", "category"]).columns
)

# Show both lists as the cell output.
numeric_cols, categorical_cols


In [None]:
# Pairwise Pearson correlations between all numeric columns.
corr_matrix = df[numeric_cols].corr(method="pearson")

# Diverging palette centred on zero so the sign of each correlation is visible.
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(corr_matrix, cmap="coolwarm", center=0, linewidths=0.5, ax=ax)
ax.set_title("Correlation Matrix of Health Indicators")
plt.show()


In [None]:
# Flatten the correlation matrix into one row per (indicator, indicator) pair.
corr_pairs = (
    corr_matrix
    .unstack()
    .reset_index()
    .rename(columns={0: "correlation"})
)

# The matrix is symmetric, so (A, B) and (B, A) carry the same value. Keep only
# rows where the first indicator precedes the second in matrix order: this
# removes self-correlations AND the mirrored duplicates that would otherwise
# make a "top 10" listing show just five distinct pairs, each twice.
position = {name: i for i, name in enumerate(corr_matrix.columns)}
first_pos = corr_pairs["level_0"].map(position)
second_pos = corr_pairs["level_1"].map(position)
corr_pairs = corr_pairs[first_pos < second_pos]

# Ten most positively correlated pairs (NaN correlations sort last).
corr_pairs.sort_values("correlation", ascending=False).head(10)


In [None]:
# Ten pairs with the lowest correlation values.
corr_pairs.sort_values(by="correlation", ascending=True).head(n=10)


In [None]:
# Example pair: the first two numeric columns (adjust names if needed).
x_indicator = numeric_cols[0]
y_indicator = numeric_cols[1]

fig, ax = plt.subplots(figsize=(8, 6))

# Raw observations.
sns.scatterplot(data=df, x=x_indicator, y=y_indicator, alpha=0.6, ax=ax)

# Regression line only; the points are already drawn by the scatterplot.
sns.regplot(
    data=df, x=x_indicator, y=y_indicator, scatter=False, color="red", ax=ax
)

ax.set_title(f"{x_indicator} vs {y_indicator}")
plt.show()


In [None]:
# Pairwise grid over the first four numeric columns (kept small for
# readability), with KDE curves on the diagonal.
pairplot_cols = numeric_cols[:4]
sns.pairplot(data=df[pairplot_cols], diag_kind="kde")
plt.show()


In [None]:
# Candidate grouping variables; keep the ones present in the dataset,
# preserving the candidate order.
possible_group_cols = ["Region", "Province", "UrbanRural", "Sex", "IncomeGroup"]
available_cols = set(df.columns)
group_cols = [name for name in possible_group_cols if name in available_cols]

group_cols


In [None]:
if "Region" in group_cols:
    # Compare regions on the first numeric column.
    indicator = numeric_cols[0]

    # Mean of the indicator per region, highest first.
    region_means = df.groupby("Region")[indicator].mean()
    regional_summary = region_means.sort_values(ascending=False).reset_index()

    # Horizontal bars: one per region.
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(data=regional_summary, x=indicator, y="Region", ax=ax)

    ax.set_title(f"Regional Comparison: {indicator}")
    ax.set_xlabel("Mean Value")
    ax.set_ylabel("Region")
    plt.show()


In [None]:
if "UrbanRural" in group_cols:
    # Compare urban/rural groups on the first numeric column.
    indicator = numeric_cols[0]

    # Mean of the indicator for each UrbanRural category.
    equity_summary = (
        df.groupby("UrbanRural")[indicator]
        .mean()
        .reset_index()
    )

    # A bare expression nested inside `if` is not auto-displayed by the
    # notebook (only a trailing top-level expression is), so print the table.
    print(equity_summary)


In [None]:
if "UrbanRural" in group_cols:
    # One bar per UrbanRural category, using the summary and indicator
    # computed in the previous cell.
    fig, ax = plt.subplots(figsize=(6, 5))
    sns.barplot(data=equity_summary, x="UrbanRural", y=indicator, ax=ax)
    ax.set_title(f"Equity Comparison: {indicator}")
    plt.show()


In [None]:
if "UrbanRural" in group_cols:
    # Disparity ratio: highest group mean divided by lowest group mean.
    highest = equity_summary[indicator].max()
    lowest = equity_summary[indicator].min()

    # Guard the denominator: a zero minimum would otherwise produce inf
    # (or NaN for 0/0) together with a runtime warning.
    ratio = highest / lowest if lowest != 0 else float("nan")

    # A bare `ratio` inside `if` is not auto-displayed by the notebook,
    # so print the value explicitly.
    print(f"Max/min ratio for {indicator}: {ratio:.2f}")


In [None]:
# Geographic grouping columns that exist in the dataset.
geo_cols = [c for c in ["Region", "Province"] if c in df.columns]

if geo_cols:
    # Mean of every numeric indicator per geographic unit.
    geo_indicator_summary = (
        df.groupby(geo_cols)[numeric_cols]
        .mean()
        .reset_index()
    )
else:
    # No geographic columns available: define an empty summary so the
    # preview below does not fail with NameError.
    geo_indicator_summary = pd.DataFrame(columns=numeric_cols)

geo_indicator_summary.head()


In [None]:
# Write the correlation matrix to CSV (row labels are kept by default).
correlation_csv = "/content/indicator_correlation_matrix.csv"
corr_matrix.to_csv(correlation_csv)

# Write the regional summary, without the row index, when the dataset has a
# Region column.
if "Region" in df.columns:
    regional_csv = "/content/regional_indicator_summary.csv"
    regional_summary.to_csv(regional_csv, index=False)


## Key Findings from Comparative Analysis

- Several indicators exhibit strong positive and negative correlations,
  suggesting potential causal or structural relationships; correlation alone
  does not establish causation, so these pairs are candidates for further study.
- Cross-indicator comparisons highlight service-outcome linkages.
- Group-level analysis reveals equity gaps across regions and demographics.
- Regional disparities justify targeted policy intervention and resource allocation.

