# Trend and Time-Series Analysis
## Philippine Health Indicators

**Purpose**
Analyze temporal trends, growth rates, and long-term patterns in Philippine
health indicators to support epidemiological insight and policy evaluation.

**Dataset Source**
https://www.kaggle.com/datasets/thedevastator/philippine-health-indicators

**Prerequisite**
Output of `01_exploratory_data_analysis.ipynb`, saved as
`cleaned_philippine_health_indicators.csv`.


In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from statsmodels.tsa.seasonal import STL

# Notebook-wide display settings: show up to 100 columns when printing
# DataFrames and use the seaborn "whitegrid" style for the figures.
pd.set_option("display.max_columns", 100)
sns.set(style="whitegrid")

# Read the cleaned dataset listed as the prerequisite of this notebook.
cleaned_csv_path = "/content/cleaned_philippine_health_indicators.csv"
df = pd.read_csv(cleaned_csv_path)

# Preview the first rows as the cell output.
df.head()


In [None]:
# Verify temporal coverage.
# An explicit raise is used instead of `assert` so the check still runs when
# Python is started with -O, which strips assert statements.
if "Year" not in df.columns:
    raise ValueError("Year column is required for time-series analysis")

# Summary of the Year column, shown as the cell output.
df["Year"].describe()


In [None]:
# Identify numeric indicators: every numeric column except the time axis.
# Filtering (rather than list.remove) avoids a ValueError when "Year" was not
# parsed as a numeric dtype and is therefore absent from the selection.
numeric_cols = [
    name
    for name in df.select_dtypes(include=np.number).columns
    if name != "Year"
]

numeric_cols


In [None]:
# National series: average each numeric indicator over all rows that share
# the same Year, then turn Year back into an ordinary column.
yearly_means = df.groupby("Year")[numeric_cols].mean()
national_trends = yearly_means.reset_index()

national_trends.head()


In [None]:
# Overlay every numeric indicator on a single set of axes.
trend_fig, trend_ax = plt.subplots(figsize=(14, 8))

years = national_trends["Year"]
for col in numeric_cols:
    trend_ax.plot(years, national_trends[col], label=col, alpha=0.7)

trend_ax.set_title("National Health Indicator Trends Over Time")
trend_ax.set_xlabel("Year")
trend_ax.set_ylabel("Value")
# Anchor the legend outside the right edge so it does not cover the lines.
trend_ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
trend_fig.tight_layout()
plt.show()


In [None]:
# Example selection: the first three numeric columns.
# Adjust this to the indicator names actually present in the dataset.
n_key_indicators = 3
key_indicators = numeric_cols[:n_key_indicators]
key_indicators


In [None]:
# One stacked panel per key indicator, all sharing the Year axis.
# squeeze=False makes plt.subplots return a 2-D array even for a single row;
# without it a lone Axes object comes back and zip(axes, ...) fails when only
# one key indicator is selected.
fig, axes = plt.subplots(
    len(key_indicators),
    1,
    figsize=(12, 10),
    sharex=True,
    squeeze=False,
)
axes = axes.ravel()  # flatten the (n, 1) grid back to a 1-D sequence

for ax, indicator in zip(axes, key_indicators):
    ax.plot(
        national_trends["Year"],
        national_trends[indicator],
        marker="o"
    )
    ax.set_title(f"Trend: {indicator}")
    ax.set_ylabel("Value")

# Label the x-axis on the bottom panel explicitly instead of relying on
# pyplot's "current axes" state.
axes[-1].set_xlabel("Year")
plt.tight_layout()
plt.show()


In [None]:
# Add one "<indicator>_rolling_3yr" column per indicator holding the mean of
# the current row and the two rows before it. The first two rows are NaN
# because the 3-row window is incomplete there.
rolling_columns = {
    f"{col}_rolling_3yr": national_trends[col].rolling(window=3).mean()
    for col in numeric_cols
}
# assign() returns a new DataFrame, so national_trends itself is untouched.
rolling_trends = national_trends.assign(**rolling_columns)

rolling_trends.head()


In [None]:
# Compare the raw national series with its 3-year rolling average for the
# first key indicator. `indicator` stays set for the cells that follow.
indicator = key_indicators[0]
rolling_column = f"{indicator}_rolling_3yr"

rolling_fig, rolling_ax = plt.subplots(figsize=(10, 5))
rolling_ax.plot(
    national_trends["Year"],
    national_trends[indicator],
    label="Original",
)
rolling_ax.plot(
    rolling_trends["Year"],
    rolling_trends[rolling_column],
    label="3-Year Rolling Avg",
    linewidth=3,  # thicker line so the smoothed series stands out
)

rolling_ax.set_title(f"Rolling Average Trend: {indicator}")
rolling_ax.set_xlabel("Year")
rolling_ax.set_ylabel("Value")
rolling_ax.legend()
plt.show()


In [None]:
# Percentage change of each national indicator relative to the previous row
# of national_trends (the previous year, provided no year is missing).
growth_rates = national_trends.copy()

for col in numeric_cols:
    # fill_method=None: do not forward-fill gaps before differencing. The
    # default padding reports a spurious 0% change for a missing year and is
    # deprecated in recent pandas releases.
    yoy_change = growth_rates[col].pct_change(fill_method=None) * 100
    # A zero baseline produces +/-inf; store NaN instead so the growth rate
    # is marked as undefined and does not break plots or exports.
    growth_rates[f"{col}_pct_change"] = yoy_change.replace(
        [np.inf, -np.inf], np.nan
    )

growth_rates.filter(like="pct_change").head()


In [None]:
# Bar chart of the year-over-year % change for `indicator`, which was set in
# an earlier cell.
yoy_column = f"{indicator}_pct_change"

yoy_fig, yoy_ax = plt.subplots(figsize=(10, 5))
yoy_ax.bar(growth_rates["Year"], growth_rates[yoy_column])

# Dashed zero line separates growth (above) from decline (below).
yoy_ax.axhline(0, color="black", linestyle="--")
yoy_ax.set_title(f"Year-over-Year % Change: {indicator}")
yoy_ax.set_xlabel("Year")
yoy_ax.set_ylabel("% Change")
plt.show()


In [None]:
# Candidate subgroup dimensions; keep only those present in the dataset,
# preserving the order of the candidate list.
possible_group_cols = ["Region", "Province", "Sex", "AgeGroup"]
available_columns = set(df.columns)
group_cols = [
    name for name in possible_group_cols if name in available_columns
]

group_cols


In [None]:
# Regional small multiples, drawn only when the dataset has a Region column.
if "Region" in group_cols:
    indicator = key_indicators[0]

    # Mean of the indicator for every (Year, Region) pair, in long format.
    region_year_means = df.groupby(["Year", "Region"])[indicator].mean()
    regional_trends = region_year_means.reset_index()

    # One panel per region, four per row. sharey=False gives each region its
    # own y-scale.
    g = sns.FacetGrid(
        regional_trends, col="Region", col_wrap=4, height=3, sharey=False
    )
    g.map(sns.lineplot, "Year", indicator)

    # y slightly above 1 places the title above the top of the figure area.
    g.fig.suptitle(f"Regional Trends: {indicator}", y=1.02)
    plt.show()


In [None]:
# Look for sub-annual time columns: any column whose lower-cased name
# contains "month" or "date".
time_keywords = ("month", "date")
time_cols = [
    name
    for name in df.columns
    if any(keyword in name.lower() for keyword in time_keywords)
]
time_cols


In [None]:
# Example: STL decomposition, run only when a Date column exists.
if "Date" in df.columns:
    # Parse in place so Date can be used as a datetime index.
    df["Date"] = pd.to_datetime(df["Date"])

    # Monthly averages of `indicator` (set in an earlier cell). Months
    # without any observation come out as NaN and are dropped.
    monthly_mean = df.set_index("Date").resample("M")[indicator].mean()
    ts = monthly_mean.dropna()
    # NOTE(review): after dropna the series may no longer be evenly spaced,
    # while period=12 below presumes twelve observations per year - confirm
    # the data has no missing months before interpreting the seasonal part.

    stl = STL(ts, period=12)
    result = stl.fit()

    result.plot()
    plt.suptitle(f"STL Decomposition: {indicator}")
    plt.show()


In [None]:
# Persist the derived tables for downstream notebooks. The row index is
# omitted because Year is already stored as a regular column.
exports = (
    (national_trends, "/content/national_health_indicator_trends.csv"),
    (growth_rates, "/content/health_indicator_growth_rates.csv"),
)
for frame, destination in exports:
    frame.to_csv(destination, index=False)


## Key Findings from Trend Analysis

- National health indicators show distinct long-term trajectories
- Rolling averages reveal structural improvements or stagnation
- Year-over-year growth rates highlight acceleration and regression periods
- Regional comparisons expose geographic health disparities
- The trend, rolling-average, and growth-rate outputs provide the baseline needed to justify follow-up forecasting and causal analysis
