In [None]:
# Maternal Mortality Capstone: Race, Economics, and Climate

This notebook extends the previous maternal mortality ratio (MMR) visualizations by:
- Analyzing racial and ethnic disparities in U.S. maternal mortality (IHME data, 1999–2019)
- Linking global MMR to economic indicators (World Bank GDP per capita and income group)
- Linking U.S. national maternal mortality to monthly climate variables (NOAA)

: 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Notebook-wide plotting defaults: 8x5 figures with axis grid lines switched on.
plt.rcParams.update({
    "figure.figsize": (8, 5),
    "axes.grid": True,
})

In [None]:
# Input files for the maternal-mortality sources (VSRR, IHME, WHO).
# These are absolute paths under /Volumes/Extreme SSD/capstone; adjust them
# when running the notebook on another machine.
vsrr_path = "/Volumes/Extreme SSD/capstone/VSRR_Provisional_Maternal_Death_Counts_and_Rates.csv"
ihme_path = (
    "/Volumes/Extreme SSD/capstone/"
    "IHME_USA_MMR_STATE_RACE_ETHN_1999_2019_DATA/"
    "IHME_USA_MMR_STATE_RACE_ETHN_1999_2019_ESTIMATES_COLLAPSED_Y2023M07D03.CSV"
)
who_mmr_path = "/Volumes/Extreme SSD/capstone/MMR-maternal-deaths-and-LTR_MMEIG-trends_2000-2023_Revised-2025-1.xlsx"

In [None]:
# Input files for the economic and climate sources.
# The two World Bank files use absolute paths; climate_path is relative, so it
# resolves against the notebook's current working directory.
gdp_path = (
    "/Volumes/Extreme SSD/capstone/"
    "worldbank gdp per capita/"
    "API_NY.GDP.PCAP.CD_DS2_en_csv_v2_134819.csv"
)
income_path = "/Volumes/Extreme SSD/capstone/worldbank income groups.xlsx"
climate_path = 'noaa_us_national_climate_monthly.csv'


In [None]:
# Load the three maternal-mortality sources into DataFrames.
vsrr = pd.read_csv(vsrr_path)
ihme_full = pd.read_csv(ihme_path)
# who_mmr_path points at an .xlsx workbook, which read_csv cannot parse;
# read_excel is the matching reader.
# NOTE(review): this reads the first sheet with its first row as the header --
# confirm that is the sheet/layout holding the MMR series.
who_mmr = pd.read_excel(who_mmr_path)


In [None]:
# Sanity check: report (rows, columns) for each loaded table, one per line.
print(
    *(
        f"{label} shape: {frame.shape}"
        for label, frame in (("VSRR", vsrr), ("IHME", ihme_full), ("WHO MMR", who_mmr))
    ),
    sep="\n",
)

In [None]:
# Preview the first five rows of the VSRR table.
vsrr.head(n=5)

In [None]:
# Restrict the IHME estimates to rate rows for females in the 10-to-54 age group.
ihme_clean = ihme_full[
    (ihme_full["sex_name"] == "Female")
    & (ihme_full["age_group_name"] == "10 to 54")
    & (ihme_full["metric_name"] == "Rate")
].copy()

# Preview the four columns the later cells work with.
ihme_clean.loc[:, ["location_name", "race_group", "year_id", "val"]].head()


In [None]:
# Race summary table: row count, mean, min and max of `val` per race group,
# ordered from highest to lowest mean and rounded to two decimals.
race_summary = ihme_clean.groupby("race_group")["val"].agg(["count", "mean", "min", "max"])
race_summary = race_summary.sort_values(by="mean", ascending=False).round(decimals=2)

race_summary


In [None]:
# MMR over time by race: mean of `val` for every (year, race group) pair.
# NOTE(review): this is an unweighted mean over every location_name row in
# ihme_clean -- confirm that is the intended definition of the "national" series.
race_year = ihme_clean.groupby(["year_id", "race_group"], as_index=False)["val"].mean()

# One line per race group, drawn on an explicit Axes.
fig, ax = plt.subplots()
for race_label, race_rows in race_year.groupby("race_group"):
    ax.plot(race_rows["year_id"], race_rows["val"], marker="o", label=race_label)

ax.set_xlabel("Year")
ax.set_ylabel("MMR (per 100,000 live births)")
ax.set_title("National MMR by Race/Ethnicity (IHME 1999–2019)")
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# State vs race heatmap: mean `val` from 2010 onward, laid out as a
# location (rows) x race group (columns) grid. Combinations with no rows stay NaN.
pivot_state_race = (
    ihme_clean[ihme_clean["year_id"] >= 2010]
    .groupby(["location_name", "race_group"])["val"]
    .mean()
    .unstack("race_group")
)

n_locations, n_races = pivot_state_race.shape

fig, ax = plt.subplots(figsize=(10, 8))
heat = ax.imshow(pivot_state_race, aspect="auto")
ax.set_xticks(range(n_races))
ax.set_xticklabels(pivot_state_race.columns, rotation=45, ha="right")
ax.set_yticks(range(n_locations))
ax.set_yticklabels(pivot_state_race.index)
fig.colorbar(heat, ax=ax, label="MMR")
ax.set_title("Average MMR by State and Race (2010–2019)")
fig.tight_layout()
plt.show()


In [None]:
## global MMR vs GDP & income group
# Inspect the WHO table before filtering it. A cell only renders its final
# expression, so the column names are printed explicitly and the row preview
# is left as the last expression; previously the preview was silently discarded.
print(who_mmr.columns.tolist())
who_mmr.head()


In [None]:
# Keep only the "Maternal mortality ratio" indicator rows and give the three
# columns used later (country, year, mmr) short names.
who_mmr_clean = who_mmr.loc[
    who_mmr["IND_NAME"] == "Maternal mortality ratio",
    ["GEO_NAME_SHORT", "DIM_TIME", "RATE_PER_100000_N"],
].rename(
    columns={
        "GEO_NAME_SHORT": "country",
        "DIM_TIME": "year",
        "RATE_PER_100000_N": "mmr",
    }
)

who_mmr_clean.head()


In [None]:
# World Bank bulk-download CSVs carry four metadata lines above the real header
# row; without skiprows the parser trips over that ragged preamble.
gdp = pd.read_csv(gdp_path, skiprows=4)
# income_path points at an .xlsx workbook, which read_csv cannot parse.
income = pd.read_excel(income_path)

# Every non-year column must be an id_var. Leaving the indicator columns out
# would melt their text into the value column and turn gdp_pc into mixed
# strings and floats.
gdp_id_cols = ["Country Name", "Country Code"] + [
    col for col in ("Indicator Name", "Indicator Code") if col in gdp.columns
]

# Reshape from one column per year to one row per country-year.
gdp_long = (
    gdp.melt(id_vars=gdp_id_cols, var_name="year", value_name="gdp_pc")
    .assign(
        year=lambda d: pd.to_numeric(d["year"], errors="coerce"),
        gdp_pc=lambda d: pd.to_numeric(d["gdp_pc"], errors="coerce"),
    )
    # Headers that are not years (e.g. a trailing unnamed column) coerce to NaN
    # and can never match a WHO row, so drop them before merging.
    .dropna(subset=["year"])
    .astype({"year": int})
    [["Country Name", "Country Code", "year", "gdp_pc"]]
)

# Merge MMR + GDP
# NOTE(review): the join key is the country *name*; the two sources may spell
# some names differently, leaving gdp_pc as NaN -- check the unmatched rows.
mmr_gdp = who_mmr_clean.merge(
    gdp_long,
    left_on=["country", "year"],
    right_on=["Country Name", "year"],
    how="left"
)

# Clean income file; keep only columns with country name + income group
income = income.rename(columns={"TableName": "country"})  # may need to adjust if column names differ
# One row per country in the lookup, so the left merge cannot multiply MMR rows.
income_lookup = income[["country", "IncomeGroup"]].drop_duplicates(subset="country")
mmr_gdp = mmr_gdp.merge(
    income_lookup,
    on="country",
    how="left"
)

mmr_gdp.head()


In [None]:
# GDP vs MMR scatter for a single year, limited to rows where both values are present.
recent_year = 2019
recent = (
    mmr_gdp[mmr_gdp["year"] == recent_year]
    .dropna(subset=["mmr", "gdp_pc"])
    .copy()
)

# GDP goes on a log x-axis; MMR stays linear on the y-axis.
fig, ax = plt.subplots()
ax.scatter(recent["gdp_pc"], recent["mmr"])
ax.set_xscale("log")
ax.set_xlabel("GDP per capita (current US$, log scale)")
ax.set_ylabel("MMR (per 100,000)")
ax.set_title(f"Global Maternal Mortality vs GDP ({recent_year})")
fig.tight_layout()
plt.show()


In [None]:
#boxplot by income group
# Income groups in the order they should appear on the x-axis.
groups = [
    "Low income",
    "Lower middle income",
    "Upper middle income",
    "High income"
]

# One Series of MMR values per income group, in the same order as `groups`.
# NOTE(review): `recent` was already reduced to rows that have both mmr and
# gdp_pc, so countries without a GDP value are absent here too -- confirm intended.
data = [recent.loc[recent["IncomeGroup"] == g, "mmr"].dropna() for g in groups]

plt.figure()
plt.boxplot(data, showfliers=False)
# boxplot's `labels` keyword is deprecated in recent Matplotlib releases, so the
# group names are applied as tick labels instead. Boxes sit at positions 1..N.
plt.xticks(range(1, len(groups) + 1), groups, rotation=20)
plt.ylabel("MMR (per 100,000)")
plt.title(f"Maternal Mortality by World Bank Income Group ({recent_year})")
plt.tight_layout()
plt.show()
