## CDC Maternal Mortality data

### load packages

In [1]:
%load_ext lab_black

In [2]:
import numpy as np
import pandas as pd
import altair as alt
import altair_grid as altgrid

# Register altair_grid's theme under the name "grid" and make it the active Altair theme.
alt.themes.register("grid", altgrid.theme)
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [3]:
# Raise pandas' display limits so wide/long frames are shown without truncation.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

### Maternal mortality over time with CDC-limited cause of death codes
The CDC limits maternal mortality surveillance to obstetric causes of death within 42 days after giving birth.<br>One thing I'm wondering about is how many maternal deaths occur in that period that the CDC doesn't include in its maternal mortality surveillance, so I've included all obstetric death causes here and am separating out the causes the CDC excludes.

In [4]:
# Load the raw delimited text file; both year columns are read as strings
# rather than being inferred as numbers.
mm_total = pd.read_table(
    "data/raw/after42days.txt",
    dtype=dict.fromkeys(("Year", "Year Code"), str),
)

In [5]:
# Lower-case the column names and replace spaces with underscores.
# regex=False makes the space a literal match regardless of which default
# for `regex` the installed pandas version uses.
mm_total.columns = mm_total.columns.str.lower().str.replace(" ", "_", regex=False)

#### Calculating what percent of obstetric deaths are excluded from the CDC count

In [6]:
# Working copy for the percentage calculation: keep only rows that have a
# year and discard the "notes" column.
mm_pct = mm_total.loc[mm_total["year"].notna()].drop(columns=["notes"]).copy()

In [7]:
# Cause-of-death codes that this notebook labels as deaths after 42 days
# (per the narrative above, the causes the CDC count excludes).
post42 = ["O96.0", "O96.1", "O97", "O97.0", "O97.1", "O97.9"]

# Two-way split: every row whose code is not in `post42` (including rows with
# a missing code) is labelled "within_42_days".
# np.where is used instead of np.select: np.select's implicit integer
# default (0) cannot be promoted together with string choices on recent
# NumPy versions and raises a TypeError.
is_post42 = mm_pct["cause_of_death_code"].isin(post42)
mm_pct["later_death"] = np.where(is_post42, "after_42_days", "within_42_days")

In [8]:
# Sum deaths per (year, later_death) pair, then pivot so each year is one row
# with one column per later_death label.
mm_pct = (
    mm_pct.groupby(["year", "later_death"])
    # Use the "sum" string alias: passing the builtin `sum` is deprecated in
    # recent pandas and emits a FutureWarning.
    .agg(sum_after42=pd.NamedAgg(column="deaths", aggfunc="sum"))
    .reset_index()
    .pivot(index="year", columns="later_death", values="sum_after42")
    # Years with no "after_42_days" rows come out of the pivot as NaN; drop them.
    .dropna(subset=["after_42_days"])
    .reset_index()
    .copy()
)

In [9]:
# Inspect the pivoted table: one row per year, one column per later_death label.
mm_pct

later_death,year,after_42_days,within_42_days
0,1999,10.0,802.0
1,2000,6.0,802.0
2,2001,8.0,824.0
3,2002,13.0,745.0
4,2003,4.0,1086.0
5,2004,12.0,1382.0
6,2005,7.0,1513.0
7,2006,11.0,1509.0
8,2007,6.0,1532.0
9,2008,7.0,1583.0


In [10]:
# Share of each year's deaths that carry the "after_42_days" label:
# after / (after + within). Stored as a fraction, not multiplied by 100.
mm_pct["pct"] = mm_pct["after_42_days"].div(
    mm_pct["after_42_days"].add(mm_pct["within_42_days"])
)

In [11]:
# Independent copy for charting. mm_pct was already reset to a default integer
# index, so drop=True avoids adding a redundant "index" column to the frame.
mm_pct_final = mm_pct.reset_index(drop=True).copy()

In [12]:
# "year" holds year strings; parse them with an explicit format so the result
# does not depend on pandas' format guessing (each value becomes Jan 1 of that year).
mm_pct_final["yeardate"] = pd.to_datetime(mm_pct_final["year"], format="%Y")

#### Percent of total obstetric deaths from causes 42 days or more after birth, over time
What is the sudden jump from? Did they start tracking better?

In [13]:
# Line chart of the yearly share of deaths labelled "after_42_days".
alt.Chart(mm_pct_final).mark_line().encode(
    # Years are ordered categories, so encode them as ordinal (not nominal)
    # and give the axis a readable title, matching the international chart.
    x=alt.X("year:O", axis=alt.Axis(title="Year")),
    # "pct" is a fraction; format="%" renders it as a percentage on the axis.
    y=alt.Y(
        "pct:Q",
        axis=alt.Axis(
            format="%", title="Percent of deaths 42 days or more after birth"
        ),
    ),
)

### International maternal mortality data
Compare the U.S. to other countries with similar rates of maternal mortality

In [14]:
# Load the international indicator table; "Period" is kept as a string so it
# can be compared against year strings such as "2017" later on.
international_mm = pd.read_csv(
    "data/raw/mm_intl.csv",
    dtype={"Period": str},
)

In [15]:
# Lower-case the column names and replace spaces with underscores.
# regex=False makes the space a literal match regardless of which default
# for `regex` the installed pandas version uses.
international_mm.columns = international_mm.columns.str.lower().str.replace(
    " ", "_", regex=False
)

In [16]:
# 2017 maternal mortality ratio rows only, trimmed to the identifying columns
# and the numeric value.
intl_mm_rates = international_mm.loc[
    international_mm["indicator"].eq(
        "Maternal mortality ratio (per 100 000 live births)"
    )
    & international_mm["period"].eq("2017"),
    ["spatialdimvaluecode", "location", "period", "factvaluenumeric"],
].copy()

In [17]:
# Codes of the locations whose 2017 ratio lies strictly between 15 and 20.
countrycodes = intl_mm_rates.loc[
    intl_mm_rates["factvaluenumeric"].gt(15)
    & intl_mm_rates["factvaluenumeric"].lt(20),
    "spatialdimvaluecode",
]

In [18]:
# All periods of the maternal mortality ratio for the selected country codes.
rates_overtime = international_mm.loc[
    international_mm["spatialdimvaluecode"].isin(countrycodes)
    & international_mm["indicator"].eq(
        "Maternal mortality ratio (per 100 000 live births)"
    )
]

In [19]:
# Narrow the comparison set down further for the chart: only these locations
# (matched against the "location" column) are plotted.
target_countries = [
    "United States of America",
    "Russian Federation",
    "Iran (Islamic Republic of)",
    "Saudi Arabia",
    "Uruguay",
]

In [20]:
# Chart input: rows for the target countries, reduced to location, period and value.
rates_overtime_slim = rates_overtime.loc[
    rates_overtime["location"].isin(target_countries),
    ["location", "period", "factvaluenumeric"],
].copy()

#### The US rate is rising over time, while other countries are reducing their maternal mortality

In [21]:
# One line per location showing the maternal mortality ratio by period.
alt.Chart(rates_overtime_slim).mark_line(point=True).encode(
    # "period" is already a year string, so plot it directly as an ordinal.
    # Applying timeUnit="year" would make the renderer parse the bare year as
    # a date (date-only strings are read as UTC) and then extract the year in
    # local time, which can shift labels by one year in some time zones.
    x=alt.X("period:O", axis=alt.Axis(title="Year")),
    y=alt.Y("factvaluenumeric", axis=alt.Axis(title="Deaths per 100K live births")),
    color=alt.Color("location:N"),
)