### Necessary packages

In [1]:
%load_ext lab_black

In [2]:
import requests
import urllib.request
import numpy as np
import pandas as pd
import altair as alt
import altair_grid as altgrid

# Register the altair_grid theme under the name "grid" and make it the active
# Altair theme for every chart rendered below.
alt.themes.register("grid", altgrid.theme)
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [3]:
import os

from datawrapper import Datawrapper

# The Datawrapper API token is a credential and must not be stored in the
# notebook. It is read from the DATAWRAPPER_ACCESS_TOKEN environment variable;
# a missing variable raises KeyError here rather than failing later on the
# first API call.
# NOTE(review): the token previously committed in this cell should be treated
# as leaked and revoked.
dw = Datawrapper(access_token=os.environ["DATAWRAPPER_ACCESS_TOKEN"])

In [6]:
# Let pandas render up to 1000 columns and 1000 rows before truncating output.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

### Maternal mortality over time with CDC-limited cause of death codes

In [7]:
# Load the tab-delimited by-year/by-cause export; both year columns are kept
# as strings rather than being parsed as numbers.
year_cause_cdclimited = pd.read_table(
    "data/raw/byyear_andcause.txt",
    dtype=dict.fromkeys(("Year", "Year Code"), str),
)

In [8]:
# Normalise column labels to snake_case: lower-case, spaces -> underscores.
year_cause_cdclimited.columns = [
    label.lower().replace(" ", "_") for label in year_cause_cdclimited.columns
]

### Maternal mortality over time with all obstetric cause death codes

In [9]:
# Load the tab-delimited export covering all obstetric cause-of-death codes;
# both year columns are kept as strings.
year_cause_all = pd.read_table(
    "data/raw/after42days.txt",
    dtype=dict.fromkeys(("Year", "Year Code"), str),
)

In [10]:
# Normalise column labels to snake_case: lower-case, spaces -> underscores.
year_cause_all.columns = [
    label.lower().replace(" ", "_") for label in year_cause_all.columns
]

#### Percent comparison: deaths counted by the CDC vs. those excluded (occurring 42 days or more after pregnancy)

In [11]:
# Keep only rows that carry a year value, then discard the "notes" column.
yc_all_pct = year_cause_all.loc[year_cause_all["year"].notna()].drop(
    columns=["notes"]
)

In [12]:
# Cause-of-death codes this notebook treats as deaths 42 days or more after
# pregnancy (the group the section heading describes as excluded by the CDC).
post42 = ["O96.0", "O96.1", "O97", "O97.0", "O97.1", "O97.9"]
conditions = [
    (yc_all_pct["cause_of_death_code"].isin(post42)),
    (~yc_all_pct["cause_of_death_code"].isin(post42)),
]

values = ["after 42 days", "within 42 days"]

# The two conditions are exact complements, so every row matches one of them
# and the default is never selected. It is still passed as a string because
# np.select otherwise falls back to the integer 0, which newer NumPy releases
# refuse to combine with string choices (TypeError: no common dtype).
yc_all_pct["later_death"] = np.select(conditions, values, default="unknown")

In [13]:
# Total deaths per (year, later_death) group, returned as a flat frame.
# Despite its name, "sum_after42" holds the total for BOTH timing categories;
# the name is kept because the pivot cell below reads it.
# The aggregation is given as the string "sum" rather than the Python builtin:
# newer pandas deprecates builtin callables in agg() and will stop mapping
# them to the optimised groupby sum.
timing = (
    yc_all_pct.groupby(["year", "later_death"])
    .agg(sum_after42=pd.NamedAgg(column="deaths", aggfunc="sum"))
    .reset_index()
)

In [14]:
# One row per year, one column per timing category; years that have no
# "after 42 days" total are discarded.
timing_comparison = timing.pivot(
    values="sum_after42", columns="later_death", index="year"
).loc[lambda wide: wide["after 42 days"].notna()]

In [15]:
# Share of each year's deaths that fall in the "after 42 days" category:
# after / (after + within).
timing_comparison["pct"] = timing_comparison["after 42 days"].div(
    timing_comparison["after 42 days"].add(timing_comparison["within 42 days"])
)

In [16]:
# Move "year" (the pivot index) back into an ordinary column so later cells
# can reference it by name.
# NOTE(review): this rebinds the same name, so the cell is meant to run once
# per kernel session.
timing_comparison = timing_comparison.reset_index()

In [17]:
# Datetime version of the string "year" column.
timing_comparison["yeardate"] = timing_comparison["year"].pipe(pd.to_datetime)

#### Percent of total obstetric deaths from causes 42 days or more after birth, over time
#### Open question: what explains the sudden jump? Did tracking improve?

In [18]:
# Line chart of the after-42-days share ("pct") per year; year is encoded as
# a nominal category on the x axis.
alt.Chart(timing_comparison).mark_line().encode(
    x="year:N",
    y="pct",
)

### 2020 Maternal mortality broken down by race and cause
##### Includes all obstetric causes, not just those the CDC counts

In [19]:
# Load the tab-delimited 2020 by-race / by-cause export.
mm_byrace = pd.read_table(
    filepath_or_buffer="data/raw/mm_2020_byrace_bycause.txt",
)

In [20]:
# Normalise column labels to snake_case: lower-case, spaces -> underscores.
mm_byrace.columns = [label.lower().replace(" ", "_") for label in mm_byrace.columns]

#### coding for causes included in CDC maternal mortality surveillance

In [21]:
# Flag each row by whether its cause-of-death code is in `post42` (defined in
# an earlier cell): those codes are labelled "not recorded", everything else
# "recorded".
conditions = [
    (mm_byrace["cause_of_death_code"].isin(post42)),
    (~mm_byrace["cause_of_death_code"].isin(post42)),
]

values = ["not recorded", "recorded"]

# The conditions are exact complements, so the default is never selected; it
# is passed as a string because np.select's implicit integer default 0 cannot
# be combined with string choices on newer NumPy releases (TypeError).
mm_byrace["cdc_maternal_mortality"] = np.select(conditions, values, default="unknown")

#### coding race and hispanic origin combined variable

In [22]:
# Build a combined race / Hispanic-origin label. The four race labels apply
# only to rows marked "Not Hispanic or Latino"; any row marked
# "Hispanic or Latino" is labelled as such regardless of race; rows with no
# race, no Hispanic origin and notes == "Total" are labelled "Overall".
race_ho_conditions = [
    (
        (mm_byrace["race"] == "White")
        & (mm_byrace["hispanic_origin"] == "Not Hispanic or Latino")
    ),
    (
        (mm_byrace["race"] == "Black or African American")
        & (mm_byrace["hispanic_origin"] == "Not Hispanic or Latino")
    ),
    (
        (mm_byrace["race"] == "Asian or Pacific Islander")
        & (mm_byrace["hispanic_origin"] == "Not Hispanic or Latino")
    ),
    (
        (mm_byrace["race"] == "American Indian or Alaska Native")
        & (mm_byrace["hispanic_origin"] == "Not Hispanic or Latino")
    ),
    (mm_byrace["hispanic_origin"] == "Hispanic or Latino"),
    (mm_byrace["race"].isna())
    & (mm_byrace["hispanic_origin"].isna())
    & (mm_byrace["notes"] == "Total"),
]

race_ho_values = [
    "White",
    "Black",
    "AAPI",
    "American Indian or Alaska Native",
    "Hispanic or Latino",
    "Overall",
]

# Rows matching none of the conditions get the sentinel "0", which the
# aggregation cell below filters out with `!= "0"`. The sentinel is spelled
# out as a string: np.select's implicit default is the integer 0, which only
# became "0" through string/number promotion that newer NumPy releases reject
# with a TypeError.
mm_byrace["race_whispanicorigin"] = np.select(
    race_ho_conditions, race_ho_values, default="0"
)

In [23]:
# Total deaths per (race/Hispanic-origin label, CDC flag). Rows left
# unlabelled (sentinel "0") and the pre-aggregated "Total" rows are excluded
# first so they are not double counted.
# The aggregation is given as the string "sum" rather than the Python builtin:
# newer pandas deprecates builtin callables in agg().
rawdeaths_byrace_cdcrec = (
    mm_byrace[
        (mm_byrace["race_whispanicorigin"] != "0") & (mm_byrace["notes"] != "Total")
    ]
    .groupby(["race_whispanicorigin", "cdc_maternal_mortality"])
    .agg(deaths=pd.NamedAgg(column="deaths", aggfunc="sum"))
    .reset_index()
)

In [24]:
# Wide layout: one row per race/Hispanic-origin label, one column per CDC
# flag value, cells holding the death totals.
rawdeaths_byrace_cdcrec_wide = rawdeaths_byrace_cdcrec.pivot(
    values="deaths",
    columns="cdc_maternal_mortality",
    index="race_whispanicorigin",
)

### International maternal mortality data

In [25]:
# Load the international CSV, keeping "Period" as a string.
international_mm = pd.read_csv(
    "data/raw/mm_intl.csv",
    dtype=dict(Period=str),
)

In [26]:
# Normalise column labels to snake_case: lower-case, spaces -> underscores.
international_mm.columns = [
    label.lower().replace(" ", "_") for label in international_mm.columns
]

In [27]:
# 2017 rows of the maternal-mortality-ratio indicator, reduced to the country
# code, country name, period and numeric value columns.
intl_mm_rates = international_mm.loc[
    international_mm["indicator"].eq(
        "Maternal mortality ratio (per 100 000 live births)"
    )
    & international_mm["period"].eq("2017"),
    ["spatialdimvaluecode", "location", "period", "factvaluenumeric"],
]

In [28]:
# Rows whose 2017 value lies strictly between 15 and 20.
# NOTE(review): presumably this band brackets the US value - confirm.
close_to_us = intl_mm_rates.loc[
    intl_mm_rates["factvaluenumeric"].gt(15)
    & intl_mm_rates["factvaluenumeric"].lt(20)
]

In [29]:
# Country codes of the rows selected into close_to_us.
countrycodes = close_to_us.loc[:, "spatialdimvaluecode"]

In [30]:
# Every row (all indicators, all periods) for the selected country codes.
overtime = international_mm.loc[
    international_mm["spatialdimvaluecode"].isin(countrycodes)
]

In [31]:
# Keep only the maternal-mortality-ratio indicator rows.
rates_overtime = overtime.loc[
    overtime["indicator"].eq("Maternal mortality ratio (per 100 000 live births)")
]

In [32]:
# Wide layout: one row per country, one column per period.
rates_overtime = rates_overtime.loc[
    :, ["location", "period", "factvaluenumeric"]
].pivot(values="factvaluenumeric", columns="period", index="location")

In [33]:
# Country names (as spelled in the "location" column) used below to filter
# the rows plotted in the over-time comparison chart.
target_countries = [
    "United States of America",
    "Russian Federation",
    "Iran (Islamic Republic of)",
    "Saudi Arabia",
    "Uruguay",
]

In [34]:
# NOTE(review): not referenced by any cell visible here, and it lists only two
# countries despite the "g7" name - confirm whether it is still needed.
g7_countries = ["United States of America", "France"]

In [35]:
# Long-format maternal-mortality-ratio rows for the target countries only.
intl_rates_overtime_long = overtime.loc[
    overtime["indicator"].eq("Maternal mortality ratio (per 100 000 live births)")
    & overtime["location"].isin(target_countries)
]

In [36]:
# Reduce to the three columns the chart needs.
intl_rates_overtime_long = intl_rates_overtime_long.loc[
    :, ["location", "period", "factvaluenumeric"]
]

In [37]:
# One line (with point markers) per country: indicator value by period.
alt.Chart(intl_rates_overtime_long).mark_line(point=True).encode(
    x=alt.X("period:O", timeUnit="year"),
    y="factvaluenumeric",
    color="location:N",
)