# V&V maternal outcomes and intervention coverage

## Setup

In [None]:
import pandas as pd, numpy as np, os
from vivarium import Artifact
import db_queries
from get_draws.api import get_draws
import matplotlib.pyplot as plt
from pathlib import Path
import yaml

In [None]:
import warnings
warnings.filterwarnings('ignore', category=FutureWarning) 

In [None]:
# Locations whose simulation results are loaded and checked below
locations = ['Pakistan', 'Ethiopia', 'Nigeria']

In [None]:
# Directory holding one sub-directory per location for this model version
base_results_dir = Path('/mnt/team/simulation_science/pub/models/vivarium_gates_mncnh/results/model27.0/')
results_dirs = {}
# Every modeled location (and nothing else) must have an entry under the base directory
assert {p.stem for p in base_results_dir.iterdir()} == {name.lower() for name in locations}
for location in locations:
    location_dir = base_results_dir / location.lower()
    # Sorted so the last entry is the one used (the latest run, assuming sortable timestamp names)
    timestamps = sorted(location_dir.iterdir())
    if not timestamps:
        raise FileNotFoundError(f'No run directories found in {location_dir}')
    last_timestamp = timestamps[-1]
    if len(timestamps) > 1:
        print(f'Multiple timestamps: {timestamps}, using {last_timestamp}')
    # iterdir() yields paths that already include location_dir; joining onto location_dir
    # again is a no-op for absolute paths and would duplicate the prefix for relative ones
    results_dirs[location] = last_timestamp / 'results'

results_dirs

In [None]:
# Look up location IDs and keep only the rows for locations that have results
location_ids = db_queries.get_ids('location')
location_ids = location_ids[
    location_ids['location_name'].str.lower().isin([name.lower() for name in results_dirs])
]
location_ids

In [None]:
def load_yaml_file(path):
    """Parse the YAML file at ``path`` and return the resulting Python object.

    The file is decoded as UTF-8 explicitly rather than with the platform's
    default encoding, so non-ASCII content is read the same way everywhere.
    Parsing uses ``yaml.safe_load``.
    """
    with open(path, encoding='utf-8') as stream:
        return yaml.safe_load(stream)

In [None]:
# Artifact path per location, taken from the model specification stored
# in the parent of each location's results directory
artifact_paths = {
    loc: load_yaml_file(res_dir.parent / 'model_specification.yaml')['configuration']['input_data']['artifact_path']
    for loc, res_dir in results_dirs.items()
}
artifact_paths

In [None]:
def read_results(result_file_name, baseline_only=True):
    """Load one results parquet file for every location and stack the rows.

    Parameters
    ----------
    result_file_name
        File name without the ``.parquet`` extension; looked up in each
        directory of ``results_dirs``.
    baseline_only
        When True, only rows whose ``scenario`` equals ``'baseline'`` are read
        (the filter is handed to the parquet reader).

    Returns
    -------
    pandas.DataFrame
        Rows from all locations with a ``location`` column added, the
        ``measure``/``entity``/``sub_entity``/``entity_type`` columns removed,
        and ``value`` summed over ``random_seed`` when that column is present.
    """
    # Only pass `filters` when it is needed, so the unfiltered read is unchanged
    read_kwargs = {'filters': [('scenario', '==', 'baseline')]} if baseline_only else {}
    metadata_columns = ['measure', 'entity', 'sub_entity', 'entity_type']

    all_locations_results = []
    for location, result_dir in results_dirs.items():
        location_results = pd.read_parquet(
            result_dir / f'{result_file_name}.parquet', **read_kwargs
        ).drop(columns=metadata_columns)
        location_results['location'] = location

        # Aggregate over random seeds (if necessary)
        # TODO: Could use vivarium_helpers marginalize method here
        if 'random_seed' in location_results.columns:
            stratification_columns = [
                c for c in location_results.columns if c not in ('random_seed', 'value')
            ]
            # Sum only `value`; the seed itself is an identifier, not a quantity to add up
            location_results = (
                location_results.groupby(stratification_columns)['value'].sum().reset_index()
            )

        all_locations_results.append(location_results)
    return pd.concat(all_locations_results, ignore_index=True)

In [None]:
# Causes whose per-cause results files and artifact rates are compared in this notebook
causes = [
    'maternal_sepsis_and_other_maternal_infections',
    'maternal_hemorrhage',
    'maternal_obstructed_labor_and_uterine_rupture',
    'postpartum_depression',
]

In [None]:
def combine_count_results_by_cause(measure, results_file_suffix=None, baseline_only=True):
    """Read the ``<cause>_<suffix>`` results file of every cause and stack them.

    ``results_file_suffix`` falls back to ``measure`` when it is None. Each
    cause's rows get ``cause`` and ``measure`` columns; when ``baseline_only``
    is True the ``scenario`` column is dropped.
    """
    suffix = measure if results_file_suffix is None else results_file_suffix

    def _load_one_cause(cause):
        # One cause's results, tagged with the cause and measure they describe
        frame = read_results(f'{cause}_{suffix}', baseline_only=baseline_only)
        if baseline_only:
            frame = frame.drop(columns=['scenario'])
        frame['cause'] = cause
        frame['measure'] = measure
        return frame

    return pd.concat([_load_one_cause(cause) for cause in causes], ignore_index=True)

## Verify burden

In [None]:
# Baseline incidence counts for every cause, read from the '<cause>_counts' results files
incidence = combine_count_results_by_cause('incidence', 'counts')
incidence

In [None]:
# Baseline YLD results for every cause, read from the '<cause>_ylds' results files
ylds = combine_count_results_by_cause('ylds')
ylds

In [None]:
# Baseline deaths from the maternal disorders burden observer; the observer's
# cause-of-death column is renamed to `cause` to match the other count tables
deaths = (read_results('maternal_disorders_burden_observer_disorder_deaths', baseline_only=True).drop(columns=['scenario'])
          .rename(columns={'maternal_disorders_burden_observer_cause_of_death':'cause'}))
deaths['measure'] = 'mortality'
deaths

In [None]:
# Inspect which causes appear in the deaths results
deaths.cause.unique()

In [None]:
# Baseline rows of the 'anc_other' results file; used below as the birth/pregnancy counts
births = read_results('anc_other', baseline_only=True).drop(columns=['scenario'])
births

In [None]:
# Columns shared by births and deaths (other than the count itself) define the strata
population_stratification_cols = [x for x in births.columns if x in deaths.columns and x != 'value']
# Births minus deaths within each stratum
# NOTE(review): a stratum present in only one of the two groupbys becomes NaN here —
# confirm births and deaths always share the same strata
postpartum_population = (
    births.groupby(population_stratification_cols).value.sum()
    - deaths.groupby(population_stratification_cols).value.sum()
).reset_index()
postpartum_population

In [None]:
# All count measures (incidence, mortality, YLDs) in one long table
count_results = pd.concat([incidence, deaths, ylds], ignore_index=True)
count_results.cause.unique()

In [None]:
# Strata used for every rate: counts are summed within location, draw and age group
groupby_cols = ['location','input_draw','age_group']
# Postpartum causes use the postpartum population as a denominator
# All other causes use births
postpartum_causes = ['postpartum_depression']

grouped_counts = count_results.groupby(groupby_cols + ['cause', 'measure'])['value'].sum()

assert grouped_counts.notnull().all()

# Divide each cause's counts by its denominator; the division relies on pandas
# aligning the two Series on their shared index levels (groupby_cols)
rate_results = pd.concat([
    grouped_counts[~grouped_counts.index.get_level_values('cause').isin(postpartum_causes)]
        / births.groupby(groupby_cols)['value'].sum(),
    grouped_counts[grouped_counts.index.get_level_values('cause').isin(postpartum_causes)]
        / postpartum_population.groupby(groupby_cols)['value'].sum(),
]).reset_index()
# Drop where denominator was zero
rate_results = rate_results.dropna(subset='value')
rate_results

In [None]:
# Input draws present in the simulation results; used to pick matching artifact draw columns
draws = list(deaths.input_draw.unique())
draws

In [None]:
def read_artifact(key, filter_terms=('sex == Female', 'age_start > 5', 'age_end < 60')):
    """Load ``key`` from every location's artifact and return one wide frame.

    Parameters
    ----------
    key
        Artifact key to load.
    filter_terms
        Filter expressions handed to ``Artifact``. Any iterable of strings is
        accepted and copied into a fresh list per artifact; ``None`` is
        forwarded unchanged.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``location`` followed by the artifact data's own named
        index levels, with one ``draw_<n>`` column for each entry of the
        module-level ``draws``. Data that has no draw columns (a single
        ``value``) is repeated across all draws.
    """
    all_locations_data = []
    for location in locations:
        # Immutable default + per-call copy: no list object is shared between calls
        artifact_filter_terms = None if filter_terms is None else list(filter_terms)
        art = Artifact(artifact_paths[location], filter_terms=artifact_filter_terms)
        location_data = art.load(key)
        if not isinstance(location_data, pd.DataFrame):
            # Non-tabular entry: wrap it as a one-row frame with a single 'value' column
            location_data = pd.DataFrame({'value': location_data, 'location': location}, index=[0]).set_index('location')
        else:
            index_names = [c for c in location_data.index.names if c is not None]
            # assign() returns a new frame, leaving the object returned by the artifact untouched
            location_data = (
                location_data.assign(location=location)
                .reset_index()
                .set_index(['location'] + index_names)
            )
        all_locations_data.append(location_data)

    all_locations_data = pd.concat(all_locations_data)
    draw_columns = [f'draw_{draw}' for draw in draws]
    if 'draw' in all_locations_data.columns[0]:
        # Draw-level data: keep only the draws that the simulation used
        all_locations_data = all_locations_data[draw_columns]
    else:
        # Single-value data: repeat the value for every draw, added in one step
        # rather than one column insertion per draw
        all_locations_data = all_locations_data.assign(
            **{column: all_locations_data['value'] for column in draw_columns}
        ).drop(columns='value')
    return all_locations_data

In [None]:
# Artifact inputs for each pregnancy outcome
asfr = read_artifact('covariate.age_specific_fertility_rate.estimate')
sbr = read_artifact('covariate.stillbirth_24_weeks_to_live_birth_ratio.estimate')
# Stillbirths are derived as fertility rate times the stillbirth-to-live-birth ratio
still = asfr * sbr
ectopic = read_artifact('cause.ectopic_pregnancy.raw_incidence_rate')
miscarriage = read_artifact('cause.maternal_abortion_and_miscarriage.raw_incidence_rate')

# Total across all outcomes; must be computed before the string 'outcome' column is added below
pregnancy_incidence = (asfr
                       + still
                       + ectopic
                       + miscarriage)

# TODO: check to see if "pregnancy_incidence" is a key in the artifact that I can use directly

asfr['outcome'] = 'live_birth'
still['outcome'] = 'stillbirth'
# FIXME: update these to "abortion/miscarriage/ectopic" rather than "partial term" which we have moved away from in docs
ectopic['outcome'] = 'partial_term'
miscarriage['outcome'] = 'partial_term'
artifact_birth_outcomes = pd.concat([
    asfr,
    still.reorder_levels(asfr.index.names),
    ectopic.reorder_levels(asfr.index.names),
    miscarriage.reorder_levels(asfr.index.names),
]).set_index('outcome', append=True)
# Convert to fractions of all pregnancies; the groupby-sum merges the two 'partial_term' sources
artifact_birth_outcomes = (artifact_birth_outcomes / pregnancy_incidence).groupby(artifact_birth_outcomes.index.names).sum()

artifact_birth_outcomes

In [None]:
# Load the severity split from one location's artifact ...
postpartum_depression_severity_split = Artifact(artifact_paths['Ethiopia']).load('cause.postpartum_depression.case_severity')

# ... and check that every location's artifact holds the same value, so one copy can be used for all
for location in locations:
    assert postpartum_depression_severity_split == Artifact(artifact_paths[location]).load('cause.postpartum_depression.case_severity')

postpartum_depression_severity_split

In [None]:
# Turn the severity-split mapping into a frame indexed by case type,
# with the same values repeated in every draw column
postpartum_depression_severity_split = pd.DataFrame(
    {
        f'draw_{draw}': list(postpartum_depression_severity_split.values())
        for draw in draws
    },
    index=pd.Index(
        list(postpartum_depression_severity_split.keys()),
        name='postpartum_depression_case_type',
    ),
)
postpartum_depression_severity_split

In [None]:
# Postpartum depression case duration from the artifacts
postpartum_depression_duration = read_artifact('cause.postpartum_depression.case_duration')
postpartum_depression_duration

In [None]:
# Postpartum depression disability weights from the artifacts
postpartum_depression_disability_weights = read_artifact('cause.postpartum_depression.disability_weight')
postpartum_depression_disability_weights

In [None]:
# Postpartum depression incidence risk from the artifacts
postpartum_depression_incidence_risk = read_artifact('cause.postpartum_depression.incidence_risk')
postpartum_depression_incidence_risk

In [None]:
# YLDs per case: severity share x disability weight x duration, then summed
# within each stratum of the duration data's index
postpartum_depression_ylds_per_case = (
    postpartum_depression_severity_split * postpartum_depression_disability_weights * postpartum_depression_duration
).groupby(postpartum_depression_duration.index.names).sum()
postpartum_depression_ylds_per_case

In [None]:
# Expected YLDs: YLDs per case times the incidence risk
postpartum_depression_yld_rate = (
    postpartum_depression_ylds_per_case * postpartum_depression_incidence_risk
)
# Tag with cause/measure and move the tags into the index so the frame can be
# concatenated with the other artifact rates
postpartum_depression_yld_rate['measure'] = 'ylds'
postpartum_depression_yld_rate['cause'] = 'postpartum_depression'
postpartum_depression_yld_rate = postpartum_depression_yld_rate.set_index(['cause', 'measure'], append=True)
postpartum_depression_yld_rate

In [None]:
# Tag the incidence risk with cause/measure index levels
# NOTE: this must run after the YLD computation, which multiplies by the untagged numeric frame
postpartum_depression_incidence_risk['measure'] = 'incidence'
postpartum_depression_incidence_risk['cause'] = 'postpartum_depression'
postpartum_depression_incidence_risk = postpartum_depression_incidence_risk.set_index(['cause', 'measure'], append=True)

In [None]:
# Collect mortality, incidence and YLD rates from the artifacts for every non-postpartum cause
artifact_rates = []

for cause in causes:
    if cause in postpartum_causes:
        # Handled separately above
        continue

    cause_artifact_rates = pd.concat([
        read_artifact(f'cause.{cause}.cause_specific_mortality_rate').assign(
            cause=cause,
            measure='mortality',
        ),
        read_artifact(f'cause.{cause}.incidence_rate').assign(
            cause=cause,
            measure='incidence',
        ),
        read_artifact(f'cause.{cause}.yld_rate').assign(
            cause=cause,
            measure='ylds',
        ),
    ])
    artifact_rates.append(cause_artifact_rates)

artifact_rates = pd.concat(artifact_rates).set_index(['cause', 'measure'], append=True)
# Rescale the artifact rates by pregnancy incidence
# (presumably to express them per pregnancy, matching the simulation's denominators — confirm)
artifact_rates = artifact_rates / pregnancy_incidence
# add in the PPD data
artifact_rates = pd.concat([
    artifact_rates,
    postpartum_depression_incidence_risk,
    postpartum_depression_yld_rate,
])

artifact_rates

In [None]:
def describe_rowwise(df, percentiles=(0.025, 0.975)):
    """Return summary statistics for each row of ``df``.

    ``DataFrame.describe`` summarizes columns, so the frame is flipped before
    describing and the summary is flipped back: the result has one row per
    row of ``df`` and one column per statistic.
    """
    rows_as_columns = df.T
    summary = rows_as_columns.describe(percentiles=percentiles)
    return summary.T

In [None]:
# Parse numeric age bounds out of the '<start>_to_<end>' age group label;
# one is added to the end so it lines up with the artifact's age_end
# (the artifact side builds the same label as age_start, '_to_', age_end - 1)
rate_results['age_start'] = rate_results.age_group.str.split('_to_').str[0].astype(float)
rate_results['age_end'] = rate_results.age_group.str.split('_to_').str[1].astype(float) + 1
rate_results


In [None]:
# Reshape the artifact rates to long format: one row per index combination and draw.
# The column axis is named before stacking so the new level is called 'input_draw'
# directly, rather than depending on the positional default name ('level_8'), which
# silently stops matching if the number of index levels ever changes.
artifact_rates_prepped = (
    artifact_rates.rename_axis(columns='input_draw').stack().rename('value').reset_index()
)
# 'draw_12' -> 12, to match the simulation's integer input_draw
artifact_rates_prepped['input_draw'] = artifact_rates_prepped.input_draw.str.replace('draw_', '').astype(int)
# Year and sex carry no information once they are known to be constant
assert artifact_rates_prepped.year_start.nunique() == artifact_rates_prepped.year_end.nunique() == 1
assert artifact_rates_prepped.sex.nunique() == 1
artifact_rates_prepped = artifact_rates_prepped.drop(columns=['year_start', 'year_end', 'sex'])
# Rebuild the simulation-style age group label, whose end bound is age_end - 1
artifact_rates_prepped['age_group'] = (
    artifact_rates_prepped.age_start.astype(int).astype(str) + '_to_' + (artifact_rates_prepped.age_end.astype(int) - 1).astype(str)
)

In [None]:
# Example filter values
# NOTE(review): these look like leftovers from interactive use — the plotting loops
# below pass their own location/measure/cause; confirm nothing else reads them before removing
location = 'Ethiopia'
measure = 'mortality'
cause = 'maternal_sepsis_and_other_maternal_infections'

import matplotlib

def plot_quantitative_comparison(simulation_data, artifact_data, filters=None, scalar=1):
    """Plot simulation values against artifact values, colored by age group.

    Two panels are drawn side by side: the left one plots the artifact value
    against the simulation value with an x=y reference line; the right one
    plots simulation / artifact against the simulation value with reference
    lines at 1, 0.9 and 1.1. Each age group also gets one larger,
    black-edged marker at its mean.

    Parameters
    ----------
    simulation_data, artifact_data
        Long-format frames with identical column sets (index level names
        count as columns), including ``value``, ``age_group``, ``age_start``
        and ``age_end``. They are outer-merged on every column except
        ``value``.
    filters
        Optional mapping of column name to the single value to keep.
        Defaults to no filtering.
    scalar
        Multiplier applied to plotted values; when it is not 1 the axis
        labels say "per <scalar> pregnancies".
    """
    # None default instead of a shared mutable dict
    filters = {} if filters is None else filters

    all_columns = set(simulation_data.columns) | set([c for c in simulation_data.index.names if c is not None])
    assert all_columns == set(artifact_data.columns) | set([c for c in artifact_data.index.names if c is not None])
    data_for_plot = artifact_data.merge(simulation_data, on=[c for c in all_columns if c != 'value'], how='outer', suffixes=('_artifact', '_simulation'))

    for filter_col, filter_value in filters.items():
        data_for_plot = data_for_plot[
            (data_for_plot[filter_col] == filter_value)
        ]

    # Exclude edge age groups which are very noisy
    data_for_plot = data_for_plot[
        (data_for_plot.age_start >= 15) &
        (data_for_plot.age_end <= 45)
    ]

    age_groups = data_for_plot.age_group.unique()
    colors = matplotlib.colormaps.get_cmap('tab20')
    color_map = {age: colors(i) for i, age in enumerate(age_groups)}

    # The label suffix depends only on `scalar`, so build it once
    scalar_suffix = f' per {scalar:,} pregnancies' if scalar != 1 else ''

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 7))

    for ax, relative in [(ax1, False), (ax2, True)]:
        for age in age_groups:
            data_for_plot_age = data_for_plot[data_for_plot.age_group == age]
            if relative:
                y_value = data_for_plot_age.value_simulation / data_for_plot_age.value_artifact
            else:
                y_value = data_for_plot_age.value_artifact
            ax.scatter(data_for_plot_age.value_simulation * scalar, y_value * scalar, color=color_map[age], label=age)
            # Larger marker at the age group's mean
            mean_x = data_for_plot_age.value_simulation.mean() * scalar
            mean_y = y_value.mean() * scalar
            ax.scatter(mean_x, mean_y, color=color_map[age], marker='o', s=200, edgecolors='black', zorder=10)

        if not relative:
            # Plot line x=y
            max_value = max(data_for_plot.value_simulation.max() * scalar, data_for_plot.value_artifact.max() * scalar)
            min_value = min(data_for_plot.value_simulation.min() * scalar, data_for_plot.value_artifact.min() * scalar)
            ax.plot([min_value, max_value], [min_value, max_value], 'k--')
            ax.set_xlabel(f'Simulation{scalar_suffix}')
            ax.set_ylabel(f'Artifact{scalar_suffix}')
            ax.set_title('Simulation vs Artifact')
        else:
            # Plot heuristic bounds
            ax.axhline(1, color='k', linestyle='--')
            ax.axhline(1.1, color='k', linestyle='dotted')
            ax.axhline(0.9, color='k', linestyle='dotted')
            ax.set_xlabel(f'Simulation{scalar_suffix}')
            ax.set_ylabel('Simulation / Artifact value')
            ax.set_title('Relative error')

        ax.legend(title='Age Group', bbox_to_anchor=(1.05, 1), loc='upper left')
        ax.grid(True)

    # str() keeps the title working for non-string filter values (e.g. a draw number)
    filter_desc = ', '.join([f'{filter_col}={str(filter_value).replace("_", " ")}' for filter_col, filter_value in filters.items()])
    fig.suptitle(f"{filter_desc}", fontsize=16, y=1.02)
    plt.tight_layout()
    plt.show()

In [None]:
# Maternal hemorrhage: simulation vs artifact, for each location and measure
for location in locations:
    for measure in ('incidence', 'mortality'):
        plot_quantitative_comparison(
            rate_results,
            artifact_rates_prepped,
            filters=dict(location=location, cause='maternal_hemorrhage', measure=measure),
        )

# Incidence is overestimated, which has been the case since model 13
# https://github.com/ihmeuw/vivarium_research_mncnh_portfolio/blob/718ba6c8d45c024b8594d1b980e8d7afdfb58f9a/verification_and_validation/model_13.0_maternal_checks.ipynb
# It seems like we might overestimate more in draws with higher incidence (and less in draws with lower incidence)

In [None]:
# Maternal sepsis and other maternal infections: simulation vs artifact, for each location and measure
for location in locations:
    for measure in ('incidence', 'mortality'):
        plot_quantitative_comparison(
            rate_results,
            artifact_rates_prepped,
            filters=dict(
                location=location,
                cause='maternal_sepsis_and_other_maternal_infections',
                measure=measure,
            ),
        )

# Similar issue to hemorrhage
# TODO: we should consider bumping up population size... there are several draws with zero death counts for given age groups

In [None]:
# Obstructed labor and uterine rupture: simulation vs artifact, for each location and measure
for location in locations:
    for measure in ('incidence', 'mortality'):
        plot_quantitative_comparison(
            rate_results,
            artifact_rates_prepped,
            filters=dict(
                location=location,
                cause='maternal_obstructed_labor_and_uterine_rupture',
                measure=measure,
            ),
        )

# Looks good; note that the mortality sample size is very small

In [None]:
# Postpartum depression incidence: simulation vs artifact, for each location
for location in locations:
    plot_quantitative_comparison(
        rate_results,
        artifact_rates_prepped,
        filters=dict(location=location, cause='postpartum_depression', measure='incidence'),
    )

# Looks good.

## Verify pregnancy outcomes

In [None]:
# Share of each pregnancy outcome within every location / draw / age group stratum
outcome_fractions = (births.groupby(groupby_cols + ['pregnancy_outcome'])['value'].sum()
                    / births.groupby(groupby_cols)['value'].sum()).reset_index()
# Rename to 'outcome' so the column set matches the artifact-side table
outcome_fractions = outcome_fractions.dropna().rename(columns={'pregnancy_outcome': 'outcome'})
# Numeric age bounds parsed from the '<start>_to_<end>' label (end is shifted by one)
outcome_fractions['age_start'] = outcome_fractions.age_group.str.split('_to_').str[0].astype(float)
outcome_fractions['age_end'] = outcome_fractions.age_group.str.split('_to_').str[1].astype(float) + 1
outcome_fractions

In [None]:
# Name the column axis so that stacking produces a level called 'input_draw'
artifact_birth_outcomes.columns.name = 'input_draw'

In [None]:
# Long format: one row per index combination and draw
artifact_birth_outcomes_prepped = artifact_birth_outcomes.stack().rename("value").reset_index()
# 'draw_12' -> 12
artifact_birth_outcomes_prepped['input_draw'] = artifact_birth_outcomes_prepped.input_draw.str.replace('draw_', '').astype(int)
# Year and sex are dropped once they are known to be constant
assert artifact_birth_outcomes_prepped.year_start.nunique() == artifact_birth_outcomes_prepped.year_end.nunique() == 1
assert artifact_birth_outcomes_prepped.sex.nunique() == 1
artifact_birth_outcomes_prepped = artifact_birth_outcomes_prepped.drop(columns=['year_start', 'year_end', 'sex'])
# Rebuild the simulation-style age group label, whose end bound is age_end - 1
artifact_birth_outcomes_prepped['age_group'] = (
    artifact_birth_outcomes_prepped.age_start.astype(int).astype(str) + '_to_' + (artifact_birth_outcomes_prepped.age_end.astype(int) - 1).astype(str)
)

In [None]:
# Pregnancy outcome fractions: simulation vs artifact, for each location and outcome
for location in locations:
    for outcome in ('live_birth', 'stillbirth', 'partial_term'):
        plot_quantitative_comparison(
            outcome_fractions,
            artifact_birth_outcomes_prepped,
            filters=dict(location=location, outcome=outcome),
        )
# Still look good

## Verify antenatal care (ANC) coverage vs artifact

In [None]:
# Not age-specific
# Flag partial term pregnancies so coverage can be computed separately for them
births['partial_term'] = births.pregnancy_outcome == 'partial_term'

anc_groupby_cols = [c for c in groupby_cols if c != 'age_group'] + ['partial_term']
# Share of each ANC coverage category within every location / draw / partial_term stratum
anc_coverage_simulation = (
    births.groupby(anc_groupby_cols + ['anc_coverage'])['value'].sum()
    / births.groupby(anc_groupby_cols)['value'].sum()
).reset_index()
anc_coverage_simulation

In [None]:
# ANC coverage covariates from the artifacts: at least one visit, four visits,
# and a first-trimester visit
anc1_coverage_artifact = (
    read_artifact('covariate.antenatal_care_1_visit_coverage_proportion.estimate')
)
anc4_coverage_artifact = (
    read_artifact('covariate.antenatal_care_4_visits_coverage_proportion.estimate')
)
ancfirst_coverage_artifact = (
    read_artifact('covariate.antenatal_care_first_trimester_visit_coverage_proportion.estimate')
)
anc1_coverage_artifact

In [None]:
# First, check ANC coverage among partial term pregnancies

In [None]:
# No partial term pregnancy should be counted in an ANC category that includes a later-pregnancy visit
assert anc_coverage_simulation.loc[
    anc_coverage_simulation.partial_term
    & anc_coverage_simulation.anc_coverage.isin(
        ['first_trimester_and_later_pregnancy', 'later_pregnancy_only']
    )
].value.sum() == 0, "Partial term pregnancies are attending later pregnancy ANC visits"

In [None]:
# Next, compare first-trimester-only ANC coverage among partial term pregnancies
# with the artifact's first-trimester ANC coverage
anc_coverage_simulation_partial_term = anc_coverage_simulation[
    anc_coverage_simulation.partial_term
    & (anc_coverage_simulation.anc_coverage == 'first_trimester_only')
]
# Summarize across the remaining rows of each location
anc_coverage_simulation_partial_term = (
    anc_coverage_simulation_partial_term
    .groupby('location')['value']
    .describe(percentiles=[0.025, 0.975])
)
anc_coverage_simulation_partial_term

In [None]:
# Join the simulation summary with the row-wise summary of the artifact coverage, by location
plot_data = anc_coverage_simulation_partial_term.merge(ancfirst_coverage_artifact.pipe(describe_rowwise), on='location', suffixes=('_sim', '_art'))

# Bar chart of means with standard deviations as error bars; the column prefixes are
# stripped so the value and error frames share the column names 'sim' and 'art'
(
    plot_data[['mean_sim', 'mean_art']]
        .rename(columns=lambda c: c.replace('mean_', ''))
        .plot.bar(yerr=
            plot_data[['std_sim', 'std_art']]
                .rename(columns=lambda c: c.replace('std_', ''))
        )
)
plt.grid()
plt.title('First trimester only ANC coverage among partial term pregnancies (sim)\nrelative to ANC first trimester coverage (artifact)')
plt.xlabel('Location')
plt.ylabel('ANC Coverage')

# Looks good

In [None]:
# Next: live birth and stillbirth outcomes (everything that is not partial term)
anc_coverage_simulation_births = anc_coverage_simulation.loc[anc_coverage_simulation.partial_term == False]
# Summarize across draws within every remaining stratum
anc_coverage_simulation_births = (
    anc_coverage_simulation_births
    .groupby([col for col in anc_coverage_simulation_births.columns if col not in ('value', 'input_draw')])
    ['value']
    .describe(percentiles=[0.025, 0.975])
    .reset_index()
)
anc_coverage_simulation_births

In [None]:
# Target share of each ANC coverage category, derived from the artifact covariates.
# All numeric targets are computed first; the string label column is attached only
# afterwards, so no arithmetic ever involves a frame carrying a non-numeric column.
first_trimester_and_later_pregnancy_target = np.minimum(anc4_coverage_artifact, ancfirst_coverage_artifact)
first_trimester_only_target = ancfirst_coverage_artifact - first_trimester_and_later_pregnancy_target
later_pregnancy_only_target = anc1_coverage_artifact - ancfirst_coverage_artifact
none_target = 1 - anc1_coverage_artifact

first_trimester_and_later_pregnancy_target['anc_coverage'] = 'first_trimester_and_later_pregnancy'
first_trimester_only_target['anc_coverage'] = 'first_trimester_only'
later_pregnancy_only_target['anc_coverage'] = 'later_pregnancy_only'
none_target['anc_coverage'] = 'none'

target_data = pd.concat([first_trimester_and_later_pregnancy_target,
                          first_trimester_only_target,
                          later_pregnancy_only_target,
                          none_target]).reset_index()
# Everything that is not a draw column identifies a row; summarize across the draws
target_data = (target_data.set_index([x for x in target_data.columns if 'draw' not in x])
               .pipe(describe_rowwise)
               .reset_index())
target_data

In [None]:
# Pair each target with the matching simulation summary
plot_data = target_data.merge(anc_coverage_simulation_births, on=['location','anc_coverage'], suffixes=('_target', '_sim'))
plot_data = plot_data.set_index('anc_coverage')
for location in locations:
    # Filter once per location instead of once for the means and again for the error bars
    location_plot_data = plot_data.loc[plot_data.location == location]
    (
        location_plot_data[['mean_sim', 'mean_target']]
            .rename(columns=lambda c: c.replace('mean_', ''))
            .plot.bar(yerr=
                location_plot_data[['std_sim', 'std_target']]
                    .rename(columns=lambda c: c.replace('std_', ''))
            )
    )
    plt.grid()
    plt.title(f'{location}: ANC coverage rates for live and still births')
    plt.xlabel('ANC coverage type')
    plt.ylabel('Coverage')
    # Show each figure explicitly, as the other per-location plotting loop does
    plt.show()

# looks great!

## Verify ultrasound coverage vs docs

In [None]:
# Rows without ANC must all have the 'no_ultrasound' type
assert births[(births.anc_coverage == 'none') & (births.ultrasound_type != 'no_ultrasound')]['value'].sum() == 0, "There should be no ultrasound data for non-ANC births"

# Not age-specific
ultrasound_groupby_cols = [c for c in groupby_cols if c != 'age_group']
# Share of each ultrasound type among rows with any ANC, summarized across draws
ultrasound_coverage_sim = (
    (
        births[births.anc_coverage != 'none'].groupby(ultrasound_groupby_cols + ['ultrasound_type'])['value'].sum() /
            births[births.anc_coverage != 'none'].groupby(ultrasound_groupby_cols)['value'].sum()
    )
        .unstack('input_draw')
        .pipe(describe_rowwise)
        .reset_index()
)
ultrasound_coverage_sim = ultrasound_coverage_sim[(ultrasound_coverage_sim.ultrasound_type != 'no_ultrasound')]
ultrasound_coverage_sim

# looks good
# https://vivarium-research.readthedocs.io/en/latest/models/concept_models/vivarium_mncnh_portfolio/ai_ultrasound_module/module_document.html#id6
# Ethiopia 60.7%, Nigeria 58.7%, Pakistan 66.7%

In [None]:
# Now check that ultrasound coverage does not vary significantly by ANC coverage type
# (so far we are not modeling any differences here)
# Not age-specific
ultrasound_groupby_cols = [c for c in groupby_cols if c != 'age_group'] + ['anc_coverage']
# Same computation as the overall ultrasound coverage, additionally stratified by anc_coverage
ultrasound_coverage_sim = (
    (
        births[births.anc_coverage != 'none'].groupby(ultrasound_groupby_cols + ['ultrasound_type'])['value'].sum() /
            births[births.anc_coverage != 'none'].groupby(ultrasound_groupby_cols)['value'].sum()
    )
        .unstack('input_draw')
        .pipe(describe_rowwise)
        .reset_index()
)
ultrasound_coverage_sim = ultrasound_coverage_sim[(ultrasound_coverage_sim.ultrasound_type != 'no_ultrasound')]
ultrasound_coverage_sim
# looks great!

## Verify azithromycin and misoprostol coverage vs artifact

In [None]:
# anc_coverage holds string labels, and "no ANC" is the label 'none' (as in the ultrasound
# check). Comparing against the boolean False never matches, which made this check vacuous.
assert births[(births.misoprostol_availability == True) & (births.anc_coverage == 'none')].value.sum() == 0, "Misoprostol coverage is non-zero for those without ANC"
# Misoprostol should only appear for home deliveries
assert births[(births.misoprostol_availability == True) & (births.delivery_facility_type != 'home')].value.sum() == 0, "Misoprostol coverage among non-home births"
# Azithromycin should never appear for the 'home' or 'none' delivery facility types
assert births[(births.azithromycin_availability == True) & (births.delivery_facility_type.isin(['home', 'none']))].value.sum() == 0, "Azithromycin coverage among home births"

In [None]:
# Not age-specific, but is facility-type-specific
azithromycin_misoprostol_groupby_cols = [c for c in groupby_cols if c != 'age_group'] + ['delivery_facility_type']

azithromycin_misoprostol_coverage_simulation = []

for intervention in ['azithromycin', 'misoprostol']:
    # Coverage = count with the intervention available / total count, per stratum
    intervention_coverage = (
        births[births[f'{intervention}_availability'] == True].groupby(azithromycin_misoprostol_groupby_cols)['value'].sum() /
        births.groupby(azithromycin_misoprostol_groupby_cols)['value'].sum()
    ).reset_index()
    intervention_coverage['intervention'] = intervention
    azithromycin_misoprostol_coverage_simulation.append(intervention_coverage)

azithromycin_misoprostol_coverage_simulation = pd.concat(azithromycin_misoprostol_coverage_simulation, ignore_index=True)
azithromycin_misoprostol_coverage_simulation

In [None]:
# Simulation results contain a 'none' delivery facility type, this should never have any coverage
# Redundant to checks above but it doesn't hurt
# NOTE: Series.sum() skips NaN, so NaN coverage values do not fail this check
assert azithromycin_misoprostol_coverage_simulation[azithromycin_misoprostol_coverage_simulation.delivery_facility_type == 'none'].value.sum() == 0
azithromycin_misoprostol_coverage_simulation = azithromycin_misoprostol_coverage_simulation[azithromycin_misoprostol_coverage_simulation.delivery_facility_type != 'none']

In [None]:
# Summarize simulated coverage across draws within every remaining stratum
azithromycin_misoprostol_coverage_simulation_summaries = (
    azithromycin_misoprostol_coverage_simulation
        .groupby([x for x in azithromycin_misoprostol_coverage_simulation.columns if x not in ['value', 'input_draw']])
        ['value'].describe(percentiles=[0.025, 0.975])
)
azithromycin_misoprostol_coverage_simulation_summaries

In [None]:
# NOTE: Only baseline coverage is in artifact
# One artifact key per (delivery facility, intervention) pair; each frame is tagged with
# its intervention and facility type and flattened before stacking
azithromycin_misoprostol_baseline_coverage_artifact = pd.concat([
    read_artifact(f'intervention.no_{intervention}_risk.probability_{intervention}_{delivery_facility.lower()}', filter_terms=['child_age_start==0'])
        .assign(intervention=intervention, delivery_facility_type=delivery_facility)
        .set_index(['intervention', 'delivery_facility_type'], append=True)
        .reset_index()
    for delivery_facility in ['BEmONC', 'CEmONC', 'home']
    for intervention in ['azithromycin', 'misoprostol']
], ignore_index=True)
azithromycin_misoprostol_baseline_coverage_artifact['scenario'] = 'baseline'
azithromycin_misoprostol_baseline_coverage_artifact

In [None]:
# Within each location / facility type / intervention group, every draw column must hold a
# single distinct value; otherwise taking the first row of each group (next cell) would lose information
assert (
    azithromycin_misoprostol_baseline_coverage_artifact
        .groupby([c for c in azithromycin_misoprostol_baseline_coverage_artifact.columns if c in azithromycin_misoprostol_groupby_cols] + ['intervention'])
        [[f'draw_{draw}' for draw in draws]]
        .nunique().max().max()
) == 1, "Varying azithromycin or misoprostol coverage rates (by age or sex or year)"

In [None]:
# Collapse to one row per location / facility type / intervention by taking the first row's draw values
azithromycin_misoprostol_baseline_coverage_artifact = (
    azithromycin_misoprostol_baseline_coverage_artifact
        .groupby([c for c in azithromycin_misoprostol_baseline_coverage_artifact.columns if c in azithromycin_misoprostol_groupby_cols] + ['intervention'])
        [[f'draw_{draw}' for draw in draws]]
        .first()
)
azithromycin_misoprostol_baseline_coverage_artifact

In [None]:
# Summarize artifact coverage across draws for each row
azithromycin_misoprostol_baseline_coverage_artifact_summaries = (
    azithromycin_misoprostol_baseline_coverage_artifact
        .pipe(describe_rowwise)
)
azithromycin_misoprostol_baseline_coverage_artifact_summaries

In [None]:
# Simulation and artifact summaries side by side, joined on their shared index
all_intervention_plot_data = (
    azithromycin_misoprostol_coverage_simulation_summaries.join(azithromycin_misoprostol_baseline_coverage_artifact_summaries, lsuffix='_sim', rsuffix='_art')
).reset_index()
# The join must pair every simulation row with exactly one artifact row
assert len(all_intervention_plot_data) == len(azithromycin_misoprostol_coverage_simulation_summaries) == len(azithromycin_misoprostol_baseline_coverage_artifact_summaries)

for intervention in ['azithromycin', 'misoprostol']:
    for location in locations:
        for delivery_facility_type in ['BEmONC', 'CEmONC', 'home']:
            plot_data = all_intervention_plot_data[
                (all_intervention_plot_data.location == location) &
                (all_intervention_plot_data.delivery_facility_type == delivery_facility_type) &
                (all_intervention_plot_data.intervention == intervention)
            ].drop(columns=["location", "delivery_facility_type", "intervention"]).assign(scenario='baseline').set_index('scenario')

            # Boolean filtering never raises for a missing combination; it yields an empty
            # frame. Skip it explicitly, because the zero-coverage check below would
            # otherwise pass vacuously and report a coverage of 0 that was never measured.
            if plot_data.empty:
                continue

            # If coverage should be zero, we can check that exactly (without a plot)
            if (plot_data['max_art'] == 0).all():
                assert (plot_data['max_sim'] == 0).all()
                print(f'{intervention} coverage is 0 at {delivery_facility_type} in {location}')
                continue

            # Bar chart of means with standard deviations as error bars
            (
                plot_data[['mean_sim', 'mean_art']]
                    .rename(columns=lambda c: c.replace('mean_', ''))
                    .plot.bar(yerr=
                        plot_data[['std_sim', 'std_art']]
                            .rename(columns=lambda c: c.replace('std_', ''))
                    )
            )
            plt.grid()
            plt.title(f'{intervention} Coverage in {delivery_facility_type} in Simulation vs Artifact for {location}')
            plt.xlabel('Scenario')
            plt.ylabel(f'{intervention} Coverage')
            plt.show()

# looks good

## Verify azithromycin and misoprostol coverage and RRs vs docs


In [None]:
# Same 'anc_other' results as `births`, but keeping every scenario
births_all_scenarios = read_results('anc_other', baseline_only=False)
births_all_scenarios

In [None]:
# Scenarios present in the results; later checks are skipped for scenarios that were not run
scenarios_run = list(births_all_scenarios.scenario.unique())
scenarios_run

In [None]:
# Incidence counts for every cause, keeping every scenario
incidence_all_scenarios = combine_count_results_by_cause('incidence', 'counts', baseline_only=False)
incidence_all_scenarios

In [None]:
# Deaths by cause, keeping every scenario; the observer's cause-of-death column is renamed to `cause`
deaths_all_scenarios = (
    read_results('maternal_disorders_burden_observer_disorder_deaths', baseline_only=False)
        .rename(columns={'maternal_disorders_burden_observer_cause_of_death':'cause'})
)
deaths_all_scenarios

In [None]:
# Azithromycin coverage per delivery facility type, scenario and location:
# count with azithromycin available / total count (summed over all draws and other strata)
# NOTE: azithromycin_availability is used directly as a boolean mask here
azithromycin_coverage = (
    births_all_scenarios[births_all_scenarios.azithromycin_availability].groupby(['delivery_facility_type', 'scenario', 'location'])['value'].sum() /
    births_all_scenarios.groupby(['delivery_facility_type', 'scenario', 'location'])['value'].sum()
).dropna()
azithromycin_coverage

In [None]:
# Label slices on the first index level (delivery_facility_type): 'home' through 'none'
# must have zero coverage, then only 'BEmONC' through 'CEmONC' is kept
# NOTE(review): the slices depend on the sort order of the facility type labels — confirm if labels change
assert (azithromycin_coverage.loc[slice('home', 'none')] == 0).all(), "azithromycin coverage outside IFD"
azithromycin_coverage = azithromycin_coverage.loc[slice('BEmONC', 'CEmONC')]

In [None]:
# Scenarios excluded from the baseline-coverage checks below
azithromycin_scaleup_scenarios = ['azithromycin_vv', 'azithromycin_scaleup', 'full_product_scaleup']

In [None]:
# https://vivarium-research.readthedocs.io/en/latest/models/intervention_models/intrapartum/azithromycin_intervention.html#baseline-coverage-data
# Outside the scale-up scenarios, coverage must be zero everywhere except CEmONC in Pakistan.
# "Outside Pakistan CEmONC" means NOT (CEmONC and Pakistan), i.e. non-CEmONC OR non-Pakistan.
# Joining the two conditions with `&` only checked rows that were neither, which left
# Pakistan's other facility types and other locations' CEmONC rows unchecked.
assert (azithromycin_coverage[
    ~azithromycin_coverage.index.get_level_values('scenario').isin(azithromycin_scaleup_scenarios)
    & (
        (azithromycin_coverage.index.get_level_values('delivery_facility_type') != 'CEmONC')
        | (azithromycin_coverage.index.get_level_values('location') != 'Pakistan')
    )
] == 0).all(), "baseline coverage outside Pakistan CEmONC"

In [None]:
# https://vivarium-research.readthedocs.io/en/latest/models/intervention_models/intrapartum/azithromycin_intervention.html#baseline-coverage-data
# Not guaranteed to be exactly the same across scenarios because IFD (and therefore eligibility) can change
# But all scenarios should be close to target of 20.3, and they are
# Displays CEmONC coverage in Pakistan for every scenario outside the scale-up list
azithromycin_coverage[
    ~azithromycin_coverage.index.get_level_values('scenario').isin(azithromycin_scaleup_scenarios)
].loc[('CEmONC', slice(None), 'Pakistan')]

In [None]:
# Only checked when the scale-up scenario was run: coverage must be exactly 1 for every remaining row
if 'azithromycin_scaleup' in scenarios_run:
    # https://vivarium-research.readthedocs.io/en/latest/models/concept_models/vivarium_mncnh_portfolio/concept_model.html#id17
    assert (azithromycin_coverage.loc[(slice(None), 'azithromycin_scaleup')] == 1).all(), "azithromycin not fully scaled up in scale up scenario"

In [None]:
# Only shown when the V&V scenario was run; inspected by eye rather than asserted
if 'azithromycin_vv' in scenarios_run:
    # Target is 50%, this looks close
    display(azithromycin_coverage.loc[(slice(None), 'azithromycin_vv')])

In [None]:
def calculate_incidence_and_deaths_rrs(intervention, confounders=(), baseline_coverage_confounders=()):
    """Print and display observed relative risks (RRs) of an intervention on incidence and deaths.

    Reads the notebook-level tables ``incidence_all_scenarios``, ``deaths_all_scenarios`` and
    ``births_all_scenarios`` plus ``scenarios_run``. For each of the two outcomes, up to two
    comparisons are made:

    * Cross-simulant (only if ``'<intervention>_vv'`` is in ``scenarios_run``): within the V&V
      scenario, risk is outcome events divided by births (excluding ``partial_term`` pregnancy
      outcomes), stratified by ``'<intervention>_availability'``, ``confounders``, input draw
      and location. The RR is the risk with availability True over the risk with availability
      False.
    * Cross-scenario (only if ``'<intervention>_scaleup'`` is in ``scenarios_run``): outcome
      event counts in the scale-up scenario divided by those in baseline, restricted to strata
      whose coverage differs between the two scenarios. Births are asserted to be identical in
      both scenarios, so the ratio of counts equals the ratio of risks.

    In both comparisons infinite ratios are replaced by NaN and NaN rows are dropped.

    Parameters
    ----------
    intervention
        Prefix used to build the scenario names (``_vv``, ``_scaleup``) and the boolean
        availability column name (``_availability``).
    confounders
        Extra columns to stratify by in the cross-simulant comparison.
    baseline_coverage_confounders
        Extra columns to stratify by in the cross-scenario comparison. Must be empty when
        baseline coverage is zero everywhere (asserted).

    Returns
    -------
    None
        All results are emitted through ``print`` and ``display``.
    """
    print(intervention)

    vv_scenario = f'{intervention}_vv'
    scaleup_scenario = f'{intervention}_scaleup'
    availability_col = f'{intervention}_availability'
    confounder_cols = list(confounders)
    baseline_confounder_cols = list(baseline_coverage_confounders)

    def restrict_to_strata(events, strata_index):
        # Keep the rows whose index, ignoring the 'cause' level, is in strata_index
        return events[events.index.droplevel('cause').isin(strata_index)]

    outcome_tables = {'incidence': incidence_all_scenarios, 'deaths': deaths_all_scenarios}
    for outcome, outcome_df in outcome_tables.items():
        # TODO: Could compare these RRs at the draw level vs artifact
        print(outcome)

        # ---- Cross-simulant comparison within the V&V scenario ----
        if vv_scenario not in scenarios_run:
            print(f'Cannot do cross-simulant comparison because {intervention} V&V scenario was not run')
        else:
            print('Cross-simulant comparison: compare people with and without, in the V&V scenario')
            vv_events = outcome_df.loc[outcome_df.scenario == vv_scenario]
            vv_births = births_all_scenarios.loc[
                (births_all_scenarios.scenario == vv_scenario)
                & (births_all_scenarios.pregnancy_outcome != 'partial_term')
            ]
            vv_strata = [availability_col] + confounder_cols + ['input_draw', 'location']
            events_per_stratum = vv_events.groupby(vv_strata + ['cause'])['value'].sum()
            births_per_stratum = vv_births.groupby(vv_strata)['value'].sum()
            outcome_risk_by_intervention = (events_per_stratum / births_per_stratum).dropna()

            risk_with = outcome_risk_by_intervention.loc[(True,)]
            risk_without = outcome_risk_by_intervention.loc[(False,)]
            relative_risk = (risk_with / risk_without).replace(np.inf, np.nan).dropna()
            display(relative_risk.sort_values())
            display(relative_risk.groupby(['cause']).describe(percentiles=[0.025,0.975]))

        # ---- Cross-scenario comparison between baseline and scale-up ----
        if scaleup_scenario not in scenarios_run:
            print(f'Cannot do cross-scenario comparison because {intervention} scale-up scenario was not run')
            continue

        print('Cross-scenario comparison: compare those eligible, between baseline and scale-up scenario')
        compared_scenarios = ['baseline', scaleup_scenario]
        scenario_strata = ['scenario'] + baseline_confounder_cols + ['input_draw', 'location']

        outcome_events_by_scenario = (
            outcome_df.loc[outcome_df.scenario.isin(compared_scenarios)]
            .groupby(scenario_strata + ['cause'])['value'].sum()
        )

        # One filtered births table serves both the birth counts and the coverage calculation
        compared_births = births_all_scenarios.loc[
            births_all_scenarios.scenario.isin(compared_scenarios)
            & (births_all_scenarios.pregnancy_outcome != 'partial_term')
        ]
        births_by_scenario = compared_births.groupby(scenario_strata)['value'].sum()
        assert (births_by_scenario.loc['baseline'] == births_by_scenario.loc[scaleup_scenario]).all(), "intervention scaleup changed number of births"

        covered_births = (
            compared_births.loc[compared_births[availability_col]]
            .groupby(scenario_strata)['value'].sum()
        )
        coverage_by_scenario = (covered_births / births_by_scenario).dropna()

        baseline_coverage = coverage_by_scenario.loc['baseline']
        if (baseline_coverage > 0).any():
            print('Scaled up from')
            display(baseline_coverage.sort_values())
            display(baseline_coverage.groupby(['location'] + baseline_confounder_cols).describe(percentiles=[0.025,0.975]))
        else:
            assert len(baseline_coverage_confounders) == 0

        # In every stratum, scale-up coverage must be either unchanged from baseline or exactly 1
        scaleup_coverage = coverage_by_scenario.loc[scaleup_scenario]
        assert ((scaleup_coverage == baseline_coverage) | (scaleup_coverage == 1)).all()
        coverage_changed = scaleup_coverage != baseline_coverage
        scaled_up_index = coverage_changed.index[coverage_changed]

        scaleup_events = restrict_to_strata(outcome_events_by_scenario.loc[scaleup_scenario], scaled_up_index)
        baseline_events = restrict_to_strata(outcome_events_by_scenario.loc['baseline'], scaled_up_index)
        relative_risk = (scaleup_events / baseline_events).replace(np.inf, np.nan).dropna()
        print('Observed RRs')
        display(relative_risk.sort_values())
        display(relative_risk.groupby(['cause'] + baseline_confounder_cols).describe(percentiles=[0.025,0.975]))

In [None]:
# Target RR is 0.65 (95% CI 0.55-0.77) on maternal sepsis
# https://vivarium-research.readthedocs.io/en/latest/models/intervention_models/intrapartum/azithromycin_intervention.html#id13

# The cross-simulant comparison is confounded by delivery facility -- it affects azithromycin
# availability, but is also correlated with ANC which increases hemoglobin (through IFA),
# so we stratify by it.
# Unfortunately having this confounder means we don't have good sample size/power for deaths
calculate_incidence_and_deaths_rrs(
    'azithromycin',
    confounders=['delivery_facility_type'],
    # Baseline coverage in Pakistan is specific to CEmONC, so the cross-scenario comparison
    # must also be stratified by delivery facility
    baseline_coverage_confounders=['delivery_facility_type'],
)

# It seems like we are slightly underestimating the impact, based on the incidence results,
# but deaths look better?
# Maybe just stochastic noise

In [None]:
# https://vivarium-research.readthedocs.io/en/latest/models/concept_models/vivarium_mncnh_portfolio/concept_model.html#id16
# Misoprostol coverage: births with misoprostol available divided by all births,
# by scenario, delivery facility type and ANC coverage (strata with no births are dropped)
misoprostol_coverage = (
    births_all_scenarios[births_all_scenarios.misoprostol_availability].groupby(['scenario', 'delivery_facility_type', 'anc_coverage'])['value'].sum() /
    births_all_scenarios.groupby(['scenario', 'delivery_facility_type', 'anc_coverage'])['value'].sum()
).dropna()
# Only the misoprostol_vv scenario may have any misoprostol coverage
assert (misoprostol_coverage[misoprostol_coverage.index.get_level_values('scenario') != 'misoprostol_vv'] == 0).all(), "misoprostol coverage outside of misoprostol scenario"
if 'misoprostol_vv' in scenarios_run:
    # Each step below checks one restriction and then narrows misoprostol_coverage,
    # so the order of these statements matters
    # Narrow to the V&V scenario (removes the 'scenario' index level)
    misoprostol_coverage = misoprostol_coverage.loc['misoprostol_vv']
    assert (misoprostol_coverage[misoprostol_coverage.index.get_level_values('delivery_facility_type') != 'home'] == 0).all(), "misoprostol coverage outside of home births"
    # Narrow to home births (removes the 'delivery_facility_type' index level)
    misoprostol_coverage = misoprostol_coverage.loc['home']
    assert (misoprostol_coverage[misoprostol_coverage.index.get_level_values('anc_coverage') == 'none'] == 0).all(), "misoprostol coverage among those not attending ANC"
    # What remains is coverage among home births with some ANC attendance
    misoprostol_coverage = misoprostol_coverage[misoprostol_coverage.index.get_level_values('anc_coverage') != 'none']
    display(misoprostol_coverage)

# Should all be close to 50%, which they are

In [None]:
# Target RR is 0.61 (95% CI: 0.50 to 0.74) on maternal hemorrhage
# https://vivarium-research.readthedocs.io/en/latest/models/concept_models/vivarium_mncnh_portfolio/concept_model.html#id17

# The cross-simulant comparison is confounded by delivery facility -- misoprostol is only
# available at home, but that is correlated with no ANC which decreases hemoglobin (through IFA),
# so we stratify by it.
# baseline_coverage_confounders is left at its default (empty).
calculate_incidence_and_deaths_rrs(
    'misoprostol',
    confounders=['delivery_facility_type']
)

# Seems relatively close, although again a bit off for incidence?