# V&V anemia screening and iron interventions (after sim run)

This notebook focuses on anemia screening, oral iron, and IV iron.

All the separate checks in this notebook are labeled with "CHECK" (all caps).

Unless otherwise stated, all checks in this file will go in the after-sim/results suite.

## Setup

In [None]:
import pandas as pd, numpy as np, os
from vivarium import Artifact
import db_queries
from get_draws.api import get_draws
import matplotlib.pyplot as plt
from pathlib import Path
import yaml

In [None]:
import warnings
# Silence FutureWarning messages so they do not clutter the cell outputs below.
warnings.filterwarnings('ignore', category=FutureWarning) 

In [None]:
# Locations covered by this notebook; their lower-cased names are matched against the
# per-location subdirectories of the results directory.
locations = ['Pakistan', 'Ethiopia', 'Nigeria']

In [None]:
# Parameters cell for papermill
# `model_dir` is the model-version subdirectory appended to the shared results root.
model_dir = "model26.0"

In [None]:
# Results root for this model version; expected to contain one subdirectory per location.
base_results_dir = Path("/mnt/team/simulation_science/pub/models/vivarium_gates_mncnh/results/") / model_dir

In [None]:
# Map each location to the `results` directory of its most recent run.
results_dirs = {}
# The results root must contain exactly one subdirectory per expected location.
assert set([p.stem for p in base_results_dir.iterdir()]) == set([l.lower() for l in locations])
for location in locations:
    location_dir = base_results_dir / location.lower()
    # iterdir() yields paths that already include `location_dir`; sorting them picks the
    # lexicographically last run (assumes run directory names sort chronologically).
    timestamps = sorted(location_dir.iterdir())
    assert timestamps, f'No runs found in {location_dir}'
    last_timestamp = timestamps[-1]
    if len(timestamps) > 1:
        print(f'Multiple timestamps: {timestamps}, using {last_timestamp}')
    # `last_timestamp` is already a full path, so it must not be joined onto `location_dir`
    # again (that only worked because the base path is absolute).
    results_dirs[location] = last_timestamp / 'results'

results_dirs

In [None]:
# Look up location ids, keeping only the locations for which results were found.
location_ids = db_queries.get_ids('location').loc[
    lambda ids: ids.location_name.str.lower().isin([name.lower() for name in results_dirs])
]
location_ids

In [None]:
def load_yaml_file(path):
    """Open the file at ``path`` and return its contents parsed with ``yaml.safe_load``."""
    with open(path) as yaml_file:
        parsed = yaml.safe_load(yaml_file)
    return parsed

In [None]:
# For each location, read the artifact path out of the model specification that sits
# next to that location's results directory.
artifact_paths = dict(
    (
        location,
        load_yaml_file(result_dir.parent / 'model_specification.yaml')['configuration']['input_data']['artifact_path'],
    )
    for location, result_dir in results_dirs.items()
)
artifact_paths

In [None]:
def read_results(result_file_name, baseline_only=True):
    """Load one results parquet file for every location and stack them.

    For each entry of ``results_dirs`` this reads ``<result_file_name>.parquet``,
    tags the rows with a ``location`` column, drops the ``measure``,
    ``entity_type``, ``entity`` and ``sub_entity`` columns and, when a
    ``random_seed`` column is present, sums ``value`` over random seeds.

    Parameters
    ----------
    result_file_name
        File name of the results file, without the ``.parquet`` extension.
    baseline_only
        If True, keep only rows whose ``scenario`` is ``'baseline'``.

    Returns
    -------
    A single DataFrame with the rows of all locations concatenated.
    """
    metadata_columns = ['measure', 'entity_type', 'entity', 'sub_entity']
    per_location = []
    for location, result_dir in results_dirs.items():
        parquet_path = result_dir / f'{result_file_name}.parquet'
        if baseline_only:
            frame = pd.read_parquet(parquet_path, filters=[('scenario', '==', 'baseline')])
        else:
            frame = pd.read_parquet(parquet_path)
        frame['location'] = location

        # The read-time filter is repeated here as an explicit row filter.
        if baseline_only:
            frame = frame.loc[frame.scenario == 'baseline']
        # note! I am running into issues if I do not drop these extra columns
        frame = frame.drop(columns=metadata_columns)
        if 'random_seed' in frame.columns:
            # Aggregate over random seeds: group by everything except the seed and the value.
            group_keys = [c for c in frame.columns if c not in ('random_seed', 'value')]
            frame = frame.drop(columns='random_seed').groupby(group_keys).sum().reset_index()

        per_location.append(frame)
    return pd.concat(per_location, ignore_index=True)

In [None]:
# Load the ANC hemoglobin results for all scenarios (not just baseline); `anc` is the
# main table used by the checks below.
result_file_name = 'anc_hemoglobin'
anc = read_results(result_file_name, baseline_only=False)
anc.head()

In [None]:
# Scenarios present in the results; scenario-specific checks below are skipped when
# their scenario is not in this list. Baseline is required by every comparison.
scenarios_run = list(anc.scenario.unique())
assert 'baseline' in scenarios_run
scenarios_run

## checks on anemia screening

### hemoglobin

CHECK: Hemoglobin screening/testing only occurs among those who attend later-pregnancy ANC.

Type: precise assert.

In [None]:
# Rows for simulants without a later-pregnancy ANC visit that nevertheless have a test result.
without_later_anc = anc.anc_coverage.isin(['none', 'first_trimester_only'])
has_hemoglobin_test = anc.tested_hemoglobin_exposure != 'not_tested'
assert anc.loc[without_later_anc & has_hemoglobin_test, 'value'].sum() == 0, 'hemoglobin screening among those without later pregnancy ANC coverage'

In [None]:
# https://vivarium-research.readthedocs.io/en/latest/models/intervention_models/mncnh_pregnancy/anemia_screening.html#baseline-coverage-data
screening_targets_raw = pd.read_csv(
    '/snfs1/Project/simulation_science/mnch_grant/MNCNH portfolio/anc_bloodsample_prop_st-gpr_results_aggregates_scaled2025-05-29.csv'
)
# The file is expected to hold a single age group and sex.
assert (screening_targets_raw.age_group_id == 22).all()
assert (screening_targets_raw.sex_id == 3).all()
# Keep the simulated locations in 2023 and index the mean estimate by location name.
screening_rows_in_scope = (
    screening_targets_raw.location_id.isin(list(location_ids.location_id.values)) &
    (screening_targets_raw.year_id == 2023)
)
screening_location_lookup = location_ids[['location_name', 'location_id']].rename(columns={'location_name': 'location'})
hemoglobin_screening_coverage_targets = (
    screening_targets_raw[screening_rows_in_scope]
    .merge(screening_location_lookup)
    .set_index('location')['mean']
)
hemoglobin_screening_coverage_targets.sort_index()

In [None]:
# Among simulants with a later-pregnancy ANC visit, the share in each tested-hemoglobin
# category, per location and scenario.
later_pregnancy_anc = anc.loc[anc.anc_coverage.isin(['first_trimester_and_later_pregnancy', 'later_pregnancy_only'])]
tested_counts = later_pregnancy_anc.groupby(['location','scenario','tested_hemoglobin_exposure'])['value'].sum()
later_pregnancy_anc_totals = later_pregnancy_anc.groupby(['location','scenario'])['value'].sum()
tested_hemoglobin_exposure_by_scenario = tested_counts / later_pregnancy_anc_totals
tested_hemoglobin_exposure_by_scenario

# not easy to validate the proportion low vs adequate?
    # it's a measure of true hemoglobin exposure, but with testing error introduced via sensitivity and specificity of the test
    # the hemoglobin exposure at the time of measurement is also a little odd in that it is:
        # after the effect of oral iron at the first trimester ANC visit has been applied
        # and before the effect of oral iron not received until later pregnancy is applied
        # this means that we will overestimate the prevalence of anemia at this timepoint relative to the baseline GBD estimate

CHECK: Hemoglobin screening/testing in the baseline scenario matches documented targets (see 2 cells above).

Type: fuzzy proportion check, though might need a bit of "fudge factor" for our limited number of parameter uncertainty draws.

In [None]:
# Coverage is inverted!
# This shows the baseline proportion NOT tested, i.e. the complement of screening coverage,
# so compare it against (1 - target) rather than against the target itself.
# TODO: add a plot
tested_hemoglobin_exposure_by_scenario.loc[(slice(None), 'baseline', 'not_tested')].sort_index()

In [None]:
# Screening scale-up scenarios that were actually run. Built from a set intersection,
# so the order of the resulting list is not meaningful.
anemia_screening_scaleup_scenarios = list(set(scenarios_run) & {'anemia_screening_vv', 'anemia_screening_and_iv_iron_scaleup'})
anemia_screening_scaleup_scenarios

CHECK: Hemoglobin screening/testing in the anemia screening scaleup scenarios is 100%.

Type: precise assert.

In [None]:
# In every screening scale-up scenario the not-tested share must be exactly zero in all locations.
for scenario in anemia_screening_scaleup_scenarios:
    not_tested_share = tested_hemoglobin_exposure_by_scenario.loc[(slice(None), scenario, 'not_tested')]
    assert (not_tested_share == 0).all(), "not everyone tested in anemia screening scaleup scenario"

In [None]:
# Eyeball the not-tested share across scenarios for a single location (Ethiopia).
tested_hemoglobin_exposure_by_scenario.loc[('Ethiopia', slice(None), 'not_tested')].sort_values()

CHECK: Hemoglobin screening/testing coverage does not differ between scenarios, except scenarios that scale it up.

Type: precise assert (due to CRN).

In [None]:
# Within each location, the not-tested share must take exactly one distinct value across
# the scenarios that are NOT expected to change screening coverage. The scale-up scenarios
# are therefore excluded (note the `~`); keeping only them made this check vacuous.
assert (tested_hemoglobin_exposure_by_scenario[
    (tested_hemoglobin_exposure_by_scenario.index.get_level_values('tested_hemoglobin_exposure') == 'not_tested') &
    ~(tested_hemoglobin_exposure_by_scenario.index.get_level_values('scenario').isin(anemia_screening_scaleup_scenarios)) # TODO: exclude other scenarios expected to scale up screening
].groupby('location').nunique() == 1).all(), "anemia screening coverage differs between scenarios that shouldn't affect it"

CHECK: The amount of tested "low" hemoglobin is less in scenarios that scale up MMS relative to those that don't (and don't change anemia screening coverage).

Type: precise assert (due to CRN).

In [None]:
if 'mms_total_scaleup' in scenarios_run:
    # Share testing "low" per location, in the MMS scale-up scenario versus baseline.
    tested_low_mms = tested_hemoglobin_exposure_by_scenario.loc[(slice(None), "mms_total_scaleup", "low")]
    tested_low_baseline = tested_hemoglobin_exposure_by_scenario.loc[(slice(None), "baseline", "low")]
    assert (tested_low_mms < tested_low_baseline).all(), "not seeing decreases in testing low in MMS scale-up scenario"

In [None]:
# Among simulants with any ANC coverage, the share in each true-hemoglobin category,
# per location and scenario.
any_anc = anc.loc[anc.anc_coverage!='none']
true_exposure_counts = any_anc.groupby(['location','scenario','true_hemoglobin_exposure'])['value'].sum()
any_anc_totals = any_anc.groupby(['location','scenario'])['value'].sum()
true_hemoglobin_exposure_by_scenario = true_exposure_counts / any_anc_totals
true_hemoglobin_exposure_by_scenario

CHECK: Proportion truly low hemoglobin (<100) is a bit higher in the baseline scenario than in GBD results.

Type: manual, since we have a known difference (having not applied all baseline IFA effects). Could consider observing something different so we could check this exactly.

In [None]:
from db_queries import get_outputs

# Get targets from the GBD estimates of the anemia impairment in pregnancy
# NOTE: pregnancy-specific GBD estimates are only available at the mean-UI, not draw, level
# The GBD val/lower/upper columns are indexed by location name and joined with the simulated
# baseline proportion of truly low hemoglobin (column `sim`) for a side-by-side comparison.
get_outputs(
    location_id=list(location_ids.location_id),
    topic='rei',
    rei_id=432, # rei_id=432 for moderate and severe anemia combined, which corresponds to our 'low' hemoglobin threshold of 100
    population_group_id=16, # pregnant population
    sex_id=2, # female
    year_id=2023,
    release_id=16, # GBD 2023
    # https://hub.ihme.washington.edu/spaces/GBDdirectory/pages/229280352/GBD+2023+EPIC+COMO+tracking
    # Latest COMO run (as of 12/2/2025) that included the pregnant population
    compare_version_id=8333,
    measure_id=5, # prevalence
    metric_id=3, # rate
    age_group_id=169, # 10-54 years
).set_index('location_name')[['val', 'lower', 'upper']].sort_index().join(
    true_hemoglobin_exposure_by_scenario.loc[(slice(None), 'baseline', 'low')].rename('sim')
)

# Looks reasonable, `sim` is pretty close to `val`, and we expect a slight overestimate from
# not fully applying baseline IFA yet.
    # we could follow up on this in the interactive sim where we could assess severity-specific prevalence after all baseline IFA effects have been applied

CHECK: Proportion truly low hemoglobin (<100) is the same across scenarios that differ only in screening.

Type: precise assert (due to CRN).

In [None]:
if 'anemia_screening_vv' in scenarios_run:
    # True hemoglobin distribution per location, screening scale-up versus baseline.
    true_exposure_screening = true_hemoglobin_exposure_by_scenario.loc[(slice(None), 'anemia_screening_vv')]
    true_exposure_baseline = true_hemoglobin_exposure_by_scenario.loc[(slice(None), 'baseline')]
    assert (true_exposure_screening == true_exposure_baseline).all(), "Anemia screening scaleup modified true hemoglobin"

CHECK: Proportion truly low hemoglobin (<100) is lower in scenarios that scale up MMS.

Type: precise assert (due to CRN).

In [None]:
if 'mms_total_scaleup' in scenarios_run:
    # Share truly "low" per location, in the MMS scale-up scenario versus baseline.
    true_low_mms = true_hemoglobin_exposure_by_scenario.loc[(slice(None), 'mms_total_scaleup', 'low')]
    true_low_baseline = true_hemoglobin_exposure_by_scenario.loc[(slice(None), 'baseline', 'low')]
    assert (true_low_mms < true_low_baseline).all(), "MMS did not decrease the amount of low hemoglobin"

In [None]:
# let's check sensitivity and specificity for the hemoglobin screen

# Among simulants who were tested, the share of each test result within each true category.
hemoglobin_tested_rows = anc.loc[anc.tested_hemoglobin_exposure!='not_tested']
test_result_counts = hemoglobin_tested_rows.groupby(['location', 'scenario', 'true_hemoglobin_exposure','tested_hemoglobin_exposure'])['value'].sum()
true_category_totals = hemoglobin_tested_rows.groupby(['location', 'scenario', 'true_hemoglobin_exposure'])['value'].sum()
tested_hemoglobin_exposure_by_true_hemoglobin_exposure = test_result_counts / true_category_totals
tested_hemoglobin_exposure_by_true_hemoglobin_exposure

CHECK: Proportion truly adequate hemoglobin (>=100) who *test* adequate is approximately 80%, in all scenarios and locations.

Type: fuzzy check of proportion. Would also like to include a check that aggregates across locations here.

In [None]:
# https://vivarium-research.readthedocs.io/en/latest/models/intervention_models/mncnh_pregnancy/anemia_screening.html#hemoglobin-screening-accuracy-instructions
# Specificity (percent of true negatives that test negative): 80%

# Select truly adequate simulants who also tested adequate, for every location and scenario.
tested_hemoglobin_exposure_by_true_hemoglobin_exposure.loc[(slice(None), slice(None), 'adequate', 'adequate')]

# Looks good

CHECK: Proportion truly low hemoglobin (<100) who *test* low is approximately 85%, in all scenarios and locations.

Type: fuzzy check of proportion. Would also like to include a check that aggregates across locations here.

In [None]:
# Sensitivity (percent of true positives that test positive): 85%
# a little confusing, but positive test result refers to low hemoglobin screening value
# Select truly low simulants who also tested low, for every location and scenario.
tested_hemoglobin_exposure_by_true_hemoglobin_exposure.loc[(slice(None), slice(None), 'low', 'low')]

# Looks good

### ferritin

CHECK: Ferritin screening only occurs in anemia screening scaleup scenarios.

Type: precise assert.

In [None]:
# Rows outside the screening scale-up scenarios that nevertheless have a ferritin test result.
outside_screening_scaleup = ~anc.scenario.isin(anemia_screening_scaleup_scenarios)
has_ferritin_test = anc.ferritin_status!='not_tested'
assert anc.loc[outside_screening_scaleup & has_ferritin_test, 'value'].sum() == 0, "Non-zero ferritin screening coverage outside of anemia screening scale up scenarios"

CHECK: Ferritin screening only occurs in simulants who go to later-pregnancy ANC.

Type: precise assert.

In [None]:
# Rows for simulants without a later-pregnancy ANC visit that nevertheless have a ferritin test result.
lacks_later_anc = anc.anc_coverage.isin(['none', 'first_trimester_only'])
ferritin_was_tested = anc.ferritin_status != 'not_tested'
assert anc.loc[lacks_later_anc & ferritin_was_tested, 'value'].sum() == 0, 'ferritin screening among those without later pregnancy ANC coverage'

CHECK: Ferritin screening only occurs in simulants who test low hemoglobin.

Type: precise assert.

In [None]:
# Rows with a ferritin test result whose hemoglobin test result was anything other than "low".
ferritin_tested_mask = anc.ferritin_status != 'not_tested'
did_not_test_low = anc.tested_hemoglobin_exposure != 'low'
assert anc.loc[ferritin_tested_mask & did_not_test_low, 'value'].sum() == 0, 'ferritin testing occuring among those who do not test low hemoglobin'

CHECK: Ferritin screening occurs in 100% of simulants who test low hemoglobin in the anemia screening scale-up scenarios.

Type: precise assert.

In [None]:
# In every anemia screening scale-up scenario that was run (not only 'anemia_screening_vv'),
# no simulant who tested low hemoglobin may be left without a ferritin test.
assert anc.loc[(anc.ferritin_status == 'not_tested')
    & (anc.scenario.isin(anemia_screening_scaleup_scenarios))
    & (anc.tested_hemoglobin_exposure == 'low')]['value'].sum() == 0, 'ferritin testing not occuring among those who DO test low hemoglobin'

CHECK: The probability of low ferritin among those screened approximately matches the probability of low ferritin in the artifact.

Type: manual, since this check is pretty flawed; we can check it better in the interactive sim.

In [None]:
# NOTE(review): `draws` is not referenced again in the cells visible here - confirm whether it is still needed.
draws = [f'draw_{draw}' for draw in anc.input_draw.unique()]
# Load the low-ferritin probability from each location's artifact, tag it with the location,
# and summarize each index row across its columns (transpose -> describe -> transpose).
# Presumably the columns are draws - TODO confirm against the artifact.
probability_low_ferritin_targets = pd.concat([
    Artifact(path).load('ferritin.probability_of_low_ferritin').assign(location=location).set_index('location', append=True)
    for location, path in artifact_paths.items()
]).reorder_levels(['location', 'age_start', 'age_end', 'anemia_status_during_pregnancy']).T.describe(percentiles=[0.025, 0.975]).T
probability_low_ferritin_targets

In [None]:
# Build an age-group label of the form "<age_start>_to_<age_end - 1>" from the index levels,
# presumably to match the `age_group` values in the sim results - verify against `anc`.
probability_low_ferritin_targets['age_group'] = (
    probability_low_ferritin_targets.index.get_level_values('age_start').astype(int).astype(str)
    + '_to_'
    + (probability_low_ferritin_targets.index.get_level_values('age_end').astype(int) - 1).astype(str)
)

# Replace the age_start/age_end index levels with the new label.
# NOTE: this rebinds the same name without those levels, so this cell cannot be re-run
# on its own output; re-run the loading cell first.
probability_low_ferritin_targets = probability_low_ferritin_targets.droplevel(['age_start', 'age_end']).set_index('age_group', append=True)
probability_low_ferritin_targets

In [None]:
# One column of mean probability per anemia category.
means_by_category = probability_low_ferritin_targets['mean'].unstack('anemia_status_during_pregnancy')
# We have to map low/adequate to the anemia categories, which is not exact and makes this all a bit handwavey;
# the value we observe from the sim will be some weighted mix of these two "bounds"
# NOTE: this rebinds `probability_low_ferritin_targets` to a bound1/bound2 table, so the
# 'mean' column read above is no longer available if this cell is re-run on its own output.
probability_low_ferritin_targets = pd.concat([
    # 'low' corresponds to below 100, which means moderate or severe
    means_by_category[['moderate', 'severe']].rename(columns={'moderate': 'bound1', 'severe': 'bound2'})
        .assign(true_hemoglobin_exposure='low')
        .set_index('true_hemoglobin_exposure', append=True),
    # 'adequate' corresponds to above 100, which means mild or not_anemic
    means_by_category[['mild', 'not_anemic']].rename(columns={'not_anemic': 'bound1', 'mild': 'bound2'})
        .assign(true_hemoglobin_exposure='adequate')
        .set_index('true_hemoglobin_exposure', append=True),
])
probability_low_ferritin_targets

In [None]:
# Share with low ferritin among those ferritin-tested, within the screening scale-up scenarios.
screening_scaleup_rows = anc.loc[anc.scenario.isin(anemia_screening_scaleup_scenarios)]
ferritin_strata = ['scenario', 'location', 'age_group', 'true_hemoglobin_exposure']
low_ferritin_counts = screening_scaleup_rows.loc[screening_scaleup_rows.ferritin_status == 'low'].groupby(ferritin_strata)['value'].sum()
ferritin_tested_counts = screening_scaleup_rows.loc[screening_scaleup_rows.ferritin_status != 'not_tested'].groupby(ferritin_strata)['value'].sum()
# Strata in which nobody was tested give an undefined ratio and are dropped.
ferritin_results_by_scenario = (low_ferritin_counts / ferritin_tested_counts).dropna()
ferritin_results_by_scenario

In [None]:
# All index levels except the scenario; results must agree across scenarios within each stratum.
non_scenario_levels = [c for c in ferritin_results_by_scenario.index.names if c != 'scenario']
assert (ferritin_results_by_scenario.groupby(non_scenario_levels).nunique() == 1).all(), "ferritin results differ between scaleup scenarios"

# Since the scenarios agree, keep one value per stratum.
ferritin_results = ferritin_results_by_scenario.groupby(non_scenario_levels).first()
ferritin_results

In [None]:
# Put the simulated proportion (`sim`) next to the two artifact-derived bounds, after
# reordering the target index levels to match the sim results.
comparison = ferritin_results.rename('sim').to_frame().join(probability_low_ferritin_targets.reorder_levels(ferritin_results.index.names))
comparison

# Looks reasonable

In [None]:
# As we can see there are a few cases where the sim value is outside the bounds of the means, particularly in Nigeria (why?)

# Keep rows where `sim` is on the same side of both bounds (>= both or < both),
# i.e. not strictly between bound1 and bound2.
comparison[(comparison['sim'] >= comparison['bound1']) == (comparison['sim'] >= comparison['bound2'])]

## checks on iron interventions

### oral iron

CHECK: Oral iron only received by simulants who attend ANC.

Type: precise assert.

In [None]:
# Rows for simulants with no ANC visit that nevertheless received some form of oral iron.
no_anc_visit = anc.anc_coverage=='none'
got_oral_iron = anc.oral_iron_coverage != 'none'
assert anc.loc[no_anc_visit & got_oral_iron, 'value'].sum() == 0, "coverage of oral iron among those who do not attend ANC"

CHECK: MMS only received in scenarios that scale it up.

Type: precise assert.

In [None]:
# Rows with MMS coverage in any scenario other than the MMS scale-up scenario.
not_mms_scenario = anc.scenario != 'mms_total_scaleup'
got_mms = anc.oral_iron_coverage == 'mms'
assert anc.loc[not_mms_scenario & got_mms, 'value'].sum() == 0, "baseline MMS coverage"

CHECK: MMS received by every simulant who attends ANC in the scenarios that fully scale up MMS.

Type: precise assert.

In [None]:
# Rows in the MMS scale-up scenario for ANC attendees who did not receive MMS.
in_mms_scenario = anc.scenario == 'mms_total_scaleup'
attended_anc = anc.anc_coverage != 'none'
missed_mms = anc.oral_iron_coverage != 'mms'
assert anc.loc[in_mms_scenario & attended_anc & missed_mms, 'value'].sum() == 0, "MMS coverage not fully scaled up"

CHECK: IFA coverage is the same between baseline and anemia screening scenarios.

Type: precise assert (due to CRN).

In [None]:
if 'anemia_screening_vv' in scenarios_run:
    # Total IFA-covered count (summed over all locations) in each of the two scenarios.
    received_ifa = anc.oral_iron_coverage == 'ifa'
    ifa_total_screening = anc.loc[(anc.scenario == 'anemia_screening_vv') & received_ifa, 'value'].sum()
    ifa_total_baseline = anc.loc[(anc.scenario == 'baseline') & received_ifa, 'value'].sum()
    assert ifa_total_screening == ifa_total_baseline, "IFA coverage changed in anemia screening scenario"

In [None]:
# https://vivarium-research.readthedocs.io/en/latest/models/intervention_models/mncnh_pregnancy/oral_iron_antenatal/oral_iron_antenatal.html#baseline-coverage-data
ifa_targets_raw = pd.read_csv(
    '/snfs1/Project/simulation_science/mnch_grant/MNCNH portfolio/anc_iron_prop_st-gpr_results_aggregates_scaled2025-05-30.csv'
)
# The file is expected to hold a single age group and sex.
assert (ifa_targets_raw.age_group_id == 22).all()
assert (ifa_targets_raw.sex_id == 3).all()
# Keep the simulated locations in 2023 and index the mean estimate by location name.
ifa_rows_in_scope = (
    ifa_targets_raw.location_id.isin(list(location_ids.location_id.values)) &
    (ifa_targets_raw.year_id == 2023)
)
ifa_location_lookup = location_ids[['location_name', 'location_id']].rename(columns={'location_name': 'location'})
baseline_ifa_coverage_targets = (
    ifa_targets_raw[ifa_rows_in_scope]
    .merge(ifa_location_lookup)
    .set_index('location')['mean']
)
baseline_ifa_coverage_targets.sort_index()

CHECK: IFA coverage in the baseline scenario matches documented targets.

Type: fuzzy proportion check, though might need a bit of "fudge factor" for our limited number of parameter uncertainty draws. Alternatively, could check vs artifact at the draw level.

In [None]:
# Baseline IFA coverage among ANC attendees per location and draw, summarized across draws.
baseline_anc_attendees = anc.loc[(anc.anc_coverage!='none') & (anc.scenario == 'baseline')]
ifa_count_by_draw = baseline_anc_attendees.loc[baseline_anc_attendees.oral_iron_coverage == 'ifa'].groupby(['location', 'input_draw'])['value'].sum()
attendee_count_by_draw = baseline_anc_attendees.groupby(['location', 'input_draw'])['value'].sum()
(ifa_count_by_draw / attendee_count_by_draw).groupby('location').describe()

# IFA coverage looks very similar to the targets above

CHECK: Observed RR of IFA on pregnancy outcome is approximately 1, comparing *simulants* within the baseline scenario.

Type: manual (not easy to fuzzy check because there is stochastic uncertainty on both). We should really consider adding scenarios so that we can check this between scenarios, fuzzily, with CRN.

In [None]:
# Now checking the effects
# If we ran only the baseline scenario, we can still approximately (without CRN) check the IFA effect on birth outcomes (which is no effect)
# by comparing the groups, however we have to stratify by ANC to control for confounding by ANC
# this is because partial term pregnancies are less likely to go to ANC and therefore less likely to receive IFA
# `ifa_coverage` is a boolean flag: True when the simulant's oral iron coverage is IFA.
baseline_with_ifa_coverage = anc.loc[(anc.scenario=='baseline')].assign(ifa_coverage=lambda df: df.oral_iron_coverage == 'ifa')

cols = ['location']
# Distribution of pregnancy outcomes within each (location, ANC category, IFA flag) group.
baseline_pregnancy_outcome_proportions = (
    baseline_with_ifa_coverage.groupby(cols + ['anc_coverage', 'ifa_coverage', 'pregnancy_outcome'])['value'].sum() /
    baseline_with_ifa_coverage.groupby(cols + ['anc_coverage', 'ifa_coverage'])['value'].sum()
)
# Ratio of outcome proportions: IFA-covered (True) over not IFA-covered (False).
cross_simulant_comparison = (
    baseline_pregnancy_outcome_proportions.loc[(slice(None), slice(None), True, slice(None))] /
    baseline_pregnancy_outcome_proportions.loc[(slice(None), slice(None), False, slice(None))]
)
cross_simulant_comparison
# Should all be close to 1, which it is

In [None]:
# NOTE: We don't have an IFA scale-up scenario to be able to do a cross-scenario comparison
# Share of all counted simulants with IFA coverage, per scenario (all locations pooled).
(anc[anc.oral_iron_coverage == 'ifa'].groupby(['scenario'])['value'].sum() / anc.groupby(['scenario'])['value'].sum()).sort_values()

In [None]:
# Distribution of pregnancy outcomes within each location and scenario.
# `cols` is the list of stratification columns defined in an earlier cell.
scenario_pregnancy_outcome_proportions = (
    anc.groupby(cols + ['scenario', 'pregnancy_outcome'])['value'].sum()
     / anc.groupby(cols + ['scenario'])['value'].sum()
)
scenario_pregnancy_outcome_proportions

In [None]:
# Ratio of each non-baseline scenario's outcome proportions to the baseline proportions.
# NOTE(review): the divisor has no `scenario` level, so pandas aligns the two on their
# shared level names; the level order of the result appears to depend on that alignment -
# prefer selecting by level name downstream.
cross_scenario_comparison = (
    scenario_pregnancy_outcome_proportions[scenario_pregnancy_outcome_proportions.index.get_level_values('scenario') != 'baseline'] /
    scenario_pregnancy_outcome_proportions.loc[(slice(None), 'baseline', slice(None))]
)
cross_scenario_comparison

CHECK: Pregnancy outcomes are the same between scenarios, except those that change MMS.

Type: precise assert (due to CRN).

CHECK: Prevalence of abortion/miscarriage/ectopic pregnancy outcomes is the same between scenarios.

Type: precise assert (due to CRN).

In [None]:
# Ratios to baseline must be exactly 1 for every non-MMS scenario, and for partial-term
# outcomes in every scenario.
comparison_scenario_level = cross_scenario_comparison.index.get_level_values('scenario')
comparison_outcome_level = cross_scenario_comparison.index.get_level_values('pregnancy_outcome')
assert (cross_scenario_comparison[comparison_scenario_level != 'mms_total_scaleup'] == 1).all(), "non-MMS scenario changing pregnancy outcomes"
assert (cross_scenario_comparison[comparison_outcome_level == 'partial_term'] == 1).all(), "scenarios changing abortion/miscarriage/ectopic pregnancy outcomes"

CHECK: Stillbirth less common in MMS scaleup scenario, by amount matching documented target.

Type: precise assert (that stillbirth is lower). Fuzzy check of proportion (for amount lower). Again, maybe some fudge factor needed, or can compare at the draw level.

In [None]:
if 'mms_total_scaleup' in scenarios_run:
    # Select the MMS scenario by level NAME rather than by position, so this does not depend
    # on the order of the index levels, and bind the result to a new name so that
    # `cross_scenario_comparison` stays intact and this cell can be re-run.
    mms_cross_scenario_comparison = cross_scenario_comparison[
        (cross_scenario_comparison.index.get_level_values('pregnancy_outcome') != 'partial_term')
    ].xs('mms_total_scaleup', level='scenario')
    display(mms_cross_scenario_comparison)

    # https://vivarium-research.readthedocs.io/en/latest/models/intervention_models/mncnh_pregnancy/oral_iron_antenatal/oral_iron_antenatal.html#id31
    # target is 0.91 (95% CI: 0.86, 0.98)

    display(mms_cross_scenario_comparison.xs('stillbirth', level='pregnancy_outcome'))

    # Looks reasonably close, though a bit off in Ethiopia and Nigeria

CHECK: Observed RR of IFA on preterm birth approximately matches documented target, comparing *simulants* within the baseline scenario.

Type: manual (not easy to fuzzy check because there is stochastic uncertainty on both). We should really consider adding scenarios so that we can check this between scenarios, fuzzily, with CRN. Looks like we might need a "fudge factor" regardless since this is a bit off but we have deemed it acceptable.

In [None]:
# check IFA on PTB relative to no treatment
    # filter to single ANC category to control for confounding by ANC in the IFA->PTB effect
    # this is because ANC is correlated with IFA (IFA is distributed at ANC) and correlated with PTB (through the delivery choice model)
# x: preterm share among baseline live births with both ANC visits, by location, draw and IFA flag.
x = (anc.loc[(anc.preterm_birth==True) & (anc.scenario=='baseline') 
    & (anc.pregnancy_outcome=='live_birth')
    &(anc.anc_coverage=='first_trimester_and_later_pregnancy')].assign(ifa_coverage=lambda df: df.oral_iron_coverage == 'ifa').groupby(['location','input_draw','ifa_coverage'])['value'].sum()
 /anc.loc[(anc.scenario=='baseline') 
     & (anc.pregnancy_outcome=='live_birth')
     &(anc.anc_coverage=='first_trimester_and_later_pregnancy')].assign(ifa_coverage=lambda df: df.oral_iron_coverage == 'ifa').groupby(['location','input_draw','ifa_coverage'])['value'].sum())
# Reference group: rows where the IFA flag is False, re-indexed by location and draw only.
x_no_tx = x.reset_index().loc[~x.index.get_level_values('ifa_coverage')].set_index(['location','input_draw'])['value']
# Dividing by the reference makes the ifa_coverage == False rows equal to 1 by construction.
rr = (x / x_no_tx).reset_index()
rr.groupby(['ifa_coverage'])['value'].describe(percentiles=[0.025,0.975]).reset_index()

# we expect this to be 0.9 (95% CI: 0.86, 0.95) based on: https://vivarium-research.readthedocs.io/en/latest/models/intervention_models/mncnh_pregnancy/oral_iron_antenatal/oral_iron_antenatal.html#id29
# well values are no longer 1, so that's an improvement! 

# we do seem to be exaggerating the effect a bit though

In [None]:
# let's check by location
# Keep only the IFA-covered rows of `rr` and summarize the ratio across draws per location.
rr.loc[rr.ifa_coverage].groupby(['ifa_coverage','location'])['value'].describe(percentiles=[0.025,0.975]).reset_index()

# so we're much closer to our target in Pakistan than we are for Nigeria and Ethiopia
    # let's check and make sure that the IFA GA shifts we are using are location-specific as intended

CHECK: Observed RR of MMS (vs IFA) on preterm birth approximately matches documented target, comparing *simulants* across all scenarios.

Type: manual (not easy to fuzzy check because there is stochastic uncertainty on both). We should really check this between scenarios, fuzzily, with CRN.

In [None]:
# now check MMS on PTB relative to IFA

# x: preterm share among simulants with both ANC visits, by oral iron category, location and draw.
# NOTE(review): unlike the IFA check, this does not filter on pregnancy_outcome == 'live_birth' -
# confirm that is intended.
x = (anc.loc[(anc.preterm_birth==True)
    &(anc.anc_coverage=='first_trimester_and_later_pregnancy')].groupby(['oral_iron_coverage','location','input_draw'])['value'].sum()
 /anc.loc[anc.anc_coverage=='first_trimester_and_later_pregnancy'].groupby(['oral_iron_coverage', 'location','input_draw'])['value'].sum())
# Reference group: the IFA rows, indexed by location and draw.
x_ifa = x.loc['ifa']
rr = x / x_ifa
# Summarize the ratio across draws and keep only the MMS rows.
rr = rr.groupby(['oral_iron_coverage','location']).describe(percentiles=[0.025,0.975]).reset_index()
rr = rr.loc[rr.oral_iron_coverage == 'mms']
rr

# expect this to be RR = 0.91 (95% CI: 0.84, 0.99) based on research docs
# Looking good!
# our confidence interval is a bit tighter than our input data, but hopefully that improves as we increase the number of draws we run

In [None]:
# ok now let's check hemoglobin exposure by scenario

# TODO: name variable
# x: share with truly low hemoglobin among simulants with both ANC visits, by location and
# oral iron category (all scenarios pooled); undefined ratios are filled with 0.
x = (anc.loc[(anc.true_hemoglobin_exposure=='low')
    &(anc.anc_coverage=='first_trimester_and_later_pregnancy')
    ].groupby(['location','oral_iron_coverage'])['value'].sum()
 /anc.loc[anc.anc_coverage=='first_trimester_and_later_pregnancy'].groupby(['location','oral_iron_coverage'])['value'].sum()).fillna(0)

x

CHECK: Proportion of simulants with low hemoglobin (<100) is lower when they receive IFA or MMS than nothing.

Type: manual (not easy to fuzzy check because there is stochastic uncertainty on both). We should really check this between scenarios, fuzzily, with CRN.

In [None]:
# we don't have an expected verification target here... 
# we're just looking to see a decrease for the covered population
# we could look around for a validation target tho
# `x` is the low-hemoglobin share by location and oral iron category from the previous cell.
assert (x.loc[(slice(None), 'ifa')] < x.loc[(slice(None), 'none')]).all()
assert (x.loc[(slice(None), 'mms')] < x.loc[(slice(None), 'none')]).all()

In [None]:
# ok so let's also do a quick check that neonatal deaths are lower by scenario too

In [None]:
# Load neonatal deaths for all scenarios.
nn_deaths = read_results('neonatal_burden_observer_disorder_deaths', baseline_only=False)
nn_deaths.head()

CHECK: Fewer neonatal deaths in MMS scale up scenario(s).

Type: precise assert.

In [None]:
if 'mms_total_scaleup' in scenarios_run:
    # Total neonatal deaths (all locations pooled) in each of the two scenarios.
    nn_deaths_baseline_total = nn_deaths.loc[nn_deaths.scenario == 'baseline', 'value'].sum()
    nn_deaths_mms_total = nn_deaths.loc[nn_deaths.scenario == 'mms_total_scaleup', 'value'].sum()
    assert nn_deaths_baseline_total > nn_deaths_mms_total, "neonatal deaths not lower in MMS scenario"

In [None]:
# Load maternal disorder deaths for all scenarios.
deaths = read_results('maternal_disorders_burden_observer_disorder_deaths', baseline_only=False)
deaths.head()

In [None]:
# Maternal deaths per 100,000 of the counts in `anc`, computed per location, scenario and
# draw, then averaged across draws.
mmr = (deaths.groupby(['location','scenario','input_draw'])['value'].sum()
       / anc.groupby(['location','scenario','input_draw'])['value'].sum()) * 100_000
mmr = mmr.groupby(['location','scenario']).mean()
mmr

# this looks reasonable

In [None]:
# Show the baseline and MMS scale-up rows side by side.
mmr[mmr.index.get_level_values('scenario').isin(['baseline', 'mms_total_scaleup'])]

CHECK: Fewer maternal deaths in MMS scale up scenario(s).

Type: precise assert.

CHECK: Same maternal deaths in anemia screening scaleup scenario(s) as baseline.

Type: precise assert.

In [None]:
if 'mms_total_scaleup' in scenarios_run:
    # Per-location ratio must be strictly lower under MMS scale-up.
    mmr_under_mms = mmr.loc[(slice(None), 'mms_total_scaleup')]
    assert (mmr_under_mms < mmr.loc[(slice(None), 'baseline')]).all(), "maternal mortality not reduced in MMS scale up scenario"
if 'anemia_screening_vv' in scenarios_run:
    # Screening alone must leave the per-location ratio exactly unchanged.
    mmr_under_screening = mmr.loc[(slice(None), 'anemia_screening_vv')]
    assert (mmr_under_screening == mmr.loc[(slice(None), 'baseline')]).all(), "maternal mortality changed by anemia screening scale up"

In [None]:
# check incidence too (better sample size)

In [None]:
# Load maternal hemorrhage counts for all scenarios.
inc = read_results('maternal_hemorrhage_counts', baseline_only=False)
inc.head()

In [None]:
# Collapse the hemorrhage counts to one total per location and scenario.
# NOTE: this rebinds `inc` from the loaded table to the aggregated Series, so re-running this
# cell without first re-running the load cell is not expected to work.
inc = inc.groupby(['location', 'scenario'])['value'].sum()
inc

CHECK: Fewer incident maternal hemorrhage cases in MMS scale up scenario(s).

Type: precise assert.

CHECK: Same incident maternal hemorrhage cases in anemia screening scaleup scenario(s) as baseline.

Type: precise assert.

In [None]:
if 'mms_total_scaleup' in scenarios_run:
    # Per-location hemorrhage counts must be strictly lower under MMS scale-up.
    inc_under_mms = inc.loc[(slice(None), 'mms_total_scaleup')]
    assert (inc_under_mms < inc.loc[(slice(None), 'baseline')]).all(), "maternal hemorrhage not reduced in MMS scale up scenario"
if 'anemia_screening_vv' in scenarios_run:
    # Screening alone must leave the per-location counts exactly unchanged.
    inc_under_screening = inc.loc[(slice(None), 'anemia_screening_vv')]
    assert (inc_under_screening == inc.loc[(slice(None), 'baseline')]).all(), "maternal hemorrhage changed by anemia screening scale up"

### IV iron

CHECK: IV iron only received by simulants who tested low ferritin.

Type: precise assert.

In [None]:
# https://vivarium-research.readthedocs.io/en/latest/models/intervention_models/mncnh_pregnancy/iv_iron_antenatal/iv_iron_mncnh.html#intervention-overview
# Rows with IV iron coverage whose ferritin status is anything other than "low".
covered_by_iv_iron = anc.iv_iron_coverage == 'covered'
ferritin_not_low = anc.ferritin_status != 'low'
assert anc.loc[covered_by_iv_iron & ferritin_not_low, 'value'].sum() == 0, "IV iron among those not eligible!"

In [None]:
# IV iron coverage among simulants with low ferritin, per scenario (all locations pooled).
iv_iron_covered_counts = anc[anc.iv_iron_coverage == 'covered'].groupby(['scenario'])['value'].sum()
low_ferritin_counts_by_scenario = anc[anc.ferritin_status == 'low'].groupby(['scenario'])['value'].sum()
# Scenarios where the ratio is undefined are dropped.
iv_iron_coverage_by_scenario = (iv_iron_covered_counts / low_ferritin_counts_by_scenario).dropna()
iv_iron_coverage_by_scenario

CHECK: IV iron only received in IV iron scale up scenario(s).

Type: precise assert.

In [None]:
# Coverage must be exactly zero in every scenario other than the IV iron scale-up scenario.
is_not_iv_iron_scenario = iv_iron_coverage_by_scenario.index.get_level_values('scenario') != 'anemia_screening_and_iv_iron_scaleup'
assert (iv_iron_coverage_by_scenario[is_not_iv_iron_scenario] == 0).all(), "IV iron scaled up in non-IV iron scenarios"

CHECK: IV iron received by all simulants with tested low ferritin in IV iron scale up scenario(s).

Type: precise assert.

In [None]:
# In the IV iron scale-up scenario, everyone with low ferritin must be covered (ratio exactly 1).
if 'anemia_screening_and_iv_iron_scaleup' in scenarios_run:
    assert iv_iron_coverage_by_scenario.loc['anemia_screening_and_iv_iron_scaleup'] == 1, "IV iron not fully scaled up"

In [None]:
# Show the baseline and IV iron scale-up rows side by side.
mmr[mmr.index.get_level_values('scenario').isin(['baseline', 'anemia_screening_and_iv_iron_scaleup'])]

CHECK: Fewer maternal deaths in IV iron scale up scenario(s).

Type: precise assert.

In [None]:
if 'anemia_screening_and_iv_iron_scaleup' in scenarios_run:
    # Per-location ratio must be strictly lower under IV iron scale-up.
    mmr_under_iv_iron = mmr.loc[(slice(None), 'anemia_screening_and_iv_iron_scaleup')]
    assert (mmr_under_iv_iron < mmr.loc[(slice(None), 'baseline')]).all(), "maternal mortality not reduced in IV iron scale up scenario"

In [None]:
# check incidence too (better sample size)

CHECK: Fewer incident maternal hemorrhage cases in IV iron scale up scenario(s).

Type: precise assert.

In [None]:
if 'anemia_screening_and_iv_iron_scaleup' in scenarios_run:
    # Per-location hemorrhage counts must be strictly lower under IV iron scale-up.
    inc_under_iv_iron = inc.loc[(slice(None), 'anemia_screening_and_iv_iron_scaleup')]
    assert (inc_under_iv_iron < inc.loc[(slice(None), 'baseline')]).all(), "maternal hemorrhage not reduced in IV iron scale up scenario"

In [None]:
# NOTE: In future models IV iron will affect BW and GA so this won't be true
# https://vivarium-research.readthedocs.io/en/latest/models/intervention_models/mncnh_pregnancy/iv_iron_antenatal/iv_iron_mncnh.html#id13

if 'anemia_screening_and_iv_iron_scaleup' in scenarios_run:
    # Total neonatal deaths (all locations pooled) must be identical in the two scenarios.
    nn_deaths_total_baseline = nn_deaths.loc[nn_deaths.scenario == 'baseline', 'value'].sum()
    nn_deaths_total_iv_iron = nn_deaths.loc[nn_deaths.scenario == 'anemia_screening_and_iv_iron_scaleup', 'value'].sum()
    assert nn_deaths_total_baseline == nn_deaths_total_iv_iron, "neonatal deaths different in IV iron scenario"

In [None]:
# also need to check stillbirth when that is implemented