In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 8)

import collections

import warnings
# warnings.filterwarnings('ignore')
from matplotlib.backends.backend_pdf import PdfPages

from pathlib import Path

import db_queries as db
import vivarium_helpers.id_helper as idh
import gbd_mapping
from vivarium import Artifact

# Add the repo directory vivarium_research_ciff_sam/ to sys.path.
# '../..' is resolved against the current working directory, so this cell
# assumes the notebook is run from its own directory.
import os, sys
repo_path = os.path.abspath('../..')
# Append only once, so that re-running this cell does not accumulate
# duplicate entries in sys.path.
if repo_path not in sys.path:
    sys.path.append(repo_path)
# Assumes vivarium_research_ciff_sam/ is in sys.path
import model_validation.vivarium_transformed_output as vto
# import model_validation.vivarium_raw_output as vro
import model_validation.vivarium_output_processing as vop
import model_validation.ciff_sam_results as csr

# Record the working directory, user, and date of this run (shell commands via IPython `!`).
# NOTE(review): the captured output includes a username and an absolute home path.
!pwd
!whoami
!date

/ihme/homes/ndbs/vivarium_research_ciff_sam/model_validation/model3
ndbs
Thu Sep  9 16:55:37 PDT 2021


In [2]:
# Enable IPython's autoreload extension in mode 2 (reload all modules before running code);
# presumably so that edits to the model_validation modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

# Validation and Verification Criteria from SQLNS documentation

[SQLNS documentation on Vivarium Research](https://vivarium-research.readthedocs.io/en/latest/intervention_models/lipid_based_nutrient_supplements/index.html)

1. verification: coverage of SQ-LNS as a function of time in baseline and intervention scenario

2. verification: prevalence of stunting in supplemented vs non-supplemented group

3. verification: incidence of moderate wasting from mild in supplemented vs non-supplemented group

4. validation: check that the ratio of moderate wasting prevalence in the supplemented vs. non-supplemented group agrees with the prevalence RR, which we applied to the incidence rate instead.

5. validation: check how much SAM prevalence decreases as a result of the reduction in incidence of MAM from mild wasting.

# Load output from model 3.1 SQLNS and compute total person time

In [3]:
# Load the cleaned results for the model spec named in the heading above,
# compute total person-time, then list the available tables.
model_spec = 3.1
data = csr.VivariumResults.cleaned_from_model_spec(model_spec)
data.compute_total_person_time()
data.table_names()

['wasting_transition_count',
 'wasting_state_person_time',
 'deaths',
 'stunting_state_person_time',
 'population',
 'ylls',
 'ylds',
 'disease_state_person_time',
 'disease_transition_count',
 'person_time']

# 1. Check SQ-LNS coverage as a function of time

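Target in the intervention scenario: 90% coverage for ages 6 months and older starting in 2023; 0% coverage for ages under 6 months in every year, and 0% coverage for all ages in 2022. Baseline coverage should be 0% throughout.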

Looks good.

In [4]:
# SQ-LNS coverage stratified by age group and year, then split into one
# table per scenario.
strata = ['age', 'year']
sqlns_coverage_by_age_year = csr.get_sqlns_coverage(data, strata)
sqlns_coverage_baseline, sqlns_coverage_intervention = (
    sqlns_coverage_by_age_year.query(scenario_filter)
    for scenario_filter in ("scenario=='baseline'", "scenario=='treatment_and_prevention'")
)

In [5]:
# Display the coverage table for all scenarios (pandas truncates it via the display.max_rows option).
sqlns_coverage_by_age_year

Unnamed: 0,age,year,input_draw,scenario,sq_lns,value,numerator_measure,denominator_measure,multiplier
0,1-5_months,2022,29,baseline,covered,0.0,state_person_time,person_time,1
1,1-5_months,2022,29,baseline,uncovered,1.0,state_person_time,person_time,1
2,1-5_months,2022,29,treatment_and_prevention,covered,0.0,state_person_time,person_time,1
3,1-5_months,2022,29,treatment_and_prevention,uncovered,1.0,state_person_time,person_time,1
...,...,...,...,...,...,...,...,...,...
1436,late_neonatal,2026,946,baseline,covered,0.0,state_person_time,person_time,1
1437,late_neonatal,2026,946,baseline,uncovered,1.0,state_person_time,person_time,1
1438,late_neonatal,2026,946,treatment_and_prevention,covered,0.0,state_person_time,person_time,1
1439,late_neonatal,2026,946,treatment_and_prevention,uncovered,1.0,state_person_time,person_time,1


In [6]:
# After marginalizing out 'sq_lns', the covered and uncovered shares of each
# stratum should add up to 1.
coverage_summed_over_sq_lns = vop.marginalize(sqlns_coverage_by_age_year, 'sq_lns')
assert np.allclose(coverage_summed_over_sq_lns.value, 1)

In [7]:
# Check that baseline coverage is 0.
baseline_covered = sqlns_coverage_baseline.query("sq_lns=='covered'")
# Series.all() is True for an empty Series, so first make sure the filter
# matched some rows; otherwise the zero-coverage check would pass vacuously.
assert not baseline_covered.empty, "no 'covered' rows found for the baseline scenario"
assert (baseline_covered.value == 0).all(), "baseline SQ-LNS coverage should be 0"

## Check that coverage under 6 months is always 0

Looks good.

In [8]:
# Age-group labels below, and at or above, 6 months of age.
under_6mo = [
    'early_neonatal',
    'late_neonatal',
    '1-5_months',
]
over_6mo = [
    '6-11_months',
    '12_to_23_months',
    '2_to_4',
]

In [9]:
# Intervention-scenario 'covered' rows for the under-6-month age groups,
# summarized after marginalizing out age.
under_6mo_covered = "age in @under_6mo and sq_lns == 'covered'"
sqlns_coverage_under_6mo = sqlns_coverage_intervention.query(under_6mo_covered)
coverage_under_6mo_marginalized = vop.marginalize(sqlns_coverage_under_6mo, 'age')
vop.describe(coverage_under_6mo_marginalized)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,count,mean,std,min,2.5%,50%,97.5%,max
denominator_measure,multiplier,numerator_measure,scenario,sq_lns,year,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
person_time,1,state_person_time,treatment_and_prevention,covered,2022,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
person_time,1,state_person_time,treatment_and_prevention,covered,2023,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
person_time,1,state_person_time,treatment_and_prevention,covered,2024,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
person_time,1,state_person_time,treatment_and_prevention,covered,2025,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
person_time,1,state_person_time,treatment_and_prevention,covered,2026,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# Series.all() is True for an empty Series, so confirm the under-6-month
# selection is non-empty before checking that every coverage value is 0.
assert not sqlns_coverage_under_6mo.empty, "no under-6-month 'covered' rows found"
assert (sqlns_coverage_under_6mo.value == 0).all(), "SQ-LNS coverage under 6 months should be 0"

## Check that for ages 6 months and older, coverage goes from 0% in 2022 to 90% in 2023

Looks good.

In [11]:
# Intervention-scenario 'covered' rows for the age groups of 6 months and older.
over_6mo_covered = "age in @over_6mo and sq_lns == 'covered'"
sqlns_coverage_over_6mo = sqlns_coverage_intervention.query(over_6mo_covered)
sqlns_coverage_over_6mo

Unnamed: 0,age,year,input_draw,scenario,sq_lns,value,numerator_measure,denominator_measure,multiplier
242,12_to_23_months,2022,29,treatment_and_prevention,covered,0.000000,state_person_time,person_time,1
246,12_to_23_months,2022,223,treatment_and_prevention,covered,0.000000,state_person_time,person_time,1
250,12_to_23_months,2022,232,treatment_and_prevention,covered,0.000000,state_person_time,person_time,1
254,12_to_23_months,2022,357,treatment_and_prevention,covered,0.000000,state_person_time,person_time,1
...,...,...,...,...,...,...,...,...,...
946,6-11_months,2026,650,treatment_and_prevention,covered,0.900650,state_person_time,person_time,1
950,6-11_months,2026,680,treatment_and_prevention,covered,0.899938,state_person_time,person_time,1
954,6-11_months,2026,829,treatment_and_prevention,covered,0.900966,state_person_time,person_time,1
958,6-11_months,2026,946,treatment_and_prevention,covered,0.899846,state_person_time,person_time,1


In [12]:
# Select the first 6+ month age group by position instead of via next(age),
# so re-running this cell always summarizes the same group.
age_group = over_6mo[0]
# Keep the shared iterator defined and positioned just after this group, for
# any later cell that advances it with next(age).
age = iter(over_6mo[1:])
vop.describe(sqlns_coverage_over_6mo.query(f"age == '{age_group}'"))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,count,mean,std,min,2.5%,50%,97.5%,max
age,denominator_measure,multiplier,numerator_measure,scenario,sq_lns,year,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
6-11_months,person_time,1,state_person_time,treatment_and_prevention,covered,2022,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6-11_months,person_time,1,state_person_time,treatment_and_prevention,covered,2023,12.0,0.899874,0.000842,0.898059,0.898271,0.900003,0.901053,0.901153
6-11_months,person_time,1,state_person_time,treatment_and_prevention,covered,2024,12.0,0.899749,0.000631,0.898685,0.898776,0.899662,0.900533,0.900535
6-11_months,person_time,1,state_person_time,treatment_and_prevention,covered,2025,12.0,0.900065,0.000575,0.898633,0.898903,0.900165,0.900674,0.900722
6-11_months,person_time,1,state_person_time,treatment_and_prevention,covered,2026,12.0,0.900026,0.000795,0.898734,0.898806,0.899892,0.901009,0.901025


In [13]:
# Select the second 6+ month age group by position instead of via next(age),
# so re-running this cell (or running it out of order) always summarizes the
# same group rather than whatever the shared iterator happens to yield.
age_group = over_6mo[1]
# Keep the shared iterator defined and positioned just after this group, for
# any later cell that advances it with next(age).
age = iter(over_6mo[2:])
vop.describe(sqlns_coverage_over_6mo.query(f"age == '{age_group}'"))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,count,mean,std,min,2.5%,50%,97.5%,max
age,denominator_measure,multiplier,numerator_measure,scenario,sq_lns,year,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
12_to_23_months,person_time,1,state_person_time,treatment_and_prevention,covered,2022,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12_to_23_months,person_time,1,state_person_time,treatment_and_prevention,covered,2023,12.0,0.899852,0.000429,0.899119,0.899198,0.899916,0.900577,0.900658
12_to_23_months,person_time,1,state_person_time,treatment_and_prevention,covered,2024,12.0,0.899821,0.000722,0.898311,0.898458,0.899924,0.900701,0.900733
12_to_23_months,person_time,1,state_person_time,treatment_and_prevention,covered,2025,12.0,0.899815,0.000504,0.898926,0.899028,0.899848,0.900511,0.900514
12_to_23_months,person_time,1,state_person_time,treatment_and_prevention,covered,2026,12.0,0.900073,0.000556,0.899063,0.899166,0.900193,0.90077,0.90079


In [14]:
# Select the third 6+ month age group by position instead of via next(age),
# so re-running this cell cannot raise StopIteration or show a different group.
age_group = over_6mo[2]
# Leave the shared iterator positioned after this group (i.e. exhausted), the
# same state it would be in after three next(age) calls.
age = iter(over_6mo[3:])
vop.describe(sqlns_coverage_over_6mo.query(f"age == '{age_group}'"))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,count,mean,std,min,2.5%,50%,97.5%,max
age,denominator_measure,multiplier,numerator_measure,scenario,sq_lns,year,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2_to_4,person_time,1,state_person_time,treatment_and_prevention,covered,2022,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2_to_4,person_time,1,state_person_time,treatment_and_prevention,covered,2023,12.0,0.900106,0.000567,0.899034,0.899135,0.900177,0.90094,0.901014
2_to_4,person_time,1,state_person_time,treatment_and_prevention,covered,2024,12.0,0.900051,0.000479,0.89922,0.899328,0.900065,0.900777,0.900818
2_to_4,person_time,1,state_person_time,treatment_and_prevention,covered,2025,12.0,0.899955,0.000392,0.899241,0.899335,0.899968,0.900498,0.900558
2_to_4,person_time,1,state_person_time,treatment_and_prevention,covered,2026,12.0,0.899853,0.000369,0.899204,0.899248,0.899903,0.900324,0.900366


# 2. Verify prevalence of stunting in supplemented vs non-supplemented group

See [SQ-LNS Vivarium Modeling Strategy](https://vivarium-research.readthedocs.io/en/latest/intervention_models/lipid_based_nutrient_supplements/index.html#vivarium-modeling-strategy)

In [15]:
# Display the stunting person-time table that the prevalence ratios below are based on
# (stratified by sex, year, stunting_state, sq_lns and age, per the displayed columns).
data.stunting_state_person_time

Unnamed: 0,sex,year,stunting_state,measure,input_draw,scenario,value,sq_lns,age
0,female,2022,cat1,state_person_time,29,baseline,0.000000,covered,early_neonatal
1,female,2022,cat1,state_person_time,29,baseline,0.000000,uncovered,early_neonatal
2,female,2022,cat1,state_person_time,29,baseline,0.000000,covered,late_neonatal
3,female,2022,cat1,state_person_time,29,baseline,0.000000,uncovered,late_neonatal
...,...,...,...,...,...,...,...,...,...
11516,male,2026,cat4,state_person_time,946,treatment_and_prevention,43671.262149,covered,12_to_23_months
11517,male,2026,cat4,state_person_time,946,treatment_and_prevention,4796.904860,uncovered,12_to_23_months
11518,male,2026,cat4,state_person_time,946,treatment_and_prevention,109126.299795,covered,2_to_4
11519,male,2026,cat4,state_person_time,946,treatment_and_prevention,11987.500342,uncovered,2_to_4


## Calculate the prevalence ratios

**TODO:** Decide whether the prevalence ratios should be stratified by year. They are currently computed with `stratify_by_year=True`.

In [16]:
# Ratio of stunting prevalence among SQ-LNS covered vs. uncovered
# (see the numerator_measure / denominator_measure columns), stratified by year.
stunting_prevalence_ratio = csr.get_sqlns_stunting_prevalence_ratio(
    data,
    stratify_by_year=True,
)
stunting_prevalence_ratio

Unnamed: 0,year,sex,age,stunting_state,input_draw,scenario,value,numerator_measure,denominator_measure,multiplier
0,2023,female,12_to_23_months,cat1,29,treatment_and_prevention,0.840435,stunting_prevalence_among_sqlns_covered,stunting_prevalence_among_sqlns_uncovered,1
1,2023,female,12_to_23_months,cat1,223,treatment_and_prevention,0.859978,stunting_prevalence_among_sqlns_covered,stunting_prevalence_among_sqlns_uncovered,1
2,2023,female,12_to_23_months,cat1,232,treatment_and_prevention,0.844374,stunting_prevalence_among_sqlns_covered,stunting_prevalence_among_sqlns_uncovered,1
3,2023,female,12_to_23_months,cat1,357,treatment_and_prevention,0.878011,stunting_prevalence_among_sqlns_covered,stunting_prevalence_among_sqlns_uncovered,1
...,...,...,...,...,...,...,...,...,...,...
1148,2026,male,6-11_months,cat4,650,treatment_and_prevention,0.994527,stunting_prevalence_among_sqlns_covered,stunting_prevalence_among_sqlns_uncovered,1
1149,2026,male,6-11_months,cat4,680,treatment_and_prevention,0.983574,stunting_prevalence_among_sqlns_covered,stunting_prevalence_among_sqlns_uncovered,1
1150,2026,male,6-11_months,cat4,829,treatment_and_prevention,1.008697,stunting_prevalence_among_sqlns_covered,stunting_prevalence_among_sqlns_uncovered,1
1151,2026,male,6-11_months,cat4,946,treatment_and_prevention,1.001852,stunting_prevalence_among_sqlns_covered,stunting_prevalence_among_sqlns_uncovered,1


## Verify that prevalence ratios match the targets for each stunting category

Targets:
 - cat1 (severe stunting): 0.85 (95% CI 0.74 to 0.98)
 - cat2 (moderate stunting): 0.93 (95% CI 0.88 to 0.98)
 - cat3 (mild stunting): Ratio depends on the category prevalences for the stratum -- should be greater than 1
 - cat4 (TMREL): 1.0 (no uncertainty)
 
The sim is matching the targets quite well.

In [17]:
cat1_ratios = stunting_prevalence_ratio.query("stunting_state == 'cat1'").value
# Target for cat1 (severe stunting) is 0.85 (95% CI 0.74 to 0.98). Fail loudly
# if the mean simulated ratio falls outside that interval rather than relying
# on reading the summary; an empty selection (mean is NaN) also fails here.
assert 0.74 <= cat1_ratios.mean() <= 0.98, "cat1 stunting prevalence ratio is outside the target 95% CI"
cat1_ratios.describe(percentiles=[0.025,0.975])

count    288.000000
mean       0.851874
std        0.020178
min        0.773168
2.5%       0.805503
50%        0.851308
97.5%      0.890926
max        0.912309
Name: value, dtype: float64

In [18]:
cat2_ratios = stunting_prevalence_ratio.query("stunting_state == 'cat2'").value
# Target for cat2 (moderate stunting) is 0.93 (95% CI 0.88 to 0.98). Fail loudly
# if the mean simulated ratio falls outside that interval rather than relying
# on reading the summary; an empty selection (mean is NaN) also fails here.
assert 0.88 <= cat2_ratios.mean() <= 0.98, "cat2 stunting prevalence ratio is outside the target 95% CI"
cat2_ratios.describe(percentiles=[0.025,0.975])

count    288.000000
mean       0.931274
std        0.017381
min        0.878948
2.5%       0.892815
50%        0.931400
97.5%      0.966858
max        0.990079
Name: value, dtype: float64

In [19]:
cat3_ratios = stunting_prevalence_ratio.query("stunting_state == 'cat3'").value
# The cat3 (mild stunting) ratio has no fixed target but should be greater
# than 1. Check the mean instead of relying on reading the summary; an empty
# selection (mean is NaN) also fails here.
assert cat3_ratios.mean() > 1, "cat3 stunting prevalence ratio should be greater than 1"
cat3_ratios.describe(percentiles=[0.025,0.975])

count    288.000000
mean       1.170014
std        0.056519
min        1.040068
2.5%       1.068717
50%        1.184666
97.5%      1.265076
max        1.277410
Name: value, dtype: float64

In [20]:
# Summary of the cat4 (TMREL) prevalence ratios; the target is 1.0.
cat4_ratios = stunting_prevalence_ratio.query("stunting_state == 'cat4'").value
cat4_ratios.describe(percentiles=[0.025,0.975])

count    288.000000
mean       0.998246
std        0.008142
min        0.972977
2.5%       0.982502
50%        0.998531
97.5%      1.014440
max        1.025068
Name: value, dtype: float64

# 3. Verify incidence of moderate wasting from mild in supplemented vs non-supplemented group

# 4. Validation: Check that the ratio of moderate wasting prevalence in the supplemented vs. non-supplemented group agrees with the prevalence RR, which we applied to the incidence rate instead

# 5. Validation: Check how much SAM prevalence decreases as a result of the reduction in incidence of MAM from mild wasting