In [1]:
from vivarium import Artifact
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from db_queries import get_ids, get_outputs
import scipy.stats

!whoami
!date

alibow
Wed Jun 24 13:09:35 PDT 2020


In [2]:
# Directory containing the count-data HDF outputs; the cells below read from output_dirs[0].
output_dirs = ['/share/costeffectiveness/results/vivarium_conic_lsff/00_bugfix/india_new_start_date/count_data/']

# Location label for this run (not referenced again in the visible cells).
locations = ['India']

# 1. Iron effect on birth weight

In [3]:
# Load the birth-count output from the run directory and preview it.
births = pd.read_hdf(output_dirs[0] + 'births.hdf')
births.head()

# NOTE: birth counts have a folic_acid_fortification_group column but no iron
# coverage column, so they cannot be split by iron coverage group.

Unnamed: 0,year,sex,folic_acid_fortification_group,measure,input_draw,scenario,value
0,2020,female,covered,live_births,357,baseline,89.0
1,2020,female,covered,live_births,357,baseline,1.0
2,2020,female,covered,live_births,357,baseline,27.0
3,2020,female,covered,live_births,357,iron_fortification_scale_up,89.0
4,2020,female,covered,live_births,357,iron_fortification_scale_up,1.0


In [4]:
# Load the birth-weight output; unlike births, it has an iron_fortification_group column.
bw = pd.read_hdf(output_dirs[0] + 'birth_weight.hdf')
bw.head()

Unnamed: 0,year,sex,measure,input_draw,scenario,value,iron_fortification_group
0,2020,female,birth_weight_mean,357,baseline,2893.407771,uncovered
1,2020,female,birth_weight_mean,357,baseline,2827.694517,covered
2,2020,female,birth_weight_mean,357,iron_fortification_scale_up,2893.407771,uncovered
3,2020,female,birth_weight_mean,357,iron_fortification_scale_up,2827.694517,covered
4,2020,female,birth_weight_mean,357,vitamin_a_fortification_scale_up,2893.407771,uncovered


In [5]:
# Unweighted average of the 'birth_weight_mean' rows within each
# (scenario, iron coverage group) cell.
# numeric_only=True makes the exclusion of string columns explicit. Older pandas
# dropped them silently; pandas >= 2.0 raises TypeError when asked to average them.
# The resulting `input_draw` column is just the mean of the draw ids and has no
# analytic meaning.
bw_by_coverage_and_scenario = (
    bw.loc[bw.measure == 'birth_weight_mean']
    .groupby(['scenario', 'iron_fortification_group'])
    .mean(numeric_only=True)
)
bw_by_coverage_and_scenario

Unnamed: 0_level_0,Unnamed: 1_level_0,input_draw,value
scenario,iron_fortification_group,Unnamed: 2_level_1,Unnamed: 3_level_1
baseline,covered,676.8,2878.236418
baseline,uncovered,676.8,2888.341063
iron_fortification_scale_up,covered,676.8,2882.310974
iron_fortification_scale_up,uncovered,676.8,2886.687729
vitamin_a_fortification_scale_up,covered,676.8,2878.236418
vitamin_a_fortification_scale_up,uncovered,676.8,2888.341063


## Birth weight conclusions

- Cannot calculate overall mean birth weight by scenario because births are not stratified by iron coverage group
- Mean birthweight is lower in the covered group than the uncovered group... this is the opposite of what we would expect
- Mean birthweight is different in the iron scenario than the baseline scenario, which is encouraging

# 2. Vitamin A Fortification and Vitamin A Deficiency Prevalence

In [6]:
# Load the state person-time output (person_time rows keyed by cause) and preview it.
state_pt = pd.read_hdf(output_dirs[0] + 'state_person_time.hdf')
state_pt.head()

Unnamed: 0,year,age_group,sex,cause,folic_acid_fortification_group,vitamin_a_fortification_group,measure,input_draw,scenario,value
0,2020,1_to_4,female,diarrheal_diseases,covered,covered,person_time,357,baseline,0.0
1,2020,1_to_4,female,diarrheal_diseases,covered,covered,person_time,357,iron_fortification_scale_up,0.0
2,2020,1_to_4,female,diarrheal_diseases,covered,covered,person_time,357,vitamin_a_fortification_scale_up,0.0
3,2020,1_to_4,female,diarrheal_diseases,covered,covered,person_time,602,baseline,0.0
4,2020,1_to_4,female,diarrheal_diseases,covered,covered,person_time,602,iron_fortification_scale_up,0.0


In [7]:
def calculate_stratified_vad_prevalence(strata_cols):
    """Return mean vitamin A deficiency (VAD) prevalence by scenario and strata.

    Prevalence is computed per (scenario, input_draw, *strata_cols) as
    person time in the 'vitamin_a_deficiency' cause state divided by total
    person time, and then averaged across draws. Rows for the
    'iron_fortification_scale_up' scenario are excluded from the result.

    Parameters
    ----------
    strata_cols : sequence of str
        Extra columns to stratify by (for example ``['year']``); pass an
        empty list for the overall prevalence. The columns must exist in both
        'state_person_time.hdf' and 'person_time.hdf'.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``['scenario'] + strata_cols``. The ``value`` column is the
        mean prevalence. The ``input_draw`` column is the arithmetic mean of
        the draw ids; it is an artifact kept only for backward compatibility.

    Notes
    -----
    The ratio is not clipped: if the two files disagree on how person time is
    attributed to a stratum, the result can exceed 1.
    """
    strata = list(strata_cols)
    draw_level_keys = ['scenario', 'input_draw'] + strata
    data_dir = output_dirs[0]

    state_pt = pd.read_hdf(data_dir + 'state_person_time.hdf')
    vad = state_pt.loc[state_pt.cause == 'vitamin_a_deficiency']
    # numeric_only=True: sum only numeric columns. Older pandas dropped string
    # columns implicitly; pandas >= 2.0 would concatenate them and then fail
    # on the division below.
    vad = vad.groupby(draw_level_keys).sum(numeric_only=True)

    pt = pd.read_hdf(data_dir + 'person_time.hdf')
    pt = pt.groupby(draw_level_keys).sum(numeric_only=True)

    # Both frames share the same index levels, so this divides draw by draw.
    vad_prev = (vad / pt).reset_index()
    vad_prev = vad_prev.loc[vad_prev.scenario != 'iron_fortification_scale_up']

    # Average the draw-level prevalences across draws.
    vad_prev = vad_prev.groupby(['scenario'] + strata).mean(numeric_only=True)
    return vad_prev

In [8]:
# Overall VAD prevalence by scenario (no additional stratification).
overall = calculate_stratified_vad_prevalence([])
overall

# looks good! Prevalence is lower in the vitamin A scale-up scenario than at baseline.

Unnamed: 0_level_0,input_draw,value
scenario,Unnamed: 1_level_1,Unnamed: 2_level_1
baseline,676.8,0.292965
vitamin_a_fortification_scale_up,676.8,0.229858


In [34]:
# VAD prevalence by scenario and year.
by_year = calculate_stratified_vad_prevalence(['year'])
by_year

# looks good! This is behaving like the CORRECT coverage algorithm:
# 2020 is (nearly) identical across scenarios and the scale-up effect first appears in 2021.

Unnamed: 0_level_0,Unnamed: 1_level_0,input_draw,value
scenario,year,Unnamed: 2_level_1,Unnamed: 3_level_1
baseline,2020,676.8,0.297132
baseline,2021,676.8,0.286624
baseline,2022,676.8,0.288669
baseline,2023,676.8,0.30292
baseline,2024,676.8,0.289505
vitamin_a_fortification_scale_up,2020,676.8,0.297131
vitamin_a_fortification_scale_up,2021,676.8,0.24762
vitamin_a_fortification_scale_up,2022,676.8,0.197216
vitamin_a_fortification_scale_up,2023,676.8,0.206642
vitamin_a_fortification_scale_up,2024,676.8,0.195424


In [10]:
# VAD prevalence by scenario and age group.
by_age = calculate_stratified_vad_prevalence(['age_group'])
by_age

# No change in the early/late neonatal age groups, as expected;
# prevalence drops in the post_neonatal and 1_to_4 groups.
# looks good!

Unnamed: 0_level_0,Unnamed: 1_level_0,input_draw,value
scenario,age_group,Unnamed: 2_level_1,Unnamed: 3_level_1
baseline,1_to_4,676.8,0.24181
baseline,early_neonatal,676.8,0.835498
baseline,late_neonatal,676.8,0.800689
baseline,post_neonatal,676.8,0.476731
vitamin_a_fortification_scale_up,1_to_4,676.8,0.177532
vitamin_a_fortification_scale_up,early_neonatal,676.8,0.835498
vitamin_a_fortification_scale_up,late_neonatal,676.8,0.800689
vitamin_a_fortification_scale_up,post_neonatal,676.8,0.413542


In [11]:
# VAD prevalence by scenario and vitamin A fortification coverage group.
# NOTE(review): the 'covered' stratum comes out > 1 in the output, i.e. VAD state
# person time exceeds total person time for that group -- see the conclusions.
by_coverage_group = calculate_stratified_vad_prevalence(['vitamin_a_fortification_group'])
by_coverage_group

Unnamed: 0_level_0,Unnamed: 1_level_0,input_draw,value
scenario,vitamin_a_fortification_group,Unnamed: 2_level_1,Unnamed: 3_level_1
baseline,covered,676.8,10.362414
baseline,effectively_covered,676.8,0.133674
baseline,uncovered,676.8,0.326613
vitamin_a_fortification_scale_up,covered,676.8,8.788398
vitamin_a_fortification_scale_up,effectively_covered,676.8,0.10427
vitamin_a_fortification_scale_up,uncovered,676.8,0.818536


## Conclusions

- For the covered group, VAD state person time > overall person time, which appears to be an error (prevalence > 1)
- For the baseline scenario, VAD prevalence in the effectively covered group is ~ 0.45 times that in the uncovered group, as expected. However, this does not hold in the vitamin A scenario, where the difference between these groups is greater. We would expect to see the same rates within each group in both scenarios, with only more person time in the covered groups in the vitamin A scenario.
- Otherwise, behavior by year, age group, and overall looks as expected
- VAD prevalence does not decrease until 2021, indicating that the coverage algorithm was implemented correctly

# 3. Iron effect on hemoglobin

In [12]:
# Load the hemoglobin output (stratified by age, coverage status, and responsiveness) and preview it.
hb = pd.read_hdf(output_dirs[0] + 'hemoglobin_level.hdf')
hb.head()

Unnamed: 0,sex,measure,input_draw,scenario,value,age,status,responsive
0,female,hemoglobin_mean,357,baseline,103.266892,0.5,covered,responsive
1,female,hemoglobin_mean,357,baseline,81.415903,0.5,covered,non-responsive
2,female,hemoglobin_mean,357,baseline,103.073674,0.5,uncovered,responsive
3,female,hemoglobin_mean,357,baseline,93.440171,0.5,uncovered,non-responsive
4,female,hemoglobin_mean,357,baseline,104.80555,1.0,covered,responsive


In [13]:
# List the distinct measures recorded in the hemoglobin output.
np.unique(hb['measure'])

array(['hemoglobin_mean', 'hemoglobin_variance'], dtype=object)

In [14]:
# Load the total person-time output and preview it.
pt = pd.read_hdf(output_dirs[0] + 'person_time.hdf')
pt.head()

# NOTE: cannot calculate person-time-weighted averages of Hb because the person-time
# data are not stratified by iron coverage or responsiveness

Unnamed: 0,year,age_group,sex,folic_acid_fortification_group,vitamin_a_fortification_group,measure,input_draw,scenario,value
0,2020,1_to_4,female,covered,covered,person_time,357,baseline,0.0
1,2020,1_to_4,female,covered,covered,person_time,357,iron_fortification_scale_up,0.0
2,2020,1_to_4,female,covered,covered,person_time,357,vitamin_a_fortification_scale_up,0.0
3,2020,1_to_4,female,covered,covered,person_time,602,baseline,0.0
4,2020,1_to_4,female,covered,covered,person_time,602,iron_fortification_scale_up,0.0


In [15]:
def get_stratified_hb_mean(strata_cols):
    """Return the average of the 'hemoglobin_mean' rows by scenario and strata.

    Reads 'hemoglobin_level.hdf', drops the 'vitamin_a_fortification_scale_up'
    scenario, keeps only rows whose measure is 'hemoglobin_mean', and takes an
    unweighted mean of those rows within each group. The rows are not weighted
    by person time, so the result is a simple average of the recorded means
    rather than a population mean.

    Parameters
    ----------
    strata_cols : sequence of str
        Extra columns of the hemoglobin output to group by (for example
        ``['status', 'responsive']``); pass an empty list for the overall mean.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``['scenario', 'measure'] + strata_cols``. The ``value``
        column is the averaged hemoglobin mean. The ``input_draw`` column is
        the arithmetic mean of the draw ids; it is an artifact kept only for
        backward compatibility.
    """
    hb = pd.read_hdf(output_dirs[0] + 'hemoglobin_level.hdf')
    keep = (hb.scenario != 'vitamin_a_fortification_scale_up') & (hb.measure == 'hemoglobin_mean')
    hb = hb.loc[keep]
    # numeric_only=True: average only numeric columns. Older pandas dropped
    # string columns implicitly; pandas >= 2.0 raises TypeError on them.
    return hb.groupby(['scenario', 'measure'] + list(strata_cols)).mean(numeric_only=True)

In [16]:
# Overall average hemoglobin by scenario (baseline vs. iron fortification scale-up).
overall = get_stratified_hb_mean([])
overall

# hemoglobin is slightly higher in iron fortification scenario... looks good, but effect is small

Unnamed: 0_level_0,Unnamed: 1_level_0,input_draw,value
scenario,measure,Unnamed: 2_level_1,Unnamed: 3_level_1
baseline,hemoglobin_mean,676.8,96.405078
iron_fortification_scale_up,hemoglobin_mean,676.8,97.400318


In [17]:
# Average hemoglobin by scenario and iron coverage status.
by_coverage = get_stratified_hb_mean(['status'])
by_coverage

# hemoglobin is lower in the covered group than in the uncovered group,
# which is the opposite of what we would expect

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,input_draw,value
scenario,measure,status,Unnamed: 3_level_1,Unnamed: 4_level_1
baseline,hemoglobin_mean,covered,676.8,94.464719
baseline,hemoglobin_mean,uncovered,676.8,98.345436
iron_fortification_scale_up,hemoglobin_mean,covered,676.8,96.481512
iron_fortification_scale_up,hemoglobin_mean,uncovered,676.8,98.319124


In [18]:
# Average hemoglobin by scenario, coverage status, and responsiveness.
by_responsiveness = get_stratified_hb_mean(['status','responsive'])
by_responsiveness

# Among the COVERED, we expect a difference between responsive and non-responsive, and we see one. Looks good!
# Among the UNCOVERED, we would NOT expect a significant difference between responsive and non-responsive.
    # The two are closer in the uncovered group, but still fairly separated, which is suspicious.

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,input_draw,value
scenario,measure,status,responsive,Unnamed: 4_level_1,Unnamed: 5_level_1
baseline,hemoglobin_mean,covered,non-responsive,676.8,86.104964
baseline,hemoglobin_mean,covered,responsive,676.8,102.824474
baseline,hemoglobin_mean,uncovered,non-responsive,676.8,93.668201
baseline,hemoglobin_mean,uncovered,responsive,676.8,103.022671
iron_fortification_scale_up,hemoglobin_mean,covered,non-responsive,676.8,89.988614
iron_fortification_scale_up,hemoglobin_mean,covered,responsive,676.8,102.97441
iron_fortification_scale_up,hemoglobin_mean,uncovered,non-responsive,676.8,93.613955
iron_fortification_scale_up,hemoglobin_mean,uncovered,responsive,676.8,103.024292


In [19]:
# Average hemoglobin by scenario and age.
by_age = get_stratified_hb_mean(['age'])
by_age

# would not expect to see a difference between scenarios for the age = 0.5 group,
# but the output shows one

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,input_draw,value
scenario,measure,age,Unnamed: 3_level_1,Unnamed: 4_level_1
baseline,hemoglobin_mean,0.5,676.8,94.44275
baseline,hemoglobin_mean,1.0,676.8,97.055817
baseline,hemoglobin_mean,2.0,676.8,96.979515
baseline,hemoglobin_mean,3.0,676.8,97.142228
iron_fortification_scale_up,hemoglobin_mean,0.5,676.8,95.587617
iron_fortification_scale_up,hemoglobin_mean,1.0,676.8,98.274526
iron_fortification_scale_up,hemoglobin_mean,2.0,676.8,97.861188
iron_fortification_scale_up,hemoglobin_mean,3.0,676.8,97.87794


In [20]:
# Average hemoglobin by scenario, age, coverage status, and responsiveness.
by_all = get_stratified_hb_mean(['age','status','responsive'])
by_all

# we are seeing an increase in hemoglobin between scenarios in the covered NON-RESPONSIVE population.
# this is unexpected; we should only see it in the responsive population

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,input_draw,value
scenario,measure,age,status,responsive,Unnamed: 5_level_1,Unnamed: 6_level_1
baseline,hemoglobin_mean,0.5,covered,non-responsive,676.8,85.562941
baseline,hemoglobin_mean,0.5,covered,responsive,676.8,99.951534
baseline,hemoglobin_mean,0.5,uncovered,non-responsive,676.8,92.080141
baseline,hemoglobin_mean,0.5,uncovered,responsive,676.8,100.176385
baseline,hemoglobin_mean,1.0,covered,non-responsive,676.8,86.447983
baseline,hemoglobin_mean,1.0,covered,responsive,676.8,103.6714
baseline,hemoglobin_mean,1.0,uncovered,non-responsive,676.8,94.117011
baseline,hemoglobin_mean,1.0,uncovered,responsive,676.8,103.986874
baseline,hemoglobin_mean,2.0,covered,non-responsive,676.8,85.994142
baseline,hemoglobin_mean,2.0,covered,responsive,676.8,103.748386


## Conclusions

- hemoglobin is lower in covered group, which is the opposite of what we would expect
- there is significant variation in mean hemoglobin between responsive and non-responsive groups in the uncovered population, which is unexpected
- seeing a difference in mean hemoglobin between scenarios for the 0.5 age group, which is unexpected
- seeing an increase in mean hemoglobin between scenarios in the non-responsive population, which is unexpected

# 4. Iron fortification on anemia prevalence

In [21]:
# Load the anemia state person-time output (one measure per anemia severity) and preview it.
anemia_pt = pd.read_hdf(output_dirs[0] + 'anemia_state_person_time.hdf')
anemia_pt.head()

Unnamed: 0,year,age_group,sex,measure,input_draw,scenario,value
0,2020,1_to_4,female,anemia_mild,357,baseline,2278.809035
1,2020,1_to_4,female,anemia_mild,357,iron_fortification_scale_up,2278.809035
2,2020,1_to_4,female,anemia_mild,357,vitamin_a_fortification_scale_up,2278.809035
3,2020,1_to_4,female,anemia_mild,602,baseline,2276.13963
4,2020,1_to_4,female,anemia_mild,602,iron_fortification_scale_up,2276.13963


In [22]:
def get_anemia_prevalence(strata_cols):
    """Return mean prevalence of each anemia state by scenario and strata.

    For every (scenario, input_draw, *strata_cols) cell, person time in each
    anemia state (the ``measure`` column of 'anemia_state_person_time.hdf') is
    divided by total person time from 'person_time.hdf'. The draw-level ratios
    are then averaged across draws. The 'vitamin_a_fortification_scale_up'
    scenario is excluded.

    Parameters
    ----------
    strata_cols : sequence of str
        Extra columns to stratify by (for example ``['year']``); pass an
        empty list for the overall prevalence. The columns must exist in both
        input files.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``['scenario', 'measure'] + strata_cols`` with a single
        ``value`` column holding the mean prevalence.
    """
    strata = list(strata_cols)
    draw_level_keys = ['scenario', 'input_draw'] + strata
    data_dir = output_dirs[0]

    anemia_pt = pd.read_hdf(data_dir + 'anemia_state_person_time.hdf')
    anemia_pt = anemia_pt.loc[anemia_pt.scenario != 'vitamin_a_fortification_scale_up']
    # numeric_only=True: sum only numeric columns. Older pandas dropped string
    # columns implicitly; pandas >= 2.0 would concatenate them and then fail
    # on the division below.
    anemia_pt = anemia_pt.groupby(draw_level_keys + ['measure']).sum(numeric_only=True)

    pt = pd.read_hdf(data_dir + 'person_time.hdf')
    pt = pt.loc[pt.scenario != 'vitamin_a_fortification_scale_up']
    pt = pt.groupby(draw_level_keys).sum(numeric_only=True)

    # The denominator lacks the 'measure' index level; pandas aligns on the
    # shared level names, so each anemia state is divided by the same total.
    anemia_prev = anemia_pt / pt

    # Average the draw-level prevalences across draws.
    anemia_prev = anemia_prev.groupby(['scenario', 'measure'] + strata).mean(numeric_only=True)
    return anemia_prev

In [23]:
# Overall anemia prevalence by scenario and severity.
overall = get_anemia_prevalence([])
overall

# VERY slight decrease in anemia prevalence between scenarios
# (anemia_none rises slightly; mild, moderate, and severe fall slightly)

Unnamed: 0_level_0,Unnamed: 1_level_0,value
scenario,measure,Unnamed: 2_level_1
baseline,anemia_mild,0.291932
baseline,anemia_moderate,0.369857
baseline,anemia_none,0.318952
baseline,anemia_severe,0.021318
iron_fortification_scale_up,anemia_mild,0.291832
iron_fortification_scale_up,anemia_moderate,0.369632
iron_fortification_scale_up,anemia_none,0.3193
iron_fortification_scale_up,anemia_severe,0.021294


In [24]:
# Anemia prevalence by year, re-indexed so the two scenarios sit next to each
# other within every (measure, year) pair.
by_year = (
    get_anemia_prevalence(['year'])
    .reset_index()
    .sort_values(by=['measure', 'year', 'scenario'])
    .set_index(['measure', 'year', 'scenario'])
)
by_year

# Expected behavior: the scenarios are identical in 2020 and differ only
# slightly in later years.

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value
measure,year,scenario,Unnamed: 3_level_1
anemia_mild,2020,baseline,0.298978
anemia_mild,2020,iron_fortification_scale_up,0.298978
anemia_mild,2021,baseline,0.287504
anemia_mild,2021,iron_fortification_scale_up,0.287431
anemia_mild,2022,baseline,0.286925
anemia_mild,2022,iron_fortification_scale_up,0.286859
anemia_mild,2023,baseline,0.299103
anemia_mild,2023,iron_fortification_scale_up,0.298899
anemia_mild,2024,baseline,0.286893
anemia_mild,2024,iron_fortification_scale_up,0.286726


In [25]:
# Anemia prevalence by age group, re-indexed so the two scenarios sit next to
# each other within every (measure, age_group) pair.
by_age = (
    get_anemia_prevalence(['age_group'])
    .reset_index()
    .sort_values(by=['measure', 'age_group', 'scenario'])
    .set_index(['measure', 'age_group', 'scenario'])
)
by_age

# Expected behavior: no change in the early/late neonatal age groups and a
# slight change in the 1_to_4 group. (In the rows displayed, mild anemia in the
# post_neonatal group is also unchanged.)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value
measure,age_group,scenario,Unnamed: 3_level_1
anemia_mild,1_to_4,baseline,0.297974
anemia_mild,1_to_4,iron_fortification_scale_up,0.29785
anemia_mild,early_neonatal,baseline,0.065592
anemia_mild,early_neonatal,iron_fortification_scale_up,0.065592
anemia_mild,late_neonatal,baseline,0.13774
anemia_mild,late_neonatal,iron_fortification_scale_up,0.13774
anemia_mild,post_neonatal,baseline,0.279633
anemia_mild,post_neonatal,iron_fortification_scale_up,0.279633
anemia_moderate,1_to_4,baseline,0.346039
anemia_moderate,1_to_4,iron_fortification_scale_up,0.345768


## Conclusions

- Nothing unexpected here!

# 5. VAD and affected causes

In [26]:
# Load the death-count output (stratified by cause and fortification groups) and preview it.
deaths = pd.read_hdf(output_dirs[0] + 'deaths.hdf')
deaths.head()

Unnamed: 0,year,age_group,sex,cause,folic_acid_fortification_group,vitamin_a_fortification_group,measure,input_draw,scenario,value
0,2020,1_to_4,female,diarrheal_diseases,covered,covered,death,357,baseline,0.0
1,2020,1_to_4,female,diarrheal_diseases,covered,covered,death,357,iron_fortification_scale_up,0.0
2,2020,1_to_4,female,diarrheal_diseases,covered,covered,death,357,vitamin_a_fortification_scale_up,0.0
3,2020,1_to_4,female,diarrheal_diseases,covered,covered,death,602,baseline,0.0
4,2020,1_to_4,female,diarrheal_diseases,covered,covered,death,602,iron_fortification_scale_up,0.0


In [27]:
# List the distinct causes of death recorded in the output.
np.unique(deaths['cause'])

array(['diarrheal_diseases', 'lower_respiratory_infections', 'measles',
       'neural_tube_defects', 'other_causes'], dtype=object)

In [28]:
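# Vitamin A deficiency (VAD) should affect lower respiratory infections (LRI), measles, and diarrheal diseases (DD), but NOT neural tube defects (NTDs)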

In [29]:
def get_csmrs(strata_cols):
    """Return mean cause-specific mortality rates (CSMRs) by cause, strata, and scenario.

    For every (scenario, input_draw, *strata_cols) cell, deaths from each
    cause ('deaths.hdf') are divided by total person time ('person_time.hdf').
    The draw-level rates are then averaged across draws. Rows for the
    'iron_fortification_scale_up' scenario are excluded from the result.

    Parameters
    ----------
    strata_cols : sequence of str
        Extra columns to stratify by (for example ``['age_group']``); pass an
        empty list for the overall rates. The columns must exist in both
        input files.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``['cause'] + strata_cols + ['scenario']``. The ``value``
        column is the mean rate. The ``input_draw`` column is the arithmetic
        mean of the draw ids; it is an artifact kept only for backward
        compatibility.
    """
    strata = list(strata_cols)
    draw_level_keys = ['scenario', 'input_draw'] + strata
    data_dir = output_dirs[0]

    deaths = pd.read_hdf(data_dir + 'deaths.hdf')
    pt = pd.read_hdf(data_dir + 'person_time.hdf')

    # numeric_only=True: sum only numeric columns. Older pandas dropped string
    # columns implicitly; pandas >= 2.0 would concatenate them and then fail
    # on the division below.
    deaths = deaths.groupby(draw_level_keys + ['cause']).sum(numeric_only=True)
    pt = pt.groupby(draw_level_keys).sum(numeric_only=True)

    # The denominator lacks the 'cause' index level; pandas aligns on the
    # shared level names, so every cause is divided by the same person time.
    csmrs = (deaths / pt).reset_index()
    csmrs = csmrs.loc[csmrs.scenario != 'iron_fortification_scale_up']

    # Average the draw-level rates across draws.
    csmrs = csmrs.groupby(['cause'] + strata + ['scenario']).mean(numeric_only=True)
    return csmrs

In [30]:
# Overall cause-specific mortality rates by cause and scenario.
overall = get_csmrs([])
overall

# looks good! Rates fall for diarrheal diseases, LRI, and measles; NTDs are unchanged.
# not sure why other_causes differs (slightly) between scenarios here

Unnamed: 0_level_0,Unnamed: 1_level_0,input_draw,value
cause,scenario,Unnamed: 2_level_1,Unnamed: 3_level_1
diarrheal_diseases,baseline,676.8,0.002049
diarrheal_diseases,vitamin_a_fortification_scale_up,676.8,0.001957
lower_respiratory_infections,baseline,676.8,0.002228
lower_respiratory_infections,vitamin_a_fortification_scale_up,676.8,0.002193
measles,baseline,676.8,0.000176
measles,vitamin_a_fortification_scale_up,676.8,0.000165
neural_tube_defects,baseline,676.8,4e-06
neural_tube_defects,vitamin_a_fortification_scale_up,676.8,4e-06
other_causes,baseline,676.8,0.00542
other_causes,vitamin_a_fortification_scale_up,676.8,0.005423


In [31]:
# Cause-specific mortality rates by cause, age group, and scenario.
by_age = get_csmrs(['age_group'])
by_age

# this is behaving as expected
# lower CSMRs for the post_neonatal and 1_to_4 age groups, but not the neonatal age groups

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,input_draw,value
cause,age_group,scenario,Unnamed: 3_level_1,Unnamed: 4_level_1
diarrheal_diseases,1_to_4,baseline,676.8,0.000611
diarrheal_diseases,1_to_4,vitamin_a_fortification_scale_up,676.8,0.000585
diarrheal_diseases,early_neonatal,baseline,676.8,0.0
diarrheal_diseases,early_neonatal,vitamin_a_fortification_scale_up,676.8,0.0
diarrheal_diseases,late_neonatal,baseline,676.8,0.013012
diarrheal_diseases,late_neonatal,vitamin_a_fortification_scale_up,676.8,0.013012
diarrheal_diseases,post_neonatal,baseline,676.8,0.007791
diarrheal_diseases,post_neonatal,vitamin_a_fortification_scale_up,676.8,0.007402
lower_respiratory_infections,1_to_4,baseline,676.8,0.000528
lower_respiratory_infections,1_to_4,vitamin_a_fortification_scale_up,676.8,0.000512


In [33]:
# Cause-specific mortality rates by cause, year, and scenario.
by_year = get_csmrs(['year'])
by_year

# this is behaving as we would expect with the CORRECT population coverage algorithm
# (2020 is identical across scenarios; no change until 2021)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,input_draw,value
cause,year,scenario,Unnamed: 3_level_1,Unnamed: 4_level_1
diarrheal_diseases,2020,baseline,676.8,0.002238
diarrheal_diseases,2020,vitamin_a_fortification_scale_up,676.8,0.002238
diarrheal_diseases,2021,baseline,676.8,0.001608
diarrheal_diseases,2021,vitamin_a_fortification_scale_up,676.8,0.001555
diarrheal_diseases,2022,baseline,676.8,0.001943
diarrheal_diseases,2022,vitamin_a_fortification_scale_up,676.8,0.001804
diarrheal_diseases,2023,baseline,676.8,0.002096
diarrheal_diseases,2023,vitamin_a_fortification_scale_up,676.8,0.001953
diarrheal_diseases,2024,baseline,676.8,0.002368
diarrheal_diseases,2024,vitamin_a_fortification_scale_up,676.8,0.002236


## Conclusions

This is looking as expected: VAD prevalence appears to be affecting CSMRs due to diarrheal diseases, measles, and LRI.

However, it also appears to be affecting "other causes", which is unexpected.