In [1]:
from vivarium import Artifact
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from db_queries import get_ids, get_outputs
import scipy.stats

!whoami
!date

alibow
Tue Jul  7 14:42:01 PDT 2020


In [2]:

output_dirs = ['/share/costeffectiveness/results/vivarium_conic_lsff/india/2020_06_26_20_35_00/count_data/']

locations = ['India']

# NOTE: Conclusion statements in this notebook have not been updated

# 1. Iron effect on birth weight

In [3]:
births = pd.read_hdf(output_dirs[0] + 'births.hdf')
births.head()

# note: no stratification of birth counts by iron coverage group

Unnamed: 0,year,sex,folic_acid_fortification_group,measure,input_draw,scenario,value
0,2020,female,covered,live_births,21,baseline,1857.0
1,2020,female,covered,live_births,21,baseline,12.0
2,2020,female,covered,live_births,21,baseline,530.0
3,2020,female,covered,live_births,21,folic_acid_fortification_scale_up,1857.0
4,2020,female,covered,live_births,21,folic_acid_fortification_scale_up,12.0


In [7]:
bw = pd.read_hdf(output_dirs[0] + 'birth_weight.hdf')
bw.head(20)

Unnamed: 0,year,sex,measure,input_draw,scenario,value,iron_fortification_group
0,2020,female,birth_weight_mean,21,baseline,2912.186341,uncovered
1,2020,female,birth_weight_mean,21,baseline,2897.54748,covered
2,2020,female,birth_weight_mean,21,folic_acid_fortification_scale_up,2912.186341,uncovered
3,2020,female,birth_weight_mean,21,folic_acid_fortification_scale_up,2897.54748,covered
4,2020,female,birth_weight_mean,21,iron_folic_acid_fortification_scale_up,2912.186341,uncovered
5,2020,female,birth_weight_mean,21,iron_folic_acid_fortification_scale_up,2897.54748,covered
6,2020,female,birth_weight_mean,21,vitamin_a_fortification_scale_up,2912.186341,uncovered
7,2020,female,birth_weight_mean,21,vitamin_a_fortification_scale_up,2897.54748,covered
8,2020,female,birth_weight_mean,29,baseline,2903.992955,uncovered
9,2020,female,birth_weight_mean,29,baseline,2922.724278,covered


In [9]:
bw.loc[bw.measure == 'birth_weight_mean'].groupby(['scenario','iron_fortification_group','year']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,input_draw,value
scenario,iron_fortification_group,year,Unnamed: 3_level_1,Unnamed: 4_level_1
baseline,covered,2020,437.52,2880.222912
baseline,covered,2021,437.52,2887.989735
baseline,covered,2022,437.52,2886.026953
baseline,covered,2023,437.52,2882.587484
baseline,uncovered,2020,437.52,2884.848491
baseline,uncovered,2021,437.52,2883.971367
baseline,uncovered,2022,437.52,2885.525545
baseline,uncovered,2023,437.52,2884.682001
folic_acid_fortification_scale_up,covered,2020,437.52,2880.222912
folic_acid_fortification_scale_up,covered,2021,437.52,2887.989735


In [5]:
bw_by_coverage_and_scenario = bw.loc[bw.measure == 'birth_weight_mean']
bw_by_coverage_and_scenario = bw_by_coverage_and_scenario.groupby(['scenario','iron_fortification_group']).mean()
bw_by_coverage_and_scenario

# we would expect mean birth weight to be higher in covered group than uncovered group (for all scenarios)
# we are seeing the opposite
# but Kjell verified that the MEDIAN birth weight in covered group > median birth weight in uncovered group
    # this suggests that the distribution is differentially skewed high
    
# we would also expect to see simular mean birth weights by coverage group between scenarios
    # we are not seeing this.

Unnamed: 0_level_0,Unnamed: 1_level_0,input_draw,value
scenario,iron_fortification_group,Unnamed: 2_level_1,Unnamed: 3_level_1
baseline,covered,437.52,2884.206771
baseline,uncovered,437.52,2884.756851
folic_acid_fortification_scale_up,covered,437.52,2884.206771
folic_acid_fortification_scale_up,uncovered,437.52,2884.756851
iron_folic_acid_fortification_scale_up,covered,437.52,2884.345141
iron_folic_acid_fortification_scale_up,uncovered,437.52,2884.740523
vitamin_a_fortification_scale_up,covered,437.52,2884.206771
vitamin_a_fortification_scale_up,uncovered,437.52,2884.756851


## Birth weight conclusions

NO CHANGE IN BIRTH WEIGHT IN IRON SCENARIO OR BY COVERAGE GROUP

# 2. Vitamin A Fortification and Vitamin A Deficiency Prevalence

In [8]:
state_pt = pd.read_hdf(output_dirs[0] + 'state_person_time.hdf')
state_pt.head()

Unnamed: 0,year,age_group,cause,folic_acid_fortification_group,vitamin_a_fortification_group,measure,input_draw,scenario,value
0,2020,1_to_4,diarrheal_diseases,covered,covered,person_time,21,baseline,0.0
1,2020,1_to_4,diarrheal_diseases,covered,covered,person_time,21,folic_acid_fortification_scale_up,0.0
2,2020,1_to_4,diarrheal_diseases,covered,covered,person_time,21,iron_folic_acid_fortification_scale_up,0.0
3,2020,1_to_4,diarrheal_diseases,covered,covered,person_time,21,vitamin_a_fortification_scale_up,0.0
4,2020,1_to_4,diarrheal_diseases,covered,covered,person_time,29,baseline,0.0


In [9]:
def calculate_stratified_vad_prevalence(strata_cols):
    state_pt = pd.read_hdf(output_dirs[0] + 'state_person_time.hdf')
    vad = state_pt.loc[state_pt.cause == 'vitamin_a_deficiency']
    vad = vad.groupby((['scenario','input_draw'] + strata_cols)).sum()
    pt = pd.read_hdf(output_dirs[0] + 'state_person_time.hdf')
    pt = pt.loc[pt.cause.str.contains('diarrheal')]
    pt = pt.groupby((['scenario','input_draw'] + strata_cols)).sum()
    vad_prev = vad / pt
    vad_prev = vad_prev.reset_index()
    vad_prev = vad_prev.loc[vad_prev.scenario != 'iron_fortification_scale_up']
    vad_prev = (vad_prev.groupby((['scenario'] + strata_cols)).mean()
                .rename(columns={'value':'vad_prevalence'})
                .drop(columns='input_draw'))
    return vad_prev

In [10]:
overall = calculate_stratified_vad_prevalence([])
overall

# looks good! (Prevalence of VAD is lower in vitamin_a_fortification_scale_up scenario than baseline scenario)

Unnamed: 0_level_0,vad_prevalence
scenario,Unnamed: 1_level_1
baseline,0.295592
folic_acid_fortification_scale_up,0.295592
iron_folic_acid_fortification_scale_up,0.295592
vitamin_a_fortification_scale_up,0.234934


In [11]:
by_year = calculate_stratified_vad_prevalence(['year'])
by_year

# looks good! 
# VAD prevalence is the same in each scenario in 2020, and then lower in vit_a_scenaario in 2021-2025

Unnamed: 0_level_0,Unnamed: 1_level_0,vad_prevalence
scenario,year,Unnamed: 2_level_1
baseline,2020,0.29429
baseline,2021,0.295265
baseline,2022,0.296192
baseline,2023,0.296717
folic_acid_fortification_scale_up,2020,0.29429
folic_acid_fortification_scale_up,2021,0.295265
folic_acid_fortification_scale_up,2022,0.296192
folic_acid_fortification_scale_up,2023,0.296717
iron_folic_acid_fortification_scale_up,2020,0.29429
iron_folic_acid_fortification_scale_up,2021,0.295265


In [12]:
by_age = calculate_stratified_vad_prevalence(['age_group'])
by_age

# no change in neonatal age groups between scenarios, as expected
# lower VAD prevalence in vitamin_a_scenario in post_neonatal and 1_to_4 ages, as expected
# looks good!

Unnamed: 0_level_0,Unnamed: 1_level_0,vad_prevalence
scenario,age_group,Unnamed: 2_level_1
baseline,1_to_4,0.251839
baseline,early_neonatal,0.794429
baseline,late_neonatal,0.753074
baseline,post_neonatal,0.453359
folic_acid_fortification_scale_up,1_to_4,0.251839
folic_acid_fortification_scale_up,early_neonatal,0.794429
folic_acid_fortification_scale_up,late_neonatal,0.753074
folic_acid_fortification_scale_up,post_neonatal,0.453359
iron_folic_acid_fortification_scale_up,1_to_4,0.251839
iron_folic_acid_fortification_scale_up,early_neonatal,0.794429


In [13]:
by_coverage_group = calculate_stratified_vad_prevalence(['vitamin_a_fortification_group'])
by_coverage_group

# unexpected results here...

# we should expect...

    # all VAD_prev < 1 in each coverage group
    # covered VAD_prev ~= uncovered VAD_prev
    # effectively_covered VAD_prev * 2.22 ~= uncovered VAD_prev in both scenarios
    # all VAD_prev by coverage group in baseline scenario should ~= VAD_prev by coverage group in vitamin_a scenario

Unnamed: 0_level_0,Unnamed: 1_level_0,vad_prevalence
scenario,vitamin_a_fortification_group,Unnamed: 2_level_1
baseline,covered,0.502596
baseline,effectively_covered,0.151775
baseline,uncovered,0.330887
folic_acid_fortification_scale_up,covered,0.502596
folic_acid_fortification_scale_up,effectively_covered,0.151775
folic_acid_fortification_scale_up,uncovered,0.330887
iron_folic_acid_fortification_scale_up,covered,0.502596
iron_folic_acid_fortification_scale_up,effectively_covered,0.151775
iron_folic_acid_fortification_scale_up,uncovered,0.330887
vitamin_a_fortification_scale_up,covered,0.416095


## Conclusions

- For the covered group, VAD state person time > overall person time, which appears to be an error (prevalence > 1)
- For the baseline scenario, VAD prevalence in the effectively covered group is ~ 0.45 times that in the uncovered group, as expected. However, in the vitamin A scenario this is not the case and the difference is greater between these groups -- we would expect to see the same rates here and just more person time in the covered groups in the vitamin A scenario.
- Otherwise, behavior by year, age group, and overall looks as expected
- VAD prevalence does not decrease until 2021, indicating that the coverage algorithm was implemented correctly

# 3. Iron effect on hemoglobin

In [14]:
hb = pd.read_hdf(output_dirs[0] + 'hemoglobin_level.hdf')
hb.head()

Unnamed: 0,sex,measure,input_draw,scenario,value,age,status,responsive
0,female,hemoglobin_mean,21,baseline,104.391697,0.5,covered,responsive
1,female,hemoglobin_mean,21,baseline,84.403095,0.5,covered,non-responsive
2,female,hemoglobin_mean,21,baseline,104.741896,0.5,uncovered,responsive
3,female,hemoglobin_mean,21,baseline,95.027138,0.5,uncovered,non-responsive
4,female,hemoglobin_mean,21,baseline,106.950925,1.0,covered,responsive


In [15]:
np.unique(hb['measure'])

array(['hemoglobin_mean', 'hemoglobin_variance'], dtype=object)

In [16]:
pt = pd.read_hdf(output_dirs[0] + 'person_time.hdf')
pt.head()

# NOTE: cannot calculate weighted averages of Hb because persontime data not stratified by iron coverage or responsiveness

Unnamed: 0,year,age_group,folic_acid_fortification_group,vitamin_a_fortification_group,measure,input_draw,scenario,value
0,2020,1_to_4,covered,covered,person_time,21,baseline,0.0
1,2020,1_to_4,covered,covered,person_time,21,folic_acid_fortification_scale_up,0.0
2,2020,1_to_4,covered,covered,person_time,21,iron_folic_acid_fortification_scale_up,0.0
3,2020,1_to_4,covered,covered,person_time,21,vitamin_a_fortification_scale_up,0.0
4,2020,1_to_4,covered,covered,person_time,29,baseline,0.0


In [17]:
def get_stratified_hb_mean(strata_cols):
    hb = pd.read_hdf(output_dirs[0] + 'hemoglobin_level.hdf')
    hb = hb.loc[hb.scenario != 'vitamin_a_fortification_scale_up']
    hb = hb.loc[hb.measure == 'hemoglobin_mean']
    hb = hb.groupby(['scenario','measure'] + strata_cols).mean()
    return hb

In [18]:
overall = get_stratified_hb_mean([])
overall


# looks good! (mean hemoglobin is slightly higher in iron_fortification scenario)

Unnamed: 0_level_0,Unnamed: 1_level_0,input_draw,value
scenario,measure,Unnamed: 2_level_1,Unnamed: 3_level_1
baseline,hemoglobin_mean,437.52,98.649279
folic_acid_fortification_scale_up,hemoglobin_mean,437.52,98.649278
iron_folic_acid_fortification_scale_up,hemoglobin_mean,437.52,99.71085


In [19]:
by_coverage = get_stratified_hb_mean(['status'])
by_coverage

# hemoglobin is lower in covered group, which is the opposite of what we would expect

# should expect...
    
    # covered mean hemoglobin to be higher than uncovered mean hemoglobin in both scenarios
        # note, if this is not true, it should be true for MEDIAN hemoglobin by coverage group
    # mean hemoglobin by coverage group should be approximately equal between scenarios
        # note, it is ok if covered hemoglobin in the iron scenario is slightly less than the covered 
        # group in the baseline scenario

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,input_draw,value
scenario,measure,status,Unnamed: 3_level_1,Unnamed: 4_level_1
baseline,hemoglobin_mean,covered,437.52,96.464106
baseline,hemoglobin_mean,uncovered,437.52,100.834452
folic_acid_fortification_scale_up,hemoglobin_mean,covered,437.52,96.464106
folic_acid_fortification_scale_up,hemoglobin_mean,uncovered,437.52,100.834451
iron_folic_acid_fortification_scale_up,hemoglobin_mean,covered,437.52,98.603525
iron_folic_acid_fortification_scale_up,hemoglobin_mean,uncovered,437.52,100.818176


In [20]:
by_responsiveness = get_stratified_hb_mean(['status','responsive'])
by_responsiveness

# we are seeing some unexpected results here

    
# expected results:

    # for a given scenario...
        # covered, responsive hemoglobin > uncovered, responsive hemoglobin (by ~ 3 units)
        # covered, non-responsive hemoglobin ~= uncovered, non-responsive hemoglobin
    
    # mean hemoglobin by coverage group and responsiveness should be approximately equal between scenarios
        
    # for a given coverage group, responsive hemoglobin > non-responsive hemoglobin
        # for a given ANEMIA group (i.e. severe anemia)... responsive hemoglobin ~= non-responsive hemoglobin
            # can verify this in interactive sim?
        # note: difference between responsive and non-responsive should be larger in covered group than uncovered group

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,input_draw,value
scenario,measure,status,responsive,Unnamed: 4_level_1,Unnamed: 5_level_1
baseline,hemoglobin_mean,covered,non-responsive,437.52,86.381271
baseline,hemoglobin_mean,covered,responsive,437.52,106.54694
baseline,hemoglobin_mean,uncovered,non-responsive,437.52,96.277177
baseline,hemoglobin_mean,uncovered,responsive,437.52,105.391726
folic_acid_fortification_scale_up,hemoglobin_mean,covered,non-responsive,437.52,86.381271
folic_acid_fortification_scale_up,hemoglobin_mean,covered,responsive,437.52,106.54694
folic_acid_fortification_scale_up,hemoglobin_mean,uncovered,non-responsive,437.52,96.277177
folic_acid_fortification_scale_up,hemoglobin_mean,uncovered,responsive,437.52,105.391725
iron_folic_acid_fortification_scale_up,hemoglobin_mean,covered,non-responsive,437.52,90.530502
iron_folic_acid_fortification_scale_up,hemoglobin_mean,covered,responsive,437.52,106.676547


In [21]:
by_age = get_stratified_hb_mean(['age'])
by_age

# would expect to see slight difference for 0.5 age group between scenarios
# expect to see larger differences between scenarios for other ages

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,input_draw,value
scenario,measure,age,Unnamed: 3_level_1,Unnamed: 4_level_1
baseline,hemoglobin_mean,0.5,437.52,96.074232
baseline,hemoglobin_mean,1.0,437.52,99.1902
baseline,hemoglobin_mean,2.0,437.52,99.625878
baseline,hemoglobin_mean,3.0,437.52,99.706804
folic_acid_fortification_scale_up,hemoglobin_mean,0.5,437.52,96.074232
folic_acid_fortification_scale_up,hemoglobin_mean,1.0,437.52,99.1902
folic_acid_fortification_scale_up,hemoglobin_mean,2.0,437.52,99.625878
folic_acid_fortification_scale_up,hemoglobin_mean,3.0,437.52,99.706804
iron_folic_acid_fortification_scale_up,hemoglobin_mean,0.5,437.52,97.460305
iron_folic_acid_fortification_scale_up,hemoglobin_mean,1.0,437.52,100.31901


In [22]:
by_responsiveness = get_stratified_hb_mean(['responsive'])
by_responsiveness

# we would expect...

    # baseline non-responsive hemoglobin ~= iron scenario non-responsive hemoglobin
    # baseline responsive hemoglobin < iron scenario responsive hemoglobin (by < 3 units)
    
# we are seeing unexpected results here in that ...

    # baseline non-responsive hemoglobin < iron scenario non-responsive hemoglobin
    # baseline responsive hemoglobin = iron scenario responsive hemoglobin

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,input_draw,value
scenario,measure,responsive,Unnamed: 3_level_1,Unnamed: 4_level_1
baseline,hemoglobin_mean,non-responsive,437.52,91.329224
baseline,hemoglobin_mean,responsive,437.52,105.969333
folic_acid_fortification_scale_up,hemoglobin_mean,non-responsive,437.52,91.329224
folic_acid_fortification_scale_up,hemoglobin_mean,responsive,437.52,105.969333
iron_folic_acid_fortification_scale_up,hemoglobin_mean,non-responsive,437.52,93.388053
iron_folic_acid_fortification_scale_up,hemoglobin_mean,responsive,437.52,106.033648


## Conclusions

- hemoglobin is lower in covered group, which is the opposite of what we would expect
- there is significant variation in mean hemoglobin between responsive and non-responsive groups in the uncovered population, which is unexpected
- seeing a difference in mean hemoglobin between scenarios for the 0.5 age, which is unexpected
- seeing an increase in mean hemoglobin between scenarios in the non-responsive population, which is unexpected

# 4. Iron fortification on anemia prevalence

In [23]:
anemia_pt = pd.read_hdf(output_dirs[0] + 'anemia_state_person_time.hdf')
anemia_pt.head()

Unnamed: 0,year,age_group,sex,measure,input_draw,scenario,value
0,2020,1_to_4,female,anemia_mild,21,baseline,44605.015743
1,2020,1_to_4,female,anemia_mild,21,folic_acid_fortification_scale_up,44605.015743
2,2020,1_to_4,female,anemia_mild,21,iron_folic_acid_fortification_scale_up,44605.015743
3,2020,1_to_4,female,anemia_mild,21,vitamin_a_fortification_scale_up,44605.015743
4,2020,1_to_4,female,anemia_mild,29,baseline,43965.626283


In [24]:
def get_anemia_prevalence(strata_cols):
    anemia_pt = pd.read_hdf(output_dirs[0] + 'anemia_state_person_time.hdf')
    anemia_pt = anemia_pt.loc[anemia_pt.scenario != 'vitamin_a_fortification_scale_up']
    anemia_pt = anemia_pt.groupby((['scenario','input_draw'] + strata_cols + ['measure'])).sum()
    pt = pd.read_hdf(output_dirs[0] + 'state_person_time.hdf')
    pt = pt.loc[pt.cause.str.contains('diarrheal')]
    pt = pt.loc[pt.scenario.isin(['baseline','iron_folic_acid_fortification_scneario'])]
    pt = pt.groupby((['scenario','input_draw'] + strata_cols)).sum()
    anemia_prev = anemia_pt / pt
    anemia_prev = anemia_prev.groupby((['scenario','measure'] + strata_cols)).mean() 
    return anemia_prev

In [25]:
overall = get_anemia_prevalence([])
overall

# we are seeing a VERY slight decrease in anemia prevalence between scenarios
# this is generally expected
# should see decrease in all anemia prevalences in iron scenario relative to baseline
    # an increase in anemia_none prevalence

Unnamed: 0_level_0,Unnamed: 1_level_0,value
scenario,measure,Unnamed: 2_level_1
baseline,anemia_mild,0.285056
baseline,anemia_moderate,0.309495
baseline,anemia_none,0.389486
baseline,anemia_severe,0.015963
folic_acid_fortification_scale_up,anemia_mild,
folic_acid_fortification_scale_up,anemia_moderate,
folic_acid_fortification_scale_up,anemia_none,
folic_acid_fortification_scale_up,anemia_severe,
iron_folic_acid_fortification_scale_up,anemia_mild,
iron_folic_acid_fortification_scale_up,anemia_moderate,


In [26]:
by_year = get_anemia_prevalence(['year'])
by_year = by_year.reset_index().sort_values(by=['measure','year','scenario']).set_index(['measure','year','scenario'])
by_year

# this is displaying expected behavior
# no difference in 2020, slight differences in later years

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value
measure,year,scenario,Unnamed: 3_level_1
anemia_mild,2020,baseline,0.285336
anemia_mild,2020,folic_acid_fortification_scale_up,
anemia_mild,2020,iron_folic_acid_fortification_scale_up,
anemia_mild,2021,baseline,0.285021
anemia_mild,2021,folic_acid_fortification_scale_up,
anemia_mild,2021,iron_folic_acid_fortification_scale_up,
anemia_mild,2022,baseline,0.28495
anemia_mild,2022,folic_acid_fortification_scale_up,
anemia_mild,2022,iron_folic_acid_fortification_scale_up,
anemia_mild,2023,baseline,0.284901


In [27]:
by_age = get_anemia_prevalence(['age_group'])
by_age = by_age.reset_index().sort_values(by=['measure','age_group','scenario']).set_index(['measure','age_group','scenario'])
by_age

# this is showing expected behavior... no change in early/late neonatal age groups, but slight change in others

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value
measure,age_group,scenario,Unnamed: 3_level_1
anemia_mild,1_to_4,baseline,0.287816
anemia_mild,1_to_4,folic_acid_fortification_scale_up,
anemia_mild,1_to_4,iron_folic_acid_fortification_scale_up,
anemia_mild,early_neonatal,baseline,0.081349
anemia_mild,early_neonatal,folic_acid_fortification_scale_up,
anemia_mild,early_neonatal,iron_folic_acid_fortification_scale_up,
anemia_mild,late_neonatal,baseline,0.18523
anemia_mild,late_neonatal,folic_acid_fortification_scale_up,
anemia_mild,late_neonatal,iron_folic_acid_fortification_scale_up,
anemia_mild,post_neonatal,baseline,0.283204


## Conclusions

- Nothing unexpected here!

# 5. VAD and affected causes

In [26]:
deaths = pd.read_hdf(output_dirs[0] + 'deaths.hdf')
deaths.head()

Unnamed: 0,year,age_group,cause,folic_acid_fortification_group,vitamin_a_fortification_group,measure,input_draw,scenario,value
0,2020,1_to_4,diarrheal_diseases,covered,covered,death,21,baseline,0.0
1,2020,1_to_4,diarrheal_diseases,covered,covered,death,21,folic_acid_fortification_scale_up,0.0
2,2020,1_to_4,diarrheal_diseases,covered,covered,death,21,iron_folic_acid_fortification_scale_up,0.0
3,2020,1_to_4,diarrheal_diseases,covered,covered,death,21,vitamin_a_fortification_scale_up,0.0
4,2020,1_to_4,diarrheal_diseases,covered,covered,death,29,baseline,0.0


In [27]:
np.unique(deaths['cause'])

array(['diarrheal_diseases', 'lower_respiratory_infections', 'measles',
       'neural_tube_defects', 'other_causes'], dtype=object)

In [28]:
# vad should affect LRI, measles, DD, but NOT NTDs

In [28]:
def get_csmrs(strata_cols):
    deaths = pd.read_hdf(output_dirs[0] + 'deaths.hdf')
    pt = pd.read_hdf(output_dirs[0] + 'state_person_time.hdf')
    pt = pt.loc[pt.cause.str.contains('diarrheal')]
    deaths = deaths.groupby((['scenario','input_draw'] + strata_cols + ['cause'])).sum()
    pt = pt.groupby((['scenario','input_draw'] + strata_cols)).sum()
    csmrs = deaths / pt
    csmrs = csmrs.reset_index()
    csmrs = csmrs.loc[csmrs.scenario != 'iron_fortification_scale_up']
    csmrs = csmrs.groupby((['cause'] + strata_cols + ['scenario'])).mean()
    return csmrs

In [29]:
overall = get_csmrs([])
overall

# looks good!
# CSMR for other causes may be slightly higher in vitamin A scenario because 
# simulants who did not die of affected causes can now die of another cause

Unnamed: 0_level_0,Unnamed: 1_level_0,input_draw,value
cause,scenario,Unnamed: 2_level_1,Unnamed: 3_level_1
diarrheal_diseases,baseline,437.52,0.00081
diarrheal_diseases,folic_acid_fortification_scale_up,437.52,0.00081
diarrheal_diseases,iron_folic_acid_fortification_scale_up,437.52,0.00081
diarrheal_diseases,vitamin_a_fortification_scale_up,437.52,0.000774
lower_respiratory_infections,baseline,437.52,0.001142
lower_respiratory_infections,folic_acid_fortification_scale_up,437.52,0.001142
lower_respiratory_infections,iron_folic_acid_fortification_scale_up,437.52,0.001142
lower_respiratory_infections,vitamin_a_fortification_scale_up,437.52,0.001128
measles,baseline,437.52,0.000108
measles,folic_acid_fortification_scale_up,437.52,0.000108


In [30]:
by_age = get_csmrs(['age_group'])
by_age

# this is behaving as expected
# lower CSMRs for postneonatal and 1-4 age group, but not neonatal age groups

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,input_draw,value
cause,age_group,scenario,Unnamed: 3_level_1,Unnamed: 4_level_1
diarrheal_diseases,1_to_4,baseline,437.52,2.439610e-04
diarrheal_diseases,1_to_4,folic_acid_fortification_scale_up,437.52,2.439610e-04
diarrheal_diseases,1_to_4,iron_folic_acid_fortification_scale_up,437.52,2.439609e-04
diarrheal_diseases,1_to_4,vitamin_a_fortification_scale_up,437.52,2.290589e-04
diarrheal_diseases,early_neonatal,baseline,437.52,1.349641e-02
diarrheal_diseases,early_neonatal,folic_acid_fortification_scale_up,437.52,1.349641e-02
diarrheal_diseases,early_neonatal,iron_folic_acid_fortification_scale_up,437.52,1.347559e-02
diarrheal_diseases,early_neonatal,vitamin_a_fortification_scale_up,437.52,1.349641e-02
diarrheal_diseases,late_neonatal,baseline,437.52,6.603921e-03
diarrheal_diseases,late_neonatal,folic_acid_fortification_scale_up,437.52,6.603921e-03


In [31]:
by_year = get_csmrs(['year'])
by_year

# this is behaving as we would expect with the CORRECT population coverage algorithm (no change until 2020)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,input_draw,value
cause,year,scenario,Unnamed: 3_level_1,Unnamed: 4_level_1
diarrheal_diseases,2020,baseline,437.52,0.000791
diarrheal_diseases,2020,folic_acid_fortification_scale_up,437.52,0.000791
diarrheal_diseases,2020,iron_folic_acid_fortification_scale_up,437.52,0.000791
diarrheal_diseases,2020,vitamin_a_fortification_scale_up,437.52,0.000791
diarrheal_diseases,2021,baseline,437.52,0.000815
diarrheal_diseases,2021,folic_acid_fortification_scale_up,437.52,0.000815
diarrheal_diseases,2021,iron_folic_acid_fortification_scale_up,437.52,0.000815
diarrheal_diseases,2021,vitamin_a_fortification_scale_up,437.52,0.000785
diarrheal_diseases,2022,baseline,437.52,0.000812
diarrheal_diseases,2022,folic_acid_fortification_scale_up,437.52,0.000812


## Conclusions

This is looking as expected (VAD prevalence appears to be affecting CSMRs due to diarrheal diseases, measles, and LRI.

However, it also appears to be affecting "other causes" which is unexpected