In [1]:
import pandas as pd, numpy as np
import vivarium_output_loader as vol
import lsff_output_processing as lop

# Record which user ran this notebook and when (shell commands via IPython's `!`).
!whoami
!date

ndbs
Fri May 22 13:10:03 PDT 2020


In [2]:
# Reload imported modules automatically before each cell executes, so edits to
# the helper modules imported above are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## 1. Define directories and load count space data

In [5]:
# Results root, and the run sub-directory analysed in this notebook.
base_directory = '/share/costeffectiveness/results/vivarium_conic_lsff/00_bugfix'
output_directory = f'{base_directory}/v6.0_prelim_maternal_iron_lri_bp/india_5_20'

In [6]:
# Load the transformed count-space tables; `data` is keyed by table name (see output).
count_data_directory = f'{output_directory}/count_data'
data = vol.load_transformed_count_data(count_data_directory)
data.keys()

dict_keys(['gestational_age', 'transition_count', 'deaths', 'state_person_time', 'births_with_ntd', 'population', 'person_time', 'ylls', 'ylds', 'births', 'birth_weight'])

## 2. Look at deaths table; check scenarios and stratification

*  No iron fortification
*  Stratification not updated to new group definitions

In [7]:
# Preview the deaths table to see which stratification columns it carries.
data['deaths'].head()

Unnamed: 0,year,age_group,sex,cause,folic_acid_fortification_group,vitamin_a_fortification_group,measure,input_draw,scenario,value
0,2020,1_to_4,female,diarrheal_diseases,covered,covered,death,29,baseline,0.0
1,2020,1_to_4,female,diarrheal_diseases,covered,covered,death,29,folic_acid_fortification_scale_up,0.0
2,2020,1_to_4,female,diarrheal_diseases,covered,covered,death,29,vitamin_a_fortification_scale_up,0.0
3,2020,1_to_4,female,diarrheal_diseases,covered,covered,death,232,baseline,0.0
4,2020,1_to_4,female,diarrheal_diseases,covered,covered,death,232,folic_acid_fortification_scale_up,0.0


In [8]:
# Scenarios present in the deaths table.
data['deaths']['scenario'].unique()

array(['baseline', 'folic_acid_fortification_scale_up',
       'vitamin_a_fortification_scale_up'], dtype=object)

In [9]:
# Folic acid fortification groups used to stratify the deaths table.
data['deaths']['folic_acid_fortification_group'].unique()

array(['covered', 'uncovered', 'unknown'], dtype=object)

In [10]:
# Vitamin A fortification groups used to stratify the deaths table.
data['deaths']['vitamin_a_fortification_group'].unique()

array(['covered', 'effectively_covered', 'uncovered'], dtype=object)

## 3. Look at births and births_with_ntd tables

In [12]:
# Preview the births table.
# NOTE(review): in the preview, rows 0-2 share identical values in every
# identifying column (year, sex, folic_acid_fortification_group, measure,
# input_draw, scenario) but have different counts -- presumably a stratification
# column was dropped upstream, so counts may need summing over duplicate keys
# before use. TODO confirm.
data['births'].head()

Unnamed: 0,year,sex,folic_acid_fortification_group,measure,input_draw,scenario,value
0,2020,female,covered,live_births,29,baseline,327.0
1,2020,female,covered,live_births,29,baseline,5.0
2,2020,female,covered,live_births,29,baseline,80.0
3,2020,female,covered,live_births,29,folic_acid_fortification_scale_up,233.0
4,2020,female,covered,live_births,29,folic_acid_fortification_scale_up,4.0


In [13]:
# Preview the births-with-NTD table (same columns as the births table).
# NOTE(review): as in the births preview, rows 0-2 repeat the same identifying
# key -- presumably a dropped stratification column. TODO confirm.
data['births_with_ntd'].head()

Unnamed: 0,year,sex,folic_acid_fortification_group,measure,input_draw,scenario,value
0,2020,female,covered,live_births_with_ntds,29,baseline,0.0
1,2020,female,covered,live_births_with_ntds,29,baseline,0.0
2,2020,female,covered,live_births_with_ntds,29,baseline,0.0
3,2020,female,covered,live_births_with_ntds,29,folic_acid_fortification_scale_up,0.0
4,2020,female,covered,live_births_with_ntds,29,folic_acid_fortification_scale_up,0.0


In [14]:
# Scenarios present in the births table.
data['births']['scenario'].unique()

array(['baseline', 'folic_acid_fortification_scale_up',
       'vitamin_a_fortification_scale_up'], dtype=object)

## 4. Check state person time and person time tables

In [15]:
# Preview person-time by cause state; the `cause` column holds the state name.
data['state_person_time'].head()

Unnamed: 0,year,age_group,sex,cause,folic_acid_fortification_group,vitamin_a_fortification_group,measure,input_draw,scenario,value
0,2020,1_to_4,female,diarrheal_diseases,covered,covered,person_time,29,baseline,0.0
1,2020,1_to_4,female,diarrheal_diseases,covered,covered,person_time,29,folic_acid_fortification_scale_up,0.0
2,2020,1_to_4,female,diarrheal_diseases,covered,covered,person_time,29,vitamin_a_fortification_scale_up,0.0
3,2020,1_to_4,female,diarrheal_diseases,covered,covered,person_time,232,baseline,0.0
4,2020,1_to_4,female,diarrheal_diseases,covered,covered,person_time,232,folic_acid_fortification_scale_up,0.0


In [16]:
# Cause states (disease and susceptible states) recorded in state person-time.
data['state_person_time']['cause'].unique()

array(['diarrheal_diseases', 'lower_respiratory_infections', 'measles',
       'neural_tube_defects', 'susceptible_to_diarrheal_diseases',
       'susceptible_to_lower_respiratory_infections',
       'susceptible_to_measles', 'susceptible_to_neural_tube_defects',
       'susceptible_to_vitamin_a_deficiency', 'vitamin_a_deficiency'],
      dtype=object)

In [17]:
# Preview total person-time: same strata as state_person_time, but with no `cause` column.
data['person_time'].head()

Unnamed: 0,year,age_group,sex,folic_acid_fortification_group,vitamin_a_fortification_group,measure,input_draw,scenario,value
0,2020,1_to_4,female,covered,covered,person_time,29,baseline,0.0
1,2020,1_to_4,female,covered,covered,person_time,29,folic_acid_fortification_scale_up,0.0
2,2020,1_to_4,female,covered,covered,person_time,29,vitamin_a_fortification_scale_up,0.0
3,2020,1_to_4,female,covered,covered,person_time,232,baseline,0.0
4,2020,1_to_4,female,covered,covered,person_time,232,folic_acid_fortification_scale_up,0.0


## 5. Compute prevalences by age group

In [18]:
# Prevalence (as a percent) of each cause state by year and age group:
# state person-time is passed as the numerator table, total person-time as
# the denominator table.
numerator_strata = ['year', 'age_group']
denominator_strata = ['year', 'age_group']

# Scale the ratios by 100 so prevalences read as percents.
multiplier = 100

# `cause` appears only in the state person-time table, so broadcast over it.
broadcast_cols = ['cause']

prevalence = lop.rate_or_ratio(
    data['state_person_time'],
    data['person_time'],
    numerator_strata,
    denominator_strata,
    multiplier,
    broadcast_cols,
)
prevalence.head()

Unnamed: 0,year,age_group,input_draw,scenario,cause,value
0,2020,1_to_4,29,baseline,diarrheal_diseases,1.257451
1,2020,1_to_4,29,baseline,lower_respiratory_infections,0.281531
2,2020,1_to_4,29,baseline,measles,0.115051
3,2020,1_to_4,29,baseline,neural_tube_defects,0.050326
4,2020,1_to_4,29,baseline,susceptible_to_diarrheal_diseases,98.199246


## 6. Test some pandas functions

In [19]:
# stack() moves the column labels into an inner index level, giving one entry per
# (row, column) pair; the result is dtype object because the columns mix types.
prevalence.stack().head(10)

0  year                        2020
   age_group                 1_to_4
   input_draw                    29
   scenario                baseline
   cause         diarrheal_diseases
   value                    1.25745
1  year                        2020
   age_group                 1_to_4
   input_draw                    29
   scenario                baseline
dtype: object

## 7. Test my new describe function

In [22]:
# Summary statistics of `value` for each (year, age_group, scenario, cause) --
# presumably taken across input draws (count = 10 in the output). The 2.5% and
# 97.5% percentiles bound a central 95% interval.
lop.describe(prevalence, percentiles=[0.025, 0.975]).head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,mean,std,min,2.5%,50%,97.5%,max
year,age_group,scenario,cause,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020,1_to_4,baseline,diarrheal_diseases,10.0,1.245838,0.147376,0.970996,0.980257,1.276842,1.395314,1.398819
2020,1_to_4,baseline,lower_respiratory_infections,10.0,0.242103,0.040227,0.176669,0.179983,0.249304,0.288445,0.289114
2020,1_to_4,baseline,measles,10.0,0.171337,0.105097,0.040904,0.047228,0.12978,0.319079,0.324046
2020,1_to_4,baseline,neural_tube_defects,10.0,0.048008,0.00864,0.038739,0.038989,0.046749,0.064845,0.066972
2020,1_to_4,baseline,susceptible_to_diarrheal_diseases,10.0,98.211136,0.147322,98.057874,98.061556,98.180042,98.476773,98.486193
2020,1_to_4,baseline,susceptible_to_lower_respiratory_infections,10.0,99.214872,0.040144,99.167563,99.168348,99.20796,99.276874,99.280235
2020,1_to_4,baseline,susceptible_to_measles,10.0,97.667094,1.098914,96.05179,96.111782,98.109468,98.971326,99.037825
2020,1_to_4,baseline,susceptible_to_neural_tube_defects,10.0,99.408967,0.008639,99.390216,99.392322,99.409947,99.418192,99.418523
2020,1_to_4,baseline,susceptible_to_vitamin_a_deficiency,10.0,74.93742,1.348795,72.5842,72.933484,74.676873,77.263237,77.463852
2020,1_to_4,baseline,vitamin_a_deficiency,10.0,24.519554,1.348687,21.993052,22.193796,24.779715,26.523324,26.872477


### Check some prevalence estimates to see whether they add to 100%

Well, they're low by about 0.543 percentage points (100 − 99.457), but that's probably ok.

In [23]:
# susceptible to VAD + VAD. (Not LRI: these values sit next to the baseline VAD
# means 74.937420 and 24.519554 in the table above -- presumably they are the
# FA scale-up rows, which are not displayed; TODO confirm.)
74.930194+24.526703

99.456897

In [24]:
# LRI + susceptible to LRI in baseline (mean column, 2020, 1_to_4, from the table above)
0.242103+99.214872

99.456975

## 8. Filter to LRI and check LRI prevalence by age group

* Looks like we're much closer to GBD now that birth prevalence is included!

In [26]:
# Peek at the LRI rows only (boolean mask on the `cause` column).
prevalence[prevalence['cause'] == 'lower_respiratory_infections'].head()

Unnamed: 0,year,age_group,input_draw,scenario,cause,value
1,2020,1_to_4,29,baseline,lower_respiratory_infections,0.281531
11,2020,1_to_4,29,folic_acid_fortification_scale_up,lower_respiratory_infections,0.278936
21,2020,1_to_4,29,vitamin_a_fortification_scale_up,lower_respiratory_infections,0.275317
31,2020,1_to_4,232,baseline,lower_respiratory_infections,0.213828
41,2020,1_to_4,232,folic_acid_fortification_scale_up,lower_respiratory_infections,0.205031


In [27]:
# Keep only the LRI rows and check how many remain.
lri_prevalence = prevalence.loc[prevalence['cause'] == 'lower_respiratory_infections']
lri_prevalence.shape

(600, 6)

In [28]:
# With no `percentiles` argument the summary reports quartiles (25%, 50%, 75%).
lop.describe(lri_prevalence)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,mean,std,min,25%,50%,75%,max
year,age_group,scenario,cause,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020,1_to_4,baseline,lower_respiratory_infections,10.0,0.242103,0.040227,0.176669,0.214966,0.249304,0.277489,0.289114
2020,1_to_4,folic_acid_fortification_scale_up,lower_respiratory_infections,10.0,0.241958,0.043694,0.169517,0.20721,0.254122,0.278227,0.289391
2020,1_to_4,vitamin_a_fortification_scale_up,lower_respiratory_infections,10.0,0.24009,0.041315,0.170479,0.20922,0.251614,0.275215,0.282454
2020,early_neonatal,baseline,lower_respiratory_infections,10.0,3.915212,0.472143,2.92267,3.698256,4.019831,4.311837,4.387924
2020,early_neonatal,folic_acid_fortification_scale_up,lower_respiratory_infections,10.0,3.888874,0.487697,2.991142,3.670065,3.872257,4.015333,4.674246
2020,early_neonatal,vitamin_a_fortification_scale_up,lower_respiratory_infections,10.0,3.883888,0.410245,3.082405,3.735438,3.887528,4.143619,4.549584
2020,late_neonatal,baseline,lower_respiratory_infections,10.0,1.084098,0.117101,0.814978,1.031885,1.106479,1.159045,1.205168
2020,late_neonatal,folic_acid_fortification_scale_up,lower_respiratory_infections,10.0,1.043835,0.154607,0.775094,0.994965,1.02176,1.066394,1.343037
2020,late_neonatal,vitamin_a_fortification_scale_up,lower_respiratory_infections,10.0,1.048535,0.09445,0.88574,1.010501,1.024656,1.071596,1.246764
2020,post_neonatal,baseline,lower_respiratory_infections,10.0,0.287643,0.059799,0.223484,0.242509,0.272861,0.316605,0.395074


## 9. Compute prevalence averted by each intervention relative to baseline (baseline − intervention)

In [49]:
# Difference from baseline for each non-baseline scenario. The sign convention is
# baseline - scenario (e.g. draw 29, 1_to_4 LRI: 0.281531 - 0.278936 = 0.002595),
# so a positive value means prevalence is lower under the intervention.
delta_prevalence = lop.averted(prevalence, 'baseline')
delta_prevalence.head()

Unnamed: 0,year,age_group,input_draw,cause,scenario,relative_to,value
0,2020,1_to_4,29,diarrheal_diseases,folic_acid_fortification_scale_up,baseline,-0.019993
1,2020,1_to_4,29,diarrheal_diseases,vitamin_a_fortification_scale_up,baseline,-0.005764
2,2020,1_to_4,29,lower_respiratory_infections,folic_acid_fortification_scale_up,baseline,0.002595
3,2020,1_to_4,29,lower_respiratory_infections,vitamin_a_fortification_scale_up,baseline,0.006214
4,2020,1_to_4,29,measles,folic_acid_fortification_scale_up,baseline,-0.000868


### Test some python/pandas stuff

In [41]:
# List the column names, to see where 'scenario' sits.
prevalence.columns

Index(['year', 'age_group', 'input_draw', 'scenario', 'cause', 'value'], dtype='object')

In [42]:
# Bare attribute access (no call): this only displays the list.index method itself.
['year', 'age_group', 'input_draw', 'scenario', 'cause', 'value'].index

<function list.index>

In [44]:
# Position of 'scenario' in a plain list of the column names.
['year', 'age_group', 'input_draw', 'scenario', 'cause', 'value'].index('scenario')

3

In [45]:
# Same lookup directly on the pandas Index: get_loc gives the integer position of the label.
prevalence.columns.get_loc('scenario')

3

## 10. Summarize the change in prevalence and take a look

Use pandas cross sections ([DataFrame.xs()](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.xs.html#pandas.DataFrame.xs)) to select one cause at a time.

In [50]:
# Summarize the averted prevalence for each (year, age_group, cause, scenario,
# relative_to) group -- presumably across input draws (count = 10 in the output).
delta_prevalence_summary = lop.describe(delta_prevalence)
delta_prevalence_summary.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count,mean,std,min,25%,50%,75%,max
year,age_group,cause,scenario,relative_to,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020,1_to_4,diarrheal_diseases,folic_acid_fortification_scale_up,baseline,10.0,-0.003733,0.011922,-0.019993,-0.011877,-0.006019,0.006175,0.016033
2020,1_to_4,diarrheal_diseases,vitamin_a_fortification_scale_up,baseline,10.0,0.000195,0.008045,-0.013481,-0.004511,0.001034,0.004667,0.011193
2020,1_to_4,lower_respiratory_infections,folic_acid_fortification_scale_up,baseline,10.0,0.000145,0.006005,-0.010737,-0.002961,8.1e-05,0.004123,0.008797
2020,1_to_4,lower_respiratory_infections,vitamin_a_fortification_scale_up,baseline,10.0,0.002012,0.005512,-0.009546,-0.000182,0.003099,0.006208,0.00738
2020,1_to_4,measles,folic_acid_fortification_scale_up,baseline,10.0,0.001837,0.004705,-0.003909,-0.001822,2.3e-05,0.005495,0.00983


In [51]:
# Cross-section for one cause; xs drops the selected `cause` level from the index.
delta_prevalence_summary.xs('lower_respiratory_infections', level='cause')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,mean,std,min,25%,50%,75%,max
year,age_group,scenario,relative_to,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020,1_to_4,folic_acid_fortification_scale_up,baseline,10.0,0.000145,0.006005,-0.010737,-0.002961,8.1e-05,0.004123,0.008797
2020,1_to_4,vitamin_a_fortification_scale_up,baseline,10.0,0.002012,0.005512,-0.009546,-0.000182,0.003099,0.006208,0.00738
2020,early_neonatal,folic_acid_fortification_scale_up,baseline,10.0,0.026338,0.234011,-0.314503,-0.089418,-0.056826,0.160669,0.439242
2020,early_neonatal,vitamin_a_fortification_scale_up,baseline,10.0,0.031325,0.181644,-0.182725,-0.132491,-0.00978,0.176702,0.324164
2020,late_neonatal,folic_acid_fortification_scale_up,baseline,10.0,0.040262,0.090822,-0.148176,0.003638,0.066362,0.091203,0.15197
2020,late_neonatal,vitamin_a_fortification_scale_up,baseline,10.0,0.035562,0.075588,-0.070762,-0.017376,0.036564,0.082305,0.18051
2020,post_neonatal,folic_acid_fortification_scale_up,baseline,10.0,0.002832,0.01405,-0.025367,-0.002665,0.001121,0.010113,0.024324
2020,post_neonatal,vitamin_a_fortification_scale_up,baseline,10.0,-0.002164,0.012691,-0.020015,-0.011449,-0.002177,0.004733,0.020147
2021,1_to_4,folic_acid_fortification_scale_up,baseline,10.0,0.001295,0.004018,-0.005248,-0.000729,0.001606,0.003618,0.008268
2021,1_to_4,vitamin_a_fortification_scale_up,baseline,10.0,0.003956,0.002306,0.00108,0.002802,0.003841,0.004475,0.009187


In [52]:
# Cross-section for vitamin A deficiency; xs drops the `cause` level from the index.
delta_prevalence_summary.xs('vitamin_a_deficiency', level='cause')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,mean,std,min,25%,50%,75%,max
year,age_group,scenario,relative_to,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020,1_to_4,folic_acid_fortification_scale_up,baseline,10.0,-0.007149,0.015195,-0.022699,-0.020245,-0.013809,0.007294,0.016348
2020,1_to_4,vitamin_a_fortification_scale_up,baseline,10.0,-0.013531,0.022767,-0.055416,-0.027425,-0.009866,0.005026,0.014902
2020,early_neonatal,folic_acid_fortification_scale_up,baseline,10.0,-0.074395,0.193611,-0.338557,-0.231826,-0.106744,0.07681,0.254346
2020,early_neonatal,vitamin_a_fortification_scale_up,baseline,10.0,-0.053485,0.118433,-0.29179,-0.111589,-0.028273,0.034495,0.082581
2020,late_neonatal,folic_acid_fortification_scale_up,baseline,10.0,-0.033823,0.11585,-0.319748,-0.052297,0.02356,0.038783,0.045007
2020,late_neonatal,vitamin_a_fortification_scale_up,baseline,10.0,-0.022617,0.067793,-0.162205,-0.06432,-0.004215,0.03329,0.045207
2020,post_neonatal,folic_acid_fortification_scale_up,baseline,10.0,-0.008304,0.104786,-0.253755,-0.03771,0.026914,0.04877,0.108142
2020,post_neonatal,vitamin_a_fortification_scale_up,baseline,10.0,0.011155,0.072662,-0.101382,-0.042653,-0.001689,0.077358,0.112618
2021,1_to_4,folic_acid_fortification_scale_up,baseline,10.0,0.001978,0.020396,-0.039017,-0.009917,0.008557,0.016742,0.027934
2021,1_to_4,vitamin_a_fortification_scale_up,baseline,10.0,4.919112,1.404088,2.411906,3.916873,5.300109,5.567258,6.647624
