In [1]:
import pandas as pd, numpy as np
import vivarium_output_loader as vol
import lsff_output_processing as lop
# import lsff_summarizer
# from lsff_summarizer import LSFFOutputSummarizer

# Provenance: print the user running the notebook and the current date/time
# (IPython shell escapes; their output appears directly below the cell).
!whoami
!date

ndbs
Sat Apr 18 22:41:53 PDT 2020


In [20]:
# Enable IPython's autoreload extension. Mode 2 re-imports all modules before
# each cell executes, so edits to imported modules are picked up without
# restarting the kernel.
# NOTE(review): modules imported before this cell runs are only tracked once
# autoreload is loaded -- consider running this before the import cell.
%load_ext autoreload
%autoreload 2

## 1. Load count data from all locations

In [2]:
# Root directory holding the model results to inspect.
base_directory3 = '/share/costeffectiveness/results/vivarium_conic_lsff/v3.0_paf_of_one'

# Run timestamp to load for each location (location name -> run date string).
locations_rundates3 = dict(
    Ethiopia='2020_03_17_14_59_54',
    India='2020_03_17_15_00_02',
    Nigeria='2020_03_17_15_00_10',
)

In [21]:
# Load the transformed count data for every configured location through the
# project loader, then display the names of the tables that came back.
data = vol.load_transformed_count_data_and_merge_locations(
    base_directory3,
    locations_rundates3,
)
data.keys()

dict_keys(['gestational_age', 'transition_count', 'deaths', 'state_person_time', 'births_with_ntd', 'population', 'person_time', 'ylls', 'ylds', 'births', 'birth_weight'])

## 2a. Check person time data tables

In [22]:
# Preview the first rows of the person_time table to see its columns
# (it carries vitamin_a_cat and anemia_group strata but no cause column).
data['person_time'].head()

Unnamed: 0,location,year,age_group,sex,vitamin_a_cat,anemia_group,measure,input_draw,value
0,Ethiopia,2020,1_to_4,female,cat1,mild,person_time,21,44.907203
1,Ethiopia,2020,1_to_4,female,cat1,mild,person_time,29,50.865774
2,Ethiopia,2020,1_to_4,female,cat1,mild,person_time,55,55.07337
3,Ethiopia,2020,1_to_4,female,cat1,mild,person_time,78,43.449582
4,Ethiopia,2020,1_to_4,female,cat1,mild,person_time,155,44.586546


In [23]:
# (rows, columns) of the person_time table.
data['person_time'].shape

(24000, 9)

In [24]:
# Preview the state_person_time table; unlike person_time it also has a
# `cause` column.
data['state_person_time'].head()

Unnamed: 0,location,year,age_group,sex,cause,vitamin_a_cat,anemia_group,measure,input_draw,value
0,Ethiopia,2020,1_to_4,female,diarrheal_diseases,cat1,mild,person_time,21,187.589322
1,Ethiopia,2020,1_to_4,female,diarrheal_diseases,cat1,mild,person_time,29,194.436687
2,Ethiopia,2020,1_to_4,female,diarrheal_diseases,cat1,mild,person_time,55,216.386037
3,Ethiopia,2020,1_to_4,female,diarrheal_diseases,cat1,mild,person_time,78,161.347023
4,Ethiopia,2020,1_to_4,female,diarrheal_diseases,cat1,mild,person_time,155,180.558522


In [25]:
# (rows, columns) of the state_person_time table.
data['state_person_time'].shape

(144000, 10)

### Hmm, no iron deficiency or vitamin A deficiency in the causes for state person time

To get the prevalence of either condition, we would instead group the `person_time` table by the corresponding column (`vitamin_a_cat` for vitamin A deficiency, `anemia_group` for anemia).

### Also no person-time for neural tube defects, so we can't compute NTD prevalence except at birth

In [26]:
# List the distinct cause states that have person-time recorded.
data['state_person_time']['cause'].unique()

array(['diarrheal_diseases', 'lower_respiratory_infections', 'measles',
       'susceptible_to_diarrheal_diseases',
       'susceptible_to_lower_respiratory_infections',
       'susceptible_to_measles'], dtype=object)

In [54]:
# Number of distinct input draws present in the person_time table.
data['person_time']['input_draw'].nunique()

25

## 2b. Compute anemia prevalence by age group and severity

In [55]:
# Compute anemia prevalence by age group and severity
# Results are kept separate by location and input draw.
lop.set_global_index_columns(['location', 'input_draw'])

# Numerator and denominator come from the same person_time table; only the
# stratification differs (severity within age group vs. age group alone).
# NOTE(review): assumes lop.rate_or_ratio divides stratified numerator totals
# by stratified denominator totals -- confirm against lsff_output_processing.
numerator = denominator = data['person_time']
numerator_strata = ['age_group', 'anemia_group']
denominator_strata = ['age_group']

anemia_prevalence = lop.rate_or_ratio(
    numerator,
    denominator,
    numerator_strata,
    denominator_strata,
)
anemia_prevalence.head()

Unnamed: 0,age_group,location,input_draw,anemia_group,value
0,1_to_4,Ethiopia,21,mild,0.268931
1,1_to_4,Ethiopia,21,moderate,0.321483
2,1_to_4,Ethiopia,21,none,0.387533
3,1_to_4,Ethiopia,21,severe,0.022052
4,1_to_4,Ethiopia,29,mild,0.27226


In [56]:
# Summary statistics of prevalence across input draws, for each
# location / age group / anemia severity combination.
anemia_prevalence.groupby(['location', *numerator_strata])['value'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,mean,std,min,25%,50%,75%,max
location,age_group,anemia_group,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Ethiopia,1_to_4,mild,25.0,0.269805,0.004267,0.260869,0.267027,0.270345,0.27226,0.276206
Ethiopia,1_to_4,moderate,25.0,0.313601,0.009044,0.295009,0.309245,0.315094,0.319653,0.327872
Ethiopia,1_to_4,none,25.0,0.396523,0.012845,0.375574,0.388207,0.393157,0.405637,0.42582
Ethiopia,1_to_4,severe,25.0,0.020071,0.001085,0.01828,0.019314,0.019985,0.020476,0.022052
Ethiopia,early_neonatal,mild,25.0,0.267002,0.004114,0.258732,0.264585,0.267168,0.269969,0.27329
Ethiopia,early_neonatal,moderate,25.0,0.339953,0.007161,0.324257,0.335368,0.340121,0.345097,0.352452
Ethiopia,early_neonatal,none,25.0,0.369195,0.010592,0.352299,0.362218,0.367014,0.373977,0.394291
Ethiopia,early_neonatal,severe,25.0,0.023851,0.000885,0.022469,0.023069,0.023808,0.02449,0.025427
Ethiopia,late_neonatal,mild,25.0,0.268353,0.004119,0.25983,0.265956,0.268656,0.271214,0.2749
Ethiopia,late_neonatal,moderate,25.0,0.334735,0.00734,0.319009,0.330194,0.33506,0.340105,0.346667


## 3. See if I can reproduce the nonsensical YLD results seen in `output.hdf`

Yes, looks like the same data (see `2020_04_17a_compare_model_output_files.ipynb`): There are YLDs due to vitamin A deficiency in groups where `vitamin_a_cat == 'cat2'`, which is supposed to be the *absence* of vitamin A deficiency.

In [27]:
ylds = data['ylds']
print(ylds.shape)
# Print the distinct values of each stratification column in turn.
# For `cause`: no ylds from measles, LRI, or NTDs?
for column in ('location', 'cause', 'vitamin_a_cat', 'anemia_group'):
    print(ylds[column].unique())
ylds.head()

(72000, 10)
['Ethiopia' 'India' 'Nigeria']
['diarrheal_diseases' 'iron_deficiency' 'vitamin_a_deficiency']
['cat1' 'cat2']
['mild' 'moderate' 'none' 'severe']


Unnamed: 0,location,year,age_group,sex,cause,vitamin_a_cat,anemia_group,measure,input_draw,value
0,Ethiopia,2020,1_to_4,female,diarrheal_diseases,cat1,mild,ylds,21,22.49146
1,Ethiopia,2020,1_to_4,female,diarrheal_diseases,cat1,mild,ylds,29,26.88347
2,Ethiopia,2020,1_to_4,female,diarrheal_diseases,cat1,mild,ylds,55,30.886019
3,Ethiopia,2020,1_to_4,female,diarrheal_diseases,cat1,mild,ylds,78,26.885465
4,Ethiopia,2020,1_to_4,female,diarrheal_diseases,cat1,mild,ylds,155,20.254785


In [28]:
# Narrow the YLD table to vitamin A deficiency YLDs for Nigerian 1-to-4 year
# olds in the cat2 / no-anemia stratum, applying one filter per step.
vad_ylds = (
    ylds
    .query('cause == "vitamin_a_deficiency"')
    .query('location == "Nigeria" and age_group == "1_to_4"')
    .query('vitamin_a_cat == "cat2" and anemia_group == "none"')
)
vad_ylds.head()

Unnamed: 0,location,year,age_group,sex,cause,vitamin_a_cat,anemia_group,measure,input_draw,value
550,Nigeria,2020,1_to_4,female,vitamin_a_deficiency,cat2,none,ylds,21,138.145595
551,Nigeria,2020,1_to_4,female,vitamin_a_deficiency,cat2,none,ylds,29,159.216477
552,Nigeria,2020,1_to_4,female,vitamin_a_deficiency,cat2,none,ylds,55,208.218167
553,Nigeria,2020,1_to_4,female,vitamin_a_deficiency,cat2,none,ylds,78,200.973797
554,Nigeria,2020,1_to_4,female,vitamin_a_deficiency,cat2,none,ylds,155,116.01336


In [29]:
# Distribution of the filtered VAD YLD values within each simulation year.
# NOTE(review): the saved output reports 200 rows per year, which does not
# match the filters currently applied to vad_ylds (the analogous iron_ylds
# summary reports 50) -- the output looks stale; re-run from a fresh kernel.
vad_ylds.groupby('year')['value'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020,200.0,42.463187,62.359793,0.0,0.002478,4.508588,49.359592,235.964744
2021,200.0,85.822972,126.02628,0.0,0.004784,9.308261,101.039733,474.811778
2022,200.0,87.240319,128.4717,0.0,0.005082,9.239089,101.658594,482.982637
2023,200.0,88.341364,130.456522,0.0,0.005102,9.137019,101.741493,489.342824
2024,200.0,89.328181,132.076935,0.0,0.004895,9.293067,102.950906,493.80267


In [34]:
# All vitamin A deficiency YLDs for Nigeria, summarized by age group and
# vitamin A category. This rebinds vad_ylds to a broader subset than before.
vad_ylds = (
    ylds
    .query('cause == "vitamin_a_deficiency"')
    .query('location == "Nigeria"')
)
vad_ylds.groupby(['age_group', 'vitamin_a_cat'])['value'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
age_group,vitamin_a_cat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1_to_4,cat1,1000.0,62.340274,48.359677,0.853347,22.015973,55.775357,92.521236,295.716155
1_to_4,cat2,1000.0,203.939749,155.913271,2.817547,70.968186,192.375015,298.596032,976.53738
early_neonatal,cat1,1000.0,0.424042,0.666585,0.0,0.016289,0.119069,0.55512,4.393167
early_neonatal,cat2,1000.0,1.359162,1.94333,0.0,0.04771,0.447054,1.838249,8.774532
late_neonatal,cat1,1000.0,1.092235,1.748611,0.000142,0.017635,0.287773,1.459459,11.583601
late_neonatal,cat2,1000.0,3.5831,5.254214,0.00038,0.065312,1.06746,4.579388,23.844569
post_neonatal,cat1,1000.0,13.24833,11.249942,0.232236,4.480372,10.943939,17.923385,72.124187
post_neonatal,cat2,1000.0,48.957556,39.181298,0.861743,19.071784,41.524509,66.134132,205.561598


## 4. Along similar lines, let's check if there are iron deficiency YLDs in groups with no anemia

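This looks better than VAD did: in the Nigeria results below, iron deficiency YLDs are exactly zero in every stratum with `anemia_group == 'none'`.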

In [30]:
# Narrow the YLD table to iron deficiency YLDs for Nigerian 1-to-4 year olds
# in the cat2 / no-anemia stratum, applying one filter per step.
iron_ylds = (
    ylds
    .query('cause == "iron_deficiency"')
    .query('location == "Nigeria" and age_group == "1_to_4"')
    .query('vitamin_a_cat == "cat2" and anemia_group == "none"')
)
iron_ylds.head()

Unnamed: 0,location,year,age_group,sex,cause,vitamin_a_cat,anemia_group,measure,input_draw,value
350,Nigeria,2020,1_to_4,female,iron_deficiency,cat2,none,ylds,21,0.0
351,Nigeria,2020,1_to_4,female,iron_deficiency,cat2,none,ylds,29,0.0
352,Nigeria,2020,1_to_4,female,iron_deficiency,cat2,none,ylds,55,0.0
353,Nigeria,2020,1_to_4,female,iron_deficiency,cat2,none,ylds,78,0.0
354,Nigeria,2020,1_to_4,female,iron_deficiency,cat2,none,ylds,155,0.0


In [31]:
# Distribution of the filtered iron deficiency YLD values within each year.
iron_ylds.groupby('year')['value'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2023,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2024,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
# All iron deficiency YLDs for Nigeria, summarized by age group and anemia
# severity. This rebinds iron_ylds to a broader subset than before.
iron_ylds = (
    ylds
    .query('cause == "iron_deficiency"')
    .query('location == "Nigeria"')
)
iron_ylds.groupby(['age_group', 'anemia_group'])['value'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
age_group,anemia_group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1_to_4,mild,500.0,69.127099,52.44465,5.727482,28.092448,50.974109,93.523514,231.450355
1_to_4,moderate,500.0,1468.190867,1045.1848,145.539985,642.869854,1049.934377,2090.082759,4671.102718
1_to_4,none,500.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1_to_4,severe,500.0,362.339948,300.796317,26.025761,151.326757,256.788035,522.25176,1744.349289
early_neonatal,mild,500.0,0.138844,0.175956,0.002334,0.030091,0.074579,0.167964,1.076538
early_neonatal,moderate,500.0,16.9033,11.275381,2.290497,7.605912,12.763095,27.017585,45.222315
early_neonatal,none,500.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
early_neonatal,severe,500.0,6.718936,6.020507,0.30467,2.191011,4.485018,9.170326,28.253149
late_neonatal,mild,500.0,0.227453,0.274871,0.001861,0.056483,0.132643,0.299213,1.803908
late_neonatal,moderate,500.0,50.494263,33.437127,6.612109,22.993841,37.275918,82.042489,127.641712


## 5. Check more data tables

In [35]:
# Re-display the available table names as a reminder of what is left to check.
data.keys()

dict_keys(['gestational_age', 'transition_count', 'deaths', 'state_person_time', 'births_with_ntd', 'population', 'person_time', 'ylls', 'ylds', 'births', 'birth_weight'])

### Deaths and YLLs

In [37]:
# Preview the deaths table (death counts by cause and risk strata).
data['deaths'].head()

Unnamed: 0,location,year,age_group,sex,cause,vitamin_a_cat,anemia_group,measure,input_draw,value
0,Ethiopia,2020,1_to_4,female,diarrheal_diseases,cat1,mild,death,21,5.0
1,Ethiopia,2020,1_to_4,female,diarrheal_diseases,cat1,mild,death,29,8.0
2,Ethiopia,2020,1_to_4,female,diarrheal_diseases,cat1,mild,death,55,3.0
3,Ethiopia,2020,1_to_4,female,diarrheal_diseases,cat1,mild,death,78,9.0
4,Ethiopia,2020,1_to_4,female,diarrheal_diseases,cat1,mild,death,155,3.0


In [38]:
# Preview the ylls table; its columns match those of the deaths table.
data['ylls'].head()

Unnamed: 0,location,year,age_group,sex,cause,vitamin_a_cat,anemia_group,measure,input_draw,value
0,Ethiopia,2020,1_to_4,female,diarrheal_diseases,cat1,mild,ylls,21,424.232752
1,Ethiopia,2020,1_to_4,female,diarrheal_diseases,cat1,mild,ylls,29,683.695536
2,Ethiopia,2020,1_to_4,female,diarrheal_diseases,cat1,mild,ylls,55,253.604276
3,Ethiopia,2020,1_to_4,female,diarrheal_diseases,cat1,mild,ylls,78,763.007286
4,Ethiopia,2020,1_to_4,female,diarrheal_diseases,cat1,mild,ylls,155,256.88107


### Births and births with NTDs

In [40]:
# Preview the births table (live birth counts by location, year, sex and draw).
data['births'].head()

Unnamed: 0,location,year,sex,measure,input_draw,value
0,Ethiopia,2020,female,live_births,21,21508.0
1,Ethiopia,2020,female,live_births,29,21827.0
2,Ethiopia,2020,female,live_births,55,21629.0
3,Ethiopia,2020,female,live_births,78,21920.0
4,Ethiopia,2020,female,live_births,155,21866.0


In [41]:
# Preview the births_with_ntd table; its columns match those of the births table.
data['births_with_ntd'].head()

Unnamed: 0,location,year,sex,measure,input_draw,value
0,Ethiopia,2020,female,live_births_with_ntds,21,42.0
1,Ethiopia,2020,female,live_births_with_ntds,29,38.0
2,Ethiopia,2020,female,live_births_with_ntds,55,27.0
3,Ethiopia,2020,female,live_births_with_ntds,78,41.0
4,Ethiopia,2020,female,live_births_with_ntds,155,39.0


### Transition count and population

In [42]:
# Preview the transition_count table; the transition name is stored in the
# `measure` column rather than in a separate cause column.
data['transition_count'].head()

Unnamed: 0,location,year,age_group,sex,vitamin_a_cat,anemia_group,measure,input_draw,value
0,Ethiopia,2020,1_to_4,female,cat1,mild,diarrheal_diseases_to_susceptible_to_diarrheal...,21,12500.0
1,Ethiopia,2020,1_to_4,female,cat1,mild,diarrheal_diseases_to_susceptible_to_diarrheal...,29,11658.0
2,Ethiopia,2020,1_to_4,female,cat1,mild,diarrheal_diseases_to_susceptible_to_diarrheal...,55,12137.0
3,Ethiopia,2020,1_to_4,female,cat1,mild,diarrheal_diseases_to_susceptible_to_diarrheal...,78,9662.0
4,Ethiopia,2020,1_to_4,female,cat1,mild,diarrheal_diseases_to_susceptible_to_diarrheal...,155,11134.0


In [44]:
# List every distinct transition recorded in the transition_count table.
data['transition_count']['measure'].unique()

array(['diarrheal_diseases_to_susceptible_to_diarrheal_diseases_event_count',
       'lower_respiratory_infections_to_susceptible_to_lower_respiratory_infections_event_count',
       'measles_to_susceptible_to_measles_event_count',
       'susceptible_to_diarrheal_diseases_to_diarrheal_diseases_event_count',
       'susceptible_to_lower_respiratory_infections_to_lower_respiratory_infections_event_count',
       'susceptible_to_measles_to_measles_event_count'], dtype=object)

In [43]:
# Preview the population table (one value per location, measure and draw;
# no year, age or sex columns).
data['population'].head()

Unnamed: 0,location,measure,input_draw,value
0,Ethiopia,total_population,21,846725.0
1,Ethiopia,total_population,29,847552.0
2,Ethiopia,total_population,55,846172.0
3,Ethiopia,total_population,78,847337.0
4,Ethiopia,total_population,155,846910.0


In [45]:
# List the distinct population measures available.
data['population']['measure'].unique()

array(['total_population', 'total_population_dead',
       'total_population_living', 'total_population_tracked',
       'total_population_untracked'], dtype=object)

### Birth weight and gestational age

In [47]:
# Preview the birth_weight table (summary statistics per location and draw).
data['birth_weight'].head()

Unnamed: 0,location,measure,input_draw,value
0,Ethiopia,birth_weight_mean,21,3361.542087
1,Ethiopia,birth_weight_mean,29,3290.398268
2,Ethiopia,birth_weight_mean,55,3354.10135
3,Ethiopia,birth_weight_mean,78,3352.37455
4,Ethiopia,birth_weight_mean,155,3379.348886


In [48]:
# List the distinct birth weight summary measures available.
data['birth_weight']['measure'].unique()

array(['birth_weight_mean', 'birth_weight_sd'], dtype=object)

In [49]:
# Preview the gestational_age table (summary statistics per location and draw).
data['gestational_age'].head()

Unnamed: 0,location,measure,input_draw,value
0,Ethiopia,gestational_age_mean,21,38.71585
1,Ethiopia,gestational_age_mean,29,38.482497
2,Ethiopia,gestational_age_mean,55,38.75584
3,Ethiopia,gestational_age_mean,78,38.712993
4,Ethiopia,gestational_age_mean,155,38.631846


In [50]:
# List the distinct gestational age summary measures available.
data['gestational_age']['measure'].unique()

array(['gestational_age_mean', 'gestational_age_sd'], dtype=object)