In [1]:
import pandas as pd, numpy as np
# import matplotlib.pyplot as plt
import lsff_output_processing as lop

# Record who ran the notebook and when, as provenance for the saved outputs
!whoami
!date

ndbs
Fri Mar 27 14:53:51 PDT 2020


In [2]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

## Load all count space data tables and see what they are

In [3]:
# Root directory of the model results handed to the loader
base_directory = '/share/costeffectiveness/results/vivarium_conic_lsff/v4.0_folic_acid_fortification'

# Run-date identifier of the model run to load, keyed by location name
locations_rundates = dict(
    Ethiopia='2020_03_18_23_04_26',
    India='2020_03_18_23_04_36',
    Nigeria='2020_03_18_23_04_42',
)

In [5]:
# Load the count-space tables through the project helper. The keys of the
# returned mapping are (location, table_name) tuples with the location in
# lower case, e.g. ('ethiopia', 'births') -- see the listing in the next cell.
data = lop.load_all_transformed_count_data(base_directory, locations_rundates)

In [6]:
# List the tables available for Ethiopia; each key is a (location, table_name) tuple
[key for key in data.keys() if key[0] == 'ethiopia']

[('ethiopia', 'gestational_age'),
 ('ethiopia', 'transition_count'),
 ('ethiopia', 'deaths'),
 ('ethiopia', 'state_person_time'),
 ('ethiopia', 'births_with_ntd'),
 ('ethiopia', 'population'),
 ('ethiopia', 'person_time'),
 ('ethiopia', 'ylls'),
 ('ethiopia', 'ylds'),
 ('ethiopia', 'births'),
 ('ethiopia', 'birth_weight')]

## Get data to compute NTD birth prevalence in Ethiopia

In [8]:
# Peek at the first rows of Ethiopia's NTD-births count table
data['ethiopia', 'births_with_ntd'].head()

Unnamed: 0,year,sex,fortification_group,measure,input_draw,scenario,value
0,2020,female,False,live_births_with_ntds,21,baseline,42.0
1,2020,female,False,live_births_with_ntds,21,folic_acid_fortification_scale_up,40.0
2,2020,female,False,live_births_with_ntds,29,baseline,38.0
3,2020,female,False,live_births_with_ntds,29,folic_acid_fortification_scale_up,38.0
4,2020,female,False,live_births_with_ntds,55,baseline,27.0


In [71]:
# Summary statistics of the NTD-birth counts, split by scenario
data['ethiopia', 'births_with_ntd'].groupby(['scenario'])['value'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
scenario,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
baseline,750.0,24.166667,36.209673,0.0,0.0,0.0,58.0,121.0
folic_acid_fortification_scale_up,750.0,22.569333,29.991763,0.0,0.0,5.0,46.0,107.0


In [9]:
# Peek at the first rows of Ethiopia's live-births count table
data['ethiopia', 'births'].head()

Unnamed: 0,year,sex,fortification_group,measure,input_draw,scenario,value
0,2020,female,False,live_births,21,baseline,21508.0
1,2020,female,False,live_births,21,folic_acid_fortification_scale_up,20577.0
2,2020,female,False,live_births,29,baseline,21827.0
3,2020,female,False,live_births,29,folic_acid_fortification_scale_up,20978.0
4,2020,female,False,live_births,55,baseline,21611.0


In [72]:
# Summary statistics of the live-birth counts, split by scenario
data['ethiopia', 'births'].groupby(['scenario'])['value'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
scenario,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
baseline,750.0,13401.716,19519.8375,0.0,0.0,1.0,42783.5,46484.0
folic_acid_fortification_scale_up,750.0,13401.716,16103.652463,0.0,0.0,4788.0,29566.5,46042.0


In [67]:
# Which input draws are present in the live-births table
data['ethiopia', 'births']['input_draw'].unique()

array([ 21,  29,  55,  78, 155, 223, 232, 254, 307, 357, 394, 417, 482,
       514, 524, 533, 602, 620, 629, 650, 674, 680, 733, 829, 946])

In [68]:
# Number of distinct input draws in the live-births table
data['ethiopia', 'births']['input_draw'].nunique()

25

### These rows have no births, hence will result in NaN when computing prevalence

Note that there will be zero births with unknown fortification status since `'unknown'` only gets assigned to simulants alive at the beginning.

Sometimes there are also zero births with fortification status `'true'`. This makes sense because fewer people are fortified than unfortified.

In [42]:
# First few rows of the live-births table that recorded no births at all
data['ethiopia', 'births'].query('value == 0').head()

Unnamed: 0,year,sex,fortification_group,measure,input_draw,scenario,value
50,2020,female,True,live_births,21,baseline,0.0
52,2020,female,True,live_births,29,baseline,0.0
60,2020,female,True,live_births,223,baseline,0.0
64,2020,female,True,live_births,254,baseline,0.0
66,2020,female,True,live_births,307,baseline,0.0


In [43]:
# How many rows of the live-births table have zero births
data['ethiopia', 'births'].query('value == 0').shape

(612, 7)

In [52]:
# Which fortification groups the zero-birth rows belong to
data['ethiopia', 'births'].query('value == 0')['fortification_group'].unique()

array(['true', 'unknown'], dtype=object)

## Define index columns to line up data and divide

Omit the `'measure'` column because this just identifies the table; we need the values in all index columns to be the same between tables.

In [45]:
# Identifier columns shared by the births tables shown above (everything
# except 'measure' and 'value'); used as the index when aligning tables
index_columns = [
    'year',
    'sex',
    'fortification_group',
    'input_draw',
    'scenario',
]

## List all the columns in order to standardize tables

In [16]:
# Standard column order for result tables. 'age_group' and 'cause' are not in
# the births tables shown above; they are filled in when a derived table is
# put back into standard form.
all_columns_in_order = [
    'year',
    'age_group',
    'sex',
    'cause',
    'fortification_group',
    'measure',
    'input_draw',
    'scenario',
    'value',
]

## Divide NTD births by all live births to get NTD birth prevalence (per 1000 live births)

In [57]:
# Define index columns - do it here so I can re-run the cell easily
# ('measure' is omitted because it differs between the two tables)
index_columns = ['year', 'sex', 'fortification_group', 'input_draw', 'scenario']

# Load the NTD-birth and live-birth count tables, index them on the shared
# identifier columns, and keep only the counts so the two Series align row by row
ntd_births = data[('ethiopia', 'births_with_ntd')].set_index(index_columns).value
live_births = data[('ethiopia', 'births')].set_index(index_columns).value

# Divide the two pandas Series to get birth prevalence, and multiply by 1000
# to get NTD births per 1000 live births.
# Zero denominators are masked to NaN first: 0/0 already yields NaN, but a
# nonzero count divided by 0 would yield +/-inf, which dropna() does not remove.
# Index entries present in only one of the two tables also come out as NaN.
ntd_birth_prevalence = 1000 * ntd_births / live_births.where(live_births != 0)

# Drop the rows where prevalence is undefined (no live births) without
# mutating in place, then put the data back in standard form
ntd_birth_prevalence = (
    ntd_birth_prevalence
    .dropna()
    .reset_index()
    .assign(
        age_group='birth',
        measure='prevalence',
        cause='neural_tube_defects',
    )
    .loc[:, all_columns_in_order]
)

ntd_birth_prevalence.head()

Unnamed: 0,year,age_group,sex,cause,fortification_group,measure,input_draw,scenario,value
0,2020,birth,female,neural_tube_defects,False,prevalence,21,baseline,1.952762
1,2020,birth,female,neural_tube_defects,False,prevalence,21,folic_acid_fortification_scale_up,1.943918
2,2020,birth,female,neural_tube_defects,False,prevalence,29,baseline,1.740963
3,2020,birth,female,neural_tube_defects,False,prevalence,29,folic_acid_fortification_scale_up,1.811421
4,2020,birth,female,neural_tube_defects,False,prevalence,55,baseline,1.249364


In [58]:
# Sanity check: the births table has 1500 rows (750 per scenario) and 612 of
# them have zero births, so 1500 - 612 = 888 rows should remain
ntd_birth_prevalence.shape

(888, 9)

## Get separate dataframes for baseline and intervention

In [59]:
# Split the prevalence table into one frame per scenario using boolean masks
baseline_ntd_birth_prevalence = ntd_birth_prevalence.loc[
    ntd_birth_prevalence['scenario'] == "baseline"
]
intervention_ntd_birth_prevalence = ntd_birth_prevalence.loc[
    ntd_birth_prevalence['scenario'] == "folic_acid_fortification_scale_up"
]

intervention_ntd_birth_prevalence.head()

Unnamed: 0,year,age_group,sex,cause,fortification_group,measure,input_draw,scenario,value
1,2020,birth,female,neural_tube_defects,False,prevalence,21,folic_acid_fortification_scale_up,1.943918
3,2020,birth,female,neural_tube_defects,False,prevalence,29,folic_acid_fortification_scale_up,1.811421
5,2020,birth,female,neural_tube_defects,False,prevalence,55,folic_acid_fortification_scale_up,1.203979
7,2020,birth,female,neural_tube_defects,False,prevalence,78,folic_acid_fortification_scale_up,1.736955
9,2020,birth,female,neural_tube_defects,False,prevalence,155,folic_acid_fortification_scale_up,1.877982


## Calculate reduction in NTDs due to FA fortification

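### This is no good... birth prevalence *increases* on average in the intervention scenario (the mean of `baseline - intervention` is negative for the fortified `true` group in every year below)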

In [61]:
# Align the two scenarios on every identifier except 'scenario' itself
index_columns = ['year', 'sex', 'fortification_group', 'input_draw']

baseline = baseline_ntd_birth_prevalence.set_index(index_columns)['value']
intervention = intervention_ntd_birth_prevalence.set_index(index_columns)['value']

# baseline minus intervention: positive means prevalence went down with the
# intervention. Index entries missing from either scenario become NaN.
delta_ntd_birth_prevalence = baseline.sub(intervention).reset_index()
delta_ntd_birth_prevalence.head()

Unnamed: 0,year,sex,fortification_group,input_draw,value
0,2020,female,False,21,0.008844
1,2020,female,False,29,-0.070458
2,2020,female,False,55,0.045384
3,2020,female,False,78,0.114725
4,2020,female,False,155,-0.094309


In [62]:
# Distribution of the prevalence difference by year and fortification group
delta_ntd_birth_prevalence.groupby(['year', 'fortification_group'])['value'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
year,fortification_group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020,False,50.0,-0.002132,0.152236,-0.340863,-0.071459,-0.00453,0.072496,0.392787
2020,True,28.0,-0.777194,1.089774,-2.711864,-1.371841,-0.860357,-0.413497,1.948959
2021,False,50.0,-0.041211,0.162825,-0.936257,-0.052708,0.00092,0.02463,0.120622
2021,True,29.0,-0.499957,0.958946,-1.605652,-1.110289,-0.74549,-0.387597,2.342993
2022,False,50.0,-0.008183,0.150442,-0.863992,-0.048877,0.001708,0.047203,0.324996
2022,True,28.0,-0.358286,1.594296,-1.670379,-1.269272,-1.037058,0.000277,4.387192
2023,False,50.0,0.03607,0.12242,-0.20787,-0.02245,0.013145,0.064808,0.573462
2023,True,26.0,-0.805746,0.54442,-1.745581,-1.160619,-0.827244,-0.503961,0.353953
2024,False,50.0,0.001474,0.083385,-0.280974,-0.049146,0.000151,0.05038,0.256157
2024,True,27.0,-0.316726,2.222321,-1.74482,-1.313443,-0.847314,-0.471629,9.961712


## Check birth prevalence in baseline and intervention scenarios

### The prevalence is *higher* in the 'true' groups in intervention vs. baseline

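Why would that happen? **TODO:** investigate. One thing to keep in mind is that the baseline `true` estimates are noisy: the median baseline prevalence in that group is 0 in every year (see the baseline table below).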

Note that in baseline, about 45% of the rows with `fortification_group=true` have been dropped because there were no births in this group.

In [63]:
# Distribution of baseline birth prevalence by year and fortification group
baseline_ntd_birth_prevalence.groupby(['year', 'fortification_group'])['value'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
year,fortification_group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020,False,50.0,1.770523,0.348001,0.785256,1.578786,1.784775,1.98564,2.642659
2020,True,28.0,0.372384,0.975955,0.0,0.0,0.0,0.0,3.577818
2021,False,50.0,1.807914,0.290128,1.155588,1.636544,1.840816,1.987157,2.311483
2021,True,29.0,0.474882,0.985602,0.0,0.0,0.0,0.0,3.427592
2022,False,50.0,1.810242,0.321526,1.112218,1.618111,1.787337,2.017057,2.521432
2022,True,28.0,0.729153,1.550346,0.0,0.0,0.0,0.236295,5.586592
2023,False,50.0,1.839144,0.324154,1.182197,1.658862,1.78883,1.980955,2.6372
2023,True,26.0,0.264758,0.568303,0.0,0.0,0.0,0.0,1.712329
2024,False,50.0,1.768315,0.295951,1.133892,1.567911,1.712076,1.900107,2.561272
2024,True,27.0,0.798858,2.184697,0.0,0.0,0.0,0.407498,10.869565


In [64]:
# Distribution of intervention birth prevalence by year and fortification group
intervention_ntd_birth_prevalence.groupby(['year', 'fortification_group'])['value'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
year,fortification_group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020,False,50.0,1.772656,0.380281,0.789633,1.516286,1.738711,2.072419,2.623754
2020,True,50.0,1.00927,0.893106,0.0,0.340287,0.934473,1.378793,3.802281
2021,False,50.0,1.849125,0.362352,1.101857,1.616221,1.84212,2.03296,3.059976
2021,True,50.0,0.844087,0.541967,0.0,0.578941,0.891288,1.149049,2.75634
2022,False,50.0,1.818425,0.359713,1.131222,1.532991,1.832713,1.991313,3.090447
2022,True,50.0,1.060309,0.558955,0.0,0.908697,1.106737,1.411865,2.318393
2023,False,50.0,1.803074,0.362444,1.098724,1.602626,1.776263,1.988668,2.751072
2023,True,50.0,1.131808,0.64881,0.0,0.783071,1.112779,1.464758,3.584229
2024,False,50.0,1.766841,0.29662,1.048312,1.578355,1.733595,1.963915,2.542573
2024,True,50.0,1.187002,0.915963,0.0,0.686636,1.009296,1.468426,5.277045


In [70]:
# Fraction of baseline fortification_group=true rows dropped in 2021:
# 50 rows per year/group when nothing is dropped, 29 left after removing zero-birth rows
(50-29)/50

0.42