In [1]:
from functions_for_all_nutrients import *
from vitamin_a_and_zinc_functions import *
from low_ses_functions import *

In [2]:
# Get absolute path of repo based on relative directory structure,
# so it should work for all users
import os.path
vivarium_research_lsff_path = os.path.abspath("..")
vivarium_research_lsff_path

'/ihme/homes/alibow/notebooks/vivarium_research_lsff'

In [3]:
username = !whoami
username

['alibow']

In [4]:
# Make shared directories in which to store GBD data so we don't have to keep calling `get_draws`

# Nathaniel has saved some .hdf files in this directory:
# You can change it if you save GBD data in a different directory.
read_share_dir = f'/share/scratch/users/ndbs/vivarium_lsff/gbd_data'

# GBD data will be stored here in an .hdf file if it doesn't exist in the directory above:
# You can change this directory name if you want, and you may need to create it before running code below.
#write_share_dir = f'/share/scratch/users/{username[0]}/vivarium_lsff/gbd_data'

In [5]:
index_cols=['location_id','sex_id','age_group_id']
age_group_ids = [2,3,4,5]
sex_ids = [1,2]
coverage_levels = [0.2,0.5,0.8,1]
years = [2021,2022,2023,2024,2025]

In [6]:
coverage_data_dir = vivarium_research_lsff_path + '/multiplication_models/low_ses_coverage.csv'
# # Old version that may not work for all users:
# coverage_data_dir = f'/ihme/homes/{username[0]}/notebooks/' \
#     'vivarium_research_lsff/data_prep/outputs/lsff_input_coverage_data.csv'

In [7]:
location_ids = [214]
location_ids

[214]

In [8]:
vitamin_a_burden_multiplier = 1.2
zinc_burden_multiplier = 1.2

In [9]:
cause_ids = [341, #measles
            302] #diarrheal diseases]

In [10]:
burden_multipliers = pd.DataFrame()
burden_multipliers['cause_id'] = cause_ids
burden_multipliers['multiplier'] = [1.48,1.45]
burden_multipliers

Unnamed: 0,cause_id,multiplier
0,341,1.48
1,302,1.45


# VITAMIN A

In [11]:
# vitamin A specific -- these should be replaced for other models
rei_id = 96
nutrient = 'vitamin a'
vehicles = ['oil','wheat flour','maize flour','industry oil',
            'zero oil','zero wheat flour','zero maize flour','zero industry oil',
           'oil from lit','industry oil from lit','wheat flour from lit','maize flour from lit']#,
            #'oil*','wheat flour*','maize flour*','industry oil*']
effective_fractions = [0, 0, (365 - (365 * 0.5)) / (365 - 28), 1]

In [12]:
# define no fortification relative risk distribution
# vitamin a specific -- this should be replaced for other models

from numpy import log
from scipy.stats import norm, lognorm

# median and 0.975-quantile of lognormal distribution for RR
median = 2.22
q_975 = 5.26

# 0.975-quantile of standard normal distribution (=1.96, approximately)
q_975_stdnorm = norm().ppf(0.975)

mu = log(median) # mean of normal distribution for log(RR)
sigma = (log(q_975) - mu) / q_975_stdnorm # std dev of normal distribution for log(RR)

# Frozen lognormal distribution for RR, representing uncertainty in our effect size
# (s is the shape parameter)b
rr_distribution = lognorm(s=sigma, scale=median)

In [13]:
vitamin_a_baseline_coverage, vitamin_a_counterfactual_coverage = get_baseline_and_counterfactual_coverage(coverage_data_dir,
                                             location_ids,
                                             nutrient,
                                             vehicles,
                                             years,
                                             coverage_levels, 'U5', True)

Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data


In [19]:
vitamin_a_baseline_effective_coverage = get_effective_vitamin_a_coverage(vitamin_a_baseline_coverage.set_index(['location_id','year','vehicle']), 
                                                               sex_ids,
                                                               age_group_ids,
                                                               effective_fractions,
                                                               years)
vitamin_a_counterfactual_effective_coverage = get_effective_vitamin_a_coverage(vitamin_a_counterfactual_coverage.set_index(['location_id','year','vehicle','coverage_level']), 
                                                               sex_ids,
                                                               age_group_ids,
                                                               effective_fractions,
                                                               years)

In [20]:
vitamin_a_rr_deficiency_nofort = generate_rr_deficiency_nofort_draws(mu, sigma, location_ids)
vitamin_a_rr_deficiency_nofort.mean(axis=1)

location_id
214    2.396239
dtype: float64

In [21]:
#vitamin_a_exposure = pull_exposure(rei_id, sex_ids, age_group_ids, location_ids)
#adjusted_exposure = adjust_exposure(vitamin_a_exposure, 'cat1', vitamin_a_burden_multiplier, ['location_id','age_group_id','sex_id'])

In [22]:
#dalys = pull_affected_dalys([341, 302], age_group_ids, sex_ids, location_ids)

In [23]:
#adjusted_dalys = adjust_dalys(dalys, burden_multipliers)

In [24]:
#relative_risks = pull_relative_risks(rei_id, age_group_ids, sex_ids)

In [25]:
#pafs = calculate_adjusted_paf(adjusted_exposure, relative_risks)
#pafs.head()

In [26]:
#vitamin_a_exposure_prepped = vitamin_a_exposure.loc[vitamin_a_exposure.parameter=='cat1'].set_index(['location_id','age_group_id','sex_id'])
#vitamin_a_exposure_prepped = vitamin_a_exposure_prepped.drop(columns=[c for c in vitamin_a_exposure_prepped.columns if 'draw' not in c])

In [27]:
#unadjusted_pafs = calculate_adjusted_paf(vitamin_a_exposure_prepped, relative_risks).mean(axis=1)
#unadjusted_pafs.head()

In [28]:
# compare PAFs
attrib_302 = get_draws(
            gbd_id_type=['rei_id','cause_id'], 
            gbd_id=[96,302],
            location_id=214,
            source='burdenator',
            measure_id=[2],
            metric_id=1, 
            age_group_id=age_group_ids,
            year_id=2019,
            gbd_round_id=6,
            status='best',
            decomp_step='step5')
attrib_341 = get_draws(
            gbd_id_type=['rei_id','cause_id'], 
            gbd_id=[96,341],
            location_id=214,
            source='burdenator',
            measure_id=[2],
            metric_id=1, 
            age_group_id=age_group_ids,
            year_id=2019,
            gbd_round_id=6,
            status='best',
            decomp_step='step5')
attrib = pd.concat([attrib_302, attrib_341], ignore_index=True)
attrib

Unnamed: 0,age_group_id,cause_id,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,...,draw_996,draw_997,draw_998,draw_999,location_id,measure_id,metric_id,rei_id,sex_id,year_id
0,4,302,54594.140823,116590.227558,22866.734911,82215.848008,16681.797449,15324.017686,10656.592759,40776.054225,...,31175.402688,66527.509791,27849.836706,21765.008009,214,2,1,96,1,2019
1,5,302,52529.09813,110792.813901,27980.540701,101118.512229,6322.111616,25719.954208,24954.259641,53858.523863,...,30267.067624,40344.707475,58172.576661,22834.344543,214,2,1,96,1,2019
2,4,302,25644.765714,51467.545651,23438.738794,59895.684518,14908.86626,15021.064606,7179.768723,28637.645635,...,18661.743983,62357.414333,9825.613041,9999.914503,214,2,1,96,2,2019
3,5,302,35309.011285,55380.816766,40727.019075,43661.701172,11320.239133,11976.26268,11660.422804,44700.932737,...,16460.858506,30104.899398,25640.992089,10186.40511,214,2,1,96,2,2019
4,4,341,3897.336868,6412.049545,1248.533148,3699.258665,635.317629,322.741288,107.238029,2343.385226,...,926.188411,1556.716386,2803.287203,252.393913,214,2,1,96,1,2019
5,5,341,15365.707929,29397.851287,5930.85367,22101.337106,976.246817,2147.966142,872.620003,9683.076429,...,4590.326195,3717.251312,20189.021928,1293.337381,214,2,1,96,1,2019
6,4,341,2064.453622,4288.852572,1490.626266,3222.004705,458.96614,356.683083,68.867871,1547.511978,...,683.759953,1393.544312,1006.828163,164.737727,214,2,1,96,2,2019
7,5,341,9435.134027,16737.200566,9252.700633,10454.761051,2043.095492,1204.612682,484.84043,11303.823449,...,2467.455769,3810.858389,10816.011899,641.59609,214,2,1,96,2,2019


In [29]:
attrib_prepped = attrib.groupby(['location_id','cause_id','age_group_id','sex_id']).sum().filter([c for c in attrib.columns if 'draw' in c])
attrib_prepped = attrib_prepped.reset_index().merge(burden_multipliers, on='cause_id')
for i in list(range(0,1000)):
    attrib_prepped[f'draw_{i}'] = attrib_prepped[f'draw_{i}'] * attrib_prepped['multiplier']
attrib_prepped = attrib_prepped.groupby(['location_id','sex_id','age_group_id']).sum().filter([c for c in attrib_prepped.columns if 'draw' in c])
vitamin_a_dalys = attrib_prepped * vitamin_a_burden_multiplier
vitamin_a_dalys.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,sex_id,age_group_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
214,1,4,101915.47531,214254.795941,42005.513616,149625.458923,30154.651671,27236.979302,18732.92614,75112.186512,126769.892224,177032.118673,...,55177.060121,59901.552885,72270.275956,104632.027911,149230.890366,57599.940561,55890.111296,118522.595338,53437.353942,38319.365525
214,1,5,118690.128029,244990.080072,59219.336938,215198.185979,12734.28856,48567.50819,44970.1849,110910.97526,131456.993388,57813.599429,...,29260.674312,91952.867662,205730.379629,55332.69814,182012.409307,53001.235554,60817.116987,76801.629335,137075.986333,42028.726693
214,2,4,48288.361976,97170.531601,43430.757751,109940.771418,26756.551156,26770.12157,12615.106916,52577.884677,46874.49364,71460.421288,...,48419.93037,76603.279264,101358.564577,54899.819856,58665.78624,64859.059653,33685.792207,110976.835636,18884.693508,17692.425439
214,2,5,78194.477668,126087.889378,87297.809514,94539.015667,23325.753685,22978.089186,21150.212284,97855.213408,76895.908846,120244.412293,...,25584.137218,111053.204658,100062.498711,89469.965236,157409.827959,60113.852086,33024.095247,59150.609452,63824.563367,18863.819548


In [30]:
#attributable_daly_rates = attributable_daly_rates.reset_index().set_index(['location_id','age_group_id','sex_id','cause_id']).sort_index()
#attributable_daly_rates.head()

In [31]:
#attributable_daly_rates = calculate_attributable_dalys(adjusted_dalys, pafs)
#attributable_daly_rates.head()

In [32]:
#vitamin_a_dalys = calculate_overall_attributable_daly_counts(attributable_daly_rates, age_group_ids, location_ids, sex_ids)
#vitamin_a_dalys = add_in_adjusted_paf_of_one(vitamin_a_dalys, location_ids, sex_ids, age_group_ids, 389, vitamin_a_burden_multiplier)
#vitamin_a_dalys

In [33]:
vitamin_a_paf_deficiency_nofort = calculate_paf_deficiency_nofort(
                                    vitamin_a_rr_deficiency_nofort, 
                                    vitamin_a_baseline_effective_coverage)

In [34]:
vitamin_a_pif_deficiency_nofort = calculate_pif_deficiency_nofort(
                                    vitamin_a_paf_deficiency_nofort,
                                    vitamin_a_baseline_effective_coverage,
                                    vitamin_a_counterfactual_effective_coverage)

In [35]:
vitamin_a_overall_pifs, vitamin_a_daly_reduction_counts = calculate_final_pifs_and_daly_reductions(
                                                    vitamin_a_pif_deficiency_nofort,
                                                    vitamin_a_dalys,
                                                    coverage_levels, years)

In [36]:
vitamin_a_daly_reduction_rates = calculate_rates(vitamin_a_daly_reduction_counts, 
                                            location_ids, 
                                            age_group_ids, 
                                            sex_ids)

In [37]:
vitamin_a_overall_pifs['measure'] = 'pif'
vitamin_a_daly_reduction_counts['measure'] = 'counts_averted'
vitamin_a_daly_reduction_rates['measure'] = 'rates_averted'
vitamin_a_results = pd.concat([vitamin_a_overall_pifs.reset_index(),
                         vitamin_a_daly_reduction_counts.reset_index(),
                         vitamin_a_daly_reduction_rates.reset_index()],
                        ignore_index=True)

# pull in overall national results
vitamin_a_no_oil = pd.read_pickle('results_raw/vitamin_a.pkl')
vitamin_a_no_oil = vitamin_a_no_oil.loc[vitamin_a_no_oil.vehicle.isin([v for v in vehicles if 'oil' not in v])]
vitamin_a_oil = pd.read_pickle('results_raw/vitamin_a_oil_update.pkl')
vitamin_a_national = pd.concat([vitamin_a_no_oil, vitamin_a_oil], ignore_index=True)
vitamin_a_national = (vitamin_a_national
                      .loc[vitamin_a_national.location_id.isin(vitamin_a_results.reset_index().location_id.unique())]
                      .loc[vitamin_a_national.vehicle.isin(vehicles)])

# location_comparisons
vitamin_a_results['subgroup'] = 'Lower Wealth Quintile'
vitamin_a_national['subgroup'] = 'Total Population'
vitamin_a_results = pd.concat([vitamin_a_results,vitamin_a_national], ignore_index=True)
vitamin_a_results.head()

draws,location_id,year,vehicle,coverage_level,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,...,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999,measure,subgroup
0,214,2021,industry oil,0.2,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif,Lower Wealth Quintile
1,214,2021,industry oil,0.5,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif,Lower Wealth Quintile
2,214,2021,industry oil,0.8,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif,Lower Wealth Quintile
3,214,2021,industry oil,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif,Lower Wealth Quintile
4,214,2021,industry oil from lit,0.2,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif,Lower Wealth Quintile


# ZINC

In [38]:
# ZINC
rei_id = 97
nutrient = 'zinc'
vehicles = ['wheat flour', 'maize flour','zero wheat flour','zero maize flour',
           'wheat flour from lit','maize flour from lit']#,'wheat flour*','maize flour*']
effective_fractions = [0,0,0,1]

In [39]:
# define no fortification relative risk distribution
# vitamin a specific -- this should be replaced for other models

from numpy import log
from scipy.stats import norm, lognorm

# median and 0.975-quantile of lognormal distribution for RR
median = 0.47
q_975 = 0.69

# 0.975-quantile of standard normal distribution (=1.96, approximately)
q_975_stdnorm = norm().ppf(0.975)

mu = log(median) # mean of normal distribution for log(RR)
sigma = (log(q_975) - mu) / q_975_stdnorm # std dev of normal distribution for log(RR)

# Frozen lognormal distribution for RR, representing uncertainty in our effect size
# (s is the shape parameter)
rr_distribution = lognorm(s=sigma, scale=median)

In [40]:
zinc_baseline_coverage, zinc_counterfactual_coverage = zinc_baseline_coverage, zinc_counterfactual_coverage = get_baseline_and_counterfactual_coverage(coverage_data_dir,
                                             location_ids,
                                             'zinc',
                                             vehicles,
                                             years,
                                             coverage_levels, 'U5', True)

Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data


In [41]:
zinc_rr_deficiency_nofort = (1 / generate_rr_deficiency_nofort_draws(mu, sigma, location_ids))

In [42]:
zinc_baseline_effective_coverage = apply_age_related_effective_coverage_restrictions(
                                                                zinc_baseline_coverage.set_index([c for c in zinc_baseline_coverage.columns if 'draw' not in c]),
                                                                sex_ids,
                                                                age_group_ids,
                                                                effective_fractions)
zinc_counterfactual_effective_coverage = apply_age_related_effective_coverage_restrictions(
                                                                zinc_counterfactual_coverage.set_index([c for c in zinc_counterfactual_coverage.columns if 'draw' not in c]),
                                                                sex_ids,
                                                                age_group_ids,
                                                                effective_fractions)

In [43]:
#zinc_exposure = pull_exposure(97, sex_ids, age_group_ids, location_ids)

In [44]:
#zinc_rrs = pull_relative_risks(97, age_group_ids, sex_ids)

In [45]:
#adjusted_zinc_exposure = adjust_exposure(zinc_exposure, 'cat1', 
#                                         zinc_burden_multiplier, ['location_id','age_group_id','sex_id'])

In [46]:
#zinc_affected_dalys = pull_affected_dalys(302, age_group_ids, sex_ids, location_ids)

In [47]:
#zinc_adjusted_dalys = adjust_dalys(zinc_affected_dalys, burden_multipliers)

In [48]:
#zinc_pafs = calculate_adjusted_paf(adjusted_zinc_exposure, zinc_rrs)

In [49]:
#zinc_dalys = calculate_attributable_dalys(zinc_adjusted_dalys, zinc_pafs)
#pop = get_population(sex_id=sex_ids,
#                              age_group_id=age_group_ids,
#                              location_id=location_ids,
#                              year_id=2019,
#                              gbd_round_id=6,
#                             decomp_step='step4')
#zinc_dalys = zinc_dalys.reset_index().merge(pop, on=['location_id','age_group_id','sex_id'])
#for i in list(range(0,1000)):
#    zinc_dalys[f'draw_{i}'] = zinc_dalys[f'draw_{i}'] * zinc_dalys[f'population']
#zinc_dalys = zinc_dalys.groupby('location_id').sum()
#zinc_dalys = zinc_dalys.drop(columns=[c for c in zinc_dalys.columns if 'draw' not in c])
#zinc_dalys.head()

In [50]:
zinc_paf_deficiency_nofort = calculate_paf_deficiency_nofort(
                                    zinc_rr_deficiency_nofort, 
                                    zinc_baseline_effective_coverage)

In [51]:
zinc_pif_deficiency_nofort = calculate_pif_deficiency_nofort(
                                    zinc_paf_deficiency_nofort,
                                    zinc_baseline_effective_coverage,
                                    zinc_counterfactual_effective_coverage)

In [52]:
zinc_attrib_302 = get_draws(
            gbd_id_type=['rei_id','cause_id'], 
            gbd_id=[97,302],
            location_id=214,
            source='burdenator',
            measure_id=[2],
            metric_id=1, 
            age_group_id=age_group_ids,
            year_id=2019,
            gbd_round_id=6,
            status='best',
            decomp_step='step5')

In [53]:
attrib_prepped = zinc_attrib_302.groupby(['location_id','cause_id','sex_id','age_group_id']).sum().filter([c for c in zinc_attrib_302.columns if 'draw' in c])
attrib_prepped = attrib_prepped.reset_index().merge(burden_multipliers, on='cause_id')
for i in list(range(0,1000)):
    attrib_prepped[f'draw_{i}'] = attrib_prepped[f'draw_{i}'] * attrib_prepped['multiplier']
attrib_prepped = attrib_prepped.groupby(['location_id','sex_id','age_group_id']).sum().filter([c for c in attrib_prepped.columns if 'draw' in c])
zinc_dalys = attrib_prepped * zinc_burden_multiplier
zinc_dalys.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,sex_id,age_group_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
214,1,5,26886.387084,2186.827041,14873.103309,29688.41072,5749.687878,5878.332342,7777.120917,35894.569203,39982.36613,15862.744214,...,23524.030903,53836.5172,11251.013697,20790.663659,66902.620426,60280.627663,23052.589844,14640.763681,18994.749033,26852.779488
214,2,5,29551.228471,1141.376468,9129.202846,28989.043687,5229.374369,4031.821339,6827.653831,29759.331283,32178.948439,13451.607635,...,21216.320402,46872.629609,10410.16055,19925.170322,64040.726232,55340.969034,19372.616229,14520.103517,17541.046389,27790.042617


In [54]:
zinc_overall_pifs, zinc_daly_reduction_counts = calculate_final_pifs_and_daly_reductions(
                                                    zinc_pif_deficiency_nofort,
                                                    zinc_dalys,
                                                    coverage_levels, years)

In [55]:
zinc_daly_reduction_rates = calculate_rates(zinc_daly_reduction_counts, 
                                            location_ids, 
                                            age_group_ids, 
                                            sex_ids)

In [56]:
zinc_overall_pifs['measure'] = 'pif'
zinc_daly_reduction_counts['measure'] = 'counts_averted'
zinc_daly_reduction_rates['measure'] = 'rates_averted'
zinc_results = pd.concat([zinc_overall_pifs.reset_index(),
                         zinc_daly_reduction_counts.reset_index(),
                         zinc_daly_reduction_rates.reset_index()],
                        ignore_index=True)
zinc_national_results = pd.read_pickle(vivarium_research_lsff_path + '/multiplication_models/results_raw/zinc.pkl')
zinc_national_results = (zinc_national_results
                         .loc[zinc_national_results.location_id.isin(vitamin_a_results.reset_index().location_id.unique())]
                         .loc[zinc_national_results.vehicle.isin(vehicles)])
# location_comparisons
zinc_results['subgroup'] = 'Lower Wealth Quintile'
zinc_national_results['subgroup'] = 'Total Population'
zinc_results = pd.concat([zinc_results,zinc_national_results], ignore_index=True)
zinc_results.head()

draws,location_id,year,vehicle,coverage_level,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,...,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999,measure,subgroup
0,214,2021,maize flour,0.2,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif,Lower Wealth Quintile
1,214,2021,maize flour,0.5,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif,Lower Wealth Quintile
2,214,2021,maize flour,0.8,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif,Lower Wealth Quintile
3,214,2021,maize flour,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif,Lower Wealth Quintile
4,214,2021,maize flour from lit,0.2,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif,Lower Wealth Quintile


# Folic Acid

In [57]:
from mult_model_fns import *
from folic_acid.folic_acid_mult_model_fns import *

In [58]:
sexes = [1,2]
ages = [2,3,4,5]

draws = [f'draw_{i}' for i in range(1_000)]
index_cols=['location_id','sex_id','age_group_id']

# define alternative scenario coverage levels (low, medium, high)
    # this parameter represents the proportion of additional coverage achieved in the
    # alternative scenario, defined as the difference between the proportion of the population
    # that eats the fortified vehicle and the proportion of the population that eats 
    # the industrially produced vehicle
alternative_scenario_coverage_levels = [0.2, 0.5, 0.8, 1]
coverage_levels = alternative_scenario_coverage_levels

rei_id = [] #folic acid doesn't effect any risks; just NTDs
cause_ids = [642] # NTDs
nonfatal_causes = [642] # YLLs and YLDs
nutrient = 'folic acid'
vehicles = ['maize flour', 'wheat flour', 'zero wheat flour', 'zero maize flour',
           'maize flour from lit','wheat flour from lit']#,'wheat flour*','maize flour*'] 
ntd_burden_multiplier = 1.9

In [59]:
# define no fortification relative risk distribution
# folic acid specific -- this should be replaced for other models

from numpy import log
from scipy.stats import norm, lognorm

# median and 0.975-quantile of lognormal distribution for RR
median = 1.71
q_975 = 2.04

# 0.975-quantile of standard normal distribution (=1.96, approximately)
q_975_stdnorm = norm().ppf(0.975)

mu = log(median) # mean of normal distribution for log(RR)
sigma = (log(q_975) - mu) / q_975_stdnorm # std dev of normal distribution for log(RR)

In [60]:
# calculate relative risk for lack of fortification
# https://vivarium-research.readthedocs.io/en/latest/concept_models/vivarium_conic_lsff/concept_model.html#effect-size-folic-acid
rr_ntds_nofort = format_rrs(lognormal_draws(mu, sigma, seed = 7), location_ids)
rr_ntds_nofort.mean(axis=1)

location_id
214    1.712028
dtype: float64

In [61]:
dalys = pull_dalys(cause_ids, cause_ids, location_ids, ages, sexes, index_cols)
dalys = dalys * ntd_burden_multiplier
dalys.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,sex_id,age_group_id,cause_id,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
214,1,2,642,371654.350684,386420.944817,408576.51947,369463.788284,277590.780637,454249.752867,128967.746274,192585.662321,207262.068624,215861.627186,...,973711.787553,707822.620698,571634.862518,505884.695647,535430.778939,762361.867689,570650.425039,606266.319512,636838.16305,536112.846739
214,1,3,642,91950.154891,98988.128002,91700.730925,76079.180311,87033.756918,128456.849137,46352.17013,44311.209668,40963.841682,50359.24333,...,146064.764447,96548.98829,159747.628844,113662.347304,121240.001572,162170.38594,124831.881862,115952.963359,131625.011172,113203.031468
214,1,4,642,219205.901261,255927.350726,237631.759195,176955.775749,240942.237385,317819.023184,110479.064599,127007.188455,111500.187686,118900.83913,...,506474.381088,402903.21976,523373.982363,406292.21043,325882.974519,482676.603178,401258.687933,398379.758392,431424.809219,341175.445091
214,1,5,642,69514.717045,70895.56424,76151.755169,102729.665638,60879.241542,101688.133258,49831.740455,44090.634038,72001.264279,41761.75104,...,580354.683367,532207.819749,755778.810062,434940.741712,353321.070408,629851.363904,479181.293219,568550.290465,506795.149231,558194.732414
214,2,2,642,306044.594554,367523.818857,232360.21323,479655.493223,340292.191067,306885.255405,272122.145649,367452.873504,365110.970477,334222.344053,...,628574.962663,538605.555438,462893.390582,618597.02979,484044.737475,679585.830321,564278.697787,867215.579353,662551.896681,906061.29659


In [62]:
pop = get_population(gbd_round_id=6,
                    location_id=location_ids,
                    sex_id=sexes,
                    age_group_id=ages,
                    year_id=2019,
                    decomp_step='step4')

In [63]:
low_ses_results = pd.DataFrame()
for vehicle in vehicles:
    alpha, alpha_star = get_baseline_and_counterfactual_coverage(coverage_data_dir,
                                             location_ids,
                                             'folic acid',
                                             [vehicle],
                                             list(range(2022,2026)),
                                             coverage_levels, 'WRA', True)
    alpha = alpha.loc[alpha.year==2025].set_index('location_id').drop(columns=['vehicle','year'])
    alpha_star = alpha_star.loc[alpha_star.year==2025].set_index(['location_id','coverage_level']).drop(columns=['vehicle','year'])
    gets_intervn = prop_gets_intervention_effect(location_ids, year_start=2022, estimation_years = range(2022,2026))
    new_coverage = percolate_new_coverage(gets_intervn, alpha, alpha_star)
    paf_ntds_nofort = paf_o_r(rr_ntds_nofort, alpha)
    pif_ntds_nofort = pif_o_r(paf_ntds_nofort, alpha = alpha, alpha_star = new_coverage)
    dalys_averted = calc_dalys_averted(dalys, pif_ntds_nofort)
    dalys_averted_u5 = dalys_averted.reset_index().groupby(['location_id','year_id','coverage_level']).sum()[draws]
    dalys_averted_u5['vehicle'] = vehicle
    counts = dalys_averted_u5.reset_index().loc[dalys_averted_u5.reset_index().year_id==2025]
    counts['measure'] = 'counts_averted'
    counts = counts.set_index([c for c in counts.columns if 'draw' not in c])
    rates = counts.reset_index().merge(pop.groupby('location_id').sum().drop(columns='year_id'), on='location_id')
    for i in list(range(0,1000)):
        rates[f'draw_{i}'] = rates[f'draw_{i}'] / rates['population'] * 100_000
    rates['measure'] = 'rates_averted'
    rates = rates.set_index(['location_id','measure','coverage_level','year_id'])
    rates = rates.drop(columns=[c for c in rates.columns if 'draw' not in c])
    pif = (counts / dalys.groupby('location_id').sum() * 100).reset_index()
    pif['measure'] = 'pif'
    pif = pif.set_index(['location_id','measure','coverage_level','year_id'])
    #counts['measure'] = 'counts_averted'
    counts = counts.reset_index().set_index(['location_id','measure','coverage_level','year_id'])
    vehicle_results = pd.concat([rates, pif, counts], sort=True).reset_index()
    vehicle_results['vehicle'] = vehicle

    low_ses_results = pd.concat([vehicle_results, low_ses_results], ignore_index=True)
    
low_ses_results.head()

Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data


Unnamed: 0,location_id,measure,coverage_level,year_id,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,...,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999,vehicle
0,214,rates_averted,0.2,2025,15.153919,23.039473,0.734885,9.974342,11.953546,-2.85202,...,20.88652,42.561307,-8.210983,15.532621,19.159405,32.187166,26.555074,24.19971,4.44554,wheat flour from lit
1,214,rates_averted,0.5,2025,127.940549,103.764825,105.112808,119.448072,111.22448,154.808673,...,187.258262,207.940052,273.307868,189.313913,293.704671,229.399066,252.247386,229.749324,274.277852,wheat flour from lit
2,214,rates_averted,0.8,2025,240.727179,184.490177,209.490731,228.921803,210.495414,312.469365,...,353.630004,373.318797,554.826719,363.095204,568.249936,426.610967,477.939698,435.298939,544.110163,wheat flour from lit
3,214,rates_averted,1.0,2025,315.918265,238.307078,279.076013,301.90429,276.676036,417.576494,...,464.544498,483.571294,742.505953,478.949399,751.280114,558.085567,628.401239,572.332015,723.998371,wheat flour from lit
4,214,pif,0.2,2025,0.357231,0.473756,0.018183,0.183692,0.277411,-0.054819,...,0.214348,0.422089,-0.090773,0.155126,0.144385,0.295072,0.201547,0.201345,0.03375,wheat flour from lit


In [64]:
# pull in overall folic_acid_national_resultsional results
folic_acid_national_results = pd.read_pickle('/ihme/homes/alibow/notebooks/vivarium_research_lsff/' + '/multiplication_models/results_raw/folic_acid_waterfall_salt.pkl')
folic_acid_national_results = (folic_acid_national_results
                               .loc[folic_acid_national_results.location_id.isin(low_ses_results.reset_index()
                                                                                 .location_id.unique())]
                               .loc[folic_acid_national_results.vehicle.isin(['wheat flour','maize flour','zero wheat flour','zero maize flour'])])
# location_comparisons
low_ses_results['subgroup'] = 'Lower Wealth Quintile'
low_ses_results['nutrient'] = nutrient
folic_acid_national_results['subgroup'] = 'Total Population'
folic_acid_results = pd.concat([low_ses_results.reset_index(),
                     folic_acid_national_results.loc[folic_acid_national_results.year_id==2025]], 
                               ignore_index=True).rename(columns={'year_id':'year'})
folic_acid_results.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  del sys.path[0]


Unnamed: 0,coverage_level,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,...,draw_997,draw_998,draw_999,index,location_id,measure,nutrient,subgroup,vehicle,year
0,0.2,15.153919,23.039473,0.734885,9.974342,11.953546,-2.85202,5.398476,6.373405,12.149584,...,26.555074,24.19971,4.44554,0,214,rates_averted,folic acid,Lower Wealth Quintile,wheat flour from lit,2025
1,0.5,127.940549,103.764825,105.112808,119.448072,111.22448,154.808673,62.061118,106.514147,78.972387,...,252.247386,229.749324,274.277852,1,214,rates_averted,folic acid,Lower Wealth Quintile,wheat flour from lit,2025
2,0.8,240.727179,184.490177,209.490731,228.921803,210.495414,312.469365,118.723759,206.65489,145.79519,...,477.939698,435.298939,544.110163,2,214,rates_averted,folic acid,Lower Wealth Quintile,wheat flour from lit,2025
3,1.0,315.918265,238.307078,279.076013,301.90429,276.676036,417.576494,156.498854,273.415385,190.343726,...,628.401239,572.332015,723.998371,3,214,rates_averted,folic acid,Lower Wealth Quintile,wheat flour from lit,2025
4,0.2,0.357231,0.473756,0.018183,0.183692,0.277411,-0.054819,0.168674,0.162537,0.326365,...,0.201547,0.201345,0.03375,4,214,pif,folic acid,Lower Wealth Quintile,wheat flour from lit,2025


# Results Viz

In [65]:
vitamin_a_results['nutrient'] = 'vitamin a'
zinc_results['nutrient'] = 'zinc'
folic_acid_results['nutrient'] = 'folic acid'

results = pd.concat([vitamin_a_results,
                    zinc_results,
                    folic_acid_results], ignore_index=True, sort=True)
results.head()

Unnamed: 0,coverage_level,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,...,draw_997,draw_998,draw_999,index,location_id,measure,nutrient,subgroup,vehicle,year
0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,,214,pif,vitamin a,Lower Wealth Quintile,industry oil,2021
1,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,,214,pif,vitamin a,Lower Wealth Quintile,industry oil,2021
2,0.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,,214,pif,vitamin a,Lower Wealth Quintile,industry oil,2021
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,,214,pif,vitamin a,Lower Wealth Quintile,industry oil,2021
4,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,,214,pif,vitamin a,Lower Wealth Quintile,industry oil from lit,2021


In [69]:
#results_nat_star = results.loc[results.subgroup=='Total Population']
#results_nat_star['vehicle'] = results_nat_star.vehicle + '*'
#results_temp = pd.concat([results, results_nat_star], ignore_index=True)

#results_zero = results_temp.loc[results_temp.vehicle.isin(['zero wheat flour','zero oil','zero maize','zero industry oil'])]
#results_zero['vehicle'] = results_zero.vehicle + '*'
#results = pd.concat([results_temp, results_nat_star], ignore_index=True)
#results.head()

In [70]:
#results_star_cov = results.loc[results.vehicle.isin(['oil*','industry oil*','wheat flour*','maize flour*'])]
#results_star_cov['vehicle'] = results_star_cov.vehicle.str.replace('*','')
#results_star_cov['coverage_level'] = results_star_cov.coverage_level * (-1)
#results = pd.concat([results.loc[results.vehicle.isin([v for v in vehicles if '*' not in v])],
#                    results_star_cov], ignore_index=True, sort=True)
#results.head()

# Coverage

In [71]:
nat_cov_baseline = pd.DataFrame()
nat_cov_counterfactual = pd.DataFrame()
for nutrient in ['vitamin a','zinc','folic acid']:
    if nutrient=='vitamin a':
        vehicles = ['oil','industry oil','wheat flour','maize flour',
                   'oil from lit','industry oil from lit','wheat flour from lit','maize flour from lit']
    else:
        vehicles = ['wheat flour','maize flour','wheat flour from lit','maize flour from lit']
    if nutrient == 'folic acid':
        subgroup = 'WRA'
    else:
        subgroup = 'U5'
    baseline_coverage, counterfactual_coverage = get_baseline_and_counterfactual_coverage(
        '/ihme/homes/alibow/notebooks/vivarium_research_lsff/data_prep/outputs/waterfall_coverage_all_vehicles.csv',
                                             location_ids,
                                             nutrient,
                                             vehicles,
                                             years,
                                             coverage_levels, subgroup)
    baseline_coverage['nutrient'] = nutrient
    counterfactual_coverage['nutrient'] = nutrient
    nat_cov_baseline = pd.concat([nat_cov_baseline,baseline_coverage], ignore_index=True, sort=True)
    nat_cov_counterfactual = pd.concat([nat_cov_counterfactual,counterfactual_coverage], ignore_index=True, sort=True)
nat_cov_baseline.head()

Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data


Unnamed: 0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999,location_id,nutrient,vehicle,year
0,0.068353,0.059991,0.079454,0.071563,0.069338,0.082378,0.071233,0.07387,0.065317,0.071208,...,0.071702,0.073334,0.066454,0.069537,0.069532,0.078434,214.0,vitamin a,oil,2021.0
1,0.068353,0.059991,0.079454,0.071563,0.069338,0.082378,0.071233,0.07387,0.065317,0.071208,...,0.071702,0.073334,0.066454,0.069537,0.069532,0.078434,214.0,vitamin a,oil,2022.0
2,0.068353,0.059991,0.079454,0.071563,0.069338,0.082378,0.071233,0.07387,0.065317,0.071208,...,0.071702,0.073334,0.066454,0.069537,0.069532,0.078434,214.0,vitamin a,oil,2023.0
3,0.068353,0.059991,0.079454,0.071563,0.069338,0.082378,0.071233,0.07387,0.065317,0.071208,...,0.071702,0.073334,0.066454,0.069537,0.069532,0.078434,214.0,vitamin a,oil,2024.0
4,0.068353,0.059991,0.079454,0.071563,0.069338,0.082378,0.071233,0.07387,0.065317,0.071208,...,0.071702,0.073334,0.066454,0.069537,0.069532,0.078434,214.0,vitamin a,oil,2025.0


In [72]:
nat_cov_baseline['coverage_level'] = 'baseline'
nat_cov = pd.concat([nat_cov_baseline, nat_cov_counterfactual], ignore_index=True)
nat_cov = nat_cov.loc[nat_cov.year==2025].drop(columns='year')
nat_cov['subgroup'] = 'Total Population'
nat_cov = nat_cov.set_index([c for c in nat_cov.columns if 'draw' not in c]).mean(axis=1).reset_index()
nat_cov.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


Unnamed: 0,coverage_level,location_id,nutrient,vehicle,subgroup,0
0,baseline,214.0,vitamin a,oil,Total Population,0.07521
1,baseline,214.0,vitamin a,industry oil,Total Population,0.07521
2,baseline,214.0,vitamin a,wheat flour,Total Population,0.174881
3,baseline,214.0,vitamin a,maize flour,Total Population,0.01255
4,baseline,214.0,zinc,wheat flour,Total Population,0.174881


In [73]:
#nat_cov_star = nat_cov.copy()
#nat_cov_star['vehicle'] = nat_cov_star.vehicle + '*'
#nat_cov = pd.concat([nat_cov, nat_cov_star], ignore_index=True)
#nat_cov.head()

In [74]:
folic_acid_baseline_coverage, folic_acid_counterfactual_coverage= get_baseline_and_counterfactual_coverage(coverage_data_dir,
                                             location_ids,
                                             'folic acid',
                                             ['maize flour','wheat flour','maize flour from lit','wheat flour from lit'],
                                             list(range(2022,2026)),
                                             coverage_levels, 'WRA', True)

Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data


In [75]:
vitamin_a_baseline_coverage['coverage_level'] = 'baseline'
vitamin_a_baseline_coverage['nutrient'] = 'vitamin a'
vitamin_a_counterfactual_coverage['nutrient'] = 'vitamin a'
zinc_baseline_coverage['coverage_level'] = 'baseline'
zinc_baseline_coverage['nutrient'] = 'zinc'
zinc_counterfactual_coverage['nutrient'] = 'zinc'
folic_acid_baseline_coverage['coverage_level'] = 'baseline'
folic_acid_baseline_coverage['nutrient'] = 'folic acid'
folic_acid_counterfactual_coverage['nutrient'] = 'folic acid'

max_cov_ses = pd.DataFrame()
for nutrient in ['vitamin a','zinc','folic acid']:
    if nutrient == 'vitamin a':
        vehicles = ['oil','wheat flour','maize flour','industry oil',
                   'oil from lit','industry oil from lit',
                    'wheat flour from lit','maize flour from lit']
    else:
        vehicles = ['wheat flour', 'maize flour','wheat flour from lit','maize flour from lit']
    if nutrient == 'folic acid':
        subgroup = 'WRA'
    else:
        subgroup = 'U5'
    for vehicle in vehicles:
        print(f'calculating {nutrient}/{vehicle}')
        cov_a, cov_b = generate_logical_coverage_draws(coverage_data_dir, location_ids, nutrient, vehicle, subgroup)
        cov_b['coverage_level'] = 'maximum'
        cov_b['nutrient'] = nutrient
        cov_b['vehicle'] = vehicle
        max_cov_ses = pd.concat([max_cov_ses, cov_b])

ses_cov = pd.concat([vitamin_a_baseline_coverage,
                    vitamin_a_counterfactual_coverage,
                    zinc_baseline_coverage,
                    zinc_counterfactual_coverage,
                    folic_acid_baseline_coverage,
                    folic_acid_counterfactual_coverage,
                    max_cov_ses.reset_index()], ignore_index=True, sort=True)
l = get_ids('location')
ses_cov = ses_cov.merge(l.filter(['location_name','location_id']), on='location_id')
ses_cov['subgroup'] = 'Lower Wealth Quintile'
ses_cov = ses_cov.set_index([c for c in ses_cov.columns if 'draw' not in c]).mean(axis=1).reset_index()
ses_cov.head()

calculating vitamin a/oil
Excluded location IDs [] due to missing data
calculating vitamin a/wheat flour
Excluded location IDs [] due to missing data
calculating vitamin a/maize flour
Excluded location IDs [] due to missing data
calculating vitamin a/industry oil
Excluded location IDs [] due to missing data
calculating vitamin a/oil from lit
Excluded location IDs [] due to missing data
calculating vitamin a/industry oil from lit
Excluded location IDs [] due to missing data
calculating vitamin a/wheat flour from lit
Excluded location IDs [] due to missing data
calculating vitamin a/maize flour from lit
Excluded location IDs [] due to missing data
calculating zinc/wheat flour
Excluded location IDs [] due to missing data
calculating zinc/maize flour
Excluded location IDs [] due to missing data
calculating zinc/wheat flour from lit
Excluded location IDs [] due to missing data
calculating zinc/maize flour from lit
Excluded location IDs [] due to missing data
calculating folic acid/wheat flo

Unnamed: 0,coverage_level,location_id,nutrient,vehicle,year,location_name,subgroup,0
0,baseline,214,vitamin a,oil,2021.0,Nigeria,Lower Wealth Quintile,0.054369
1,baseline,214,vitamin a,oil,2022.0,Nigeria,Lower Wealth Quintile,0.054369
2,baseline,214,vitamin a,oil,2023.0,Nigeria,Lower Wealth Quintile,0.054369
3,baseline,214,vitamin a,oil,2024.0,Nigeria,Lower Wealth Quintile,0.054369
4,baseline,214,vitamin a,oil,2025.0,Nigeria,Lower Wealth Quintile,0.054369


In [77]:
vehicles = ['industry oil','oil','wheat flour','maize flour','wheat flour*','maize flour*','oil*','industry oil*',
           'industry oil from lit','oil from lit','wheat flour from lit','maize flour from lit']

ses_cov_prepped = (ses_cov.loc[ses_cov.year==2025]
                   .loc[ses_cov.vehicle.isin(vehicles)]
                  .drop(columns=['location_name','year']))

In [78]:
#ses_cov_prepped_star_cov = ses_cov_prepped.loc[ses_cov_prepped.vehicle.isin(['oil*','industry oil*','wheat flour*','maize flour*'])]
#ses_cov_prepped_star_cov['vehicle'] = ses_cov_prepped_star_cov.vehicle.str.replace('*','')
#ses_cov_prepped_star_cov['coverage_level'] = ses_cov_prepped_star_cov.coverage_level * (-1)
#ses_cov_prepped = pd.concat([ses_cov_prepped.loc[ses_cov_prepped.vehicle.isin([v for v in vehicles if '*' not in v])],
#                    ses_cov_prepped_star_cov], ignore_index=True, sort=True)
#ses_cov_prepped.head()

In [79]:
cov_prepped = pd.concat([ses_cov_prepped, nat_cov], ignore_index=True, sort=True)#.drop(columns='score')
cov_baseline= cov_prepped.loc[cov_prepped.coverage_level=='baseline'].rename(columns={0:'baseline'}).drop(columns='coverage_level')
cov_prepped = (cov_prepped.loc[cov_prepped.coverage_level!='baseline']
               .merge(cov_baseline, on=['location_id','nutrient','vehicle','subgroup'])
               .set_index(['coverage_level','location_id','nutrient','vehicle','subgroup'])
               .rename(columns={0:'target_coverage'})
               .stack().reset_index()
               .rename(columns={'level_5':'col_name'}))
cov_prepped[0] = cov_prepped[0] * 100
cov_prepped['col_name'] = np.where(cov_prepped.col_name=='baseline',
                                  'Percent of population eating fortified vehicle at baseline',
                                  'Percent of population eating fortified vehicle at target coverage')

cov_prepped.head()

Unnamed: 0,coverage_level,location_id,nutrient,vehicle,subgroup,col_name,0
0,0.2,214.0,vitamin a,oil,Lower Wealth Quintile,Percent of population eating fortified vehicle...,19.999996
1,0.2,214.0,vitamin a,oil,Lower Wealth Quintile,Percent of population eating fortified vehicle...,5.436868
2,0.5,214.0,vitamin a,oil,Lower Wealth Quintile,Percent of population eating fortified vehicle...,49.99999
3,0.5,214.0,vitamin a,oil,Lower Wealth Quintile,Percent of population eating fortified vehicle...,5.436868
4,0.8,214.0,vitamin a,oil,Lower Wealth Quintile,Percent of population eating fortified vehicle...,79.999984


# Excel file

In [80]:
vehicles = ['oil','industry oil','wheat flour','maize flour',
            'zero oil','zero industry oil','zero wheat flour','zero maize flour',
           'oil from lit','industry oil from lit','wheat flour from lit','maize flour from lit']#,
            #'maize flour*','wheat flour*','oil*','industry oil*']
coverage_levels = [0.2,0.5,0.8,1]

In [81]:
vitamin_a_dalys_prepped = vitamin_a_dalys.groupby('location_id').sum().reset_index()
vitamin_a_dalys_prepped['nutrient'] = 'vitamin a'
zinc_dalys_prepped = zinc_dalys.groupby('location_id').sum().reset_index()
zinc_dalys_prepped['nutrient'] = 'zinc'
fa_dalys_prepped = dalys.groupby('location_id').sum().reset_index()
fa_dalys_prepped['nutrient'] = 'folic acid'

mn_dalys = (pd.concat([vitamin_a_dalys_prepped, zinc_dalys_prepped, fa_dalys_prepped], ignore_index=True)
            .set_index(['location_id','nutrient'])).reset_index()
mn_dalys_all = mn_dalys.groupby('location_id').sum().reset_index()
mn_dalys_all['nutrient'] = 'all'

mn_dalys = pd.concat([mn_dalys, mn_dalys_all], ignore_index=True)
mn_dalys['subgroup'] = 'Lower Wealth Quintile'

# convert to rates
pop = get_population(location_id=location_ids,
                    sex_id=[1,2],
                    age_group_id=[2,3,4,5],
                    year_id=2019,
                    gbd_round_id=6,
                    decomp_step='step4').groupby(['location_id']).sum().reset_index()
mn_dalys = mn_dalys.merge(pop.filter(['location_id','population']), on='location_id')
for i in list(range(0,1000)):
    mn_dalys[f'draw_{i}'] = mn_dalys[f'draw_{i}'] / mn_dalys['population'] * 100_000
    
mn_dalys = mn_dalys.drop(columns='population')
    
mn_dalys_final = pd.DataFrame()
for vehicle in [v for v in vehicles if 'zero' not in v]:
    for coverage_level in coverage_levels:
        temp = mn_dalys.copy()
        temp['vehicle'] = vehicle
        temp['coverage_level'] = coverage_level
        mn_dalys_final = pd.concat([mn_dalys_final, temp], ignore_index=True, sort=True)

mn_dalys_zero = pd.DataFrame()
for vehicle in [v for v in vehicles if 'zero' in v]:
    for coverage_level in coverage_levels:
        temp = mn_dalys.copy()
        temp['vehicle'] = vehicle
        temp['coverage_level'] = coverage_level
        mn_dalys_zero = pd.concat([mn_dalys_zero, temp], ignore_index=True, sort=True)
    
mn_dalys = mn_dalys_final.set_index(['location_id','vehicle','nutrient','subgroup','coverage_level']).sort_index()
mn_dalys_zero = mn_dalys_zero.set_index(['location_id','vehicle','nutrient','subgroup','coverage_level']).sort_index()  
   

mn_dalys.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  del sys.path[0]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,vehicle,nutrient,subgroup,coverage_level,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
214,industry oil,all,Lower Wealth Quintile,0.2,5445.830195,6909.089865,4805.066925,7303.275284,4619.065485,5606.740924,3534.876875,5120.752391,5077.522799,5133.173796,...,13963.554585,11057.467697,11578.305986,10074.946833,12036.271233,14317.318185,11581.964498,14352.809855,12943.056111,13683.598801
214,industry oil,all,Lower Wealth Quintile,0.5,5445.830195,6909.089865,4805.066925,7303.275284,4619.065485,5606.740924,3534.876875,5120.752391,5077.522799,5133.173796,...,13963.554585,11057.467697,11578.305986,10074.946833,12036.271233,14317.318185,11581.964498,14352.809855,12943.056111,13683.598801
214,industry oil,all,Lower Wealth Quintile,0.8,5445.830195,6909.089865,4805.066925,7303.275284,4619.065485,5606.740924,3534.876875,5120.752391,5077.522799,5133.173796,...,13963.554585,11057.467697,11578.305986,10074.946833,12036.271233,14317.318185,11581.964498,14352.809855,12943.056111,13683.598801
214,industry oil,all,Lower Wealth Quintile,1.0,5445.830195,6909.089865,4805.066925,7303.275284,4619.065485,5606.740924,3534.876875,5120.752391,5077.522799,5133.173796,...,13963.554585,11057.467697,11578.305986,10074.946833,12036.271233,14317.318185,11581.964498,14352.809855,12943.056111,13683.598801
214,industry oil,folic acid,Lower Wealth Quintile,0.2,4242.052323,4863.153136,4041.513172,5429.915487,4308.966355,5202.635216,3200.54594,3921.198353,3722.700565,3773.261414,...,13357.432192,9744.226221,10083.501658,9045.610657,10012.914665,13269.649553,10908.243473,13175.622335,12019.001045,13171.848271


In [82]:
vitamin_a_dalys_prepped = vitamin_a_dalys.groupby('location_id').sum().reset_index()
vitamin_a_dalys_prepped['nutrient'] = 'vitamin a'
zinc_dalys_prepped = zinc_dalys.groupby('location_id').sum().reset_index()
zinc_dalys_prepped['nutrient'] = 'zinc'
fa_dalys_prepped = dalys.groupby('location_id').sum().reset_index()
fa_dalys_prepped['nutrient'] = 'folic acid'

mn_daly_counts = (pd.concat([vitamin_a_dalys_prepped, zinc_dalys_prepped, fa_dalys_prepped], ignore_index=True)
            .set_index(['location_id','nutrient'])).reset_index()
mn_daly_counts_all = mn_daly_counts.groupby('location_id').sum().reset_index()
mn_daly_counts_all['nutrient'] = 'all'

mn_daly_counts = pd.concat([mn_daly_counts, mn_daly_counts_all], ignore_index=True)
mn_daly_counts['subgroup'] = 'Lower Wealth Quintile'

mn_daly_counts_final = pd.DataFrame()
for vehicle in [v for v in vehicles if 'zero' not in v]:
    for coverage_level in coverage_levels:
        temp = mn_daly_counts.copy()
        temp['vehicle'] = vehicle
        temp['coverage_level'] = coverage_level
        mn_daly_counts_final = pd.concat([mn_daly_counts_final, temp], ignore_index=True, sort=True)

mn_daly_counts_zero = pd.DataFrame()
for vehicle in [v for v in vehicles if 'zero' in v]:
    for coverage_level in coverage_levels:
        temp = mn_daly_counts.copy()
        temp['vehicle'] = vehicle
        temp['coverage_level'] = coverage_level
        mn_daly_counts_zero = pd.concat([mn_daly_counts_zero, temp], ignore_index=True, sort=True)
    
mn_daly_counts = mn_daly_counts_final.set_index(['location_id','vehicle','nutrient','subgroup','coverage_level']).sort_index() / 5
mn_daly_counts_zero = mn_daly_counts_zero.set_index(['location_id','vehicle','nutrient','subgroup','coverage_level']).sort_index() / 5  
mn_daly_counts.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  del sys.path[0]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,vehicle,nutrient,subgroup,coverage_level,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
214,industry oil,all,Lower Wealth Quintile,0.2,365106.294981,463208.016397,322147.426451,489635.498104,309677.2806,375894.277307,236989.724747,343312.014156,340413.760687,344144.785817,...,936162.439224,741328.854903,776247.559911,675457.437826,806951.050189,959879.909836,776492.838522,962259.387631,867744.878724,917393.286459
214,industry oil,all,Lower Wealth Quintile,0.5,365106.294981,463208.016397,322147.426451,489635.498104,309677.2806,375894.277307,236989.724747,343312.014156,340413.760687,344144.785817,...,936162.439224,741328.854903,776247.559911,675457.437826,806951.050189,959879.909836,776492.838522,962259.387631,867744.878724,917393.286459
214,industry oil,all,Lower Wealth Quintile,0.8,365106.294981,463208.016397,322147.426451,489635.498104,309677.2806,375894.277307,236989.724747,343312.014156,340413.760687,344144.785817,...,936162.439224,741328.854903,776247.559911,675457.437826,806951.050189,959879.909836,776492.838522,962259.387631,867744.878724,917393.286459
214,industry oil,all,Lower Wealth Quintile,1.0,365106.294981,463208.016397,322147.426451,489635.498104,309677.2806,375894.277307,236989.724747,343312.014156,340413.760687,344144.785817,...,936162.439224,741328.854903,776247.559911,675457.437826,806951.050189,959879.909836,776492.838522,962259.387631,867744.878724,917393.286459
214,industry oil,folic acid,Lower Wealth Quintile,0.2,284401.083273,326041.716296,270956.281657,364039.320825,288887.219137,348801.706921,214575.08375,262889.982088,249582.040153,252971.805111,...,895526.008559,653284.844648,676030.981287,606447.368802,671298.598083,889640.772926,731324.37416,883336.88024,805793.200209,883083.854597


In [83]:
gbd_data_dir = '/ihme/homes/alibow/notebooks/vivarium_research_lsff/gbd_data_summary/output_data/'
gbd_data_directory = '/share/scratch/users/ndbs/vivarium_lsff/gbd_data'
gbd_data_filepath = f'{gbd_data_directory}/multmodel_data.hdf'
vitamin_a_dalys_nat_hdf_key = '/vitamin_a_deficiency/dalys_attributable_bmgf_25_countries'
zinc_dalys_nat_hdf_key = '/zinc_deficiency/dalys_attributable_bmgf_25_countries'

vitamin_a_dalys_nat = pd.read_hdf(gbd_data_filepath, key=vitamin_a_dalys_nat_hdf_key).groupby('location_id').sum().reset_index()
vitamin_a_dalys_nat['nutrient'] = 'vitamin a'
zinc_dalys_nat = pd.read_hdf(gbd_data_filepath, key=zinc_dalys_nat_hdf_key).groupby('location_id').sum().reset_index()
zinc_dalys_nat['nutrient'] = 'zinc'
fa_dalys_nat = pd.read_pickle('results_raw/folic_acid_dalys').groupby('location_id').sum().reset_index()
fa_dalys_nat['nutrient'] = 'folic acid'

mn_dalys_nat_by_location = pd.concat([vitamin_a_dalys_nat,zinc_dalys_nat,fa_dalys_nat], ignore_index=True, sort=True)
mn_dalys_nat_by_location = mn_dalys_nat_by_location.loc[mn_dalys_nat_by_location.location_id.isin(location_ids)]
mn_dalys_nat_tot = mn_dalys_nat_by_location.groupby('location_id').sum().reset_index()
mn_dalys_nat_tot['nutrient'] = 'all'
mn_dalys_nat = pd.concat([mn_dalys_nat_by_location.reset_index(), mn_dalys_nat_tot], ignore_index=True, sort=True).drop(columns='index')
mn_dalys_nat['subgroup'] = 'Total Population'

# convert to rates
pop = get_population(location_id=location_ids,
                    sex_id=[1,2],
                    age_group_id=[2,3,4,5],
                    year_id=2019,
                    gbd_round_id=6,
                    decomp_step='step4').groupby(['location_id']).sum().reset_index()
mn_dalys_nat = mn_dalys_nat.merge(pop.filter(['location_id','population']), on='location_id')
for i in list(range(0,1000)):
    mn_dalys_nat[f'draw_{i}'] = mn_dalys_nat[f'draw_{i}'] / mn_dalys_nat['population'] * 100_000
    
mn_dalys_nat = mn_dalys_nat.drop(columns='population')

mn_dalys_nat_final = pd.DataFrame()
for vehicle in [v for v in vehicles if 'zero' not in v]:
    for coverage_level in coverage_levels:
        temp = mn_dalys_nat.copy()
        temp['vehicle'] = vehicle
        temp['coverage_level'] = coverage_level
        mn_dalys_nat_final = pd.concat([mn_dalys_nat_final, temp], ignore_index=True, sort=True)

mn_dalys_nat_zero = pd.DataFrame()
for vehicle in [v for v in vehicles if 'zero' in v]:
    for coverage_level in coverage_levels:
        temp = mn_dalys_nat.copy()
        temp['vehicle'] = vehicle
        temp['coverage_level'] = coverage_level
        mn_dalys_nat_zero = pd.concat([mn_dalys_nat_zero, temp], ignore_index=True, sort=True)
    
mn_dalys_nat = mn_dalys_nat_final.set_index(['location_id','vehicle','nutrient','subgroup','coverage_level']).sort_index()
mn_dalys_nat_zero = mn_dalys_nat_zero.set_index(['location_id','vehicle','nutrient','subgroup','coverage_level']).sort_index()
mn_dalys_nat.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,vehicle,nutrient,subgroup,coverage_level,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
214,industry oil,all,Total Population,0.2,3033.983105,3802.320453,2624.600568,3998.258397,2500.317046,3048.314341,1966.703897,2809.930938,2801.905519,2826.48936,...,7443.610679,5962.346421,6246.544974,5432.563832,6495.210827,7661.784986,6164.890188,7660.225881,6925.355984,7288.669261
214,industry oil,all,Total Population,0.5,3033.983105,3802.320453,2624.600568,3998.258397,2500.317046,3048.314341,1966.703897,2809.930938,2801.905519,2826.48936,...,7443.610679,5962.346421,6246.544974,5432.563832,6495.210827,7661.784986,6164.890188,7660.225881,6925.355984,7288.669261
214,industry oil,all,Total Population,0.8,3033.983105,3802.320453,2624.600568,3998.258397,2500.317046,3048.314341,1966.703897,2809.930938,2801.905519,2826.48936,...,7443.610679,5962.346421,6246.544974,5432.563832,6495.210827,7661.784986,6164.890188,7660.225881,6925.355984,7288.669261
214,industry oil,all,Total Population,1.0,3033.983105,3802.320453,2624.600568,3998.258397,2500.317046,3048.314341,1966.703897,2809.930938,2801.905519,2826.48936,...,7443.610679,5962.346421,6246.544974,5432.563832,6495.210827,7661.784986,6164.890188,7660.225881,6925.355984,7288.669261
214,industry oil,folic acid,Total Population,0.2,2232.659117,2559.554282,2127.112196,2857.850256,2267.877029,2738.229061,1684.497863,2063.788607,1959.316087,1985.92706,...,7030.227469,5128.540116,5307.106136,4760.847714,5269.955087,6984.026081,5741.180775,6934.538071,6325.790023,6932.551722


In [84]:
gbd_data_dir = '/ihme/homes/alibow/notebooks/vivarium_research_lsff/gbd_data_summary/output_data/'
gbd_data_directory = '/share/scratch/users/ndbs/vivarium_lsff/gbd_data'
gbd_data_filepath = f'{gbd_data_directory}/multmodel_data.hdf'
vitamin_a_dalys_nat_hdf_key = '/vitamin_a_deficiency/dalys_attributable_bmgf_25_countries'
zinc_dalys_nat_hdf_key = '/zinc_deficiency/dalys_attributable_bmgf_25_countries'

vitamin_a_dalys_nat = pd.read_hdf(gbd_data_filepath, key=vitamin_a_dalys_nat_hdf_key).groupby('location_id').sum().reset_index()
vitamin_a_dalys_nat['nutrient'] = 'vitamin a'
zinc_dalys_nat = pd.read_hdf(gbd_data_filepath, key=zinc_dalys_nat_hdf_key).groupby('location_id').sum().reset_index()
zinc_dalys_nat['nutrient'] = 'zinc'
fa_dalys_nat = pd.read_pickle('results_raw/folic_acid_dalys').groupby('location_id').sum().reset_index()
fa_dalys_nat['nutrient'] = 'folic acid'

mn_dalys_counts_nat_by_location = pd.concat([vitamin_a_dalys_nat,zinc_dalys_nat,fa_dalys_nat], ignore_index=True, sort=True)
mn_dalys_counts_nat_by_location = mn_dalys_counts_nat_by_location.loc[mn_dalys_counts_nat_by_location.location_id.isin(location_ids)]
mn_dalys_counts_nat_tot = mn_dalys_counts_nat_by_location.groupby('location_id').sum().reset_index()
mn_dalys_counts_nat_tot['nutrient'] = 'all'
mn_dalys_counts_nat = pd.concat([mn_dalys_counts_nat_by_location.reset_index(), mn_dalys_counts_nat_tot], ignore_index=True, sort=True).drop(columns='index')
mn_dalys_counts_nat['subgroup'] = 'Total Population'

mn_dalys_counts_nat_final = pd.DataFrame()
for vehicle in [v for v in vehicles if 'zero' not in v]:
    for coverage_level in coverage_levels:
        temp = mn_dalys_counts_nat.copy()
        temp['vehicle'] = vehicle
        temp['coverage_level'] = coverage_level
        mn_dalys_counts_nat_final = pd.concat([mn_dalys_counts_nat_final, temp], ignore_index=True, sort=True)

mn_dalys_counts_nat_zero = pd.DataFrame()
for vehicle in [v for v in vehicles if 'zero' in v]:
    for coverage_level in coverage_levels:
        temp = mn_dalys_counts_nat.copy()
        temp['vehicle'] = vehicle
        temp['coverage_level'] = coverage_level
        mn_dalys_counts_nat_zero = pd.concat([mn_dalys_counts_nat_zero, temp], ignore_index=True, sort=True)
    
mn_dalys_counts_nat = mn_dalys_counts_nat_final.set_index(['location_id','vehicle','nutrient','subgroup','coverage_level']).sort_index()
mn_dalys_counts_nat_zero = mn_dalys_counts_nat_zero.set_index(['location_id','vehicle','nutrient','subgroup','coverage_level']).sort_index()
mn_dalys_counts_nat.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,vehicle,nutrient,subgroup,coverage_level,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
214,industry oil,all,Total Population,0.2,1017041.0,1274600.0,879809.096784,1340282.0,838147.224666,1021845.0,659271.357676,941934.87247,939244.620336,947485.526564,...,2495220.0,1998676.0,2093944.0,1821084.0,2177301.0,2568356.0,2066572.0,2567833.0,2321493.0,2443281.0
214,industry oil,all,Total Population,0.5,1017041.0,1274600.0,879809.096784,1340282.0,838147.224666,1021845.0,659271.357676,941934.87247,939244.620336,947485.526564,...,2495220.0,1998676.0,2093944.0,1821084.0,2177301.0,2568356.0,2066572.0,2567833.0,2321493.0,2443281.0
214,industry oil,all,Total Population,0.8,1017041.0,1274600.0,879809.096784,1340282.0,838147.224666,1021845.0,659271.357676,941934.87247,939244.620336,947485.526564,...,2495220.0,1998676.0,2093944.0,1821084.0,2177301.0,2568356.0,2066572.0,2567833.0,2321493.0,2443281.0
214,industry oil,all,Total Population,1.0,1017041.0,1274600.0,879809.096784,1340282.0,838147.224666,1021845.0,659271.357676,941934.87247,939244.620336,947485.526564,...,2495220.0,1998676.0,2093944.0,1821084.0,2177301.0,2568356.0,2066572.0,2567833.0,2321493.0,2443281.0
214,industry oil,folic acid,Total Population,0.2,748423.9,858004.5,713042.846465,957998.2,760229.524044,917899.2,564671.273026,691815.742336,656794.842508,665715.276608,...,2356647.0,1719171.0,1779029.0,1595914.0,1766575.0,2341160.0,1924538.0,2324571.0,2120508.0,2323905.0


In [85]:
mn_dalys_fin = pd.concat([mn_dalys, mn_dalys_nat]).sort_index()
mn_dalys_zero_fin = pd.concat([mn_dalys_zero, mn_dalys_nat_zero]).sort_index()
mn_dalys_fin.head()#.reset_index().subgroup.unique()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,vehicle,nutrient,subgroup,coverage_level,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
214,industry oil,all,Lower Wealth Quintile,0.2,5445.830195,6909.089865,4805.066925,7303.275284,4619.065485,5606.740924,3534.876875,5120.752391,5077.522799,5133.173796,...,13963.554585,11057.467697,11578.305986,10074.946833,12036.271233,14317.318185,11581.964498,14352.809855,12943.056111,13683.598801
214,industry oil,all,Lower Wealth Quintile,0.5,5445.830195,6909.089865,4805.066925,7303.275284,4619.065485,5606.740924,3534.876875,5120.752391,5077.522799,5133.173796,...,13963.554585,11057.467697,11578.305986,10074.946833,12036.271233,14317.318185,11581.964498,14352.809855,12943.056111,13683.598801
214,industry oil,all,Lower Wealth Quintile,0.8,5445.830195,6909.089865,4805.066925,7303.275284,4619.065485,5606.740924,3534.876875,5120.752391,5077.522799,5133.173796,...,13963.554585,11057.467697,11578.305986,10074.946833,12036.271233,14317.318185,11581.964498,14352.809855,12943.056111,13683.598801
214,industry oil,all,Lower Wealth Quintile,1.0,5445.830195,6909.089865,4805.066925,7303.275284,4619.065485,5606.740924,3534.876875,5120.752391,5077.522799,5133.173796,...,13963.554585,11057.467697,11578.305986,10074.946833,12036.271233,14317.318185,11581.964498,14352.809855,12943.056111,13683.598801
214,industry oil,all,Total Population,0.2,3033.983105,3802.320453,2624.600568,3998.258397,2500.317046,3048.314341,1966.703897,2809.930938,2801.905519,2826.48936,...,7443.610679,5962.346421,6246.544974,5432.563832,6495.210827,7661.784986,6164.890188,7660.225881,6925.355984,7288.669261


In [86]:
mn_dalys_fin_counts = pd.concat([mn_daly_counts, mn_dalys_counts_nat]).sort_index()
mn_dalys_zero_fin_counts = pd.concat([mn_daly_counts_zero, mn_dalys_counts_nat_zero]).sort_index()
mn_dalys_fin_counts.head()#.reset_index().subgroup.unique()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,vehicle,nutrient,subgroup,coverage_level,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
214,industry oil,all,Lower Wealth Quintile,0.2,365106.3,463208.0,322147.426451,489635.5,309677.2806,375894.3,236989.724747,343312.014156,340413.760687,344144.785817,...,936162.4,741328.9,776247.6,675457.4,806951.1,959879.9,776492.8,962259.4,867744.9,917393.3
214,industry oil,all,Lower Wealth Quintile,0.5,365106.3,463208.0,322147.426451,489635.5,309677.2806,375894.3,236989.724747,343312.014156,340413.760687,344144.785817,...,936162.4,741328.9,776247.6,675457.4,806951.1,959879.9,776492.8,962259.4,867744.9,917393.3
214,industry oil,all,Lower Wealth Quintile,0.8,365106.3,463208.0,322147.426451,489635.5,309677.2806,375894.3,236989.724747,343312.014156,340413.760687,344144.785817,...,936162.4,741328.9,776247.6,675457.4,806951.1,959879.9,776492.8,962259.4,867744.9,917393.3
214,industry oil,all,Lower Wealth Quintile,1.0,365106.3,463208.0,322147.426451,489635.5,309677.2806,375894.3,236989.724747,343312.014156,340413.760687,344144.785817,...,936162.4,741328.9,776247.6,675457.4,806951.1,959879.9,776492.8,962259.4,867744.9,917393.3
214,industry oil,all,Total Population,0.2,1017041.0,1274600.0,879809.096784,1340282.0,838147.224666,1021845.0,659271.357676,941934.87247,939244.620336,947485.526564,...,2495220.0,1998676.0,2093944.0,1821084.0,2177301.0,2568356.0,2066572.0,2567833.0,2321493.0,2443281.0


In [91]:
# duplicate zero results for 'from lit' vehicles

results_zero = results.loc[results.vehicle.isin([v for v in vehicles if 'zero' in v])]
results_zero['vehicle'] = results.vehicle + ' from lit'
#results = pd.concat([results, results_zero], ignore_index=True)
results.head()

Unnamed: 0,coverage_level,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,...,draw_997,draw_998,draw_999,index,location_id,measure,nutrient,subgroup,vehicle,year
0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,,214,pif,vitamin a,Lower Wealth Quintile,industry oil,2021
1,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,,214,pif,vitamin a,Lower Wealth Quintile,industry oil,2021
2,0.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,,214,pif,vitamin a,Lower Wealth Quintile,industry oil,2021
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,,214,pif,vitamin a,Lower Wealth Quintile,industry oil,2021
4,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,,214,pif,vitamin a,Lower Wealth Quintile,industry oil from lit,2021


In [111]:
results.vehicle

0                    industry oil
1                    industry oil
2                    industry oil
3                    industry oil
4           industry oil from lit
                  ...            
2683    zero wheat flour from lit
2684    zero wheat flour from lit
2685    zero wheat flour from lit
2686    zero wheat flour from lit
2687    zero wheat flour from lit
Name: vehicle, Length: 2688, dtype: object

In [119]:
# drop unncessary rows

results_prepped = (results.loc[((results.vehicle.isin([v for v in vehicles if 'from lit' in v]))
                             & (results.coverage_level==0.8))
                   | ((results.vehicle.isin([v for v in vehicles if 'zero' in v]))
                       & (results.coverage_level==1))
                   | ((results.vehicle.isin([v for v in vehicles if 'from lit' not in v and 'zero' not in v]))
                       & (results.coverage_level!=1))])
results_prepped = (results_prepped.loc[results_prepped.nutrient!='all']
                   .loc[results_prepped.year==2025])#.drop(columns='index')
results_prepped.head()

Unnamed: 0,coverage_level,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,...,draw_997,draw_998,draw_999,index,location_id,measure,nutrient,subgroup,vehicle,year
192,0.2,8.799028,5.184798,4.171593,3.563713,6.074433,6.0674,1.674361,6.941798,4.674974,...,4.24579,4.827935,3.812955,,214,pif,vitamin a,Lower Wealth Quintile,industry oil,2025
193,0.5,28.600873,16.024976,14.618856,11.825937,19.86869,21.727959,5.544233,23.392932,14.911856,...,13.905214,15.811248,13.264032,,214,pif,vitamin a,Lower Wealth Quintile,industry oil,2025
194,0.8,48.402718,26.865155,25.066119,20.088161,33.662947,37.388518,9.414104,39.844065,25.148739,...,23.564638,26.794561,22.715108,,214,pif,vitamin a,Lower Wealth Quintile,industry oil,2025
198,0.8,29.72821,14.356285,18.137913,12.965055,20.99633,28.157756,6.045562,26.617863,14.710752,...,14.743493,16.762982,16.204371,,214,pif,vitamin a,Lower Wealth Quintile,industry oil from lit,2025
200,0.2,12.501415,7.041143,6.666192,5.339091,8.813635,9.817342,2.528718,10.398886,6.623386,...,6.230928,7.070347,6.050235,,214,pif,vitamin a,Lower Wealth Quintile,maize flour,2025


In [124]:
index_cols = ['location_id','vehicle','nutrient','subgroup','coverage_level']
rates_sub = (results_prepped.loc[results_prepped.measure=='rates_averted']
              #.loc[results.subgroup=='Lower Wealth Quintile']
              .loc[results_prepped.year==2025]
              .set_index([c for c in results_prepped.columns if 'draw' not in c])).reset_index().drop(columns=['index','year','measure'])
step1_increment = (rates_sub.loc[rates_sub.vehicle.str.contains('zero')]
         #.loc[rates_sub.coverage_level==1]
         #.drop(columns='coverage_level')
         .set_index(index_cols)) * -1

step2_increment = (rates_sub
                    .loc[rates_sub.vehicle.isin([v for v in vehicles if 'zero' not in v])]
                    #.loc[rates_sub.coverage_level==0.8]
                    #.drop(columns='coverage_level')
                    .set_index(index_cols))
dalys_zero_fortification = mn_dalys_zero_fin + step1_increment

dalys_baseline = mn_dalys_fin.copy()

dalys_literature = (mn_dalys_fin - step2_increment)

step1_pif = step1_increment / dalys_zero_fortification * 100
step2_pif = step2_increment / dalys_baseline * 100

dalys_zero_fortification['col_name'] = 'Zero fortification DALYs per 100,000'
step1_increment['col_name'] ='Zero fortification to baseline DALY increment per 100,000'
step1_pif['col_name'] = 'Zero fortification to baseline PIF as a proportion of zero fortification DALYs'
dalys_baseline['col_name'] = 'Baseline DALYs per 100,000'
dalys_literature['col_name'] = 'Target coverage DALYs per 100,000'
step2_increment['col_name'] = 'Baseline to target coverage DALY increment per 100,000'
step2_pif['col_name'] = 'Baseline to target coverage PIF as a proportion of baseline DALYs'

data_rates = (pd.concat([dalys_zero_fortification.reset_index(),
                      dalys_baseline.reset_index(),
                      dalys_literature.reset_index(), 
                      #dalys_industry.reset_index(),
                      step1_increment.reset_index(),
                      step2_increment.reset_index(),
                      step1_pif.reset_index(),
                      step2_pif.reset_index(),
                      #step3_increment.reset_index()
                  ], ignore_index=True)
          .set_index(['location_id','col_name','vehicle','nutrient','subgroup','coverage_level'])
          .filter([c for c in dalys_baseline.columns if 'draw' in c])
          #.mean(axis=1)
              .apply(pd.DataFrame.describe, percentiles=[0.025,0.975], axis=1)
          .reset_index())
data_rates = pd.concat([data_rates, 
                  cov_prepped.rename(columns={0:'mean'})],#.loc[cov_prepped.coverage_level.isin(['baseline',0.8])]],#.drop(columns='coverage_level')],
                ignore_index=True, sort=True)
data_rates = data_rates.loc[data_rates.nutrient!='all']
data_rates['vehicle'] = data_rates['vehicle'].str.replace('zero ','')
data_rates['vehicle'] = (data_rates['vehicle']
                   .replace('industry oil','oil (industry coverage data)')
                   .replace('oil','oil (literature coverage data)'))
#data_rates_sub = (data_rates.loc[(data_rates.col_name.isin(['Zero fortification DALYs per 100,000',
#                                          'Zero fortification to baseline DALY increment per 100,000',
#                                          'Zero fortification to baseline PIF as a proportion of zero fortification DALYs'])) 
#                      & (data_rates.coverage_level==1)])
#data_rates = pd.concat([(data_rates.loc[data_rates.col_name != 'Zero fortification DALYs per 100,000']
#                   .loc[data_rates.col_name != 'Zero fortification to baseline DALY increment per 100,000']
#                   .loc[data_rates.col_name != 'Zero fortification to baseline PIF as a proportion of zero fortification DALYs']),
#                 data_rates_sub], ignore_index=True)
l = get_ids('location')
data_rates = data_rates.merge(l.filter(['location_id','location_name']), on='location_id')
data_rates = data_rates.drop(columns='location_id').rename(columns={'location_name':'Location'})
#data_rates[0] = np.where(data_rates.col_name.str.contains('PIF'), data_rates[0].map('{:.1f}'.format),
#                   np.where(data_rates.col_name.str.contains('Percent'), data_rates[0].map('{:.1f}'.format), 
#                   np.where(data_rates.col_name.str.contains('increment'), data_rates[0].map('{:.1f}'.format), data_rates[0].map('{:,.0f}'.format))))
#data_rates = data_rates.pivot_table(index=['Location','vehicle','nutrient','coverage_level','subgroup'],
#                        columns='col_name',values=0,aggfunc=lambda x: ' '.join(x)).dropna()
#data_rates = (data_rates[['Percent of population eating fortified vehicle at baseline',
#            'Percent of population eating fortified vehicle at target coverage',
#            'Zero fortification DALYs per 100,000',
#            'Baseline DALYs per 100,000',
#            'Target coverage DALYs per 100,000',
#            'Zero fortification to baseline DALY increment per 100,000',
#            'Zero fortification to baseline PIF as a proportion of zero fortification DALYs',
#            'Baseline to target coverage DALY increment per 100,000',
#            'Baseline to target coverage PIF as a proportion of baseline DALYs']]
#        .reset_index().replace('-0', '0').replace('-0.0','0'))
data_rates.dropna().head()

Unnamed: 0,2.5%,50%,97.5%,col_name,count,coverage_level,max,mean,min,nutrient,std,subgroup,vehicle,Location
11,152.817911,1043.066507,2372.103925,"Zero fortification DALYs per 100,000",1000.0,1.0,4011.909676,1117.350292,-365.997379,vitamin a,588.641284,Lower Wealth Quintile,oil (industry coverage data),Nigeria
15,157.646647,673.328653,1433.247353,"Zero fortification DALYs per 100,000",1000.0,1.0,2420.636277,711.443808,-155.453898,vitamin a,338.694302,Total Population,oil (industry coverage data),Nigeria
27,3870.20453,7682.044075,16388.428286,"Zero fortification DALYs per 100,000",1000.0,1.0,21411.645669,8402.160263,2818.278584,folic acid,3430.187791,Lower Wealth Quintile,maize flour,Nigeria
31,2045.182204,4053.312708,8653.968298,"Zero fortification DALYs per 100,000",1000.0,1.0,11295.466049,4435.949004,1492.43631,folic acid,1810.203989,Total Population,maize flour,Nigeria
35,149.080972,1012.868132,2298.925699,"Zero fortification DALYs per 100,000",1000.0,1.0,3931.222696,1084.657004,-358.002309,vitamin a,570.603494,Lower Wealth Quintile,maize flour,Nigeria


In [107]:
index_cols = ['location_id','vehicle','nutrient','subgroup','coverage_level']

counts_sub_lwq = (results_prepped.loc[results.measure=='counts_averted']
              .loc[results.subgroup=='Lower Wealth Quintile']
              .loc[results.year==2025]
              .set_index([c for c in results.columns if 'draw' not in c]) / 5).reset_index().drop(columns=['index','year','measure'])
counts_sub_tot = (results.loc[results.measure=='counts_averted']
              .loc[results.subgroup=='Total Population']
              .loc[results.year==2025]
              .set_index([c for c in results.columns if 'draw' not in c])).reset_index().drop(columns=['index','year','measure'])
counts_sub = pd.concat([counts_sub_lwq, counts_sub_tot], ignore_index=True)

step1_increment = (counts_sub.loc[counts_sub.vehicle.str.contains('zero')]
         #.loc[counts_sub.coverage_level==1]
         #.drop(columns='coverage_level')
         .set_index(index_cols)) * -1

step2_increment = (counts_sub
                    .loc[counts_sub.vehicle.isin([v for v in vehicles if 'zero' not in v])]
                    #.loc[counts_sub.coverage_level==0.8]
                    #.drop(columns='coverage_level')
                    .set_index(index_cols))
dalys_zero_fortification = mn_dalys_zero_fin_counts + step1_increment

dalys_baseline = mn_dalys_fin_counts.copy()

dalys_literature = (mn_dalys_fin_counts - step2_increment)

step1_pif = step1_increment / dalys_zero_fortification * 100
step2_pif = step2_increment / dalys_baseline * 100

dalys_zero_fortification['col_name'] = 'Zero fortification DALYs'
step1_increment['col_name'] ='Zero fortification to baseline DALY increment'
dalys_baseline['col_name'] = 'Baseline DALYs'
dalys_literature['col_name'] = 'Target coverage DALYs'
step2_increment['col_name'] = 'Baseline to target coverage DALY increment'

data_counts = (pd.concat([dalys_zero_fortification.reset_index(),
                      dalys_baseline.reset_index(),
                      dalys_literature.reset_index(), 
                      #dalys_industry.reset_index(),
                      step1_increment.reset_index(),
                      step2_increment.reset_index(),
                      #step3_increment.reset_index()
                  ], ignore_index=True)
          .set_index(['location_id','col_name','vehicle','nutrient','subgroup','coverage_level'])
          .filter([c for c in dalys_baseline.columns if 'draw' in c])
          #.mean(axis=1)
               .apply(pd.DataFrame.describe, percentiles=[0.025,0.975], axis=1)
          .reset_index())
data_counts = pd.concat([data_counts, 
                  cov_prepped.rename(columns={0:'mean'})],#.loc[cov_prepped.coverage_level.isin(['baseline',0.8])]],#.drop(columns='coverage_level')],
                ignore_index=True, sort=True)
data_counts['vehicle'] = data_counts['vehicle'].str.replace('zero ','')
data_counts['vehicle'] = (data_counts['vehicle']
                   .replace('industry oil','oil (industry coverage data)')
                   .replace('oil','oil (literature coverage data)'))
#data_counts_sub = (data_counts.loc[(data_counts.col_name.isin(['Zero fortification DALYs per 100,000',
#                                          'Zero fortification to baseline DALY increment per 100,000',
#                                          'Zero fortification to baseline PIF as a proportion of zero fortification DALYs'])) 
#                      & (data_counts.coverage_level==1)])
#data_counts = pd.concat([(data_counts.loc[data_counts.col_name != 'Zero fortification DALYs per 100,000']
#                   .loc[data_counts.col_name != 'Zero fortification to baseline DALY increment per 100,000']
#                   .loc[data_counts.col_name != 'Zero fortification to baseline PIF as a proportion of zero fortification DALYs']),
#                 data_counts_sub], ignore_index=True)
l = get_ids('location')
data_counts = data_counts.merge(l.filter(['location_id','location_name']), on='location_id')
data_counts = data_counts.drop(columns='location_id').rename(columns={'location_name':'Location'})
#data_counts[0] = np.where(data_counts.col_name.str.contains('PIF'), data_counts[0].map('{:.1f}'.format),
#                   np.where(data_counts.col_name.str.contains('Percent'), data_counts[0].map('{:.1f}'.format), 
#                   np.where(data_counts.col_name.str.contains('increment per'), data_counts[0].map('{:.1f}'.format), data_counts[0].map('{:,.0f}'.format))))
#data_counts = data_counts.pivot_table(index=['Location','vehicle','nutrient','coverage_level','subgroup'],
#                        columns='col_name',values=0,aggfunc=lambda x: ' '.join(x)).dropna()
#data_counts = (data_counts[[
#            'Zero fortification DALYs',
#            'Baseline DALYs',
#            'Target coverage DALYs',
#            'Zero fortification to baseline DALY increment',
#            'Baseline to target coverage DALY increment']]
#        .reset_index().replace('-0', '0').replace('-0.0','0'))
data_counts.head()

Unnamed: 0,2.5%,50%,97.5%,col_name,count,coverage_level,max,mean,min,nutrient,std,subgroup,vehicle,Location
0,,,,Zero fortification DALYs,0.0,0.2,,,,all,,Lower Wealth Quintile,oil (industry coverage data),Nigeria
1,,,,Zero fortification DALYs,0.0,0.5,,,,all,,Lower Wealth Quintile,oil (industry coverage data),Nigeria
2,,,,Zero fortification DALYs,0.0,0.8,,,,all,,Lower Wealth Quintile,oil (industry coverage data),Nigeria
3,,,,Zero fortification DALYs,0.0,1.0,,,,all,,Lower Wealth Quintile,oil (industry coverage data),Nigeria
4,,,,Zero fortification DALYs,0.0,0.2,,,,all,,Total Population,oil (industry coverage data),Nigeria


In [108]:
data = pd.concat([data_rates,data_counts],ignore_index=True)#.drop(columns=0)#.dropna()
#data['vehicle'] = (data.vehicle.str.replace('industry oil from lit','oil (industry coverage data)')
#                   .replace('oil from lit','oil (literature coverage data)')
#                   .replace('wheat flour from lit','wheat flour')
#                   .replace('maize flour from lit','maize flour'))
#data_rates.merge(data_counts, on=['Location','vehicle','nutrient','subgroup','coverage_level'])#.drop(columns='coverage')
#data_fin = (data.loc[((data.subgroup=='Total Population') & (data.coverage_level==0.8))
#               |((data.subgroup=='Lower Wealth Quintile') & (data.coverage_level==1))]
#        .set_index(['Location','vehicle','nutrient','subgroup'])
#        .drop(columns='coverage_level')
#        .sort_index())
#data = data.loc[data.subgroup=='Lower Wealth Quintile'].drop(columns='coverage_level')
#data.to_pickle('lwq_copy1_data.pkl')
data_copy1 = pd.read_pickle('lwq_copy1_data.pkl')
data = pd.concat([data, data_copy1], ignore_index=True)
#data.dropna().head()
data.dropna().head()

Unnamed: 0,2.5%,50%,97.5%,col_name,count,coverage_level,max,mean,min,nutrient,std,subgroup,vehicle,Location
16,152.817918,1043.066589,2372.104067,"Zero fortification DALYs per 100,000",1000.0,0.2,4011.909908,1117.350364,-365.997396,vitamin a,588.641324,Lower Wealth Quintile,oil (industry coverage data),Nigeria
17,152.817916,1043.066559,2372.104013,"Zero fortification DALYs per 100,000",1000.0,0.5,4011.909821,1117.350337,-365.99739,vitamin a,588.641309,Lower Wealth Quintile,oil (industry coverage data),Nigeria
18,152.817913,1043.066528,2372.10396,"Zero fortification DALYs per 100,000",1000.0,0.8,4011.909734,1117.35031,-365.997383,vitamin a,588.641294,Lower Wealth Quintile,oil (industry coverage data),Nigeria
19,152.817911,1043.066507,2372.103925,"Zero fortification DALYs per 100,000",1000.0,1.0,4011.909676,1117.350292,-365.997379,vitamin a,588.641284,Lower Wealth Quintile,oil (industry coverage data),Nigeria
20,155.050186,651.826965,1395.290452,"Zero fortification DALYs per 100,000",1000.0,0.2,2363.50686,694.184442,-152.5995,vitamin a,330.114581,Total Population,oil (industry coverage data),Nigeria


In [110]:
(data.loc[data.col_name=='Percent of population eating fortified vehicle at target coverage']
 .loc[data.vehicle=='oil (literature coverage data)'])

Unnamed: 0,2.5%,50%,97.5%,col_name,count,coverage_level,max,mean,min,nutrient,std,subgroup,vehicle,Location
1376,,,,Percent of population eating fortified vehicle...,,0.2,,19.999996,,vitamin a,,Lower Wealth Quintile,oil (literature coverage data),Nigeria
1378,,,,Percent of population eating fortified vehicle...,,0.5,,49.99999,,vitamin a,,Lower Wealth Quintile,oil (literature coverage data),Nigeria
1380,,,,Percent of population eating fortified vehicle...,,0.8,,79.999984,,vitamin a,,Lower Wealth Quintile,oil (literature coverage data),Nigeria
1382,,,,Percent of population eating fortified vehicle...,,1.0,,99.99998,,vitamin a,,Lower Wealth Quintile,oil (literature coverage data),Nigeria
1504,,,,Percent of population eating fortified vehicle...,,0.2,,12.405204,,vitamin a,,Total Population,oil (literature coverage data),Nigeria
1506,,,,Percent of population eating fortified vehicle...,,0.5,,19.731442,,vitamin a,,Total Population,oil (literature coverage data),Nigeria
1508,,,,Percent of population eating fortified vehicle...,,0.8,,27.057681,,vitamin a,,Total Population,oil (literature coverage data),Nigeria
1510,,,,Percent of population eating fortified vehicle...,,1.0,,31.94184,,vitamin a,,Total Population,oil (literature coverage data),Nigeria
2496,,,,Percent of population eating fortified vehicle...,,0.2,,19.999996,,vitamin a,,Lower Wealth Quintile,oil (literature coverage data),Nigeria
2498,,,,Percent of population eating fortified vehicle...,,0.5,,49.99999,,vitamin a,,Lower Wealth Quintile,oil (literature coverage data),Nigeria


In [119]:
test = (data.loc[data.nutrient!='all']
        .pivot_table(index=['subgroup','vehicle','nutrient'], columns='col_name', values='mean')
        #.dropna()
        .reset_index())
test.loc[test.vehicle=='wheat flour'].loc[test.nutrient=='zinc']

col_name,subgroup,vehicle,nutrient,Baseline DALYs,"Baseline DALYs per 100,000",Baseline to target coverage DALY increment,"Baseline to target coverage DALY increment per 100,000",Baseline to target coverage PIF as a proportion of baseline DALYs,Percent of population eating fortified vehicle at baseline,Percent of population eating fortified vehicle at target coverage,Target coverage DALYs,"Target coverage DALYs per 100,000",Zero fortification DALYs,"Zero fortification DALYs per 100,000",Zero fortification to baseline DALY increment,"Zero fortification to baseline DALY increment per 100,000",Zero fortification to baseline PIF as a proportion of zero fortification DALYs
11,Lower Wealth Quintile,wheat flour,zinc,13484.400696,201.129801,2346.433555,34.99879,17.41351,4.113278,38.18496,11137.967141,166.131011,13782.377543,205.574346,297.976847,4.444545,2.148701
25,Total Population,wheat flour,zinc,38748.277862,115.59184,6306.061066,18.811912,16.295566,17.488055,45.685274,32442.216796,96.779928,41165.612204,122.803105,2417.334341,7.211266,5.815114


In [135]:
(cov_prepped.loc[cov_prepped.col_name=='Percent of population eating fortified vehicle at target coverage']
 .loc[cov_prepped.loc[cov_prepped.vehicle=='oil from lit']]
 .dropna())
 #.vehicle.unique())

ValueError: Cannot index with multidimensional key

In [129]:
cov_prepped.col_name.unique()

array(['Percent of population eating fortified vehicle at target coverage',
       'Percent of population eating fortified vehicle at baseline'],
      dtype=object)

In [98]:
data.vehicle.unique()

array(['oil (industry coverage data)', 'maize flour',
       'oil (literature coverage data)', 'wheat flour', 'industry wheat'],
      dtype=object)

In [83]:
data.loc[data.col_name=='Baseline to target coverage DALY increment per 100,000'].loc[data.subgroup=='Total Population'].loc[data.coverage_level==0.8]

Unnamed: 0,2.5%,50%,97.5%,col_name,count,coverage_level,max,mean,min,nutrient,std,subgroup,vehicle,Location
738,-1.327082,14.855874,43.20617,Baseline to target coverage DALY increment per...,1000.0,0.8,75.270776,16.201203,-23.137316,vitamin a,11.235045,Total Population,maize flour,Nigeria
742,-8.408387,102.441687,290.97258,Baseline to target coverage DALY increment per...,1000.0,0.8,509.311113,110.057157,-118.6884,vitamin a,76.188382,Total Population,wheat flour,Nigeria
746,-15.573656,150.232975,423.516813,Baseline to target coverage DALY increment per...,1000.0,0.8,766.300195,163.921089,-184.28821,vitamin a,115.103108,Total Population,oil (industry coverage data),Nigeria
750,-4.836123,52.116425,143.106594,Baseline to target coverage DALY increment per...,1000.0,0.8,254.27711,56.210437,-65.278271,vitamin a,38.789286,Total Population,oil (literature coverage data),Nigeria
770,0.060604,2.656339,11.625584,Baseline to target coverage DALY increment per...,1000.0,0.8,19.180042,3.541198,0.0,zinc,3.028519,Total Population,maize flour,Nigeria
774,0.414566,18.417473,78.58606,Baseline to target coverage DALY increment per...,1000.0,0.8,126.991553,24.079248,0.0,zinc,20.588094,Total Population,wheat flour,Nigeria
794,39.487173,75.488747,159.009725,Baseline to target coverage DALY increment per...,1000.0,0.8,239.138294,82.456926,28.820614,folic acid,31.56935,Total Population,maize flour,Nigeria
798,260.275882,505.372231,1061.854379,Baseline to target coverage DALY increment per...,1000.0,0.8,1532.22054,549.001546,169.440784,folic acid,207.029588,Total Population,wheat flour,Nigeria


In [84]:
data.col_name.unique()

array(['Zero fortification DALYs per 100,000',
       'Baseline DALYs per 100,000', 'Target coverage DALYs per 100,000',
       'Zero fortification to baseline DALY increment per 100,000',
       'Baseline to target coverage DALY increment per 100,000',
       'Zero fortification to baseline PIF as a proportion of zero fortification DALYs',
       'Baseline to target coverage PIF as a proportion of baseline DALYs',
       'Percent of population eating fortified vehicle at target coverage',
       'Percent of population eating fortified vehicle at baseline',
       'Zero fortification DALYs', 'Baseline DALYs',
       'Target coverage DALYs',
       'Zero fortification to baseline DALY increment',
       'Baseline to target coverage DALY increment'], dtype=object)

In [85]:
#data = data_rates.merge(data_counts, on=['Location','vehicle','nutrient','subgroup','coverage_level'])
#data = data.loc[data.subgroup=='Lower Wealth Quintile'].drop(columns='coverage_level')
#data.head(8)

In [86]:
data.to_excel('results_plots/nigeria_LWQ_percent_of_total_pop.xlsx')

In [87]:
# zero fort needs to apply to * vehicles
# baseline coverage needs to apply to all coverage levels

In [88]:
#data.reset_index().to_excel('results_plots/nigeria_lower_wealth_quintile_4_14_21.xlsx')

# archive code

In [89]:
assert 1==2, 'stop running'

AssertionError: stop running

In [None]:
impact = (results
          .loc[results.year==2025]
          .loc[results.measure.isin(['pif','rates_averted'])]
          .set_index([c for c in results.columns if 'draw' not in c])
          .apply(pd.DataFrame.describe, percentiles=[0.025,0.975], axis=1)
          .rename(columns={'mean':'mean_val','2.5%':'lower','97.5%':'upper'})
          .filter(['mean_val','lower','upper'])
          .reset_index()
          .replace('vitamin a','vitamin A')
          .drop(columns='index'))
l = get_ids('location')
impact = impact.merge(l.filter(['location_name','location_id']), on='location_id')
for col in ['mean_val','lower','upper']:
    impact[f'{col}'] = np.where(impact.measure.isin(['pif','deficiency_exposure']),
                                           impact[f'{col}'].map('{:.1f}'.format),
                                           impact[f'{col}'].map('{:,.0f}'.format))
impact['value'] = (impact['mean_val'].astype(str)
                    +'\r\n('
                    +impact['lower'].astype(str)
                    +', '
                    +impact['upper'].astype(str)
                    +')')
impact['col_name'] = (impact.nutrient.str.capitalize()
                      + ' in '
                      + impact.vehicle
                      + np.where(impact.measure == 'pif', ' PIF (percent): ',
                                np.where(impact.measure == 'rates_averted', ' DALYs averted per 100,000 person-years: ', 'ERROR'))
                      + (impact.coverage_level * 100).astype(str)
                       + '% scale-up')
impact['location_name'] = impact['location_name'] + ' ' + impact['subgroup']
impact = impact[['col_name','location_name','value']]
impact = impact.pivot_table(index='location_name',
                                     columns='col_name', 
                                     values='value',
                                     aggfunc=lambda x: ' '.join(x))   
impact = impact.replace(np.nan, 'Not modeled')
impact

In [None]:
coverage = cov_prepped
coverage = coverage.loc[coverage.year==2025].loc[coverage.nutrient!='iron']
coverage['subgroup'] = np.where(coverage.location_id==214, 'Total Population', 'Lower Wealth Quintile')
coverage['location_name'] = coverage['location_name'] + ' ' + coverage['subgroup']


coverage['value'] = ((coverage['mean'] * 100).round(1).astype(str)
                        +'\r\n('
                        +(coverage['2.5%'] * 100).round(1).astype(str)
                        +', '
                        +(coverage['97.5%'] * 100).round(1).astype(str)
                        +')')

coverage['parameter_description'] = np.where(coverage.coverage_level == 'baseline', 'Percent of population eating ' + coverage.nutrient + ' fortified ' + coverage.vehicle +' at baseline',
                                            np.where(coverage.coverage_level == 'maximum', f'Percent of population eating fortifiable ' + coverage.vehicle, 
                                            'Percent of population eating ' + coverage.nutrient + ' fortified ' + coverage.vehicle + ': ' + (coverage.coverage_level * 100).astype(str) + '0% scale-up'))

coverage['parameter_description'] = coverage['parameter_description'].str.replace('.00%','%')
coverage = coverage[['location_name','value','parameter_description']]
coverage = coverage.pivot_table(index='location_name',
                                     columns='parameter_description', 
                                     values='value',
                                     aggfunc=lambda x: ' '.join(x))   
coverage = coverage.replace(np.nan, 'Not modeled')

coverage

In [None]:
low_ses_table = pd.concat([coverage.T, impact.T])
low_ses_table.to_csv('results_plots/nigeria_lower_wealth_quintile_no_iron.csv')
low_ses_table

In [None]:
def make_ses_comparison_dot_plots(data, nutrient, vehicle, measure, coverage_levels, subtitle, wra=False):
    """This function takes a dataframe,
    nutrient (as a string),
    and measure (as a string, either: 'rates', 'counts', or 'pifs').
    """

    f, ax = plt.subplots(figsize=(7, 4), dpi=120)
    colors = ['tab:red', 'tab:orange', 'tab:green']

    location_spacer = 0.15
    coverage_spacer = 0.025
    df = (data.drop(columns='measure', errors='ignore')
          .apply(pd.DataFrame.describe, percentiles=[0.025, 0.975], axis=1).reset_index())

    for n in list(range(0, len(coverage_levels))):
        rate = (df.loc[df.year == 2025]
            .loc[df.coverage_level == coverage_levels[n]])
        for i in list(range(0, len(rate))):
            plt.plot([location_spacer * i + coverage_spacer * n, location_spacer * i + coverage_spacer * n],
                     [rate['2.5%'].values[i], rate['97.5%'].values[i]], c='black')
            plt.scatter([location_spacer * i + coverage_spacer * n], rate['2.5%'].values[i], s=50, marker='_',
                        c='black')
            plt.scatter([location_spacer * i + coverage_spacer * n], rate['97.5%'].values[i], s=50, marker='_',
                        c='black')

        x_vals = []
        for x in list(range(0, len(rate))):
            x_vals.append(location_spacer * x + coverage_spacer * n)
        plt.scatter(x_vals, rate['mean'], s=50,
                    label=f'{int(coverage_levels[n] * 100)} percent coverage', color=colors[n])

    plt.hlines(0, 0 - coverage_spacer * 2,
               location_spacer * (len(rate)) - coverage_spacer * 2,
               linestyle='dashed', color='grey', alpha=0.5)

    plt.plot()

    if wra == True:
        subpop = 'Women of Reproductive Age'
    else:
        subpop = 'children under five'

    if measure == 'rates':
        plt.title(f'DALYs averted per 100,000 person-years due to\n{nutrient} fortication in {vehicle} among {subpop}\n{subtitle}')
        plt.ylabel('DALYs Averted per 100,000')
    elif measure == 'counts':
        plt.title(f'DALYs averted due to\n{nutrient} fortication in {vehicle} among {subpop}\n{subtitle}')
        plt.ylabel('DALYs')
    elif measure == 'pifs':
        plt.title(f'Population impact fraction of {nutrient} fortication in {vehicle} \non DALYs among {subpop}\n{subtitle}')
        plt.ylabel('Population Impact Fraction (Percent)')

    plt.legend(bbox_to_anchor=[1.5, 1])

    x_ticks = []
    for x in list(range(0, len(rate))):
        x_ticks.append(location_spacer * x + coverage_spacer)
    ax.set_xticks(x_ticks)
    l = get_ids('location')
    l_names = df.loc[df.coverage_level == coverage_levels[0]].loc[df.year == 2025]
    l_names = l_names.reset_index().merge(l, on='location_id')
    l_names['label'] = l_names.location_name + ' ' + l_names.subgroup
    l_names = list(l_names.label.values)
    ax.set_xticklabels(l_names)

In [None]:
with PdfPages('results_plots/nigeria_lower_wealth_quintile_impact_by_nutrient_vehicle_pair.pdf') as pdf:
    for nutrient in ['vitamin a', 'zinc', 'folic acid']:
        if nutrient == 'vitamin a':
            vehicles = ['oil','wheat flour','maize flour','industry oil']
        else:
            vehicles = ['wheat flour','maize flour']
        for vehicle in vehicles:
            make_ses_comparison_dot_plots((results
                                .sort_values(by='subgroup', ascending=False)
                                .loc[results.measure=='rates_averted']
                                .loc[results.vehicle==vehicle]
                                .loc[results.nutrient==nutrient]
                                .set_index([c for c in results.columns if 'draw' not in c])), 
                                nutrient, vehicle, 'rates', coverage_levels, '')
            pdf.savefig(bbox_inches='tight')
            make_ses_comparison_dot_plots((results
                                .sort_values(by='subgroup', ascending=False)
                                .loc[results.measure=='pif']
                                .loc[results.vehicle==vehicle]
                                .loc[results.nutrient==nutrient]
                                .set_index([c for c in results.columns if 'draw' not in c])), 
                                nutrient, vehicle, 'pifs', coverage_levels, '')
            pdf.savefig(bbox_inches='tight')

In [None]:
def make_coverage_bar_charts(data, location_ids, nutrient, vehicle):
    plt.figure(figsize=(10, 4), dpi=120)
    
    data = (data
            .loc[data.vehicle==vehicle]
            .loc[data.nutrient.isin(['na',nutrient])]
            .loc[data.location_id.isin(location_ids)]
            .loc[data.year.isin([np.nan,2025])]
            .sort_values(by=['location_id'], ascending=True))
    data['mean'] = data['mean'] * 100
    data['2.5%'] = data['2.5%'] * 100
    data['97.5%'] = data['97.5%'] * 100
    base = data.loc[data.coverage_level=='baseline']
    alt_20 = data.loc[data.coverage_level==0.2]
    alt_50 = data.loc[data.coverage_level==0.5]
    alt_80 = data.loc[data.coverage_level==0.8]
    barWidth = 0.20
    a = np.arange(len(base))
    b = [x + barWidth for x in a]
    c = [x + barWidth for x in b]
    d = [x + barWidth for x in c]
    plt.bar(a, base['mean'], width=barWidth, yerr = [base['mean'] - base['2.5%'],base['97.5%'] - base['mean']], 
            label='Baseline Coverage')
    plt.bar(b, alt_20['mean'], width=barWidth, yerr = [alt_20['mean'] - alt_20['2.5%'],alt_20['97.5%'] - alt_20['mean']], 
            color='tab:red', label='20% Coverage')
    plt.bar(c, alt_50['mean'], width=barWidth, yerr = [alt_50['mean'] - alt_50['2.5%'],alt_50['97.5%'] - alt_50['mean']], 
            color='tab:orange', label='50% Coverage')
    plt.bar(d, alt_80['mean'], width=barWidth, yerr = [alt_80['mean'] - alt_80['2.5%'],alt_80['97.5%'] - alt_80['mean']], 
            color='tab:green', label='80% Coverage')

    df = data.loc[data.coverage_level=='maximum']
    for i in list(range(0,len(df.location_id.unique()))):
        if i == 0:
            plt.hlines(df.loc[df.location_id==df.location_id.unique()[i]]['mean'], -0.1 + i, 0.7 + i, 
                   linestyle='dashed', color='black', label='Eating industrially produced vehicle')
        else: 
            plt.hlines(df.loc[df.location_id==df.location_id.unique()[i]]['mean'], -0.1 + i, 0.7 + i, 
                   linestyle='dashed', color='black')
        plt.fill_between([-0.1 + i, 0.7 + i], [df.loc[df.location_id==df.location_id.unique()[i]]['2.5%'].values[0]] * 2,
                        [df.loc[df.location_id==df.location_id.unique()[i]]['97.5%'].values[0]] * 2,
                        alpha=0.2, color='black')
    
    plt.legend(bbox_to_anchor=[1.45,1])
    plt.title(f'{nutrient.capitalize()} in {vehicle} fortification coverage')
    plt.ylabel('Percent of population')
    plt.xticks([r + barWidth * 1.5 for r in range(len(df))], base['label'].values)

In [None]:
with PdfPages('results_plots/nigeria_lower_wealth_quintile_coverage_by_nutrient_vehicle_pair.pdf') as pdf:
    for nutrient in ['vitamin a','zinc','folic acid']:
        if nutrient == 'vitamin a':
            vehicles = ['oil','wheat flour','maize flour','industry oil']
        else:
            vehicles = ['wheat flour', 'maize flour']
        for vehicle in vehicles:
            make_coverage_bar_charts(cov_prepped, [214,214.5], nutrient, vehicle)
            pdf.savefig(bbox_inches='tight')