In [1]:
from functions_for_all_nutrients import *
from vitamin_a_and_zinc_functions import *
from low_ses_functions import *

In [2]:
# Resolve the repo root as the parent of the current working directory instead of
# hard-coding a user-specific path, so the notebook should work for all users.
import os.path

vivarium_research_lsff_path = os.path.abspath(os.path.pardir)
vivarium_research_lsff_path

'/ihme/homes/alibow/notebooks/vivarium_research_lsff'

In [3]:
# IPython shell capture: runs `whoami` and stores its output lines in a list-like
# object, so the login name is `username[0]`.
# NOTE(review): in the code visible here, `username` is only referenced from
# commented-out lines -- confirm it is still needed.
username = !whoami
username

['alibow']

In [4]:
# Shared directories in which GBD data is stored so we don't have to keep calling `get_draws`.

# Read location: Nathaniel has saved some .hdf files in this directory.
# Change it if you keep GBD data in a different directory.
read_share_dir = '/share/scratch/users/ndbs/vivarium_lsff/gbd_data'

# Write location: GBD data will be stored here in an .hdf file if it doesn't exist in the
# directory above. You can change this directory name if you want, and you may need to
# create it before running code below.
#write_share_dir = f'/share/scratch/users/{username[0]}/vivarium_lsff/gbd_data'

In [5]:
# Shared analysis dimensions referenced by the cells below.
index_cols = ['location_id', 'sex_id', 'age_group_id']
age_group_ids = list(range(2, 6))    # 2, 3, 4, 5
sex_ids = [1, 2]
coverage_levels = [0.2, 0.5, 0.8, 1]
years = list(range(2021, 2026))      # 2021 through 2025, inclusive

In [6]:
# Path to the low-SES coverage CSV, anchored at the repo root.
# NOTE: despite the `_dir` suffix, this variable holds the path of a file.
coverage_data_dir = f'{vivarium_research_lsff_path}/multiplication_models/low_ses_coverage.csv'
# # Old version that may not work for all users:
# coverage_data_dir = f'/ihme/homes/{username[0]}/notebooks/' \
#     'vivarium_research_lsff/data_prep/outputs/lsff_input_coverage_data.csv'

In [7]:
# Locations to model (a single ID here).
# NOTE(review): the `get_draws` cells further down hard-code location_id=214 rather than
# reading this list, so they must be kept in sync by hand if it changes.
location_ids = [214]
location_ids

[214]

In [8]:
# Scalar multipliers applied to the summed attributable-burden draws of each nutrient
# (used when `vitamin_a_dalys` and `zinc_dalys` are built further down).
# NOTE(review): the source of the 1.2 values is not documented in this notebook -- confirm.
vitamin_a_burden_multiplier = 1.2
zinc_burden_multiplier = 1.2

In [9]:
# Cause IDs whose burden is scaled by `burden_multipliers`.
cause_ids = [
    341,  # measles
    302,  # diarrheal diseases
]

In [10]:
# One multiplier per cause, keyed by `cause_id` so it can be merged onto the draw tables.
# The multipliers are listed in the same order as `cause_ids`.
burden_multipliers = pd.DataFrame({
    'cause_id': cause_ids,
    'multiplier': [1.48, 1.45],
})
burden_multipliers

Unnamed: 0,cause_id,multiplier
0,341,1.48
1,302,1.45


# VITAMIN A

In [11]:
# Vitamin A-specific settings -- these should be replaced for other models.
rei_id = 96
nutrient = 'vitamin a'

# Vehicle scenarios: the plain vehicles, then their 'zero ...', '... from lit'
# and '...*' variants (the 'from lit' group is listed in a different order).
_vitamin_a_base_vehicles = ['oil', 'wheat flour', 'maize flour', 'industry oil']
vehicles = (
    _vitamin_a_base_vehicles
    + [f'zero {v}' for v in _vitamin_a_base_vehicles]
    + ['oil from lit', 'industry oil from lit', 'wheat flour from lit', 'maize flour from lit']
    + [f'{v}*' for v in _vitamin_a_base_vehicles]
)

# Four values, passed together with `age_group_ids` to the effective-coverage function.
# NOTE(review): presumably one fraction per age group, in `age_group_ids` order -- confirm.
effective_fractions = [0, 0, (365 - (365 * 0.5)) / (365 - 28), 1]

In [12]:
# No-fortification relative risk (RR) distribution.
# Vitamin A-specific -- these values should be replaced for other models.

from numpy import log
from scipy.stats import norm, lognorm

# The RR is lognormal, specified through its median and 0.975-quantile
median, q_975 = 2.22, 5.26

# 0.975-quantile of the standard normal distribution (=1.96, approximately)
q_975_stdnorm = norm().ppf(0.975)

# Parameters of the normal distribution for log(RR)
mu = log(median)                            # mean
sigma = (log(q_975) - mu) / q_975_stdnorm   # standard deviation

# Frozen lognormal distribution for RR, representing uncertainty in our effect size
# (`s` is the shape parameter)
rr_distribution = lognorm(s=sigma, scale=median)

In [13]:
# Load baseline and counterfactual coverage for the vitamin A vehicles.
(vitamin_a_baseline_coverage,
 vitamin_a_counterfactual_coverage) = get_baseline_and_counterfactual_coverage(
    coverage_data_dir,
    location_ids,
    nutrient,
    vehicles,
    years,
    coverage_levels,
    'U5',
    True,
)

Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data


In [14]:
# Keep only the coverage level(s) that apply to each kind of vehicle scenario:
#   'zero ...' and '...*' vehicles -> coverage_level == 1
#   '... from lit' vehicles        -> coverage_level == 0.8
#   plain vehicles                 -> every coverage_level except 1
va_cf = vitamin_a_counterfactual_coverage
va_vehicle, va_level = va_cf.vehicle, va_cf.coverage_level

va_zero_vehicles = [v for v in vehicles if 'zero' in v]
va_starred_vehicles = [v for v in vehicles if '*' in v]
va_lit_vehicles = [v for v in vehicles if 'from lit' in v]
va_plain_vehicles = [v for v in vehicles
                     if 'zero' not in v and '*' not in v and 'from lit' not in v]

va_keep = (
    (va_vehicle.isin(va_zero_vehicles) & (va_level == 1))
    | (va_vehicle.isin(va_starred_vehicles) & (va_level == 1))
    | (va_vehicle.isin(va_lit_vehicles) & (va_level == 0.8))
    | (va_vehicle.isin(va_plain_vehicles) & (va_level != 1))
)
vitamin_a_counterfactual_coverage_prepped = va_cf.loc[va_keep]
vitamin_a_counterfactual_coverage_prepped.head()

Unnamed: 0,coverage_level,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,...,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999,location_id,vehicle,year
0,0.2,0.035534,0.013708,0.065921,0.044281,0.038209,0.073958,0.043379,0.050594,0.027348,...,0.077791,0.04466,0.049126,0.030397,0.038752,0.038738,0.063118,214,oil,2021
1,0.5,0.035534,0.013708,0.065921,0.044281,0.038209,0.073958,0.043379,0.050594,0.027348,...,0.077791,0.04466,0.049126,0.030397,0.038752,0.038738,0.063118,214,oil,2021
2,0.8,0.035534,0.013708,0.065921,0.044281,0.038209,0.073958,0.043379,0.050594,0.027348,...,0.077791,0.04466,0.049126,0.030397,0.038752,0.038738,0.063118,214,oil,2021
4,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,...,0.2,0.2,0.2,0.2,0.2,0.2,0.2,214,oil,2022
5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,214,oil,2022


In [15]:
# Index the coverage tables by their identifying columns; the counterfactual table is
# additionally indexed by coverage_level.
va_baseline_indexed = vitamin_a_baseline_coverage.set_index(
    ['location_id', 'year', 'vehicle'])
va_counterfactual_indexed = vitamin_a_counterfactual_coverage_prepped.set_index(
    ['location_id', 'year', 'vehicle', 'coverage_level'])

# Effective coverage for the baseline and counterfactual scenarios (same settings for both)
vitamin_a_baseline_effective_coverage = get_effective_vitamin_a_coverage(
    va_baseline_indexed, sex_ids, age_group_ids, effective_fractions, years)
vitamin_a_counterfactual_effective_coverage = get_effective_vitamin_a_coverage(
    va_counterfactual_indexed, sex_ids, age_group_ids, effective_fractions, years)

In [16]:
# Generate RR draws from the (mu, sigma) parameters for the modeled locations,
# then display the row-wise mean (one value per location_id).
vitamin_a_rr_deficiency_nofort = generate_rr_deficiency_nofort_draws(
    mu, sigma, location_ids
)
vitamin_a_rr_deficiency_nofort.mean(axis=1)

location_id
214    2.396239
dtype: float64

In [17]:
# compare PAFs
# Pull burdenator draws for rei 96 paired with each cause (302, then 341).
# Every other query argument is identical between the two pulls.
va_draw_query = dict(
    gbd_id_type=['rei_id', 'cause_id'],
    location_id=214,
    source='burdenator',
    measure_id=[2],
    metric_id=1,
    age_group_id=age_group_ids,
    year_id=2019,
    gbd_round_id=6,
    status='best',
    decomp_step='step5',
)
attrib_302 = get_draws(gbd_id=[96, 302], **va_draw_query)
attrib_341 = get_draws(gbd_id=[96, 341], **va_draw_query)

# Stack both pulls into one table with a fresh integer index
attrib = pd.concat([attrib_302, attrib_341], ignore_index=True)
attrib

Unnamed: 0,age_group_id,cause_id,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,...,draw_996,draw_997,draw_998,draw_999,location_id,measure_id,metric_id,rei_id,sex_id,year_id
0,4,302,54594.140823,116590.227558,22866.734911,82215.848008,16681.797449,15324.017686,10656.592759,40776.054225,...,31175.402688,66527.509791,27849.836706,21765.008009,214,2,1,96,1,2019
1,5,302,52529.09813,110792.813901,27980.540701,101118.512229,6322.111616,25719.954208,24954.259641,53858.523863,...,30267.067624,40344.707475,58172.576661,22834.344543,214,2,1,96,1,2019
2,4,302,25644.765714,51467.545651,23438.738794,59895.684518,14908.86626,15021.064606,7179.768723,28637.645635,...,18661.743983,62357.414333,9825.613041,9999.914503,214,2,1,96,2,2019
3,5,302,35309.011285,55380.816766,40727.019075,43661.701172,11320.239133,11976.26268,11660.422804,44700.932737,...,16460.858506,30104.899398,25640.992089,10186.40511,214,2,1,96,2,2019
4,4,341,3897.336868,6412.049545,1248.533148,3699.258665,635.317629,322.741288,107.238029,2343.385226,...,926.188411,1556.716386,2803.287203,252.393913,214,2,1,96,1,2019
5,5,341,15365.707929,29397.851287,5930.85367,22101.337106,976.246817,2147.966142,872.620003,9683.076429,...,4590.326195,3717.251312,20189.021928,1293.337381,214,2,1,96,1,2019
6,4,341,2064.453622,4288.852572,1490.626266,3222.004705,458.96614,356.683083,68.867871,1547.511978,...,683.759953,1393.544312,1006.828163,164.737727,214,2,1,96,2,2019
7,5,341,9435.134027,16737.200566,9252.700633,10454.761051,2043.095492,1204.612682,484.84043,11303.823449,...,2467.455769,3810.858389,10816.011899,641.59609,214,2,1,96,2,2019


In [18]:
# Scale the vitamin A-attributable draws by the cause-specific burden multipliers,
# then sum over causes to get one row per location / sex / age group.
va_draw_cols = [c for c in attrib.columns if 'draw' in c]

# Sum the draws within each location / cause / age / sex stratum and attach each
# cause's multiplier. `merge` defaults to an inner join, so causes that are missing
# from `burden_multipliers` are dropped.
va_attrib_by_cause = (attrib
                      .groupby(['location_id', 'cause_id', 'age_group_id', 'sex_id'])[va_draw_cols]
                      .sum()
                      .reset_index()
                      .merge(burden_multipliers, on='cause_id'))

# Multiply every draw column by its row's multiplier in one vectorized step; this
# works for whatever draw columns are present instead of assuming exactly 1000.
va_attrib_by_cause[va_draw_cols] = (va_attrib_by_cause[va_draw_cols]
                                    .mul(va_attrib_by_cause['multiplier'], axis=0))

# Collapse over causes, then apply the overall vitamin A burden multiplier
attrib_prepped = (va_attrib_by_cause
                  .groupby(['location_id', 'sex_id', 'age_group_id'])[va_draw_cols]
                  .sum())
vitamin_a_dalys = attrib_prepped * vitamin_a_burden_multiplier
vitamin_a_dalys.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,sex_id,age_group_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
214,1,4,101915.47531,214254.795941,42005.513616,149625.458923,30154.651671,27236.979302,18732.92614,75112.186512,126769.892224,177032.118673,...,55177.060121,59901.552885,72270.275956,104632.027911,149230.890366,57599.940561,55890.111296,118522.595338,53437.353942,38319.365525
214,1,5,118690.128029,244990.080072,59219.336938,215198.185979,12734.28856,48567.50819,44970.1849,110910.97526,131456.993388,57813.599429,...,29260.674312,91952.867662,205730.379629,55332.69814,182012.409307,53001.235554,60817.116987,76801.629335,137075.986333,42028.726693
214,2,4,48288.361976,97170.531601,43430.757751,109940.771418,26756.551156,26770.12157,12615.106916,52577.884677,46874.49364,71460.421288,...,48419.93037,76603.279264,101358.564577,54899.819856,58665.78624,64859.059653,33685.792207,110976.835636,18884.693508,17692.425439
214,2,5,78194.477668,126087.889378,87297.809514,94539.015667,23325.753685,22978.089186,21150.212284,97855.213408,76895.908846,120244.412293,...,25584.137218,111053.204658,100062.498711,89469.965236,157409.827959,60113.852086,33024.095247,59150.609452,63824.563367,18863.819548


In [19]:
# Compute `vitamin_a_paf_deficiency_nofort` from the RR draws and the baseline effective coverage.
vitamin_a_paf_deficiency_nofort = calculate_paf_deficiency_nofort(
    vitamin_a_rr_deficiency_nofort,
    vitamin_a_baseline_effective_coverage,
)

In [20]:
# Compute `vitamin_a_pif_deficiency_nofort` from the PAF result plus the baseline and
# counterfactual effective coverage.
vitamin_a_pif_deficiency_nofort = calculate_pif_deficiency_nofort(
    vitamin_a_paf_deficiency_nofort,
    vitamin_a_baseline_effective_coverage,
    vitamin_a_counterfactual_effective_coverage,
)

In [21]:
# Combine the PIF result with the vitamin A DALY draws; the helper returns two tables
# (overall PIFs and DALY reduction counts).
(vitamin_a_overall_pifs,
 vitamin_a_daly_reduction_counts) = calculate_final_pifs_and_daly_reductions(
    vitamin_a_pif_deficiency_nofort,
    vitamin_a_dalys,
    coverage_levels,
    years,
)

In [22]:
# Derive `vitamin_a_daly_reduction_rates` from the reduction counts.
# NOTE(review): presumably `calculate_rates` divides by population for the given
# locations / ages / sexes -- confirm against its definition.
vitamin_a_daly_reduction_rates = calculate_rates(
    vitamin_a_daly_reduction_counts,
    location_ids,
    age_group_ids,
    sex_ids,
)

In [23]:
# Tag each result table (in place) with the measure it holds, then stack the three
# tables into one frame with a fresh integer index.
va_tables_by_measure = [
    (vitamin_a_overall_pifs, 'pif'),
    (vitamin_a_daly_reduction_counts, 'counts_averted'),
    (vitamin_a_daly_reduction_rates, 'rates_averted'),
]
for va_table, va_measure in va_tables_by_measure:
    va_table['measure'] = va_measure

vitamin_a_results = pd.concat(
    [table.reset_index() for table, _measure in va_tables_by_measure],
    ignore_index=True,
)
vitamin_a_results.head()

draws,location_id,year,vehicle,coverage_level,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,...,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999,measure
0,214,2021,industry oil,0.2,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif
1,214,2021,industry oil,0.5,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif
2,214,2021,industry oil,0.8,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif
3,214,2021,industry oil from lit,0.8,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif
4,214,2021,industry oil*,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif


In [24]:
# Pull in the overall national results, anchored at the repo root (like the zinc
# results further down) so the read does not depend on the working directory.
# NOTE: pickle files can execute arbitrary code on load; only read files produced by this project.
vitamin_a_national = pd.read_pickle(
    vivarium_research_lsff_path + '/multiplication_models/results_raw/vitamin_a.pkl')

# Restrict to the locations and vehicles modeled here, using a single combined mask.
vitamin_a_national = vitamin_a_national.loc[
    vitamin_a_national.location_id.isin(vitamin_a_results.location_id.unique())
    & vitamin_a_national.vehicle.isin(vehicles)]

# location_comparisons
# NOTE: this cell overwrites `vitamin_a_results`; re-running it without re-running the
# cell that builds `vitamin_a_results` relabels all existing rows as
# 'Lower Wealth Quintile' and appends the national rows a second time.
vitamin_a_results['subgroup'] = 'Lower Wealth Quintile'
vitamin_a_national['subgroup'] = 'Total Population'
vitamin_a_results = pd.concat([vitamin_a_results, vitamin_a_national], ignore_index=True)
vitamin_a_results.head()

draws,location_id,year,vehicle,coverage_level,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,...,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999,measure,subgroup
0,214,2021,industry oil,0.2,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif,Lower Wealth Quintile
1,214,2021,industry oil,0.5,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif,Lower Wealth Quintile
2,214,2021,industry oil,0.8,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif,Lower Wealth Quintile
3,214,2021,industry oil from lit,0.8,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif,Lower Wealth Quintile
4,214,2021,industry oil*,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif,Lower Wealth Quintile


# ZINC

In [25]:
# ZINC -- nutrient-specific settings (these rebind the names set in the vitamin A section)
rei_id = 97
nutrient = 'zinc'

# Each naming variant (plain, 'zero ...', '... from lit', '...*') for both flours
vehicles = [
    variant.format(flour)
    for variant in ('{}', 'zero {}', '{} from lit', '{}*')
    for flour in ('wheat flour', 'maize flour')
]
effective_fractions = [0, 0, 0, 1]

In [26]:
# Relative risk (RR) distribution for zinc.
# Zinc-specific -- these values should be replaced for other models.

from numpy import log
from scipy.stats import norm, lognorm

# The RR is lognormal, specified through its median and 0.975-quantile
median, q_975 = 0.47, 0.69

# 0.975-quantile of the standard normal distribution (=1.96, approximately)
q_975_stdnorm = norm().ppf(0.975)

# Parameters of the normal distribution for log(RR)
mu = log(median)                            # mean
sigma = (log(q_975) - mu) / q_975_stdnorm   # standard deviation

# Frozen lognormal distribution for RR, representing uncertainty in our effect size
# (`s` is the shape parameter)
rr_distribution = lognorm(s=sigma, scale=median)

In [27]:
# Load baseline and counterfactual coverage for the zinc vehicles.
# (The original statement repeated the assignment target twice; one unpacking is enough.)
zinc_baseline_coverage, zinc_counterfactual_coverage = get_baseline_and_counterfactual_coverage(
    coverage_data_dir,
    location_ids,
    'zinc',
    vehicles,
    years,
    coverage_levels, 'U5', True)

Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data


In [28]:
# Keep only the coverage level(s) that apply to each kind of vehicle scenario:
#   'zero ...' and '...*' vehicles -> coverage_level == 1
#   '... from lit' vehicles        -> coverage_level == 0.8
#   plain vehicles                 -> every coverage_level except 1
zn_cf = zinc_counterfactual_coverage
zn_vehicle, zn_level = zn_cf.vehicle, zn_cf.coverage_level

zn_zero_vehicles = [v for v in vehicles if 'zero' in v]
zn_starred_vehicles = [v for v in vehicles if '*' in v]
zn_lit_vehicles = [v for v in vehicles if 'from lit' in v]
zn_plain_vehicles = [v for v in vehicles
                     if 'zero' not in v and '*' not in v and 'from lit' not in v]

zn_keep = (
    (zn_vehicle.isin(zn_zero_vehicles) & (zn_level == 1))
    | (zn_vehicle.isin(zn_starred_vehicles) & (zn_level == 1))
    | (zn_vehicle.isin(zn_lit_vehicles) & (zn_level == 0.8))
    | (zn_vehicle.isin(zn_plain_vehicles) & (zn_level != 1))
)
zinc_counterfactual_coverage_prepped = zn_cf.loc[zn_keep]
zinc_counterfactual_coverage_prepped.head()

Unnamed: 0,coverage_level,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,...,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999,location_id,vehicle,year
0,0.2,0.030449,0.017451,0.047741,0.035449,0.031982,0.052297,0.034934,0.039041,0.025722,...,0.054469,0.035665,0.038207,0.027491,0.032293,0.032285,0.046152,214,wheat flour,2021
1,0.5,0.030449,0.017451,0.047741,0.035449,0.031982,0.052297,0.034934,0.039041,0.025722,...,0.054469,0.035665,0.038207,0.027491,0.032293,0.032285,0.046152,214,wheat flour,2021
2,0.8,0.030449,0.017451,0.047741,0.035449,0.031982,0.052297,0.034934,0.039041,0.025722,...,0.054469,0.035665,0.038207,0.027491,0.032293,0.032285,0.046152,214,wheat flour,2021
4,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,...,0.2,0.2,0.2,0.2,0.2,0.2,0.2,214,wheat flour,2022
5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,214,wheat flour,2022


In [29]:
# Take the reciprocal of the generated RR draws.
# NOTE(review): presumably the zinc median (0.47) is a protective RR, so 1/RR expresses
# the risk without fortification -- confirm.
zinc_rr_deficiency_nofort = 1 / generate_rr_deficiency_nofort_draws(mu, sigma, location_ids)

In [30]:
# Index each coverage table by all of its non-draw columns before applying the
# age-related effective coverage restrictions.
zn_baseline_id_cols = [c for c in zinc_baseline_coverage.columns if 'draw' not in c]
zn_counterfactual_id_cols = [c for c in zinc_counterfactual_coverage_prepped.columns
                             if 'draw' not in c]

zinc_baseline_effective_coverage = apply_age_related_effective_coverage_restrictions(
    zinc_baseline_coverage.set_index(zn_baseline_id_cols),
    sex_ids, age_group_ids, effective_fractions)
zinc_counterfactual_effective_coverage = apply_age_related_effective_coverage_restrictions(
    zinc_counterfactual_coverage_prepped.set_index(zn_counterfactual_id_cols),
    sex_ids, age_group_ids, effective_fractions)

In [31]:
# Compute `zinc_paf_deficiency_nofort` from the RR draws and the baseline effective coverage.
zinc_paf_deficiency_nofort = calculate_paf_deficiency_nofort(
    zinc_rr_deficiency_nofort,
    zinc_baseline_effective_coverage,
)

In [32]:
# Compute `zinc_pif_deficiency_nofort` from the PAF result plus the baseline and
# counterfactual effective coverage.
zinc_pif_deficiency_nofort = calculate_pif_deficiency_nofort(
    zinc_paf_deficiency_nofort,
    zinc_baseline_effective_coverage,
    zinc_counterfactual_effective_coverage,
)

In [33]:
# Pull burdenator draws for rei 97 paired with cause 302.
zinc_draw_query = dict(
    gbd_id_type=['rei_id', 'cause_id'],
    gbd_id=[97, 302],
    location_id=214,
    source='burdenator',
    measure_id=[2],
    metric_id=1,
    age_group_id=age_group_ids,
    year_id=2019,
    gbd_round_id=6,
    status='best',
    decomp_step='step5',
)
zinc_attrib_302 = get_draws(**zinc_draw_query)

In [34]:
# Scale the zinc-attributable draws by the cause-specific burden multipliers,
# then sum over causes to get one row per location / sex / age group.
zn_draw_cols = [c for c in zinc_attrib_302.columns if 'draw' in c]

# Sum the draws within each location / cause / sex / age stratum and attach each
# cause's multiplier. `merge` defaults to an inner join, so causes that are missing
# from `burden_multipliers` are dropped.
zn_attrib_by_cause = (zinc_attrib_302
                      .groupby(['location_id', 'cause_id', 'sex_id', 'age_group_id'])[zn_draw_cols]
                      .sum()
                      .reset_index()
                      .merge(burden_multipliers, on='cause_id'))

# Multiply every draw column by its row's multiplier in one vectorized step; this
# works for whatever draw columns are present instead of assuming exactly 1000.
zn_attrib_by_cause[zn_draw_cols] = (zn_attrib_by_cause[zn_draw_cols]
                                    .mul(zn_attrib_by_cause['multiplier'], axis=0))

# Collapse over causes, then apply the overall zinc burden multiplier
attrib_prepped = (zn_attrib_by_cause
                  .groupby(['location_id', 'sex_id', 'age_group_id'])[zn_draw_cols]
                  .sum())
zinc_dalys = attrib_prepped * zinc_burden_multiplier
zinc_dalys.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,sex_id,age_group_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
214,1,5,26886.387084,2186.827041,14873.103309,29688.41072,5749.687878,5878.332342,7777.120917,35894.569203,39982.36613,15862.744214,...,23524.030903,53836.5172,11251.013697,20790.663659,66902.620426,60280.627663,23052.589844,14640.763681,18994.749033,26852.779488
214,2,5,29551.228471,1141.376468,9129.202846,28989.043687,5229.374369,4031.821339,6827.653831,29759.331283,32178.948439,13451.607635,...,21216.320402,46872.629609,10410.16055,19925.170322,64040.726232,55340.969034,19372.616229,14520.103517,17541.046389,27790.042617


In [35]:
# Combine the PIF result with the zinc DALY draws; the helper returns two tables
# (overall PIFs and DALY reduction counts).
(zinc_overall_pifs,
 zinc_daly_reduction_counts) = calculate_final_pifs_and_daly_reductions(
    zinc_pif_deficiency_nofort,
    zinc_dalys,
    coverage_levels,
    years,
)

In [36]:
# Derive `zinc_daly_reduction_rates` from the reduction counts.
# NOTE(review): presumably `calculate_rates` divides by population for the given
# locations / ages / sexes -- confirm against its definition.
zinc_daly_reduction_rates = calculate_rates(
    zinc_daly_reduction_counts,
    location_ids,
    age_group_ids,
    sex_ids,
)

In [37]:
# Tag each result table (in place) with the measure it holds, then stack the three
# tables into one frame with a fresh integer index.
zn_tables_by_measure = [
    (zinc_overall_pifs, 'pif'),
    (zinc_daly_reduction_counts, 'counts_averted'),
    (zinc_daly_reduction_rates, 'rates_averted'),
]
for zn_table, zn_measure in zn_tables_by_measure:
    zn_table['measure'] = zn_measure

zinc_results = pd.concat(
    [table.reset_index() for table, _measure in zn_tables_by_measure],
    ignore_index=True,
)

In [38]:
# Pull in the overall national zinc results.
# NOTE: pickle files can execute arbitrary code on load; only read files produced by this project.
zinc_national_results = pd.read_pickle(vivarium_research_lsff_path + '/multiplication_models/results_raw/zinc.pkl')

# Restrict to the locations present in the *zinc* results (the original filtered on
# `vitamin_a_results`, a copy-paste slip) and to the zinc vehicles, using one combined mask.
zinc_national_results = zinc_national_results.loc[
    zinc_national_results.location_id.isin(zinc_results.location_id.unique())
    & zinc_national_results.vehicle.isin(vehicles)]

# location_comparisons
# NOTE: this cell overwrites `zinc_results`; re-running it without re-running the cell
# that builds `zinc_results` relabels all existing rows as 'Lower Wealth Quintile'
# and appends the national rows a second time.
zinc_results['subgroup'] = 'Lower Wealth Quintile'
zinc_national_results['subgroup'] = 'Total Population'
zinc_results = pd.concat([zinc_results, zinc_national_results], ignore_index=True)
zinc_results.head()

draws,location_id,year,vehicle,coverage_level,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,...,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999,measure,subgroup
0,214,2021,maize flour,0.2,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif,Lower Wealth Quintile
1,214,2021,maize flour,0.5,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif,Lower Wealth Quintile
2,214,2021,maize flour,0.8,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif,Lower Wealth Quintile
3,214,2021,maize flour from lit,0.8,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif,Lower Wealth Quintile
4,214,2021,maize flour*,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pif,Lower Wealth Quintile


# Folic Acid

In [39]:
from mult_model_fns import *
from folic_acid.folic_acid_mult_model_fns import *

In [40]:
# Folic acid settings.
# NOTE: `index_cols`, `coverage_levels`, `rei_id`, `cause_ids`, `nutrient` and `vehicles`
# rebind names that the vitamin A / zinc sections also set, so earlier cells re-run
# after this one will see these values.
sexes = [1, 2]
ages = list(range(2, 6))  # 2, 3, 4, 5

draws = ['draw_%d' % i for i in range(1000)]
index_cols = ['location_id', 'sex_id', 'age_group_id']

# Alternative scenario coverage levels.
# This parameter represents the proportion of additional coverage achieved in the
# alternative scenario, defined as the difference between the proportion of the
# population that eats the fortified vehicle and the proportion of the population
# that eats the industrially produced vehicle.
alternative_scenario_coverage_levels = [0.2, 0.5, 0.8, 1]
coverage_levels = alternative_scenario_coverage_levels  # alias of the same list object

rei_id = []              # folic acid doesn't affect any risks; just NTDs
cause_ids = [642]        # NTDs
nonfatal_causes = [642]  # YLLs and YLDs
nutrient = 'folic acid'
vehicles = [
    'maize flour', 'wheat flour',
    'zero wheat flour', 'zero maize flour',
    'maize flour from lit', 'wheat flour from lit',
    'wheat flour*', 'maize flour*',
]
ntd_burden_multiplier = 1.9

In [41]:
# Parameters of the no-fortification relative risk (RR) distribution.
# Folic acid-specific -- these values should be replaced for other models.

from numpy import log
from scipy.stats import norm, lognorm

# The RR is lognormal, specified through its median and 0.975-quantile
median, q_975 = 1.71, 2.04

# 0.975-quantile of the standard normal distribution (=1.96, approximately)
q_975_stdnorm = norm().ppf(0.975)

# Parameters of the normal distribution for log(RR)
mu = log(median)                            # mean
sigma = (log(q_975) - mu) / q_975_stdnorm   # standard deviation

In [42]:
# Relative risk of NTDs given lack of fortification, as draws by location.
# Effect size reference:
# https://vivarium-research.readthedocs.io/en/latest/concept_models/vivarium_conic_lsff/concept_model.html#effect-size-folic-acid
rr_ntds_nofort = format_rrs(
    lognormal_draws(mu, sigma, seed=7),  # fixed seed keeps the draws reproducible
    location_ids,
)
# Mean across columns for each row, as a quick sanity check against the median RR.
rr_ntds_nofort.mean(axis=1)

location_id
214    1.712028
dtype: float64

In [43]:
# Pull DALY draws for the NTD cause and scale them by the NTD burden multiplier.
# NOTE(review): `cause_ids` is passed for both of the first two arguments;
# `nonfatal_causes` holds the same value ([642]) -- confirm which is intended.
dalys = pull_dalys(
    cause_ids,
    cause_ids,
    location_ids,
    ages,
    sexes,
    index_cols,
) * ntd_burden_multiplier
dalys.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,sex_id,age_group_id,cause_id,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
214,1,2,642,371654.350684,386420.944817,408576.51947,369463.788284,277590.780637,454249.752867,128967.746274,192585.662321,207262.068624,215861.627186,...,973711.787553,707822.620698,571634.862518,505884.695647,535430.778939,762361.867689,570650.425039,606266.319512,636838.16305,536112.846739
214,1,3,642,91950.154891,98988.128002,91700.730925,76079.180311,87033.756918,128456.849137,46352.17013,44311.209668,40963.841682,50359.24333,...,146064.764447,96548.98829,159747.628844,113662.347304,121240.001572,162170.38594,124831.881862,115952.963359,131625.011172,113203.031468
214,1,4,642,219205.901261,255927.350726,237631.759195,176955.775749,240942.237385,317819.023184,110479.064599,127007.188455,111500.187686,118900.83913,...,506474.381088,402903.21976,523373.982363,406292.21043,325882.974519,482676.603178,401258.687933,398379.758392,431424.809219,341175.445091
214,1,5,642,69514.717045,70895.56424,76151.755169,102729.665638,60879.241542,101688.133258,49831.740455,44090.634038,72001.264279,41761.75104,...,580354.683367,532207.819749,755778.810062,434940.741712,353321.070408,629851.363904,479181.293219,568550.290465,506795.149231,558194.732414
214,2,2,642,306044.594554,367523.818857,232360.21323,479655.493223,340292.191067,306885.255405,272122.145649,367452.873504,365110.970477,334222.344053,...,628574.962663,538605.555438,462893.390582,618597.02979,484044.737475,679585.830321,564278.697787,867215.579353,662551.896681,906061.29659


In [44]:
# Population for the modelled locations, sexes and age groups
# (year 2019, GBD round 6, decomp step 4).
pop = get_population(
    location_id=location_ids,
    age_group_id=ages,
    sex_id=sexes,
    year_id=2019,
    gbd_round_id=6,
    decomp_step='step4',
)

In [45]:
# Folic acid impact per vehicle for 2025, expressed as three measures:
#   counts_averted - DALYs averted
#   rates_averted  - DALYs averted per 100,000 population
#   pif            - DALYs averted as a percentage of total DALYs
result_index_cols = ['location_id','measure','coverage_level','year_id']

# Loop-invariant denominators: computed once rather than once per vehicle.
population_by_location = pop.groupby('location_id').sum().drop(columns='year_id')
total_dalys_by_location = dalys.groupby('location_id').sum()

# Collect one frame per vehicle and concatenate once at the end instead of
# growing a DataFrame inside the loop.
per_vehicle_results = []
for vehicle in vehicles:
    alpha, alpha_star = get_baseline_and_counterfactual_coverage(coverage_data_dir,
                                             location_ids,
                                             'folic acid',
                                             [vehicle],
                                             list(range(2022,2026)),
                                             coverage_levels, 'WRA', True)
    # Keep only the 2025 coverage values.
    alpha = alpha.loc[alpha.year==2025].set_index('location_id').drop(columns=['vehicle','year'])
    alpha_star = (alpha_star.loc[alpha_star.year==2025]
                  .set_index(['location_id','coverage_level'])
                  .drop(columns=['vehicle','year']))
    gets_intervn = prop_gets_intervention_effect(location_ids, year_start=2022, estimation_years = range(2022,2026))
    new_coverage = percolate_new_coverage(gets_intervn, alpha, alpha_star)
    paf_ntds_nofort = paf_o_r(rr_ntds_nofort, alpha)
    pif_ntds_nofort = pif_o_r(paf_ntds_nofort, alpha = alpha, alpha_star = new_coverage)
    dalys_averted = calc_dalys_averted(dalys, pif_ntds_nofort)

    # Sum draws within location / year / coverage level.
    dalys_averted_u5 = dalys_averted.reset_index().groupby(['location_id','year_id','coverage_level']).sum()[draws]
    dalys_averted_u5['vehicle'] = vehicle

    # Counts averted in 2025; .copy() so the column assignment below does not
    # write into a slice of another frame.
    dalys_averted_flat = dalys_averted_u5.reset_index()
    counts = dalys_averted_flat.loc[dalys_averted_flat.year_id==2025].copy()
    counts['measure'] = 'counts_averted'
    counts = counts.set_index([c for c in counts.columns if 'draw' not in c])

    # Rates averted per 100,000: all draw columns divided row-wise in one step.
    rates = counts.reset_index().merge(population_by_location, on='location_id')
    rates[draws] = rates[draws].div(rates['population'], axis=0) * 100_000
    rates['measure'] = 'rates_averted'
    rates = rates.set_index(result_index_cols)
    rates = rates.drop(columns=[c for c in rates.columns if 'draw' not in c])

    # Averted DALYs as a percentage of total DALYs in the location.
    pif = (counts / total_dalys_by_location * 100).reset_index()
    pif['measure'] = 'pif'
    pif = pif.set_index(result_index_cols)

    counts = counts.reset_index().set_index(result_index_cols)
    vehicle_results = pd.concat([rates, pif, counts], sort=True).reset_index()
    vehicle_results['vehicle'] = vehicle
    per_vehicle_results.append(vehicle_results)

# The original loop prepended each vehicle's rows, so the most recently
# processed vehicle comes first; reversing the list keeps that row order.
if per_vehicle_results:
    low_ses_results = pd.concat(per_vehicle_results[::-1], ignore_index=True)
else:
    low_ses_results = pd.DataFrame()

low_ses_results.head()

Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data


Unnamed: 0,location_id,measure,coverage_level,year_id,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,...,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999,vehicle
0,214,rates_averted,0.2,2025,23.425995,22.014191,16.105152,20.810685,20.067045,22.584367,...,34.037799,43.051206,38.876741,32.894858,49.612874,43.45026,45.421631,41.38391,42.720197,maize flour*
1,214,rates_averted,0.5,2025,63.858663,57.371444,46.771032,57.755667,55.001968,66.73115,...,93.104229,112.955441,115.832575,91.364155,139.076703,117.211603,124.635197,113.552563,123.325279,maize flour*
2,214,rates_averted,0.8,2025,104.291332,92.728696,77.436912,94.700649,89.936891,110.877933,...,152.170659,182.859676,192.788409,149.833452,228.540531,190.972947,203.848762,185.721216,203.93036,maize flour*
3,214,rates_averted,1.0,2025,131.246444,116.300197,97.880832,119.330637,113.22684,140.309121,...,191.548279,229.462499,244.092298,188.812983,288.183083,240.147175,256.657806,233.833651,257.667081,maize flour*
4,214,pif,0.2,2025,0.552233,0.452673,0.398493,0.38326,0.465704,0.434095,...,0.349312,0.426947,0.429786,0.328524,0.373882,0.398325,0.34474,0.344321,0.32433,maize flour*


In [46]:
# Keep only the (vehicle, coverage level) combinations that are reported:
#   'zero ...' vehicles      -> coverage level 1 only
#   '...*' vehicles          -> coverage level 1 only
#   '... from lit' vehicles  -> coverage level 0.8 only
#   all other vehicles       -> every coverage level except 1
zero_vehicles = [v for v in vehicles if 'zero' in v]
starred_vehicles = [v for v in vehicles if '*' in v]
from_lit_vehicles = [v for v in vehicles if 'from lit' in v]
plain_vehicles = [v for v in vehicles
                  if 'zero' not in v and '*' not in v and 'from lit' not in v]

keep_zero = low_ses_results.vehicle.isin(zero_vehicles) & (low_ses_results.coverage_level==1)
keep_starred = low_ses_results.vehicle.isin(starred_vehicles) & (low_ses_results.coverage_level==1)
keep_from_lit = low_ses_results.vehicle.isin(from_lit_vehicles) & (low_ses_results.coverage_level==0.8)
keep_plain = low_ses_results.vehicle.isin(plain_vehicles) & (low_ses_results.coverage_level!=1)

low_ses_results_prepped = low_ses_results.loc[keep_zero | keep_starred | keep_from_lit | keep_plain]

low_ses_results_prepped.head()

Unnamed: 0,location_id,measure,coverage_level,year_id,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,...,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999,vehicle
3,214,rates_averted,1.0,2025,131.246444,116.300197,97.880832,119.330637,113.22684,140.309121,...,191.548279,229.462499,244.092298,188.812983,288.183083,240.147175,256.657806,233.833651,257.667081,maize flour*
7,214,pif,1.0,2025,3.093937,2.391457,2.421886,2.197652,2.627703,2.696886,...,1.965762,2.275623,2.698461,1.885695,2.171746,2.20152,1.947975,1.945533,1.956195,maize flour*
11,214,counts_averted,1.0,2025,43995.957783,38985.730953,32811.258017,40001.583956,37955.491172,47033.915475,...,64210.120336,76919.588008,81823.736061,63293.20443,96603.689524,80501.266426,86035.90026,78384.869529,86374.225826,maize flour*
15,214,rates_averted,1.0,2025,972.148296,864.752452,716.227468,878.217234,836.531226,1027.17594,...,1413.754974,1707.024904,1791.096538,1389.94977,2121.629235,1778.456398,1893.272668,1724.503057,1886.68297,wheat flour*
19,214,pif,1.0,2025,22.916933,17.781724,17.721765,16.173681,19.413733,19.743378,...,14.508643,16.92889,19.800726,13.88157,15.988585,16.303783,14.369512,14.34814,14.323601,wheat flour*


In [47]:
# Pull in the overall (national) folic acid results.
# The path is built from the repo root resolved at the top of the notebook so
# it works for every user, not just one home directory.
# NOTE: read_pickle can execute arbitrary code -- only load trusted files.
folic_acid_national_results = pd.read_pickle(
    vivarium_research_lsff_path + '/multiplication_models/results_raw/folic_acid_waterfall_salt.pkl')

# Restrict to the locations modelled above and to the vehicles reported nationally.
# Both conditions are evaluated on the same frame and applied in one step;
# .copy() makes the column assignment below act on an independent frame.
national_vehicles = ['wheat flour','maize flour','zero wheat flour','zero maize flour']
in_scope = (folic_acid_national_results.location_id.isin(low_ses_results.location_id.unique())
            & folic_acid_national_results.vehicle.isin(national_vehicles))
folic_acid_national_results = folic_acid_national_results.loc[in_scope].copy()

# location_comparisons
low_ses_results['subgroup'] = 'Lower Wealth Quintile'
low_ses_results['nutrient'] = nutrient
folic_acid_national_results['subgroup'] = 'Total Population'

# NOTE(review): this concatenates the unfiltered `low_ses_results`, not
# `low_ses_results_prepped` -- confirm that is intended.
# reset_index() is kept because it adds the 'index' column seen in the output.
# sort=True pins the sorted column order regardless of the pandas default.
folic_acid_results = pd.concat([low_ses_results.reset_index(),
                     folic_acid_national_results.loc[folic_acid_national_results.year_id==2025]],
                               ignore_index=True, sort=True).rename(columns={'year_id':'year'})
folic_acid_results.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  del sys.path[0]


Unnamed: 0,coverage_level,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,...,draw_997,draw_998,draw_999,index,location_id,measure,nutrient,subgroup,vehicle,year
0,0.2,23.425995,22.014191,16.105152,20.810685,20.067045,22.584367,10.883898,17.819524,15.274847,...,45.421631,41.38391,42.720197,0,214,rates_averted,folic acid,Lower Wealth Quintile,maize flour*,2025
1,0.5,63.858663,57.371444,46.771032,57.755667,55.001968,66.73115,30.149894,50.105018,40.950642,...,124.635197,113.552563,123.325279,1,214,rates_averted,folic acid,Lower Wealth Quintile,maize flour*,2025
2,0.8,104.291332,92.728696,77.436912,94.700649,89.936891,110.877933,49.415889,82.390513,66.626436,...,203.848762,185.721216,203.93036,2,214,rates_averted,folic acid,Lower Wealth Quintile,maize flour*,2025
3,1.0,131.246444,116.300197,97.880832,119.330637,113.22684,140.309121,62.259887,103.914176,83.743633,...,256.657806,233.833651,257.667081,3,214,rates_averted,folic acid,Lower Wealth Quintile,maize flour*,2025
4,0.2,0.552233,0.452673,0.398493,0.38326,0.465704,0.434095,0.340064,0.454441,0.410316,...,0.34474,0.344321,0.32433,4,214,pif,folic acid,Lower Wealth Quintile,maize flour*,2025


# Results Viz

In [48]:
# Label each nutrient's results frame (in place), then stack them into one table.
nutrient_result_frames = [
    (vitamin_a_results, 'vitamin a'),
    (zinc_results, 'zinc'),
    (folic_acid_results, 'folic acid'),
]
for nutrient_frame, nutrient_label in nutrient_result_frames:
    nutrient_frame['nutrient'] = nutrient_label

results = pd.concat(
    [nutrient_frame for nutrient_frame, _ in nutrient_result_frames],
    ignore_index=True,
    sort=True,
)
results.head()

Unnamed: 0,coverage_level,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,...,draw_997,draw_998,draw_999,index,location_id,measure,nutrient,subgroup,vehicle,year
0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,,214,pif,vitamin a,Lower Wealth Quintile,industry oil,2021
1,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,,214,pif,vitamin a,Lower Wealth Quintile,industry oil,2021
2,0.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,,214,pif,vitamin a,Lower Wealth Quintile,industry oil,2021
3,0.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,,214,pif,vitamin a,Lower Wealth Quintile,industry oil from lit,2021
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,,214,pif,vitamin a,Lower Wealth Quintile,industry oil*,2021


# Coverage

In [49]:
# National (total population) baseline and counterfactual coverage for every
# nutrient. The input path is built from the repo root resolved at the top of
# the notebook so it works for every user, not just one home directory.
national_coverage_path = (vivarium_research_lsff_path
                          + '/data_prep/outputs/waterfall_coverage_all_vehicles.csv')

nat_cov_baseline = pd.DataFrame()
nat_cov_counterfactual = pd.DataFrame()
# NOTE: this loop rebinds the notebook-level names `nutrient`, `vehicles` and
# `subgroup`; later cells see the values from the last iteration.
for nutrient in ['vitamin a','zinc','folic acid']:
    # Vitamin A has two oil vehicles in addition to the flours.
    if nutrient=='vitamin a':
        vehicles = ['oil','industry oil','wheat flour','maize flour']
    else:
        vehicles = ['wheat flour','maize flour']
    # Folic acid coverage is requested for the 'WRA' subgroup, the others for 'U5'.
    if nutrient == 'folic acid':
        subgroup = 'WRA'
    else:
        subgroup = 'U5'
    baseline_coverage, counterfactual_coverage = get_baseline_and_counterfactual_coverage(
        national_coverage_path,
                                             location_ids,
                                             nutrient,
                                             vehicles,
                                             years,
                                             coverage_levels, subgroup)
    baseline_coverage['nutrient'] = nutrient
    counterfactual_coverage['nutrient'] = nutrient
    nat_cov_baseline = pd.concat([nat_cov_baseline,baseline_coverage], ignore_index=True, sort=True)
    nat_cov_counterfactual = pd.concat([nat_cov_counterfactual,counterfactual_coverage], ignore_index=True, sort=True)
nat_cov_baseline.head()

Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data


Unnamed: 0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999,location_id,nutrient,vehicle,year
0,0.068353,0.059991,0.079454,0.071563,0.069338,0.082378,0.071233,0.07387,0.065317,0.071208,...,0.071702,0.073334,0.066454,0.069537,0.069532,0.078434,214,vitamin a,oil,2021
1,0.068353,0.059991,0.079454,0.071563,0.069338,0.082378,0.071233,0.07387,0.065317,0.071208,...,0.071702,0.073334,0.066454,0.069537,0.069532,0.078434,214,vitamin a,oil,2022
2,0.068353,0.059991,0.079454,0.071563,0.069338,0.082378,0.071233,0.07387,0.065317,0.071208,...,0.071702,0.073334,0.066454,0.069537,0.069532,0.078434,214,vitamin a,oil,2023
3,0.068353,0.059991,0.079454,0.071563,0.069338,0.082378,0.071233,0.07387,0.065317,0.071208,...,0.071702,0.073334,0.066454,0.069537,0.069532,0.078434,214,vitamin a,oil,2024
4,0.068353,0.059991,0.079454,0.071563,0.069338,0.082378,0.071233,0.07387,0.065317,0.071208,...,0.071702,0.073334,0.066454,0.069537,0.069532,0.078434,214,vitamin a,oil,2025


In [50]:
# Combine national baseline and counterfactual coverage, keep 2025 only, and
# collapse the draw columns to their mean.
nat_cov_baseline['coverage_level'] = 'baseline'
# sort=True pins the sorted column order regardless of the pandas default
# (the two frames hold 'coverage_level' at different positions).
nat_cov = pd.concat([nat_cov_baseline, nat_cov_counterfactual], ignore_index=True, sort=True)
nat_cov = nat_cov.loc[nat_cov.year==2025].drop(columns='year')
nat_cov['subgroup'] = 'Total Population'
# Index by every non-draw column, then average across the draw columns;
# reset_index() names the resulting mean column 0.
nat_cov = nat_cov.set_index([c for c in nat_cov.columns if 'draw' not in c]).mean(axis=1).reset_index()
nat_cov.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


Unnamed: 0,coverage_level,location_id,nutrient,vehicle,subgroup,0
0,baseline,214,vitamin a,oil,Total Population,0.07521
1,baseline,214,vitamin a,industry oil,Total Population,0.07521
2,baseline,214,vitamin a,wheat flour,Total Population,0.174881
3,baseline,214,vitamin a,maize flour,Total Population,0.01255
4,baseline,214,zinc,wheat flour,Total Population,0.174881


In [51]:
# Baseline and counterfactual folic acid coverage from the low-SES coverage
# file, for the 'WRA' subgroup, over 2022-2025.
(folic_acid_baseline_coverage,
 folic_acid_counterfactual_coverage) = get_baseline_and_counterfactual_coverage(
    coverage_data_dir,
    location_ids,
    'folic acid',
    [
        'maize flour', 'wheat flour',
        'maize flour from lit', 'wheat flour from lit',
        'maize flour*', 'wheat flour*',
    ],
    list(range(2022, 2026)),
    coverage_levels,
    'WRA',
    True,
)

Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data
Excluded location IDs [] due to missing data


In [52]:
# Vehicles used when filtering coverage below: plain, '*' and 'from lit' variants.
vehicles = (
    ['industry oil', 'oil', 'wheat flour', 'maize flour']
    + ['wheat flour*', 'maize flour*', 'oil*', 'industry oil*']
    + ['industry oil from lit', 'oil from lit', 'wheat flour from lit', 'maize flour from lit']
)

In [53]:
# Keep only the (vehicle, coverage level) combinations that are reported:
#   'zero ...' vehicles      -> coverage level 1 only
#   '...*' vehicles          -> coverage level 1 only
#   '... from lit' vehicles  -> coverage level 0.8 only
#   all other vehicles       -> every coverage level except 1
fa_cf_vehicle = folic_acid_counterfactual_coverage.vehicle
fa_cf_level = folic_acid_counterfactual_coverage.coverage_level

fa_keep_zero = fa_cf_vehicle.isin([v for v in vehicles if 'zero' in v]) & (fa_cf_level==1)
fa_keep_starred = fa_cf_vehicle.isin([v for v in vehicles if '*' in v]) & (fa_cf_level==1)
fa_keep_from_lit = fa_cf_vehicle.isin([v for v in vehicles if 'from lit' in v]) & (fa_cf_level==0.8)
fa_keep_plain = (fa_cf_vehicle.isin([v for v in vehicles
                                     if 'zero' not in v and '*' not in v and 'from lit' not in v])
                 & (fa_cf_level!=1))

folic_acid_counterfactual_coverage_prepped = folic_acid_counterfactual_coverage.loc[
    fa_keep_zero | fa_keep_starred | fa_keep_from_lit | fa_keep_plain
]
folic_acid_counterfactual_coverage_prepped.head()

Unnamed: 0,coverage_level,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,...,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999,location_id,vehicle,year
0,0.2,0.001816,0.000889,0.003061,0.002175,0.001926,0.003389,0.002138,0.002434,0.001476,...,0.003545,0.002191,0.002374,0.001603,0.001948,0.001948,0.002946,214,maize flour,2022
1,0.5,0.001816,0.000889,0.003061,0.002175,0.001926,0.003389,0.002138,0.002434,0.001476,...,0.003545,0.002191,0.002374,0.001603,0.001948,0.001948,0.002946,214,maize flour,2022
2,0.8,0.001816,0.000889,0.003061,0.002175,0.001926,0.003389,0.002138,0.002434,0.001476,...,0.003545,0.002191,0.002374,0.001603,0.001948,0.001948,0.002946,214,maize flour,2022
4,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,...,0.2,0.2,0.2,0.2,0.2,0.2,0.2,214,maize flour,2023
5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,214,maize flour,2023


In [54]:
# Label the low-SES coverage frames (in place): baseline frames get
# coverage_level='baseline', and every frame gets its nutrient name.
ses_coverage_frames = [
    (vitamin_a_baseline_coverage, vitamin_a_counterfactual_coverage_prepped, 'vitamin a'),
    (zinc_baseline_coverage, zinc_counterfactual_coverage_prepped, 'zinc'),
    (folic_acid_baseline_coverage, folic_acid_counterfactual_coverage_prepped, 'folic acid'),
]
for baseline_frame, counterfactual_frame, nutrient_label in ses_coverage_frames:
    baseline_frame['coverage_level'] = 'baseline'
    baseline_frame['nutrient'] = nutrient_label
    counterfactual_frame['nutrient'] = nutrient_label

# Disabled: maximum-coverage draws per nutrient/vehicle (kept for reference).
#max_cov_ses = pd.DataFrame()
#for nutrient in ['vitamin a','zinc','folic acid']:
#    if nutrient == 'vitamin a':
#        vehicles = ['oil','wheat flour','maize flour','industry oil',
#                   'oil from lit','industry oil from lit',
#                    'wheat flour from lit','maize flour from lit']
#    else:
#        vehicles = ['wheat flour', 'maize flour','wheat flour from lit','maize flour from lit']
#    if nutrient == 'folic acid':
#        subgroup = 'WRA'
#    else:
#        subgroup = 'U5'
#    for vehicle in vehicles:
#        print(f'calculating {nutrient}/{vehicle}')
#        cov_a, cov_b = generate_logical_coverage_draws(coverage_data_dir, location_ids, nutrient, vehicle, subgroup)
#        cov_b['coverage_level'] = 'maximum'
#        cov_b['nutrient'] = nutrient
#        cov_b['vehicle'] = vehicle
#        max_cov_ses = pd.concat([max_cov_ses, cov_b])

# Stack baseline and counterfactual coverage for all three nutrients.
ses_cov = pd.concat(
    [
        vitamin_a_baseline_coverage,
        vitamin_a_counterfactual_coverage_prepped,
        zinc_baseline_coverage,
        zinc_counterfactual_coverage_prepped,
        folic_acid_baseline_coverage,
        folic_acid_counterfactual_coverage_prepped,
    ],
    ignore_index=True,
    sort=True,
)

# Attach location names.
l = get_ids('location')
location_names = l.filter(['location_name','location_id'])
ses_cov = ses_cov.merge(location_names, on='location_id')
ses_cov['subgroup'] = 'Lower Wealth Quintile'

# Index by every non-draw column and average across the draw columns;
# reset_index() names the resulting mean column 0.
ses_cov_id_cols = [c for c in ses_cov.columns if 'draw' not in c]
ses_cov = ses_cov.set_index(ses_cov_id_cols).mean(axis=1).reset_index()
ses_cov.head()

Unnamed: 0,coverage_level,location_id,nutrient,vehicle,year,location_name,subgroup,0
0,baseline,214,vitamin a,oil,2021,Nigeria,Lower Wealth Quintile,0.054369
1,baseline,214,vitamin a,oil,2022,Nigeria,Lower Wealth Quintile,0.054369
2,baseline,214,vitamin a,oil,2023,Nigeria,Lower Wealth Quintile,0.054369
3,baseline,214,vitamin a,oil,2024,Nigeria,Lower Wealth Quintile,0.054369
4,baseline,214,vitamin a,oil,2025,Nigeria,Lower Wealth Quintile,0.054369


In [55]:
# Vehicles to keep in the low-SES coverage table: plain, '*' and 'from lit' variants.
vehicles = (
    ['industry oil', 'oil', 'wheat flour', 'maize flour']
    + ['wheat flour*', 'maize flour*', 'oil*', 'industry oil*']
    + ['industry oil from lit', 'oil from lit', 'wheat flour from lit', 'maize flour from lit']
)

# 2025 rows for the listed vehicles, without the location name and year columns.
ses_cov_is_reported = (ses_cov.year==2025) & ses_cov.vehicle.isin(vehicles)
ses_cov_prepped = ses_cov.loc[ses_cov_is_reported].drop(columns=['location_name','year'])

In [56]:
# Build a long-format table with baseline and target coverage (in percent) for
# every coverage level / location / nutrient / vehicle / subgroup.

# Stack the low-SES and national coverage tables; the value column is named 0.
cov_prepped = pd.concat([ses_cov_prepped, nat_cov], ignore_index=True, sort=True)#.drop(columns='score')
# Baseline rows only, with the value column renamed from 0 to 'baseline'.
cov_baseline= cov_prepped.loc[cov_prepped.coverage_level=='baseline'].rename(columns={0:'baseline'}).drop(columns='coverage_level')
# Non-baseline rows joined to their baseline value, then reshaped wide -> long.
# After set_index only the value columns ('target_coverage' and 'baseline')
# remain, so stack() yields one row per value column. The stacked level is the
# sixth index level (position 5), which reset_index() names 'level_5'; the
# stacked values end up in column 0.
cov_prepped = (cov_prepped.loc[cov_prepped.coverage_level!='baseline']
               .merge(cov_baseline, on=['location_id','nutrient','vehicle','subgroup'])
               .set_index(['coverage_level','location_id','nutrient','vehicle','subgroup'])
               .rename(columns={0:'target_coverage'})
               .stack().reset_index()
               .rename(columns={'level_5':'col_name'}))
# Scale by 100; the labels assigned below describe the values as percentages.
cov_prepped[0] = cov_prepped[0] * 100
# Replace the short column keys with display labels: 'baseline' rows get the
# baseline label, every other row gets the target-coverage label.
cov_prepped['col_name'] = np.where(cov_prepped.col_name=='baseline',
                                  'Percent of population eating fortified vehicle at baseline',
                                  'Percent of population eating fortified vehicle at target coverage')

cov_prepped.head()

Unnamed: 0,coverage_level,location_id,nutrient,vehicle,subgroup,col_name,0
0,0.2,214,vitamin a,oil,Lower Wealth Quintile,Percent of population eating fortified vehicle...,19.999996
1,0.2,214,vitamin a,oil,Lower Wealth Quintile,Percent of population eating fortified vehicle...,5.436868
2,0.5,214,vitamin a,oil,Lower Wealth Quintile,Percent of population eating fortified vehicle...,49.99999
3,0.5,214,vitamin a,oil,Lower Wealth Quintile,Percent of population eating fortified vehicle...,5.436868
4,0.8,214,vitamin a,oil,Lower Wealth Quintile,Percent of population eating fortified vehicle...,79.999984


# Excel file

In [57]:
# Vehicles and coverage levels used when building the DALY tables below:
# plain, 'zero', 'from lit' and '*' variants of each vehicle.
vehicles = (
    ['oil', 'industry oil', 'wheat flour', 'maize flour']
    + ['zero oil', 'zero industry oil', 'zero wheat flour', 'zero maize flour']
    + ['oil from lit', 'industry oil from lit', 'wheat flour from lit', 'maize flour from lit']
    + ['maize flour*', 'wheat flour*', 'oil*', 'industry oil*']
)
coverage_levels = [0.2, 0.5, 0.8, 1]

In [58]:
# DALY rates (per 100,000) by nutrient, replicated for every vehicle and
# coverage level so they can be joined to the results tables.

# Total DALYs per location for each nutrient.
vitamin_a_dalys_prepped = vitamin_a_dalys.groupby('location_id').sum().reset_index()
vitamin_a_dalys_prepped['nutrient'] = 'vitamin a'
zinc_dalys_prepped = zinc_dalys.groupby('location_id').sum().reset_index()
zinc_dalys_prepped['nutrient'] = 'zinc'
fa_dalys_prepped = dalys.groupby('location_id').sum().reset_index()
fa_dalys_prepped['nutrient'] = 'folic acid'

# sort=True pins the sorted column order regardless of the pandas default.
mn_dalys = (pd.concat([vitamin_a_dalys_prepped, zinc_dalys_prepped, fa_dalys_prepped],
                      ignore_index=True, sort=True)
            .set_index(['location_id','nutrient'])).reset_index()
# Sum over the three nutrients to get an 'all' row per location.
mn_dalys_all = mn_dalys.groupby('location_id').sum().reset_index()
mn_dalys_all['nutrient'] = 'all'

mn_dalys = pd.concat([mn_dalys, mn_dalys_all], ignore_index=True, sort=True)
mn_dalys['subgroup'] = 'Lower Wealth Quintile'

# convert to rates
# NOTE: this rebinds the notebook-level `pop` to a per-location total.
pop = get_population(location_id=location_ids,
                    sex_id=[1,2],
                    age_group_id=[2,3,4,5],
                    year_id=2019,
                    gbd_round_id=6,
                    decomp_step='step4').groupby(['location_id']).sum().reset_index()
mn_dalys = mn_dalys.merge(pop.filter(['location_id','population']), on='location_id')
# Divide every draw column by the population row-wise in one step.
mn_draw_cols = [f'draw_{i}' for i in range(1000)]
mn_dalys[mn_draw_cols] = mn_dalys[mn_draw_cols].div(mn_dalys['population'], axis=0) * 100_000

mn_dalys = mn_dalys.drop(columns='population')

# Replicate the rates for each (vehicle, coverage level) pair. The frames are
# built in a list and concatenated once instead of growing inside nested loops.
mn_dalys_final = pd.concat(
    [mn_dalys.assign(vehicle=v, coverage_level=c)
     for v in vehicles if 'zero' not in v
     for c in coverage_levels],
    ignore_index=True, sort=True)

mn_dalys_zero = pd.concat(
    [mn_dalys.assign(vehicle=v, coverage_level=c)
     for v in vehicles if 'zero' in v
     for c in coverage_levels],
    ignore_index=True, sort=True)

mn_index_cols = ['location_id','vehicle','nutrient','subgroup','coverage_level']
mn_dalys = mn_dalys_final.set_index(mn_index_cols).sort_index()
mn_dalys_zero = mn_dalys_zero.set_index(mn_index_cols).sort_index()

mn_dalys.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  del sys.path[0]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,vehicle,nutrient,subgroup,coverage_level,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
214,industry oil,all,Lower Wealth Quintile,0.2,5445.830195,6909.089865,4805.066925,7303.275284,4619.065485,5606.740924,3534.876875,5120.752391,5077.522799,5133.173796,...,13963.554585,11057.467697,11578.305986,10074.946833,12036.271233,14317.318185,11581.964498,14352.809855,12943.056111,13683.598801
214,industry oil,all,Lower Wealth Quintile,0.5,5445.830195,6909.089865,4805.066925,7303.275284,4619.065485,5606.740924,3534.876875,5120.752391,5077.522799,5133.173796,...,13963.554585,11057.467697,11578.305986,10074.946833,12036.271233,14317.318185,11581.964498,14352.809855,12943.056111,13683.598801
214,industry oil,all,Lower Wealth Quintile,0.8,5445.830195,6909.089865,4805.066925,7303.275284,4619.065485,5606.740924,3534.876875,5120.752391,5077.522799,5133.173796,...,13963.554585,11057.467697,11578.305986,10074.946833,12036.271233,14317.318185,11581.964498,14352.809855,12943.056111,13683.598801
214,industry oil,all,Lower Wealth Quintile,1.0,5445.830195,6909.089865,4805.066925,7303.275284,4619.065485,5606.740924,3534.876875,5120.752391,5077.522799,5133.173796,...,13963.554585,11057.467697,11578.305986,10074.946833,12036.271233,14317.318185,11581.964498,14352.809855,12943.056111,13683.598801
214,industry oil,folic acid,Lower Wealth Quintile,0.2,4242.052323,4863.153136,4041.513172,5429.915487,4308.966355,5202.635216,3200.54594,3921.198353,3722.700565,3773.261414,...,13357.432192,9744.226221,10083.501658,9045.610657,10012.914665,13269.649553,10908.243473,13175.622335,12019.001045,13171.848271


In [59]:
# DALY counts by nutrient, replicated for every vehicle and coverage level.

# Total DALYs per location for each nutrient.
vitamin_a_dalys_prepped = vitamin_a_dalys.groupby('location_id').sum().reset_index()
vitamin_a_dalys_prepped['nutrient'] = 'vitamin a'
zinc_dalys_prepped = zinc_dalys.groupby('location_id').sum().reset_index()
zinc_dalys_prepped['nutrient'] = 'zinc'
fa_dalys_prepped = dalys.groupby('location_id').sum().reset_index()
fa_dalys_prepped['nutrient'] = 'folic acid'

# sort=True pins the sorted column order regardless of the pandas default.
mn_daly_counts = (pd.concat([vitamin_a_dalys_prepped, zinc_dalys_prepped, fa_dalys_prepped],
                            ignore_index=True, sort=True)
            .set_index(['location_id','nutrient'])).reset_index()
# Sum over the three nutrients to get an 'all' row per location.
mn_daly_counts_all = mn_daly_counts.groupby('location_id').sum().reset_index()
mn_daly_counts_all['nutrient'] = 'all'

mn_daly_counts = pd.concat([mn_daly_counts, mn_daly_counts_all], ignore_index=True, sort=True)
mn_daly_counts['subgroup'] = 'Lower Wealth Quintile'

# Replicate the counts for each (vehicle, coverage level) pair. The frames are
# built in a list and concatenated once instead of growing inside nested loops.
mn_daly_counts_final = pd.concat(
    [mn_daly_counts.assign(vehicle=v, coverage_level=c)
     for v in vehicles if 'zero' not in v
     for c in coverage_levels],
    ignore_index=True, sort=True)

mn_daly_counts_zero = pd.concat(
    [mn_daly_counts.assign(vehicle=v, coverage_level=c)
     for v in vehicles if 'zero' in v
     for c in coverage_levels],
    ignore_index=True, sort=True)

# The counts are divided by 5.
# NOTE(review): presumably this scales total-population DALYs to one wealth
# quintile (a fifth of the population) -- confirm.
mn_count_index_cols = ['location_id','vehicle','nutrient','subgroup','coverage_level']
mn_daly_counts = mn_daly_counts_final.set_index(mn_count_index_cols).sort_index() / 5
mn_daly_counts_zero = mn_daly_counts_zero.set_index(mn_count_index_cols).sort_index() / 5
mn_daly_counts.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  del sys.path[0]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,vehicle,nutrient,subgroup,coverage_level,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
214,industry oil,all,Lower Wealth Quintile,0.2,365106.294981,463208.016397,322147.426451,489635.498104,309677.2806,375894.277307,236989.724747,343312.014156,340413.760687,344144.785817,...,936162.439224,741328.854903,776247.559911,675457.437826,806951.050189,959879.909836,776492.838522,962259.387631,867744.878724,917393.286459
214,industry oil,all,Lower Wealth Quintile,0.5,365106.294981,463208.016397,322147.426451,489635.498104,309677.2806,375894.277307,236989.724747,343312.014156,340413.760687,344144.785817,...,936162.439224,741328.854903,776247.559911,675457.437826,806951.050189,959879.909836,776492.838522,962259.387631,867744.878724,917393.286459
214,industry oil,all,Lower Wealth Quintile,0.8,365106.294981,463208.016397,322147.426451,489635.498104,309677.2806,375894.277307,236989.724747,343312.014156,340413.760687,344144.785817,...,936162.439224,741328.854903,776247.559911,675457.437826,806951.050189,959879.909836,776492.838522,962259.387631,867744.878724,917393.286459
214,industry oil,all,Lower Wealth Quintile,1.0,365106.294981,463208.016397,322147.426451,489635.498104,309677.2806,375894.277307,236989.724747,343312.014156,340413.760687,344144.785817,...,936162.439224,741328.854903,776247.559911,675457.437826,806951.050189,959879.909836,776492.838522,962259.387631,867744.878724,917393.286459
214,industry oil,folic acid,Lower Wealth Quintile,0.2,284401.083273,326041.716296,270956.281657,364039.320825,288887.219137,348801.706921,214575.08375,262889.982088,249582.040153,252971.805111,...,895526.008559,653284.844648,676030.981287,606447.368802,671298.598083,889640.772926,731324.37416,883336.88024,805793.200209,883083.854597


In [60]:
# Build "Total Population" attributable-DALY rates per 100,000 for each
# nutrient (plus an 'all' row), then tile them over every vehicle and
# coverage level so they can be aligned with the scenario results by index.

# `gbd_data_dir` is not referenced again in this cell.
gbd_data_dir = '/ihme/homes/alibow/notebooks/vivarium_research_lsff/gbd_data_summary/output_data/'
# Same path as `read_share_dir` defined near the top of the notebook.
gbd_data_directory = '/share/scratch/users/ndbs/vivarium_lsff/gbd_data'
gbd_data_filepath = f'{gbd_data_directory}/multmodel_data.hdf'
vitamin_a_dalys_nat_hdf_key = '/vitamin_a_deficiency/dalys_attributable_bmgf_25_countries'
zinc_dalys_nat_hdf_key = '/zinc_deficiency/dalys_attributable_bmgf_25_countries'

# One row per location for each nutrient: everything is summed within location_id.
vitamin_a_dalys_nat = pd.read_hdf(gbd_data_filepath, key=vitamin_a_dalys_nat_hdf_key).groupby('location_id').sum().reset_index()
vitamin_a_dalys_nat['nutrient'] = 'vitamin a'
zinc_dalys_nat = pd.read_hdf(gbd_data_filepath, key=zinc_dalys_nat_hdf_key).groupby('location_id').sum().reset_index()
zinc_dalys_nat['nutrient'] = 'zinc'
fa_dalys_nat = pd.read_pickle('results_raw/folic_acid_dalys').groupby('location_id').sum().reset_index()
fa_dalys_nat['nutrient'] = 'folic acid'

# Stack the nutrients, keep only the modelled locations, and add an 'all' row
# per location holding the sum across nutrients.
mn_dalys_nat_by_location = pd.concat([vitamin_a_dalys_nat,zinc_dalys_nat,fa_dalys_nat], ignore_index=True, sort=True)
mn_dalys_nat_by_location = mn_dalys_nat_by_location.loc[mn_dalys_nat_by_location.location_id.isin(location_ids)]
mn_dalys_nat_tot = mn_dalys_nat_by_location.groupby('location_id').sum().reset_index()
mn_dalys_nat_tot['nutrient'] = 'all'
mn_dalys_nat = pd.concat([mn_dalys_nat_by_location.reset_index(), mn_dalys_nat_tot], ignore_index=True, sort=True).drop(columns='index')
mn_dalys_nat['subgroup'] = 'Total Population'

# convert to rates
# Population is summed over the requested sexes and age groups per location.
pop = get_population(location_id=location_ids,
                    sex_id=[1,2],
                    age_group_id=[2,3,4,5],
                    year_id=2019,
                    gbd_round_id=6,
                    decomp_step='step4').groupby(['location_id']).sum().reset_index()
mn_dalys_nat = mn_dalys_nat.merge(pop.filter(['location_id','population']), on='location_id')

# Divide all 1000 draw columns by population in one vectorised step
# (count / population * 100,000) instead of looping column by column.
draw_cols = [f'draw_{i}' for i in range(1000)]
mn_dalys_nat[draw_cols] = mn_dalys_nat[draw_cols].div(mn_dalys_nat['population'], axis=0) * 100_000
mn_dalys_nat = mn_dalys_nat.drop(columns='population')

# Tile the rates over every (vehicle, coverage_level) pair with a single
# concat per table rather than growing a DataFrame inside a loop. The leading
# empty DataFrame reproduces the seed the loops used to start from, so column
# sorting and the "no matching vehicle" case are unchanged.
non_zero_vehicles = [v for v in vehicles if 'zero' not in v]
zero_vehicles = [v for v in vehicles if 'zero' in v]

mn_dalys_nat_final = pd.concat(
    [pd.DataFrame()] + [mn_dalys_nat.assign(vehicle=v, coverage_level=c)
                        for v in non_zero_vehicles
                        for c in coverage_levels],
    ignore_index=True, sort=True)

mn_dalys_nat_zero = pd.concat(
    [pd.DataFrame()] + [mn_dalys_nat.assign(vehicle=v, coverage_level=c)
                        for v in zero_vehicles
                        for c in coverage_levels],
    ignore_index=True, sort=True)

scenario_index = ['location_id','vehicle','nutrient','subgroup','coverage_level']
mn_dalys_nat = mn_dalys_nat_final.set_index(scenario_index).sort_index()
mn_dalys_nat_zero = mn_dalys_nat_zero.set_index(scenario_index).sort_index()
mn_dalys_nat.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,vehicle,nutrient,subgroup,coverage_level,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
214,industry oil,all,Total Population,0.2,3033.983105,3802.320453,2624.600568,3998.258397,2500.317046,3048.314341,1966.703897,2809.930938,2801.905519,2826.48936,...,7443.610679,5962.346421,6246.544974,5432.563832,6495.210827,7661.784986,6164.890188,7660.225881,6925.355984,7288.669261
214,industry oil,all,Total Population,0.5,3033.983105,3802.320453,2624.600568,3998.258397,2500.317046,3048.314341,1966.703897,2809.930938,2801.905519,2826.48936,...,7443.610679,5962.346421,6246.544974,5432.563832,6495.210827,7661.784986,6164.890188,7660.225881,6925.355984,7288.669261
214,industry oil,all,Total Population,0.8,3033.983105,3802.320453,2624.600568,3998.258397,2500.317046,3048.314341,1966.703897,2809.930938,2801.905519,2826.48936,...,7443.610679,5962.346421,6246.544974,5432.563832,6495.210827,7661.784986,6164.890188,7660.225881,6925.355984,7288.669261
214,industry oil,all,Total Population,1.0,3033.983105,3802.320453,2624.600568,3998.258397,2500.317046,3048.314341,1966.703897,2809.930938,2801.905519,2826.48936,...,7443.610679,5962.346421,6246.544974,5432.563832,6495.210827,7661.784986,6164.890188,7660.225881,6925.355984,7288.669261
214,industry oil,folic acid,Total Population,0.2,2232.659117,2559.554282,2127.112196,2857.850256,2267.877029,2738.229061,1684.497863,2063.788607,1959.316087,1985.92706,...,7030.227469,5128.540116,5307.106136,4760.847714,5269.955087,6984.026081,5741.180775,6934.538071,6325.790023,6932.551722


In [61]:
# Build "Total Population" attributable-DALY *counts* for each nutrient (plus
# an 'all' row) and tile them over every vehicle and coverage level. No
# population scaling is applied in this cell.

# `gbd_data_dir` is not referenced again in this cell.
gbd_data_dir = '/ihme/homes/alibow/notebooks/vivarium_research_lsff/gbd_data_summary/output_data/'
# Same path as `read_share_dir` defined near the top of the notebook.
gbd_data_directory = '/share/scratch/users/ndbs/vivarium_lsff/gbd_data'
gbd_data_filepath = f'{gbd_data_directory}/multmodel_data.hdf'
vitamin_a_dalys_nat_hdf_key = '/vitamin_a_deficiency/dalys_attributable_bmgf_25_countries'
zinc_dalys_nat_hdf_key = '/zinc_deficiency/dalys_attributable_bmgf_25_countries'

# One row per location for each nutrient: everything is summed within location_id.
vitamin_a_dalys_nat = pd.read_hdf(gbd_data_filepath, key=vitamin_a_dalys_nat_hdf_key).groupby('location_id').sum().reset_index()
vitamin_a_dalys_nat['nutrient'] = 'vitamin a'
zinc_dalys_nat = pd.read_hdf(gbd_data_filepath, key=zinc_dalys_nat_hdf_key).groupby('location_id').sum().reset_index()
zinc_dalys_nat['nutrient'] = 'zinc'
fa_dalys_nat = pd.read_pickle('results_raw/folic_acid_dalys').groupby('location_id').sum().reset_index()
fa_dalys_nat['nutrient'] = 'folic acid'

# Stack the nutrients, keep only the modelled locations, and add an 'all' row
# per location holding the sum across nutrients.
mn_dalys_counts_nat_by_location = pd.concat([vitamin_a_dalys_nat,zinc_dalys_nat,fa_dalys_nat], ignore_index=True, sort=True)
mn_dalys_counts_nat_by_location = mn_dalys_counts_nat_by_location.loc[mn_dalys_counts_nat_by_location.location_id.isin(location_ids)]
mn_dalys_counts_nat_tot = mn_dalys_counts_nat_by_location.groupby('location_id').sum().reset_index()
mn_dalys_counts_nat_tot['nutrient'] = 'all'
mn_dalys_counts_nat = pd.concat([mn_dalys_counts_nat_by_location.reset_index(), mn_dalys_counts_nat_tot], ignore_index=True, sort=True).drop(columns='index')
mn_dalys_counts_nat['subgroup'] = 'Total Population'

# Tile the counts over every (vehicle, coverage_level) pair with a single
# concat per table rather than growing a DataFrame inside a loop. The leading
# empty DataFrame reproduces the seed the loops used to start from, so column
# sorting and the "no matching vehicle" case are unchanged.
non_zero_vehicles = [v for v in vehicles if 'zero' not in v]
zero_vehicles = [v for v in vehicles if 'zero' in v]

mn_dalys_counts_nat_final = pd.concat(
    [pd.DataFrame()] + [mn_dalys_counts_nat.assign(vehicle=v, coverage_level=c)
                        for v in non_zero_vehicles
                        for c in coverage_levels],
    ignore_index=True, sort=True)

mn_dalys_counts_nat_zero = pd.concat(
    [pd.DataFrame()] + [mn_dalys_counts_nat.assign(vehicle=v, coverage_level=c)
                        for v in zero_vehicles
                        for c in coverage_levels],
    ignore_index=True, sort=True)

scenario_index = ['location_id','vehicle','nutrient','subgroup','coverage_level']
mn_dalys_counts_nat = mn_dalys_counts_nat_final.set_index(scenario_index).sort_index()
mn_dalys_counts_nat_zero = mn_dalys_counts_nat_zero.set_index(scenario_index).sort_index()
mn_dalys_counts_nat.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,vehicle,nutrient,subgroup,coverage_level,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
214,industry oil,all,Total Population,0.2,1017041.0,1274600.0,879809.096784,1340282.0,838147.224666,1021845.0,659271.357676,941934.87247,939244.620336,947485.526564,...,2495220.0,1998676.0,2093944.0,1821084.0,2177301.0,2568356.0,2066572.0,2567833.0,2321493.0,2443281.0
214,industry oil,all,Total Population,0.5,1017041.0,1274600.0,879809.096784,1340282.0,838147.224666,1021845.0,659271.357676,941934.87247,939244.620336,947485.526564,...,2495220.0,1998676.0,2093944.0,1821084.0,2177301.0,2568356.0,2066572.0,2567833.0,2321493.0,2443281.0
214,industry oil,all,Total Population,0.8,1017041.0,1274600.0,879809.096784,1340282.0,838147.224666,1021845.0,659271.357676,941934.87247,939244.620336,947485.526564,...,2495220.0,1998676.0,2093944.0,1821084.0,2177301.0,2568356.0,2066572.0,2567833.0,2321493.0,2443281.0
214,industry oil,all,Total Population,1.0,1017041.0,1274600.0,879809.096784,1340282.0,838147.224666,1021845.0,659271.357676,941934.87247,939244.620336,947485.526564,...,2495220.0,1998676.0,2093944.0,1821084.0,2177301.0,2568356.0,2066572.0,2567833.0,2321493.0,2443281.0
214,industry oil,folic acid,Total Population,0.2,748423.9,858004.5,713042.846465,957998.2,760229.524044,917899.2,564671.273026,691815.742336,656794.842508,665715.276608,...,2356647.0,1719171.0,1779029.0,1595914.0,1766575.0,2341160.0,1924538.0,2324571.0,2120508.0,2323905.0


In [62]:
# Stack the existing `mn_dalys` table with the national ("Total Population")
# table, once for the regular vehicles and once for the 'zero' vehicles, and
# sort each result by its index.
baseline_rate_tables = [mn_dalys, mn_dalys_nat]
zero_rate_tables = [mn_dalys_zero, mn_dalys_nat_zero]

mn_dalys_fin = pd.concat(baseline_rate_tables).sort_index()
mn_dalys_zero_fin = pd.concat(zero_rate_tables).sort_index()

mn_dalys_fin.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,vehicle,nutrient,subgroup,coverage_level,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
214,industry oil,all,Lower Wealth Quintile,0.2,5445.830195,6909.089865,4805.066925,7303.275284,4619.065485,5606.740924,3534.876875,5120.752391,5077.522799,5133.173796,...,13963.554585,11057.467697,11578.305986,10074.946833,12036.271233,14317.318185,11581.964498,14352.809855,12943.056111,13683.598801
214,industry oil,all,Lower Wealth Quintile,0.5,5445.830195,6909.089865,4805.066925,7303.275284,4619.065485,5606.740924,3534.876875,5120.752391,5077.522799,5133.173796,...,13963.554585,11057.467697,11578.305986,10074.946833,12036.271233,14317.318185,11581.964498,14352.809855,12943.056111,13683.598801
214,industry oil,all,Lower Wealth Quintile,0.8,5445.830195,6909.089865,4805.066925,7303.275284,4619.065485,5606.740924,3534.876875,5120.752391,5077.522799,5133.173796,...,13963.554585,11057.467697,11578.305986,10074.946833,12036.271233,14317.318185,11581.964498,14352.809855,12943.056111,13683.598801
214,industry oil,all,Lower Wealth Quintile,1.0,5445.830195,6909.089865,4805.066925,7303.275284,4619.065485,5606.740924,3534.876875,5120.752391,5077.522799,5133.173796,...,13963.554585,11057.467697,11578.305986,10074.946833,12036.271233,14317.318185,11581.964498,14352.809855,12943.056111,13683.598801
214,industry oil,all,Total Population,0.2,3033.983105,3802.320453,2624.600568,3998.258397,2500.317046,3048.314341,1966.703897,2809.930938,2801.905519,2826.48936,...,7443.610679,5962.346421,6246.544974,5432.563832,6495.210827,7661.784986,6164.890188,7660.225881,6925.355984,7288.669261


In [63]:
# Stack the existing `mn_daly_counts` table with the national ("Total
# Population") count table, once for the regular vehicles and once for the
# 'zero' vehicles, and sort each result by its index.
baseline_count_tables = [mn_daly_counts, mn_dalys_counts_nat]
zero_count_tables = [mn_daly_counts_zero, mn_dalys_counts_nat_zero]

mn_dalys_fin_counts = pd.concat(baseline_count_tables).sort_index()
mn_dalys_zero_fin_counts = pd.concat(zero_count_tables).sort_index()

mn_dalys_fin_counts.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,vehicle,nutrient,subgroup,coverage_level,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
214,industry oil,all,Lower Wealth Quintile,0.2,365106.3,463208.0,322147.426451,489635.5,309677.2806,375894.3,236989.724747,343312.014156,340413.760687,344144.785817,...,936162.4,741328.9,776247.6,675457.4,806951.1,959879.9,776492.8,962259.4,867744.9,917393.3
214,industry oil,all,Lower Wealth Quintile,0.5,365106.3,463208.0,322147.426451,489635.5,309677.2806,375894.3,236989.724747,343312.014156,340413.760687,344144.785817,...,936162.4,741328.9,776247.6,675457.4,806951.1,959879.9,776492.8,962259.4,867744.9,917393.3
214,industry oil,all,Lower Wealth Quintile,0.8,365106.3,463208.0,322147.426451,489635.5,309677.2806,375894.3,236989.724747,343312.014156,340413.760687,344144.785817,...,936162.4,741328.9,776247.6,675457.4,806951.1,959879.9,776492.8,962259.4,867744.9,917393.3
214,industry oil,all,Lower Wealth Quintile,1.0,365106.3,463208.0,322147.426451,489635.5,309677.2806,375894.3,236989.724747,343312.014156,340413.760687,344144.785817,...,936162.4,741328.9,776247.6,675457.4,806951.1,959879.9,776492.8,962259.4,867744.9,917393.3
214,industry oil,all,Total Population,0.2,1017041.0,1274600.0,879809.096784,1340282.0,838147.224666,1021845.0,659271.357676,941934.87247,939244.620336,947485.526564,...,2495220.0,1998676.0,2093944.0,1821084.0,2177301.0,2568356.0,2066572.0,2567833.0,2321493.0,2443281.0


In [64]:
# Assemble the DALY-rate summary table: per-100,000 DALYs under zero
# fortification, at baseline and at target coverage, the increments between
# them, and the corresponding PIFs, each summarised across draws.
index_cols = ['location_id','vehicle','nutrient','subgroup','coverage_level']

# 2025 'rates_averted' draws for all subgroups. The set_index/reset_index
# round trip only moves the identifier (non-draw) columns to the front.
id_cols = [c for c in results.columns if 'draw' not in c]
rates_sub = (results
             .loc[(results.measure == 'rates_averted') & (results.year == 2025)]
             .set_index(id_cols)
             .reset_index()
             .drop(columns=['index', 'year', 'measure']))

# Rows for the 'zero' vehicles, with the sign flipped.
step1_increment = (rates_sub
                   .loc[rates_sub.vehicle.str.contains('zero')]
                   .set_index(index_cols)) * -1

# Rows for the regular (non-'zero') vehicles, sign unchanged.
step2_increment = (rates_sub
                   .loc[rates_sub.vehicle.isin([v for v in vehicles if 'zero' not in v])]
                   .set_index(index_cols))

dalys_zero_fortification = mn_dalys_zero_fin + step1_increment
dalys_baseline = mn_dalys_fin.copy()
dalys_literature = mn_dalys_fin - step2_increment

# PIFs must be computed before the text 'col_name' column is attached below.
step1_pif = step1_increment / dalys_zero_fortification * 100
step2_pif = step2_increment / dalys_baseline * 100

# Label every frame with the row name it will carry in the final table.
# The list order is also the order in which the frames are stacked.
labelled_frames = [
    (dalys_zero_fortification, 'Zero fortification DALYs per 100,000'),
    (dalys_baseline, 'Baseline DALYs per 100,000'),
    (dalys_literature, 'Target coverage DALYs per 100,000'),
    (step1_increment, 'Zero fortification to baseline DALY increment per 100,000'),
    (step2_increment, 'Baseline to target coverage DALY increment per 100,000'),
    (step1_pif, 'Zero fortification to baseline PIF as a proportion of zero fortification DALYs'),
    (step2_pif, 'Baseline to target coverage PIF as a proportion of baseline DALYs'),
]
for frame, label in labelled_frames:
    frame['col_name'] = label

# Stack the labelled frames and summarise each row across its draw columns
# (count/mean/std/min/2.5%/50%/97.5%/max).
draw_cols = [c for c in dalys_baseline.columns if 'draw' in c]
data_rates = (pd.concat([frame.reset_index() for frame, _ in labelled_frames],
                        ignore_index=True)
              .set_index(['location_id','col_name','vehicle','nutrient','subgroup','coverage_level'])
              .filter(draw_cols)
              .apply(pd.DataFrame.describe, percentiles=[0.025,0.975], axis=1)
              .reset_index())

# Append the coverage rows; their value column (named 0) becomes 'mean'.
data_rates = pd.concat([data_rates,
                        cov_prepped.rename(columns={0:'mean'})],
                       ignore_index=True, sort=True)

# Take an explicit copy after filtering: the next line assigns a column, and
# assigning into a `.loc` slice triggers pandas' SettingWithCopyWarning.
data_rates = data_rates.loc[data_rates.nutrient!='all'].copy()
data_rates['vehicle'] = data_rates['vehicle'].str.replace('zero ','')

# Swap location_id for a readable location name.
l = get_ids('location')
data_rates = data_rates.merge(l.filter(['location_id','location_name']), on='location_id')
data_rates = data_rates.drop(columns='location_id').rename(columns={'location_name':'Location'})
data_rates.dropna().head()

Unnamed: 0,2.5%,50%,97.5%,col_name,count,coverage_level,max,mean,min,nutrient,std,subgroup,vehicle,Location
11,152.817911,1043.066507,2372.103925,"Zero fortification DALYs per 100,000",1000.0,1.0,4011.909676,1117.350292,-365.997379,vitamin a,588.641284,Lower Wealth Quintile,industry oil,Nigeria
12,155.050347,651.852587,1395.297342,"Zero fortification DALYs per 100,000",1000.0,0.2,2363.698715,694.170363,-152.607337,vitamin a,330.107405,Total Population,industry oil,Nigeria
13,155.99961,661.014598,1411.541494,"Zero fortification DALYs per 100,000",1000.0,0.5,2385.410029,700.621505,-153.689492,vitamin a,333.289032,Total Population,industry oil,Nigeria
14,156.948873,668.068188,1424.959495,"Zero fortification DALYs per 100,000",1000.0,0.8,2407.121343,707.072648,-154.771647,vitamin a,336.502733,Total Population,industry oil,Nigeria
15,157.581714,673.212877,1433.904829,"Zero fortification DALYs per 100,000",1000.0,1.0,2421.595553,711.373409,-155.493084,vitamin a,338.662569,Total Population,industry oil,Nigeria


In [65]:
# Assemble the DALY-count summary table: DALYs under zero fortification, at
# baseline and at target coverage, plus the increments between them, each
# summarised across draws.
index_cols = ['location_id','vehicle','nutrient','subgroup','coverage_level']

id_cols = [c for c in results.columns if 'draw' not in c]
is_2025_count = (results.measure == 'counts_averted') & (results.year == 2025)

# Lower Wealth Quintile rows: the draw columns are divided by 5 while the
# identifier columns sit in the index. Total Population rows are not scaled.
counts_sub_lwq = ((results
                   .loc[is_2025_count & (results.subgroup == 'Lower Wealth Quintile')]
                   .set_index(id_cols) / 5)
                  .reset_index()
                  .drop(columns=['index', 'year', 'measure']))
counts_sub_tot = (results
                  .loc[is_2025_count & (results.subgroup == 'Total Population')]
                  .set_index(id_cols)
                  .reset_index()
                  .drop(columns=['index', 'year', 'measure']))
counts_sub = pd.concat([counts_sub_lwq, counts_sub_tot], ignore_index=True)

# Rows for the 'zero' vehicles, with the sign flipped.
step1_increment = (counts_sub
                   .loc[counts_sub.vehicle.str.contains('zero')]
                   .set_index(index_cols)) * -1

# Rows for the regular (non-'zero') vehicles, sign unchanged.
step2_increment = (counts_sub
                   .loc[counts_sub.vehicle.isin([v for v in vehicles if 'zero' not in v])]
                   .set_index(index_cols))

dalys_zero_fortification = mn_dalys_zero_fin_counts + step1_increment
dalys_baseline = mn_dalys_fin_counts.copy()
dalys_literature = mn_dalys_fin_counts - step2_increment

# The PIFs are recomputed here but are not added to the counts table below.
step1_pif = step1_increment / dalys_zero_fortification * 100
step2_pif = step2_increment / dalys_baseline * 100

# Label every frame with the row name it will carry in the final table.
# The list order is also the order in which the frames are stacked.
labelled_frames = [
    (dalys_zero_fortification, 'Zero fortification DALYs'),
    (dalys_baseline, 'Baseline DALYs'),
    (dalys_literature, 'Target coverage DALYs'),
    (step1_increment, 'Zero fortification to baseline DALY increment'),
    (step2_increment, 'Baseline to target coverage DALY increment'),
]
for frame, label in labelled_frames:
    frame['col_name'] = label

# Stack the labelled frames and summarise each row across its draw columns.
draw_cols = [c for c in dalys_baseline.columns if 'draw' in c]
data_counts = (pd.concat([frame.reset_index() for frame, _ in labelled_frames],
                         ignore_index=True)
               .set_index(['location_id','col_name','vehicle','nutrient','subgroup','coverage_level'])
               .filter(draw_cols)
               .apply(pd.DataFrame.describe, percentiles=[0.025,0.975], axis=1)
               .reset_index())

# Append the coverage rows; their value column (named 0) becomes 'mean'.
data_counts = pd.concat([data_counts,
                         cov_prepped.rename(columns={0:'mean'})],
                        ignore_index=True, sort=True)
data_counts['vehicle'] = data_counts['vehicle'].str.replace('zero ','')

# Swap location_id for a readable location name.
l = get_ids('location')
data_counts = data_counts.merge(l.filter(['location_id','location_name']), on='location_id')
data_counts = data_counts.drop(columns='location_id').rename(columns={'location_name':'Location'})
data_counts.dropna().head()

Unnamed: 0,2.5%,50%,97.5%,col_name,count,coverage_level,max,mean,min,nutrient,std,subgroup,vehicle,Location
19,10245.413366,69930.595388,159033.617321,Zero fortification DALYs,1000.0,1.0,268971.566309,74910.823627,-24537.663172,vitamin a,39464.439859,Lower Wealth Quintile,industry oil,Nigeria
20,51975.415829,218511.663303,467726.521624,Zero fortification DALYs,1000.0,0.2,792350.522679,232697.274895,-51156.478773,vitamin a,110657.408716,Total Population,industry oil,Nigeria
21,52293.624229,221582.919473,473171.827317,Zero fortification DALYs,1000.0,0.5,799628.510761,234859.80353,-51519.234946,vitamin a,111723.94212,Total Population,industry oil,Nigeria
22,52611.832629,223947.398579,477669.761092,Zero fortification DALYs,1000.0,0.8,806906.498843,237022.332166,-51881.99112,vitamin a,112801.227446,Total Population,industry oil,Nigeria
23,52823.971562,225671.982531,480668.383609,Zero fortification DALYs,1000.0,1.0,811758.490897,238464.017923,-52123.828569,vitamin a,113525.239924,Total Population,industry oil,Nigeria


In [66]:
# Combine the rate and count summaries and turn the 'mean' column into
# display strings: one decimal place for PIF / percentage rows, thousands
# separators with no decimals for everything else.
data = pd.concat([data_rates,data_counts],ignore_index=True)

# Take an explicit copy after filtering: the assignment to data['mean'] below
# would otherwise write into a `.loc` slice (SettingWithCopyWarning).
data = data.loc[data.nutrient!='all'].copy()

percent_labels = [c for c in data.col_name.unique() if 'PIF' in c or 'Percent' in c]
data['mean'] = np.where(data.col_name.isin(percent_labels),
                        data['mean'].map('{:.1f}'.format),
                        data['mean'].map('{:,.0f}'.format))

data.head()

Unnamed: 0,2.5%,50%,97.5%,col_name,count,coverage_level,max,mean,min,nutrient,std,subgroup,vehicle,Location
0,,,,"Zero fortification DALYs per 100,000",0.0,0.2,,,,folic acid,,Lower Wealth Quintile,industry oil,Nigeria
1,,,,"Zero fortification DALYs per 100,000",0.0,0.5,,,,folic acid,,Lower Wealth Quintile,industry oil,Nigeria
2,,,,"Zero fortification DALYs per 100,000",0.0,0.8,,,,folic acid,,Lower Wealth Quintile,industry oil,Nigeria
3,,,,"Zero fortification DALYs per 100,000",0.0,1.0,,,,folic acid,,Lower Wealth Quintile,industry oil,Nigeria
4,,,,"Zero fortification DALYs per 100,000",0.0,0.2,,,,folic acid,,Total Population,industry oil,Nigeria


In [81]:
# Inspection only: list the columns of `data_zero`.
# NOTE(review): within the visible part of the notebook, `data_zero` is first
# assigned in the next code cell, and this cell's execution count is lower than
# that cell's. On a fresh Restart & Run All this line may raise NameError
# unless `data_zero` is defined earlier -- confirm, or move/remove this cell.
data_zero.columns

Index(['Location', 'Vehicle', 'Nutrient', 'Population Subgroup',
       'Baseline DALYs', 'Baseline DALYs per 100,000',
       'Zero fortification DALYs', 'Zero fortification DALYs per 100,000',
       'Zero fortification to baseline DALY increment',
       'Zero fortification to baseline DALY increment per 100,000',
       'Zero fortification to baseline PIF as a proportion of zero fortification DALYs'],
      dtype='object', name='col_name')

In [86]:
# NEEDS NATIONAL INDUSTRY OIL DATA/VITAMIN A

# Zero-fortification table: keep the rows whose label mentions zero
# fortification plus the two baseline DALY rows, at coverage level 1 only.
zero_labels = [c for c in data.col_name.unique() if 'Zero' in c or 'zero' in c]
baseline_labels = ['Baseline DALYs','Baseline DALYs per 100,000']
wanted_rows = ((data.col_name.isin(zero_labels) | data.col_name.isin(baseline_labels))
               & (data.coverage_level==1))
data_zero = data.loc[wanted_rows]

# One row per location/vehicle/nutrient/subgroup, one column per label. The
# formatted 'mean' strings are joined with a space if a cell has several values.
data_zero = (data_zero
             .pivot_table(index=['Location','vehicle','nutrient','subgroup'],
                          columns='col_name', values='mean',
                          aggfunc=lambda x: ' '.join(x))
             .reset_index()
             .dropna())

# Relabel the oil vehicles: 'industry oil' names first, then any other name
# containing 'oil'; everything else keeps its name.
industry_oil_vehicles = [v for v in vehicles if 'industry oil' in v]
any_oil_vehicles = [v for v in vehicles if 'oil' in v]
literature_or_unchanged = np.where(data_zero.vehicle.isin(any_oil_vehicles),
                                   'oil (literature data)', data_zero.vehicle)
data_zero['vehicle'] = np.where(data_zero.vehicle.isin(industry_oil_vehicles),
                                'oil (industry data)', literature_or_unchanged)

# Presentation tweaks: capitalised labels and reader-facing column names.
data_zero['vehicle'] = data_zero.vehicle.str.capitalize()
data_zero['nutrient'] = data_zero.nutrient.str.capitalize().str.replace('Vitamin a','Vitamin A')
data_zero = data_zero.rename(columns={'vehicle':'Vehicle','nutrient':'Nutrient','subgroup':'Population Subgroup'})

# Final column order (this drops 'Location'), sorted for the report.
zero_table_columns = [
    'Vehicle','Nutrient','Population Subgroup',
    'Baseline DALYs', 'Baseline DALYs per 100,000',
    'Zero fortification DALYs', 'Zero fortification DALYs per 100,000',
    'Zero fortification to baseline DALY increment',
    'Zero fortification to baseline DALY increment per 100,000',
    'Zero fortification to baseline PIF as a proportion of zero fortification DALYs',
]
data_zero = data_zero[zero_table_columns].sort_values(by=['Vehicle','Nutrient','Population Subgroup'])

data_zero.to_excel('results_plots/lower_wealth_quintile_tables/zero_fortification.xlsx')
data_zero

col_name,Vehicle,Nutrient,Population Subgroup,Baseline DALYs,"Baseline DALYs per 100,000",Zero fortification DALYs,"Zero fortification DALYs per 100,000",Zero fortification to baseline DALY increment,"Zero fortification to baseline DALY increment per 100,000",Zero fortification to baseline PIF as a proportion of zero fortification DALYs
18,Maize flour,Folic acid,Lower Wealth Quintile,562842,8395,563308,8402,466,7,0.1
19,Maize flour,Folic acid,Total Population,1481164,4419,1487003,4436,5839,17,0.4
20,Maize flour,Vitamin A,Lower Wealth Quintile,72643,1084,72719,1085,76,1,0.1
21,Maize flour,Vitamin A,Total Population,231256,690,232400,693,1145,3,0.5
22,Maize flour,Zinc,Lower Wealth Quintile,13484,201,13503,201,18,0,0.1
23,Maize flour,Zinc,Total Population,38748,116,38999,116,251,1,0.6
2,Oil (industry data),Vitamin A,Lower Wealth Quintile,72643,1084,74911,1117,2268,34,3.0
3,Oil (industry data),Vitamin A,Total Population,231256,690,238464,711,7208,22,3.0
38,Oil (literature data),Vitamin A,Lower Wealth Quintile,72643,1084,74289,1108,1645,25,2.2
39,Oil (literature data),Vitamin A,Total Population,231256,690,238464,711,7208,22,3.0


In [88]:
# Inspection only: list the columns of `data_first_step`.
# NOTE(review): within the visible part of the notebook, `data_first_step` is
# first assigned in the next code cell, and this cell's execution count is lower
# than that cell's. On a fresh Restart & Run All this line may raise NameError
# unless `data_first_step` is defined earlier -- confirm, or move/remove this cell.
data_first_step.columns

Index(['Location', 'Vehicle', 'Nutrient', 'Population Subgroup',
       'Baseline DALYs', 'Baseline DALYs per 100,000',
       'Baseline to target coverage DALY increment',
       'Baseline to target coverage DALY increment per 100,000',
       'Baseline to target coverage PIF as a proportion of baseline DALYs',
       'Percent of population eating fortified vehicle at baseline',
       'Percent of population eating fortified vehicle at target coverage',
       'Target coverage DALYs', 'Target coverage DALYs per 100,000'],
      dtype='object', name='col_name')

In [90]:
# NEEDS NUMBER FORMATS AND NATIONAL INDUSTRY OIL DATA/VITAMIN A

# First-step coverage table (coverage level 0.8): rows whose label does not
# mention zero fortification, taken from the 'from lit' vehicles for the Lower
# Wealth Quintile and from the plain vehicles (no '*', no 'from lit') for the
# Total Population.
non_zero_labels = [c for c in data.col_name.unique() if 'Zero' not in c and 'zero' not in c]
plain_vehicles = [v for v in vehicles if '*' not in v and 'from lit' not in v]

label_ok = data.col_name.isin(non_zero_labels)
at_first_step = (data.coverage_level==0.8)
lwq_rows = (data['subgroup']=='Lower Wealth Quintile') & (data.vehicle.str.contains('from lit'))
total_rows = (data['subgroup']=='Total Population') & (data.vehicle.isin(plain_vehicles))

# Take an explicit copy: the next statement assigns a column, and assigning
# into a `.loc` slice triggers pandas' SettingWithCopyWarning.
data_first_step = data.loc[label_ok & at_first_step & (lwq_rows | total_rows)].copy()
data_first_step['vehicle'] = data_first_step.vehicle.str.replace(' from lit','')
data_first_step = data_first_step.drop_duplicates()

# One row per location/vehicle/nutrient/subgroup/coverage level, one column
# per label. The formatted 'mean' strings are joined with a space if a cell
# has several values.
data_first_step = (data_first_step
                   .pivot_table(index=['Location','vehicle','nutrient','subgroup','coverage_level'],
                                columns='col_name', values='mean',
                                aggfunc=lambda x: ' '.join(x))
                   .reset_index()
                   .dropna())

# Relabel the oil vehicles: 'industry oil' names first, then any other name
# containing 'oil'; everything else keeps its name.
industry_oil_vehicles = [v for v in vehicles if 'industry oil' in v]
any_oil_vehicles = [v for v in vehicles if 'oil' in v]
data_first_step['vehicle'] = np.where(data_first_step.vehicle.isin(industry_oil_vehicles), 'oil (industry data)',
                          np.where(data_first_step.vehicle.isin(any_oil_vehicles), 'oil (literature data)',
                                   data_first_step.vehicle))

# Presentation tweaks: capitalised labels and reader-facing column names.
data_first_step['vehicle'] = data_first_step.vehicle.str.capitalize()
data_first_step['nutrient'] = data_first_step.nutrient.str.capitalize().str.replace('Vitamin a','Vitamin A')
data_first_step = (data_first_step
                   .rename(columns={'vehicle':'Vehicle','nutrient':'Nutrient','subgroup':'Population Subgroup'})
                   .drop(columns='coverage_level'))

# Final column order (this drops 'Location'), sorted for the report.
first_step_columns = [
    'Vehicle','Nutrient','Population Subgroup',
    'Percent of population eating fortified vehicle at baseline',
    'Percent of population eating fortified vehicle at target coverage',
    'Baseline DALYs', 'Baseline DALYs per 100,000',
    'Target coverage DALYs', 'Target coverage DALYs per 100,000',
    'Baseline to target coverage DALY increment',
    'Baseline to target coverage DALY increment per 100,000',
    'Baseline to target coverage PIF as a proportion of baseline DALYs',
]
data_first_step = (data_first_step[first_step_columns]
                   .sort_values(by=['Vehicle','Nutrient','Population Subgroup']))
data_first_step.to_excel('results_plots/lower_wealth_quintile_tables/first_step_coverage.xlsx')
data_first_step

col_name,Vehicle,Nutrient,Population Subgroup,Percent of population eating fortified vehicle at baseline,Percent of population eating fortified vehicle at target coverage,Baseline DALYs,"Baseline DALYs per 100,000",Target coverage DALYs,"Target coverage DALYs per 100,000",Baseline to target coverage DALY increment,"Baseline to target coverage DALY increment per 100,000",Baseline to target coverage PIF as a proportion of baseline DALYs
6,Maize flour,Folic acid,Lower Wealth Quintile,0.3,5.3,562842,8395,553796,8260,9046,135,1.7
7,Maize flour,Folic acid,Total Population,1.3,7.1,1481164,4419,1453523,4336,27641,82,1.9
8,Maize flour,Vitamin A,Lower Wealth Quintile,0.3,5.3,72643,1084,71172,1062,1471,22,2.0
9,Maize flour,Vitamin A,Total Population,1.3,7.1,231256,690,225825,674,5431,16,2.3
10,Maize flour,Zinc,Lower Wealth Quintile,0.3,5.3,13484,201,13129,196,355,5,2.6
11,Maize flour,Zinc,Total Population,1.3,7.1,38748,116,37561,112,1187,4,3.1
2,Oil (industry data),Vitamin A,Lower Wealth Quintile,5.4,56.9,72643,1084,57287,854,15356,229,20.9
3,Oil (industry data),Vitamin A,Total Population,7.5,64.7,231256,690,176289,526,54966,164,23.7
14,Oil (literature data),Vitamin A,Lower Wealth Quintile,5.4,16.5,72643,1084,69339,1034,3304,49,4.5
15,Oil (literature data),Vitamin A,Total Population,7.5,27.1,231256,690,212396,634,18860,56,8.1


In [94]:
# Build the "second step" results table and export it to Excel.

# Keep every summary column except those whose name mentions 'Zero'/'zero'.
data_second_step = data.loc[data.col_name.isin([c for c in data.col_name.unique() if 'Zero' not in c and 'zero' not in c])]

# Row selection:
#   * Lower Wealth Quintile: vehicles whose name carries a '*', at coverage_level 1
#   * Total Population: vehicles without '*' and without 'from lit', at coverage_level 0.8
# .copy() makes the result an independent frame, so the column assignment below does not
# write into a slice of `data` (the situation pandas flags with SettingWithCopyWarning).
data_second_step = data_second_step.loc[((data_second_step['subgroup'] == 'Lower Wealth Quintile')
                                         & (data_second_step.vehicle.isin([v for v in vehicles if '*' in v]))
                                         & (data_second_step.coverage_level == 1))
                                        | ((data_second_step['subgroup'] == 'Total Population')
                                           & (data_second_step.coverage_level == 0.8)
                                           & (data_second_step.vehicle.isin(
                                               [v for v in vehicles if '*' not in v and 'from lit' not in v])))].copy()

# Strip the '*' marker so both subgroups share the same vehicle names.
# regex=False: '*' on its own is not a valid regular expression and the default value of
# `regex` differs between pandas versions; a literal replacement is what is intended.
data_second_step['vehicle'] = data_second_step.vehicle.str.replace('*', '', regex=False)
data_second_step = data_second_step.drop_duplicates()

# One row per (Location, vehicle, nutrient, subgroup), one column per col_name.
# The aggfunc joins the string values with a space if a cell has more than one entry.
data_second_step = data_second_step.pivot_table(index=['Location', 'vehicle', 'nutrient', 'subgroup'],
                                 columns='col_name', values='mean', aggfunc=lambda x: ' '.join(x)).reset_index().dropna()

# Collapse the oil vehicle names into two display labels: names containing 'industry oil'
# become 'oil (industry data)', other names containing 'oil' become 'oil (literature data)'.
data_second_step['vehicle'] = np.where(data_second_step.vehicle.isin([v for v in vehicles if 'industry oil' in v]), 'oil (industry data)',
                          np.where(data_second_step.vehicle.isin([v for v in vehicles if 'oil' in v]), 'oil (literature data)',
                                   data_second_step.vehicle))
# str.capitalize() lower-cases everything after the first character, so restore 'Vitamin A'.
data_second_step['vehicle'] = data_second_step.vehicle.str.capitalize()
data_second_step['nutrient'] = data_second_step.nutrient.str.capitalize().str.replace('Vitamin a', 'Vitamin A')
data_second_step = data_second_step.rename(
    columns={'vehicle': 'Vehicle', 'nutrient': 'Nutrient', 'subgroup': 'Population Subgroup'})

# Fix the column order of the exported table and sort rows for readability.
data_second_step = (data_second_step[['Vehicle', 'Nutrient', 'Population Subgroup',
                                      'Percent of population eating fortified vehicle at baseline',
                                      'Percent of population eating fortified vehicle at target coverage',
                                      'Baseline DALYs', 'Baseline DALYs per 100,000',
                                      'Target coverage DALYs', 'Target coverage DALYs per 100,000',
                                      'Baseline to target coverage DALY increment',
                                      'Baseline to target coverage DALY increment per 100,000',
                                      'Baseline to target coverage PIF as a proportion of baseline DALYs']]
                    .sort_values(by=['Vehicle', 'Nutrient', 'Population Subgroup']))
data_second_step.to_excel('results_plots/lower_wealth_quintile_tables/second_step_coverage.xlsx')
data_second_step

col_name,Vehicle,Nutrient,Population Subgroup,Percent of population eating fortified vehicle at baseline,Percent of population eating fortified vehicle at target coverage,Baseline DALYs,"Baseline DALYs per 100,000",Target coverage DALYs,"Target coverage DALYs per 100,000",Baseline to target coverage DALY increment,"Baseline to target coverage DALY increment per 100,000",Baseline to target coverage PIF as a proportion of baseline DALYs
6,Maize flour,Folic acid,Lower Wealth Quintile,0.3,7.1,562842,8395,550528,8212,12315,184,2.2
7,Maize flour,Folic acid,Total Population,1.3,7.1,1481164,4419,1453523,4336,27641,82,1.9
8,Maize flour,Vitamin A,Lower Wealth Quintile,0.3,7.1,72643,1084,70661,1054,1982,30,2.7
9,Maize flour,Vitamin A,Total Population,1.3,7.1,231256,690,225825,674,5431,16,2.3
10,Maize flour,Zinc,Lower Wealth Quintile,0.3,7.1,13484,201,13001,194,484,7,3.6
11,Maize flour,Zinc,Total Population,1.3,7.1,38748,116,37561,112,1187,4,3.1
2,Oil (industry data),Vitamin A,Lower Wealth Quintile,5.4,64.7,72643,1084,55007,820,17636,263,24.0
3,Oil (industry data),Vitamin A,Total Population,7.5,64.7,231256,690,176289,526,54966,164,23.7
14,Oil (literature data),Vitamin A,Lower Wealth Quintile,5.4,27.1,72643,1084,66212,988,6431,96,8.8
15,Oil (literature data),Vitamin A,Total Population,7.5,27.1,231256,690,212396,634,18860,56,8.1


In [95]:
# Build the "third step" results table as one method chain and export it to Excel.
# Rows kept: Lower Wealth Quintile, vehicles without '*' and without 'from lit',
# every coverage_level other than 1.
data_third_step = (
    data
    # drop the summary columns whose name mentions 'Zero'/'zero'
    .loc[lambda d: d.col_name.isin([c for c in d.col_name.unique() if 'Zero' not in c and 'zero' not in c])]
    .loc[lambda d: ((d.subgroup == 'Lower Wealth Quintile')
                    & (d.vehicle.isin([v for v in vehicles if '*' not in v and 'from lit' not in v]))
                    & (d.coverage_level != 1))]
    .drop_duplicates()
    # one row per (Location, vehicle, nutrient, subgroup, coverage_level), one column per col_name
    .pivot_table(index=['Location', 'vehicle', 'nutrient', 'subgroup', 'coverage_level'],
                 columns='col_name', values='mean', aggfunc=lambda x: ' '.join(x))
    .reset_index()
    .dropna()
    .assign(
        # names containing 'industry oil' -> 'oil (industry data)'; other names containing
        # 'oil' -> 'oil (literature data)'; everything else unchanged; then capitalise
        vehicle=lambda d: pd.Series(
            np.where(d.vehicle.isin([v for v in vehicles if 'industry oil' in v]), 'oil (industry data)',
                     np.where(d.vehicle.isin([v for v in vehicles if 'oil' in v]), 'oil (literature data)',
                              d.vehicle)),
            index=d.index).str.capitalize(),
        # capitalize() lower-cases the trailing 'A', so put it back
        nutrient=lambda d: d.nutrient.str.capitalize().str.replace('Vitamin a', 'Vitamin A'),
    )
    .rename(columns={'vehicle': 'Vehicle', 'nutrient': 'Nutrient', 'subgroup': 'Population Subgroup'})
    .drop(columns='coverage_level')
    # fixed column order for the exported table
    .loc[:, ['Vehicle', 'Nutrient', 'Population Subgroup',
             'Percent of population eating fortified vehicle at baseline',
             'Percent of population eating fortified vehicle at target coverage',
             'Baseline DALYs', 'Baseline DALYs per 100,000',
             'Target coverage DALYs', 'Target coverage DALYs per 100,000',
             'Baseline to target coverage DALY increment',
             'Baseline to target coverage DALY increment per 100,000',
             'Baseline to target coverage PIF as a proportion of baseline DALYs']]
    .sort_values(by=['Vehicle', 'Nutrient', 'Population Subgroup'])
)
data_third_step.to_excel('results_plots/lower_wealth_quintile_tables/third_step_coverage.xlsx')
data_third_step

col_name,Vehicle,Nutrient,Population Subgroup,Percent of population eating fortified vehicle at baseline,Percent of population eating fortified vehicle at target coverage,Baseline DALYs,"Baseline DALYs per 100,000",Target coverage DALYs,"Target coverage DALYs per 100,000",Baseline to target coverage DALY increment,"Baseline to target coverage DALY increment per 100,000",Baseline to target coverage PIF as a proportion of baseline DALYs
9,Maize flour,Folic acid,Lower Wealth Quintile,0.3,20.0,562842,8395,527340,7866,35502,530,6.5
10,Maize flour,Folic acid,Lower Wealth Quintile,0.3,50.0,562842,8395,473388,7061,89454,1334,16.3
11,Maize flour,Folic acid,Lower Wealth Quintile,0.3,80.0,562842,8395,419436,6256,143406,2139,26.2
12,Maize flour,Vitamin A,Lower Wealth Quintile,0.3,20.0,72643,1084,66931,998,5712,85,7.8
13,Maize flour,Vitamin A,Lower Wealth Quintile,0.3,50.0,72643,1084,58249,869,14394,215,19.6
14,Maize flour,Vitamin A,Lower Wealth Quintile,0.3,80.0,72643,1084,49568,739,23075,344,31.5
15,Maize flour,Zinc,Lower Wealth Quintile,0.3,20.0,13484,201,12090,180,1395,21,10.4
16,Maize flour,Zinc,Lower Wealth Quintile,0.3,50.0,13484,201,9970,149,3515,52,26.1
17,Maize flour,Zinc,Lower Wealth Quintile,0.3,80.0,13484,201,7850,117,5634,84,41.8
3,Oil (industry data),Vitamin A,Lower Wealth Quintile,5.4,20.0,72643,1084,68340,1019,4303,64,5.9


# archive code

In [71]:
# Deliberate hard stop: this assertion always fails, so a top-to-bottom run halts here
# instead of continuing into the archived cells that follow.
assert 1==2, 'stop running'

AssertionError: stop running

In [None]:
# Archived: wide table of 2025 impact estimates (PIF and DALY rates averted), one row per
# location/subgroup and one column per nutrient-vehicle-measure-coverage combination.

# Summarise the draw columns of each row into mean and 2.5% / 97.5% percentiles.
impact = (results
          .loc[results.year == 2025]
          .loc[results.measure.isin(['pif', 'rates_averted'])]
          .set_index([c for c in results.columns if 'draw' not in c])
          .apply(pd.DataFrame.describe, percentiles=[0.025, 0.975], axis=1)
          .rename(columns={'mean': 'mean_val', '2.5%': 'lower', '97.5%': 'upper'})
          .filter(['mean_val', 'lower', 'upper'])
          .reset_index()
          .replace('vitamin a', 'vitamin A')
          .drop(columns='index'))

# Attach location names (get_ids is provided by the project helper modules imported at the top).
impact = impact.merge(get_ids('location').filter(['location_name', 'location_id']), on='location_id')

# Format numbers as strings: one decimal for 'pif'/'deficiency_exposure' rows,
# thousands-separated integers for everything else.
for col in ['mean_val', 'lower', 'upper']:
    impact[col] = np.where(impact.measure.isin(['pif', 'deficiency_exposure']),
                           impact[col].map('{:.1f}'.format),
                           impact[col].map('{:,.0f}'.format))

# "mean\r\n(lower, upper)" in a single cell.
impact['value'] = (impact['mean_val'].astype(str)
                   + '\r\n('
                   + impact['lower'].astype(str)
                   + ', '
                   + impact['upper'].astype(str)
                   + ')')

# Column header text. str.capitalize() lower-cases everything after the first character,
# which would turn 'vitamin A' into 'Vitamin a'; restore the capital 'A' afterwards so the
# header matches the 'Vitamin A' spelling used in the other result tables.
impact['col_name'] = (impact.nutrient.str.capitalize().str.replace('Vitamin a', 'Vitamin A', regex=False)
                      + ' in '
                      + impact.vehicle
                      + np.where(impact.measure == 'pif', ' PIF (percent): ',
                                 np.where(impact.measure == 'rates_averted',
                                          ' DALYs averted per 100,000 person-years: ', 'ERROR'))
                      + (impact.coverage_level * 100).astype(str)
                      + '% scale-up')
impact['location_name'] = impact['location_name'] + ' ' + impact['subgroup']

# Pivot to one row per location/subgroup; combinations with no value are labelled 'Not modeled'.
impact = impact[['col_name', 'location_name', 'value']]
impact = impact.pivot_table(index='location_name',
                            columns='col_name',
                            values='value',
                            aggfunc=lambda x: ' '.join(x))
impact = impact.replace(np.nan, 'Not modeled')
impact

In [None]:
# Archived: wide table of 2025 coverage estimates (iron excluded), one row per
# location/subgroup and one column per coverage parameter.

# .copy() so the column assignments below act on an independent frame rather than on a
# filtered slice of cov_prepped (the situation pandas flags with SettingWithCopyWarning).
coverage = (cov_prepped
            .loc[cov_prepped.year == 2025]
            .loc[cov_prepped.nutrient != 'iron']
            .copy())

# location_id 214 is labelled as the total population; every other id as the lower wealth quintile.
coverage['subgroup'] = np.where(coverage.location_id == 214, 'Total Population', 'Lower Wealth Quintile')
coverage['location_name'] = coverage['location_name'] + ' ' + coverage['subgroup']

# "mean\r\n(lower, upper)" in percent, rounded to one decimal.
coverage['value'] = ((coverage['mean'] * 100).round(1).astype(str)
                     + '\r\n('
                     + (coverage['2.5%'] * 100).round(1).astype(str)
                     + ', '
                     + (coverage['97.5%'] * 100).round(1).astype(str)
                     + ')')

# Row label depends on coverage_level: 'baseline', 'maximum', or a scale-up level.
# For a float level such as 0.2 the last branch yields '20.0' + '0% scale-up'
# = '20.00% scale-up', which is tidied to '20% scale-up' by the replacement below.
coverage['parameter_description'] = np.where(
    coverage.coverage_level == 'baseline',
    'Percent of population eating ' + coverage.nutrient + ' fortified ' + coverage.vehicle + ' at baseline',
    np.where(coverage.coverage_level == 'maximum',
             'Percent of population eating fortifiable ' + coverage.vehicle,
             'Percent of population eating ' + coverage.nutrient + ' fortified ' + coverage.vehicle
             + ': ' + (coverage.coverage_level * 100).astype(str) + '0% scale-up'))

# regex=False: '.00%' is meant literally. As a regular expression the '.' would match any
# character, and the default value of `regex` differs between pandas versions.
coverage['parameter_description'] = coverage['parameter_description'].str.replace('.00%', '%', regex=False)

# Pivot to one row per location/subgroup; combinations with no value are labelled 'Not modeled'.
coverage = coverage[['location_name', 'value', 'parameter_description']]
coverage = coverage.pivot_table(index='location_name',
                                columns='parameter_description',
                                values='value',
                                aggfunc=lambda x: ' '.join(x))
coverage = coverage.replace(np.nan, 'Not modeled')

coverage

In [None]:
# Stack the transposed coverage table on top of the transposed impact table
# (parameters become rows, location/subgroup becomes the columns) and save it as CSV.
low_ses_table = pd.concat([table.T for table in (coverage, impact)])
low_ses_table.to_csv('results_plots/nigeria_lower_wealth_quintile_no_iron.csv')
low_ses_table

In [None]:
def make_ses_comparison_dot_plots(data, nutrient, vehicle, measure, coverage_levels, subtitle, wra=False):
    """Draw a dot plot (mean with 2.5%-97.5% interval) of year-2025 results per coverage level.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per group to plot. Each row is summarised across its columns with
        ``describe``, so the columns are expected to hold the numeric draws; the
        identifying fields must live in the index. After ``reset_index`` the frame
        must provide ``year``, ``coverage_level``, ``location_id`` and ``subgroup``.
    nutrient : str
        Nutrient name, used in the title only.
    vehicle : str
        Vehicle name, used in the title only.
    measure : str
        One of 'rates', 'counts' or 'pifs'; selects the title and y-axis label.
        Any other value leaves the title and y-axis label unset.
    coverage_levels : sequence
        Values of ``coverage_level`` to plot; each gets its own colour and legend entry.
    subtitle : str
        Text placed on the last line of the title.
    wra : bool, default False
        If true the title refers to 'Women of Reproductive Age', otherwise to
        'children under five'.

    Raises
    ------
    ValueError
        If ``coverage_levels`` is empty.

    The figure is left open as the current matplotlib figure so the caller can save it.
    """
    if len(coverage_levels) == 0:
        raise ValueError('coverage_levels must contain at least one coverage level')

    f, ax = plt.subplots(figsize=(7, 4), dpi=120)
    # The first three colours are the original palette; the extra entries (and the modulo
    # below) keep the function working when more than three coverage levels are passed.
    colors = ['tab:red', 'tab:orange', 'tab:green', 'tab:blue', 'tab:purple', 'tab:brown']

    location_spacer = 0.15   # horizontal distance between neighbouring groups
    coverage_spacer = 0.025  # horizontal offset between coverage levels within a group
    # Row-wise summary statistics (mean, 2.5%, 97.5%, ...) with the index restored as columns.
    df = (data.drop(columns='measure', errors='ignore')
          .apply(pd.DataFrame.describe, percentiles=[0.025, 0.975], axis=1).reset_index())

    for n, coverage_level in enumerate(coverage_levels):
        rate = (df.loc[df.year == 2025]
                .loc[df.coverage_level == coverage_level])
        x_vals = [location_spacer * i + coverage_spacer * n for i in range(len(rate))]

        # Interval: a vertical line from the 2.5% to the 97.5% value with a cap at each end.
        for x, lower, upper in zip(x_vals, rate['2.5%'].values, rate['97.5%'].values):
            ax.plot([x, x], [lower, upper], c='black')
            ax.scatter([x], lower, s=50, marker='_', c='black')
            ax.scatter([x], upper, s=50, marker='_', c='black')

        ax.scatter(x_vals, rate['mean'], s=50,
                   label=f'{int(coverage_level * 100)} percent coverage',
                   color=colors[n % len(colors)])

    # Zero reference line; its extent is based on the number of rows of the last coverage level.
    ax.hlines(0, 0 - coverage_spacer * 2,
              location_spacer * (len(rate)) - coverage_spacer * 2,
              linestyle='dashed', color='grey', alpha=0.5)

    subpop = 'Women of Reproductive Age' if wra else 'children under five'

    if measure == 'rates':
        ax.set_title(f'DALYs averted per 100,000 person-years due to\n{nutrient} fortification in {vehicle} among {subpop}\n{subtitle}')
        ax.set_ylabel('DALYs Averted per 100,000')
    elif measure == 'counts':
        ax.set_title(f'DALYs averted due to\n{nutrient} fortification in {vehicle} among {subpop}\n{subtitle}')
        ax.set_ylabel('DALYs')
    elif measure == 'pifs':
        ax.set_title(f'Population impact fraction of {nutrient} fortification in {vehicle} \non DALYs among {subpop}\n{subtitle}')
        ax.set_ylabel('Population Impact Fraction (Percent)')

    ax.legend(bbox_to_anchor=[1.5, 1])

    # One tick per group, labelled "<location name> <subgroup>".
    x_ticks = [location_spacer * x + coverage_spacer for x in range(len(rate))]
    ax.set_xticks(x_ticks)
    location_table = get_ids('location')
    l_names = df.loc[df.coverage_level == coverage_levels[0]].loc[df.year == 2025]
    l_names = l_names.reset_index().merge(location_table, on='location_id')
    l_names['label'] = l_names.location_name + ' ' + l_names.subgroup
    ax.set_xticklabels(list(l_names.label.values))

In [None]:
# Archived: one PDF page per nutrient / vehicle / measure. For every pair the
# rates-averted plot is written first, then the PIF plot.
with PdfPages('results_plots/nigeria_lower_wealth_quintile_impact_by_nutrient_vehicle_pair.pdf') as pdf:
    for nutrient in ['vitamin a', 'zinc', 'folic acid']:
        # the oil vehicles are only plotted for vitamin A
        vehicles = (['oil','wheat flour','maize flour','industry oil'] if nutrient == 'vitamin a'
                    else ['wheat flour','maize flour'])
        for vehicle in vehicles:
            # (value of results.measure, value of the plotting function's `measure` argument)
            for results_measure, plot_measure in [('rates_averted', 'rates'), ('pif', 'pifs')]:
                make_ses_comparison_dot_plots(
                    (results
                     .sort_values(by='subgroup', ascending=False)
                     .loc[results.measure == results_measure]
                     .loc[results.vehicle == vehicle]
                     .loc[results.nutrient == nutrient]
                     # everything that is not a draw column identifies the row
                     .set_index([c for c in results.columns if 'draw' not in c])),
                    nutrient, vehicle, plot_measure, coverage_levels, '')
                pdf.savefig(bbox_inches='tight')

In [None]:
def make_coverage_bar_charts(data, location_ids, nutrient, vehicle):
    """Draw grouped bars of 2025 fortification coverage for one nutrient-vehicle pair.

    For each baseline row a group of four bars is drawn (baseline, 20%, 50% and 80%
    coverage levels) with error bars from the 2.5% / 97.5% columns. For every
    location with a 'maximum' row, a dashed line and shaded band mark that value.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``vehicle``, ``nutrient``, ``location_id``, ``year``,
        ``coverage_level``, ``mean``, ``2.5%``, ``97.5%`` and ``label``. The three
        value columns are multiplied by 100 for display.
    location_ids : list
        Values of ``location_id`` to include.
    nutrient : str
        Nutrient to plot; rows whose nutrient is 'na' are kept as well.
    vehicle : str
        Vehicle to plot.

    The figure is left open as the current matplotlib figure so the caller can save it.
    """
    plt.figure(figsize=(10, 4), dpi=120)

    # Rows with a missing year are kept alongside the 2025 rows.
    data = (data
            .loc[data.vehicle == vehicle]
            .loc[data.nutrient.isin(['na', nutrient])]
            .loc[data.location_id.isin(location_ids)]
            .loc[data.year.isin([np.nan, 2025])]
            .sort_values(by=['location_id'], ascending=True))
    for col in ['mean', '2.5%', '97.5%']:
        data[col] = data[col] * 100

    base = data.loc[data.coverage_level == 'baseline']
    bar_width = 0.20

    # (coverage_level value, bar colour or None for matplotlib's default, legend label)
    bar_specs = [('baseline', None, 'Baseline Coverage'),
                 (0.2, 'tab:red', '20% Coverage'),
                 (0.5, 'tab:orange', '50% Coverage'),
                 (0.8, 'tab:green', '80% Coverage')]
    positions = np.arange(len(base))
    for level, color, label in bar_specs:
        rows = data.loc[data.coverage_level == level]
        color_kwargs = {} if color is None else {'color': color}
        plt.bar(positions, rows['mean'], width=bar_width,
                yerr=[rows['mean'] - rows['2.5%'], rows['97.5%'] - rows['mean']],
                label=label, **color_kwargs)
        # the next coverage level is drawn one bar width to the right
        positions = positions + bar_width

    # Dashed line at the 'maximum' mean and a shaded band between its 2.5% and 97.5% values,
    # one per location; only the first line gets a legend entry.
    df = data.loc[data.coverage_level == 'maximum']
    for i, location_id in enumerate(df.location_id.unique()):
        location_rows = df.loc[df.location_id == location_id]
        label_kwargs = {'label': 'Eating industrially produced vehicle'} if i == 0 else {}
        plt.hlines(location_rows['mean'], -0.1 + i, 0.7 + i,
                   linestyle='dashed', color='black', **label_kwargs)
        plt.fill_between([-0.1 + i, 0.7 + i], [location_rows['2.5%'].values[0]] * 2,
                         [location_rows['97.5%'].values[0]] * 2,
                         alpha=0.2, color='black')

    plt.legend(bbox_to_anchor=[1.45, 1])
    plt.title(f'{nutrient.capitalize()} in {vehicle} fortification coverage')
    plt.ylabel('Percent of population')
    # One tick per bar group. Both the positions and the labels come from the baseline rows,
    # so their lengths always agree.
    plt.xticks([r + bar_width * 1.5 for r in range(len(base))], base['label'].values)

In [None]:
# Archived: one PDF page of coverage bars per nutrient / vehicle pair, for location ids
# 214 and 214.5.
with PdfPages('results_plots/nigeria_lower_wealth_quintile_coverage_by_nutrient_vehicle_pair.pdf') as pdf:
    for nutrient in ['vitamin a','zinc','folic acid']:
        # flours are plotted for every nutrient; the two oil vehicles only for vitamin A
        vehicles = ['wheat flour', 'maize flour']
        if nutrient == 'vitamin a':
            vehicles = ['oil'] + vehicles + ['industry oil']
        for vehicle in vehicles:
            make_coverage_bar_charts(cov_prepped, [214,214.5], nutrient, vehicle)
            pdf.savefig(bbox_inches='tight')