In [1]:
import pandas as pd, numpy as np
from db_queries import get_ids, get_outputs, get_location_metadata, get_population, get_covariate_estimates
from get_draws.api import get_draws
import scipy.stats 
import scipy.integrate as integrate
import matplotlib.pyplot as plt

The purpose of this notebook is to create generalized/customizable functions that can be used for Large Scale Food Fortification multiplication models with dichotomous outcomes (zinc, vitamin A, folic acid). The outcomes (DALYs averted) generated by this notebook assume the following:

- Complete scale-up achieved between starting baseline and alternative scenario coverage (med/high/low levels), defined according to the proportion of the population that eats industrially produced vehicles. This notebook does NOT currently consider the additional coverage over time in the alternative scenario defined according to the proportion of the population that eats the vehicle at all (due to campaign to convince additional individuals to eat fortified versions of vehicle).
- All individuals covered by fortification are assumed to be *effectively* covered. This assumption is not valid based on age- and timing-effects built into the full-scale models. These nutrient-specific effects should be added into the respective mutliplication model for the full results

In [2]:
location_ids = [163, 214, 205, 190, 189]

"""Note: full set of location IDs is shown below, but subset used here
was selected because they are the locations with non-missing coverage data
for the nutrient and vehicle of interest (vitamin A/oil)

[168, 161, 201, 202, 6, 205, 171, 141, 179, 207, 163, 11, 180, 181,
184, 15, 164, 213, 214, 165, 196, 522, 190, 189, 20]"""

ages = [2,3,4,5]
sexes = [1,2]

index_cols=['location_id','sex_id','age_group_id']

# define alternative scenario coverage levels (low, medium, high)
    # this parameter represents the proportion of additional coverage achieved in the
    # alternative scenario, defined as the difference between the proportion of the population
    # that eats the fortified vehicle and the proportion of the population that eats 
    # the industrially produced vehicle
alternative_scenario_coverage_levels = [0.25, 0.5, 0.75]

In [3]:
# vitamin A specific -- these should be replaced for other models
rei_id = 96
cause_ids = [389, 302, 341]
nonfatal_causes = [389]
nutrient = 'vitamin a'
vehicle = 'oil'

In [4]:
# define no fortification relative risk distribution
# vitamin a specific -- this should be replaced for other models

from numpy import log
from scipy.stats import norm, lognorm

# median and 0.975-quantile of lognormal distribution for RR
median = 2.22
q_975 = 5.26

# 0.975-quantile of standard normal distribution (=1.96, approximately)
q_975_stdnorm = norm().ppf(0.975)

mu = log(median) # mean of normal distribution for log(RR)
sigma = (log(q_975) - mu) / q_975_stdnorm # std dev of normal distribution for log(RR)

# Frozen lognormal distribution for RR, representing uncertainty in our effect size
# (s is the shape parameter)
rr_distribution = lognorm(s=sigma, scale=median)

In [5]:
def generate_rr_deficiency_nofort_draws(mean, std):
    """This function takes a distribution for the relative risk
    for lack of fortification of a particular nutrient and generates
    1,000 draws based on that distribution. The data is the duplicated
    so that it is the same for each location ID so that it can be easily
    used later in the calculations."""
    data = pd.DataFrame()    
    np.random.seed(7)
    data['rr'] = np.random.lognormal(mean, std, size=1000)
    draws = []
    for i in list(range(0,1000)):
        draws.append(f'draw_{i}')
    data['draws'] = draws
    data = pd.DataFrame.pivot_table(data, values='rr', columns='draws').reset_index().drop(columns=['index'])
    df = pd.DataFrame(np.repeat(data.values,len(location_ids),axis=0))
    df.columns = data.columns
    df['location_id'] = location_ids
    df = df.set_index('location_id')
    return df

In [6]:
def pull_cause_specific_dalys_deficiency_pafs(rei_id, cause_ids):
    """This function pulls PAF data from GBD for specified 
    risk outcome pairs. Note that the risk in this context 
    will/should be nutrient *deficiencies*, not the lack of 
    nutrient fortification"""
    
    data = pd.DataFrame()
    for cause_id in cause_ids:
        temp = get_draws(
            gbd_id_type=['rei_id', 'cause_id'], 
            gbd_id=[rei_id, cause_id],
            source='burdenator',
            measure_id=2, #dalys
            metric_id=2, #percent
            location_id=location_ids,
            year_id=2019,
            age_group_id=ages,
            sex_id=sexes, 
            gbd_round_id=6,
            status='best',
            decomp_step='step5',
        )
        data = pd.concat([data,temp], ignore_index=True)
    data = data.set_index(index_cols + ['cause_id'])
    data = data.drop(columns=[c for c in data.columns if 'draw' not in c]).sort_index()
    return data

In [7]:
def pull_dalys(cause_ids):
    """This function pulls dalys for specified cause IDs from GBD"""
    
    ylds = get_draws(
        gbd_id_type='cause_id', 
        gbd_id=cause_ids,
        source='como',
        measure_id=3,
        metric_id=3, # only available as rate
        location_id=location_ids,
        year_id=2019,
        age_group_id=ages,
        sex_id=sexes, 
        gbd_round_id=6,
        status='best',
        decomp_step='step5',
    ).set_index(index_cols + ['cause_id'])
    ylds = ylds.drop(columns=[c for c in ylds.columns if 'draw' not in c])
    pop = get_population(
        location_id=location_ids,
        year_id=2019,
        age_group_id=ages,
        sex_id=sexes,
        gbd_round_id=6,
        decomp_step='step4').set_index(index_cols)
    for i in list(range(0,1000)):
        ylds[f'draw_{i}'] = ylds[f'draw_{i}'] * pop['population']
    ylls = get_draws(
        gbd_id_type='cause_id', 
        gbd_id=cause_ids,
        source='codcorrect',
        measure_id=4,
        metric_id=1, 
        location_id=location_ids,
        year_id=2019,
        age_group_id=ages,
        sex_id=sexes, 
        gbd_round_id=6,
        status='latest',
        decomp_step='step5',
    ).set_index(index_cols + ['cause_id']).replace(np.nan, 0)
    ylls= ylls.drop(columns=[c for c in ylls.columns if 'draw' not in c])
    for nf in nonfatal_causes:
        nonfatal = ylls.groupby(index_cols).sum()
        nonfatal['cause_id'] = nf
        for i in list(range(0,1000)):
            nonfatal[f'draw_{i}'] = 0
    ylls = pd.concat([ylls.reset_index(), nonfatal.reset_index()]).set_index(index_cols + ['cause_id'])
    
    dalys = ylls + ylds
    return dalys

In [35]:
def load_coverage_data(nutrient, vehicle, percent_of_difference=True):
    data = pd.read_csv('/ihme/homes/alibow/notebooks/vivarium_data_analysis/pre_processing/lsff_project/data_prep/outputs/LSFF_extraction_clean_data_rich_locations_01_11_2021.csv')
    baseline_fortification_coverage = (data.loc[data.vehicle == vehicle]
             .loc[data.nutrient == nutrient]
             .loc[data.value_description == 'percent of population eating fortified vehicle'])
    counterfactual_fortification_coverage = (data.loc[data.vehicle == vehicle]
                  .loc[data.value_description == 'percent of population eating industrially produced vehicle'])
    
    # generate draws
    """This currently relies on two major assumptions:
    1. Truncated normal distribution
    2. The same percentile from the eats_fortified and eats_fortifiable distributions sampled for each draw
    
    Assumption number two is likely overly restrictive, but was chosen such that eats_fortified will 
    always be less than eats_fortifiable at the draw level (this is consistent with methodology described
    in 2017 concept model, but is achieved by setting the same random seed to sample each of these
    parameters)"""
      
    for data in [baseline_fortification_coverage, counterfactual_fortification_coverage]:
              
        data['value_std'] = (data.value_975_percentile - data.value_025_percentile) / 2 / 1.96
        data['a'] = (data.value_025_percentile - data.value_mean) / data.value_std
        data['b'] = (data.value_975_percentile - data.value_mean) / data.value_std       
        np.random.seed(11)
        for i in list(range(0,1000)):
            data[f'draw_{i}'] = scipy.stats.truncnorm.rvs(data.a, data.b, data.value_mean, data.value_std) / 100
            
    baseline_fortification_coverage = (baseline_fortification_coverage.set_index('location_id')
             .drop(columns=[c for c in baseline_fortification_coverage.columns if 'draw' not in c and c != 'location_id']))
    counterfactual_fortification_coverage = (counterfactual_fortification_coverage.set_index('location_id')
             .drop(columns=[c for c in counterfactual_fortification_coverage.columns if 'draw' not in c and c != 'location_id']))
    
    if percent_of_difference==True:   
        counterfactual_fortification_coverage_low = (counterfactual_fortification_coverage - baseline_fortification_coverage) * alternative_scenario_coverage_levels[0] + baseline_fortification_coverage
        counterfactual_fortification_coverage_low['coverage_level'] = 'low'
        counterfactual_fortification_coverage_med = (counterfactual_fortification_coverage - baseline_fortification_coverage) * alternative_scenario_coverage_levels[1] + baseline_fortification_coverage
        counterfactual_fortification_coverage_med['coverage_level'] = 'medium'
        counterfactual_fortification_coverage_high = (counterfactual_fortification_coverage - baseline_fortification_coverage) * alternative_scenario_coverage_levels[2] + baseline_fortification_coverage
        counterfactual_fortification_coverage_high['coverage_level'] = 'high'
        counterfactual_fortification_coverage_full = counterfactual_fortification_coverage.copy()
        counterfactual_fortification_coverage_full['coverage_level'] = 'full'
        
    elif percent_of_difference==False:
        counterfactual_fortification_coverage_low = (counterfactual_fortification_coverage) * alternative_scenario_coverage_levels[0]
        counterfactual_fortification_coverage_low['coverage_level'] = 'low'
        counterfactual_fortification_coverage_med = (counterfactual_fortification_coverage) * alternative_scenario_coverage_levels[1]
        counterfactual_fortification_coverage_med['coverage_level'] = 'medium'
        counterfactual_fortification_coverage_high = (counterfactual_fortification_coverage) * alternative_scenario_coverage_levels[2]
        counterfactual_fortification_coverage_high['coverage_level'] = 'high'
        counterfactual_fortification_coverage_full = counterfactual_fortification_coverage.copy()
        counterfactual_fortification_coverage_full['coverage_level'] = 'full'
    
    counterfactual_fortification_coverage = pd.concat([counterfactual_fortification_coverage_low.reset_index(), 
                            counterfactual_fortification_coverage_med.reset_index(), 
                            counterfactual_fortification_coverage_high.reset_index(),
                            counterfactual_fortification_coverage_full.reset_index()], 
                           ignore_index=True)
    counterfactual_fortification_coverage = counterfactual_fortification_coverage.set_index([c for c in counterfactual_fortification_coverage.columns if 'draw' not in c])
    
    #baseline_no_fortification_coverage = 1 - baseline_fortification_coverage
    #counterfactual_no_fortification_coverage = 1 - counterfactual_fortification_coverage
    
    return baseline_fortification_coverage, counterfactual_fortification_coverage

In [13]:
def calculate_paf_deficiency_nofort(rr_deficiency_nofort, baseline_fortification_coverage):
    """This function calculates the population attributable fraction of UNfortified food
    on the fortification outcome of interest (outcome defined in the fortification 
    effect size, which is generally nutrient deficiency)
    
    NOTE: this function does not consider age/time lags of fortification effects
    (assumes that every individual covered by fortification is effectively covered)"""
       
    paf_deficiency_nofort = ((rr_deficiency_nofort - 1) * (1 - baseline_fortification_coverage)) / ((rr_deficiency_nofort - 1) * (1 - baseline_fortification_coverage) + 1)
    return paf_deficiency_nofort

In [14]:
def calculate_pif_deficiency_nofort(paf_deficiency_nofort, baseline_fortification_coverage, counterfactual_fortification_coverage):
    """This function calculates the population impact fraction for UNfortified 
    foods and nutrient deficiency based on the location-specific coverage
    levels of fortified foods; specifically, p (1 - proportion of population
    that eats fortified vehicle) and p_start (1 - proportion of population that 
    eats industrially produced vehicle).
    
    NOTE: this function does not consider age/time lags of fortification effects
    (assumes that every individual covered by fortification is effectively covered)"""
    pif_deficiency_nofort = paf_deficiency_nofort * (counterfactual_fortification_coverage - baseline_fortification_coverage) / (1 - baseline_fortification_coverage)
    return pif_deficiency_nofort

In [15]:
def calculate_daly_reduction_by_cause(pif_deficiency_nofort, paf_dalys_deficiency, dalys):
    """This functionc calculates the population impact fraction for UNfortified 
    food and DALYs due to specific causes as well as the total number of DALYs
    averted by cause, sex, and age
    
    NOTE: this function does not consider age/time lags of fortification effects
    (assumes that every individual covered by fortification is effectively covered)"""
    
    df = pd.DataFrame()
    
    for level in ['low','medium','high','full']:
        pif_deficiency_nofort_level = (pif_deficiency_nofort.reset_index()
                                     .loc[pif_deficiency_nofort.reset_index().coverage_level == level]
                                     .drop(columns='coverage_level')
                                     .set_index('location_id'))
        pif_dalys_nofort = pif_deficiency_nofort_level * paf_dalys_deficiency
        pif_dalys_nofort['measure'] = 'pif'
        dalys_reduction = pif_dalys_nofort * dalys
        dalys_reduction['measure'] = 'dalys averted'
        dalys_reduction_overall = dalys_reduction.reset_index().groupby(index_cols + ['measure']).sum().reset_index()
        dalys_reduction_overall['cause_id'] = 294
        data = (pd.concat([pif_dalys_nofort.reset_index(), dalys_reduction.reset_index(), dalys_reduction_overall], ignore_index=True))
        data['coverage_level'] = level
        data = data.set_index(index_cols + ['measure','cause_id','coverage_level']).dropna().sort_index()
        df = pd.concat([df,data])
        
    return df

In [16]:
rr_deficiency_nofort = generate_rr_deficiency_nofort_draws(mu, sigma)
rr_deficiency_nofort.mean(axis=1)

location_id
163    2.396239
214    2.396239
205    2.396239
190    2.396239
189    2.396239
dtype: float64

In [17]:
paf_dalys_deficiency = pull_cause_specific_dalys_deficiency_pafs(rei_id, cause_ids)
paf_dalys_deficiency.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,sex_id,age_group_id,cause_id,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
163,1,2,389,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
163,1,3,389,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
163,1,4,302,0.023614,0.022761,0.032084,0.061713,0.010529,0.016051,0.008245,0.045553,0.027902,0.032493,...,0.013291,0.040655,0.013134,0.040318,0.050091,0.018375,0.019796,0.037472,0.025155,0.00943
163,1,4,341,0.062437,0.068525,0.080122,0.158759,0.013909,0.026354,0.011891,0.113604,0.067258,0.091411,...,0.028041,0.109343,0.039931,0.107498,0.148924,0.044538,0.046736,0.096353,0.066101,0.021711
163,1,4,389,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [18]:
dalys = pull_dalys(cause_ids)
dalys.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,sex_id,age_group_id,cause_id,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
163,1,2,302,228052.286761,256459.736242,146290.339705,192676.38112,172362.436243,187454.984499,159534.221668,176235.755329,170489.703795,151586.434309,...,233728.203639,195240.822126,264767.777947,189854.851927,201259.960318,246817.767701,211072.087665,216946.113187,161240.038151,149006.385585
163,1,2,341,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
163,1,2,389,249.085991,270.466085,270.284907,146.168022,118.041018,158.115471,160.563119,111.986426,347.919477,159.200861,...,87.646759,195.252324,129.9347,254.856255,47.148614,134.662517,118.849263,211.828232,100.17052,100.091938
163,1,3,302,207609.552344,263078.492814,198861.237612,173638.640755,255710.254455,184070.329711,167459.228561,282709.744663,228526.429634,190682.116226,...,266360.552338,251853.188296,250290.066661,189621.733181,226938.698022,290091.580737,199799.659021,257016.120929,184017.830031,188623.397182
163,1,3,341,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
baseline_fortification_coverage, counterfactual_fortification_coverage = load_coverage_data(nutrient, 
                                                                                            vehicle, 
                                                                                            percent_of_difference=True)
baseline_fortification_coverage.mean(axis=1)

location_id
163    0.243901
214    0.074497
205    0.980048
190    0.543213
189    0.537420
dtype: float64

In [24]:
paf_deficiency_nofort = calculate_paf_deficiency_nofort(rr_deficiency_nofort, 
                                                        baseline_fortification_coverage)
paf_deficiency_nofort.mean(axis=1)

location_id
163    0.443970
214    0.485210
205    0.026645
190    0.342059
189    0.344187
dtype: float64

In [25]:
pif_deficiency_nofort = calculate_pif_deficiency_nofort(paf_deficiency_nofort, 
                                                        baseline_fortification_coverage, 
                                                        counterfactual_fortification_coverage)
pif_deficiency_nofort.mean(axis=1).head()

location_id  coverage_level
163          low               0.095484
214          low               0.031349
205          low               0.000000
190          low               0.064860
189          low               0.072513
dtype: float64

In [26]:
daly_reduction = calculate_daly_reduction_by_cause(pif_deficiency_nofort, paf_dalys_deficiency, dalys)
daly_reduction.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,sex_id,age_group_id,measure,cause_id,coverage_level,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
163,1,2,dalys averted,294,low,39.073737,22.067716,19.851739,8.851672,14.05879,18.749362,4.063897,13.658859,26.534303,24.199617,...,8.266924,16.267264,18.881375,39.95898,3.363803,14.161733,14.751978,17.040655,7.339892,7.076226
163,1,2,dalys averted,389,low,39.073737,22.067716,19.851739,8.851672,14.05879,18.749362,4.063897,13.658859,26.534303,24.199617,...,8.266924,16.267264,18.881375,39.95898,3.363803,14.161733,14.751978,17.040655,7.339892,7.076226
163,1,2,pif,389,low,0.156868,0.081591,0.073447,0.060558,0.119101,0.11858,0.02531,0.121969,0.076266,0.152007,...,0.094321,0.083314,0.145314,0.15679,0.071345,0.105165,0.124123,0.080446,0.073274,0.070697
163,1,3,dalys averted,294,low,522.240431,214.110377,152.106619,195.935534,302.27045,182.141138,86.533844,244.659397,247.584663,82.511093,...,132.256955,263.559033,264.643874,342.623531,108.073336,462.594106,179.028127,139.570401,174.807271,171.178007
163,1,3,dalys averted,389,low,522.240431,214.110377,152.106619,195.935534,302.27045,182.141138,86.533844,244.659397,247.584663,82.511093,...,132.256955,263.559033,264.643874,342.623531,108.073336,462.594106,179.028127,139.570401,174.807271,171.178007


In [27]:
# check and make sure that there are only negative dalys averted for execpted draws
    # (draws with RR for fortification < 1 and draws with negative GBD PAFs)

in_neg_draws = np.concatenate([pd.DataFrame(rr_deficiency_nofort.stack()).loc[pd.DataFrame(rr_deficiency_nofort.stack())[0] < 1].reset_index()['draws'].unique(),
            pd.DataFrame(paf_dalys_deficiency.stack()).loc[pd.DataFrame(paf_dalys_deficiency.stack())[0] < 0].reset_index()['level_4'].unique()])

out_neg_draws = pd.DataFrame(daly_reduction.stack()).reset_index().rename(columns={'level_6':'draw',0:'val'})
out_neg_draws = out_neg_draws.loc[out_neg_draws.val < 0]

assert len([c for c in out_neg_draws.draw.unique() if c not in in_neg_draws]) == 0, "Error: unexpected negative values"

In [28]:
fort_daly_reduction_by_location = daly_reduction.groupby(['location_id','measure','cause_id','coverage_level']).sum().reset_index()
fort_daly_reduction_by_location = (fort_daly_reduction_by_location
                                   .loc[fort_daly_reduction_by_location.measure=='dalys averted']
                                   .loc[fort_daly_reduction_by_location.cause_id==294])
fort_daly_reduction_by_location = (fort_daly_reduction_by_location
                                   .set_index(['location_id','measure','cause_id','coverage_level'])
                                   .apply(pd.DataFrame.describe, percentiles=[0.025,0.975], axis=1))
    
fort_daly_reduction_by_location

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,mean,std,min,2.5%,50%,97.5%,max
location_id,measure,cause_id,coverage_level,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
163,dalys averted,294,full,1000.0,96007.74343,55063.957051,-108195.163728,-4373.326251,92275.090245,206460.510938,317140.147063
163,dalys averted,294,high,1000.0,72005.807572,41297.967788,-81146.372796,-3279.994688,69206.317684,154845.383203,237855.110297
163,dalys averted,294,low,1000.0,24001.935857,13765.989263,-27048.790932,-1093.331563,23068.772561,51615.127734,79285.036766
163,dalys averted,294,medium,1000.0,48003.871715,27531.978525,-54097.581864,-2186.663125,46137.545123,103230.255469,158570.073531
189,dalys averted,294,full,1000.0,10996.353748,8223.412083,-8241.76621,-540.133729,9707.197315,30554.86757,53181.893257
189,dalys averted,294,high,1000.0,8247.265311,6167.559062,-6181.324658,-405.100297,7280.397986,22916.150678,39886.419943
189,dalys averted,294,low,1000.0,2749.088437,2055.853021,-2060.441553,-135.033432,2426.799329,7638.716893,13295.473314
189,dalys averted,294,medium,1000.0,5498.176874,4111.706042,-4120.883105,-270.066865,4853.598657,15277.433785,26590.946629
190,dalys averted,294,full,1000.0,10646.945736,8731.847671,-7617.963381,-501.836712,8996.605969,31971.593208,57583.271686
190,dalys averted,294,high,1000.0,7985.209302,6548.885754,-5713.472536,-376.377534,6747.454477,23978.694906,43187.453764


In [30]:
pop = (get_population(location_id=location_ids,
                    sex_id=sexes,
                    age_group_id=ages,
                    gbd_round_id=6,
                    decomp_step='step4')
       .groupby('location_id').sum())
pop = pop[['population']]
rates = pop.reset_index().merge(fort_daly_reduction_by_location.reset_index(), on='location_id')
for col in ['mean','2.5%','97.5%']:
    rates[f'{col}'] = rates[f'{col}'] / rates.population * 100_000
rates['measure'] = 'dalys averted per 100,000'
rates = rates[['location_id','coverage_level','measure','cause_id','mean','2.5%','97.5%']]
rates = rates.loc[rates.coverage_level=='full']
rates


# NOTE: the 'full' coverage level can be used to compare to previous simulation results (from 2017 data) in 2021, specifically

Unnamed: 0,location_id,coverage_level,measure,cause_id,mean,2.5%,97.5%
0,163,full,"dalys averted per 100,000",294,82.008302,-3.735626,176.355316
4,189,full,"dalys averted per 100,000",294,118.147916,-5.803349,328.290086
8,190,full,"dalys averted per 100,000",294,150.173786,-7.078342,450.955168
12,205,full,"dalys averted per 100,000",294,0.0,0.0,0.0
16,214,full,"dalys averted per 100,000",294,87.181089,-7.146911,230.779695


# Alternative target coverage rates

These coverage rates represent low/med/high coverage of 25/50/75% of the population that eats the industrially produced vehicle, as opposed to low/med/high coverage of 25/50/75% of the difference between the population that eats fortified vehicle and the population that eats the industrially produced vehicle, as shown above

In [36]:
baseline_fortification_coverage_alt, counterfactual_fortification_coverage_alt = load_coverage_data(nutrient, 
                                                                                                    vehicle,
                                                                                                    percent_of_difference=False)

In [37]:
paf_deficiency_nofort_alt = calculate_paf_deficiency_nofort(rr_deficiency_nofort, baseline_fortification_coverage_alt)
paf_deficiency_nofort_alt.mean(axis=1)

location_id
163    0.443970
214    0.485210
205    0.026645
190    0.342059
189    0.344187
dtype: float64

In [38]:
pif_deficiency_nofort_alt = calculate_pif_deficiency_nofort(paf_deficiency_nofort_alt, 
                                                            baseline_fortification_coverage_alt, 
                                                            counterfactual_fortification_coverage_alt)
pif_deficiency_nofort_alt.mean(axis=1).head()

location_id  coverage_level
163          low              -0.011986
214          low               0.002023
205          low              -0.984298
190          low              -0.240919
189          low              -0.228786
dtype: float64

In [39]:
daly_reduction_alt = calculate_daly_reduction_by_cause(pif_deficiency_nofort_alt, paf_dalys_deficiency, dalys)
daly_reduction.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,sex_id,age_group_id,measure,cause_id,coverage_level,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
163,1,2,dalys averted,294,low,39.073737,22.067716,19.851739,8.851672,14.05879,18.749362,4.063897,13.658859,26.534303,24.199617,...,8.266924,16.267264,18.881375,39.95898,3.363803,14.161733,14.751978,17.040655,7.339892,7.076226
163,1,2,dalys averted,389,low,39.073737,22.067716,19.851739,8.851672,14.05879,18.749362,4.063897,13.658859,26.534303,24.199617,...,8.266924,16.267264,18.881375,39.95898,3.363803,14.161733,14.751978,17.040655,7.339892,7.076226
163,1,2,pif,389,low,0.156868,0.081591,0.073447,0.060558,0.119101,0.11858,0.02531,0.121969,0.076266,0.152007,...,0.094321,0.083314,0.145314,0.15679,0.071345,0.105165,0.124123,0.080446,0.073274,0.070697
163,1,3,dalys averted,294,low,522.240431,214.110377,152.106619,195.935534,302.27045,182.141138,86.533844,244.659397,247.584663,82.511093,...,132.256955,263.559033,264.643874,342.623531,108.073336,462.594106,179.028127,139.570401,174.807271,171.178007
163,1,3,dalys averted,389,low,522.240431,214.110377,152.106619,195.935534,302.27045,182.141138,86.533844,244.659397,247.584663,82.511093,...,132.256955,263.559033,264.643874,342.623531,108.073336,462.594106,179.028127,139.570401,174.807271,171.178007


In [40]:
fort_daly_reduction_by_location_alt = daly_reduction_alt.groupby(['location_id','measure','cause_id','coverage_level']).sum().reset_index()
fort_daly_reduction_by_location_alt = (fort_daly_reduction_by_location_alt
                                   .loc[fort_daly_reduction_by_location_alt.measure=='dalys averted']
                                   .loc[fort_daly_reduction_by_location_alt.cause_id==294])
fort_daly_reduction_by_location_alt = (fort_daly_reduction_by_location_alt
                                   .set_index(['location_id','measure','cause_id','coverage_level'])
                                   .apply(pd.DataFrame.describe, percentiles=[0.025,0.975], axis=1))
    
fort_daly_reduction_by_location_alt

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,mean,std,min,2.5%,50%,97.5%,max
location_id,measure,cause_id,coverage_level,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
163,dalys averted,294,full,1000.0,96007.74343,55063.957051,-108195.163728,-4373.326251,92275.090245,206460.510938,317140.147063
163,dalys averted,294,high,1000.0,62998.957521,36122.55765,-71091.989996,-2853.378296,60389.231959,135159.753688,206505.925183
163,dalys averted,294,low,1000.0,-3018.614296,2780.99165,-17824.766753,-9618.857341,-2523.683684,463.303376,6682.820707
163,dalys averted,294,medium,1000.0,29990.171612,17226.066121,-33988.816264,-1333.430341,28824.884411,64788.102902,98762.592267
189,dalys averted,294,full,1000.0,10996.353748,8223.412083,-8241.76621,-540.133729,9707.197315,30554.86757,53181.893257
189,dalys averted,294,high,1000.0,4444.630962,3381.153919,-3252.853258,-237.037257,3882.843615,12641.114241,21741.059077
189,dalys averted,294,low,1000.0,-8658.814611,6560.960532,-49094.459235,-24328.539781,-7471.474123,385.367595,6724.972647
189,dalys averted,294,medium,1000.0,-2107.091825,1799.842797,-15002.341738,-6472.064902,-1687.573988,97.30094,1787.793303
190,dalys averted,294,full,1000.0,10646.945736,8731.847671,-7617.963381,-501.836712,8996.605969,31971.593208,57583.271686
190,dalys averted,294,high,1000.0,3799.290938,3139.04048,-2996.973278,-180.843093,3159.002333,11813.860185,20604.594535


In [44]:
data = pd.read_csv('/ihme/homes/alibow/notebooks/vivarium_data_analysis/pre_processing/lsff_project/data_prep/outputs/LSFF_extraction_clean_data_rich_locations_01_11_2021.csv')
data = data.loc[data.location_id==163].loc[data.vehicle=='oil'].loc[data.nutrient!='vitamin d']
data

Unnamed: 0,location_id,location_name,vehicle,value_description,nutrient,value_mean,value_025_percentile,value_975_percentile
0,163,India,oil,percent of population eating fortified vehicle,vitamin a,24.3,21.1,27.9
2,163,India,oil,percent of population eating industrially prod...,na,89.4,87.0,91.8
3,163,India,oil,percent of population eating vehicle,na,100.0,100.0,100.0


In [None]:
# NOTE: coverage parameters for india are the same as previously used in the LSFF microsim

In [51]:
pd.DataFrame(dalys.groupby('location_id').sum().mean(axis=1)).rename(columns={0:'population'})/pop * 100_000

Unnamed: 0_level_0,population
location_id,Unnamed: 1_level_1
163,4687.829244
189,6267.952313
190,10648.482763
205,9644.396041
214,36530.308984


In [47]:
pop

Unnamed: 0_level_0,population
location_id,Unnamed: 1_level_1
163,117070800.0
189,9307277.0
190,7089750.0
205,4011999.0
214,33521640.0


In [52]:
dalys

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,sex_id,age_group_id,cause_id,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
163,1,2,302,2.280523e+05,2.564597e+05,1.462903e+05,1.926764e+05,1.723624e+05,1.874550e+05,1.595342e+05,1.762358e+05,1.704897e+05,1.515864e+05,...,2.337282e+05,1.952408e+05,2.647678e+05,1.898549e+05,2.012600e+05,2.468178e+05,2.110721e+05,2.169461e+05,1.612400e+05,1.490064e+05
163,1,2,341,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
163,1,2,389,2.490860e+02,2.704661e+02,2.702849e+02,1.461680e+02,1.180410e+02,1.581155e+02,1.605631e+02,1.119864e+02,3.479195e+02,1.592009e+02,...,8.764676e+01,1.952523e+02,1.299347e+02,2.548563e+02,4.714861e+01,1.346625e+02,1.188493e+02,2.118282e+02,1.001705e+02,1.000919e+02
163,1,3,302,2.076096e+05,2.630785e+05,1.988612e+05,1.736386e+05,2.557103e+05,1.840703e+05,1.674592e+05,2.827097e+05,2.285264e+05,1.906821e+05,...,2.663606e+05,2.518532e+05,2.502901e+05,1.896217e+05,2.269387e+05,2.900916e+05,1.997997e+05,2.570161e+05,1.840178e+05,1.886234e+05
163,1,3,341,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
214,2,4,341,7.245857e+04,6.196831e+04,5.470320e+04,5.092301e+04,7.273847e+04,3.337036e+04,1.500574e+04,5.341754e+04,4.160694e+04,3.045045e+04,...,4.487982e+04,1.140759e+05,8.862803e+04,6.596207e+04,2.978295e+04,7.011890e+04,2.497070e+04,1.676846e+04,6.948321e+04,1.196006e+04
214,2,4,389,3.778603e+03,1.788632e+03,2.694246e+03,3.374926e+03,1.907051e+03,2.126858e+03,3.692565e+03,1.496534e+03,3.059461e+03,2.121289e+03,...,1.664136e+03,3.634019e+03,3.658426e+03,2.277510e+03,2.143606e+03,2.482902e+03,9.801963e+02,1.420922e+03,1.165448e+03,1.880732e+03
214,2,5,302,2.938190e+06,2.639545e+06,2.715032e+06,3.235830e+06,2.454862e+06,2.841054e+06,2.954444e+06,3.194798e+06,2.608007e+06,2.668330e+06,...,2.433145e+06,2.105123e+06,2.020242e+06,2.012091e+06,2.264868e+06,2.950872e+06,1.954988e+06,1.943401e+06,2.244735e+06,1.847843e+06
214,2,5,341,3.178544e+05,2.477280e+05,2.487067e+05,2.420544e+05,3.560477e+05,1.585589e+05,7.778850e+04,2.861582e+05,1.894905e+05,1.342484e+05,...,2.226074e+05,5.255371e+05,4.094833e+05,3.087391e+05,1.553198e+05,2.995593e+05,1.097226e+05,7.686006e+04,3.348229e+05,4.677796e+04


In [72]:
population = (get_population(gbd_round_id=6,
                           year_id=2019,
                           age_group_id=ages,
                           sex_id=sexes,
                           decomp_step='step4',
                            location_id=location_ids)
              .set_index(['location_id','sex_id','age_group_id'])
              .drop(columns=['run_id','year_id']))
daly_19 = dalys.groupby(['location_id','sex_id','age_group_id']).sum()
for i in list(range(0,1000)):
    daly_19[f'draw_{i}'] = daly_19[f'draw_{i}'] / population.population.values #* 100_000
daly_19.groupby('location_id').sum().mean(axis=1)

location_id
163    2.634849
189    1.174724
190    1.996872
205    2.544795
214    6.626335
dtype: float64