In [1]:
import pandas as pd, numpy as np
from db_queries import get_ids, get_outputs, get_location_metadata, get_population, get_covariate_estimates
from get_draws.api import get_draws
import scipy.stats 
import scipy.integrate as integrate
import matplotlib.pyplot as plt

In [2]:
from dichotomous_multiplication_model_functions import *

In [3]:
# Index columns handed to the data-pull helpers further down
index_cols = ['location_id', 'sex_id', 'age_group_id']

# Demographic strata, scale-up scenarios and projection years used by this run
age_group_ids = list(range(2, 6))
sex_ids = [1, 2]
coverage_levels = [0.2, 0.5, 0.8, 1]
years = list(range(2021, 2026))

In [4]:
# Inputs specific to the vitamin A / oil model -- replace them when running another model
rei_id = 96
cause_ids = [389, 302, 341]
nonfatal_causes = [389]
nutrient = 'vitamin a'
vehicle = 'oil'

# Four fractions, later passed alongside the four age_group_ids.
# NOTE(review): presumably one fraction per age group, in age_group_ids order -- confirm
# against get_effective_vitamin_a_coverage.
effective_fractions = [0, 0, (365 - (365 * 0.5)) / (365 - 28), 1]

data = pd.read_csv(
    '/ihme/homes/alibow/notebooks/vivarium_data_analysis/pre_processing/lsff_project/data_prep/outputs/LSFF_extraction_clean_data_rich_locations_01_11_2021.csv'
)

# Note: the full set of location IDs is listed below, but only a subset is used
# here: the locations with non-missing coverage data for the nutrient and
# vehicle of interest (vitamin A / oil).
#
# [168, 161, 201, 202, 6, 205, 171, 141, 179, 207, 163, 11, 180, 181,
#  184, 15, 164, 213, 214, 165, 196, 522, 190, 189, 20]
location_ids = list(
    data.loc[(data.nutrient == nutrient) & (data.vehicle == vehicle), 'location_id'].unique()
)

location_ids

[163, 214, 205, 190, 189]

In [5]:
# Relative-risk distribution used for the "no fortification" scenario.
# Specific to vitamin A -- substitute other values for other models.

from numpy import log
from scipy.stats import norm, lognorm

# The RR is described by its median and its 97.5th percentile
median = 2.22
q_975 = 5.26

# log(RR) is normal: its mean is the log of the RR median
mu = log(median)

# 97.5th percentile of the standard normal (about 1.96)
q_975_stdnorm = norm.ppf(0.975)

# Std dev of log(RR): distance from the mean to the 97.5th percentile on the
# log scale, expressed in standard-normal units
sigma = (log(q_975) - log(median)) / q_975_stdnorm

# Frozen lognormal for the RR (scipy parameterisation: s = shape = sigma,
# scale = median); it represents the uncertainty in the effect size
rr_distribution = lognorm(sigma, scale=median)

In [6]:
# Coverage for the chosen nutrient/vehicle: the helper returns a
# (baseline, counterfactual) pair for the requested coverage levels and years.
baseline_coverage, counterfactual_coverage = generate_overall_coverage_rates(
    nutrient,
    vehicle,
    coverage_levels,
    years,
)

In [7]:
# NOTE: get_effective_vitamin_a_coverage is vitamin A specific and should not be used universally.
# The coverage helper used earlier assumes all coverage is effective coverage and, under that
# assumption, can be used for any model.

# Same transformation, applied first to the baseline and then to the counterfactual coverage
baseline_effective_coverage, counterfactual_effective_coverage = [
    get_effective_vitamin_a_coverage(coverage, sex_ids, age_group_ids, effective_fractions, years)
    for coverage in (baseline_coverage, counterfactual_coverage)
]

In [8]:
# Draw-level relative risks built from the lognormal parameters (mu, sigma), one row per location
rr_deficiency_nofort = generate_rr_deficiency_nofort_draws(
    mu,
    sigma,
    location_ids,
)

# Mean across draws for each location. The displayed means are identical for
# every location, which suggests the same draws are shared across locations.
rr_deficiency_nofort.mean(axis='columns')

location_id
163    2.396239
214    2.396239
205    2.396239
190    2.396239
189    2.396239
dtype: float64

In [9]:
# Cause-specific PAFs of DALYs for the risk `rei_id`, by location / sex / age group / cause.
# In the rows displayed below, cause 389 has a PAF of 1.0 in every draw.
paf_dalys_deficiency = pull_cause_specific_dalys_deficiency_pafs(
    rei_id,
    cause_ids,
    location_ids,
    age_group_ids,
    sex_ids,
    index_cols,
)
paf_dalys_deficiency.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,sex_id,age_group_id,cause_id,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
163,1,2,389,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
163,1,3,389,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
163,1,4,302,0.023614,0.022761,0.032084,0.061713,0.010529,0.016051,0.008245,0.045553,0.027902,0.032493,...,0.013291,0.040655,0.013134,0.040318,0.050091,0.018375,0.019796,0.037472,0.025155,0.00943
163,1,4,341,0.062437,0.068525,0.080122,0.158759,0.013909,0.026354,0.011891,0.113604,0.067258,0.091411,...,0.028041,0.109343,0.039931,0.107498,0.148924,0.044538,0.046736,0.096353,0.066101,0.021711
163,1,4,389,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [10]:
# Draw-level DALYs by location / sex / age group / cause.
# The FutureWarning printed below comes from a pd.concat of the `ylls` and
# `nonfatal` frames inside the helper, not from this cell.
dalys = pull_dalys(
    cause_ids,
    nonfatal_causes,
    location_ids,
    age_group_ids,
    sex_ids,
    index_cols,
)
dalys.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  ylls = pd.concat([ylls.reset_index(), nonfatal.reset_index()]).set_index(index_cols + ['cause_id'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,sex_id,age_group_id,cause_id,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
163,1,2,302,228052.286761,256459.736242,146290.339705,192676.38112,172362.436243,187454.984499,159534.221668,176235.755329,170489.703795,151586.434309,...,233728.203639,195240.822126,264767.777947,189854.851927,201259.960318,246817.767701,211072.087665,216946.113187,161240.038151,149006.385585
163,1,2,341,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
163,1,2,389,249.085991,270.466085,270.284907,146.168022,118.041018,158.115471,160.563119,111.986426,347.919477,159.200861,...,87.646759,195.252324,129.9347,254.856255,47.148614,134.662517,118.849263,211.828232,100.17052,100.091938
163,1,3,302,207609.552344,263078.492814,198861.237612,173638.640755,255710.254455,184070.329711,167459.228561,282709.744663,228526.429634,190682.116226,...,266360.552338,251853.188296,250290.066661,189621.733181,226938.698022,290091.580737,199799.659021,257016.120929,184017.830031,188623.397182
163,1,3,341,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# PAF of deficiency attributable to lack of fortification, from the RR draws and
# the baseline effective coverage. The result is indexed by location / sex /
# age group / year (see the preview below).
paf_deficiency_nofort = calculate_paf_deficiency_nofort(
    rr_deficiency_nofort,
    baseline_effective_coverage,
)
paf_deficiency_nofort.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,sex_id,age_group_id,year,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
163,1,2,2021,0.78595,0.447022,0.406811,0.340996,0.617978,0.620854,0.149863,0.636347,0.421241,0.759597,...,0.507946,0.455066,0.740104,0.785748,0.396677,0.557149,0.64751,0.441514,0.406003,0.393076
163,1,2,2022,0.78595,0.447022,0.406811,0.340996,0.617978,0.620854,0.149863,0.636347,0.421241,0.759597,...,0.507946,0.455066,0.740104,0.785748,0.396677,0.557149,0.64751,0.441514,0.406003,0.393076
163,1,2,2023,0.78595,0.447022,0.406811,0.340996,0.617978,0.620854,0.149863,0.636347,0.421241,0.759597,...,0.507946,0.455066,0.740104,0.785748,0.396677,0.557149,0.64751,0.441514,0.406003,0.393076
163,1,2,2024,0.78595,0.447022,0.406811,0.340996,0.617978,0.620854,0.149863,0.636347,0.421241,0.759597,...,0.507946,0.455066,0.740104,0.785748,0.396677,0.557149,0.64751,0.441514,0.406003,0.393076
163,1,2,2025,0.78595,0.447022,0.406811,0.340996,0.617978,0.620854,0.149863,0.636347,0.421241,0.759597,...,0.507946,0.455066,0.740104,0.785748,0.396677,0.557149,0.64751,0.441514,0.406003,0.393076


In [12]:
# PIF of deficiency when effective coverage moves from the baseline to each
# counterfactual coverage level (indexed by location / sex / age group / year /
# coverage_level, see the preview below).
pif_deficiency_nofort = calculate_pif_deficiency_nofort(paf_deficiency_nofort,
                                                        baseline_effective_coverage,
                                                        counterfactual_effective_coverage)

# Sanity check on the upper bound only. NaN values also fail it, because
# NaN <= 1 evaluates to False.
# NOTE(review): the previous message claimed a "0-1 bounds" check, but no lower
# bound is enforced here. Add `pif_deficiency_nofort >= 0` only after confirming
# that negative PIFs (counterfactual coverage below baseline) cannot occur.
assert np.all(pif_deficiency_nofort <= 1), "ERROR: PIFs exceed the upper bound of 1 (or are NaN)"

pif_deficiency_nofort.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,sex_id,age_group_id,year,coverage_level,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
163,1,2,2021,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
163,1,2,2021,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
163,1,2,2021,0.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
163,1,2,2021,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
163,1,2,2022,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Combine the deficiency PIFs with the cause-specific PAFs and the DALY draws.
# The helper returns a pair: (PIFs of DALYs, DALY reductions).
pif_dalys_nofort, daly_reduction = calculate_final_pifs_and_daly_reductions(
    pif_deficiency_nofort,
    paf_dalys_deficiency,
    dalys,
    coverage_levels,
    years,
)

In [None]:
# Total DALY reduction per location / coverage level / year, then summarised
# across the columns of each row: mean plus the 2.5th and 97.5th percentiles.
fort_daly_reduction_by_location = (
    daly_reduction
    .groupby(['location_id', 'coverage_level', 'year'])
    .sum()
    .apply(pd.DataFrame.describe, percentiles=[0.025, 0.975], axis='columns')
    .filter(items=['mean', '2.5%', '97.5%'])
)

fort_daly_reduction_by_location

In [None]:
# Population per location, summed over every row returned for that location.
# .sum() is applied to all returned columns, not only `population`.
# NOTE(review): no year_id is passed, so the population year is whatever
# get_population defaults to, while the rates below cover 2021-2025 -- confirm.
pop = (
    get_population(
        location_id=location_ids,
        sex_id=sex_ids,
        age_group_id=age_group_ids,
        gbd_round_id=6,
        decomp_step='step4',
    )
    .groupby('location_id')
    .sum()
)

# Attach the population to each summary row and express the DALY counts per 100,000 people
rates = pop.reset_index().merge(fort_daly_reduction_by_location.reset_index(), on='location_id')
for col in ('mean', '2.5%', '97.5%'):
    rates[col] = rates[col] / rates.population * 100_000
rates = rates[['location_id','year','coverage_level','mean','2.5%','97.5%']]

# Location id -> name lookup, limited to the modelled locations (reused when plotting)
l = get_ids('location')
l = l.loc[l['location_id'].isin(location_ids), ['location_id','location_name']]

rates = rates.merge(l, on='location_id')
rates

In [None]:
# note... want to restructure these plots to bar chart or scatter so that
# it does not appear like we are modeling partial years

def make_plots(location_id):
    """Plot DALYs averted per 100,000 by year for a single location.

    Opens a new figure and draws:

    * a grey line and band for the total vitamin A deficiency burden, i.e. the
      mean and the 2.5th-97.5th percentiles across draws of
      ``paf_dalys_deficiency * dalys`` summed over the location, divided by the
      location's population and scaled to 100,000;
    * one coloured line and band per plotted coverage level, taken from the
      ``mean``, ``2.5%`` and ``97.5%`` columns of ``rates``.

    Reads the notebook-level names ``paf_dalys_deficiency``, ``dalys``, ``pop``,
    ``rates``, ``l``, ``years`` and ``coverage_levels``.

    Parameters
    ----------
    location_id : int
        Location to plot. It must be present in the burden data, ``pop``,
        ``rates`` and ``l``.

    Returns
    -------
    None
    """
    plt.figure()
    colors = ['tab:blue','tab:orange','tab:green']

    # Total burden for this location, one value per draw. location_id stays in
    # the index: resetting it into a column would make the summary statistics
    # treat the id itself as an extra draw.
    burden_draws = (paf_dalys_deficiency * dalys).groupby('location_id').sum().loc[location_id]

    # Look the population up once instead of once per statistic
    pop_by_location = pop.reset_index()
    population = pop_by_location.loc[pop_by_location.location_id == location_id, 'population'].values[0]

    burden_mean = burden_draws.mean() / population * 100_000
    burden_lower, burden_upper = burden_draws.quantile([0.025, 0.975]) / population * 100_000

    # The burden does not vary by year here, so it is drawn as a flat reference line/band
    plt.plot(years, [burden_mean] * len(years), color='grey',
             label='Total Vitamin A Deficiency Burden')
    plt.fill_between(years, [burden_lower] * len(years), [burden_upper] * len(years),
                     color='grey', alpha=0.1)

    location_rates = rates.loc[rates.location_id == location_id]

    # NOTE(review): the last entry of coverage_levels is not plotted (same loop
    # bound as before) -- confirm that leaving it out is intended.
    for i, coverage_level in enumerate(coverage_levels[:-1]):
        color = colors[i % len(colors)]
        rate = location_rates.loc[location_rates.coverage_level == coverage_level]

        # Label each line explicitly so the legend cannot be mismatched with
        # the unlabeled uncertainty bands
        plt.plot(rate.year, rate['mean'], color=color, label=f'{coverage_level:.0%} coverage')
        plt.fill_between(rate.year, rate['2.5%'], rate['97.5%'], color=color, alpha=0.1)

    location_name = l.loc[l.location_id==location_id]['location_name'].values[0]
    plt.title(f'DALYs Averted Due to Vitamin A Fortification \n Scale-up among Children Under Five in {location_name}')
    plt.xlabel('Year')
    plt.ylabel('DALYs per 100,000 person-years')
    plt.legend()
    plt.xticks(years)

In [None]:
# Draw one figure per modelled location; make_plots opens a new figure on every call
for location_id in location_ids:
    make_plots(location_id)