In [1]:
import pandas as pd, numpy as np
from get_draws.api import get_draws
from db_queries import get_population
from vitamin_a_and_zinc_functions import pull_deficiency_attributable_dalys
from vivarium_helpers.id_helper import *
!date

Fri Mar 26 16:09:27 PDT 2021


# Define shared directory and `.hdf` file in which to save GBD data

In [2]:
# Capture the output of the shell command `whoami`; the result is list-like
# (one entry per output line), so the login name is read later as username[0].
username = !whoami
username

  and should_run_async(code)


['ndbs']

In [3]:
# GBD data will be stored in the following directory in an .hdf file.
# You can change this directory name if you want, and you may need to create it before running code below.
# The path is built from the first line of the `whoami` output captured in `username`.
share_directory = f'/share/scratch/users/{username[0]}/vivarium_lsff/gbd_data'
share_directory

'/share/scratch/users/ndbs/vivarium_lsff/gbd_data'

In [4]:
# Single .hdf file inside `share_directory` that every pull below writes into.
hdfstore_path = f'{share_directory}/multmodel_data.hdf'

# Get location IDs for all 25 countries we're modeling

In [5]:
# List the available country/location-id input files.
!ls ../gbd_data_summary/input_data/

all_countries_with_ids.csv  bmgf_countries_with_ids.csv
bmgf_countries.csv	    bmgf_top_25_countries_20201203.csv


In [6]:
# Read the table of modeled countries, then collect their GBD location ids
# into a plain Python list for use in the data pulls below.
locations = pd.read_csv(
    '../gbd_data_summary/input_data/bmgf_top_25_countries_20201203.csv'
)
location_ids = locations['location_id'].tolist()
locations

Unnamed: 0,location_name,location_id
0,Angola,168
1,Bangladesh,161
2,Burkina Faso,201
3,Cameroon,202
4,China,6
5,Côte d'Ivoire,205
6,Democratic Republic of the Congo,171
7,Egypt,141
8,Ethiopia,179
9,Ghana,207


# Define age groups and sexes for which to pull data, and index columns we want to keep

In [7]:
# Columns used as the index of every pulled dataframe
index_cols = ['location_id', 'sex_id', 'age_group_id']
# Age groups and sexes to request (names are looked up in the next cells)
age_group_ids = [2, 3, 4, 5]
sex_ids = [1, 2]
# coverage_levels = [0.2,0.5,0.8]
# years = [2021,2022,2023,2024,2025]

  and should_run_async(code)


In [8]:
# Show the names of the age groups selected above.
ids_to_names('age_group', *age_group_ids)

age_group_id
2    Early Neonatal
3     Late Neonatal
4     Post Neonatal
5            1 to 4
Name: age_group_name, dtype: object

In [9]:
# Show the names of the sexes selected above.
ids_to_names('sex', *sex_ids)

  and should_run_async(code)


sex_id
1      Male
2    Female
Name: sex, dtype: object

# Pull and save Vitamin A deficiency attributable DALYs

In [10]:
# Look up the risk (rei) id for Vitamin A deficiency.
names_to_ids('rei', 'Vitamin A deficiency')

  and should_run_async(code)


rei_name
Vitamin A deficiency    96
Name: rei_id, dtype: int64

In [11]:
%%time
rei_id = 96
hdf_key = '/vitamin_a_deficiency/dalys_attributable_bmgf_25_countries'
vitamin_a_dalys = pull_deficiency_attributable_dalys(rei_id,
                                              location_ids,
                                              age_group_ids,
                                              sex_ids,
                                              index_cols)
vitamin_a_dalys.to_hdf(hdfstore_path, key=hdf_key)
vitamin_a_dalys.head()

  and should_run_async(code)


CPU times: user 12.3 s, sys: 18.1 s, total: 30.5 s
Wall time: 1min 8s


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,sex_id,age_group_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
6,1,2,6.518769,13.105468,3.094651,11.038021,4.401997,11.975454,7.494744,8.345612,6.911725,3.246637,...,7.744664,6.914776,16.271661,9.998869,8.958759,10.438845,4.813849,2.642819,6.619699,4.775778
6,1,3,137.319706,160.767128,94.890071,90.369198,60.432847,55.529898,90.291739,271.638485,98.426889,103.576291,...,46.290755,114.677909,162.199784,44.200716,132.494285,77.170521,111.370798,50.678223,179.346982,140.162456
6,1,4,2655.803809,1851.342551,1589.046172,1599.004574,1346.298841,1256.386415,2277.158107,1945.723118,1648.834016,1727.435654,...,1401.487949,2418.174399,1694.596125,1575.049825,1867.62223,2990.603818,965.710245,1477.468702,1660.723099,1682.522484
6,1,5,6014.654573,5196.950929,5416.861679,5331.341355,3854.742854,3731.005037,5998.981182,5148.786063,6713.467238,4967.112706,...,4836.679039,7762.494425,9917.792849,5999.923366,5959.441779,6283.476586,2741.025158,4291.649161,4393.90072,6320.589774
6,2,2,1.257434,0.399007,0.553913,1.023849,0.758328,2.072954,1.215976,0.498824,1.484751,0.292424,...,0.617292,2.122949,0.496779,0.652097,1.054172,0.565635,0.495569,0.357408,0.691123,0.65319


# Pull and save Zinc deficiency attributable DALYs

In [12]:
# Look up the risk (rei) id for Zinc deficiency.
names_to_ids('rei', 'Zinc deficiency')

  and should_run_async(code)


rei_name
Zinc deficiency    97
Name: rei_id, dtype: int64

In [13]:
%%time
rei_id = list_ids('rei', 'Zinc deficiency')
# This key name should be updated when countries from other tiers are added:
hdf_key = '/zinc_deficiency/dalys_attributable_bmgf_25_countries'
zinc_dalys = pull_deficiency_attributable_dalys(rei_id,
                                              location_ids,
                                              age_group_ids,
                                              sex_ids,
                                              index_cols)
zinc_dalys.to_hdf(hdfstore_path, key=hdf_key)
zinc_dalys.head()

CPU times: user 10.4 s, sys: 14.2 s, total: 24.6 s
Wall time: 33.5 s


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,sex_id,age_group_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
6,1,5,55.806889,0.0,29.211177,91.031178,45.761789,29.55091,11.862744,39.110218,142.270449,27.921471,...,70.827393,154.927527,29.528407,146.462447,219.59217,165.562467,62.928907,32.051667,77.849291,149.17971
6,2,5,37.542436,0.0,21.781172,86.086341,38.03233,24.744728,8.679425,33.560425,124.455756,18.714235,...,63.459794,123.302919,20.750247,115.160657,196.325055,154.511623,51.370814,35.211259,63.368359,129.279112
11,1,5,1057.924493,369.707946,517.346114,1121.865646,383.960301,326.811523,290.87645,1234.791858,1980.744828,928.505912,...,1295.128773,2676.259203,1164.434543,1504.802755,3333.240053,2298.076536,1813.060392,1378.863465,1424.56674,1897.192422
11,2,5,954.532812,197.702087,436.339271,1209.191167,371.705455,208.350528,464.778852,1270.639991,1371.653872,665.42756,...,1074.683581,2096.088331,897.944516,993.337161,2816.770108,2380.43308,1282.128241,957.580283,1063.576027,1460.532078
15,1,5,313.163965,18.266017,79.234558,128.724069,27.397756,42.252433,45.034578,168.170188,167.670791,189.359412,...,132.672371,285.220362,102.390704,220.484294,547.323432,387.181049,198.262244,86.842222,112.237688,146.72452


# Copy Ali's function for pulling DALYs for causes

## From `location_specific_results.ipynb`

In [14]:
def pull_dalys(cause_ids, nonfatal_cause_ids, location_ids, ages, sexes, index_cols):
    """
    Pull DALYs (YLLs + YLDs) for the specified cause IDs from GBD.
    -----
    INPUT (all in List() format):
    - cause_ids for YLL models
    - nonfatal_cause_ids for YLD models
    - location_ids for which to pull dalys
    - ages (age_group_ids) for which to pull dalys
    - sexes (sex_ids) for which to pull dalys
    - index_cols with which to format output
    -----
    @returns a drawspace dataframe of DALYS attributable to each fatal or nonfatal cause_id:
        - columns = draws
        - index = multiindex(index_cols..., cause_id)
      If only one of the two id lists is non-empty, the corresponding frame
      (YLLs or YLDs) is returned on its own. If both are given, a cause that
      appears in only one of them contributes zero for the missing measure.
    @raises ValueError if both cause_ids and nonfatal_cause_ids are empty
    """
    if len(cause_ids) + len(nonfatal_cause_ids) == 0:
        raise ValueError("Must select at least one fatal or nonfatal cause_id")

    # None means "not requested"; this keeps an unrequested measure out of the sum
    ylds, ylls = None, None

    if len(nonfatal_cause_ids) > 0:
        ylds = get_draws(
            gbd_id_type='cause_id',
            gbd_id=nonfatal_cause_ids,  # YLDs are pulled for the nonfatal causes
            source='como',
            measure_id=3,
            metric_id=3,  # only available as rate
            location_id=location_ids,
            year_id=2019,
            age_group_id=ages,
            sex_id=sexes,
            gbd_round_id=6,
            status='best',
            decomp_step='step5',
        ).set_index(index_cols + ['cause_id'])
        ylds = ylds.drop(columns=[c for c in ylds.columns if 'draw' not in c])

        # convert rate to count
        # NOTE(review): population uses decomp_step='step4' while draws use 'step5' -- confirm intended
        pop = get_population(
            location_id=location_ids,
            year_id=2019,
            age_group_id=ages,
            sex_id=sexes,
            gbd_round_id=6,
            decomp_step='step4').set_index(index_cols)
        # Look up the population of each YLD row by its demographic keys
        # (everything except cause_id), then scale every draw column at once.
        # Rows without a matching population become NaN.
        pop_per_row = pop['population'].reindex(ylds.index.droplevel('cause_id'))
        ylds = ylds.mul(pop_per_row.to_numpy(), axis=0)
    else:
        print("No nonfatal ids selected; returning ylls only")

    if len(cause_ids) > 0:
        # NOTE(review): status='latest' here vs 'best' for the YLD pull -- confirm intended
        ylls = get_draws(
            gbd_id_type='cause_id',
            gbd_id=cause_ids,
            source='codcorrect',
            measure_id=4,
            metric_id=1,
            location_id=location_ids,
            year_id=2019,
            age_group_id=ages,
            sex_id=sexes,
            gbd_round_id=6,
            status='latest',
            decomp_step='step5',
        ).set_index(index_cols + ['cause_id']).replace(np.nan, 0)
        ylls = ylls.drop(columns=[c for c in ylls.columns if 'draw' not in c])
    else:
        print("No fatal ids selected; returning ylds only")

    # Adding an empty frame would turn every value into NaN, so return the
    # single requested measure directly.
    if ylds is None:
        return ylls
    if ylls is None:
        return ylds

    # Rows present in only one frame (a cause that is fatal-only or
    # nonfatal-only) are filled with 0 for the other measure. NaNs already
    # present inside a frame are left alone and still propagate.
    all_rows = ylls.index.union(ylds.index)
    return ylls.reindex(all_rows, fill_value=0) + ylds.reindex(all_rows, fill_value=0)

  and should_run_async(code)


In [15]:
# Show the names of the measure ids used in `pull_dalys`.
ids_to_names('measure', 3, 4)

measure_id
3    YLDs (Years Lived with Disability)
4             YLLs (Years of Life Lost)
Name: measure_name, dtype: object

In [16]:
# Show the names of the metric ids used in `pull_dalys`.
ids_to_names('metric', 3, 1)

  and should_run_async(code)


metric_id
1    Number
3      Rate
Name: metric_name, dtype: object

# Pull and save DALYs due to Neural tube defects

In [17]:
# Look up the cause id for Neural tube defects.
names_to_ids('cause', 'Neural tube defects')

  and should_run_async(code)


cause_name
Neural tube defects    642
Name: cause_id, dtype: int64

In [19]:
%%time
cause_id = 642
hdf_key = '/neural_tube_defects/dalys_bmgf_25_countries'
neural_tube_defects_dalys = pull_dalys(
    [cause_id], [cause_id], location_ids, age_group_ids, sex_ids, index_cols
)
neural_tube_defects_dalys.to_hdf(hdfstore_path, key=hdf_key)
neural_tube_defects_dalys.head()

CPU times: user 22.2 s, sys: 1.6 s, total: 23.8 s
Wall time: 45.2 s


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,sex_id,age_group_id,cause_id,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
6,1,2,642,10322.763878,9471.983857,9326.322863,10395.966085,13510.893438,10190.255967,9542.422309,11721.644281,8732.405622,9642.032379,...,9922.34626,11560.055241,9404.223828,10986.001146,10630.453477,11978.378562,10120.298142,9298.436526,7386.087558,7474.08052
6,1,3,642,10793.369985,10263.918859,9382.185956,9968.037842,11416.271719,9862.964908,11121.482312,11598.592613,9764.89327,10017.196952,...,8034.565573,8990.057434,11906.901506,11044.259261,8330.75918,11241.229038,9985.727664,9238.886705,9739.067857,8067.928753
6,1,4,642,25336.608266,21714.315391,20047.215818,23050.370131,28084.480363,21569.777665,22029.386753,26934.517383,23360.799193,20067.716892,...,19383.308185,27212.898218,31242.177965,27961.219167,18728.674471,22540.794924,24194.220376,24114.656987,18316.178064,17181.754974
6,1,5,642,66204.817692,68020.658153,67263.522786,71341.865485,86497.514199,69578.397166,78537.426646,88361.45928,75241.19274,61302.551952,...,54270.894583,84548.082144,89238.607739,104460.090941,70193.626853,93351.585484,80049.044676,90301.053605,54608.073708,56213.929743
6,2,2,642,10878.00812,10382.724118,10116.728296,9244.719719,12568.191988,8388.912625,9137.537798,10220.456155,7906.165243,8515.270704,...,6821.211242,11308.953852,9333.432262,11717.126442,10577.374858,9341.582233,6810.005658,11231.196256,7818.548155,12510.833231


# Check what keys are now in the `HDFStore`

In [20]:
# Open the store read-only: listing keys needs no write access, and the
# default append mode would create the file if the path were wrong.
with pd.HDFStore(hdfstore_path, mode='r') as store:
    print(store.keys())

['/zinc_deficiency/dalys_attributable_bmgf_25_countries', '/zinc_deficiency/dalys_attributable_tier_1_2', '/vitamin_a_deficiency/dalys_attributable_bmgf_25_countries', '/vitamin_a_deficiency/dalys_attributable_tier_1_2', '/neural_tube_defects/dalys_bmgf_25_countries']


  and should_run_async(code)
