In [1]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
import gbd_mapping as gbd
from db_queries import get_outputs, get_ids
from get_draws.api import get_draws
from vivarium import Artifact

import bep_summarizer

!whoami
!date

ndbs
Wed Feb  5 13:30:47 PST 2020


In [2]:
%load_ext autoreload
%autoreload 2

## Artifact and output directories

In [3]:
# Directory containing the model artifacts (e.g. 'mali.hdf' is read from here below)
artifact_dir = '/ihme/costeffectiveness/artifacts/vivarium_gates_bep/'
# Directory containing the simulation results for this model
results_dir = '/share/costeffectiveness/results/vivarium_gates_bep/base_model_half_day'

# Run-date string for each location, keyed by location name
locations_rundates = {
    'Mali': '2020_02_04_20_04_06',
    'Tanzania': '2020_02_04_20_09_12',
}

# Location names, in the same order as the mapping above
locations = [*locations_rundates]

## GBD IDs from R shiny tool

https://shiny.ihme.washington.edu/content/88/

(The same IDs can also be looked up programmatically with `get_ids` from `db_queries`.)

In [4]:
# Hard-coded numeric GBD identifiers, grouped by the kind of entity they identify.
# NOTE(review): values are presumably copied from the GBD shiny tool / `get_ids`;
# re-verify them before reusing this cell with a different GBD round.

# GBD round IDs
gbd_2017 = 5

# Location IDs
tanzania = 189 # United Republic of Tanzania
mali = 211 # Mali

# Cause IDs
all_causes = 294
diarrhea = 302 # Diarrheal diseases
lri = 322 # Lower respiratory infections
meningitis = 332
measles = 341
neonatal = 380 # Neonatal disorders
pem = 387 # Protein energy malnutrition

# Measures
deaths = 1
prevalence = 5
incidence = 6
emr = 9 # Excess mortality rate
acmr = 14 # All-cause mortality rate
csmr = 15 # Cause-specific mortality rate
relative_risk = 11
birth_prev = 38 # Birth prevalence

# Metrics
number = 1
percent = 2
rate = 3

# Age group IDs
at_birth = 164
early_nn = 2 # presumably "early neonatal" — TODO confirm
late_nn = 3 # presumably "late neonatal" — TODO confirm
post_nn = 4 # presumably "post neonatal" — TODO confirm
one_to_four = 5

# Sex IDs
male=1
female=2
both=3

In [11]:
# Open the Mali artifact with filter terms for year_start == 2017 and age_start < 5.
# NOTE(review): this cell's execution count (11) is out of order relative to the
# cells around it (4 and 8) — confirm the notebook still works under Restart & Run All.
art_mali = Artifact(artifact_dir + 'mali.hdf', filter_terms=['year_start == 2017', 'age_start < 5'])

## Function to read artifact by draw, since LBWSG data was messed up

In [8]:
def read_lbwsg_data_by_draw(artifact_path, draw):
    """Read one draw of LBWSG exposure data directly from an artifact HDF file.

    The artifact stores the exposure data under
    ``risk_factor/low_birth_weight_and_short_gestation/exposure`` as a separate
    ``index`` table plus one ``draw_<n>`` entry per draw. This function reads
    the index table and the requested draw and glues them together.

    Parameters
    ----------
    artifact_path
        Path to the artifact ``.hdf`` file; it is opened read-only.
    draw
        Draw number; it is formatted into the ``draw_<n>`` key.

    Returns
    -------
    The index table with the draw's values attached column-wise as a column
    named ``value``.
    """
    lbwsg_key = 'risk_factor/low_birth_weight_and_short_gestation/exposure'

    # Both pieces are pulled out while the store is open; the store is closed
    # again before any further processing happens.
    with pd.HDFStore(artifact_path, mode='r') as hdf:
        demographic_index = hdf.get(f'{lbwsg_key}/index')
        draw_values = hdf.get(f'{lbwsg_key}/draw_{draw}')

    # Give the draw a fixed name so the result has the same columns for every draw.
    return pd.concat([demographic_index, draw_values.rename("value")], axis=1)

In [35]:
# Read draw 0 of the Mali LBWSG exposure data and preview the first rows
mali_lbwsg = read_lbwsg_data_by_draw(artifact_dir + 'mali.hdf', 0)
mali_lbwsg.head()

Unnamed: 0,location,sex,age_start,age_end,year_start,year_end,parameter,value
0,Mali,Female,0.0,0.019178,1990,1991,cat10,0.0
1,Mali,Female,0.0,0.019178,1990,1991,cat106,0.001379
2,Mali,Female,0.0,0.019178,1990,1991,cat11,0.001316
3,Mali,Female,0.0,0.019178,1990,1991,cat116,0.002852
4,Mali,Female,0.0,0.019178,1990,1991,cat117,0.012758


In [36]:
mali_lbwsg.shape

(73416, 8)

In [37]:
mali_lbwsg.age_start.unique()

array([0.000000e+00, 1.917808e-02, 7.671233e-02, 1.000000e+00,
       5.000000e+00, 1.000000e+01, 1.500000e+01, 2.000000e+01,
       2.500000e+01, 3.000000e+01, 3.500000e+01, 4.000000e+01,
       4.500000e+01, 5.000000e+01, 5.500000e+01, 6.000000e+01,
       6.500000e+01, 7.000000e+01, 7.500000e+01, 8.000000e+01,
       8.500000e+01, 9.000000e+01, 9.500000e+01])

## Write function to filter data and read all draws for LBWSG

In [38]:
# Individual row filters; they are AND-ed together into a single string
# that can be passed to pandas' DataFrame.query()
filter_terms = [
    'year_start == 2017',
    'age_start < 5',
    "parameter == 'cat10'",
]
query_string = " and ".join(filter_terms)
query_string

"year_start == 2017 and age_start < 5 and parameter == 'cat10'"

In [39]:
# Apply the combined filter string to the single-draw frame and display the matching rows
mali_lbwsg_reduced = mali_lbwsg.query(query_string)
mali_lbwsg_reduced

Unnamed: 0,location,sex,age_start,age_end,year_start,year_end,parameter,value
1539,Mali,Female,0.0,0.019178,2017,2018,cat10,0.002128
3135,Mali,Female,0.019178,0.076712,2017,2018,cat10,0.001435
4731,Mali,Female,0.076712,1.0,2017,2018,cat10,0.0
6327,Mali,Female,1.0,5.0,2017,2018,cat10,0.0
38247,Mali,Male,0.0,0.019178,2017,2018,cat10,0.0
39843,Mali,Male,0.019178,0.076712,2017,2018,cat10,0.0
41439,Mali,Male,0.076712,1.0,2017,2018,cat10,0.0
43035,Mali,Male,1.0,5.0,2017,2018,cat10,0.0


In [44]:
# Prototype of the reshaping step: every column except 'value' becomes an index
# level, and 'value' is relabelled as 'draw_0'. The result is only displayed,
# not assigned, so `mali_lbwsg_reduced` itself is left unchanged.
index_cols = [col for col in mali_lbwsg_reduced.columns if col != 'value']
mali_lbwsg_reduced.set_index(index_cols).rename(columns={'value': 'draw_0'})

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,draw_0
location,sex,age_start,age_end,year_start,year_end,parameter,Unnamed: 7_level_1
Mali,Female,0.0,0.019178,2017,2018,cat10,0.002128
Mali,Female,0.019178,0.076712,2017,2018,cat10,0.001435
Mali,Female,0.076712,1.0,2017,2018,cat10,0.0
Mali,Female,1.0,5.0,2017,2018,cat10,0.0
Mali,Male,0.0,0.019178,2017,2018,cat10,0.0
Mali,Male,0.019178,0.076712,2017,2018,cat10,0.0
Mali,Male,0.076712,1.0,2017,2018,cat10,0.0
Mali,Male,1.0,5.0,2017,2018,cat10,0.0


In [53]:
def read_lbwsg_data(artifact_path, filter_terms, draws=None):
    """Read filtered LBWSG exposure data for many draws into one wide frame.

    Each requested draw is read with ``read_lbwsg_data_by_draw``, filtered,
    and reshaped so that every non-``value`` column becomes an index level and
    the ``value`` column is renamed ``draw_<n>``. The per-draw frames are then
    joined column-wise.

    Parameters
    ----------
    artifact_path
        Path to the artifact ``.hdf`` file, passed through to
        ``read_lbwsg_data_by_draw``.
    filter_terms
        Iterable of pandas ``.query()`` expressions that are AND-ed together,
        e.g. ``['year_start == 2017', 'age_start < 5']``. An empty list (or
        ``None``) means "no filtering".
    draws
        Iterable of draw numbers to read. Defaults to ``range(1000)``, i.e.
        draws 0 through 999.

    Returns
    -------
    pandas.DataFrame
        One ``draw_<n>`` column per requested draw, indexed by all of the
        non-``value`` columns of the per-draw data.

    Raises
    ------
    ValueError
        If ``draws`` is given but contains no draw numbers.
    """
    draws = list(range(1000)) if draws is None else list(draws)
    if not draws:
        raise ValueError("`draws` must contain at least one draw number")

    # DataFrame.query('') raises, so an empty filter list skips the query step.
    query_string = " and ".join(filter_terms or [])

    index_cols = None
    draw_frames = []
    for draw in draws:
        draw_data = read_lbwsg_data_by_draw(artifact_path, draw)
        if query_string:
            draw_data = draw_data.query(query_string)
        if index_cols is None:
            # The index columns are determined once, from the first draw read,
            # and reused for every subsequent draw.
            index_cols = [col for col in draw_data.columns if col != 'value']
        draw_frames.append(
            draw_data.set_index(index_cols).rename(columns={'value': f'draw_{draw}'})
        )

    # Concatenate once at the end: concatenating inside the loop re-copies the
    # growing frame on every iteration.
    return pd.concat(draw_frames, axis=1)

In [54]:
# Read every draw of the Mali LBWSG exposure data, filtered to year_start == 2017
# and age_start < 5, and preview the first rows.
# NOTE: this rebinds `mali_lbwsg`, which earlier held the single-draw frame with a
# 'value' column; earlier cells that use that frame must not be re-run after this one.
mali_lbwsg = read_lbwsg_data(artifact_dir + 'mali.hdf', ['year_start == 2017', 'age_start < 5'])
mali_lbwsg.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,draw_0,draw_1,draw_2,draw_3,draw_4,draw_5,draw_6,draw_7,draw_8,draw_9,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location,sex,age_start,age_end,year_start,year_end,parameter,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
Mali,Female,0.0,0.019178,2017,2018,cat10,0.002128,0.001768,0.001924,0.002649,0.001953,0.002106,0.00214,0.001769,0.002091,0.002155,...,0.001863,0.002655,0.002027,0.001955,0.002181,0.002124,0.001714,0.002374,0.002806,0.00224
Mali,Female,0.0,0.019178,2017,2018,cat106,0.001562,0.001507,0.001917,0.001878,0.002094,0.002062,0.001822,0.00148,0.001768,0.001637,...,0.00088,0.001812,0.001664,0.001583,0.001933,0.001505,0.001596,0.00155,0.001464,0.001116
Mali,Female,0.0,0.019178,2017,2018,cat11,0.001454,0.001369,0.001351,0.001396,0.001431,0.00158,0.001661,0.001579,0.001258,0.00123,...,0.001146,0.001915,0.00133,0.001481,0.001711,0.001534,0.001337,0.00137,0.001115,0.001199
Mali,Female,0.0,0.019178,2017,2018,cat116,0.003774,0.003472,0.003759,0.004055,0.004935,0.00422,0.00416,0.003702,0.003605,0.003696,...,0.00241,0.003877,0.003615,0.003775,0.004033,0.003936,0.003703,0.003173,0.002946,0.002832
Mali,Female,0.0,0.019178,2017,2018,cat117,0.014307,0.014481,0.013703,0.013855,0.016841,0.014736,0.015148,0.014221,0.012749,0.014351,...,0.009044,0.013104,0.012619,0.014824,0.014787,0.013385,0.013363,0.013147,0.012891,0.010466


In [55]:
mali_lbwsg.shape

(456, 1000)

### Woo hoo! Looks like the function works

Let's figure out what's in the index:

456 rows = (57 categories) x (4 age groups) x (2 sexes)

In [57]:
# Count the total number of LBWSG categories: 57
mali_lbwsg.index.get_level_values('parameter').nunique()

57

In [58]:
456/57 # 8 = (4 age groups) x (2 sexes)

8.0