In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style='darkgrid')
import numpy as np, pandas as pd
pd.set_option('display.max_rows', 8)
from matplotlib.backends.backend_pdf import PdfPages

import gbd_mapping as gbd
from vivarium_inputs import get_raw_data

!date

Wed Jul 10 11:55:26 PDT 2019


In [2]:
# Load the saved GBD covariate estimates table from its CSV file on the shared drive.
gbd_covariate_estimates = pd.read_csv('/home/j/Project/simulation_science/mnch/Interventions/sq-lns/results/gbd_covariate_estimates.csv')

In [3]:
# Display the covariate table (row display is capped by the 'display.max_rows' option set in the first cell).
gbd_covariate_estimates

Unnamed: 0,model_version_id,covariate_id,covariate_name_short,location_id,location_name,year_id,age_group_id,age_group_name,sex_id,sex,mean_value,lower_value,upper_value
0,29804,1069,stunting_prop_haz_under_2sd,161,Bangladesh,2017,22,All Ages,3,Both,0.349966,0.349966,0.349966
1,29804,1069,stunting_prop_haz_under_2sd,201,Burkina Faso,2017,22,All Ages,3,Both,0.316688,0.316688,0.316688
2,29804,1069,stunting_prop_haz_under_2sd,179,Ethiopia,2017,22,All Ages,3,Both,0.397683,0.397683,0.397683
3,29804,1069,stunting_prop_haz_under_2sd,163,India,2017,22,All Ages,3,Both,0.356204,0.356204,0.356204
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6,30296,1070,wasting_prop_whz_under_2sd,201,Burkina Faso,2017,22,All Ages,3,Both,0.151255,0.151255,0.151255
7,30296,1070,wasting_prop_whz_under_2sd,179,Ethiopia,2017,22,All Ages,3,Both,0.110381,0.110381,0.110381
8,30296,1070,wasting_prop_whz_under_2sd,163,India,2017,22,All Ages,3,Both,0.175323,0.175323,0.175323
9,30296,1070,wasting_prop_whz_under_2sd,214,Nigeria,2017,22,All Ages,3,Both,0.128652,0.128652,0.128652


In [4]:
# Root directory holding the verification-and-validation result folders.
result_dir = '/ihme/costeffectiveness/results/sqlns/verification_and_validation/'

# Location name -> full path of that location's timestamped run directory.
path_for_location = {
    location: result_dir + run_subdir
    for location, run_subdir in [
        ('Bangladesh', 'vv_bangladesh/2019_07_02_11_55_19'),
        ('Burkina Faso', 'vv_burkina_faso/2019_07_02_11_56_40'),
        ('Ethiopia', 'vv_ethiopia/2019_07_02_11_58_02'),
        ('India', 'vv_india/2019_07_02_11_58_29'),
        ('Nigeria', 'vv_nigeria/2019_07_02_11_58_49'),
    ]
}

# Risk-factor attribute names looked up on gbd.risk_factors further down.
risk_names = ['iron_deficiency', 'child_stunting', 'child_wasting']

In [5]:
def load_and_aggregate(path):
    """Read a run's output table and total every column per input draw.

    Parameters
    ----------
    path : str
        Run directory; the table is read from ``path + '/output.hdf'``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``input_draw``, with each remaining column summed over
        all rows that share the same draw.
    """
    output_table = pd.read_hdf(path + '/output.hdf')
    return output_table.groupby('input_draw').sum()

In [6]:
def get_year_from_template(template_string):
    """Return the year token of an output column label, as a string.

    The token is taken from the segment between the first and second
    ``'_in_'`` markers, cut off at the first ``'_among'``. For a label
    shaped like ``'mild_anemia_in_2020_among_female_in_age_group_1_to_4'``
    this is ``'2020'``. Raises ``IndexError`` when ``'_in_'`` is missing.
    """
    segments = template_string.split('_in_')
    year_segment = segments[1]
    return year_segment.split('_among')[0]

def get_sex_from_template(template_string):
    """Return the capitalized sex token of an output column label.

    The token is the text after the first ``'_among_'`` up to the next
    ``'_in'``; ``str.capitalize`` is applied, so ``'female'`` becomes
    ``'Female'``. Raises ``IndexError`` when ``'_among_'`` is missing.
    """
    after_among = template_string.split('_among_')[1]
    sex_token = after_among.split('_in')[0]
    return sex_token.capitalize()

def get_age_group_from_template(template_string):
    """Return the age-group token of an output column label.

    This is the piece that follows the first ``'age_group_'`` marker
    (up to any later occurrence of the marker), e.g. ``'1_to_4'``.
    Raises ``IndexError`` when the marker is missing.
    """
    pieces = template_string.split('age_group_')
    return pieces[1]

def get_risk_and_severity_from_template(template_string):
    """Split the leading part of a column label into ``(risk, severity)``.

    Only the text before the first ``'_in_'`` is examined, broken on
    underscores:

    * labels containing ``'anemia'`` anywhere are read as
      ``<severity>_<risk>``, e.g. ``'mild_anemia...'`` -> ``('anemia', 'mild')``;
    * all other labels are read as ``<risk word 1>_<risk word 2>_<severity>``,
      e.g. ``'child_stunting_cat1...'`` -> ``('child_stunting', 'cat1')``.

    Raises ``IndexError`` when the prefix has too few underscore-separated
    tokens for the applicable layout.
    """
    prefix = template_string.split('_in_')[0]
    tokens = prefix.split('_')
    if 'anemia' not in template_string:
        return '_'.join(tokens[:2]), tokens[2]
    return tokens[1], tokens[0]

In [7]:
def get_risk_percentage_points(my_dict):
    """Summarize the percentage of exposed counts per location, risk, sex and age group.

    For every ``location -> run directory`` entry, the run output is loaded
    with ``load_and_aggregate`` and the columns whose name contains
    ``'anemia'`` or ``'child'`` are reshaped to long form. Each column label
    is parsed into year, sex, age group, risk and severity. Rows whose
    severity is ``'unexposed'``, ``'cat4'`` or ``'cat3'`` count as not
    exposed; every other severity counts as exposed.

    Parameters
    ----------
    my_dict : dict
        Maps a location name to the path of its run directory.

    Returns
    -------
    pandas.DataFrame
        Columns ``location``, ``risk``, ``sex``, ``age_group``, ``mean``,
        ``2.5%`` and ``97.5%``; the statistics are taken across input draws
        of the year-averaged exposed percentage.
    """
    per_year_keys = ['risk', 'year', 'sex', 'age_group', 'input_draw']
    per_draw_keys = ['location', 'risk', 'sex', 'age_group', 'input_draw']
    not_exposed = ['unexposed', 'cat4', 'cat3']

    per_location = []
    for location, path in my_dict.items():
        aggregated = load_and_aggregate(path)
        wanted = [name for name in aggregated.columns
                  if 'anemia' in name or 'child' in name]
        counts = (
            aggregated[wanted]
            .stack()
            .reset_index()
            .rename(columns={'level_1': 'label', 0: 'value'})
        )

        # Decode the pieces packed into each column label.
        counts['year'] = counts.label.map(get_year_from_template)
        counts['sex'] = counts.label.map(get_sex_from_template)
        counts['age_group'] = counts.label.map(get_age_group_from_template)
        risk_severity = counts.label.map(get_risk_and_severity_from_template)
        counts['risk'] = risk_severity.map(lambda pair: pair[0])
        counts['severity'] = risk_severity.map(lambda pair: pair[1])
        counts = counts.drop(columns='label')

        # Exposed share = exposed value / total value within each group, in percent.
        is_exposed = ~counts.severity.isin(not_exposed)
        exposed_total = counts.loc[is_exposed].groupby(per_year_keys)['value'].sum()
        overall_total = counts.groupby(per_year_keys)['value'].sum()
        percent_exposed = (exposed_total / overall_total * 100).reset_index()
        percent_exposed['location'] = location
        per_location.append(percent_exposed)

    combined = pd.concat(per_location)
    # 'year' is absent from per_draw_keys, so this averages over years per draw.
    mean_over_years = combined.groupby(per_draw_keys)['value'].mean().reset_index()
    # Dropping 'input_draw' from the keys makes describe() summarize across draws.
    across_draws = mean_over_years.groupby(per_draw_keys[:-1])['value']
    summary = (across_draws
               .describe(percentiles=[.025, .975])
               .filter(['mean', '2.5%', '97.5%']))

    return summary.reset_index()

In [8]:
# Build the exposed-percentage summary for every location listed in path_for_location.
df = get_risk_percentage_points(path_for_location)

In [9]:
# Display the summary: one row per location / risk / sex / age group with mean and 2.5%-97.5% bounds.
df

Unnamed: 0,location,risk,sex,age_group,mean,2.5%,97.5%
0,Bangladesh,anemia,Female,1_to_4,52.201506,36.797721,67.598196
1,Bangladesh,anemia,Female,early_neonatal,99.811335,98.646759,100.000000
2,Bangladesh,anemia,Female,late_neonatal,99.862222,99.088049,100.000000
3,Bangladesh,anemia,Female,post_neonatal,57.214798,44.691747,71.766611
...,...,...,...,...,...,...,...
116,Nigeria,child_wasting,Male,1_to_4,10.131410,9.504854,10.751307
117,Nigeria,child_wasting,Male,early_neonatal,17.078224,13.963702,20.663186
118,Nigeria,child_wasting,Male,late_neonatal,10.163806,6.788522,14.424587
119,Nigeria,child_wasting,Male,post_neonatal,8.557653,6.273419,12.187992


In [10]:
def get_risk_exposure(my_dict, risk_names):
    """Collect raw exposure data for each location/risk pair.

    Calls ``get_raw_data(<risk factor>, 'exposure', <location>)`` for every
    key of ``my_dict`` combined with every name in ``risk_names`` (looked up
    as an attribute of ``gbd.risk_factors``), concatenates the results, keeps
    the rows with ``year_id == 2017`` and ``age_group_id < 6``, and removes
    the ``modelable_entity_id``, ``measure_id`` and ``metric_id`` columns.

    Parameters
    ----------
    my_dict : dict
        Only its keys (location names) are used.
    risk_names : list of str
        Attribute names on ``gbd.risk_factors``.

    Returns
    -------
    pandas.DataFrame
        The filtered, concatenated exposure data.
    """
    exposure_frames = [
        get_raw_data(getattr(gbd.risk_factors, risk), 'exposure', location)
        for location in my_dict.keys()
        for risk in risk_names
    ]
    combined = pd.concat(exposure_frames)
    selected = combined.query('year_id == 2017 and age_group_id < 6')

    return selected.drop(columns=['modelable_entity_id', 'measure_id', 'metric_id'])

In [11]:
# Fetch exposure data for every location in path_for_location and every risk in risk_names.
gbd_risk_exposure = get_risk_exposure(path_for_location, risk_names)



In [12]:
# Display the wide exposure table (identifier columns followed by draw_* columns).
gbd_risk_exposure

Unnamed: 0,rei_id,location_id,year_id,age_group_id,sex_id,parameter,draw_0,draw_1,draw_10,draw_100,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
230,95,161,2017,2,1,continuous,112.043561,122.898635,115.795529,113.739507,...,114.940694,125.914798,126.068494,116.040215,117.415264,117.044742,124.172919,120.054034,121.502651,115.052157
231,95,161,2017,3,1,continuous,112.104730,115.768110,112.464294,129.380061,...,121.526716,116.450243,113.701718,106.966683,124.206769,121.174903,114.225292,107.347272,108.973133,115.899519
232,95,161,2017,4,1,continuous,101.004846,101.227596,105.479872,99.843439,...,100.406470,95.719262,100.041637,104.956643,105.845467,106.613525,97.810326,104.283911,100.685165,100.305343
233,95,161,2017,5,1,continuous,107.464062,103.860210,109.713401,100.357587,...,109.685060,116.173441,114.464848,106.818030,109.549976,111.975937,105.865563,106.870663,104.137012,104.174064
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,240,214,2017,2,2,cat4,0.593309,0.634945,0.627659,0.612725,...,0.635171,0.631679,0.624476,0.635330,0.630600,0.622136,0.620727,0.616771,0.626749,0.628996
179,240,214,2017,3,2,cat4,0.643365,0.723533,0.709678,0.703819,...,0.674749,0.730827,0.654643,0.697966,0.739101,0.711322,0.725946,0.717075,0.698585,0.698596
185,240,214,2017,4,2,cat4,0.730212,0.725850,0.738478,0.724326,...,0.776898,0.709611,0.761046,0.753831,0.752599,0.737810,0.785827,0.735913,0.690518,0.751216
191,240,214,2017,5,2,cat4,0.720627,0.716581,0.724841,0.714753,...,0.716420,0.724520,0.720339,0.720273,0.717392,0.714888,0.713473,0.722779,0.718059,0.725149


In [13]:
# Wide -> long: the first six columns become the index, the remaining
# (draw) columns are stacked into a single 'draw' / 'value' pair per row.
gbd_long = (
    gbd_risk_exposure
    .set_index(list(gbd_risk_exposure.columns[:6]))
    .stack()
    .reset_index()
    .rename(columns={'level_6': 'draw', 0: 'value'})
)

# Group by every column except the trailing 'draw' and 'value', then keep the
# mean and the 2.5% / 97.5% percentiles of 'value' within each group.
gbd_summary = (
    gbd_long
    .groupby(list(gbd_long.columns[:-2]))['value']
    .describe(percentiles=[.025, .975])
    .filter(['mean', '2.5%', '97.5%'])
    .reset_index()
)

In [14]:
# Display the per-group exposure summary (mean, 2.5% and 97.5% across draws).
gbd_summary

Unnamed: 0,rei_id,location_id,year_id,age_group_id,sex_id,parameter,mean,2.5%,97.5%
0,95,161,2017,2,1,continuous,118.717899,106.911195,131.240612
1,95,161,2017,2,2,continuous,115.166407,108.204102,121.663193
2,95,161,2017,3,1,continuous,114.039983,102.515398,125.759784
3,95,161,2017,3,2,continuous,113.717478,107.431240,120.589334
...,...,...,...,...,...,...,...,...,...
356,241,214,2017,5,2,cat1,0.179388,0.155099,0.204544
357,241,214,2017,5,2,cat2,0.174524,0.157163,0.198383
358,241,214,2017,5,2,cat3,0.201085,0.174746,0.234512
359,241,214,2017,5,2,cat4,0.445003,0.403790,0.473833


In [21]:
# Spot-check one slice of the summary: location_id 214 (listed as Nigeria in the
# covariate table earlier in this notebook), age_group_id 5, sex_id 1.
# NOTE(review): rei_id 241 is presumably child stunting — confirm against the GBD id tables.
gbd_summary.query('rei_id == 241 & location_id == 214 & age_group_id == 5 & sex_id == 1')

Unnamed: 0,rei_id,location_id,year_id,age_group_id,sex_id,parameter,mean,2.5%,97.5%
352,241,214,2017,5,1,cat1,0.246288,0.228543,0.265851
353,241,214,2017,5,1,cat2,0.159403,0.145076,0.178989
354,241,214,2017,5,1,cat3,0.163924,0.146996,0.184874
355,241,214,2017,5,1,cat4,0.430385,0.399401,0.453457
