In [1]:
%matplotlib inline

import os

import matplotlib.pyplot as plt
import pandas as pd
from ipywidgets import interact

pd.set_option('display.max_rows', 8)

!date
!whoami

Mon Jul 29 17:19:18 PDT 2019
yongqx2


In [2]:
# Root directory of the sqlns results; passed to clean_and_aggregate, which reads the run output beneath it.
result_dir = '/share/costeffectiveness/results/sqlns/presentation/'

In [3]:
# Causes to report. Order matters: get_disaggregated_results treats the first four
# entries as having deaths, YLLs and YLDs, and the DALY plot skips the last entry.
cause_names = ['lower_respiratory_infections', 'measles', 'diarrheal_diseases', 'protein_energy_malnutrition', 'iron_deficiency', 'other_causes']
# Risk names; the same three values are offered by the `risk` dropdown of the risk-exposure plot.
risk_names = ['anemia', 'child_stunting', 'child_wasting']

# Identifier columns of the long-format results: six scenario parameters, then cause, measure and input_draw.
# Several functions slice this list by position, so the order must not change.
template_cols = ['coverage', 'duration', 'child_stunting_permanent', 'child_wasting_permanent', 'iron_deficiency_permanent', 'iron_deficiency_mean', 'cause', 'measure', 'input_draw']

In [4]:
# Note: the coefficient of variation has been applied as a constant across the
# different sqlns effects on iron deficiency.
def clean_and_aggregate(path, output_file='nigeria/2019_07_23_10_57_25/output.hdf'):
    """Load one model run's output and sum it within each scenario and draw.

    Parameters
    ----------
    path : str
        Directory that contains the results. A trailing separator is optional.
    output_file : str, optional
        Location of the HDF output relative to ``path``. Defaults to the
        Nigeria run this notebook was written for.

    Returns
    -------
    pandas.DataFrame
        Column sums of the output, indexed by ``coverage`` (multiplied by
        100), ``duration``, the three ``*_permanent`` flags,
        ``iron_deficiency_mean`` and ``input_draw``.
    """
    scenario_columns = {
        'sqlns.effect_on_child_stunting.permanent': 'child_stunting_permanent',
        'sqlns.effect_on_child_wasting.permanent': 'child_wasting_permanent',
        'sqlns.effect_on_iron_deficiency.permanent': 'iron_deficiency_permanent',
        'sqlns.effect_on_iron_deficiency.mean': 'iron_deficiency_mean',
        'sqlns.program_coverage': 'coverage',
        'sqlns.duration': 'duration',
    }
    # os.path.join (rather than string concatenation) so that `path` works
    # with or without a trailing separator.
    results = pd.read_hdf(os.path.join(path, output_file))
    results = results.rename(columns=scenario_columns)
    results['coverage'] *= 100

    # The order of the keys fixes the order of the index levels that the
    # downstream reshaping code sees.
    group_keys = ['coverage', 'duration', 'child_stunting_permanent',
                  'child_wasting_permanent', 'iron_deficiency_permanent',
                  'iron_deficiency_mean', 'input_draw']
    return results.groupby(group_keys).sum()

In [5]:
# Aggregated model output, indexed by the scenario parameters and input_draw; every section below starts from it.
r = clean_and_aggregate(result_dir)

In [6]:
def standardize_shape(data, measure):
    """Reshape the columns whose name contains ``measure`` into long format.

    Parameters
    ----------
    data : pandas.DataFrame
        Wide results; its index levels become ordinary columns of the output.
        Any number of index levels is accepted.
    measure : str
        Substring used to select columns. When it has the form
        ``'<measure>_due_to_<cause>'`` it is split into a ``measure`` and a
        ``cause`` column; otherwise only ``measure`` is added.

    Returns
    -------
    pandas.DataFrame
        One row per (index entry, matching column) with the index columns,
        ``value``, ``measure`` and, when applicable, ``cause``. The original
        column label itself is not kept.

    Notes
    -----
    Matching is by substring, so a ``measure`` that is contained in several
    column names stacks all of them.
    """
    matching = [c for c in data.columns if measure in c]
    stacked = data.loc[:, matching].stack()
    # Drop the stacked column-label level by position; its default name
    # ('level_<n>') depends on how many index levels `data` has.
    values = stacked.reset_index(level=-1, drop=True).rename('value')
    measure_data = values.reset_index()

    name, separator, cause = measure.partition('_due_to_')
    measure_data['measure'] = name
    if separator:
        measure_data['cause'] = cause

    return measure_data

In [7]:
def get_person_time(data):
    """Return person-time in long format, one row per scenario and draw.

    The ``value`` column produced by ``standardize_shape`` is renamed to
    ``person_time`` and the ``measure`` column is removed.
    """
    person_time = standardize_shape(data, 'person_time')
    person_time = person_time.drop(columns='measure')
    return person_time.rename(columns={'value': 'person_time'})

In [8]:
def get_disaggregated_results(data, cause_names):
    """Build a long-format table of deaths, YLLs, YLDs and DALYs by cause.

    Parameters
    ----------
    data : pandas.DataFrame
        Aggregated results as returned by ``clean_and_aggregate``.
    cause_names : list of str
        Causes to include. The first four entries are treated as having
        deaths, YLLs and YLDs (DALYs = YLDs + YLLs). ``'iron_deficiency'``
        contributes YLDs only (DALYs = YLDs). Any other entry contributes
        deaths and YLLs only (DALYs = YLLs).

    Returns
    -------
    pandas.DataFrame
        Columns ``template_cols`` followed by ``value``, sorted by
        ``template_cols``.
    """
    key_cols = [c for c in template_cols if c != 'measure']

    def relabel_as_dalys(frame):
        # DALYs equal this single component; copy it under the 'dalys' label.
        relabelled = frame.copy()
        relabelled['measure'] = 'dalys'
        return relabelled

    deaths, ylls, ylds, dalys = [], [], [], []
    for cause in cause_names:
        if cause in cause_names[:4]:
            deaths.append(standardize_shape(data, f'death_due_to_{cause}'))

            ylls_sub = standardize_shape(data, f'ylls_due_to_{cause}')
            ylds_sub = standardize_shape(data, f'ylds_due_to_{cause}')
            # Index-aligned sum of the numeric column only, so the string
            # 'measure' labels are not concatenated along the way.
            dalys_sub = (ylds_sub.set_index(key_cols)[['value']]
                         + ylls_sub.set_index(key_cols)[['value']]).reset_index()
            dalys_sub['measure'] = 'dalys'

            ylls.append(ylls_sub)
            ylds.append(ylds_sub)
            dalys.append(dalys_sub)
        elif cause == 'iron_deficiency':
            ylds_sub = standardize_shape(data, f'ylds_due_to_{cause}')
            ylds.append(ylds_sub)
            dalys.append(relabel_as_dalys(ylds_sub))
        else:  # cause == 'other_causes'
            deaths.append(standardize_shape(data, f'death_due_to_{cause}'))

            ylls_sub = standardize_shape(data, f'ylls_due_to_{cause}')
            ylls.append(ylls_sub)
            dalys.append(relabel_as_dalys(ylls_sub))

    # The summed DALY frames order their columns differently from the others.
    # sort=False states the intended behaviour and avoids the pandas
    # FutureWarning about sorting the non-concatenation axis; the final column
    # order is set by set_index/reset_index below.
    output = pd.concat(deaths + ylls + ylds + dalys, sort=False)
    output = output.set_index(template_cols).sort_index()

    return output.reset_index()

In [9]:
# Long-format deaths, YLLs, YLDs and DALYs by cause, for every scenario and input draw.
output = get_disaggregated_results(r, cause_names)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




In [10]:
# Attach person-time to every cause/measure row, matching on the scenario parameters and input_draw.
df = pd.merge(output, get_person_time(r), on=[c for c in template_cols if c not in ['cause', 'measure']])

In [11]:
def get_averted_results(df):
    """Add burden averted relative to the zero-coverage rows.

    Each row is paired with the ``coverage == 0.0`` row that shares all the
    other ``template_cols`` values. ``averted`` is the baseline count minus
    the row's count. Both ``value`` and ``averted`` are then divided by the
    row's own ``person_time`` and multiplied by 100,000.
    """
    is_baseline = df.coverage == 0.0
    baseline = df[is_baseline].drop(columns=['coverage', 'person_time'])
    merged = pd.merge(df, baseline, on=template_cols[1:], suffixes=['', '_bau'])

    # Take the difference on raw counts before `value` is turned into a rate.
    merged['averted'] = merged.pop('value_bau') - merged['value']

    for column in ('value', 'averted'):
        merged[column] = (merged[column]/merged['person_time']) * 100_000

    return merged

In [12]:
def get_final_table(data):
    """Summarise ``value``, ``person_time`` and ``averted`` over input draws.

    Rows are grouped by every ``template_cols`` entry except the last
    (``input_draw``). For each of the three columns the mean and the 2.5% and
    97.5% percentiles are kept, in the order value, person_time, averted.
    """
    group_keys = template_cols[:-1]
    described = data.groupby(group_keys)[['person_time', 'value', 'averted']]\
                    .describe(percentiles=[.025, .975])

    wanted = [(column, statistic)
              for column in ('value', 'person_time', 'averted')
              for statistic in ('mean', '2.5%', '97.5%')]
    return described.filter(wanted)

In [13]:
# Summary across input draws (mean, 2.5% and 97.5% percentiles) of rates, person-time and averted burden.
table_shell = get_final_table(get_averted_results(df))

In [14]:
# Display the summary table (row display is capped by the display.max_rows option set in the first cell).
table_shell

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,value,value,value,person_time,person_time,person_time,averted,averted,averted
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,mean,2.5%,97.5%,mean,2.5%,97.5%,mean,2.5%,97.5%
coverage,duration,child_stunting_permanent,child_wasting_permanent,iron_deficiency_permanent,iron_deficiency_mean,cause,measure,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
0.0,365.25,False,False,False,0.895,diarrheal_diseases,dalys,22322.602459,16458.008165,27350.788250,258647.897072,256866.590294,260951.073611,0.000000,0.000000,0.000000
0.0,365.25,False,False,False,0.895,diarrheal_diseases,death,255.081610,187.156485,313.552954,258647.897072,256866.590294,260951.073611,0.000000,0.000000,0.000000
0.0,365.25,False,False,False,0.895,diarrheal_diseases,ylds,285.885770,192.740431,399.479451,258647.897072,256866.590294,260951.073611,0.000000,0.000000,0.000000
0.0,365.25,False,False,False,0.895,diarrheal_diseases,ylls,22036.716689,16183.537528,27077.834214,258647.897072,256866.590294,260951.073611,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100.0,730.50,True,True,True,8.950,protein_energy_malnutrition,dalys,5232.585986,3748.642849,7011.855475,258738.113138,256934.795385,261020.430353,244.675780,91.920574,469.209926
100.0,730.50,True,True,True,8.950,protein_energy_malnutrition,death,57.449334,40.562062,77.592641,258738.113138,256934.795385,261020.430353,2.706229,0.938617,5.303639
100.0,730.50,True,True,True,8.950,protein_energy_malnutrition,ylds,286.555213,194.081987,351.590413,258738.113138,256934.795385,261020.430353,12.173710,8.776743,15.629536
100.0,730.50,True,True,True,8.950,protein_energy_malnutrition,ylls,4946.030774,3486.596093,6691.328109,258738.113138,256934.795385,261020.430353,232.502070,80.422304,458.119019


# DALYs Averted by Coverage

In [15]:
@interact
def plot_dalys_averted(duration=[365.25, 730.50],
                       child_stunting_permanent=[False, True],
                       child_wasting_permanent=[False, True],
                       iron_deficiency_permanent=[False, True],
                       iron_deficiency_mean=[0.895, 4.475, 8.950]):
    """Plot DALYs averted against coverage for the selected scenario.

    One line is drawn per cause (every entry of ``cause_names`` except the
    last), with a shaded band between the 2.5% and 97.5% percentiles. Each
    argument is a scenario parameter chosen through the widget dropdowns.
    """
    summary = table_shell.reset_index()

    selection = {
        'duration': duration,
        'child_stunting_permanent': child_stunting_permanent,
        'child_wasting_permanent': child_wasting_permanent,
        'iron_deficiency_permanent': iron_deficiency_permanent,
        'iron_deficiency_mean': iron_deficiency_mean,
        'measure': 'dalys',
    }
    conditions = [summary[column] == choice for column, choice in selection.items()]
    keep = conditions[0]
    for condition in conditions[1:]:
        keep = keep & condition
    data = summary.loc[keep]

    plt.figure(figsize=(12, 8))

    for cause in cause_names[:-1]:
        cause_rows = data.loc[data.cause == cause]
        coverage = cause_rows['coverage']

        plt.plot(coverage, cause_rows[('averted', 'mean')], '-o', label=cause)
        plt.fill_between(coverage,
                         cause_rows[('averted', '2.5%')],
                         cause_rows[('averted', '97.5%')],
                         alpha=0.1)

    plt.title('Nigeria')
    plt.xlabel('Program Coverage (%)')
    plt.ylabel('DALYs Averted (per100,000PY)')
    plt.legend(loc=(1.05, .05))
    plt.grid()

interactive(children=(Dropdown(description='duration', options=(365.25, 730.5), value=365.25), Dropdown(descri…

# Mortality Reduction Rate by Coverage

In [16]:
def get_all_causes_mortality_reduction(df):
    """Summarise the fractional reduction in total deaths versus zero coverage.

    For each scenario and input draw, ``total_population_dead`` is compared
    with the ``coverage == 0.0`` row that shares the remaining scenario
    parameters: ``(baseline - scenario) / baseline``. The result is
    summarised over draws (mean, 2.5% and 97.5% percentiles) and labelled with
    cause ``'all_causes'`` and measure ``'death'``.
    """
    cols = template_cols[:6] + ['input_draw', 'total_population_dead']
    deaths = df[cols]
    baseline = deaths[deaths.coverage == 0.0].drop(columns='coverage')
    merged = pd.merge(deaths, baseline, on=cols[1:-1], suffixes=['', '_bau'])

    baseline_deaths = merged['total_population_dead_bau']
    merged['averted'] = (baseline_deaths - merged['total_population_dead'])/baseline_deaths
    merged = merged.drop(columns=['total_population_dead', 'total_population_dead_bau'])
    merged['cause'] = 'all_causes'
    merged['measure'] = 'death'

    summary = merged.groupby(template_cols[:-1])['averted'].describe(percentiles=[.025, .975])
    summary = summary.filter(['mean', '2.5%', '97.5%'])

    return summary.reset_index()

In [17]:
# Computed once for all scenarios so that the widget callback below only
# filters it, instead of redoing the merge and summary on every dropdown change.
all_causes_reduction = get_all_causes_mortality_reduction(r.reset_index())


@interact
def plot_all_causes_mortality_reduction(duration=[365.25, 730.50],
                                        child_stunting_permanent=[False, True],
                                        child_wasting_permanent=[False, True],
                                        iron_deficiency_permanent=[False, True],
                                        iron_deficiency_mean=[0.895, 4.475, 8.950]):
    """Plot the all-cause mortality reduction against coverage.

    Each argument is a scenario parameter chosen through the widget
    dropdowns. The mean is drawn as a line with a shaded band between the
    2.5% and 97.5% percentiles.
    """
    plt.figure(figsize=(12, 8))

    df = all_causes_reduction
    df_sub = df.loc[(df.duration == duration)
                    & (df.child_stunting_permanent == child_stunting_permanent)
                    & (df.child_wasting_permanent == child_wasting_permanent)
                    & (df.iron_deficiency_permanent == iron_deficiency_permanent)
                    & (df.iron_deficiency_mean == iron_deficiency_mean)]

    xx = df_sub['coverage']
    mean = df_sub['mean']
    lb = df_sub['2.5%']
    ub = df_sub['97.5%']

    plt.plot(xx, mean, '-o')
    plt.fill_between(xx, lb, ub, alpha=0.2)

    plt.title('Nigeria, All Causes')
    plt.xlabel('Program Coverage (%)')
    plt.ylabel('Mortality Reduction Rate')
    plt.grid()

interactive(children=(Dropdown(description='duration', options=(365.25, 730.5), value=365.25), Dropdown(descri…

# Risk Exposure in Percentage Points

In [18]:
def get_year_from_template(template_string):
    """Return the text between the first ``'_in_'`` and the following ``'_among_'``."""
    pieces = template_string.split('_in_')
    after_in = pieces[1]
    return after_in.split('_among_')[0]

def get_age_group_from_template(template_string):
    """Return the text that follows ``'_among_'`` in the segment after the first ``'_in_'``."""
    pieces = template_string.split('_in_')
    after_in = pieces[1]
    return after_in.split('_among_')[1]

def get_risk_and_severity_from_template(template_string):
    """Split the part before the first ``'_in_'`` into ``(risk, severity)``.

    When the label contains ``'anemia'`` the risk is the first
    underscore-separated word and the severity the second. Otherwise the risk
    is the first two words joined by ``'_'`` and the severity is the third.
    """
    words = template_string.split('_in_')[0].split('_')
    if 'anemia' in template_string:
        return words[0], words[1]
    risk = '_'.join(words[:2])
    return risk, words[2]

In [19]:
def get_risk_percentage_points(data):
    """Summarise the percentage of the total counted as exposed to each risk.

    Parameters
    ----------
    data : pandas.DataFrame
        Aggregated results whose column names containing ``'anemia'`` or
        ``'child'`` follow the ``<risk>_<severity>_in_<year>_among_<age_group>``
        pattern parsed by the template helper functions.

    Returns
    -------
    pandas.DataFrame
        One row per scenario, risk, year and age group, with the ``mean``,
        ``2.5%`` and ``97.5%`` of the percent exposed across input draws.
        Severities ``'unexposed'``, ``'cat4'`` and ``'cat3'`` count as not
        exposed.
    """
    cols = template_cols[:-3] + ['risk', 'year', 'age_group', 'input_draw']

    # Select from `data` itself rather than the notebook-global `r`, so the
    # function works on whatever frame it is given.
    risk_columns = [c for c in data.columns if 'anemia' in c or 'child' in c]
    stacked = data[risk_columns].stack()
    # Name the stacked column level explicitly; its default name
    # ('level_<n>') depends on how many index levels `data` has.
    stacked.index.names = list(stacked.index.names[:-1]) + ['label']
    df = stacked.rename('value').reset_index()

    df['year'] = df.label.map(get_year_from_template)
    df['age_group'] = df.label.map(get_age_group_from_template)
    parsed = [get_risk_and_severity_from_template(label) for label in df.label]
    df['risk'] = [risk for risk, _ in parsed]
    df['severity'] = [severity for _, severity in parsed]
    df = df.drop(columns='label')

    not_exposed = df.severity.isin(['unexposed', 'cat4', 'cat3'])
    exposed = df.loc[~not_exposed].groupby(cols)['value'].sum()
    total = df.groupby(cols)['value'].sum()
    percent_exposed = (exposed/total * 100).reset_index()

    # Summarise over input_draw, the last entry of `cols`.
    g = percent_exposed.groupby(cols[:-1])['value']
    g = g.describe(percentiles=[.025, .975]).filter(['mean', '2.5%', '97.5%'])

    return g.reset_index()

In [20]:
# Percent-exposed summary for every scenario, risk, year and age group; the plot below only filters it.
risk_estimates = get_risk_percentage_points(r)

In [21]:
@interact
def plot_risk_percentage_points(duration=[365.25, 730.50],
                                child_stunting_permanent=[False, True],
                                child_wasting_permanent=[False, True],
                                iron_deficiency_permanent=[False, True],
                                iron_deficiency_mean=[0.895, 4.475, 8.950],
                                risk=['anemia', 'child_stunting', 'child_wasting'],
                                year=[str(c) for c in range(2020, 2025)]):
    """Plot the percent exposed to ``risk`` in ``year`` against coverage.

    The remaining arguments are scenario parameters chosen through the widget
    dropdowns. The mean is drawn as a line with a shaded band between the
    2.5% and 97.5% percentiles.
    """
    plt.figure(figsize=(12, 8))

    estimates = risk_estimates.copy()
    selection = {
        'duration': duration,
        'child_stunting_permanent': child_stunting_permanent,
        'child_wasting_permanent': child_wasting_permanent,
        'iron_deficiency_permanent': iron_deficiency_permanent,
        'iron_deficiency_mean': iron_deficiency_mean,
        'risk': risk,
        'year': year,
    }
    conditions = [estimates[column] == choice for column, choice in selection.items()]
    keep = conditions[0]
    for condition in conditions[1:]:
        keep = keep & condition
    chosen = estimates.loc[keep]

    coverage = chosen['coverage']

    plt.plot(coverage, chosen['mean'], '-o')
    plt.fill_between(coverage, chosen['2.5%'], chosen['97.5%'], alpha=0.2)

    plt.title(f'{risk} percentage points in {year}')
    plt.xlabel('Program Coverage (%)')
    plt.ylabel(f'Under 5 % {risk}')
    plt.grid()

interactive(children=(Dropdown(description='duration', options=(365.25, 730.5), value=365.25), Dropdown(descri…