Author: Yongquan Xie<br>
Date: July 19, 2019<br>
Purpose: SQ-LNS presentation Nigeria results preparation<br>
Note: Yongquan and Nathaniel will give this presentation on August 1, 2019

In [None]:
%matplotlib inline

import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import interact

# Cap DataFrame displays at 8 rows so cell outputs stay compact.
pd.set_option('display.max_rows', 8)

In [None]:
# Root directory that every results path in this notebook is built from.
result_dir = '/share/costeffectiveness/results/sqlns/presentation/'

# Load the sample history table stored under the 2019_07_18_14_40_53 directory.
history_data = pd.read_hdf(result_dir + 'sample_history/2019_07_18_14_40_53/sample_history.hdf')

In [None]:
# Display the loaded sample history.
history_data

In [None]:
# Show only the rows whose sqlns_treatment_start is not missing.
history_data[history_data.sqlns_treatment_start.notna()]

In [None]:
# Causes reported in the results. Order matters: the plotting cell skips the
# last entry via cause_names[:-1].
cause_names = ['lower_respiratory_infections', 'measles', 'diarrheal_diseases', 'protein_energy_malnutrition', 'iron_deficiency', 'other_causes']
# Risk names; not referenced by any other cell visible in this notebook.
risk_names = ['anemia', 'child_stunting', 'child_wasting']

# Identifier columns of the long-form result tables. Order matters:
# 'coverage' must stay first and 'input_draw' last, because
# get_averted_results slices template_cols[1:] and get_final_table slices
# template_cols[:-1].
template_cols = ['coverage', 'duration', 'child_stunting_permanent', 'child_wasting_permanent', 'iron_deficiency_permanent', 'iron_deficiency_mean', 'cause', 'measure', 'input_draw']

In [None]:
# Note: the coefficient of variation is held constant across the different
# SQ-LNS effect sizes on iron deficiency.
def clean_and_aggregate(path):
    """Load the Nigeria model output and sum it per scenario and input draw.

    Reads ``nigeria/2019_07_18_13_20_17/output.hdf`` below ``path`` (``path``
    is concatenated as-is, so it must end with a separator), shortens the
    ``sqlns.*`` configuration column names, multiplies ``coverage`` by 100
    (presumably proportion -> percent; confirm against the model config), and
    sums every remaining column within each group.

    Returns
    -------
    pandas.DataFrame
        Summed results indexed by the six scenario parameters plus
        ``input_draw``.
    """
    short_names = {
        'sqlns.effect_on_child_stunting.permanent': 'child_stunting_permanent',
        'sqlns.effect_on_child_wasting.permanent': 'child_wasting_permanent',
        'sqlns.effect_on_iron_deficiency.permanent': 'iron_deficiency_permanent',
        'sqlns.effect_on_iron_deficiency.mean': 'iron_deficiency_mean',
        'sqlns.program_coverage': 'coverage',
        'sqlns.duration': 'duration',
    }
    group_keys = [
        'coverage',
        'duration',
        'child_stunting_permanent',
        'child_wasting_permanent',
        'iron_deficiency_permanent',
        'iron_deficiency_mean',
        'input_draw',
    ]

    raw = pd.read_hdf(path + 'nigeria/2019_07_18_13_20_17/output.hdf')
    renamed = raw.rename(columns=short_names)
    renamed['coverage'] = renamed['coverage'] * 100
    return renamed.groupby(group_keys).sum()

In [None]:
# Aggregated model output, summed per scenario and input draw.
r = clean_and_aggregate(result_dir)

In [None]:
def standardize_shape(data, measure):
    """Reshape the wide columns matching ``measure`` into long form.

    Parameters
    ----------
    data : pandas.DataFrame
        Wide results. Its index levels become ordinary columns in the output
        and its column labels are expected to be strings.
    measure : str
        Substring used to select columns (``measure in column_label``), e.g.
        ``'person_time'`` or ``'ylls_due_to_measles'``. When it contains
        ``'due_to'`` it is split at the first ``'_due_to_'`` into a measure
        name and a cause name.

    Returns
    -------
    pandas.DataFrame
        One row per (index row, matching column) holding the index levels of
        ``data``, a ``value`` column, a ``measure`` column and, for
        ``..._due_to_...`` measures only, a ``cause`` column. The original
        column label itself is not kept.
    """
    matching_cols = [c for c in data.columns if measure in c]
    stacked = data.loc[:, matching_cols].stack()

    # stack() appends the column label as the innermost index level. Drop it
    # by position instead of by the auto-generated name 'level_7', which is
    # only correct when ``data`` has exactly seven index levels and an unnamed
    # column axis.
    measure_data = stacked.reset_index(level=-1, drop=True).reset_index(name='value')

    if 'due_to' in measure:
        measure, cause = measure.split('_due_to_', 1)
        measure_data['measure'] = measure
        measure_data['cause'] = cause
    else:
        measure_data['measure'] = measure

    return measure_data

In [None]:
def get_person_time(data):
    """Return the long-form person-time values found in ``data``.

    The result of ``standardize_shape(data, 'person_time')`` is returned with
    its ``measure`` column removed and its ``value`` column renamed to
    ``person_time``.
    """
    long_form = standardize_shape(data, 'person_time')
    without_measure = long_form.drop(columns='measure')
    return without_measure.rename(columns={'value': 'person_time'})

In [None]:
def _relabel_as_dalys(frame):
    """Return a copy of ``frame`` whose ``measure`` column reads 'dalys'."""
    relabelled = frame.copy()
    relabelled['measure'] = 'dalys'
    return relabelled


def get_disaggregated_results(data, cause_names):
    """Build one long table of deaths, YLLs, YLDs and DALYs by cause.

    Parameters
    ----------
    data : pandas.DataFrame
        Wide results containing ``death_due_to_<cause>``,
        ``ylls_due_to_<cause>`` and ``ylds_due_to_<cause>`` columns.
    cause_names : list of str
        Causes to extract. ``'iron_deficiency'`` contributes YLDs only,
        ``'other_causes'`` contributes deaths and YLLs only, and every other
        cause contributes deaths, YLLs and YLDs.

    Returns
    -------
    pandas.DataFrame
        Rows sorted by ``template_cols`` with those columns plus ``value``.
        DALYs are YLDs + YLLs where both exist, otherwise whichever of the
        two the cause has.
    """
    # Classify causes by name, not by their position in ``cause_names``, so a
    # reordered or longer list cannot silently change how a cause is handled.
    yld_only_causes = {'iron_deficiency'}
    yll_only_causes = {'other_causes'}
    key_cols = [c for c in template_cols if c != 'measure']

    deaths = []
    ylls = []
    ylds = []
    dalys = []
    for cause in cause_names:
        has_ylls = cause not in yld_only_causes
        has_ylds = cause not in yll_only_causes

        ylls_sub = None
        ylds_sub = None
        if has_ylls:
            deaths.append(standardize_shape(data, f'death_due_to_{cause}'))
            ylls_sub = standardize_shape(data, f'ylls_due_to_{cause}')
            ylls.append(ylls_sub)
        if has_ylds:
            ylds_sub = standardize_shape(data, f'ylds_due_to_{cause}')
            ylds.append(ylds_sub)

        if has_ylls and has_ylds:
            # Add only the numeric column, aligned on the identifier columns;
            # the 'measure' labels are not summed.
            dalys_sub = (ylds_sub.set_index(key_cols)[['value']]
                         + ylls_sub.set_index(key_cols)[['value']]).reset_index()
            dalys_sub['measure'] = 'dalys'
        elif has_ylds:
            dalys_sub = _relabel_as_dalys(ylds_sub)
        else:
            dalys_sub = _relabel_as_dalys(ylls_sub)
        dalys.append(dalys_sub)

    # A single concat keeps the deaths / YLLs / YLDs / DALYs row order and
    # does not fail when one of the four categories is empty.
    output = pd.concat(deaths + ylls + ylds + dalys)
    output = output.set_index(template_cols).sort_index()

    return output.reset_index()

In [None]:
# Long-form deaths, YLLs, YLDs and DALYs for every cause in cause_names.
output = get_disaggregated_results(r, cause_names)

In [None]:
# Attach person_time to each result row, matching on every template column
# except 'cause' and 'measure'.
df = pd.merge(output, get_person_time(r), on=[c for c in template_cols if c not in ['cause', 'measure']])

In [None]:
def get_averted_results(df):
    """Add burden averted relative to zero coverage and convert to rates.

    Rows with ``coverage == 0.0`` serve as the baseline. Each row of ``df`` is
    matched to its baseline row on ``template_cols[1:]`` (every template
    column except ``coverage``), and ``averted`` is the baseline value minus
    the row's own value. Both ``value`` and ``averted`` are then divided by
    the row's own ``person_time`` and multiplied by 100,000.

    Returns the merged DataFrame with the extra ``averted`` column.
    """
    scenario_keys = template_cols[1:]

    is_baseline = df.coverage == 0.0
    baseline = df[is_baseline].drop(columns=['coverage', 'person_time'])

    merged = pd.merge(df, baseline, on=scenario_keys, suffixes=['', '_bau'])
    counts_averted = merged['value_bau'] - merged['value']
    merged = merged.drop(columns='value_bau')

    person_time = merged['person_time']
    merged['averted'] = (counts_averted / person_time) * 100_000
    merged['value'] = (merged['value'] / person_time) * 100_000

    return merged

In [None]:
def get_final_table(data):
    """Summarise ``value``, ``person_time`` and ``averted`` across draws.

    Groups ``data`` by ``template_cols[:-1]`` (every template column except
    ``input_draw``), describes the three columns with the 2.5th and 97.5th
    percentiles, and keeps only the mean and those two percentiles for each
    column.
    """
    group_keys = template_cols[:-1]
    grouped = data.groupby(group_keys)[['person_time', 'value', 'averted']]
    summary = grouped.describe(percentiles=[.025, .975])

    wanted = [(column, stat)
              for column in ('value', 'person_time', 'averted')
              for stat in ('mean', '2.5%', '97.5%')]
    return summary.filter(wanted)

In [None]:
# Summary table (mean, 2.5% and 97.5%) of the rates and averted rates.
table_shell = get_final_table(get_averted_results(df))

In [None]:
# Display the summary table.
table_shell

In [None]:
@interact()
def plot_dalys_averted(duration=[365.25, 730.50],
                       child_stunting_permanent=[0, 1],
                       child_wasting_permanent=[0, 1],
                       iron_deficiency_permanent=[0, 1],
                       iron_deficiency_mean=[0.895, 4.475, 8.950]):
    """Plot DALYs averted against program coverage for one scenario.

    Each parameter selects, by exact equality, the rows of ``table_shell``
    to plot; the list defaults are the choices handed to ``interact``. One
    line (mean) with a shaded 2.5%-97.5% band is drawn per cause, for every
    entry of ``cause_names`` except the last.
    """
    flat = table_shell.reset_index()

    chosen = {
        'duration': duration,
        'child_stunting_permanent': child_stunting_permanent,
        'child_wasting_permanent': child_wasting_permanent,
        'iron_deficiency_permanent': iron_deficiency_permanent,
        'iron_deficiency_mean': iron_deficiency_mean,
        'measure': 'dalys',
    }
    keep = None
    for column, wanted in chosen.items():
        matches = flat[column] == wanted
        keep = matches if keep is None else keep & matches
    dalys = flat.loc[keep]

    plt.figure(figsize=(12, 8))

    for cause in cause_names[:-1]:
        cause_rows = dalys.loc[dalys.cause == cause]
        coverage = cause_rows['coverage']

        plt.plot(coverage, cause_rows[('averted', 'mean')], '-o', label=cause)
        plt.fill_between(coverage,
                         cause_rows[('averted', '2.5%')],
                         cause_rows[('averted', '97.5%')],
                         alpha=0.1)

    plt.title('Nigeria')
    plt.xlabel('Program Coverage (%)')
    plt.ylabel('DALYs Averted (per100,000PY)')
    plt.legend(loc=(1.05, .05))
    plt.grid()