In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Reload imported modules automatically before each cell is executed, so that
# edits to imported project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import interact, IntSlider

# Keep displayed DataFrames short: at most 8 rows are shown before truncation.
pd.set_option('display.max_rows', 8)

# Print the current date and the current user name in the cell output.
!date
!whoami

In [None]:
import vivarium_conic_sqlns.verification_and_validation.sqlns_output_processing as sop

## Get file paths for locations

In [None]:
# List the contents of one location's results directory (shell command);
# presumably used to look up the run-date subdirectory names entered below.
!ls /share/costeffectiveness/results/sqlns/presentation/country_comparison/nigeria

In [None]:
# Root directory of the simulation output files.
# Each run is assumed to live in a subdirectory of the form 'location/run_date/'.
base_directory = '/share/costeffectiveness/results/sqlns/presentation/country_comparison'

# Run date (= subdirectory name) to load for each country
locations_run_dates = {
    'Bangladesh': '2019_07_30_19_09_43',
    'Burkina_Faso': '2019_07_30_19_15_13',
    'Ethiopia': '2019_07_30_19_18_21',
    'India': '2019_07_30_19_21_23',
    'Nigeria': '2019_07_30_19_24_09',
}

# Country names, in the order they are listed in the mapping above
locations = list(locations_run_dates)

## Load data, check output shapes

In [None]:
# Load the output of every location's run (selected by the run dates configured
# above) into `all_output`, which is used as a pandas DataFrame in the cells below.
all_output = sop.load_by_location_and_rundate(base_directory, locations_run_dates)
# Print the output shape for each location to compare against the expected row count.
sop.print_location_output_shapes(locations, all_output) # Some seeds are missing

In [None]:
# Row count expected when no simulation run is missing (compare with the shapes
# printed above): 12 scenarios * 33 input draws * 5 random seeds = 1980 rows
12*33*5

## Find index columns (location, intervention, draw)

In [None]:
# Find index columns: show only the columns whose names contain
# 'location', 'sqlns' or 'draw'
all_output.filter(regex='location|sqlns|draw')

In [None]:
# Short names for the intervention parameter columns used in this analysis.
# The commented-out entries are other 'sqlns.*' parameters that are not used here.
intervention_colname_mapper = {
    # 'sqlns.effect_on_child_stunting.permanent': 'stunting_permanent',
    # 'sqlns.effect_on_child_wasting.permanent': 'wasting_permanent',
    # 'sqlns.effect_on_iron_deficiency.permanent': 'iron_permanent',
    'sqlns.duration': 'duration',
    # 'sqlns.effect_on_iron_deficiency.mean': 'iron_mean',
    # 'sqlns.effect_on_iron_deficiency.sd': 'iron_sd',
    'sqlns.program_coverage': 'coverage',
}

# Columns passed to the `sop` helpers as the index columns
# (location, the renamed intervention parameters, and the input draw)
index_cols = ['location', 'duration', 'coverage', 'input_draw']

# Causes passed to the `sop` helpers when transforming the results
cause_names = [
    'lower_respiratory_infections',
    'measles',
    'diarrheal_diseases',
    'protein_energy_malnutrition',
    'iron_deficiency',
    'other_causes',
]

# risk_names = ['anemia', 'child_stunting', 'child_wasting']

## For each location, drop draws that are missing random seeds

Part of the code for this function was prototyped in `2019_07_30_verify_yld_bug_fix.ipynb` on 7/31/2019.

In [None]:
def drop_incomplete_draws(output, num_seeds, num_scenarios):
    """Keep only the (location, input draw) pairs that have a full set of rows.

    A pair counts as complete when it has exactly ``num_seeds * num_scenarios``
    non-null ``random_seed`` entries; the rows of every other pair are dropped.

    The returned dataframe has a fresh integer index, with 'location' and
    'input_draw' as its first two columns and rows grouped by complete pair.
    """
    pair_cols = ['location', 'input_draw']
    expected_rows = num_seeds * num_scenarios

    # Number of rows recorded for each (location, draw) pair
    rows_per_pair = output.groupby(pair_cols)['random_seed'].count()

    # Labels of the pairs where all scenarios have all seeds
    complete_pairs = rows_per_pair[rows_per_pair == expected_rows].index.tolist()

    # Look the complete pairs up by label in the original data; rows that
    # belong to an incomplete pair are never selected and so are dropped.
    by_pair = output.set_index(pair_cols)
    return by_pair.loc[complete_pairs].reset_index()

In [None]:
# Keep only the (location, draw) pairs that have all 5 seeds * 12 scenarios = 60 rows
output = drop_incomplete_draws(all_output, num_seeds=5, num_scenarios=12)
output

## Sum over random seeds, then transform dataframe to long form

In [None]:
# Per the section heading, this step sums over random seeds.
# NOTE(review): presumably it also renames the intervention columns using
# `intervention_colname_mapper`; the role of the trailing 'coverage' argument
# is defined in sop.clean_and_aggregate and should be confirmed there.
df = sop.clean_and_aggregate(output, intervention_colname_mapper, index_cols, 'coverage')
df

In [None]:
# # Testing intermediate step
# sop.get_disaggregated_results(df, cause_names, index_cols)

In [None]:
# # Testing intermediate step
# sop.get_all_cause_results(df, index_cols)

In [None]:
# Transform the aggregated data to long form (per the section heading).
# NOTE(review): presumably combines the two intermediate steps shown in the
# commented-out cells above — confirm against sop.get_transformed_data.
results = sop.get_transformed_data(df, cause_names, index_cols)
results

## Get averted results

In [None]:
# Compute the averted results from the long-form data.
# NOTE(review): 'coverage' is presumably the column that identifies the
# baseline scenario — confirm against sop.get_averted_results.
averted_df = sop.get_averted_results(results, index_cols, 'coverage')
averted_df

## Get aggregated results

Something seems to have gone wrong with India and Nigeria. Their averted results are all approximately 0 for all coverage levels.

In [None]:
# Build the final table from the averted results. The cells below read
# (quantity, statistic) columns from it, e.g. ('averted', 'mean') and
# ('treated_days_per_averted', '2.5%').
aggregated_df = sop.get_final_table(averted_df, index_cols)
aggregated_df

In [None]:
# India's averted results are all very close to 0. So are Nigeria's.
# Shown here: the 'averted' columns for India at duration 365.25, for the
# third index level (coverage) in the label range 80 through 100.
aggregated_df.loc[('India', 365.25, slice(80,100)), 'averted']

## Save "treated days per averted DALY" to file for plotting ICERs by location

In [None]:
# Treated days per averted DALY (all causes) at duration 365.25 and coverage 40,
# for the locations in the label range 'Bangladesh' through 'Ethiopia'
# (this range leaves out India and Nigeria, whose averted results look wrong).
idx = pd.IndexSlice
aggregated_df.loc[
    idx['Bangladesh':'Ethiopia',365.25,40,'all_causes', 'dalys'], 
    'treated_days_per_averted']

In [None]:
# aggregated_df.loc[
#     idx['Bangladesh':'Ethiopia',365.25,40,'all_causes', 'dalys'], 
#     'treated_days_per_averted'].to_csv(
#     '/snfs1/Project/simulation_science/mnch/Interventions/sq-lns/presentations/results/'
#     'treated_days_per_averted_daly_coverage40.csv'
# )

In [None]:
# List the directory targeted by the (commented-out) CSV export above.
!ls /snfs1/Project/simulation_science/mnch/Interventions/sq-lns/presentations/results/

## Plot ICERs using minimal modification to original code to see if it works

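Note: `aggregated_df` contains all locations at once, so the plots have to loop through the locations and draw each one separately; plotting the filtered data directly runs every location together into a single series. See the per-location plots below.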

In [None]:
# Distinct measures and causes present in the averted results
measures = averted_df.measure.unique()
averted_cause_list = averted_df.cause.unique()
print(measures, averted_cause_list)

In [None]:
# Notebook-level constant read by the plotting functions below, where it is the
# divisor in `cost_per_py * treated_days / days_per_year`.
days_per_year = 365.25

# @interact()
# def plot_icers(duration=[365.25, 730.50],
#                     cgf_permanent=[False, True],
#                     iron_permanent=[False, True],
#                     iron_mean=[0.895, 4.475, 8.950],
#                               measure=measures,
#                               cause=averted_cause_list,
#                               cost_per_py=cost_slider,
#                   ):
def plot_icers_and_costs_and_dalys(aggregated_df,
                                   duration=365.25,
                                   cgf_permanent=False,
                                   iron_permanent=False,
                                   iron_mean=4.475,
                                   measure='dalys',
                                   cause='all_causes',
                                   cost_per_py=67,
                                   ):
    """Plot cost, averted burden and two ICER estimates against program coverage.

    Draws a 2x2 grid of panels. Every panel contains, for each location in the
    filtered data, one line (the 'mean' column) and one shaded band (the '2.5%'
    to '97.5%' columns), so that locations are never joined into one series:

    * top left:     ``cost_per_py * sqlns_treated_days / days_per_year``
    * bottom left:  ``averted``
    * top right:    ``cost_per_py * treated_days_per_averted / days_per_year``
    * bottom right: ``cost_per_py * treated_days_per_averted_rate / days_per_year``

    Parameters
    ----------
    aggregated_df : pandas.DataFrame
        Table that, after ``reset_index()``, provides the columns 'location',
        'duration', 'coverage', 'cause' and 'measure', plus the
        (quantity, statistic) columns listed above.
    duration, measure, cause
        Only rows whose 'duration', 'measure' and 'cause' equal these values
        are plotted.
    cgf_permanent, iron_permanent, iron_mean
        Unused. They are kept only so that existing calls keep working; the
        filters they belonged to are not applied to this dataset.
    cost_per_py
        Cost factor multiplied into the treated-days quantities.

    Notes
    -----
    Reads the notebook-level constant ``days_per_year``.
    """
    data = aggregated_df.reset_index()

    data = data.loc[(data.duration == duration)
                    & (data.cause == cause)
                    & (data.measure == measure)]

    fig, ax = plt.subplots(2, 2, figsize=(14, 9))

    # (axes, quantity to plot, whether the quantity is in treated days and
    # therefore has to be converted into a cost)
    panels = [
        (ax[0, 0], 'sqlns_treated_days', True),
        (ax[1, 0], 'averted', False),
        (ax[0, 1], 'treated_days_per_averted', True),
        (ax[1, 1], 'treated_days_per_averted_rate', True),
    ]

    # One series per location: plotting the filtered frame directly would
    # connect the points of all locations into a single line.
    for location in data['location'].unique():
        loc_data = data[data['location'] == location]
        xx = loc_data['coverage']

        for panel, quantity, is_treated_days in panels:
            mean = loc_data[(quantity, 'mean')]
            lb = loc_data[(quantity, '2.5%')]
            ub = loc_data[(quantity, '97.5%')]
            if is_treated_days:
                mean = cost_per_py * mean / days_per_year
                lb = cost_per_py * lb / days_per_year
                ub = cost_per_py * ub / days_per_year

            line, = panel.plot(xx, mean, '-o', label=location)
            # Light band in the line's own colour, so the mean stays visible
            # and each band can be matched to its location.
            panel.fill_between(xx, lb, ub, alpha=0.1, color=line.get_color())

    ## Label the plots

    ax[0,0].set_title('Total cost vs. coverage', fontsize=16)
    ax[0,0].set_xlabel('Program Coverage (%)', fontsize=12)
    ax[0,0].set_ylabel('Cost of SQ-LNS\ntreatment ($)', fontsize=16)
    ax[0,0].grid()
    ax[0,0].legend()

    ax[1,0].set_title(f'Averted {measure} vs. coverage', fontsize=16)
    ax[1,0].set_xlabel('Program Coverage (%)', fontsize=12)
    ax[1,0].set_ylabel(f'Averted {measure}', fontsize=16)
    ax[1,0].grid()
    ax[1,0].legend()

    ax[0,1].set_title('Cost effectiveness (ICERs)\nvs. coverage', fontsize=16)
    ax[0,1].set_xlabel('Program Coverage (%)', fontsize=12)
    ax[0,1].set_ylabel(f'Cost per averted {measure}', fontsize=12)
    ax[0,1].grid()
    ax[0,1].legend()

    ax[1,1].set_title('Cost effectiveness (ICERs)\nvs. coverage', fontsize=16)
    ax[1,1].set_xlabel('Program Coverage (%)', fontsize=12)
    ax[1,1].set_ylabel(f'Cost per averted {measure}\n(calculated using rate difference)', fontsize=12)
    ax[1,1].grid()
    ax[1,1].legend()

    fig.tight_layout()

In [None]:
# Draw the 2x2 cost / averted / ICER figure using the function's default arguments
plot_icers_and_costs_and_dalys(aggregated_df)

## Plot ICERs and DALYs averted

In [None]:
# Locations present in the aggregated results (these are looped over when plotting)
aggregated_df.reset_index()['location'].unique()

In [None]:
def plot_icers_and_dalys(aggregated_df,
                         duration=365.25,
                         measure='dalys',
                         cause='all_causes',
                         cost_per_py=67,
                         ):
    """Plot the averted measure and the ICER against coverage, one series per location.

    The rows of ``aggregated_df`` (after ``reset_index()``) matching ``duration``,
    ``cause`` and ``measure`` are split by location. For each location the left
    panel shows the 'averted' mean with its 2.5%-97.5% band, and the right panel
    shows ``cost_per_py * treated_days_per_averted / days_per_year`` in the same way.

    Reads the notebook-level constant ``days_per_year``.
    """
    flat = aggregated_df.reset_index()
    selected = ((flat.duration == duration)
                & (flat.cause == cause)
                & (flat.measure == measure))
    flat = flat.loc[selected]

    fig, (averted_ax, icer_ax) = plt.subplots(1, 2, figsize=(14, 6))

    for place in flat['location'].unique():
        rows = flat[flat['location'] == place]
        coverage = rows['coverage']

        # Left panel: averted measure vs. coverage
        averted_mean = rows[('averted', 'mean')]
        averted_low = rows[('averted', '2.5%')]
        averted_high = rows[('averted', '97.5%')]
        averted_ax.plot(coverage, averted_mean, '-o', label=place)
        averted_ax.fill_between(coverage, averted_low, averted_high, alpha=0.1)

        # Right panel: ICERs calculated using raw values
        icer_mean, icer_low, icer_high = [
            cost_per_py * rows[('treated_days_per_averted', stat)] / days_per_year
            for stat in ('mean', '2.5%', '97.5%')
        ]
        icer_ax.plot(coverage, icer_mean, '-o', label=place)
        icer_ax.fill_between(coverage, icer_low, icer_high, alpha=0.1)

    # Titles, axis labels, grid and legend for the left panel
    averted_ax.set_title(f'Averted {measure} vs. coverage', fontsize=16)
    averted_ax.set_xlabel('Program Coverage (%)', fontsize=12)
    averted_ax.set_ylabel(f'Averted {measure}', fontsize=16)
    averted_ax.grid()
    averted_ax.legend()

    # ... and for the right panel
    icer_ax.set_title('Cost effectiveness (ICERs)\nvs. coverage', fontsize=16)
    icer_ax.set_xlabel('Program Coverage (%)', fontsize=12)
    icer_ax.set_ylabel(f'Cost per averted {measure}', fontsize=12)
    icer_ax.grid()
    icer_ax.legend()

    fig.tight_layout()

In [None]:
# Averted DALYs and ICERs for every location, using the function's default arguments
plot_icers_and_dalys(aggregated_df)

## Plot ICERs only, for presentation

In [None]:
def plot_icers(aggregated_df,
               duration=365.25,
               measure='dalys',
               cause='all_causes',
               cost_per_py=67,
               excluded_locations=('India', 'Nigeria'),
               ):
    """Plot the ICER against program coverage for each location (presentation figure).

    For every location that is not excluded, draws the mean of
    ``cost_per_py * treated_days_per_averted / days_per_year`` as a line and the
    2.5%-97.5% range as a shaded band, on a single high-resolution axes.

    Parameters
    ----------
    aggregated_df : pandas.DataFrame
        Table that, after ``reset_index()``, provides the columns 'location',
        'duration', 'coverage', 'cause' and 'measure', plus the
        ('treated_days_per_averted', 'mean' | '2.5%' | '97.5%') columns.
    duration, measure, cause
        Only rows whose 'duration', 'measure' and 'cause' equal these values
        are plotted.
    cost_per_py
        Cost factor multiplied into the treated days per averted unit.
    excluded_locations : iterable of str, optional
        Locations to leave out of the figure. Defaults to India and Nigeria,
        whose data looks wrong; pass ``()`` to plot every location.

    Notes
    -----
    Reads the notebook-level constant ``days_per_year``.
    """
    data = aggregated_df.reset_index()

    data = data.loc[(data.duration == duration)
                    & (data.cause == cause)
                    & (data.measure == measure)]

    fig, ax = plt.subplots(figsize=(10, 6), dpi=300)

    skipped = set(excluded_locations)

    for location in data['location'].unique():
        if location in skipped:
            continue

        # Plot ICERs calculated using raw values
        loc_data = data[data['location'] == location]
        xx = loc_data['coverage']
        mean = cost_per_py * loc_data[('treated_days_per_averted', 'mean')] / days_per_year
        lb = cost_per_py * loc_data[('treated_days_per_averted', '2.5%')] / days_per_year
        ub = cost_per_py * loc_data[('treated_days_per_averted', '97.5%')] / days_per_year
        ax.plot(xx, mean, '-o', label=location)
        ax.fill_between(xx, lb, ub, alpha=0.1)

    ## Label the plots

    # Keep the presentation wording for DALYs, but do not mislabel the axis
    # when a different measure is requested.
    measure_label = 'DALY' if measure == 'dalys' else measure

    ax.set_title('Cost effectiveness (ICERs) vs. coverage', fontsize=18)
    ax.set_xlabel('Program Coverage (%)', fontsize=16)
    ax.set_ylabel(f'Cost per averted {measure_label}', fontsize=16)
    ax.grid()
    ax.legend(fontsize=14)

    fig.tight_layout()

In [None]:
# Presentation figure using the function's default arguments (India and Nigeria are omitted)
plot_icers(aggregated_df)