In [1]:
from vivarium import Artifact
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from db_queries import get_ids, get_outputs
import scipy.stats

!whoami
!date

alibow
Wed Apr  1 15:35:05 PDT 2020


In [2]:
output_dirs = ['/ihme/costeffectiveness/results/vivarium_conic_lsff/v5.0_vitamin_a_fortification/ethiopia/2020_03_26_08_33_10/count_data/',
              '/ihme/costeffectiveness/results/vivarium_conic_lsff/v5.0_vitamin_a_fortification/india/2020_03_26_21_28_29/count_data/',
              '/ihme/costeffectiveness/results/vivarium_conic_lsff/v5.0_vitamin_a_fortification/nigeria/2020_03_24_23_27_55/count_data/']

locations = ['Ethiopia','India','Nigeria']

In [3]:
index_cols = ['year', 'age_group', 'sex', 'cause', 'folic_acid_fortification_group',
       'vitamin_a_fortification_group', 'input_draw', 'scenario']

In [4]:
# load and merge stratified count results for each location, outcome
master_counts = pd.DataFrame()
master_rates = pd.DataFrame()
for i in range(len(output_dirs)):
    yll_count = pd.read_hdf(output_dirs[i] + 'ylls.hdf').rename(columns={'value':'ylls'}).drop(columns='measure')
    yld_count = pd.read_hdf(output_dirs[i] + 'ylds.hdf').rename(columns={'value':'ylds'}).drop(columns='measure')
    daly_count = yll_count.merge(yld_count, right_on=index_cols, left_on=index_cols)
    daly_count['dalys'] = daly_count['ylls'] + daly_count['ylds']
    deaths = pd.read_hdf(output_dirs[i] + 'deaths.hdf').rename(columns={'value':'deaths'}).drop(columns='measure')
    data = daly_count.merge(deaths, right_on=index_cols, left_on=index_cols)
    person_time = pd.read_hdf(output_dirs[i] + 'person_time.hdf').rename(columns={'value':'person_time'}).drop(columns='measure')
    counts = data.merge(person_time, right_on=['year', 'age_group', 'sex', 'folic_acid_fortification_group',
       'vitamin_a_fortification_group', 'input_draw', 'scenario'], 
                     left_on=['year', 'age_group', 'sex', 'folic_acid_fortification_group',
       'vitamin_a_fortification_group', 'input_draw', 'scenario'])
    counts['location'] = locations[i]
    master_counts = master_counts.append(counts)
master_counts.head()

Unnamed: 0,year,age_group,sex,cause,folic_acid_fortification_group,vitamin_a_fortification_group,input_draw,scenario,ylls,ylds,dalys,deaths,person_time,location
0,2020,1_to_4,female,diarrheal_diseases,covered,covered,21,baseline,0.0,0.0,0.0,0.0,0.0,Ethiopia
1,2020,1_to_4,female,lower_respiratory_infections,covered,covered,21,baseline,0.0,0.0,0.0,0.0,0.0,Ethiopia
2,2020,1_to_4,female,measles,covered,covered,21,baseline,0.0,0.0,0.0,0.0,0.0,Ethiopia
3,2020,1_to_4,female,neural_tube_defects,covered,covered,21,baseline,0.0,0.0,0.0,0.0,0.0,Ethiopia
4,2020,1_to_4,female,diarrheal_diseases,covered,covered,21,folic_acid_fortification_scale_up,0.0,0.0,0.0,0.0,0.0,Ethiopia


In [36]:
def get_stratified_averted_results_aggregated(strata_cols):  
    overall = master_counts.groupby((['location','scenario','input_draw']+strata_cols), as_index=False).sum()
    overall_baseline = (overall.where(overall['scenario'] == 'baseline').dropna()
                        .set_index((['location','input_draw']+strata_cols)).drop(columns='scenario'))
    overall_intervention = (overall.where(overall['scenario'] == 'vitamin_a_fortification_scale_up').dropna()
                            .set_index((['location','input_draw']+strata_cols)).drop(columns='scenario'))
    merged = overall_baseline.merge(overall_intervention, right_on=(['location','input_draw']+strata_cols),
                                    left_on=(['location','input_draw']+strata_cols), 
                                    suffixes=('_baseline','_intervention'))
    # drop rows where there is zero person time in either the baseline or intervention
        # note... I believe this biases results towards baseline coverage >> 0, but more thorough investigation into
        # impact of this should be conducted
    merged = merged.where(merged['person_time_intervention'] != 0).where(merged['person_time_baseline'] != 0).dropna()
    for measure in ['ylls','ylds','dalys','deaths']:
        for scenario in ['baseline','intervention']:
            merged[f'{measure}_{scenario}_rate'] = merged[f'{measure}_{scenario}'] / merged[f'person_time_{scenario}'] * 100_000
        merged[f'{measure}'] = merged[f'{measure}_baseline_rate'] - merged[f'{measure}_intervention_rate'] 
    merged = (merged.reset_index().groupby((['location']+strata_cols))
                       .describe())#percentiles=[0.025,0.975,0.25,0.75]))
    return merged

In [37]:
# Calculate overall (unstratified results):
overall_results = get_stratified_averted_results_aggregated([])
overall_results['dalys']

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Ethiopia,25.0,265.605137,175.510744,-170.876934,191.102309,313.204411,382.508649,479.366574
India,25.0,211.299505,86.176562,59.681699,147.269959,224.577067,275.328516,361.335745
Nigeria,25.0,264.362084,115.670974,19.92772,210.018344,238.737353,341.353776,493.044479


In [38]:
# Calculate overall (unstratified results):
results_by_coverage_group = get_stratified_averted_results_aggregated(['vitamin_a_fortification_group'])
results_by_coverage_group['dalys']

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
location,vitamin_a_fortification_group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Ethiopia,covered,10.0,248321.361879,1068729.0,-1185060.0,-398081.121777,174225.798189,589205.6,2141109.0
Ethiopia,effectively_covered,15.0,-1380.514862,2382.713,-5461.143,-3853.689082,96.938219,230.8286,1063.916
Ethiopia,uncovered,25.0,-211.085368,106.5513,-446.8356,-269.89748,-209.65113,-149.0864,-17.68799
India,covered,25.0,62737.405893,186978.7,-257567.1,-84623.193642,8881.607784,200377.7,387734.3
India,effectively_covered,25.0,26.657933,87.76284,-117.4246,-29.673897,20.828826,65.08213,227.1501
India,uncovered,25.0,-830.815259,319.0148,-1437.033,-1091.154778,-793.031969,-600.5379,-277.6201
Nigeria,covered,25.0,841871.145713,249277.6,275747.5,712088.493673,789915.47979,1033140.0,1331007.0
Nigeria,effectively_covered,25.0,2335.054278,421.7749,1338.854,2043.897144,2315.85856,2604.079,3295.377
Nigeria,uncovered,25.0,-2889.989205,285.2553,-3464.98,-3081.967044,-2852.239065,-2684.468,-2419.353


In [32]:
# Calculate results by year:
results_by_year = get_stratified_averted_results_aggregated(['year'])
results_by_year['dalys']

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,2.5%,25%,50%,75%,97.5%,max
location,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Ethiopia,2020,25.0,253.891186,182.352186,-173.365751,-121.292078,174.517885,292.740753,351.220427,540.420442,547.940394
Ethiopia,2021,25.0,256.787926,181.113442,-183.032269,-146.027974,170.733946,296.758422,391.051646,492.505487,517.688844
Ethiopia,2022,25.0,261.468636,176.047631,-195.803344,-126.241654,160.446824,309.590589,377.777414,501.102828,513.23853
Ethiopia,2023,25.0,266.306278,182.612374,-142.651107,-116.403815,163.377605,300.568457,380.49671,531.724247,553.239493
Ethiopia,2024,25.0,282.859072,178.031494,-161.61113,-108.367208,208.259603,339.216724,400.915218,483.751814,488.121016
India,2020,25.0,192.818339,80.127628,23.203251,42.623727,153.732921,210.707863,255.248863,307.305991,309.432736
India,2021,25.0,206.043663,86.518938,45.855213,59.135162,153.173067,220.822711,272.712051,328.308857,346.186361
India,2022,25.0,214.593406,83.41397,64.313346,67.71053,162.443393,227.842462,290.621657,328.17617,346.429251
India,2023,25.0,218.221657,96.71407,59.601556,62.791089,165.93997,217.451921,278.890239,375.694262,394.872392
India,2024,25.0,216.491893,100.433555,36.985527,50.917169,150.685545,243.565669,287.781852,373.641976,455.502111


## Conclusion from this process: results stratified aggregated across years are approximately 5x the results stratified by years

In [8]:
def get_stratified_averted_results_year_adjustment(strata_cols):  
    overall = master_counts.groupby((['location','scenario','input_draw']+strata_cols), as_index=False).sum()
    overall_baseline = (overall.where(overall['scenario'] == 'baseline').dropna()
                        .set_index((['location','input_draw']+strata_cols)).drop(columns='scenario'))
    overall_intervention = (overall.where(overall['scenario'] == 'vitamin_a_fortification_scale_up').dropna()
                            .set_index((['location','input_draw']+strata_cols)).drop(columns='scenario'))
    merged = overall_baseline.merge(overall_intervention, right_on=(['location','input_draw']+strata_cols),
                                    left_on=(['location','input_draw']+strata_cols), 
                                    suffixes=('_baseline','_intervention'))
    # drop rows where there is zero person time in either the baseline or intervention
        # note... I believe this biases results towards baseline coverage >> 0, but more thorough investigation into
        # impact of this should be conducted
    merged = merged.where(merged['person_time_intervention'] != 0).where(merged['person_time_baseline'] != 0).dropna()
    for measure in ['ylls','ylds','dalys','deaths']:
        merged[f'{measure}'] = merged[f'{measure}_baseline'] - merged[f'{measure}_intervention'] 
    merged = (merged.reset_index().groupby((['location']+strata_cols))
                       .describe(percentiles=[0.025,0.975,0.25,0.75]))
    # divide by number of years so that results are per 100,000 person years
    if 'year' not in strata_cols:
        merged = merged / len(np.unique(master_counts['year']))
    return merged

In [10]:
overall_results_adjusted = get_stratified_averted_results_year_adjustment([])
overall_results_adjusted['dalys']

Unnamed: 0_level_0,count,mean,std,min,2.5%,25%,50%,75%,97.5%,max
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Ethiopia,5.0,3975.429824,2628.58004,-2557.479368,-1851.242673,2842.252351,4690.133446,5728.654607,7167.553011,7178.339215
India,5.0,2888.05986,1178.104637,815.007007,886.899139,2013.895594,3064.97369,3755.165434,4543.65138,4934.462653
Nigeria,5.0,3899.520745,1700.325394,293.805972,750.148206,3096.937354,3534.608069,5035.86641,7002.971396,7220.533131
