# Loading Packages and Data

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Cap rendered DataFrames at 8 rows so cell outputs stay short.
pd.set_option('display.max_rows', 8)

#import xarray as xr
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including pandas deprecation warnings and matplotlib's "too many open
# figures" warning — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')
from matplotlib.backends.backend_pdf import PdfPages

from db_queries import get_outputs as go
from db_queries import get_ids
from get_draws.api import get_draws

In [3]:
# List the CSV outputs available in the count_data directory of this run.
ls /mnt/team/simulation_science/pub/models/vivarium_nih_us_cvd/results/final_runs/nih_us_cvd/2023_12_28_13_42_46/count_data

binned_ldl_exposure_time.csv  state_person_time.csv  ylls.csv
binned_sbp_exposure_time.csv  transition_count.csv
deaths.csv                    ylds.csv


In [4]:
# Directory holding the count-data CSV files of the 2023_12_28_13_42_46 run.
# The trailing slash matters: file names are appended by plain concatenation.
sim_results_dir = (
    '/mnt/team/simulation_science/pub/models/vivarium_nih_us_cvd/results'
    '/final_runs/nih_us_cvd/2023_12_28_13_42_46/count_data/'
)

# Exploring Data

In [4]:
# Load the death counts and show which causes of death are present.
deaths = pd.read_csv(sim_results_dir + 'deaths.csv')
deaths['cause'].unique()

array(['acute_ischemic_stroke', 'acute_myocardial_infarction',
       'acute_myocardial_infarction_and_heart_failure',
       'chronic_ischemic_stroke',
       'heart_failure_from_ischemic_heart_disease',
       'heart_failure_residual', 'other_causes',
       'post_myocardial_infarction'], dtype=object)

In [5]:
# Load the transition counts and show which state transitions are present.
tran = pd.read_csv(sim_results_dir + 'transition_count.csv')
tran['transition'].unique()

array(['acute_ischemic_stroke_to_chronic_ischemic_stroke',
       'acute_myocardial_infarction_and_heart_failure_to_heart_failure_from_ischemic_heart_disease',
       'acute_myocardial_infarction_to_post_myocardial_infarction',
       'chronic_ischemic_stroke_to_acute_ischemic_stroke',
       'heart_failure_from_ischemic_heart_disease_to_acute_myocardial_infarction_and_heart_failure',
       'post_myocardial_infarction_to_acute_myocardial_infarction',
       'post_myocardial_infarction_to_heart_failure_from_ischemic_heart_disease',
       'susceptible_to_ischemic_heart_disease_and_heart_failure_to_acute_myocardial_infarction',
       'susceptible_to_ischemic_heart_disease_and_heart_failure_to_heart_failure_from_ischemic_heart_disease',
       'susceptible_to_ischemic_heart_disease_and_heart_failure_to_heart_failure_residual',
       'susceptible_to_ischemic_stroke_to_acute_ischemic_stroke'],
      dtype=object)

In [6]:
# Total person-time spent in the ischemic stroke states.
# sim_results_dir already ends with '/', so the file name is appended without
# a leading slash, matching how the other cells build their paths.
state_person_time = pd.read_csv(sim_results_dir + 'state_person_time.csv')
person_time = state_person_time.loc[
    state_person_time['state'].isin([
        "acute_ischemic_stroke",
        "chronic_ischemic_stroke",
        "susceptible_to_ischemic_stroke",
    ])
].reset_index(drop=True)  # drop=True: do not keep the old row labels as an 'index' column

person_time.value.sum()

3069572019.143054

In [5]:
# Load the years of life lost (YLLs) and preview the first five rows.
yll = pd.read_csv(sim_results_dir + 'ylls.csv')
yll.head(n=5)

Unnamed: 0.1,Unnamed: 0,sex,year,cause,measure,input_draw,scenario,location,age,value
0,0,Female,2021,acute_ischemic_stroke,ylls,29,baseline,alabama,25_to_29,0.0
1,1,Female,2021,acute_ischemic_stroke,ylls,29,baseline,alaska,25_to_29,0.0
2,2,Female,2021,acute_ischemic_stroke,ylls,29,baseline,arizona,25_to_29,0.0
3,3,Female,2021,acute_ischemic_stroke,ylls,29,baseline,arkansas,25_to_29,0.0
4,4,Female,2021,acute_ischemic_stroke,ylls,29,baseline,california,25_to_29,0.0


In [6]:
# Load the years lived with disability (YLDs) and preview the first five rows.
yld = pd.read_csv(sim_results_dir + 'ylds.csv')
yld.head(n=5)

Unnamed: 0.1,Unnamed: 0,sex,year,measure,input_draw,scenario,location,age,cause_of_disability,value
0,0,Female,2021,ylds,29,baseline,alabama,25_to_29,acute_ischemic_stroke,0.0
1,1,Female,2021,ylds,29,baseline,alaska,25_to_29,acute_ischemic_stroke,7.5e-05
2,2,Female,2021,ylds,29,baseline,arizona,25_to_29,acute_ischemic_stroke,0.000199
3,3,Female,2021,ylds,29,baseline,arkansas,25_to_29,acute_ischemic_stroke,0.00019
4,4,Female,2021,ylds,29,baseline,california,25_to_29,acute_ischemic_stroke,0.000232


# General PDF Function

Notes: this function works for DALYs, YLLs, YLDs, deaths, and incidence. Some things in the function need to be changed manually: whether counts or percent decrease is plotted (it is currently set to percent, via `graph_data = data_percent_final`), and which causes of death or incidence transitions are included. It was too complex to have the function auto-select the transitions or causes based on an input. The cause and transition lists printed in the "Exploring Data" section above should be helpful when editing them.

In [9]:
# Arguments for the PDF run at the bottom of the notebook: the metric to plot
# and the columns that results are stratified by.
metric = 'dalys'
groupby_columns = [
    'sex',
    'input_draw',
    'year',
    'scenario',
    'location',
]

def subtract_baseline(group):
    """Add averted-count and averted-percent columns relative to the baseline row.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of a single stratum. Must have a ``scenario`` column and a
        ``value`` column, with at least one row where ``scenario`` is
        ``'baseline'``. If several rows match, the first one is used.

    Returns
    -------
    pandas.DataFrame
        A new frame (``group`` itself is left untouched) with two extra columns:
        ``averted_count`` = baseline value - ``value`` and
        ``averted_percent`` = 100 * ``averted_count`` / baseline value.
        ``averted_percent`` is NaN for every row when the baseline value is 0,
        because a percent change from zero is undefined.

    Raises
    ------
    IndexError
        If ``group`` has no ``'baseline'`` row.
    """
    baseline_values = group.loc[group['scenario'] == 'baseline', 'value']
    if baseline_values.empty:
        # Same exception type as indexing an empty array, but with a message
        # that says what is actually missing.
        raise IndexError("group has no row with scenario == 'baseline'")
    baseline_value = baseline_values.iloc[0]

    averted_count = baseline_value - group['value']
    if baseline_value == 0:
        # Avoid +/-inf, which would poison the mean taken across draws later on.
        averted_percent = averted_count * float('nan')
    else:
        averted_percent = 100 * (averted_count / baseline_value)

    # assign() returns a copy; functions passed to groupby.apply should not
    # mutate the group they receive.
    return group.assign(averted_count=averted_count, averted_percent=averted_percent)

In [10]:
def pdf_generator(metric, groupby_columns, directory, pdf_title):
    """Plot the percent reduction in ``metric`` versus baseline and save it as a PDF.

    One page is written per (location, sex) pair. Each page shows, by year, the
    mean and the 2.5th-97.5th percentile band (taken across the rows that share
    every grouping column except ``input_draw``) of the percent of ``metric``
    averted relative to the ``baseline`` scenario, for each scenario listed in
    ``scenario_labels`` below.

    Parameters
    ----------
    metric : str
        ``'dalys'`` (``ylls.csv`` plus ``ylds.csv``), ``'cvd_deaths'``
        (``deaths.csv`` without ``other_causes``), ``'incidence'``
        (``transition_count.csv`` restricted to ``incident_transitions``
        below), or the stem of any other ``<metric>.csv`` file in ``directory``.
    groupby_columns : list of str
        Columns to stratify by. Must include ``'scenario'``, ``'sex'``,
        ``'location'`` and ``'year'``, which the baseline subtraction and the
        plotting loop use by name.
    directory : str
        Directory containing the CSV files. File names are appended by plain
        string concatenation, so it must end with a path separator.
    pdf_title : str
        Path of the PDF file to write.

    Notes
    -----
    Figures are closed once saved, so they end up in the PDF only and are not
    also rendered inline in the notebook.
    """
    if metric == 'dalys':
        # DALYs = YLLs + YLDs, each summed within the requested strata first.
        # Both files are read from `directory`, like every other metric.
        yll = pd.read_csv(directory + 'ylls.csv')
        yll = yll.rename(columns={'value': 'yll'})
        yll = yll.groupby(groupby_columns).yll.sum().reset_index()
        yld = pd.read_csv(directory + 'ylds.csv')
        yld = yld.rename(columns={'value': 'yld'})
        yld = yld.groupby(groupby_columns).yld.sum().reset_index()
        data = yll.merge(yld, on=groupby_columns)
        data['value'] = data['yll'] + data['yld']

    elif metric == 'cvd_deaths':
        data = pd.read_csv(directory + 'deaths.csv')
        data = data.loc[data.cause != 'other_causes']

    elif metric == 'incidence':
        # Transitions counted as incident events; edit this list by hand to
        # look at a different kind of incidence.
        incident_transitions = [
            'heart_failure_from_ischemic_heart_disease_to_acute_myocardial_infarction_and_heart_failure',
            'post_myocardial_infarction_to_acute_myocardial_infarction',
            'susceptible_to_ischemic_heart_disease_and_heart_failure_to_acute_myocardial_infarction',
        ]
        data = pd.read_csv(directory + 'transition_count.csv')
        # Filter on the `transition` column; calling isin() on the whole frame
        # yields a 2-D boolean frame that .loc cannot use as a row mask.
        data = data.loc[data['transition'].isin(incident_transitions)]

    else:
        data = pd.read_csv(directory + f'{metric}.csv')

    data = data.groupby(groupby_columns).value.sum().reset_index()

    # Disabled: convert counts to rates per unit of person-time.
    #state_person_time = pd.read_csv(directory + 'state_person_time.csv')
    #person_time = state_person_time.loc[state_person_time['state'].isin(["acute_ischemic_stroke", "chronic_ischemic_stroke", "susceptible_to_ischemic_stroke"])].reset_index()
    #person_time = person_time.rename(columns = {'value':'ptvalue'})
    #person_time = person_time.groupby(groupby_columns).ptvalue.sum().reset_index()
    #data = data.merge(person_time[groupby_columns+['ptvalue']], on=groupby_columns)
    #data['value_rate'] = data['value'] / data['ptvalue']

    # Compare every scenario with the baseline row of the same stratum.
    # group_keys=False keeps the original flat index; otherwise pandas >= 2.0
    # adds the group keys as index levels and the groupby below fails because
    # the names are then both index levels and columns.
    groupby_not_scenario = [x for x in groupby_columns if x != 'scenario']
    data = data.groupby(groupby_not_scenario, group_keys=False).apply(subtract_baseline)

    # Summarise across everything that is not a grouping column (the draws).
    groupby_not_draw = [x for x in groupby_columns if x != 'input_draw']
    summary_columns = ['mean', '2.5%', '97.5%']
    grouped = data.groupby(groupby_not_draw)
    # Kept so the plot can be switched to counts by hand (see graph_data below).
    data_count_final = grouped.averted_count.describe(percentiles=[.025, .975]).filter(summary_columns).reset_index()
    data_percent_final = grouped.averted_percent.describe(percentiles=[.025, .975]).filter(summary_columns).reset_index()

    # (scenario name in the data, legend label), in plotting order.
    scenario_labels = [
        ('baseline', 'Baseline'),
        ('lifestyle_100', 'Community-based NDPP'),
        ('outreach_100', 'Nurse intervention to increase medication adherence'),
        ('polypill_100', 'FDC antihypertensive'),
    ]
    year_ticks = [2022.5, 2025, 2027.5, 2030, 2032.5, 2035, 2037.5, 2040]
    year_tick_labels = ['', '2025', '', '2030', '', '2035', '', '2040']

    with PdfPages(pdf_title) as pdf:
        # Swap in data_count_final (and adjust the title/label text) to plot counts.
        graph_data = data_percent_final
        for location in graph_data.location.unique():
            for sex in graph_data.sex.unique():
                fig, ax = plt.subplots()
                in_panel = (graph_data.sex == sex) & (graph_data.location == location)
                for scenario, label in scenario_labels:
                    subdata = graph_data.loc[in_panel & (graph_data.scenario == scenario)]
                    ax.plot(subdata['year'], subdata['mean'], marker='o', label=label)
                    ax.fill_between(subdata['year'], subdata['2.5%'], subdata['97.5%'], alpha=0.3)
                ax.set_title(f'Percent Reduction in {metric} for {location} and {sex}')
                ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
                ax.set_xlabel('Year')
                ax.set_xticks(year_ticks)
                ax.set_xticklabels(year_tick_labels, rotation=90)
                ax.set_ylabel(f'Percent Reduction in {metric}')
                pdf.savefig(fig, bbox_inches='tight')
                # One figure per location x sex adds up quickly; release each
                # one as soon as it has been written to the PDF.
                plt.close(fig)

In [None]:
# Write one page per location and sex for the configured metric.
pdf_generator(
    metric=metric,
    groupby_columns=groupby_columns,
    directory=sim_results_dir,
    pdf_title='dalys_averted_1_10_24.pdf',
)