# Loading Packages and Data

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 8)

#import xarray as xr
import warnings
warnings.filterwarnings('ignore')
from matplotlib.backends.backend_pdf import PdfPages

from db_queries import get_outputs as go
from db_queries import get_ids, get_population
from get_draws.api import get_draws

import altair as alt
from vega_datasets import data as map_data

from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle
from reportlab.lib import colors

In [2]:
# List the files in the count_data folder of the simulation run used below.
ls /mnt/team/simulation_science/pub/models/vivarium_nih_us_cvd/results/final_runs/nih_us_cvd/2023_12_28_13_42_46/count_data

binned_ldl_exposure_time.csv  state_person_time.csv  ylls.csv
binned_sbp_exposure_time.csv  transition_count.csv
deaths.csv                    ylds.csv


In [3]:
# Folder holding the simulation count-data CSVs. File names are appended to this
# string by plain concatenation further down, so the trailing slash matters.
sim_results_dir = '/mnt/team/simulation_science/pub/models/vivarium_nih_us_cvd/results/final_runs/nih_us_cvd/2023_12_28_13_42_46/count_data/'

In [4]:
# Lift the row limit for displayed frames (the imports cell had set it to 8).
pd.set_option('display.max_rows', None)

def convert(word):
    """Turn an underscore-separated name into space-separated capitalized words.

    Each piece between underscores is passed through ``str.capitalize``. An
    empty piece (from leading, trailing or doubled underscores, or an empty
    input) is rendered as a literal ``'_'``.
    """
    pieces = []
    for part in word.split('_'):
        capitalized = part.capitalize()
        pieces.append(capitalized if capitalized else '_')
    return ' '.join(pieces)

In [5]:
# 2021 population for age group 157 (described as 25+ in the notes cell below),
# both sexes, for the 51 consecutive location ids 523 through 573.
US_pop = get_population(
    age_group_id=157,  # alternative that was tried: [22, 1]
    location_id=list(range(523, 574)),
    year_id=2021,
    sex_id=[1, 2],
    release_id=9,
)
# Alternative derivation (all ages minus under-5), kept for reference:
#US_pop = US_pop.groupby(['age_group_id','location_id']).population.mean().reset_index()
#under_5_value = US_pop[US_pop['age_group_id'] == 1].rename(columns = {'population':'under_5_pop'})
#US_pop = US_pop.merge(under_5_value[['under_5_pop','location_id']], on=['location_id'])
#US_pop['population'] -= US_pop['under_5_pop']
#US_pop = US_pop.loc[US_pop.age_group_id == 22].drop(columns=['under_5_pop'])

# Add the two sexes together: one population figure per location and year.
US_pop = (
    US_pop
    .groupby(['location_id', 'year_id'])['population']
    .sum()
    .reset_index()
)
US_pop.head()

Unnamed: 0,location_id,year_id,population
0,523,2021,3450207.0
1,524,2021,487784.6
2,525,2021,4928893.0
3,526,2021,2028129.0
4,527,2021,27350530.0


In [6]:
# Simulated person-time per state for 2021 under the baseline scenario:
# summed within each draw, then averaged across draws.
# NOTE(review): only the three ischemic-stroke states are kept; presumably they
# partition the whole simulated population so the sum is total person-time - confirm.
state_person_time = pd.read_csv(sim_results_dir + '/state_person_time.csv')
person_time = (
    state_person_time
    .loc[lambda df: df['state'].isin(["acute_ischemic_stroke", "chronic_ischemic_stroke", "susceptible_to_ischemic_stroke"])]
    .reset_index()
    .loc[lambda df: (df['year'] == 2021) & (df.scenario == 'baseline')]
    .groupby(['input_draw', 'location'])['value']
    .sum()
    .reset_index()
    .groupby(['location'])['value']
    .mean()
    .reset_index()
    # Display-style names ("Alabama") are what the location map is joined on later.
    .assign(location_name=lambda df: df['location'].apply(convert))
    .drop(columns=['location'])
)
person_time.head()

Unnamed: 0,value,location_name
0,78152.135797,Alabama
1,76384.873922,Alaska
2,77063.313347,Arizona
3,77054.014511,Arkansas
4,77973.319097,California


In [7]:
# Attach state names/abbreviations and the simulated person-time to the population
# table, then derive the factor that scales simulated counts to the real population.
location_map = pd.read_csv('/ihme/homes/lutzes/vivarium_research_nih_us_cvd/CVD_locations.csv')
US_pop = (
    US_pop
    .merge(location_map, on=['location_id'])
    .merge(person_time, on=['location_name'])
    # scalar = real population / simulated person-time (a "/ 100_000" variant was also tried)
    .assign(scalar=lambda df: df['population'] / df['value'])
    .rename(columns={'location_name': 'location'})
)
US_pop.head()

Unnamed: 0,location_id,year_id,population,location,abbreviation,value,scalar
0,523,2021,3450207.0,Alabama,AL,78152.135797,44.147314
1,524,2021,487784.6,Alaska,AK,76384.873922,6.385879
2,525,2021,4928893.0,Arizona,AZ,77063.313347,63.959001
3,526,2021,2028129.0,Arkansas,AR,77054.014511,26.320869
4,527,2021,27350530.0,California,CA,77973.319097,350.767843


In [8]:
## Okay, I feel good about the scalar values here.
## Currently set up so that age group 157 is 25+: we take each state's population (over 25) in 2021 and divide by the baseline person-time in 2021.

## I also tested this as age group 22 (all ages) minus age group 1 (under 5), then dividing by
## 100,000 (the total population per draw), and got basically the same thing.

## Also, for Alabama: the total state population is about 5 million and we had 100,000 per seed/draw, which
## would give a scalar of 50. Ours is a bit different since we're not including the under-5 group.

In [9]:
def subtract_baseline(group):
    """Add averted-count and averted-percent columns relative to the baseline row.

    The reference is the 'value' of the first row whose 'scenario' is
    'baseline'; an IndexError is raised if the group has no such row.
    The columns are written onto ``group`` itself, which is also returned.
    """
    is_baseline = group['scenario'] == 'baseline'
    reference = group.loc[is_baseline, 'value'].to_numpy()[0]
    averted = reference - group['value']
    group['averted_count'] = averted
    group['averted_percent'] = 100 * (averted / reference)
    return group

def person_time_scalar(group):
    """Add 'pt_scalar': baseline person-time divided by each row's person-time.

    The numerator is the 'value' of the first row whose 'scenario' is
    'baseline'; an IndexError is raised if the group has no such row.
    The column is written onto ``group`` itself, which is also returned.
    """
    baseline_rows = group.loc[group['scenario'] == 'baseline', 'value']
    reference = baseline_rows.to_numpy()[0]
    group['pt_scalar'] = reference / group['value']
    return group
def make_year_chunks(dataframe):
    """Add a 'year_chunks' column holding the midpoint of each two-year window.

    Years 2021-2022 map to 2021.5, 2023-2024 to 2023.5, and so on up to
    2037-2038 -> 2037.5. Every other value of 'year' (later years, years
    before 2021, missing values) falls through to 2039.5.

    The column is written onto ``dataframe`` itself, which is also returned.
    (An earlier four-year-window variant that used to sit above this function
    as a string literal has been dropped; it was never executed.)
    """
    year = dataframe['year']
    window_starts = range(2021, 2039, 2)  # 2021, 2023, ..., 2037
    in_window = [year.isin([start, start + 1]).to_numpy() for start in window_starts]
    midpoints = [start + 0.5 for start in window_starts]
    dataframe['year_chunks'] = np.select(in_window, midpoints, default=2039.5)
    return dataframe

def make_new_trans(dataframe):
    """Add 'transition_new', grouping raw transition names into CVD event types.

    Transitions into an acute myocardial infarction state become
    'Myocardial Infarction', transitions into acute ischemic stroke become
    'Ischemic Stroke', transitions into a heart-failure state become
    'Heart Failure', and everything else is labelled 'Other'.
    The column is written onto ``dataframe`` itself, which is also returned.
    """
    event_transitions = {
        'Myocardial Infarction': [
            'susceptible_to_ischemic_heart_disease_and_heart_failure_to_acute_myocardial_infarction',
            'heart_failure_from_ischemic_heart_disease_to_acute_myocardial_infarction_and_heart_failure',
            'post_myocardial_infarction_to_acute_myocardial_infarction',
        ],
        'Ischemic Stroke': [
            'susceptible_to_ischemic_stroke_to_acute_ischemic_stroke',
            'chronic_ischemic_stroke_to_acute_ischemic_stroke',
        ],
        'Heart Failure': [
            'post_myocardial_infarction_to_heart_failure_from_ischemic_heart_disease',
            'susceptible_to_ischemic_heart_disease_and_heart_failure_to_heart_failure_from_ischemic_heart_disease',
            'susceptible_to_ischemic_heart_disease_and_heart_failure_to_heart_failure_residual',
        ],
    }
    matches = [
        dataframe.transition.isin(members).to_numpy()
        for members in event_transitions.values()
    ]
    dataframe['transition_new'] = np.select(matches, list(event_transitions), default='Other')
    return dataframe

def make_age_buckets(dataframe):
    """Add 'age_bucket', merging pairs of five-year age groups into ten-year buckets.

    Any value of 'age' not listed below (e.g. the groups above 84) is
    labelled 'over_85'. The column is written onto ``dataframe`` itself,
    which is also returned.
    """
    bucket_members = (
        ('under_35', ['25_to_29', '30_to_34']),
        ('35_to_45', ['35_to_39', '40_to_44']),
        ('45_to_55', ['45_to_49', '50_to_54']),
        ('55_to_65', ['55_to_59', '60_to_64']),
        ('65_to_75', ['65_to_69', '70_to_74']),
        ('75_to_85', ['75_to_79', '80_to_84']),
    )
    age = dataframe['age']
    hits = [age.isin(members).to_numpy() for _, members in bucket_members]
    names = [name for name, _ in bucket_members]
    dataframe['age_bucket'] = np.select(hits, names, default='over_85')
    return dataframe

def make_new_causes(dataframe):
    """Add 'cause_new', grouping causes into Ischemic Stroke / Ischemic Heart Disease / Other.

    Acute and chronic ischemic stroke become 'Ischemic Stroke'; the myocardial
    infarction states and heart failure from ischemic heart disease become
    'Ischemic Heart Disease'; every other cause is 'Other'.
    The column is written onto ``dataframe`` itself, which is also returned.
    """
    cause_groups = {
        'Ischemic Stroke': [
            'acute_ischemic_stroke',
            'chronic_ischemic_stroke',
        ],
        'Ischemic Heart Disease': [
            'acute_myocardial_infarction',
            'acute_myocardial_infarction_and_heart_failure',
            'heart_failure_from_ischemic_heart_disease',
            'post_myocardial_infarction',
        ],
    }
    cause = dataframe['cause']
    in_group = [cause.isin(members).to_numpy() for members in cause_groups.values()]
    dataframe['cause_new'] = np.select(in_group, list(cause_groups), default='Other')
    return dataframe

# Exploring Data

In [10]:
# Load the deaths output and list the distinct cause names it contains.
deaths = pd.read_csv(sim_results_dir + 'deaths.csv')
deaths.cause.unique()

array(['acute_ischemic_stroke', 'acute_myocardial_infarction',
       'acute_myocardial_infarction_and_heart_failure',
       'chronic_ischemic_stroke',
       'heart_failure_from_ischemic_heart_disease',
       'heart_failure_residual', 'other_causes',
       'post_myocardial_infarction'], dtype=object)

In [11]:
# Load the transition counts and list the distinct transition names;
# make_new_trans groups these names into event types.
tran = pd.read_csv(sim_results_dir + 'transition_count.csv')
tran.transition.unique()

array(['acute_ischemic_stroke_to_chronic_ischemic_stroke',
       'acute_myocardial_infarction_and_heart_failure_to_heart_failure_from_ischemic_heart_disease',
       'acute_myocardial_infarction_to_post_myocardial_infarction',
       'chronic_ischemic_stroke_to_acute_ischemic_stroke',
       'heart_failure_from_ischemic_heart_disease_to_acute_myocardial_infarction_and_heart_failure',
       'post_myocardial_infarction_to_acute_myocardial_infarction',
       'post_myocardial_infarction_to_heart_failure_from_ischemic_heart_disease',
       'susceptible_to_ischemic_heart_disease_and_heart_failure_to_acute_myocardial_infarction',
       'susceptible_to_ischemic_heart_disease_and_heart_failure_to_heart_failure_from_ischemic_heart_disease',
       'susceptible_to_ischemic_heart_disease_and_heart_failure_to_heart_failure_residual',
       'susceptible_to_ischemic_stroke_to_acute_ischemic_stroke'],
      dtype=object)

# Making the Data Table

In [12]:
def make_new_trans(dataframe):
    """Add 'transition_new', grouping raw transition names into CVD event types.

    NOTE(review): a function with this same name and the same mapping is also
    defined earlier in this notebook; this later definition is the one in
    effect once the cell runs.

    Transitions into an acute myocardial infarction state become
    'Myocardial Infarction', transitions into acute ischemic stroke become
    'Ischemic Stroke', transitions into a heart-failure state become
    'Heart Failure', and everything else is labelled 'Other'.
    The column is written onto ``dataframe`` itself, which is also returned.
    """
    event_transitions = {
        'Myocardial Infarction': [
            'susceptible_to_ischemic_heart_disease_and_heart_failure_to_acute_myocardial_infarction',
            'heart_failure_from_ischemic_heart_disease_to_acute_myocardial_infarction_and_heart_failure',
            'post_myocardial_infarction_to_acute_myocardial_infarction',
        ],
        'Ischemic Stroke': [
            'susceptible_to_ischemic_stroke_to_acute_ischemic_stroke',
            'chronic_ischemic_stroke_to_acute_ischemic_stroke',
        ],
        'Heart Failure': [
            'post_myocardial_infarction_to_heart_failure_from_ischemic_heart_disease',
            'susceptible_to_ischemic_heart_disease_and_heart_failure_to_heart_failure_from_ischemic_heart_disease',
            'susceptible_to_ischemic_heart_disease_and_heart_failure_to_heart_failure_residual',
        ],
    }
    matches = [
        dataframe.transition.isin(members).to_numpy()
        for members in event_transitions.values()
    ]
    dataframe['transition_new'] = np.select(matches, list(event_transitions), default='Other')
    return dataframe

In [15]:
def events_pdf_table_generator(metric, groupby_columns, directory):
    """Build the per-state table of CVD events averted by each intervention.

    Transition counts are grouped into event types, scaled to the real state
    population (``US_pop['scalar']``), adjusted by a person-time ratio,
    averaged over the years after 2025 and compared with the baseline
    scenario within each draw. The mean and the 2.5th/97.5th percentiles
    across draws are reported.

    Parameters
    ----------
    metric
        Not read; the averted-count table is always returned. Kept so that
        existing calls continue to work.
    groupby_columns : list of str
        Only used, with 'scenario' removed, as the grouping for the
        person-time ratio.
    directory : str
        Folder containing ``transition_count.csv`` and
        ``state_person_time.csv``. File names are appended directly, so it
        must end with a path separator.

    Returns
    -------
    pandas.DataFrame
        Columns Intervention, CVD Event, Location, Mean, 95% UI Lower,
        95% UI Upper, rounded to 2 decimals, with the baseline scenario and
        the 'Other' event type removed.
    """
    draw_year_keys = ['input_draw', 'scenario', 'year', 'location']

    data = pd.read_csv(directory + 'transition_count.csv')
    data = make_new_trans(data)

    # Scale simulated counts up to the real state population.
    data['location'] = data.location.apply(convert)
    data = data.merge(US_pop[['location', 'scalar']], on=['location'])
    data['value'] = data['value'] * data['scalar']
    data = data.groupby(['input_draw', 'scenario', 'year', 'transition_new', 'location']).value.sum().reset_index()

    # Person-time is read from the same folder as the counts (previously it came
    # from the global sim_results_dir regardless of `directory`).
    state_person_time = pd.read_csv(directory + 'state_person_time.csv')
    stroke_states = ["acute_ischemic_stroke", "chronic_ischemic_stroke", "susceptible_to_ischemic_stroke"]
    person_time = state_person_time.loc[state_person_time['state'].isin(stroke_states)].copy()
    person_time['location'] = person_time.location.apply(convert)
    person_time = person_time.groupby(draw_year_keys).value.sum().reset_index()
    pt_not_scenario = [x for x in groupby_columns if x != 'scenario']
    # group_keys=False keeps the original flat index; the pandas 2.x default would
    # push the group keys into the index and break the column-based steps below.
    # NOTE(review): unless 'year' is in groupby_columns, the reference is the first
    # baseline row of each group (the earliest year after the sorted groupby above),
    # so every year is rescaled to that year's baseline person-time - confirm intended.
    person_time = person_time.groupby(pt_not_scenario, group_keys=False).apply(person_time_scalar)

    data = data.merge(person_time[draw_year_keys + ['pt_scalar']], on=draw_year_keys)
    data['value'] = data['value'] * data['pt_scalar']
    # Keep 2026 onward and average over those years (annual mean per draw).
    data = data.loc[data.year > 2025]
    data = data.groupby(['input_draw', 'scenario', 'transition_new', 'location']).value.mean().reset_index()

    # Averted counts relative to baseline, within each draw / event type / state.
    data = data.groupby(['input_draw', 'transition_new', 'location'], group_keys=False).apply(subtract_baseline)

    # Summarise across draws.
    data_count_final = (
        data.groupby(['scenario', 'transition_new', 'location']).averted_count
        .describe(percentiles=[.025, .975])
        .filter(['mean', '2.5%', '97.5%'])
        .reset_index()
        .round(2)
        .rename(columns={'2.5%': '95% UI Lower', '97.5%': '95% UI Upper', 'transition_new': 'CVD Event',
                         'scenario': 'Intervention', 'location': 'Location', 'mean': 'Mean'})
    )
    keep = (data_count_final.Intervention != 'baseline') & (data_count_final['CVD Event'] != 'Other')
    data_count_final = data_count_final.loc[keep].copy()

    # Any scenario other than the two named here is labelled 'FDC antihypertensive'.
    labels = {
        'lifestyle_100': 'Community-based NDPP',
        'outreach_100': 'Nurse intervention to increase medication adherence',
    }
    data_count_final['Intervention'] = [
        labels.get(scenario, 'FDC antihypertensive') for scenario in data_count_final['Intervention']
    ]
    return data_count_final

In [16]:
# Inside events_pdf_table_generator this list is only used (minus 'scenario')
# to group person-time for the pt_scalar step.
groupby_columns = ['input_draw','scenario','location']
# The first argument ('count') is not read by events_pdf_table_generator.
event_table = events_pdf_table_generator('count', groupby_columns, sim_results_dir)
# Uncomment to display the full table:
#event_table

In [17]:
# Put the column names into the frame as a data row so they are included when the
# rows are copied into the PDF table: add them at label -1, shift every label up
# by one, then sort so that row ends up first (label 0).
# This modifies event_table in place - re-running the cell adds another header row.
event_table.loc[-1] = event_table.columns
event_table.index = event_table.index + 1 
event_table = event_table.sort_index() 
event_table.head()

Unnamed: 0,Intervention,CVD Event,Location,Mean,95% UI Lower,95% UI Upper
0,Intervention,CVD Event,Location,Mean,95% UI Lower,95% UI Upper
205,Community-based NDPP,Heart Failure,Alabama,109.91,80.71,139.9
206,Community-based NDPP,Heart Failure,Alaska,14.89,9.49,23.34
207,Community-based NDPP,Heart Failure,Arizona,130.4,71.5,191.79
208,Community-based NDPP,Heart Failure,Arkansas,52.81,32.05,80.05


In [18]:
# Output document for the events table; the relative file name is resolved against
# the working directory. `pdf` is reassigned for each later table.
pdf = SimpleDocTemplate("cvd_events_by_state_df.pdf", pagesize=letter)

In [19]:
# One plain list per row of event_table (header row included), wrapped in a Table.
table_data = [list(record) for _, record in event_table.iterrows()]

table = Table(table_data)

In [22]:
# Table styling. Each command names a cell range by two corner tuples.
# NOTE(review): presumably the tuples are (column, row), so (0, 0)..(-1, 0) is the
# header row inserted earlier and (0, 1)..(-1, -1) is every data row - confirm
# against the reportlab documentation.
table_style = TableStyle([
    # Range (0, 0)..(-1, 0): grey background, light text, larger font.
    ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
    ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
    # Whole table: thin black inner grid, centred text.
    ('INNERGRID', (0,0), (-1,-1), 0.25, colors.black),
    ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
    ('FONTSIZE', (0, 0), (-1, 0), 10),
    ('BOTTOMPADDING', (0, 0), (-1, 0), 8),
    # Range (0, 1)..(-1, -1): white background, black text, smaller font.
    ('BACKGROUND', (0, 1), (-1, -1), colors.white),
    ('TEXTCOLOR', (0, 1), (-1, -1), colors.black),
    ('ALIGN', (0, 1), (-1, -1), 'CENTER'),
    ('FONTSIZE', (0, 1), (-1, -1), 8),
    ('BOTTOMPADDING', (0, 1), (-1, -1), 4),
])

table.setStyle(table_style)

In [23]:
# The document consists of a single element: the styled events table.
pdf_table = [table]

pdf.build(pdf_table)

In [26]:
def deaths_generator(dataframe, groupby_columns, directory):
    """Build the table of CVD deaths averted by each intervention.

    Deaths from the listed heart-failure, myocardial-infarction and
    ischemic-stroke causes are scaled to the real state population
    (``US_pop['scalar']``), adjusted by a person-time ratio, averaged over
    the years after 2025 and compared with the baseline scenario within each
    draw. The mean and the 2.5th/97.5th percentiles across draws are reported.

    Parameters
    ----------
    dataframe
        Not read; the averted-count table is always returned. Kept so that
        existing calls continue to work.
    groupby_columns : list of str
        Stratification columns. The code relies on 'location' being present;
        'scenario' and 'input_draw' are removed from it for the baseline
        comparison and the across-draw summary respectively.
    directory : str
        Folder containing ``deaths.csv`` and ``state_person_time.csv``. File
        names are appended directly, so it must end with a path separator.

    Returns
    -------
    pandas.DataFrame
        ``groupby_columns`` without 'input_draw' (with 'scenario' renamed to
        'Intervention' and 'location' to 'Location') plus Mean, 95% UI Lower
        and 95% UI Upper, rounded to 2 decimals, baseline rows removed.
    """
    cvd_causes = [
        'heart_failure_residual',
        'heart_failure_from_ischemic_heart_disease',
        'acute_myocardial_infarction',
        'acute_myocardial_infarction_and_heart_failure',
        'post_myocardial_infarction',
        'acute_ischemic_stroke',
        'chronic_ischemic_stroke',
    ]
    stroke_states = ["acute_ischemic_stroke", "chronic_ischemic_stroke", "susceptible_to_ischemic_stroke"]
    yearly_columns = groupby_columns + ['year']
    groupby_not_scenario = [x for x in groupby_columns if x != 'scenario']
    groupby_not_draw = [x for x in groupby_columns if x != 'input_draw']

    data = pd.read_csv(directory + 'deaths.csv')
    # .copy() so the column assignment below does not write into a slice.
    data = data.loc[data.cause.isin(cvd_causes)].copy()

    # Scale simulated counts up to the real state population.
    data['location'] = data.location.apply(convert)
    data = data.merge(US_pop[['location', 'scalar']], on=['location'])
    data['value'] = data['value'] * data['scalar']
    data = data.groupby(yearly_columns).value.sum().reset_index()

    # Person-time is read from the same folder as the deaths (previously it came
    # from the global sim_results_dir regardless of `directory`).
    state_person_time = pd.read_csv(directory + 'state_person_time.csv')
    person_time = state_person_time.loc[state_person_time['state'].isin(stroke_states)].copy()
    person_time['location'] = person_time.location.apply(convert)
    person_time = person_time.groupby(yearly_columns).value.sum().reset_index()
    # group_keys=False keeps the original flat index; the pandas 2.x default would
    # push the group keys into the index and break the column-based steps below.
    # NOTE(review): 'year' is not a grouping key here, so the reference is the first
    # baseline row of each group (the earliest year after the sorted groupby above)
    # - confirm this is intended.
    person_time = person_time.groupby(groupby_not_scenario, group_keys=False).apply(person_time_scalar)

    data = data.merge(person_time[yearly_columns + ['pt_scalar']], on=yearly_columns)
    data['value'] = data['value'] * data['pt_scalar']
    # Keep 2026 onward and average over those years (annual mean per draw).
    data = data.loc[data.year > 2025]
    data = data.groupby(groupby_columns).value.mean().reset_index()

    # Averted counts relative to baseline, within each draw and stratum.
    data = (
        data.groupby(groupby_not_scenario, group_keys=False)
        .apply(subtract_baseline)
        .reset_index(drop=True)
    )

    # Summarise across draws.
    data_count_final = (
        data.groupby(groupby_not_draw).averted_count
        .describe(percentiles=[.025, .975])
        .filter(['mean', '2.5%', '97.5%'])
        .reset_index()
        .round(2)
        .rename(columns={'2.5%': '95% UI Lower', '97.5%': '95% UI Upper',
                         'scenario': 'Intervention', 'location': 'Location', 'mean': 'Mean'})
    )
    data_count_final = data_count_final.loc[data_count_final.Intervention != 'baseline'].copy()

    # Any scenario other than the two named here is labelled 'FDC antihypertensive'.
    labels = {
        'lifestyle_100': 'Community-based NDPP',
        'outreach_100': 'Nurse intervention to increase medication adherence',
    }
    data_count_final['Intervention'] = [
        labels.get(scenario, 'FDC antihypertensive') for scenario in data_count_final['Intervention']
    ]
    return data_count_final

In [27]:
# Stratify by draw, scenario and state.
groupby_columns = ['input_draw','scenario','location']
# The first argument is never read by deaths_generator, so the placeholder string has no effect.
death_table_2 = deaths_generator('oh well', groupby_columns, sim_results_dir)

In [28]:
# Put the column names into the frame as a data row so they are included when the
# rows are copied into the PDF table: add them at label -1, shift every label up
# by one, then sort so that row ends up first (label 0).
# This modifies death_table_2 in place - re-running the cell adds another header row.
death_table_2.loc[-1] = death_table_2.columns
death_table_2.index = death_table_2.index + 1 
death_table_2 = death_table_2.sort_index() 
death_table_2.head()

Unnamed: 0,Intervention,Location,Mean,95% UI Lower,95% UI Upper
0,Intervention,Location,Mean,95% UI Lower,95% UI Upper
52,Community-based NDPP,Alabama,134.34,86.77,174.76
53,Community-based NDPP,Alaska,16.96,11.78,23.2
54,Community-based NDPP,Arizona,161.05,116.02,214.44
55,Community-based NDPP,Arkansas,78.86,49.94,106.25


In [29]:
# Output document for the deaths table; reuses (and replaces) the `pdf` name from the events section.
pdf = SimpleDocTemplate("cvd_deaths_by_state_df.pdf", pagesize=letter)

In [33]:
# One plain list per row of death_table_2 (header row included), wrapped in a Table.
table_data = [list(record) for _, record in death_table_2.iterrows()]

table = Table(table_data)

In [34]:
# Table styling, the same command list as used for the events table.
# NOTE(review): presumably the corner tuples are (column, row), so (0, 0)..(-1, 0)
# is the header row and (0, 1)..(-1, -1) is every data row - confirm against the
# reportlab documentation.
table_style = TableStyle([
    # Range (0, 0)..(-1, 0): grey background, light text, larger font.
    ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
    ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
    # Whole table: thin black inner grid, centred text.
    ('INNERGRID', (0,0), (-1,-1), 0.25, colors.black),
    ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
    ('FONTSIZE', (0, 0), (-1, 0), 10),
    ('BOTTOMPADDING', (0, 0), (-1, 0), 8),
    # Range (0, 1)..(-1, -1): white background, black text, smaller font.
    ('BACKGROUND', (0, 1), (-1, -1), colors.white),
    ('TEXTCOLOR', (0, 1), (-1, -1), colors.black),
    ('ALIGN', (0, 1), (-1, -1), 'CENTER'),
    ('FONTSIZE', (0, 1), (-1, -1), 8),
    ('BOTTOMPADDING', (0, 1), (-1, -1), 4),
])

table.setStyle(table_style)

In [35]:
# The document consists of a single element: the styled deaths table.
pdf_table = [table]

pdf.build(pdf_table)

In [103]:
def make_new_causes(dataframe):
    """Add 'cause_new', grouping causes into Ischemic Stroke / Heart Failure / Other.

    NOTE(review): this redefines make_new_causes with a different mapping from
    the version defined earlier in this notebook (which used an
    'Ischemic Heart Disease' group); whichever cell ran last is in effect.

    Acute and chronic ischemic stroke become 'Ischemic Stroke'; heart failure
    from ischemic heart disease and residual heart failure become
    'Heart Failure'; every other cause is 'Other'. The myocardial-infarction
    causes ('acute_myocardial_infarction',
    'acute_myocardial_infarction_and_heart_failure',
    'post_myocardial_infarction') are therefore labelled 'Other' here.
    The column is written onto ``dataframe`` itself, which is also returned.
    """
    cause_groups = {
        'Ischemic Stroke': [
            'acute_ischemic_stroke',
            'chronic_ischemic_stroke',
        ],
        'Heart Failure': [
            'heart_failure_from_ischemic_heart_disease',
            'heart_failure_residual',
        ],
    }
    cause = dataframe['cause']
    in_group = [cause.isin(members).to_numpy() for members in cause_groups.values()]
    dataframe['cause_new'] = np.select(in_group, list(cause_groups), default='Other')
    return dataframe

In [37]:
def daly_generator(dataframe, groupby_columns, directory):
    """Build the table of DALYs averted by each intervention.

    DALYs are computed as YLLs + YLDs (all rows of both files are summed
    within each stratum and year), scaled to the real state population
    (``US_pop['scalar']``), adjusted by a person-time ratio, averaged over
    the years after 2025 and compared with the baseline scenario within each
    draw. The mean and the 2.5th/97.5th percentiles across draws are reported.

    Parameters
    ----------
    dataframe
        Not read; the averted-count table is always returned. Kept so that
        existing calls continue to work.
    groupby_columns : list of str
        Stratification columns. The code relies on 'location' being present;
        'scenario' and 'input_draw' are removed from it for the baseline
        comparison and the across-draw summary respectively.
    directory : str
        Folder containing ``ylls.csv``, ``ylds.csv`` and
        ``state_person_time.csv``. File names are appended directly, so it
        must end with a path separator.

    Returns
    -------
    pandas.DataFrame
        ``groupby_columns`` without 'input_draw' (with 'scenario' renamed to
        'Intervention' and 'location' to 'Location') plus Mean, 95% UI Lower
        and 95% UI Upper, rounded to 2 decimals, baseline rows removed.
    """
    stroke_states = ["acute_ischemic_stroke", "chronic_ischemic_stroke", "susceptible_to_ischemic_stroke"]
    yearly_columns = groupby_columns + ['year']
    groupby_not_scenario = [x for x in groupby_columns if x != 'scenario']
    groupby_not_draw = [x for x in groupby_columns if x != 'input_draw']

    # All inputs are read from `directory` (previously the argument was ignored and
    # every file came from the global sim_results_dir).
    yll = pd.read_csv(directory + 'ylls.csv')
    yll = yll.rename(columns={'value': 'yll'})
    yll = yll.groupby(yearly_columns).yll.sum().reset_index()
    yld = pd.read_csv(directory + 'ylds.csv')
    # 'cause_of_disability' is renamed to 'cause' so 'cause' can appear in
    # groupby_columns for both files.
    yld = yld.rename(columns={'value': 'yld', 'cause_of_disability': 'cause'})
    yld = yld.groupby(yearly_columns).yld.sum().reset_index()
    data = yll.merge(yld, on=yearly_columns)
    data['value'] = data['yll'] + data['yld']

    # Scale simulated values up to the real state population.
    data['location'] = data.location.apply(convert)
    data = data.merge(US_pop[['location', 'scalar']], on=['location'])
    data['value'] = data['value'] * data['scalar']
    data = data.groupby(yearly_columns).value.sum().reset_index()

    state_person_time = pd.read_csv(directory + 'state_person_time.csv')
    person_time = state_person_time.loc[state_person_time['state'].isin(stroke_states)].copy()
    person_time['location'] = person_time.location.apply(convert)
    person_time = person_time.groupby(yearly_columns).value.sum().reset_index()
    # group_keys=False keeps the original flat index; the pandas 2.x default would
    # push the group keys into the index and break the column-based steps below.
    # NOTE(review): 'year' is not a grouping key here, so the reference is the first
    # baseline row of each group (the earliest year after the sorted groupby above)
    # - confirm this is intended.
    person_time = person_time.groupby(groupby_not_scenario, group_keys=False).apply(person_time_scalar)

    data = data.merge(person_time[yearly_columns + ['pt_scalar']], on=yearly_columns)
    data['value'] = data['value'] * data['pt_scalar']
    # Keep 2026 onward and average over those years (annual mean per draw).
    data = data.loc[data.year > 2025]
    data = data.groupby(groupby_columns).value.mean().reset_index()

    # Averted DALYs relative to baseline, within each draw and stratum.
    data = (
        data.groupby(groupby_not_scenario, group_keys=False)
        .apply(subtract_baseline)
        .reset_index(drop=True)
    )

    # Summarise across draws.
    data_count_final = (
        data.groupby(groupby_not_draw).averted_count
        .describe(percentiles=[.025, .975])
        .filter(['mean', '2.5%', '97.5%'])
        .reset_index()
        .round(2)
        .rename(columns={'2.5%': '95% UI Lower', '97.5%': '95% UI Upper',
                         'scenario': 'Intervention', 'location': 'Location', 'mean': 'Mean'})
    )
    data_count_final = data_count_final.loc[data_count_final.Intervention != 'baseline'].copy()

    # Any scenario other than the two named here is labelled 'FDC antihypertensive'.
    labels = {
        'lifestyle_100': 'Community-based NDPP',
        'outreach_100': 'Nurse intervention to increase medication adherence',
    }
    data_count_final['Intervention'] = [
        labels.get(scenario, 'FDC antihypertensive') for scenario in data_count_final['Intervention']
    ]
    return data_count_final

In [38]:
# Stratify by draw, scenario and state.
groupby_columns = ['input_draw','scenario','location']
# 'percent' is not read by daly_generator; the table it returns holds averted counts.
daly_table_test = daly_generator('percent', groupby_columns, sim_results_dir)

In [39]:
# Put the column names into the frame as a data row so they are included when the
# rows are copied into the PDF table: add them at label -1, shift every label up
# by one, then sort so that row ends up first (label 0).
# This modifies daly_table_test in place - re-running the cell adds another header row.
daly_table_test.loc[-1] = daly_table_test.columns
daly_table_test.index = daly_table_test.index + 1 
daly_table_test = daly_table_test.sort_index() 
daly_table_test.head()

Unnamed: 0,Intervention,Location,Mean,95% UI Lower,95% UI Upper
0,Intervention,Location,Mean,95% UI Lower,95% UI Upper
52,Community-based NDPP,Alabama,2331.35,1487.49,2938.54
53,Community-based NDPP,Alaska,320.86,177.77,459.37
54,Community-based NDPP,Arizona,2841.89,1379.84,3776.37
55,Community-based NDPP,Arkansas,1442.99,922.91,1956.86


In [40]:
# Output document for the DALY table; reuses (and replaces) the `pdf` name from the earlier sections.
pdf = SimpleDocTemplate("cvd_DALYs_by_state_df.pdf", pagesize=letter)

In [41]:
# One plain list per row of daly_table_test (header row included), wrapped in a Table.
table_data = [list(record) for _, record in daly_table_test.iterrows()]

table = Table(table_data)

In [42]:
# Table styling, the same command list as used for the events and deaths tables.
# NOTE(review): presumably the corner tuples are (column, row), so (0, 0)..(-1, 0)
# is the header row and (0, 1)..(-1, -1) is every data row - confirm against the
# reportlab documentation.
table_style = TableStyle([
    # Range (0, 0)..(-1, 0): grey background, light text, larger font.
    ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
    ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
    # Whole table: thin black inner grid, centred text.
    ('INNERGRID', (0,0), (-1,-1), 0.25, colors.black),
    ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
    ('FONTSIZE', (0, 0), (-1, 0), 10),
    ('BOTTOMPADDING', (0, 0), (-1, 0), 8),
    # Range (0, 1)..(-1, -1): white background, black text, smaller font.
    ('BACKGROUND', (0, 1), (-1, -1), colors.white),
    ('TEXTCOLOR', (0, 1), (-1, -1), colors.black),
    ('ALIGN', (0, 1), (-1, -1), 'CENTER'),
    ('FONTSIZE', (0, 1), (-1, -1), 8),
    ('BOTTOMPADDING', (0, 1), (-1, -1), 4),
])

table.setStyle(table_style)

In [43]:
# The document consists of a single element: the styled DALY table.
pdf_table = [table]

pdf.build(pdf_table)