In [2]:
import pandas as pd
import numpy as np
from vivarium import Artifact
import os
from pathlib import Path
from db_queries import get_outputs, get_ids, get_model_results, get_population
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import gbd_mapping
from matplotlib.backends.backend_pdf import PdfPages
# Make pandas treat +/-inf as missing values in its NA handling (dropna, isna, ...).
pd.set_option('use_inf_as_na', True)
# Global seaborn theme (context, grid style, font scale, default figure size, palette).
sns.set(context = 'paper', style='whitegrid', font_scale=1.8, rc = {'axes.spines.right':False, 'axes.spines.top': False, 'figure.figsize':(12.7,8.6)}, palette='Set1')


## Set the variable `save_path_j` to the local directory where result files should be written



In [3]:
# Lookup tables used throughout the notebook:
#   'cause'           - model state / sequela-level cause name -> aggregate cause name
#   'cause_client'    - aggregate cause name -> display label
#   'sequela'         - integer id -> model state name (presumably GBD sequela ids; TODO confirm)
#   'location'        - model location key -> display label
#   'sex'             - model sex label -> display label
#   'scenario'        - scenario key -> display label
#   'measure', 'measure_client' - measure key -> normalised / display label
#   'model_7_results' - location key -> '<location>/<run timestamp>' sub-directory of the results root
# NOTE(review): 'chronic_kidney_disease' appears twice in the 'cause' dict; both entries
# map to the same value, so the duplicate has no effect.
mapping_dict_data = {'cause': {'acute_myocardial_infarction':'ischemic_heart_disease','post_myocardial_infarction':'ischemic_heart_disease','acute_ischemic_stroke':'ischemic_stroke','post_ischemic_stroke':'ischemic_stroke', 'ischemic_stroke':'ischemic_stroke', 'chronic_kidney_disease':'chronic_kidney_disease', 'ischemic_heart_disease':'ischemic_heart_disease', 'diabetes_mellitus':'diabetes_mellitus', 'other_causes':'other_causes', 'albuminuria':'chronic_kidney_disease', 'stage_iii_chronic_kidney_disease':'chronic_kidney_disease','stage_iv_chronic_kidney_disease':'chronic_kidney_disease','stage_v_chronic_kidney_disease':'chronic_kidney_disease','moderate_diabetes_mellitus':'diabetes_mellitus','severe_diabetes_mellitus':'diabetes_mellitus', 'chronic_kidney_disease':'chronic_kidney_disease', 'CVD':'CVD'},
                    'cause_client': {'ischemic_heart_disease':'Ischemic heart disease', 'ischemic_stroke':'Ischemic stroke', 'chronic_kidney_disease':'Chronic kidney disease', 'diabetes_mellitus':'Diabetes mellitus', 'other_causes':'Other causes', 'CVD':'CVD'},
                    'sequela':{378:'acute_myocardial_infarction', 379:'acute_myocardial_infarction',380:'post_myocardial_infarction', 381:'post_myocardial_infarction', 382:'post_myocardial_infarction', 383:'post_myocardial_infarction', 384:'post_myocardial_infarction', 385:'post_myocardial_infarction', 953:'post_myocardial_infarction', 1040:'post_myocardial_infarction', 5726:'post_myocardial_infarction', 386: 'acute_ischemic_stroke', 387: 'acute_ischemic_stroke', 388: 'acute_ischemic_stroke', 389: 'acute_ischemic_stroke', 390: 'acute_ischemic_stroke', 391:'post_ischemic_stroke', 392: 'post_ischemic_stroke', 393:'post_ischemic_stroke', 394:'post_ischemic_stroke', 395:'post_ischemic_stroke', 946:'post_ischemic_stroke'},
                    'location': {'china':'China', 'brazil':'Brazil', 'france':'France', 'italy':'Italy', 'russian_federation':'Russia', 'spain':'Spain', 'All locations':'All locations'},
                    'sex':{'female':'female','male':'male', 'both_sexes_combined':'both sexes'},
                    'scenario': {'baseline':'Business as Usual', 'guideline':'Intervention 1 (multiple pills)', 'guideline_and_new_treatment':'Intervention 2 (FDC)'},
                    'measure':{'death':'deaths','ylds':'ylds','ylls':'ylls'},
                    'measure_client':{'deaths':'Deaths', 'incidence':'Incidence'},
                    'model_7_results':{'brazil':'brazil/2020_04_07_18_51_12', 'china':'china/2020_04_07_18_54_07','france':'france/2020_04_07_18_51_32', 'italy':'italy/2020_04_07_18_55_54', 'russian_federation':'russian_federation/2020_04_07_18_51_57', 'spain':'spain/2020_04_07_18_52_09'}}

# Location keys to load; each must be a key of mapping_dict_data['model_7_results'].
locations = ['brazil', 'china', 'france', 'italy','russian_federation', 'spain']
# Names of the count-data measures of interest.
measures = ['deaths', 'person_time','population','ylds','ylls', 'prevalence', 'incidence']
# Output directory for result files (see the markdown note above).
save_path_j = '/home/j/Project/simulation_science/zenon/result/final_results'

In [4]:
def get_gbd_pops():
    """Fetch GBD population estimates and label them for merging with model results.

    Pulls populations via ``get_population`` for a fixed set of age groups,
    locations and sexes, attaches the age-group, location and sex names from
    ``get_ids``, and appends one extra row per (location, sex) holding the sum
    over all requested age groups, labelled ``'40+'``.

    Returns:
        DataFrame with columns ``location_name``, ``sex``, ``age_group_name``
        and ``population``. Sex labels are rewritten to ``'male'``,
        ``'female'`` and ``'both sexes'``, and ``'Russian Federation'`` to
        ``'Russia'``.
    """
    # Presumably the 5-year groups from 40-44 up to 95+, since their sum is
    # labelled '40+' below -- TODO confirm against get_ids('age_group').
    age_group_ids = list(range(13, 21)) + [30, 31, 32, 235]
    # NOTE(review): seven location ids are requested while `locations` lists six keys.
    pops = get_population(age_group_id=age_group_ids, single_year_age=False, location_id=[135,6,80,86,68, 62, 92], location_set_id=1,sex_id=[1,2,3],gbd_round_id=6, with_ui=True, decomp_step='iterative', forecasted_pop=False)

    # Attach human-readable names for each id column.
    age_names = get_ids('age_group')
    location_names = get_ids('location')
    sex_names = get_ids('sex')
    pops = pd.merge(pops, age_names, on='age_group_id', how='left')
    pops = pd.merge(pops, location_names, on='location_id', how='left')
    pops = pd.merge(pops, sex_names, on='sex_id', how='left')
    pops = pops.drop(columns='location_description')

    # Aggregate row: total over every requested age group per location and sex.
    pops_40plus = pops.groupby(['location_name','sex']).population.sum().reset_index()
    pops_40plus['age_group_name'] = '40+'

    # sort=False: the two frames have different columns; only four are kept
    # below, so column order of the union is irrelevant and the pandas
    # FutureWarning about implicit sorting is avoided.
    pops = pd.concat([pops, pops_40plus], sort=False)

    pops['sex'] = pops.sex.str.replace('Male','male')
    pops['sex'] = pops.sex.str.replace('Female','female')
    pops['sex'] = pops.sex.str.replace('Both','both sexes')
    pops['location_name'] = pops.location_name.str.replace('Russian Federation', 'Russia')
    pops = pops[['location_name','sex','age_group_name','population']]

    return pops

In [5]:
# GBD population table, later merged onto the model rates to scale them to population counts.
gbd_pops = get_gbd_pops()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  from ipykernel import kernelapp as app


## Zenon Model 7 results



In [6]:
# Root directory of the model run outputs; per-location sub-directories are taken
# from mapping_dict_data['model_7_results'].
path_template = Path('/share/costeffectiveness/results/vivarium_csu_zenon/v7.2_intervention/')

def get_model_outputs_sequela(locations, measure):
    """Load one count-data measure for every location and stack the results.

    Reads ``<path_template>/<run dir>/count_data/<measure>.hdf`` for each
    location, tags the rows with a ``location`` column and, unless the measure
    is ``'transition_count'``, overwrites the ``measure`` column with the
    measure name.

    Args:
        locations: iterable of keys of ``mapping_dict_data['model_7_results']``.
        measure: file stem of the count-data file to read.

    Returns:
        The concatenated DataFrame (an empty DataFrame when ``locations`` is
        empty), or ``None`` after printing a message when a file cannot be
        opened.
    """
    frames = []
    try:
        for location in locations:
            country_loc = mapping_dict_data['model_7_results'][location]
            outcome = pd.read_hdf(str(path_template / f'{country_loc}/count_data/{measure}.hdf'))
            outcome['location'] = f'{location}'
            if measure != 'transition_count':
                outcome['measure'] = f'{measure}'
            frames.append(outcome)
    except OSError:
        # Only a missing/unreadable file means "no such measure". Any other
        # error (bad location key, corrupt data, interrupt) now propagates
        # instead of being reported with this misleading message.
        print(f'{measure} is not a measure in the outputs')
        return None

    if not frames:
        return pd.DataFrame([])
    # Single concat instead of DataFrame.append in the loop (quadratic, and
    # removed in pandas 2.0).
    return pd.concat(frames)

def get_model_outputs_cause(locations, measure):
    """Load one count-data measure for every location, aggregated to cause names.

    Same loading as ``get_model_outputs_sequela``; afterwards the ``cause``
    column is mapped through ``mapping_dict_data['cause']`` and every row
    containing a missing value is dropped. This includes rows whose cause has
    no entry in the mapping.

    Args:
        locations: iterable of keys of ``mapping_dict_data['model_7_results']``.
        measure: file stem of the count-data file to read.

    Returns:
        The concatenated, cause-mapped DataFrame (an empty DataFrame when
        ``locations`` is empty), or ``None`` after printing a message when a
        file cannot be opened.
    """
    frames = []
    try:
        for location in locations:
            country_loc = mapping_dict_data['model_7_results'][location]
            outcome = pd.read_hdf(str(path_template / f'{country_loc}/count_data/{measure}.hdf'))
            outcome['location'] = f'{location}'
            if measure != 'transition_count':
                outcome['measure'] = f'{measure}'
            frames.append(outcome)
    except OSError:
        # Only a missing/unreadable file means "no such measure"; other errors
        # propagate rather than being hidden behind this message.
        print(f'{measure} is not a measure in the outputs')
        return None

    if not frames:
        return pd.DataFrame([])

    outcomes = pd.concat(frames)
    # Map and drop once on the stacked frame. The original repeated both steps
    # on the whole accumulated frame for every location; the mapping sends its
    # own target values to themselves, so one pass gives the same rows.
    outcomes['cause'] = outcomes.cause.map(mapping_dict_data['cause'])
    outcomes = outcomes.dropna()
    return outcomes



In [7]:
def get_pops(locations):
    """Load ``population.hdf`` for each location and stack them with a ``location`` column.

    Args:
        locations: iterable of keys of ``mapping_dict_data['model_7_results']``.

    Returns:
        Concatenated DataFrame; an empty DataFrame when ``locations`` is empty.
    """
    frames = []
    for country in locations:
        country_loc = mapping_dict_data['model_7_results'][country]
        outcome = pd.read_hdf(str(path_template / f'{country_loc}/count_data/population.hdf'))
        outcome['location'] = f'{country}'
        frames.append(outcome)
    # One concat instead of DataFrame.append per iteration (quadratic; removed in pandas 2.0).
    return pd.concat(frames) if frames else pd.DataFrame([])

def get_person_time(locations):
    """Load ``person_time.hdf`` for each location and stack them with a ``location`` column.

    Args:
        locations: iterable of keys of ``mapping_dict_data['model_7_results']``.

    Returns:
        Concatenated DataFrame; an empty DataFrame when ``locations`` is empty.
    """
    frames = []
    for country in locations:
        country_loc = mapping_dict_data['model_7_results'][country]
        outcome = pd.read_hdf(str(path_template / f'{country_loc}/count_data/person_time.hdf'))
        outcome['location'] = f'{country}'
        frames.append(outcome)
    # One concat instead of DataFrame.append per iteration (quadratic; removed in pandas 2.0).
    return pd.concat(frames) if frames else pd.DataFrame([])

def get_person_time_misc(locations):
    """Load ``miscellaneous_person_time.hdf`` for each location and stack them with a ``location`` column.

    Args:
        locations: iterable of keys of ``mapping_dict_data['model_7_results']``.

    Returns:
        Concatenated DataFrame; an empty DataFrame when ``locations`` is empty.
    """
    frames = []
    for country in locations:
        country_loc = mapping_dict_data['model_7_results'][country]
        outcome = pd.read_hdf(str(path_template / f'{country_loc}/count_data/miscellaneous_person_time.hdf'))
        outcome['location'] = f'{country}'
        frames.append(outcome)
    # One concat instead of DataFrame.append per iteration (quadratic; removed in pandas 2.0).
    return pd.concat(frames) if frames else pd.DataFrame([])

In [8]:
def get_transition(locations):
    """Load ``transition_count.hdf`` for each location and stack them with a ``location`` column.

    Args:
        locations: iterable of keys of ``mapping_dict_data['model_7_results']``.

    Returns:
        Concatenated DataFrame; an empty DataFrame when ``locations`` is empty.
    """
    frames = []
    for country in locations:
        country_loc = mapping_dict_data['model_7_results'][country]
        df = pd.read_hdf(str(path_template / f'{country_loc}/count_data/transition_count.hdf'))
        df['location'] = f'{country}'
        frames.append(df)
    # One concat instead of DataFrame.append per iteration (quadratic; removed in pandas 2.0).
    return pd.concat(frames) if frames else pd.DataFrame([])

def get_transition_susceptible_to_acute(locations):
    """Return transition counts whose ``measure`` starts with ``'susceptible'``.

    The ``measure`` string is split into two new columns and then dropped:

    * ``susceptible`` - the text between the first ``to_`` and the following ``_to``
    * ``cause``       - the text after the last ``_to_`` and before ``_event_count``

    For a measure of the form ``susceptible_to_<A>_to_<B>_event_count`` this
    gives ``susceptible == <A>`` and ``cause == <B>``, assuming ``<A>`` itself
    contains no ``_to``.

    Args:
        locations: iterable of location keys passed to ``get_model_outputs_sequela``.
    """
    transitions = get_model_outputs_sequela(locations, 'transition_count')
    # .copy(): columns are assigned below, which must not target a slice of the loaded frame.
    transitions = transitions[transitions.measure.str.startswith('susceptible')].copy()
    transitions['susceptible'] = transitions.measure.str.extract('((?<=to_).*?(?=_to))',expand = False)
    # Two-step extraction: first "<A>_to_<B>", then keep what follows "_to_".
    transitions['cause'] = transitions.measure.str.extract('((?<=to_).*?(?=_event_count))',expand = False)
    transitions['cause'] = transitions.cause.str.extract('((?<=_to_).*)', expand = False)
    transitions = transitions.drop('measure', axis = 1)
    return transitions

def get_transition_post_to_acute(locations):
    """Return transition counts whose ``measure`` starts with ``'post'``.

    The ``measure`` string is split into two new columns and then dropped:

    * ``susceptible`` - everything before the first ``_to`` (the origin state)
    * ``cause``       - the text between the first ``to_`` and ``_event_count``

    Args:
        locations: iterable of location keys passed to ``get_model_outputs_sequela``.
    """
    transitions = get_model_outputs_sequela(locations, 'transition_count')
    # .copy(): columns are assigned below, which must not target a slice of the loaded frame.
    transitions = transitions[transitions.measure.str.startswith('post')].copy()
    transitions['susceptible'] = transitions.measure.str.extract('(.*?(?=_to))',expand = False)
    transitions['cause'] = transitions.measure.str.extract('((?<=to_).*?(?=_event_count))',expand = False)
    transitions = transitions.drop('measure', axis = 1)
    return transitions

In [9]:
# Transition counts split by origin state; these are the numerators passed to get_incidence_pops.
susceptible_to_acute = get_transition_susceptible_to_acute(locations)
post_to_acute = get_transition_post_to_acute(locations)

In [10]:
def collapse_frame(df, column):
    """Sum ``value`` over the given column(s), grouping by every other column.

    Args:
        df: DataFrame with a ``value`` column.
        column: a column name or an iterable of column names to aggregate
            away. Names that are not columns of ``df`` are ignored.

    Returns:
        DataFrame with the remaining columns plus the summed ``value``.
    """
    # A bare string would otherwise be treated as an iterable of characters.
    if isinstance(column, str):
        column = [column]
    excluded = ['value'] + list(column)
    id_columns = [name for name in df.columns if name not in excluded]
    return df.groupby(id_columns)['value'].sum().reset_index()

In [81]:
def get_incidence_pops(susceptible_to_acute, post_to_acute, locations, risk_strat = False, final_cause_strat = False, age_strat = False, ages_to_include = None, cvd_strat=False, causes_to_include=None, loc_strat=False, htn_strat = True, diabetes_strat = True, year_strat=True):
    """Compute incidence as transition counts divided by state person-time.

    The numerator is the concatenation of the two transition tables. The
    denominator is the ``state_person_time`` output for ``locations``,
    restricted to states whose name contains ``'susceptible'`` or ``'post'``.
    Both are filtered and aggregated identically before the division.

    Args:
        susceptible_to_acute, post_to_acute: transition-count tables with a
            ``susceptible`` (origin state) and a ``cause`` (destination) column.
        locations: location keys for loading state person-time.
        risk_strat: keep the risk columns (acs, fpg, sbp, ldl) when True;
            otherwise sum over them.
        final_cause_strat: keep the destination ``cause`` of the transition in
            the result when True. Cannot be combined with ``cvd_strat=False``,
            which already removes that column.
        age_strat: keep age_group and sex when True; otherwise sum over both
            and label the rows ``'40_plus'`` / ``'both_sexes_combined'``.
        ages_to_include: optional list of age_group values to keep.
        cvd_strat: keep the origin state when True; otherwise sum over it and
            label the rows ``'CVD'``.
        causes_to_include: optional list of origin states (``susceptible``
            values) to keep.
        loc_strat: keep location when True; otherwise sum over locations.
        htn_strat: when False, restrict to rows with ``sbp == 'high'`` and
            label the subpopulation ``'hypertensive'``.
        diabetes_strat: when False, restrict to rows with ``fpg == 'high'``
            and label the subpopulation ``'diabetic'``.
        year_strat: keep year when True; otherwise sum over years.

    Note:
        ``htn_strat=False`` and ``diabetes_strat=False`` need the risk columns,
        so they require ``risk_strat=True``. Each sums the risk columns away
        after filtering, so the two cannot be combined in one call.

    Returns:
        DataFrame with the id columns, ``value`` (the rate), ``measure`` set
        to ``'incidence'`` and ``cause`` mapped through
        ``mapping_dict_data['cause']``.
    """
    risk_columns = ['acs','fpg', 'sbp', 'ldl']

    transitions = pd.concat([susceptible_to_acute, post_to_acute])
    transitions['subpopulation'] = 'total_population'
    state = get_model_outputs_sequela(locations, 'state_person_time')
    state['subpopulation'] = 'total_population'

    if ages_to_include is not None:
        transitions = transitions[transitions.age_group.isin(ages_to_include)]
        state = state[state.age_group.isin(ages_to_include)]
        state = state.groupby(['age_group','sex','year','cause','input_draw','location', 'measure', 'scenario', 'subpopulation', 'acs','fpg', 'sbp', 'ldl']).value.sum().reset_index()

    # Person-time at risk: susceptible and post-event states only.
    # .copy() because a column is assigned on the filtered frame just below.
    at_risk = state.cause.str.contains('susceptible') | state.cause.str.contains('post')
    state = state[at_risk].copy()
    # Strip the prefix so state names line up with the transitions' origin state.
    state['susceptible'] = state.cause.str.replace('susceptible_to_', '')

    if causes_to_include is not None:
        transitions = transitions[transitions.susceptible.isin(causes_to_include)]
        state = state[state.susceptible.isin(causes_to_include)]
    state = state.drop(['cause', 'measure'], axis = 1)

    if not risk_strat:
        state = collapse_frame(state, risk_columns)
        transitions = collapse_frame(transitions, risk_columns)
    if not age_strat:
        transitions = collapse_frame(transitions, ['age_group', 'sex'])
        transitions['age_group'] = '40_plus'
        transitions['sex'] = 'both_sexes_combined'
        state = collapse_frame(state, ['age_group', 'sex'])
        state['age_group'] = '40_plus'
        state['sex'] = 'both_sexes_combined'
    if not cvd_strat:
        # Sum over the origin state as well as the destination cause.
        # Previously `susceptible` was only overwritten with 'CVD', leaving
        # duplicate id rows unless a later collapse happened to merge them.
        transitions = collapse_frame(transitions, ['cause', 'susceptible'])
        transitions['susceptible'] = 'CVD'
        state = collapse_frame(state, ['cause', 'susceptible'])
        state['susceptible'] = 'CVD'
    if not loc_strat:
        transitions = collapse_frame(transitions, ['location'])
        transitions['location'] = 'All locations'
        state = collapse_frame(state, ['location'])
        state['location'] = 'All locations'
    if not diabetes_strat:
        transitions_dm = transitions[(transitions.fpg=='high')]
        transitions = collapse_frame(transitions_dm, risk_columns)
        transitions['subpopulation'] = 'diabetic'
        state_dm = state[(state.fpg=='high')]
        state = collapse_frame(state_dm, risk_columns)
        state['subpopulation'] = 'diabetic'
    if not htn_strat:
        transitions_htn = transitions[(transitions.sbp=='high')]
        # Bug fix: collapse the filtered frame. The original collapsed the
        # unfiltered `transitions`, so the numerator covered the whole
        # population while the denominator covered only sbp == 'high'.
        transitions = collapse_frame(transitions_htn, risk_columns)
        transitions['subpopulation'] = 'hypertensive'
        state_htn = state[(state.sbp=='high')]
        state = collapse_frame(state_htn, risk_columns)
        state['subpopulation'] = 'hypertensive'
    if not year_strat:
        transitions = collapse_frame(transitions, ['year'])
        transitions['year'] = 'All years'
        state = collapse_frame(state, ['year'])
        state['year'] = 'All years'

    # Index both series on the denominator's id columns so the division aligns
    # row by row; the numerator optionally keeps the destination cause too.
    id_columns = [x for x in state.columns if x != 'value']
    if not final_cause_strat:
        transitions = collapse_frame(transitions, ['cause'])
        transitions = transitions.set_index(id_columns).value
    else:
        transitions = transitions.set_index(id_columns + ['cause']).value
    state = state.set_index(id_columns).value

    incidence = (transitions/state).reset_index()
    incidence['measure'] = 'incidence'
    if 'cause' not in incidence.columns:
        incidence = incidence.rename(columns = {'susceptible':'cause'})
    incidence['cause'] = incidence.cause.map(mapping_dict_data['cause'])
    return incidence

In [79]:
# Incidence for the total population: ages 40+, both sexes, all CVD states combined,
# per location, summed over years and risk categories.
inc_40plus_both_sexes_cvd_pops = get_incidence_pops(susceptible_to_acute, post_to_acute, locations, risk_strat = False, final_cause_strat = False, age_strat = False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=False, causes_to_include=['ischemic_heart_disease', 'ischemic_stroke', 'post_myocardial_infarction','post_ischemic_stroke'], loc_strat=True, htn_strat=True, diabetes_strat=True, year_strat=False)



In [82]:
# Same aggregation restricted to one risk subpopulation each. risk_strat=True keeps the
# risk columns so the sbp == 'high' (htn_strat=False) / fpg == 'high' (diabetes_strat=False)
# filter can be applied inside get_incidence_pops.
inc_40plus_both_sexes_cvd_pops_htn = get_incidence_pops(susceptible_to_acute, post_to_acute, locations, risk_strat = True, final_cause_strat = False, age_strat = False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=False, causes_to_include=['ischemic_heart_disease', 'ischemic_stroke', 'post_myocardial_infarction','post_ischemic_stroke'], loc_strat=True, htn_strat=False, diabetes_strat = True, year_strat=False)
inc_40plus_both_sexes_cvd_pops_diabetes = get_incidence_pops(susceptible_to_acute, post_to_acute, locations, risk_strat = True, final_cause_strat = False, age_strat = False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=False, causes_to_include=['ischemic_heart_disease', 'ischemic_stroke', 'post_myocardial_infarction','post_ischemic_stroke'], loc_strat=True, htn_strat=True, diabetes_strat=False, year_strat=False)


In [86]:
def get_incidence_rates_pops(inc_40plus_both_sexes_cvd_pops, inc_40plus_both_sexes_cvd_pops_htn, inc_40plus_both_sexes_cvd_pops_diabetes):
    """Stack three incidence tables and add per-row scenario differences.

    Args:
        inc_40plus_both_sexes_cvd_pops: incidence for the total population.
        inc_40plus_both_sexes_cvd_pops_htn: incidence for the hypertensive subpopulation.
        inc_40plus_both_sexes_cvd_pops_diabetes: incidence for the diabetic subpopulation.
        Each needs the columns age_group, sex, year, cause, input_draw,
        location, measure, subpopulation, scenario and value.

    Returns:
        The stacked rows (total, hypertensive, diabetic, in that order) with
        two extra columns:

        * ``difference_from_baseline_count``   = value of the ``'baseline'``
          scenario minus the row's value
        * ``difference_from_scenario_1_count`` = value of the ``'guideline'``
          scenario minus the row's value

        The reference value is matched on every id column except ``scenario``.
    """
    merge_keys = ['age_group', 'sex', 'year', 'cause', 'input_draw', 'location', 'measure', 'subpopulation']

    # pd.concat replaces two chained DataFrame.append calls (removed in pandas 2.0).
    inc = pd.concat([inc_40plus_both_sexes_cvd_pops, inc_40plus_both_sexes_cvd_pops_htn, inc_40plus_both_sexes_cvd_pops_diabetes], ignore_index=True)

    # Both sides of each merge carry `scenario`, so pandas suffixes it as
    # scenario_x (the row's own scenario) and scenario_y (the reference).
    baseline = inc[inc.scenario == 'baseline'].rename(columns = {'value':'baseline'})
    inc = pd.merge(inc, baseline, on=merge_keys, how='left')
    # Vectorised subtraction instead of a row-wise apply.
    inc['difference_from_baseline_count'] = inc['baseline'] - inc['value']
    inc = inc.rename(columns={'scenario_x':'scenario'})
    inc = inc.drop(columns=['scenario_y', 'baseline'])

    scenario_1 = inc[inc.scenario == 'guideline']
    scenario_1 = scenario_1.drop(columns=['difference_from_baseline_count'])
    scenario_1 = scenario_1.rename(columns = {'value':'scenario_1'})
    inc = pd.merge(inc, scenario_1, on=merge_keys, how='left')
    inc['difference_from_scenario_1_count'] = inc['scenario_1'] - inc['value']
    inc = inc.rename(columns={'scenario_x':'scenario'})
    inc = inc.drop(columns=['scenario_y', 'scenario_1'])
    return inc

In [87]:
# Display the stacked incidence table with its scenario differences (not assigned to a variable).
get_incidence_rates_pops(inc_40plus_both_sexes_cvd_pops, inc_40plus_both_sexes_cvd_pops_htn, inc_40plus_both_sexes_cvd_pops_diabetes)

Unnamed: 0,input_draw,location,scenario,subpopulation,cause,age_group,sex,year,value,measure,difference_from_baseline_count,difference_from_scenario_1_count
0,29,brazil,baseline,total_population,CVD,40_plus,both_sexes_combined,All years,0.001954,incidence,0.000000,-0.000302
1,29,brazil,guideline,total_population,CVD,40_plus,both_sexes_combined,All years,0.001652,incidence,0.000302,0.000000
2,29,brazil,guideline_and_new_treatment,total_population,CVD,40_plus,both_sexes_combined,All years,0.001616,incidence,0.000338,0.000036
3,29,china,baseline,total_population,CVD,40_plus,both_sexes_combined,All years,0.003204,incidence,0.000000,-0.000219
4,29,china,guideline,total_population,CVD,40_plus,both_sexes_combined,All years,0.002985,incidence,0.000219,0.000000
5,29,china,guideline_and_new_treatment,total_population,CVD,40_plus,both_sexes_combined,All years,0.002982,incidence,0.000221,0.000003
6,29,france,baseline,total_population,CVD,40_plus,both_sexes_combined,All years,0.002986,incidence,0.000000,-0.000455
7,29,france,guideline,total_population,CVD,40_plus,both_sexes_combined,All years,0.002531,incidence,0.000455,0.000000
8,29,france,guideline_and_new_treatment,total_population,CVD,40_plus,both_sexes_combined,All years,0.002474,incidence,0.000511,0.000056
9,29,italy,baseline,total_population,CVD,40_plus,both_sexes_combined,All years,0.003360,incidence,0.000000,-0.000317


In [11]:
# Load cause-level outputs for each measure. `measure` is set explicitly on each frame
# so the rows can be told apart once the frames are concatenated.
ylds = get_model_outputs_cause(locations, 'ylds')
ylds['measure'] = 'ylds'
ylls = get_model_outputs_cause(locations, 'ylls')
ylls['measure'] = 'ylls'
deaths = get_model_outputs_cause(locations, 'deaths')
deaths['measure'] = 'deaths'
# State person-time is relabelled 'prevalence' here.
state = get_model_outputs_cause(locations, 'state_person_time')
state['measure'] = 'prevalence'
# Total person-time, used as the denominator in get_cause_specific_rates_pops.
person_time = get_person_time(locations)


## Get rate-space disease outcomes per location

In [53]:
def get_cause_specific_rates_pops(ylds, ylls, deaths, state, person_time, risk_strat=False, age_strat=True, ages_to_include=None, cvd_strat=True, causes_to_include=None, loc_strat=True, diabetes_strat=True,htn_strat=True, year_strat=True):
    """Compute cause-specific rates as measure counts divided by person-time.

    The four measure tables are concatenated into the numerator and
    ``person_time`` is the denominator. Both are filtered and aggregated
    identically before the division.

    Args:
        ylds, ylls, deaths, state: count tables with ``cause``, ``measure``
            and ``value`` columns plus the id columns.
        person_time: person-time table; its ``measure`` column is dropped.
        risk_strat: keep the risk columns (acs, fpg, sbp, ldl) when True;
            otherwise sum over them.
        age_strat: keep age_group and sex when True; otherwise sum over both
            and label the rows ``'40_plus'`` / ``'both_sexes_combined'``.
        ages_to_include: optional list of age_group values to keep.
        cvd_strat: keep cause when True; otherwise sum the numerator over
            causes and label the rows ``'CVD'``.
        causes_to_include: optional list of causes to keep in the numerator.
        loc_strat: keep location when True; otherwise sum over locations.
        diabetes_strat: when False, restrict to rows with ``fpg == 'high'``
            and label the subpopulation ``'diabetic'``.
        htn_strat: when False, restrict to rows with ``sbp == 'high'`` and
            label the subpopulation ``'hypertensive'``.
        year_strat: keep year when True; otherwise sum over years.

    Note:
        ``diabetes_strat=False`` and ``htn_strat=False`` need the risk columns,
        so they require ``risk_strat=True``. Each sums the risk columns away
        after filtering, so the two cannot be combined in one call.

    Returns:
        DataFrame with the id columns, ``cause``, ``measure`` and ``value``
        (the rate).
    """
    risk_columns = ['acs','fpg', 'sbp', 'ldl']

    measures = pd.concat([ylds, ylls, deaths, state])
    person_time = person_time.drop(columns=['measure'])
    measures['subpopulation'] = 'total_population'
    person_time['subpopulation'] = 'total_population'

    if ages_to_include is not None:
        measures = measures[measures.age_group.isin(ages_to_include)]
        person_time = person_time[person_time.age_group.isin(ages_to_include)]
        person_time = person_time.groupby(['age_group','sex','year','input_draw','location', 'scenario', 'acs','fpg', 'sbp', 'ldl', 'subpopulation']).value.sum().reset_index()

    if causes_to_include is not None:
        # Only the numerator is filtered here.
        measures = measures[measures.cause.isin(causes_to_include)]

    if not risk_strat:
        measures = collapse_frame(measures, risk_columns)
        person_time = collapse_frame(person_time, risk_columns)

    if not age_strat:
        measures = collapse_frame(measures, ['age_group', 'sex'])
        measures['age_group'] = '40_plus'
        measures['sex'] = 'both_sexes_combined'
        person_time = collapse_frame(person_time, ['age_group', 'sex'])
        person_time['age_group'] = '40_plus'
        person_time['sex'] = 'both_sexes_combined'

    if not cvd_strat:
        measures = collapse_frame(measures, ['cause'])
        measures['cause'] = 'CVD'

    if not loc_strat:
        measures = collapse_frame(measures, ['location'])
        measures['location'] = 'All locations'
        person_time = collapse_frame(person_time, ['location'])
        person_time['location'] = 'All locations'

    if not diabetes_strat:
        measures_dm = measures[(measures.fpg=='high')]
        measures = collapse_frame(measures_dm, risk_columns)
        measures['subpopulation'] = 'diabetic'
        person_time_dm = person_time[(person_time.fpg=='high')]
        person_time = collapse_frame(person_time_dm, risk_columns)
        person_time['subpopulation'] = 'diabetic'

    if not htn_strat:
        measures_htn = measures[(measures.sbp=='high')]
        measures = collapse_frame(measures_htn, risk_columns)
        measures['subpopulation'] = 'hypertensive'
        person_time_htn = person_time[(person_time.sbp=='high')]
        person_time = collapse_frame(person_time_htn, risk_columns)
        person_time['subpopulation'] = 'hypertensive'

    if not year_strat:
        measures = collapse_frame(measures, ['year'])
        measures['year'] = 'All years'
        person_time = collapse_frame(person_time, ['year'])
        person_time['year'] = 'All years'

    # Index both series on the denominator's id columns so the division aligns
    # on them; the numerator additionally carries cause and measure.
    id_columns = [x for x in person_time.columns if x != 'value']
    measures = measures.set_index(id_columns + ['cause', 'measure']).value
    person_time = person_time.set_index(id_columns).value

    rates = (measures / person_time).reset_index()
    return rates

In [54]:
# Rates for the total population: ages 40+, both sexes, all listed causes combined as 'CVD',
# per location, summed over years and risk categories.
measures_cvd_40plus = get_cause_specific_rates_pops(ylds, ylls, deaths, state, person_time, risk_strat=False, age_strat = False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=False, causes_to_include=['ischemic_heart_disease', 'ischemic_stroke', 'post_myocardial_infarction','post_ischemic_stroke'], loc_strat=True, diabetes_strat=True, htn_strat=True, year_strat=False)


In [55]:
# Same aggregation restricted to one risk subpopulation each. risk_strat=True keeps the
# risk columns so the fpg == 'high' (diabetes_strat=False) / sbp == 'high' (htn_strat=False)
# filter can be applied inside get_cause_specific_rates_pops.
measures_cvd_40plus_diabetes = get_cause_specific_rates_pops(ylds, ylls, deaths, state, person_time, risk_strat=True, age_strat = False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=False, causes_to_include=['ischemic_heart_disease', 'ischemic_stroke', 'post_myocardial_infarction','post_ischemic_stroke'],loc_strat=True, diabetes_strat=False, htn_strat=True, year_strat=False)
measures_cvd_40plus_htn = get_cause_specific_rates_pops(ylds, ylls, deaths, state, person_time, risk_strat=True, age_strat = False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=False, causes_to_include=['ischemic_heart_disease', 'ischemic_stroke', 'post_myocardial_infarction','post_ischemic_stroke'],loc_strat=True, diabetes_strat=True, htn_strat=False, year_strat=False)


In [88]:
def get_measure_rates_diff_scenarios_pops(measures_cvd_40plus, measures_cvd_40plus_diabetes, measures_cvd_40plus_htn, gbd_pops, incidence_rates=None):
    """Build the client table of death and incidence rates scaled to GBD populations.

    Steps:
        1. Stack the three rate tables (total, hypertensive, diabetic) and
           keep only rows with ``measure == 'deaths'``.
        2. Add each row's difference from the ``'baseline'`` and from the
           ``'guideline'`` scenario (reference value minus row value),
           matched on every id column except ``scenario``.
        3. Append the incidence rows.
        4. Translate scenario, location, sex, cause and age-group labels to
           their display forms.
        5. Left-merge ``gbd_pops`` on age group, location and sex, and multiply
           the rate and both differences by ``population``.

    Args:
        measures_cvd_40plus: rates for the total population.
        measures_cvd_40plus_diabetes: rates for the diabetic subpopulation.
        measures_cvd_40plus_htn: rates for the hypertensive subpopulation.
        gbd_pops: population table with ``location_name``, ``sex``,
            ``age_group_name`` and ``population``.
        incidence_rates: optional table in the format returned by
            ``get_incidence_rates_pops``. When omitted, it is computed from
            the notebook-level variables ``inc_40plus_both_sexes_cvd_pops``,
            ``inc_40plus_both_sexes_cvd_pops_htn`` and
            ``inc_40plus_both_sexes_cvd_pops_diabetes`` as before.

    Returns:
        DataFrame with the display-labelled id columns, ``value``, the two
        difference columns, ``population`` and the three population-scaled
        columns.
    """
    merge_keys = ['age_group', 'sex', 'year', 'cause', 'input_draw', 'location', 'measure', 'subpopulation']

    if incidence_rates is None:
        # Backward-compatible fallback to the globals the original relied on.
        incidence_rates = get_incidence_rates_pops(inc_40plus_both_sexes_cvd_pops, inc_40plus_both_sexes_cvd_pops_htn, inc_40plus_both_sexes_cvd_pops_diabetes)

    # pd.concat replaces chained DataFrame.append calls (removed in pandas 2.0).
    measures = pd.concat([measures_cvd_40plus, measures_cvd_40plus_htn, measures_cvd_40plus_diabetes], ignore_index=True)
    measures = measures[(measures.measure == 'deaths')]

    # Both sides of each merge carry `scenario`, so pandas suffixes it as
    # scenario_x (the row's own scenario) and scenario_y (the reference).
    baseline = measures[measures.scenario == 'baseline'].rename(columns = {'value':'baseline'})
    measures = pd.merge(measures, baseline, on=merge_keys, how='left')
    # Vectorised arithmetic instead of row-wise apply.
    measures['difference_from_baseline_count'] = measures['baseline'] - measures['value']
    measures = measures.rename(columns={'scenario_x':'scenario'})
    measures = measures.drop(columns=['scenario_y', 'baseline'])

    scenario_1 = measures[measures.scenario == 'guideline']
    scenario_1 = scenario_1.drop(columns=['difference_from_baseline_count'])
    scenario_1 = scenario_1.rename(columns = {'value':'scenario_1'})
    measures = pd.merge(measures, scenario_1, on=merge_keys, how='left')
    measures['difference_from_scenario_1_count'] = measures['scenario_1'] - measures['value']
    measures = measures.rename(columns={'scenario_x':'scenario'})
    measures = measures.drop(columns=['scenario_y', 'scenario_1'])

    # sort=True pins the alphabetical column order seen in the saved output,
    # where append sorted the non-aligned columns and emitted a FutureWarning.
    measures = pd.concat([measures, incidence_rates], sort=True)

    # Display labels.
    measures['scenario'] = measures.scenario.map(mapping_dict_data['scenario'])
    measures['location'] = measures.location.map(mapping_dict_data['location'])
    measures['sex'] = measures.sex.map(mapping_dict_data['sex'])
    measures = measures.rename(columns={'scenario':'Scenario', 'measure':'Measure'})
    measures['cause'] = measures.cause.map(mapping_dict_data['cause_client'])
    # '40_plus' -> '40 plus' -> '40+', matching gbd_pops.age_group_name.
    measures['age_group'] = measures.age_group.str.replace('_', ' ')
    measures['age_group'] = measures.age_group.str.replace(' plus', '+')

    rates = pd.merge(measures,gbd_pops,left_on=['age_group','location','sex'],right_on=['age_group_name','location_name','sex'],how='left')
    rates['population_counts'] = rates['value'] * rates['population']
    rates['population_difference_from_baseline'] = rates['difference_from_baseline_count'] * rates['population']
    rates['population_difference_from_scenario_1'] = rates['difference_from_scenario_1_count'] * rates['population']

    return rates

# Final table: death and incidence rates with scenario differences, scaled to GBD populations.
measure_rates = get_measure_rates_diff_scenarios_pops(measures_cvd_40plus, measures_cvd_40plus_diabetes, measures_cvd_40plus_htn,gbd_pops)


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


In [89]:
# Display the combined deaths + incidence table.
measure_rates

Unnamed: 0,age_group,cause,difference_from_baseline_count,difference_from_scenario_1_count,input_draw,location,Measure,Scenario,sex,subpopulation,value,year,location_name,age_group_name,population,population_counts,population_difference_from_baseline,population_difference_from_scenario_1
0,40+,CVD,0.000000,-0.000206,29,Brazil,deaths,Business as Usual,both sexes,total_population,0.002745,All years,Brazil,40+,8.279758e+07,2.272394e+05,0.000000,-17027.092334
1,40+,CVD,0.000206,0.000000,29,Brazil,deaths,Intervention 1 (multiple pills),both sexes,total_population,0.002539,All years,Brazil,40+,8.279758e+07,2.102123e+05,17027.092334,0.000000
2,40+,CVD,0.000238,0.000032,29,Brazil,deaths,Intervention 2 (FDC),both sexes,total_population,0.002506,All years,Brazil,40+,8.279758e+07,2.075240e+05,19715.437008,2688.344675
3,40+,CVD,0.000000,-0.000096,29,China,deaths,Business as Usual,both sexes,total_population,0.002998,All years,China,40+,6.998593e+08,2.098476e+06,0.000000,-67117.844351
4,40+,CVD,0.000096,0.000000,29,China,deaths,Intervention 1 (multiple pills),both sexes,total_population,0.002903,All years,China,40+,6.998593e+08,2.031358e+06,67117.844351,0.000000
5,40+,CVD,0.000097,0.000001,29,China,deaths,Intervention 2 (FDC),both sexes,total_population,0.002901,All years,China,40+,6.998593e+08,2.030467e+06,68009.247501,891.403150
6,40+,CVD,0.000000,-0.000273,29,France,deaths,Business as Usual,both sexes,total_population,0.003740,All years,France,40+,3.451087e+07,1.290629e+05,0.000000,-9434.385113
7,40+,CVD,0.000273,0.000000,29,France,deaths,Intervention 1 (multiple pills),both sexes,total_population,0.003466,All years,France,40+,3.451087e+07,1.196285e+05,9434.385113,0.000000
8,40+,CVD,0.000294,0.000021,29,France,deaths,Intervention 2 (FDC),both sexes,total_population,0.003446,All years,France,40+,3.451087e+07,1.189094e+05,10153.552491,719.167378
9,40+,CVD,0.000000,-0.000169,29,Italy,deaths,Business as Usual,both sexes,total_population,0.004585,All years,Italy,40+,3.625954e+07,1.662657e+05,0.000000,-6142.648687


In [91]:
# Written relative to the current working directory.
# NOTE(review): `save_path_j` defined near the top is not used here -- confirm the intended output location.
measure_rates.to_csv('sanofi_zenon_pop_baseline_tables_deaths_incidence.csv', index=False)