In [3]:
import pandas as pd
import numpy as np
from vivarium import Artifact
import os
from pathlib import Path
from db_queries import get_outputs, get_ids, get_model_results, get_population
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import gbd_mapping
from matplotlib.backends.backend_pdf import PdfPages
pd.set_option('use_inf_as_na', True)
sns.set(context = 'paper', style='whitegrid', font_scale=1.8, rc = {'axes.spines.right':False, 'axes.spines.top': False, 'figure.figsize':(12.7,8.6)}, palette='Set1')


## Change variable save_path to local directory file path for file results



In [4]:
mapping_dict_data = {'cause': {'acute_myocardial_infarction':'ischemic_heart_disease','post_myocardial_infarction':'ischemic_heart_disease','acute_ischemic_stroke':'ischemic_stroke','post_ischemic_stroke':'ischemic_stroke', 'ischemic_stroke':'ischemic_stroke', 'chronic_kidney_disease':'chronic_kidney_disease', 'ischemic_heart_disease':'ischemic_heart_disease', 'diabetes_mellitus':'diabetes_mellitus', 'other_causes':'other_causes', 'albuminuria':'chronic_kidney_disease', 'stage_iii_chronic_kidney_disease':'chronic_kidney_disease','stage_iv_chronic_kidney_disease':'chronic_kidney_disease','stage_v_chronic_kidney_disease':'chronic_kidney_disease','moderate_diabetes_mellitus':'diabetes_mellitus','severe_diabetes_mellitus':'diabetes_mellitus', 'chronic_kidney_disease':'chronic_kidney_disease', 'CVD':'CVD'},
                    'cause_client': {'ischemic_heart_disease':'Ischemic heart disease', 'ischemic_stroke':'Ischemic stroke', 'chronic_kidney_disease':'Chronic kidney disease', 'diabetes_mellitus':'Diabetes mellitus', 'other_causes':'Other causes', 'CVD':'CVD'},
                    'sequela':{378:'acute_myocardial_infarction', 379:'acute_myocardial_infarction',380:'post_myocardial_infarction', 381:'post_myocardial_infarction', 382:'post_myocardial_infarction', 383:'post_myocardial_infarction', 384:'post_myocardial_infarction', 385:'post_myocardial_infarction', 953:'post_myocardial_infarction', 1040:'post_myocardial_infarction', 5726:'post_myocardial_infarction', 386: 'acute_ischemic_stroke', 387: 'acute_ischemic_stroke', 388: 'acute_ischemic_stroke', 389: 'acute_ischemic_stroke', 390: 'acute_ischemic_stroke', 391:'post_ischemic_stroke', 392: 'post_ischemic_stroke', 393:'post_ischemic_stroke', 394:'post_ischemic_stroke', 395:'post_ischemic_stroke', 946:'post_ischemic_stroke'},
                    'location': {'china':'China', 'brazil':'Brazil', 'france':'France', 'italy':'Italy', 'russian_federation':'Russia', 'spain':'Spain', 'All locations':'All locations'},
                    'sex':{'female':'female','male':'male', 'both_sexes_combined':'both sexes'},
                    'scenario': {'baseline':'Business as Usual', 'guideline':'Intervention 1 (multiple pills)', 'guideline_and_new_treatment':'Intervention 2 (FDC)'},
                    'measure':{'death':'deaths','ylds':'ylds','ylls':'ylls'},
                    'measure_client':{'deaths':'Deaths', 'incidence':'Incidence'},
                    'model_7_results':{'brazil':'brazil/2020_04_07_18_51_12', 'china':'china/2020_04_07_18_54_07','france':'france/2020_04_07_18_51_32', 'italy':'italy/2020_04_07_18_55_54', 'russian_federation':'russian_federation/2020_04_07_18_51_57', 'spain':'spain/2020_04_07_18_52_09'}}

locations = ['brazil', 'china', 'france', 'italy','russian_federation', 'spain']
measures = ['deaths', 'person_time','population','ylds','ylls', 'prevalence', 'incidence']
save_path_j = '/home/j/Project/simulation_science/zenon/result/final_results'

## Zenon Model 7 results



In [5]:
path_template = Path('/share/costeffectiveness/results/vivarium_csu_zenon/v7.2_intervention/')

def get_model_outputs_sequela(locations, measure):
    outcomes = pd.DataFrame([])
    try:   
        for location in locations:
            country_loc = mapping_dict_data['model_7_results'][location]
            outcome = pd.read_hdf(str(path_template / f'{country_loc}/count_data/{measure}.hdf'))
            outcome['location'] = f'{location}'
            if measure != 'transition_count':
                outcome['measure'] = f'{measure}'
            outcomes = outcomes.append(outcome)
        return outcomes
            
    except:
        print(f'{measure} is not a measure in the outputs')

def get_model_outputs_cause(locations, measure):
    outcomes = pd.DataFrame([])
    try:   
    
        for location in locations:
            country_loc = mapping_dict_data['model_7_results'][location]
            outcome = pd.read_hdf(str(path_template / f'{country_loc}/count_data/{measure}.hdf'))
            outcome['location'] = f'{location}'
        
            if measure != 'transition_count':
                outcome['measure'] = f'{measure}'
            outcomes = outcomes.append(outcome)
            outcomes['cause'] = outcomes.cause.map(mapping_dict_data['cause'])
            outcomes = outcomes.dropna()
#         outcomes = outcomes.groupby(['age_group','sex','year','cause','input_draw','location', 'scenario']).value.sum().reset_index()

        return outcomes
            
    except:
            print(f'{measure} is not a measure in the outputs')



In [6]:
def get_pops(locations):
    outcomes = pd.DataFrame([])
    for country in locations:
        country_loc = mapping_dict_data['model_7_results'][country]
        outcome = pd.read_hdf(str(path_template / f'{country_loc}/count_data/population.hdf'))
        outcome['location'] = f'{country}'
        outcomes = outcomes.append(outcome)
    return outcomes

def get_person_time(locations):
    outcomes = pd.DataFrame([])
    for country in locations:
        country_loc = mapping_dict_data['model_7_results'][country]
        outcome = pd.read_hdf(str(path_template / f'{country_loc}/count_data/person_time.hdf'))
        outcome['location'] = f'{country}'
        outcomes = outcomes.append(outcome)
#     outcomes = outcomes.groupby(['age_group','sex','year','input_draw', 'location', 'scenario']).value.sum().reset_index()
    return outcomes

def get_person_time_misc(locations):
    outcomes = pd.DataFrame([])
    for country in locations:
        country_loc = mapping_dict_data['model_7_results'][country]
        outcome = pd.read_hdf(str(path_template / f'{country_loc}/count_data/miscellaneous_person_time.hdf'))
        outcome['location'] = f'{country}'
        outcomes = outcomes.append(outcome)
    return outcomes

In [7]:
def get_transition(locations):
    outcomes = pd.DataFrame([])
    for country in locations:
        country_loc = mapping_dict_data['model_7_results'][country]
        df = pd.read_hdf(str(path_template / f'{country_loc}/count_data/transition_count.hdf'))
        df['location'] = f'{country}'
        outcomes = outcomes.append(df)
    return outcomes

def get_transition_susceptible_to_acute(locations):
    transitions = get_model_outputs_sequela(locations, 'transition_count')
    transitions = transitions[transitions.measure.str.startswith('susceptible')]
    transitions['susceptible'] = transitions.measure.str.extract('((?<=to_).*?(?=_to))',expand = False)
    transitions['cause'] = transitions.measure.str.extract('((?<=to_).*?(?=_event_count))',expand = False)
    transitions['cause'] = transitions.cause.str.extract('((?<=_to_).*)', expand = False)
    transitions = transitions.drop('measure', axis = 1)
    return transitions

def get_transition_post_to_acute(locations):    
    transitions = get_model_outputs_sequela(locations, 'transition_count')
    transitions = transitions[transitions.measure.str.startswith('post')]
    transitions['susceptible'] = transitions.measure.str.extract('(.*?(?=_to))',expand = False)
    transitions['cause'] = transitions.measure.str.extract('((?<=to_).*?(?=_event_count))',expand = False)
    transitions = transitions.drop('measure', axis = 1)
    return transitions

In [6]:
susceptible_to_acute = get_transition_susceptible_to_acute(locations)
post_to_acute = get_transition_post_to_acute(locations)

In [8]:
def collapse_frame(df, column):
    nonidcols = ['value']
    nonidcols.extend(column)
    return(df.groupby([x for x in df.columns if x not in nonidcols])['value'].sum().reset_index())

## Add subpopulation to cause_measures function and diabetes + hypertensive subpop. to incidence and cause measures function

In [9]:
def get_incidence(susceptible_to_acute, post_to_acute, locations, risk_strat = True, final_cause_strat = False, age_strat = False, ages_to_include = None, cvd_strat=False, causes_to_include=None, loc_strat=False):
    transitions = pd.concat([susceptible_to_acute, post_to_acute])
    transitions['subpopulation'] = 'total_population'
    state['subpopulation'] = 'total_population'
    state = get_model_outputs_sequela(locations, 'state_person_time')
    if ages_to_include != None:
        transitions = transitions[transitions.age_group.isin(ages_to_include)]
        state = state[state.age_group.isin(ages_to_include)]
        state = state.groupby(['age_group','sex','year','cause','input_draw','location', 'measure', 'scenario', 'acs','fpg', 'sbp', 'ldl']).value.sum().reset_index()
    state = state[(state.cause.str.contains('susceptible')) | (state.cause.str.contains('post'))]
    state['susceptible'] = state.cause.str.replace('susceptible_to_', '')
    if causes_to_include != None:
        transitions = transitions[transitions.susceptible.isin(causes_to_include)]
        state = state[state.susceptible.isin(causes_to_include)]
    state = state.drop(['cause', 'measure'], axis = 1)
    if not risk_strat:
        state = collapse_frame(state, ['acs','fpg', 'sbp', 'ldl'])
        transitions = collapse_frame(transitions, ['acs','fpg', 'sbp', 'ldl'])
    if not age_strat:
        transitions = collapse_frame(transitions, ['age_group', 'sex'])
        transitions['age_group'] = '40_plus'
        transitions['sex'] = 'both_sexes_combined'
        state = collapse_frame(state, ['age_group', 'sex'])
        state['age_group'] = '40_plus'
        state['sex'] = 'both_sexes_combined'
    if not cvd_strat:
        transitions = collapse_frame(transitions, ['cause'])
        transitions['susceptible'] = 'CVD'
        state = collapse_frame(state, ['cause'])
        state['susceptible'] = 'CVD'
    if not loc_strat:
        transitions = collapse_frame(transitions, ['location'])
        transitions['location'] = 'All locations'
        state = collapse_frame(state, ['location'])
        state['location'] = 'All locations'
    if not diabetes_strat:
        transitions_dm = transitions[(transitions)]
        transitions = collapse_frame(transitions, ['location'])
        transitions['location'] = 'All locations'
        state = collapse_frame(state, ['location'])
        state['location'] = 'All locations'
    if not final_cause_strat:
        transitions = collapse_frame(transitions, ['cause'])
        transitions = transitions.set_index([x for x in state.columns if x != 'value']).value
    else:
        transitions = transitions.set_index([x for x in state.columns if x != 'value'] + ['cause']).value
        #added this in to test transitions
    state = state.set_index([x for x in state.columns if x != 'value']).value
    incidence = (transitions/state * 100000).reset_index()
    incidence['measure'] = 'incidence'
    if 'cause' not in incidence.columns:
        incidence.rename(columns = {'susceptible':'cause'}, inplace = True)
    incidence['cause'] = incidence.cause.map(mapping_dict_data['cause'])
#     incidence = incidence.groupby(['age_group','sex','year','cause','input_draw','location', 'measure', 'scenario']).value.sum().reset_index()
    incidence = incidence.fillna(0)
    return incidence


In [8]:
#testing the diabetes and hypertensive subpopulations
def get_incidence(susceptible_to_acute, post_to_acute, locations, risk_strat = True, final_cause_strat = False, age_strat = False, ages_to_include = None, cvd_strat=False, causes_to_include=None, loc_strat=False, diabetes_strat=False, htn_strat=False):
    transitions = pd.concat([susceptible_to_acute, post_to_acute])
    transitions['subpopulation'] = 'total_population'
    state = get_model_outputs_sequela(locations, 'state_person_time')
    state['subpopulation'] = 'total_population'

    if ages_to_include != None:
        transitions = transitions[transitions.age_group.isin(ages_to_include)]
        state = state[state.age_group.isin(ages_to_include)]
        state = state.groupby(['age_group','sex','year','cause','input_draw','location', 'measure', 'scenario', 'acs','fpg', 'sbp', 'ldl']).value.sum().reset_index()
    state = state[(state.cause.str.contains('susceptible')) | (state.cause.str.contains('post'))]
    state['susceptible'] = state.cause.str.replace('susceptible_to_', '')
    if causes_to_include != None:
        transitions = transitions[transitions.susceptible.isin(causes_to_include)]
        state = state[state.susceptible.isin(causes_to_include)]
    state = state.drop(['cause', 'measure'], axis = 1)
    if not risk_strat:
        state = collapse_frame(state, ['acs','fpg', 'sbp', 'ldl'])
        transitions = collapse_frame(transitions, ['acs','fpg', 'sbp', 'ldl'])
    if not age_strat:
        transitions = collapse_frame(transitions, ['age_group', 'sex'])
        transitions['age_group'] = '40_plus'
        transitions['sex'] = 'both_sexes_combined'
        state = collapse_frame(state, ['age_group', 'sex'])
        state['age_group'] = '40_plus'
        state['sex'] = 'both_sexes_combined'
    if not cvd_strat:
        transitions = collapse_frame(transitions, ['cause'])
        transitions['susceptible'] = 'CVD'
        state = collapse_frame(state, ['cause'])
        state['susceptible'] = 'CVD'
    if not loc_strat:
        transitions = collapse_frame(transitions, ['location'])
        transitions['location'] = 'All locations'
        state = collapse_frame(state, ['location'])
        state['location'] = 'All locations'
    if not diabetes_strat:
        transitions_dm = transitions[(transitions.fpg=='high')]
        transitions = collapse_frame(transitions_dm, ['acs','fpg', 'sbp', 'ldl'])
        transitions['subpopulation'] = 'diabetic'
        state_dm = state[(state.fpg=='high')]
        state = collapse_frame(state_dm, ['acs','fpg', 'sbp', 'ldl'])
        state['subpopulation'] = 'diabetic'
    if not htn_strat:
        transitions_dm = transitions[(transitions.sbp=='high')]
        transitions = collapse_frame(transitions_dm, ['acs','fpg', 'sbp', 'ldl'])
        transitions['subpopulation'] = 'hypertensive'
        state_dm = state[(state.sbp=='high')]
        state = collapse_frame(state_dm, ['acs','fpg', 'sbp', 'ldl'])
        state['subpopulation'] = 'hypertensive'   
    if not final_cause_strat:
        transitions = collapse_frame(transitions, ['cause'])
        transitions = transitions.set_index([x for x in state.columns if x != 'value']).value
    else:
        transitions = transitions.set_index([x for x in state.columns if x != 'value'] + ['cause']).value
        #added this in to test transitions
    state = state.set_index([x for x in state.columns if x != 'value']).value
    incidence = (transitions/state * 100000).reset_index()
    incidence['measure'] = 'incidence'
    if 'cause' not in incidence.columns:
        incidence.rename(columns = {'susceptible':'cause'}, inplace = True)
    incidence['cause'] = incidence.cause.map(mapping_dict_data['cause'])
#     incidence = incidence.groupby(['age_group','sex','year','cause','input_draw','location', 'measure', 'scenario']).value.sum().reset_index()
    incidence = incidence.fillna(0)
    return incidence


In [9]:
inc_all = get_incidence(susceptible_to_acute, post_to_acute, locations, risk_strat = True, final_cause_strat = False, age_strat = True, ages_to_include = None, cvd_strat = True, causes_to_include = None, loc_strat=True, diabetes_strat=True, htn_strat=True)


In [11]:
inc_40plus_cvd_diabetes= get_incidence(susceptible_to_acute, post_to_acute,  locations, risk_strat = True, final_cause_strat = False, age_strat = False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=False, causes_to_include=['ischemic_heart_disease', 'ischemic_stroke', 'post_myocardial_infarction','post_ischemic_stroke'],loc_strat=True, diabetes_strat=False, htn_strat=True)


In [12]:
inc_40plus_cvd_htn= get_incidence(susceptible_to_acute, post_to_acute,  locations, risk_strat = True, final_cause_strat = False, age_strat = False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=False, causes_to_include=['ischemic_heart_disease', 'ischemic_stroke', 'post_myocardial_infarction','post_ischemic_stroke'],loc_strat=True, diabetes_strat=True, htn_strat=False)


In [9]:
inc_40plus_cvd= get_incidence(susceptible_to_acute, post_to_acute,  locations, risk_strat = True, final_cause_strat = False, age_strat = False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=False, causes_to_include=['ischemic_heart_disease', 'ischemic_stroke', 'post_myocardial_infarction','post_ischemic_stroke'],loc_strat=True)
inc_all = get_incidence(susceptible_to_acute, post_to_acute, locations, risk_strat = True, final_cause_strat = False, age_strat = True, ages_to_include = None, cvd_strat = True, causes_to_include = None, loc_strat=True)
inc_40plus_all_causes = get_incidence(susceptible_to_acute, post_to_acute, locations, risk_strat = True, final_cause_strat = False, age_strat = False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=True, causes_to_include=None,loc_strat=True)


In [13]:
def get_incidence_rates(inc_all):     
    inc = inc_all
    baseline = inc[inc.scenario == 'baseline']
    baseline = baseline.rename(columns = {'value':'baseline'})
    inc = pd.merge(inc, baseline, left_on=['age_group', 'sex', 'year', 'cause', 'input_draw', 'location', 'measure','subpopulation'], right_on=['age_group', 'sex', 'year', 'cause', 'input_draw', 'location', 'measure','subpopulation'], how='left')
    inc['difference_from_baseline_count'] = inc.apply(lambda row: (row['baseline'] - row['value']),axis=1)
    inc = inc.rename(columns={'scenario_x':'scenario'})
    inc = inc.drop(columns=['scenario_y', 'baseline'])
    scenario_1 = inc[inc.scenario == 'guideline']
    scenario_1 = scenario_1.drop(columns=['difference_from_baseline_count'])
    scenario_1 = scenario_1.rename(columns = {'value':'scenario_1'})
    inc = pd.merge(inc, scenario_1, left_on=['age_group', 'sex', 'year', 'cause', 'input_draw', 'location', 'measure','subpopulation'], right_on=['age_group', 'sex', 'year', 'cause', 'input_draw', 'location', 'measure','subpopulation'], how='left')
    inc['difference_from_scenario_1_count'] = inc.apply(lambda row: (row['scenario_1'] - row['value']),axis=1)
    inc = inc.rename(columns={'scenario_x':'scenario'})
    inc = inc.drop(columns=['scenario_y', 'scenario_1'])
    return inc

In [16]:
def get_incidence_rates(inc_40plus_cvd_htn, inc_40plus_cvd_diabetes):     
    inc = inc_40plus_cvd_htn.append(inc_40plus_cvd_diabetes, ignore_index = True)
    baseline = inc[inc.scenario == 'baseline']
    baseline = baseline.rename(columns = {'value':'baseline'})
    inc = pd.merge(inc, baseline, left_on=['age_group', 'sex', 'year', 'cause', 'input_draw', 'location', 'measure','subpopulation'], right_on=['age_group', 'sex', 'year', 'cause', 'input_draw', 'location', 'measure','subpopulation'], how='left')
    inc['difference_from_baseline_count'] = inc.apply(lambda row: (row['baseline'] - row['value']),axis=1)
    inc = inc.rename(columns={'scenario_x':'scenario'})
    inc = inc.drop(columns=['scenario_y', 'baseline'])
    scenario_1 = inc[inc.scenario == 'guideline']
    scenario_1 = scenario_1.drop(columns=['difference_from_baseline_count'])
    scenario_1 = scenario_1.rename(columns = {'value':'scenario_1'})
    inc = pd.merge(inc, scenario_1, left_on=['age_group', 'sex', 'year', 'cause', 'input_draw', 'location', 'measure','subpopulation'], right_on=['age_group', 'sex', 'year', 'cause', 'input_draw', 'location', 'measure','subpopulation'], how='left')
    inc['difference_from_scenario_1_count'] = inc.apply(lambda row: (row['scenario_1'] - row['value']),axis=1)
    inc = inc.rename(columns={'scenario_x':'scenario'})
    inc = inc.drop(columns=['scenario_y', 'scenario_1'])
    return inc

In [8]:
ylds = get_model_outputs_cause(locations, 'ylds')
ylds['measure'] = 'ylds'
ylls = get_model_outputs_cause(locations, 'ylls')
ylls['measure'] = 'ylls'
deaths = get_model_outputs_cause(locations, 'deaths')
deaths['measure'] = 'deaths'
state = get_model_outputs_cause(locations, 'state_person_time')
state['measure'] = 'prevalence'
person_time = get_person_time(locations)


In [16]:
deaths['cause'].unique()

array(['ischemic_stroke', 'ischemic_heart_disease',
       'chronic_kidney_disease', 'diabetes_mellitus', 'other_causes'],
      dtype=object)

In [10]:
person_time_misc = get_person_time_misc(locations)


In [207]:
pops = get_pops(locations)

## Aggregate BAU compare table

In [69]:
#testing the diabetes and hypertensive subpopulations
def get_incidence_final_report_table(susceptible_to_acute, post_to_acute, locations, risk_strat = True, final_cause_strat = False, age_strat = False, ages_to_include = None, cvd_strat=False, causes_to_include=None, loc_strat=False, diabetes_strat=False, htn_strat=False, year_strat=True):
    transitions = pd.concat([susceptible_to_acute, post_to_acute])
    transitions['subpopulation'] = 'total_population'
    state = get_model_outputs_sequela(locations, 'state_person_time')
    state['subpopulation'] = 'total_population'

    if ages_to_include != None:
        transitions = transitions[transitions.age_group.isin(ages_to_include)]
        state = state[state.age_group.isin(ages_to_include)]
        state = state.groupby(['age_group','sex','year','cause','input_draw','location', 'measure', 'scenario', 'acs','fpg', 'sbp', 'ldl']).value.sum().reset_index()
    state = state[(state.cause.str.contains('susceptible')) | (state.cause.str.contains('post'))]
    state['susceptible'] = state.cause.str.replace('susceptible_to_', '')
    if causes_to_include != None:
        transitions = transitions[transitions.susceptible.isin(causes_to_include)]
        state = state[state.susceptible.isin(causes_to_include)]
    state = state.drop(['cause', 'measure'], axis = 1)
    if not risk_strat:
        state = collapse_frame(state, ['acs','fpg', 'sbp', 'ldl'])
        transitions = collapse_frame(transitions, ['acs','fpg', 'sbp', 'ldl'])
    if not age_strat:
        transitions = collapse_frame(transitions, ['age_group', 'sex'])
        transitions['age_group'] = '40_plus'
        transitions['sex'] = 'both_sexes_combined'
        state = collapse_frame(state, ['age_group', 'sex'])
        state['age_group'] = '40_plus'
        state['sex'] = 'both_sexes_combined'
    if not cvd_strat:
        transitions = collapse_frame(transitions, ['cause'])
        transitions['susceptible'] = 'CVD'
        state = collapse_frame(state, ['cause'])
        state['susceptible'] = 'CVD'
    if not loc_strat:
        transitions = collapse_frame(transitions, ['location'])
        transitions['location'] = 'All locations'
        state = collapse_frame(state, ['location'])
        state['location'] = 'All locations'
    if not diabetes_strat:
        transitions_dm = transitions[(transitions.fpg=='high')]
        transitions = collapse_frame(transitions_dm, ['acs','fpg', 'sbp', 'ldl'])
        transitions['subpopulation'] = 'diabetic'
        state_dm = state[(state.fpg=='high')]
        state = collapse_frame(state_dm, ['acs','fpg', 'sbp', 'ldl'])
        state['subpopulation'] = 'diabetic'
    if not htn_strat:
        transitions_dm = transitions[(transitions.sbp=='high')]
        transitions = collapse_frame(transitions_dm, ['acs','fpg', 'sbp', 'ldl'])
        transitions['subpopulation'] = 'hypertensive'
        state_dm = state[(state.sbp=='high')]
        state = collapse_frame(state_dm, ['acs','fpg', 'sbp', 'ldl'])
        state['subpopulation'] = 'hypertensive'   
    if not year_strat:
        transitions = collapse_frame(transitions, 'year')
        transitions['year'] = 'All years'
        state = collapse_frame(state, 'year')
        state['year'] = 'All years'
        
    if not final_cause_strat:
        transitions = collapse_frame(transitions, ['cause'])
        transitions = transitions.set_index([x for x in state.columns if x != 'value']).value
    else:
        transitions = transitions.set_index([x for x in state.columns if x != 'value'] + ['cause']).value
        #added this in to test transitions
    state = state.set_index([x for x in state.columns if x != 'value']).value
    incidence = (transitions/1_200_000).reset_index()
    incidence['measure'] = 'incidence'
    if 'cause' not in incidence.columns:
        incidence.rename(columns = {'susceptible':'cause'}, inplace = True)
    incidence['cause'] = incidence.cause.map(mapping_dict_data['cause'])
#     incidence = incidence.groupby(['age_group','sex','year','cause','input_draw','location', 'measure', 'scenario']).value.sum().reset_index()
#     incidence = incidence.fillna(0)
    return incidence

In [70]:
incidence_both_sexes_all_ages = get_incidence_final_report_table(susceptible_to_acute, post_to_acute, locations, risk_strat = False, final_cause_strat = False, age_strat = False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=False, causes_to_include=['ischemic_heart_disease', 'ischemic_stroke', 'post_myocardial_infarction','post_ischemic_stroke'], loc_strat=False, diabetes_strat=True, htn_strat=True, year_strat=False)


In [72]:
incidence_both_sexes_all_ages['value'].unique()

array([0.02670167, 0.02401333, 0.02375667, 0.0195075 , 0.0179125 ,
       0.017815  , 0.01774833, 0.01618833, 0.01588333, 0.0278975 ,
       0.02576167, 0.02551   , 0.01993167, 0.01817833, 0.01804667,
       0.0236675 , 0.02154   , 0.02135333, 0.02020917, 0.01752833,
       0.017275  , 0.02489   , 0.02319917, 0.02274417, 0.01833417,
       0.01689333, 0.01681917, 0.02725167, 0.02424917, 0.02382583,
       0.01572417, 0.01412833, 0.01400417, 0.02477083, 0.02321083,
       0.02290083, 0.03042917, 0.02798167, 0.0276025 , 0.02631333,
       0.02407   , 0.02387583, 0.021335  , 0.0190275 , 0.01879833,
       0.0205075 , 0.01877583, 0.018535  , 0.02264667, 0.02060083,
       0.020405  , 0.0201475 , 0.01722583, 0.01705667, 0.0267375 ,
       0.024765  , 0.02438667, 0.0135475 , 0.01250667, 0.0124525 ])

In [12]:

def get_incidence_for_bau_as_comp(incidence_both_sexes_all_ages):
    incidence = incidence_both_sexes_all_ages.groupby(['scenario', 'input_draw']).value.sum().reset_index()
    incidence = incidence.groupby(['scenario']).value.mean().reset_index()
    incidence = incidence.T
    incidence = incidence.drop('scenario', axis=0).reset_index()
    incidence = incidence.rename(columns={'index':'EVENT', 0:'baseline', 1:'guideline', 2:'guideline_and_treatment'})
    incidence['EVENT'] = incidence.EVENT.str.replace('value', 'MACE')

    return incidence
    

In [13]:
agg_incidence = get_incidence_for_bau_as_comp(incidence_both_sexes_all_ages)


In [14]:
agg_incidence

Unnamed: 0,EVENT,baseline,guideline,guideline_and_treatment
0,MACE,6.37137e-08,5.78804e-08,5.72191e-08


In [87]:
def get_final_report_table(ylds, ylls, deaths, state, person_time, risk_strat=False, age_strat=True, ages_to_include=None, cvd_strat=True, causes_to_include=None, loc_strat=True, diabetes_strat=True,htn_strat=True, daly_strat=True, year_strat=True, acmr_strat=True):
    measures = pd.concat([ylds, ylls, deaths, state])
    person_time = person_time.drop(columns=['measure'])
    measures['subpopulation'] = 'total_population'
    person_time['subpopulation'] = 'total_population'

    if ages_to_include != None:
        measures = measures[measures.age_group.isin(ages_to_include)]
        person_time = person_time[person_time.age_group.isin(ages_to_include)]
        person_time = person_time.groupby(['age_group','sex','year','input_draw','location', 'scenario', 'acs','fpg', 'sbp', 'ldl']).value.sum().reset_index()

    if causes_to_include != None:
        measures = measures[measures.cause.isin(causes_to_include)]
#         person_time = person_time[person_time.cause.isin(causes_to_include)]

    if not risk_strat:
        measures = collapse_frame(measures, ['acs','fpg', 'sbp', 'ldl'])
        
    if not age_strat:
        measures = collapse_frame(measures, ['age_group', 'sex'])
        measures['age_group'] = '40_plus'
        measures['sex'] = 'both_sexes_combined'
        
    if not cvd_strat:
        measures = collapse_frame(measures, ['cause'])
        measures['cause'] = 'CVD'

        
    if not loc_strat:
        measures = collapse_frame(measures, ['location'])
        measures['location'] = 'All locations'

    if not diabetes_strat:
        measures_dm = measures[(measures.fpg=='high')]
        measures = collapse_frame(measures_dm, ['acs','fpg', 'sbp', 'ldl'])
        measures['subpopulation'] = 'diabetic'

    if not htn_strat:
        measures_htn = measures[(measures.sbp=='high')]
        measures = collapse_frame(measures_htn, ['acs','fpg', 'sbp', 'ldl'])
        measures['subpopulation'] = 'hypertensive'

    if not daly_strat:
        measures_daly = measures[(measures.measure=='ylds') | (measures.measure == 'ylls')]
        measures = collapse_frame(measures_daly, 'measure')
        measures['measure'] = 'dalys'
    
    if not year_strat:
        measures = collapse_frame(measures, 'year')
        measures['year'] = 'All years'
    
    if not acmr_strat:
        measures = collapse_frame(measures, 'cause')
        measures['cause'] = 'All causes'
        
    measures = measures.set_index([x for x in measures.columns if x != 'value']).value

    rates = (measures / 1_200_000).reset_index()
    
#     rates = rates.append(get_incidence_rates(inc_all, inc_40plus_cvd, inc_40plus_all_causes))
#     rates['scenario'] = rates.scenario.map(mapping_dict_data['scenario'])
#     rates['location'] = rates.location.map(mapping_dict_data['location'])
#     rates['sex'] = rates.sex.map(mapping_dict_data['sex'])
#     rates = rates.rename(columns={'scenario':'Scenario', 'measure':'Measure'})
#     rates['cause'] = rates.cause.map(mapping_dict_data['cause_client'])
#     rates['age_group'] = rates.age_group.str.replace('_', ' ')
#     rates['age_group'] = rates.age_group.str.replace(' plus', '+')
    rates = rates.fillna(0)
    return rates

In [88]:
dalys_both_sexes_all_ages = get_final_report_table(ylds, ylls, deaths, state, person_time, risk_strat=False, age_strat = False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=False, causes_to_include=['ischemic_heart_disease', 'ischemic_stroke', 'post_myocardial_infarction','post_ischemic_stroke'], loc_strat=True, diabetes_strat=True,htn_strat=True, daly_strat=False, year_strat=False, acmr_strat=True)
    

In [89]:
deaths_both_sexes_all_ages = get_final_report_table(ylds, ylls, deaths, state, person_time, risk_strat=False, age_strat = False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=True, causes_to_include=None, loc_strat=False, diabetes_strat=True, htn_strat=True, daly_strat=True, year_strat=False, acmr_strat=False)


In [20]:
def get_death_for_bau_as_comp(deaths_both_sexes_all_ages):
    deaths = deaths_both_sexes_all_ages[(deaths_both_sexes_all_ages.measure == 'deaths')]
    deaths = deaths.groupby(['scenario', 'input_draw']).value.sum().reset_index()
    deaths = deaths.groupby(['scenario']).value.mean().reset_index()
    deaths = deaths.T
    deaths = deaths.drop('scenario', axis=0).reset_index()
    deaths = deaths.rename(columns={'index':'EVENT', 0:'baseline', 1:'guideline', 2:'guideline_and_treatment'})
    deaths['EVENT'] = deaths.EVENT.str.replace('value', 'ALL CAUSE MORTALITY')
    return deaths

In [90]:
def get_dalys_for_bau_as_comp(dalys_both_sexes_all_ages):
    dalys = dalys_both_sexes_all_ages
    dalys = dalys.groupby(['scenario','input_draw']).value.sum().reset_index()
    dalys = dalys.groupby(['scenario']).value.mean().reset_index()
    dalys = dalys.T
    dalys = dalys.drop('scenario', axis=0).reset_index()
    dalys = dalys.rename(columns={'index':'EVENT', 0:'baseline', 1:'guideline', 2:'guideline_and_treatment'})
    dalys['EVENT'] = dalys.EVENT.str.replace('value', 'DALYS')
    return dalys


In [22]:
agg_deaths = get_death_for_bau_as_comp(deaths_both_sexes_all_ages)

In [91]:
agg_dalys = get_dalys_for_bau_as_comp(dalys_both_sexes_all_ages)

In [92]:
def get_bau_full_table(agg_incidence, agg_deaths, agg_dalys):
    full_table = pd.concat([agg_deaths, agg_incidence, agg_dalys])
    full_table = full_table.rename(columns={'baseline':'BAU[n/N (%)]', 'guideline':'MULTIPLE PILLS (INTERVENTION 1)', 'guideline_and_treatment': 'FDC (INTERVENTION 2)'})
    full_table['ARR_1'] = full_table.apply(lambda row: (row['BAU[n/N (%)]'] - row['MULTIPLE PILLS (INTERVENTION 1)']),axis=1)
    full_table['RRR_1'] = full_table.apply(lambda row: ((row['BAU[n/N (%)]'] - row['MULTIPLE PILLS (INTERVENTION 1)'])/row['BAU[n/N (%)]']),axis=1)
    full_table['NNT4_1'] = full_table.apply(lambda row: (1/row['ARR_1']),axis=1).round(1)
    full_table['ARR_2'] = full_table.apply(lambda row: (row['BAU[n/N (%)]'] - row['FDC (INTERVENTION 2)']),axis=1)
    full_table['RRR_2'] = full_table.apply(lambda row: ((row['BAU[n/N (%)]'] - row['FDC (INTERVENTION 2)'])/row['BAU[n/N (%)]']),axis=1)
    full_table['NNT4_2'] = full_table.apply(lambda row: (1/row['ARR_2']),axis=1).round(1)
    full_table['ARR_3'] = full_table.apply(lambda row: (row['MULTIPLE PILLS (INTERVENTION 1)'] - row['FDC (INTERVENTION 2)']),axis=1)
    full_table['RRR_3'] = full_table.apply(lambda row: ((row['MULTIPLE PILLS (INTERVENTION 1)'] - row['FDC (INTERVENTION 2)'])/row['MULTIPLE PILLS (INTERVENTION 1)']),axis=1)
    full_table['NNT4_3'] = full_table.apply(lambda row: (1/row['ARR_3']),axis=1).round(1)

    return full_table

In [93]:
full_bau_comp_agg = get_bau_full_table(agg_incidence, agg_deaths, agg_dalys)

In [94]:
full_bau_comp_agg

Unnamed: 0,EVENT,BAU[n/N (%)],MULTIPLE PILLS (INTERVENTION 1),FDC (INTERVENTION 2),ARR_1,RRR_1,NNT4_1,ARR_2,RRR_2,NNT4_2,ARR_3,RRR_3,NNT4_3
0,ALL CAUSE MORTALITY,0.0624786,0.0619452,0.0618877,0.0005334167,0.008538,1874.7,0.0005909167,0.009458,1692.3,5.75e-05,0.000928,17391.3
0,MACE,6.37137e-08,5.78804e-08,5.72191e-08,5.833269e-09,0.091554,171430472.6,6.494612e-09,0.101934,153973791.7,6.613431e-10,0.011426,1512074000.0
0,DALYS,0.225428,0.215035,0.213791,0.01039232,0.0461,96.2,0.01163699,0.051622,85.9,0.00124467,0.005788,803.4


In [95]:
full_bau_comp_agg.to_csv('20200505_sanofi_zenon_agg_bau_as_comparison.csv', index=False)

## Location-specific BAU Compare table

In [73]:
transitions = pd.concat([susceptible_to_acute, post_to_acute])


In [76]:
transitions

Unnamed: 0,age_group,sex,year,input_draw,scenario,value,acs,fpg,sbp,ldl,location,susceptible,cause
0,30_to_34,female,2020,29,baseline,0.0,post,high,high,high,brazil,ischemic_heart_disease,acute_myocardial_infarction
1,30_to_34,female,2020,29,baseline,0.0,none,high,high,high,brazil,ischemic_heart_disease,acute_myocardial_infarction
2,30_to_34,female,2020,29,baseline,0.0,post,low,high,high,brazil,ischemic_heart_disease,acute_myocardial_infarction
3,30_to_34,female,2020,29,baseline,0.0,none,low,high,high,brazil,ischemic_heart_disease,acute_myocardial_infarction
4,30_to_34,female,2020,29,baseline,0.0,post,high,low,high,brazil,ischemic_heart_disease,acute_myocardial_infarction
5,30_to_34,female,2020,29,baseline,0.0,none,high,low,high,brazil,ischemic_heart_disease,acute_myocardial_infarction
6,30_to_34,female,2020,29,baseline,0.0,post,low,low,high,brazil,ischemic_heart_disease,acute_myocardial_infarction
7,30_to_34,female,2020,29,baseline,0.0,none,low,low,high,brazil,ischemic_heart_disease,acute_myocardial_infarction
8,30_to_34,female,2020,29,baseline,0.0,post,high,high,low,brazil,ischemic_heart_disease,acute_myocardial_infarction
9,30_to_34,female,2020,29,baseline,0.0,none,high,high,low,brazil,ischemic_heart_disease,acute_myocardial_infarction


In [83]:
#testing the diabetes and hypertensive subpopulations
def get_incidence_final_report_table_loc_specific(susceptible_to_acute, post_to_acute, locations, risk_strat = True, final_cause_strat = False, age_strat = False, ages_to_include = None, cvd_strat=False, causes_to_include=None, loc_strat=False, diabetes_strat=False, htn_strat=False, year_strat=True):
    transitions = pd.concat([susceptible_to_acute, post_to_acute])
    transitions['subpopulation'] = 'total_population'
    state = get_model_outputs_sequela(locations, 'state_person_time')
    state['subpopulation'] = 'total_population'

    if ages_to_include != None:
        transitions = transitions[transitions.age_group.isin(ages_to_include)]
        state = state[state.age_group.isin(ages_to_include)]
        state = state.groupby(['age_group','sex','year','cause','input_draw','location', 'measure', 'scenario', 'acs','fpg', 'sbp', 'ldl']).value.sum().reset_index()
    state = state[(state.cause.str.contains('susceptible')) | (state.cause.str.contains('post'))]
    state['susceptible'] = state.cause.str.replace('susceptible_to_', '')
    if causes_to_include != None:
        transitions = transitions[transitions.susceptible.isin(causes_to_include)]
        state = state[state.susceptible.isin(causes_to_include)]
    state = state.drop(['cause', 'measure'], axis = 1)
    if not risk_strat:
        state = collapse_frame(state, ['acs','fpg', 'sbp', 'ldl'])
        transitions = collapse_frame(transitions, ['acs','fpg', 'sbp', 'ldl'])
    if not age_strat:
        transitions = collapse_frame(transitions, ['age_group', 'sex'])
        transitions['age_group'] = '40_plus'
        transitions['sex'] = 'both_sexes_combined'
        state = collapse_frame(state, ['age_group', 'sex'])
        state['age_group'] = '40_plus'
        state['sex'] = 'both_sexes_combined'
    if not cvd_strat:
        transitions = collapse_frame(transitions, ['cause'])
        transitions['susceptible'] = 'CVD'
        state = collapse_frame(state, ['cause'])
        state['susceptible'] = 'CVD'
    if not loc_strat:
        transitions = collapse_frame(transitions, ['location'])
        transitions['location'] = 'All locations'
        state = collapse_frame(state, ['location'])
        state['location'] = 'All locations'
    if not diabetes_strat:
        transitions_dm = transitions[(transitions.fpg=='high')]
        transitions = collapse_frame(transitions_dm, ['acs','fpg', 'sbp', 'ldl'])
        transitions['subpopulation'] = 'diabetic'
        state_dm = state[(state.fpg=='high')]
        state = collapse_frame(state_dm, ['acs','fpg', 'sbp', 'ldl'])
        state['subpopulation'] = 'diabetic'
    if not htn_strat:
        transitions_dm = transitions[(transitions.sbp=='high')]
        transitions = collapse_frame(transitions_dm, ['acs','fpg', 'sbp', 'ldl'])
        transitions['subpopulation'] = 'hypertensive'
        state_dm = state[(state.sbp=='high')]
        state = collapse_frame(state_dm, ['acs','fpg', 'sbp', 'ldl'])
        state['subpopulation'] = 'hypertensive'   
    if not year_strat:
        transitions = collapse_frame(transitions, 'year')
        transitions['year'] = 'All years'
        state = collapse_frame(state, 'year')
        state['year'] = 'All years'
        
#     if not final_cause_strat:
#         transitions = collapse_frame(transitions, ['cause'])
#         transitions = transitions.set_index([x for x in state.columns if x != 'value']).value
#     else:
#         transitions = transitions.set_index([x for x in state.columns if x != 'value'] + ['cause']).value
#         #added this in to test transitions
#     state = state.set_index([x for x in state.columns if x != 'value']).value
#     incidence = (transitions/200_000).reset_index()
#     incidence['measure'] = 'incidence'
#     if 'cause' not in incidence.columns:
#         incidence.rename(columns = {'susceptible':'cause'}, inplace = True)
#     incidence['cause'] = incidence.cause.map(mapping_dict_data['cause'])
# #     incidence = incidence.groupby(['age_group','sex','year','cause','input_draw','location', 'measure', 'scenario']).value.sum().reset_index()
#     incidence = incidence.fillna(0)
#     return incidence
    return transitions

In [84]:
incidence_both_sexes_all_ages_loc_specific = get_incidence_final_report_table_loc_specific(susceptible_to_acute, post_to_acute, locations, risk_strat = False, final_cause_strat = False, age_strat = False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=False, causes_to_include=['ischemic_heart_disease', 'ischemic_stroke', 'post_myocardial_infarction','post_ischemic_stroke'], loc_strat=True, diabetes_strat=True, htn_strat=True, year_strat=False)


In [86]:
incidence_both_sexes_all_ages_loc_specific.groupby(['scenario', 'location']).value.mean().reset_index()

Unnamed: 0,scenario,location,value
0,baseline,brazil,600.1
1,baseline,china,1075.94
2,baseline,france,978.11
3,baseline,italy,1050.64
4,baseline,russian_federation,1790.04
5,baseline,spain,779.77
6,guideline,brazil,529.78
7,guideline,china,1018.51
8,guideline,france,832.64
9,guideline,italy,968.74


In [424]:
incidence_both_sexes_all_ages_loc_specific['scenario'].unique()

array(['baseline', 'guideline', 'guideline_and_new_treatment'],
      dtype=object)

In [30]:

def get_incidence_for_bau_as_comp_loc_specific(incidence_both_sexes_all_ages_loc_specific):
    incidence = incidence_both_sexes_all_ages_loc_specific.groupby(['location','scenario', 'input_draw']).value.sum().reset_index()
    incidence = incidence.groupby(['location','scenario']).value.mean().reset_index()
    bau = incidence[(incidence.scenario == 'baseline')]
    bau = bau.drop(columns=['scenario'])
    bau = bau.rename(columns={'value':'baseline'})
    int_1 = incidence[(incidence.scenario == 'guideline')]
    int_1 = int_1.rename(columns={'value':'guideline'})
    int_1 = int_1.drop(columns=['scenario'])
    int_2 = incidence[(incidence.scenario == 'guideline_and_new_treatment')]
    int_2 = int_2.drop(columns=['scenario'])
    int_2 = int_2.rename(columns={'value':'guideline_and_new_treatment'})
    incidence = pd.merge(bau, int_1, left_on='location', right_on='location', how='left')
    incidence = pd.merge(incidence, int_2, left_on='location', right_on='location', how='left')
#     incidence = incidence.rename(columns={'index':'EVENT', 0:'baseline', 1:'guideline', 2:'guideline_and_treatment'})
    incidence['EVENT'] = 'MACE'

    return incidence

In [31]:
loc_incidence = get_incidence_for_bau_as_comp_loc_specific(incidence_both_sexes_all_ages_loc_specific)


In [32]:
def get_final_report_table_loc_specific(ylds, ylls, deaths, state, person_time, risk_strat=False, age_strat=True, ages_to_include=None, cvd_strat=True, causes_to_include=None, loc_strat=True, diabetes_strat=True,htn_strat=True, daly_strat=True, year_strat=True, acmr_strat=True):
    measures = pd.concat([ylds, ylls, deaths, state])
    person_time = person_time.drop(columns=['measure'])
    measures['subpopulation'] = 'total_population'
    person_time['subpopulation'] = 'total_population'

    if ages_to_include != None:
        measures = measures[measures.age_group.isin(ages_to_include)]
        person_time = person_time[person_time.age_group.isin(ages_to_include)]
        person_time = person_time.groupby(['age_group','sex','year','input_draw','location', 'scenario', 'acs','fpg', 'sbp', 'ldl']).value.sum().reset_index()

    if causes_to_include != None:
        measures = measures[measures.cause.isin(causes_to_include)]
#         person_time = person_time[person_time.cause.isin(causes_to_include)]

    if not risk_strat:
        measures = collapse_frame(measures, ['acs','fpg', 'sbp', 'ldl'])
        
    if not age_strat:
        measures = collapse_frame(measures, ['age_group', 'sex'])
        measures['age_group'] = '40_plus'
        measures['sex'] = 'both_sexes_combined'
        
    if not cvd_strat:
        measures = collapse_frame(measures, ['cause'])
        measures['cause'] = 'CVD'

        
    if not loc_strat:
        measures = collapse_frame(measures, ['location'])
        measures['location'] = 'All locations'

    if not diabetes_strat:
        measures_dm = measures[(measures.fpg=='high')]
        measures = collapse_frame(measures_dm, ['acs','fpg', 'sbp', 'ldl'])
        measures['subpopulation'] = 'diabetic'

    if not htn_strat:
        measures_htn = measures[(measures.sbp=='high')]
        measures = collapse_frame(measures_htn, ['acs','fpg', 'sbp', 'ldl'])
        measures['subpopulation'] = 'hypertensive'

    if not daly_strat:
        measures_daly = measures[(measures.measure=='ylds') | (measures.measure == 'ylls')]
        measures = collapse_frame(measures_daly, 'measure')
        measures['measure'] = 'dalys'
    
    if not year_strat:
        measures = collapse_frame(measures, 'year')
        measures['year'] = 'All years'
    
    if not acmr_strat:
        measures = collapse_frame(measures, 'cause')
        measures['cause'] = 'All causes'
        
    measures = measures.set_index([x for x in measures.columns if x != 'value']).value

    rates = (measures / 200_000).reset_index()
    
#     rates = rates.append(get_incidence_rates(inc_all, inc_40plus_cvd, inc_40plus_all_causes))
#     rates['scenario'] = rates.scenario.map(mapping_dict_data['scenario'])
#     rates['location'] = rates.location.map(mapping_dict_data['location'])
#     rates['sex'] = rates.sex.map(mapping_dict_data['sex'])
#     rates = rates.rename(columns={'scenario':'Scenario', 'measure':'Measure'})
#     rates['cause'] = rates.cause.map(mapping_dict_data['cause_client'])
#     rates['age_group'] = rates.age_group.str.replace('_', ' ')
#     rates['age_group'] = rates.age_group.str.replace(' plus', '+')
    rates = rates.fillna(0)
    return rates

In [33]:
dalys_both_sexes_all_ages_loc_specific = get_final_report_table_loc_specific(ylds, ylls, deaths, state, person_time, risk_strat=False, age_strat = False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'],  cvd_strat=True, causes_to_include=None, loc_strat=True, diabetes_strat=True,htn_strat=True, daly_strat=False, year_strat=False, acmr_strat=True)
    

In [34]:
deaths_both_sexes_all_ages_loc_specific = get_final_report_table_loc_specific(ylds, ylls, deaths, state, person_time, risk_strat=False, age_strat = False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=True, causes_to_include=None, loc_strat=True, diabetes_strat=True, htn_strat=True, daly_strat=True, year_strat=False, acmr_strat=False)


In [35]:

def get_deaths_for_bau_as_comp_loc_specific(deaths_both_sexes_all_ages_loc_specific):
    deaths = deaths_both_sexes_all_ages_loc_specific.groupby(['location','scenario', 'input_draw']).value.sum().reset_index()
    deaths = deaths.groupby(['location','scenario']).value.mean().reset_index()
    bau = deaths[(deaths.scenario == 'baseline')]
    bau = bau.drop(columns=['scenario'])
    bau = bau.rename(columns={'value':'baseline'})
    int_1 = deaths[(deaths.scenario == 'guideline')]
    int_1 = int_1.rename(columns={'value':'guideline'})
    int_1 = int_1.drop(columns=['scenario'])
    int_2 = deaths[(deaths.scenario == 'guideline_and_new_treatment')]
    int_2 = int_2.drop(columns=['scenario'])
    int_2 = int_2.rename(columns={'value':'guideline_and_new_treatment'})
    deaths = pd.merge(bau, int_1, left_on='location', right_on='location', how='left')
    deaths = pd.merge(deaths, int_2, left_on='location', right_on='location', how='left')
#     incidence = incidence.rename(columns={'index':'EVENT', 0:'baseline', 1:'guideline', 2:'guideline_and_treatment'})
    deaths['EVENT'] = 'ALL CAUSE MORTALITY'

    return deaths


In [36]:

def get_dalys_for_bau_as_comp_loc_specific(dalys_both_sexes_all_ages_loc_specific):
    dalys = dalys_both_sexes_all_ages_loc_specific.groupby(['location','scenario', 'input_draw']).value.sum().reset_index()
    dalys = dalys.groupby(['location','scenario']).value.mean().reset_index()
    bau = dalys[(dalys.scenario == 'baseline')]
    bau = bau.drop(columns=['scenario'])
    bau = bau.rename(columns={'value':'baseline'})
    int_1 = dalys[(dalys.scenario == 'guideline')]
    int_1 = int_1.rename(columns={'value':'guideline'})
    int_1 = int_1.drop(columns=['scenario'])
    int_2 = dalys[(dalys.scenario == 'guideline_and_new_treatment')]
    int_2 = int_2.drop(columns=['scenario'])
    int_2 = int_2.rename(columns={'value':'guideline_and_new_treatment'})
    dalys = pd.merge(bau, int_1, left_on='location', right_on='location', how='left')
    dalys = pd.merge(dalys, int_2, left_on='location', right_on='location', how='left')
#     incidence = incidence.rename(columns={'index':'EVENT', 0:'baseline', 1:'guideline', 2:'guideline_and_treatment'})
    dalys['EVENT'] = 'DALYS'

    return dalys


In [37]:
loc_deaths = get_deaths_for_bau_as_comp_loc_specific(deaths_both_sexes_all_ages_loc_specific)
loc_dalys = get_dalys_for_bau_as_comp_loc_specific(dalys_both_sexes_all_ages_loc_specific)
loc_dalys

Unnamed: 0,location,baseline,guideline,guideline_and_new_treatment,EVENT
0,brazil,1.141454,1.128324,1.126678,DALYS
1,china,1.080812,1.074724,1.074312,DALYS
2,france,1.037312,1.021714,1.020542,DALYS
3,italy,1.017754,1.009343,1.007931,DALYS
4,russian_federation,1.739325,1.724994,1.722952,DALYS
5,spain,0.907484,0.898463,0.896947,DALYS


In [38]:
def get_bau_full_table_loc_specific(loc_deaths, loc_incidence, loc_dalys):
    full_table = pd.concat([loc_incidence, loc_deaths, loc_dalys])
    full_table = full_table.rename(columns={'baseline':'BAU[n/N (%)]', 'guideline':'MULTIPLE PILLS (INTERVENTION 1)', 'guideline_and_new_treatment': 'FDC (INTERVENTION 2)'})
    full_table['ARR_1'] = full_table.apply(lambda row: (row['BAU[n/N (%)]'] - row['MULTIPLE PILLS (INTERVENTION 1)']),axis=1)
    full_table['RRR_1'] = full_table.apply(lambda row: ((row['BAU[n/N (%)]'] - row['MULTIPLE PILLS (INTERVENTION 1)'])/row['BAU[n/N (%)]']),axis=1)
    full_table['NNT4_1'] = full_table.apply(lambda row: (1/row['ARR_1']),axis=1).round(1)
    full_table['ARR_2'] = full_table.apply(lambda row: (row['BAU[n/N (%)]'] - row['FDC (INTERVENTION 2)']),axis=1)
    full_table['RRR_2'] = full_table.apply(lambda row: ((row['BAU[n/N (%)]'] - row['FDC (INTERVENTION 2)'])/row['BAU[n/N (%)]']),axis=1)
    full_table['NNT4_2'] = full_table.apply(lambda row: (1/row['ARR_2']),axis=1).round(1)
    full_table['ARR_3'] = full_table.apply(lambda row: (row['MULTIPLE PILLS (INTERVENTION 1)'] - row['FDC (INTERVENTION 2)']),axis=1)
    full_table['RRR_3'] = full_table.apply(lambda row: ((row['MULTIPLE PILLS (INTERVENTION 1)'] - row['FDC (INTERVENTION 2)'])/row['MULTIPLE PILLS (INTERVENTION 1)']),axis=1)
    full_table['NNT4_3'] = full_table.apply(lambda row: (1/row['ARR_3']),axis=1).round(1)
    full_table['location'] = full_table.location.map(mapping_dict_data['location'])
    return full_table[['location', 'EVENT', 'BAU[n/N (%)]', 'MULTIPLE PILLS (INTERVENTION 1)', 'FDC (INTERVENTION 2)', 'ARR_1', 'RRR_1', 'NNT4_1', 'ARR_2', 'RRR_2', 'NNT4_2', 'ARR_3', 'RRR_3', 'NNT4_3']]
    

In [39]:
full_bau_comp_loc_specific = get_bau_full_table_loc_specific(loc_incidence, loc_deaths, loc_dalys)

In [40]:
full_bau_comp_loc_specific.to_csv('20200505_sanofi_zenon_bau_as_comp_location_specific.csv', index=False)

## Hypertension subpop BAU comp table

In [41]:
incidence_both_sexes_all_ages_htn = get_incidence_final_report_table(susceptible_to_acute, post_to_acute, locations, risk_strat = True, final_cause_strat = False, age_strat = False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=False, causes_to_include=['ischemic_heart_disease', 'ischemic_stroke', 'post_myocardial_infarction','post_ischemic_stroke'], loc_strat=False, diabetes_strat=True, htn_strat=False, year_strat=False)



In [42]:

def get_incidence_for_bau_as_comp(incidence_both_sexes_all_ages_htn):
    incidence = incidence_both_sexes_all_ages.groupby(['scenario', 'input_draw']).value.sum().reset_index()
    incidence = incidence.groupby(['scenario']).value.mean().reset_index()
    incidence = incidence.T
    incidence = incidence.drop('scenario', axis=0).reset_index()
    incidence = incidence.rename(columns={'index':'EVENT', 0:'baseline', 1:'guideline', 2:'guideline_and_treatment'})
    incidence['EVENT'] = incidence.EVENT.str.replace('value', 'MACE')

    return incidence
    

In [43]:
agg_incidence_htn = get_incidence_for_bau_as_comp(incidence_both_sexes_all_ages_htn)


In [44]:
agg_incidence_htn

Unnamed: 0,EVENT,baseline,guideline,guideline_and_treatment
0,MACE,6.37137e-08,5.78804e-08,5.72191e-08


In [45]:
dalys_both_sexes_all_ages_htn = get_final_report_table(ylds, ylls, deaths, state, person_time, risk_strat=True, age_strat=False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=True, causes_to_include=None, loc_strat=True, diabetes_strat=True,htn_strat=True, daly_strat=False, year_strat=False, acmr_strat=True)
    

In [46]:
deaths_both_sexes_all_ages_htn = get_final_report_table(ylds, ylls, deaths, state, person_time, risk_strat=False, age_strat = False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=True, causes_to_include=None, loc_strat=False, diabetes_strat=True, htn_strat=True, daly_strat=True, year_strat=False, acmr_strat=False)


In [47]:
def get_death_for_bau_as_comp(deaths_both_sexes_all_ages_htn):
    deaths = deaths_both_sexes_all_ages[(deaths_both_sexes_all_ages.measure == 'deaths')]
    deaths = deaths.groupby(['scenario', 'input_draw']).value.sum().reset_index()
    deaths = deaths.groupby(['scenario']).value.mean().reset_index()
    deaths = deaths.T
    deaths = deaths.drop('scenario', axis=0).reset_index()
    deaths = deaths.rename(columns={'index':'EVENT', 0:'baseline', 1:'guideline', 2:'guideline_and_treatment'})
    deaths['EVENT'] = deaths.EVENT.str.replace('value', 'ALL CAUSE MORTALITY')
    return deaths

In [48]:
def get_dalys_for_bau_as_comp(dalys_both_sexes_all_ages_htn):
    dalys = dalys_both_sexes_all_ages
    dalys = dalys.groupby(['scenario','input_draw']).value.sum().reset_index()
    dalys = dalys.groupby(['scenario']).value.mean().reset_index()
    dalys = dalys.T
    dalys = dalys.drop('scenario', axis=0).reset_index()
    dalys = dalys.rename(columns={'index':'EVENT', 0:'baseline', 1:'guideline', 2:'guideline_and_treatment'})
    dalys['EVENT'] = dalys.EVENT.str.replace('value', 'DALYS')
    return dalys


In [49]:
agg_deaths_htn = get_death_for_bau_as_comp(deaths_both_sexes_all_ages_htn)

In [50]:
agg_dalys_htn = get_dalys_for_bau_as_comp(dalys_both_sexes_all_ages_htn)

In [51]:
def get_bau_full_table(agg_incidence_htn, agg_deaths_htn, agg_dalys_htn):
    full_table = pd.concat([agg_deaths, agg_incidence, agg_dalys])
    full_table = full_table.rename(columns={'baseline':'BAU[n/N (%)]', 'guideline':'MULTIPLE PILLS (INTERVENTION 1)', 'guideline_and_treatment': 'FDC (INTERVENTION 2)'})
    full_table['ARR_1'] = full_table.apply(lambda row: (row['BAU[n/N (%)]'] - row['MULTIPLE PILLS (INTERVENTION 1)']),axis=1)
    full_table['RRR_1'] = full_table.apply(lambda row: ((row['BAU[n/N (%)]'] - row['MULTIPLE PILLS (INTERVENTION 1)'])/row['BAU[n/N (%)]']),axis=1)
    full_table['NNT4_1'] = full_table.apply(lambda row: (1/row['ARR_1']),axis=1).round(1)
    full_table['ARR_2'] = full_table.apply(lambda row: (row['BAU[n/N (%)]'] - row['FDC (INTERVENTION 2)']),axis=1)
    full_table['RRR_2'] = full_table.apply(lambda row: ((row['BAU[n/N (%)]'] - row['FDC (INTERVENTION 2)'])/row['BAU[n/N (%)]']),axis=1)
    full_table['NNT4_2'] = full_table.apply(lambda row: (1/row['ARR_2']),axis=1).round(1)
    full_table['ARR_3'] = full_table.apply(lambda row: (row['MULTIPLE PILLS (INTERVENTION 1)'] - row['FDC (INTERVENTION 2)']),axis=1)
    full_table['RRR_3'] = full_table.apply(lambda row: ((row['MULTIPLE PILLS (INTERVENTION 1)'] - row['FDC (INTERVENTION 2)'])/row['MULTIPLE PILLS (INTERVENTION 1)']),axis=1)
    full_table['NNT4_3'] = full_table.apply(lambda row: (1/row['ARR_3']),axis=1).round(1)

    return full_table

In [52]:
full_bau_comp_agg_htn = get_bau_full_table(agg_incidence_htn, agg_deaths_htn, agg_dalys_htn)

In [53]:
full_bau_comp_agg_htn

Unnamed: 0,EVENT,BAU[n/N (%)],MULTIPLE PILLS (INTERVENTION 1),FDC (INTERVENTION 2),ARR_1,RRR_1,NNT4_1,ARR_2,RRR_2,NNT4_2,ARR_3,RRR_3,NNT4_3
0,ALL CAUSE MORTALITY,0.0624786,0.0619452,0.0618877,0.0005334167,0.008538,1874.7,0.0005909167,0.009458,1692.3,5.75e-05,0.000928,17391.3
0,MACE,6.37137e-08,5.78804e-08,5.72191e-08,5.833269e-09,0.091554,171430472.6,6.494612e-09,0.101934,153973791.7,6.613431e-10,0.011426,1512074000.0
0,DALYS,1.00908,0.999177,0.997981,0.009902043,0.009813,101.0,0.01109845,0.010999,90.1,0.001196411,0.001197,835.8


In [54]:
full_bau_comp_agg_htn.to_csv('20200505_sanofi_zenon_agg_bau_as_comparison_htn.csv', index=False)

## Diabetic subpop BAU comp table

In [55]:
incidence_both_sexes_all_ages_diabetic = get_incidence_final_report_table(susceptible_to_acute, post_to_acute, locations, risk_strat = True, final_cause_strat = False, age_strat = False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=False, causes_to_include=['ischemic_heart_disease', 'ischemic_stroke', 'post_myocardial_infarction','post_ischemic_stroke'], loc_strat=False, diabetes_strat=False, htn_strat=True,  year_strat=False)


In [56]:

def get_incidence_for_bau_as_comp(incidence_both_sexes_all_ages_diabetic):
    incidence = incidence_both_sexes_all_ages.groupby(['scenario', 'input_draw']).value.sum().reset_index()
    incidence = incidence.groupby(['scenario']).value.mean().reset_index()
    incidence = incidence.T
    incidence = incidence.drop('scenario', axis=0).reset_index()
    incidence = incidence.rename(columns={'index':'EVENT', 0:'baseline', 1:'guideline', 2:'guideline_and_treatment'})
    incidence['EVENT'] = incidence.EVENT.str.replace('value', 'MACE')

    return incidence
    

In [57]:
agg_incidence_diabetic = get_incidence_for_bau_as_comp(incidence_both_sexes_all_ages_diabetic)


In [58]:
agg_incidence_diabetic

Unnamed: 0,EVENT,baseline,guideline,guideline_and_treatment
0,MACE,6.37137e-08,5.78804e-08,5.72191e-08


In [59]:
dalys_both_sexes_all_ages_diabetic = get_final_report_table(ylds, ylls, deaths, state, person_time, risk_strat=True, age_strat=False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=True, causes_to_include=None, loc_strat=False, diabetes_strat=False, htn_strat=True, daly_strat=False, year_strat=False, acmr_strat=True)
    

In [60]:
deaths_both_sexes_all_ages_diabetic = get_final_report_table(ylds, ylls, deaths, state, person_time, risk_strat=True, age_strat = False, ages_to_include = ['40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90_to_94', '95_plus'], cvd_strat=False, causes_to_include=None, loc_strat=False, diabetes_strat=False, htn_strat=True, daly_strat=True, year_strat=False, acmr_strat=False)


In [61]:
def get_death_for_bau_as_comp(deaths_both_sexes_all_ages_diabetic):
    deaths = deaths_both_sexes_all_ages[(deaths_both_sexes_all_ages.measure == 'deaths')]
    deaths = deaths.groupby(['scenario', 'input_draw']).value.sum().reset_index()
    deaths = deaths.groupby(['scenario']).value.mean().reset_index()
    deaths = deaths.T
    deaths = deaths.drop('scenario', axis=0).reset_index()
    deaths = deaths.rename(columns={'index':'EVENT', 0:'baseline', 1:'guideline', 2:'guideline_and_treatment'})
    deaths['EVENT'] = deaths.EVENT.str.replace('value', 'ALL CAUSE MORTALITY')
    return deaths

In [62]:
def get_dalys_for_bau_as_comp(dalys_both_sexes_all_ages_diabetic):
    dalys = dalys_both_sexes_all_ages
    dalys = dalys.groupby(['scenario','input_draw']).value.sum().reset_index()
    dalys = dalys.groupby(['scenario']).value.mean().reset_index()
    dalys = dalys.T
    dalys = dalys.drop('scenario', axis=0).reset_index()
    dalys = dalys.rename(columns={'index':'EVENT', 0:'baseline', 1:'guideline', 2:'guideline_and_treatment'})
    dalys['EVENT'] = dalys.EVENT.str.replace('value', 'DALYS')
    return dalys


In [63]:
agg_deaths_diabetic = get_death_for_bau_as_comp(deaths_both_sexes_all_ages_diabetic)

In [64]:
agg_dalys_diabetic = get_dalys_for_bau_as_comp(dalys_both_sexes_all_ages_diabetic)

In [65]:
def get_bau_full_table(agg_incidence_diabetic, agg_deaths_diabetic, agg_dalys_diabetic):
    full_table = pd.concat([agg_deaths, agg_incidence, agg_dalys])
    full_table = full_table.rename(columns={'baseline':'BAU[n/N (%)]', 'guideline':'MULTIPLE PILLS (INTERVENTION 1)', 'guideline_and_treatment': 'FDC (INTERVENTION 2)'})
    full_table['ARR_1'] = full_table.apply(lambda row: (row['BAU[n/N (%)]'] - row['MULTIPLE PILLS (INTERVENTION 1)']),axis=1)
    full_table['RRR_1'] = full_table.apply(lambda row: ((row['BAU[n/N (%)]'] - row['MULTIPLE PILLS (INTERVENTION 1)'])/row['BAU[n/N (%)]']),axis=1)
    full_table['NNT4_1'] = full_table.apply(lambda row: (1/row['ARR_1']),axis=1).round(1)
    full_table['ARR_2'] = full_table.apply(lambda row: (row['BAU[n/N (%)]'] - row['FDC (INTERVENTION 2)']),axis=1)
    full_table['RRR_2'] = full_table.apply(lambda row: ((row['BAU[n/N (%)]'] - row['FDC (INTERVENTION 2)'])/row['BAU[n/N (%)]']),axis=1)
    full_table['NNT4_2'] = full_table.apply(lambda row: (1/row['ARR_2']),axis=1).round(1)
    full_table['ARR_3'] = full_table.apply(lambda row: (row['MULTIPLE PILLS (INTERVENTION 1)'] - row['FDC (INTERVENTION 2)']),axis=1)
    full_table['RRR_3'] = full_table.apply(lambda row: ((row['MULTIPLE PILLS (INTERVENTION 1)'] - row['FDC (INTERVENTION 2)'])/row['MULTIPLE PILLS (INTERVENTION 1)']),axis=1)
    full_table['NNT4_3'] = full_table.apply(lambda row: (1/row['ARR_3']),axis=1).round(1)

    return full_table

In [66]:
full_bau_comp_agg_diabetic = get_bau_full_table(agg_incidence_diabetic, agg_deaths_diabetic, agg_dalys_diabetic)

In [68]:
full_bau_comp_agg_diabetic.to_csv('20200505_sanofi_zenon_agg_bau_as_comparison_diabetic.csv', index=False)

# Treatment type percents table

In [9]:
med_options = ['fibrates',
       'ezetimibe', 'none','low_potency_statin_low_dose',
       'low_potency_statin_high_dose',
       'high_potency_statin_low_dose',
       'high_potency_statin_high_dose',
       'low_potency_statin_low_dose_multi',
       'low_potency_statin_high_dose_multi',
       'high_potency_statin_low_dose_multi',
       'high_potency_statin_high_dose_multi',
       'low_potency_statin_low_dose_fdc',
       'low_potency_statin_high_dose_fdc',
       'high_potency_statin_low_dose_fdc',
       'high_potency_statin_high_dose_fdc']

In [11]:
def get_med_pcts(person_time_misc, med_options):
    meds = pd.DataFrame([])
    for med in med_options:
        df = person_time_misc[(person_time_misc.measure == f'{med}_person_time')]
        df['medication'] = f'{med}'
        meds = meds.append(df)
    df = meds.groupby(['age_group', 'sex', 'year', 'input_draw', 'location', 'scenario', 'medication']).value.sum().reset_index()
    df_all = df.groupby(['age_group', 'sex', 'year', 'input_draw', 'location', 'scenario']).value.sum().reset_index()
    index_columns = ['age_group', 'sex', 'year', 'input_draw','location', 'scenario']
    df = df.set_index(index_columns + ['medication']).value
    df_all = df_all.set_index(index_columns).value
    med_prop = df / df_all
    med_prop = med_prop.reset_index()
    med_prop['location'] = med_prop.location.map(mapping_dict_data['location'])
    med_prop['scenario'] = med_prop.scenario.map(mapping_dict_data['scenario'])
    med_prop = med_prop.rename(columns={'scenario':'Scenario'})

    return med_prop


In [12]:
meds = get_med_pcts(person_time_misc, med_options)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [13]:
meds

Unnamed: 0,age_group,sex,year,input_draw,location,Scenario,medication,value
0,30_to_34,female,2020,29,Brazil,Business as Usual,ezetimibe,0.003931
1,30_to_34,female,2020,29,Brazil,Business as Usual,fibrates,0.007069
2,30_to_34,female,2020,29,Brazil,Business as Usual,high_potency_statin_high_dose,0.001610
3,30_to_34,female,2020,29,Brazil,Business as Usual,high_potency_statin_high_dose_fdc,0.000044
4,30_to_34,female,2020,29,Brazil,Business as Usual,high_potency_statin_high_dose_multi,0.000066
5,30_to_34,female,2020,29,Brazil,Business as Usual,high_potency_statin_low_dose,0.036815
6,30_to_34,female,2020,29,Brazil,Business as Usual,high_potency_statin_low_dose_fdc,0.001395
7,30_to_34,female,2020,29,Brazil,Business as Usual,high_potency_statin_low_dose_multi,0.000965
8,30_to_34,female,2020,29,Brazil,Business as Usual,low_potency_statin_high_dose,0.006782
9,30_to_34,female,2020,29,Brazil,Business as Usual,low_potency_statin_high_dose_fdc,0.008050


In [17]:
def get_med_pcts_primary_prevention(person_time_misc, med_options, acs_strat=False):
    meds = pd.DataFrame([])
    for med in med_options:
        df = person_time_misc[(person_time_misc.measure == f'{med}_person_time')]
        df['medication'] = f'{med}'
        meds = meds.append(df)
    df = meds
    # commented out the medication != 'none' because 'none' may be considered a medication in other % values
#     df_all = df[(df.medication != 'none')]
    df_all = df.groupby(['age_group', 'sex', 'year', 'input_draw', 'location', 'scenario', 'acs', 'fpg', 'sbp', 'ldl']).value.sum().reset_index()
    
    if not acs_strat:
        df = df[(df.acs != 'post') & (df.medication != 'none')]
        df = collapse_frame(df, ['medication','acs','fpg', 'sbp', 'ldl'])
        df['Treatment type'] = '% of simulants on Primary Prevention Tx'
        df_all = collapse_frame(df_all, ['acs','fpg', 'sbp', 'ldl'])
        df_all['Treatment type'] = '% of simulants on Primary Prevention Tx'
        
    index_columns = ['age_group', 'sex', 'year', 'input_draw','location', 'scenario', 'acs', 'fpg', 'sbp', 'ldl']
    df = df.set_index([x for x in df_all.columns if x != 'value']).value
    df_all = df_all.set_index([x for x in df_all.columns if x != 'value']).value
    med_prop = (df / df_all).reset_index()
    med_prop['location'] = med_prop.location.map(mapping_dict_data['location'])
    med_prop['scenario'] = med_prop.scenario.map(mapping_dict_data['scenario'])
    med_prop = med_prop.rename(columns={'scenario':'Scenario'})
    med_prop = med_prop.groupby(['location', 'Scenario', 'Treatment type', 'year']).value.mean().reset_index()
    return med_prop


In [18]:
meds_primary_prevention = get_med_pcts_primary_prevention(person_time_misc, med_options, acs_strat=False)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [19]:
meds_primary_prevention

Unnamed: 0,location,Scenario,Treatment type,year,value
0,Brazil,Business as Usual,% of simulants on Primary Prevention Tx,2020,0.012021
1,Brazil,Business as Usual,% of simulants on Primary Prevention Tx,2021,0.015713
2,Brazil,Business as Usual,% of simulants on Primary Prevention Tx,2022,0.017334
3,Brazil,Business as Usual,% of simulants on Primary Prevention Tx,2023,0.018237
4,Brazil,Business as Usual,% of simulants on Primary Prevention Tx,2024,0.018858
5,Brazil,Intervention 1 (multiple pills),% of simulants on Primary Prevention Tx,2020,0.024177
6,Brazil,Intervention 1 (multiple pills),% of simulants on Primary Prevention Tx,2021,0.038816
7,Brazil,Intervention 1 (multiple pills),% of simulants on Primary Prevention Tx,2022,0.043924
8,Brazil,Intervention 1 (multiple pills),% of simulants on Primary Prevention Tx,2023,0.046139
9,Brazil,Intervention 1 (multiple pills),% of simulants on Primary Prevention Tx,2024,0.047236


In [185]:
# meds_primary_prevention_gb = meds_primary_prevention.groupby(['location', 'Scenario', 'Treatment type', 'year']).value.mean().reset_index()


In [20]:
meds_primary_prevention_2020 = meds_primary_prevention[(meds_primary_prevention.year =='2020')]


In [21]:
meds_primary_prevention_2024 = meds_primary_prevention[(meds_primary_prevention.year =='2024')]


In [46]:
# # locations = ['Brazil', 'China', 'France', 'Italy', 'Russia', 'Spain']
# locations = ['Spain']

# # scenarios = ['Business as Usual', 'Intervention 1 (multiple pills)',
# #        'Intervention 2 (FDC)']
# scenarios = ['Intervention 2 (FDC)']

In [29]:
def get_table_6(locations, meds, med_options, meds_primary_prevention_2020): 
    init_df = pd.DataFrame([])
    shared_cols = ['location', 'current_prescription']
    med_df = meds[(meds.year == '2020')]
#     med_df = med_df.groupby(['location', 'Scenario', 'medication']).value.sum().reset_index()
    med_df = med_df.groupby(['location', 'Scenario', 'medication']).value.mean().reset_index()
    
    agg_fibrates_ezetimibe = med_df[(med_df.medication == 'fibrates') | (med_df.medication == 'ezetimibe')]
    agg_fibrates_ezetimibe = agg_fibrates_ezetimibe.groupby(['location', 'Scenario']).value.sum().reset_index()
    agg_fibrates_ezetimibe['Treatment type'] = '% other (fibrates, ezetimibe alone)'
    
    agg_hp = med_df[(med_df.medication.str.contains('high_potency_statin'))]
    agg_hp = agg_hp.groupby(['location', 'Scenario']).value.sum().reset_index()
    agg_hp['Treatment type'] = '% HP'
    
    agg_lp = med_df[(med_df.medication.str.contains('low_potency_statin'))]
    agg_lp = agg_lp.groupby(['location', 'Scenario']).value.sum().reset_index()
    agg_lp['Treatment type'] = '% LP'
    
    agg_fdc = med_df[(med_df.medication.str.contains('fdc'))]
    agg_fdc = agg_fdc.groupby(['location', 'Scenario']).value.sum().reset_index()
    agg_fdc['Treatment type'] = '% of simulants on FDC'
    
    agg_any = med_df[(med_df.medication != 'none')]
    agg_any = agg_any.groupby(['location', 'Scenario']).value.sum().reset_index()
    agg_any['Treatment type'] = '% of simulants on any lipid lowering treatment'
    
    agg_multi = med_df[(med_df.medication.str.contains('fdc')) | (med_df.medication.str.contains('multi'))]
    agg_multi = agg_multi.groupby(['location', 'Scenario']).value.sum().reset_index()
    agg_multi['Treatment type'] = '% on multiple DRUGS (note: NOT multi-pills)'
    
    agg_none = med_df[(med_df.medication == 'none')]
    agg_none = agg_none.groupby(['location', 'Scenario']).value.sum().reset_index()
    agg_none['Treatment type'] = '% of simulants not on any lipid lowering treatment'
    
    treatment_table = pd.concat([agg_fibrates_ezetimibe, agg_hp, agg_lp, agg_fdc, agg_any, agg_multi, agg_none])
    treatment_table['year'] = '2020'
    treatment_table = treatment_table.append(meds_primary_prevention_2020, ignore_index=True)
    treatment_table = treatment_table.rename(columns={'value':'Start of simulation, Jan 1., 2020 (same for all scenarios)'})
    

#agg by high potency, low potency, fibrates + ezetimibe, and validate by all values for brzil = 1
#     for location in locations:
#         for scenario_item in scenarios:

#             df_loop = med_df[(med_df.location == location) & (med_df.Scenario == scenario_item)]

#             if len(df_loop) > 0.0:
#                 df_loop['% on ezetimibe'] = df_loop[(df_loop.medication == 'ezetimibe')]/ df_loop)
#             else:
#                 df_loop['% on ezetimibe'] = 0
#             if len(df_loop) > 0.0:
#                 df_loop['% on fibrates'] = len(df_loop[df_loop.medication == 'fibrates']) / len(df_loop)
#             else: 
#                 df_loop['% on fibrates'] = 0
#             if len(df_loop) > 0.0:
#                 df_loop['% on high potency statin'] = len(df_loop[(df_loop.medication.str.contains('high_potency_statin'))]) / len(df_loop)
#             else:
#                 df_loop['% on high potency statin'] = 0
#             if len(df_loop) > 0.0:
#                 df_loop['% on low potency statin'] = len(df_loop[(df_loop.medication.str.contains('low_potency_statin'))]) / len(df_loop)
#             else:
#                 df_loop['% on low potency statin'] = 0
#             if len(df_loop) > 0.0:
#                 df_loop['% of simulants on any lipid lowering treatment'] = len(df_loop[df_loop.medication != 'none']) / len(df_loop)
#             else:
#                 df_loop['% of simulants on any lipid lowering treatment'] = 0

#             if len(df_loop) > 0.0:
#                 df_loop['% of simulants not on any lipid lowering treatment'] = len(df_loop[df_loop.medication == 'none']) / len(df_loop)
#             else:
#                 df_loop['% of simulants not on any lipid lowering treatment'] = 0

#             df_loop['location'] = f'{location}'
#             df_loop['Scenario'] = f'{scenario_item}'

#             init_df = init_df.append(df_loop)
   
#     init_df_melt = pd.melt(init_df, id_vars=['location', 'Scenario'])
#     init_df_melt = init_df_melt[(init_df_melt.variable.str.contains('%'))]
#     init_df_melt = init_df_melt.drop_duplicates()
#     init_df_melt = init_df_melt.rename(columns={'variable':'current_prescription', 'value':'output_mean_value'})
#     init_df_melt['output_mean_value'] = init_df_melt.output_mean_value.astype(float)
#     init_df_melt= init_df_melt.round(2)
    treatment_table = treatment_table.round(2)
    return treatment_table


In [30]:
def get_table_6_end(locations, meds, med_options, meds_primary_prevention_2024): 
    init_df = pd.DataFrame([])
    shared_cols = ['location', 'current_prescription']
    med_df = meds[(meds.year == '2024')]
    med_df = med_df.groupby(['location', 'Scenario', 'medication']).value.mean().reset_index()
    
    agg_fibrates_ezetimibe = med_df[(med_df.medication == 'fibrates') | (med_df.medication == 'ezetimibe')]
    agg_fibrates_ezetimibe = agg_fibrates_ezetimibe.groupby(['location', 'Scenario']).value.sum().reset_index()
    agg_fibrates_ezetimibe['Treatment type'] = '% other (fibrates, ezetimibe alone)'
    
    agg_hp = med_df[(med_df.medication.str.contains('high_potency_statin'))]
    agg_hp = agg_hp.groupby(['location', 'Scenario']).value.sum().reset_index()
    agg_hp['Treatment type'] = '% HP'
    
    agg_lp = med_df[(med_df.medication.str.contains('low_potency_statin'))]
    agg_lp = agg_lp.groupby(['location', 'Scenario']).value.sum().reset_index()
    agg_lp['Treatment type'] = '% LP'
    
    agg_fdc = med_df[(med_df.medication.str.contains('fdc'))]
    agg_fdc = agg_fdc.groupby(['location', 'Scenario']).value.sum().reset_index()
    agg_fdc['Treatment type'] = '% of simulants on FDC'
    
    agg_any = med_df[(med_df.medication != 'none')]
    agg_any = agg_any.groupby(['location', 'Scenario']).value.sum().reset_index()
    agg_any['Treatment type'] = '% of simulants on any lipid lowering treatment'
    
    agg_multi = med_df[(med_df.medication.str.contains('fdc')) | (med_df.medication.str.contains('multi'))]
    agg_multi = agg_multi.groupby(['location', 'Scenario']).value.sum().reset_index()
    agg_multi['Treatment type'] = '% on multiple DRUGS (note: NOT multi-pills)'
    
    agg_none = med_df[(med_df.medication == 'none')]
    agg_none = agg_none.groupby(['location', 'Scenario']).value.sum().reset_index()
    agg_none['Treatment type'] = '% of simulants not on any lipid lowering treatment'
    
    treatment_table = pd.concat([agg_fibrates_ezetimibe, agg_hp, agg_lp, agg_fdc, agg_any, agg_multi, agg_none])
    treatment_table['year'] = '2024'
    treatment_table = treatment_table.append(meds_primary_prevention_2024, ignore_index=True)
    treatment_table = treatment_table.rename(columns={'value':'End of simulation, Dec. 31, 2024', 'location':'Location'})
    treatment_table = treatment_table[['Location', 'Scenario', 'Treatment type', 'End of simulation, Dec. 31, 2024']]

    treatment_table = treatment_table.round(2)
    return treatment_table


In [31]:
table_6 = get_table_6(locations, meds, med_options, meds_primary_prevention_2020)

In [32]:
table_6

Unnamed: 0,Scenario,Treatment type,location,"Start of simulation, Jan 1., 2020 (same for all scenarios)",year
0,Business as Usual,"% other (fibrates, ezetimibe alone)",Brazil,0.05,2020
1,Intervention 1 (multiple pills),"% other (fibrates, ezetimibe alone)",Brazil,0.01,2020
2,Intervention 2 (FDC),"% other (fibrates, ezetimibe alone)",Brazil,0.01,2020
3,Business as Usual,"% other (fibrates, ezetimibe alone)",China,0.01,2020
4,Intervention 1 (multiple pills),"% other (fibrates, ezetimibe alone)",China,0.00,2020
5,Intervention 2 (FDC),"% other (fibrates, ezetimibe alone)",China,0.00,2020
6,Business as Usual,"% other (fibrates, ezetimibe alone)",France,0.06,2020
7,Intervention 1 (multiple pills),"% other (fibrates, ezetimibe alone)",France,0.01,2020
8,Intervention 2 (FDC),"% other (fibrates, ezetimibe alone)",France,0.01,2020
9,Business as Usual,"% other (fibrates, ezetimibe alone)",Italy,0.03,2020


In [33]:
table_6_all = table_6.groupby(['location', 'Treatment type'])['Start of simulation, Jan 1., 2020 (same for all scenarios)'].mean().reset_index()
table_6_all = table_6_all.round(2)
table_6_all = table_6_all.rename(columns={'location':'Location'})

In [34]:
# table_6_2024 = get_table_6_end(locations, meds, med_options, meds_primary_prevention_2024).sort_values(by=['Location', 'Scenario', 'Treatment type'])
table_6_2024 = get_table_6_end(locations, meds, med_options, meds_primary_prevention_2024)



In [35]:
table_6_2024

Unnamed: 0,Location,Scenario,Treatment type,"End of simulation, Dec. 31, 2024"
0,Brazil,Business as Usual,"% other (fibrates, ezetimibe alone)",0.08
1,Brazil,Intervention 1 (multiple pills),"% other (fibrates, ezetimibe alone)",0.01
2,Brazil,Intervention 2 (FDC),"% other (fibrates, ezetimibe alone)",0.01
3,China,Business as Usual,"% other (fibrates, ezetimibe alone)",0.02
4,China,Intervention 1 (multiple pills),"% other (fibrates, ezetimibe alone)",0.00
5,China,Intervention 2 (FDC),"% other (fibrates, ezetimibe alone)",0.00
6,France,Business as Usual,"% other (fibrates, ezetimibe alone)",0.07
7,France,Intervention 1 (multiple pills),"% other (fibrates, ezetimibe alone)",0.01
8,France,Intervention 2 (FDC),"% other (fibrates, ezetimibe alone)",0.01
9,Italy,Business as Usual,"% other (fibrates, ezetimibe alone)",0.05


In [197]:
table_6_2024_bau = table_6_2024[(table_6_2024.Scenario == 'Business as Usual')]
table_6_2024_bau = table_6_2024_bau.rename(columns={'End of simulation, Dec. 31, 2024':'BAU'})
table_6_2024_bau = table_6_2024_bau.drop(columns=['Scenario'])
table_6_2024_int_1 = table_6_2024[(table_6_2024.Scenario == 'Intervention 1 (multiple pills)')]
table_6_2024_int_1 = table_6_2024_int_1.rename(columns={'End of simulation, Dec. 31, 2024':'Scenario 1 (Multiple Pills)'})
table_6_2024_int_1 = table_6_2024_int_1.drop(columns=['Scenario'])
table_6_2024_int_2 = table_6_2024[(table_6_2024.Scenario == 'Intervention 2 (FDC)')]
table_6_2024_int_2 = table_6_2024_int_2.rename(columns={'End of simulation, Dec. 31, 2024':'Scenario 2 (FDC)'})
table_6_2024_int_2 = table_6_2024_int_2.drop(columns=['Scenario'])
shared_cols = ['Location','Treatment type']
end_sim_scenario_df = pd.merge(table_6_2024_bau, table_6_2024_int_1, left_on=shared_cols, right_on=shared_cols, how='left')
end_sim_scenario_df = pd.merge(end_sim_scenario_df, table_6_2024_int_2, left_on=shared_cols, right_on=shared_cols, how='left')

full_treatment_table = pd.merge(table_6_all,end_sim_scenario_df, left_on=shared_cols, right_on=shared_cols, how='left')

In [198]:
full_treatment_table

Unnamed: 0,Location,Treatment type,"Start of simulation, Jan 1., 2020 (same for all scenarios)",BAU,Scenario 1 (Multiple Pills),Scenario 2 (FDC)
0,Brazil,% HP,0.23,0.05,0.68,0.68
1,Brazil,% LP,0.06,0.16,0.03,0.03
2,Brazil,% of simulants not on any lipid lowering treat...,0.68,0.71,0.28,0.28
3,Brazil,% of simulants on FDC,0.02,0.03,0.01,0.1
4,Brazil,% of simulants on Primary Prevention Tx,0.06,0.07,0.07,0.07
5,Brazil,% of simulants on any lipid lowering treatment,0.32,0.29,0.72,0.72
6,Brazil,% on multiple DRUGS (note: NOT multi-pills),0.04,0.05,0.1,0.1
7,Brazil,"% other (fibrates, ezetimibe alone)",0.02,0.08,0.01,0.01
8,China,% HP,0.11,0.02,0.48,0.48
9,China,% LP,0.02,0.07,0.01,0.01


In [200]:
full_treatment_table.to_csv('20200504_sanofi_zenon_treatment_type_table.csv', index=False)