In [1]:
import pandas as pd
import numpy as np
import os
from pathlib import Path
from db_queries import get_outputs, get_ids, get_model_results, get_population
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import gbd_mapping
from matplotlib.backends.backend_pdf import PdfPages
pd.set_option('use_inf_as_na', True)
sns.set(context = 'paper', style='whitegrid', font_scale=1.8, rc = {'axes.spines.right':False, 'axes.spines.top': False, 'figure.figsize':(12.7,8.6)}, palette='Set1')


# TO-DO

* check in with RT about results from table 4 probability...most values are too low...are the calculations wrong?
* put code in github

#### Instructions

* Before you begin, download and save the 'treatment_initialization.hdf' file Rajan shared in the vivarium_csu_zenon slack channel
* Each row will be a simulant, each column will be for ‘statin - high', ‘statin - low’, ‘ezetimibe’, ‘fibrates’, and ‘FDC’ and their LDL-level. 
* What I’ll need to check is that the initialization matches up what we expect it to be, based on their LDL-level (in their location).  
* Their treatment status should be a direct function of their LDL (with some randomness). 
* I should see numbers that match with Table 2: Probability of Rx given high LDL-C = prob(Rx | LDL-C > 4.9) and Table 6: Current treatment practice - distribution by drug type  and Table 8: Distribution of therapy type.  

In [3]:
df = pd.read_hdf('treatment_initialization.hdf')
df['location'] = 'brazil'


In [7]:
df.head()

Unnamed: 0,ezetimibe,fibrates,high_potency_statin,low_potency_statin,fdc,ldl_c,location
0,False,False,none,none,False,4.7459,brazil
1,False,False,none,none,False,4.165209,brazil
2,False,False,none,none,False,4.705233,brazil
3,False,False,none,none,False,3.570518,brazil
4,False,False,none,none,False,2.949887,brazil


## Table 2 validation

In [4]:
# query the df for the population who is on treatment of either ezetimibe, fibrates, fdc, and the high/low potency statin options of 'low'/'high'
population_treated = df[(df.ezetimibe == True) | (df.fibrates == True) | (df.fdc == True) | (df.high_potency_statin == 'low') | (df.high_potency_statin == 'high') | (df.low_potency_statin == 'low') | (df.low_potency_statin == 'high')]
# query the df for the population who is not on treatment of either ezetimibe, fibrates, fdc, and the high/low potency statin options of 'low'/'high' setting all options to False or 'none'
population_untreated_and_high_ldlc = df[(df.ldl_c >= 5.0) & ((df.ezetimibe == False) & (df.fibrates == False) & (df.fdc == False) & (df.high_potency_statin == 'none') & (df.high_potency_statin == 'none') & (df.low_potency_statin == 'none') & (df.low_potency_statin == 'none'))]
# creating a variable of count of population treated
population_treated_ct = len(population_treated)
# creating a variable of count of population untreated and high LDL-c
population_untreated_and_high_ldlc_ct = len(population_untreated_and_high_ldlc)
# calculating table 2 validation check  
population_treated_ct / (population_treated_ct + population_untreated_and_high_ldlc_ct)


0.5239282304626529

## Table 6 validation

### investigating the data

In [106]:
df['high_potency_statin'].unique()

array(['none', 'low'], dtype=object)

In [105]:
df['low_potency_statin'].unique()

array(['none', 'high'], dtype=object)

In [5]:
# update dict so it doesn't require knowing the ordering

ezetimibe = df[(df.ezetimibe == True)]
population_treated = df[(df.ezetimibe == True) | (df.fibrates == True) | (df.fdc == True) | (df.high_potency_statin == 'low') | (df.high_potency_statin == 'high') | (df.low_potency_statin == 'low') | (df.low_potency_statin == 'high')]
pct_ezetimibe = len(ezetimibe) / population_treated_ct
fibrates = df[(df.fibrates == True)]
pct_fibrates = len(fibrates) / population_treated_ct
high_potency_statin = df[(df.high_potency_statin != 'none')]
pct_high_potency_statin = len(high_potency_statin) / population_treated_ct
low_potency_statin = df[(df.low_potency_statin != 'none')]
pct_low_potency_statin = len(low_potency_statin) / population_treated_ct
table_6_dict = {'current_prescription':['% on ezetimibe', '% on fibrates', '% on high potency statins', '% on low potency statins'], 
                'location':'brazil', 
                'mean_value': [pct_ezetimibe, pct_fibrates, 
                pct_high_potency_statin, pct_low_potency_statin]}
table_6_df = pd.DataFrame(data=table_6_dict)


In [6]:
table_6_df

Unnamed: 0,current_prescription,location,mean_value
0,% on ezetimibe,brazil,0.341425
1,% on fibrates,brazil,0.063987
2,% on high potency statins,brazil,0.544217
3,% on low potency statins,brazil,0.342922


### Table 8 validation

### Taking notes from Abie's instructions:

percent_on_monotherapy = / population_treated


In [66]:
rx_map = {'rx':{False:0, True:1, 'none': 0, 'low':1, 'high':1}}
column_list = ['ezetimibe', 'fibrates','fdc', 'high_potency_statin', 'low_potency_statin']
# population_monotherapy = df[(df.ezetimibe == True) + (df.fibrates == True) + (df.fdc == True) + 
#                         (df.high_potency_statin == 'low') 
#                         + (df.high_potency_statin == 'high') + (df.low_potency_statin == 'low') + (df.low_potency_statin == 'high')]
# number_of_drugs = (1*0(df.ezetimibe == True) + 1.0*(df.fibrates == True) + 1.0*(df.fdc == True) 
#                    + 1.0*(df.high_potency_statin == 'low') + 1.0*(df.high_potency_statin == 'high') 
#                    + 1.0*(df.low_potency_statin == 'low') + 1.0*(df.low_potency_statin == 'high'))
df['ezetimibe'] = df.ezetimibe.map(rx_map['rx'])
df['fibrates'] = df.fibrates.map(rx_map['rx'])
df['fdc'] = df.fdc.map(rx_map['rx'])
df['high_potency_statin'] = df.high_potency_statin.map(rx_map['rx'])
df['low_potency_statin'] = df.low_potency_statin.map(rx_map['rx'])
df['number_of_drugs'] = df[column_list].sum(axis=1)
population_monotherapy = len(df[df.number_of_drugs == 1])
population_multidrugs = len(df[df.number_of_drugs >1])
pct_monotherapy = population_monotherapy / population_treated_ct
pct_multidrugs = population_multidrugs / population_treated_ct
population_fdc_multidrug = len(df[(df.fdc == 1) & (df.number_of_drugs > 1)])
pct_fdc_multidrug = population_fdc_multidrug / population_treated_ct


In [68]:
table_8_dict = {'current_prescription':['% on monotherapy', '% on multi-drugs', '% on FDC if multi drug'], 
                'location':'brazil', 
                'mean_value': [pct_monotherapy, pct_multidrugs, pct_fdc_multidrug]}
table_8_df = pd.DataFrame(data=table_8_dict)


In [69]:
table_8_df

Unnamed: 0,current_prescription,location,mean_value
0,% on monotherapy,brazil,0.707449
1,% on multi-drugs,brazil,0.292551
2,% on FDC if multi drug,brazil,0.168301


### Adherence

In [2]:
df = pd.read_hdf('adherence.hdf')
df['location'] = 'italy'


In [3]:
df.head()

Unnamed: 0,ezetimibe,fibrates,high_potency_statin,low_potency_statin,fdc,ischemic_heart_disease,ischemic_stroke,adherent,location
0,False,False,none,none,False,susceptible_to_ischemic_heart_disease,susceptible_to_ischemic_stroke,False,italy
1,False,False,none,none,False,susceptible_to_ischemic_heart_disease,susceptible_to_ischemic_stroke,False,italy
2,False,False,none,none,False,susceptible_to_ischemic_heart_disease,susceptible_to_ischemic_stroke,False,italy
3,False,False,none,none,False,susceptible_to_ischemic_heart_disease,susceptible_to_ischemic_stroke,False,italy
4,False,False,none,none,False,susceptible_to_ischemic_heart_disease,susceptible_to_ischemic_stroke,False,italy


In [13]:
rx_map = {'rx':{False:0, True:1, 'none': 0, 'low':1, 'high':1}}
column_list = ['ezetimibe', 'fibrates','fdc', 'high_potency_statin', 'low_potency_statin']
df['ezetimibe'] = df.ezetimibe.map(rx_map['rx'])
df['fibrates'] = df.fibrates.map(rx_map['rx'])
df['fdc'] = df.fdc.map(rx_map['rx'])
df['high_potency_statin'] = df.high_potency_statin.map(rx_map['rx'])
df['low_potency_statin'] = df.low_potency_statin.map(rx_map['rx'])
df['number_of_drugs'] = df[column_list].sum(axis=1)
# i'm assuming that the count of individuals who are adherent are those where adherent is true
pop_adherent_ct = len(df[(df.adherent == True)])

# i'm assuming that the count of individuals who are adherent, one pill, with no MI are those that are 'susceptible to IHD'
pct_pop_adherent_one_pill_no_mi_ct = len(df[(df.number_of_drugs == 1) & 
                                 (df.ischemic_heart_disease == 'susceptible_to_ischemic_heart_disease')
                                & (df.adherent == True)]) / pop_adherent_ct

# i'm assuming that the count of individuals who are adherent, one pill, with no MI are those that are 'susceptible to IHD'
pct_pop_adherent_multi_pill_no_mi = len(df[(df.number_of_drugs > 1) & 
                                 (df.ischemic_heart_disease == 'susceptible_to_ischemic_heart_disease')
                                & (df.adherent == True)]) / pop_adherent_ct

# i'm assuming that the count of individuals who are adherent, one pill, with MI are those that are not 'susceptible to IHD'
pct_pop_adherent_mi_one_pill = len(df[(df.number_of_drugs == 1) & 
                                 (df.ischemic_heart_disease != 'susceptible_to_ischemic_heart_disease')
                                & (df.adherent == True)]) / pop_adherent_ct

# i'm assuming that the count of individuals who are adherent, multi pills, with MI are those that are not 'susceptible to IHD'
pct_pop_adherent_mi_multi_pill = len(df[(df.number_of_drugs > 1) & 
                                 (df.ischemic_heart_disease != 'susceptible_to_ischemic_heart_disease')
                                & (df.adherent == True)]) / pop_adherent_ct

table_4_dict = {'adherence_parameter':['Adherence - one pill, no MI', 
                                       'Adherence - multi-pill, no MI', 'Adherence after MI (one pill)', 
                                       'Adherence after MI (multi-pill)'], 
                'location':'italy', 
                'mean_value': [pct_pop_adherent_one_pill_no_mi_ct, pct_pop_adherent_multi_pill_no_mi, 
                               pct_pop_adherent_mi_one_pill, pct_pop_adherent_mi_multi_pill]}
table_4_df = pd.DataFrame(data=table_4_dict)


In [14]:
table_4_df

Unnamed: 0,adherence_parameter,location,mean_value
0,"Adherence - one pill, no MI",italy,0.773114
1,"Adherence - multi-pill, no MI",italy,0.163018
2,Adherence after MI (one pill),italy,0.051203
3,Adherence after MI (multi-pill),italy,0.012665
