In [1]:
import pandas as pd
import numpy as np
import os
from pathlib import Path
from db_queries import get_outputs, get_ids, get_model_results, get_population
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import gbd_mapping
from matplotlib.backends.backend_pdf import PdfPages
pd.set_option('use_inf_as_na', True)
sns.set(context = 'paper', style='whitegrid', font_scale=1.8, rc = {'axes.spines.right':False, 'axes.spines.top': False, 'figure.figsize':(12.7,8.6)}, palette='Set1')


#### Instructions

* Before you begin, download and save the 'treatment_initialization.hdf' file Rajan shared in the vivarium_csu_zenon slack channel
* Each row will be a simulant, each column will be for ‘statin - high', ‘statin - low’, ‘ezetimibe’, ‘fibrates’, and ‘FDC’ and their LDL-level. 
* What I’ll need to check is that the initialization matches up what we expect it to be, based on their LDL-level (in their location).  
* Their treatment status should be a direct function of their LDL (with some randomness). 
* I should see numbers that match with Table 2: Probability of Rx given high LDL-C = prob(Rx | LDL-C > 4.9) and Table 6: Current treatment practice - distribution by drug type  and Table 8: Distribution of therapy type.  

In [2]:
df = pd.read_hdf('adherence_brazil_risk_factors.hdf')
df['location'] = 'brazil'


## Table 2 validation

In [21]:
pop_treated = len(df[(df.ldlc_treatment_category != 'none')])
pop_untreated_and_high_ldlc = len(df[(df.ldlc_treatment_category == 'none') & (df.ldl_c>=5.0)])
prob_rx_high_ldlc = pop_treated / (pop_treated + pop_untreated_and_high_ldlc)
table_2_dict = {'location':['brazil'],  
                'output_mean_value': round(prob_rx_high_ldlc, 2),
                ' table_2_value': 0.42,
                'table_2_sd': 0.19}
table_2_df = pd.DataFrame(data=table_2_dict)
table_2_df

Unnamed: 0,location,mean_value,table_2_value,table_2_sd
0,brazil,0.46,0.42,0.19


## Table 6 validation

### investigating the data

In [33]:
pct_ezetimibe = len(df[(df.ldlc_treatment_category == 'ezetimibe')])/ len(df[(df.ldlc_treatment_category != 'none')])
pct_fibrates = len(df[df.ldlc_treatment_category == 'fibrates']) / len(df[(df.ldlc_treatment_category != 'none')])
pct_high_potency = len(df[(df.ldlc_treatment_category == 'high_potency_statin_low_dose')]) / len(df[(df.ldlc_treatment_category != 'none')])
pct_low_potency = len(df[(df.ldlc_treatment_category.str.contains('low_potency_statin_'))]) / len(df[(df.ldlc_treatment_category != 'none')])
table_6_dict = {'current_prescription':['% on ezetimibe', '% on fibrates', '% on high potency statins', '% on low potency statins'], 
                'location':'brazil', 
                'output_mean_value': [round(pct_ezetimibe, 2), round(pct_fibrates, 2), 
                round(pct_high_potency, 2), round(pct_low_potency, 2)],
                'table_6_mean':[0.10, 0.14, 0.38, 0.37],
                'table_6_sd':[0.10, 0.12, 0.07, 0.16]}
table_6_df = pd.DataFrame(data=table_6_dict)
table_6_df


Unnamed: 0,current_prescription,location,mean_value,table_6_mean,table_6_sd
0,% on ezetimibe,brazil,0.07,0.1,0.1
1,% on fibrates,brazil,0.1,0.14,0.12
2,% on high potency statins,brazil,0.34,0.38,0.07
3,% on low potency statins,brazil,0.49,0.37,0.16


### Table 8 validation

### Taking notes from Abie's instructions:

percent_on_monotherapy = / population_treated


In [64]:
# pct_monotherapy = df[(df.ldc_treatment_category.str.contains('_multi'))]

pct_monotherapy = len(df[(df.ldlc_treatment_category.str.contains(r'^(?:(?!_multi).)*$')) & (df.ldlc_treatment_category != 'none')]) / len(df[(df.ldlc_treatment_category != 'none')])
pct_multi = len(df[(df.ldlc_treatment_category.str.contains('multi') & (df.ldlc_treatment_category != 'none'))]) / len(df[(df.ldlc_treatment_category != 'none')])
pct_fdc_multi = len(df[(df.ldlc_treatment_category.str.contains('fdc') & (df.ldlc_treatment_category != 'none'))]) / len(df[(df.ldlc_treatment_category != 'none')])

table_8_dict = {'current_prescription':['% on monotherapy', '% on multi-drugs', '% on FDC if multi drug'], 
                'location':'brazil', 
                'output_mean_value': [round(pct_monotherapy, 2), round(pct_multi, 2), round(pct_fdc_multi, 2)],
                'table_8_mean_value':[0.74, 0.30, 0.55],
                'table_8_sd':[0.00, 0.20, 0.04]}
table_8_df = pd.DataFrame(data=table_8_dict)



In [65]:
table_8_df

Unnamed: 0,current_prescription,location,output_mean_value,table_8_mean_value,table_8_sd
0,% on monotherapy,brazil,0.93,0.74,0.0
1,% on multi-drugs,brazil,0.07,0.3,0.2
2,% on FDC if multi drug,brazil,0.17,0.55,0.04


### Adherence

In [86]:
pop_adherent_one_pill_no_mi = len(df[(df.ldlc_treatment_category.str.contains(r'^(?:(?!_multi).)*$')) & 
                            (df.ldlc_treatment_category != 'none') & (df.ischemic_heart_disease == 'susceptible_to_ischemic_heart_disease') 
                            & (df.ischemic_stroke == 'susceptible_to_ischemic_stroke') & (df.adherent == True)]) 

pop_one_pill_no_mi = len(df[(df.ldlc_treatment_category.str.contains(r'^(?:(?!_multi).)*$') & 
                            (df.ldlc_treatment_category != 'none') & (df.ischemic_heart_disease == 'susceptible_to_ischemic_heart_disease') 
                            & (df.ischemic_stroke == 'susceptible_to_ischemic_stroke'))])


pop_adherent_multi_pill_no_mi = len(df[(df.ldlc_treatment_category.str.contains('multi') & (df.ischemic_heart_disease == 'susceptible_to_ischemic_heart_disease') & (df.ischemic_stroke == 'susceptible_to_ischemic_stroke') 
                                & (df.adherent == True))]) 

pop_multi_pill_no_mi = len(df[(df.ldlc_treatment_category.str.contains('multi') & (df.ldlc_treatment_category != 'none') 
                                & (df.ischemic_heart_disease == 'susceptible_to_ischemic_heart_disease') & (df.ischemic_stroke == 'susceptible_to_ischemic_stroke'))])


pop_adherent_one_pill_mi = len(df[(df.ldlc_treatment_category.str.contains(r'^(?:(?!_multi).)*$')) & 
                            (df.ldlc_treatment_category != 'none') & (df.ischemic_heart_disease != 'susceptible_to_ischemic_heart_disease') 
                            & (df.adherent == True)])

pop_one_pill_mi = len(df[(df.ldlc_treatment_category.str.contains(r'^(?:(?!_multi).)*$')) & 
                            (df.ldlc_treatment_category != 'none') & (df.ischemic_heart_disease != 'susceptible_to_ischemic_heart_disease')])


pop_adherent_multi_pill_mi = len(df[(df.ldlc_treatment_category.str.contains('multi') & (df.ischemic_heart_disease != 'susceptible_to_ischemic_heart_disease') & (df.adherent == True))])

pop_multi_pill_mi = len(df[(df.ldlc_treatment_category.str.contains('multi')) & (df.ischemic_heart_disease != 'susceptible_to_ischemic_heart_disease')])


pct_adherent_one_pill_no_mi = pop_adherent_one_pill_no_mi / pop_one_pill_no_mi
pop_adherent_multi_pill_no_mi = pop_adherent_multi_pill_no_mi / pop_multi_pill_no_mi
pct_adherent_one_pill_mi = pop_adherent_one_pill_mi / pop_one_pill_mi 
pct_adherent_multi_pill_mi = pop_adherent_multi_pill_mi / pop_multi_pill_mi

table_4_dict = {'adherence_parameter':['Adherence - one pill, no MI', 
                                       'Adherence - multi-pill, no MI', 'Adherence after MI (one pill)', 
                                       'Adherence after MI (multi-pill)'], 
                'location':'brazil', 
                'output_mean_value': [round(pct_adherent_one_pill_no_mi, 2), round(pop_adherent_multi_pill_no_mi, 2), 
                               round(pct_adherent_one_pill_mi, 2), round(pct_adherent_multi_pill_mi,2)],
                'table_4_mean_value':[0.55, 0.29, 0.67, 0.41],
                'table_4_sd_value':[0.11, 0.06, 0.14, 0.09]}
table_4_df = pd.DataFrame(data=table_4_dict)

In [87]:
table_4_df

Unnamed: 0,adherence_parameter,location,output_mean_value,table_4_mean_value,table_4_sd_value
0,"Adherence - one pill, no MI",brazil,0.54,0.55,0.11
1,"Adherence - multi-pill, no MI",brazil,0.26,0.29,0.06
2,Adherence after MI (one pill),brazil,0.67,0.67,0.14
3,Adherence after MI (multi-pill),brazil,0.45,0.41,0.09
