In [4]:
import pandas as pd
import numpy as np
import os
from pathlib import Path
from db_queries import get_outputs, get_ids, get_model_results, get_population
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import gbd_mapping
from matplotlib.backends.backend_pdf import PdfPages
pd.set_option('use_inf_as_na', True)
sns.set(context = 'paper', style='whitegrid', font_scale=1.8, rc = {'axes.spines.right':False, 'axes.spines.top': False, 'figure.figsize':(12.7,8.6)}, palette='Set1')


#### Instructions

* Before you begin, download and save the 'treatment_initialization.hdf' file Rajan shared in the vivarium_csu_zenon slack channel
* Each row will be a simulant, each column will be for ‘statin - high', ‘statin - low’, ‘ezetimibe’, ‘fibrates’, and ‘FDC’ and their LDL-level. 
* What I’ll need to check is that the initialization matches up what we expect it to be, based on their LDL-level (in their location).  
* Their treatment status should be a direct function of their LDL (with some randomness). 
* I should see numbers that match with Table 2: Probability of Rx given high LDL-C = prob(Rx | LDL-C > 4.9) and Table 6: Current treatment practice - distribution by drug type  and Table 8: Distribution of therapy type.  

In [5]:
df = pd.read_hdf('treatment_initialization.hdf')
df['location'] = 'brazil'


In [7]:
df.head()

Unnamed: 0,ezetimibe,fibrates,high_potency_statin,low_potency_statin,fdc,ldl_c,location
0,False,False,none,none,False,4.7459,brazil
1,False,False,none,none,False,4.165209,brazil
2,False,False,none,none,False,4.705233,brazil
3,False,False,none,none,False,3.570518,brazil
4,False,False,none,none,False,2.949887,brazil


## Table 2 validation

In [3]:
# query the df for the population who is on treatment of either ezetimibe, fibrates, fdc, and the high/low potency statin options of 'low'/'high'
population_treated = df[(df.ezetimibe == True) | (df.fibrates == True) | (df.fdc == True) | (df.high_potency_statin == 'low') | (df.high_potency_statin == 'high') | (df.low_potency_statin == 'low') | (df.low_potency_statin == 'high')]
# query the df for the population who is not on treatment of either ezetimibe, fibrates, fdc, and the high/low potency statin options of 'low'/'high' setting all options to False or 'none'
population_untreated_and_high_ldlc = df[(df.ldl_c > 4.9) & ((df.ezetimibe == False) | (df.fibrates == False) | (df.fdc == False) | (df.high_potency_statin == 'none') | (df.high_potency_statin == 'none') | (df.low_potency_statin == 'none') | (df.low_potency_statin == 'none'))]
# creating a variable of count of population treated
population_treated_ct = len(population_treated)
# creating a variable of count of population untreated and high LDL-c
population_untreated_and_high_ldlc_ct = len(population_untreated_and_high_ldlc)
# calculating table 2 validation check  
population_treated_ct / (population_treated_ct + population_untreated_and_high_ldlc_ct)


0.40165198525759993

## Table 6 validation

### investigating the data

In [106]:
df['high_potency_statin'].unique()

array(['none', 'low'], dtype=object)

In [105]:
df['low_potency_statin'].unique()

array(['none', 'high'], dtype=object)

In [119]:
ezetimibe = df[(df.ezetimibe == True)]
population_treated = df[(df.ezetimibe == True) | (df.fibrates == True) | (df.fdc == True) | (df.high_potency_statin == 'low') | (df.high_potency_statin == 'high') | (df.low_potency_statin == 'low') | (df.low_potency_statin == 'high')]
pct_ezetimibe = len(ezetimibe) / population_treated_ct
fibrates = df[(df.fibrates == True)]
pct_fibrates = len(fibrates) / population_treated_ct
high_potency_statin = df[(df.high_potency_statin != 'none')]
pct_high_potency_statin = len(high_potency_statin) / population_treated_ct
low_potency_statin = df[(df.low_potency_statin != 'none')]
pct_low_potency_statin = len(low_potency_statin) / population_treated_ct
table_6_dict = {'current_prescription':['% on ezetimibe', '% on fibrates', '% on high potency statins', '% on low potency statins'], 
                'location':'brazil', 
                'mean_value': [pct_ezetimibe, pct_fibrates, 
                pct_high_potency_statin, pct_low_potency_statin]}
table_6_df = pd.DataFrame(data=table_6_dict)

### Table 8 validation

### Taking notes from Abie's instructions:

* if two things are true (ezetimibe = True)

* the fraction of rows (ezetimibe true, low potency statin) / (high potency true or low potency true)

* mutli- drugs: fdc is true/ divide by ezetimibe or high_potency statin + low potency statin or fibrates


