In [1]:
import pandas as pd
import numpy as np
import os
from pathlib import Path
from db_queries import get_outputs, get_ids, get_model_results, get_population
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import gbd_mapping
from matplotlib.backends.backend_pdf import PdfPages
pd.set_option('use_inf_as_na', True)
sns.set(context = 'paper', style='whitegrid', font_scale=1.8, rc = {'axes.spines.right':False, 'axes.spines.top': False, 'figure.figsize':(12.7,8.6)}, palette='Set1')


#### Instructions

* Before you begin, download and save the 'treatment_initialization.hdf' file Rajan shared in the vivarium_csu_zenon slack channel
* Each row will be a simulant, each column will be for ‘statin - high', ‘statin - low’, ‘ezetimibe’, ‘fibrates’, and ‘FDC’ and their LDL-level. 
* What I’ll need to check is that the initialization matches up what we expect it to be, based on their LDL-level (in their location).  
* Their treatment status should be a direct function of their LDL (with some randomness). 
* I should see numbers that match with Table 2: Probability of Rx given high LDL-C = prob(Rx | LDL-C > 4.9) and Table 6: Current treatment practice - distribution by drug type  and Table 8: Distribution of therapy type.  

In [19]:
df = pd.read_hdf('adherence_brazil_risk_factors.hdf')
df['location'] = 'brazil'


In [15]:
locations = ['brazil', 'china', 'france', 'italy', 'russian_federation', 'spain'] 

df_brazil = pd.read_hdf('initial_population_brazil.hdf')
df_brazil['location'] = 'brazil'
df_china = pd.read_hdf('initial_population_china.hdf')
df_china['location'] = 'china'
df_france = pd.read_hdf('initial_population_france.hdf')
df_france['location'] = 'france'
df_italy = pd.read_hdf('initial_population_italy.hdf')
df_italy['location'] = 'italy'
df_russian_federation = pd.read_hdf('initial_population_russian_federation.hdf')
df_russian_federation['location'] = 'russian_federation'
df_spain = pd.read_hdf('initial_population_spain.hdf')
df_spain['location'] = 'spain'

df = pd.concat([df_brazil, df_china, df_france, df_italy, df_russian_federation, df_spain] )

## Table 2 validation

In [144]:
table_2_df = pd.read_csv('prob_rx_given_high_ldlc.csv')
table_2_df = table_2_df.rename(columns={'mean_value':'table_2_mean_value', 'sd_value':'table_2_sd_value'})

In [145]:
def get_table_2_validations(df, locations, table_2_df):    
    init_df = pd.DataFrame([])
    shared_cols = ['location']
    for location in locations:
        df_loop = df[(df.location == location)]

        df_loop['output_mean_value'] = len(df_loop[(df_loop.ldlc_treatment_category != 'none')]) / (len(df_loop[(df_loop.ldlc_treatment_category == 'none') & (df_loop.ldl_c>=5.0)]) + len(df_loop[(df_loop.ldlc_treatment_category != 'none')]))
        
        init_df = init_df.append(df_loop)
    init_df = init_df[['location','output_mean_value']]
    init_df = init_df.drop_duplicates()
    init_df['output_mean_value'] = init_df.output_mean_value.astype(float)
    init_df = pd.merge(init_df, table_2_df, left_on=shared_cols, right_on=shared_cols)
    init_df= init_df.round(2)
    return init_df



In [146]:
get_table_2_validations(df, locations, table_2_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


Unnamed: 0,location,output_mean_value,table_2_mean_value,table_2_sd_value
0,brazil,0.46,0.42,0.19
1,china,0.36,0.32,0.18
2,france,0.54,0.5,0.12
3,italy,0.56,0.53,0.21
4,spain,0.38,0.34,0.1


## Table 6 validation

### investigating the data

In [147]:
table_6_df = pd.read_csv('current_rx.csv')
table_6_df = table_6_df.rename(columns={'mean_value':'table_6_mean_value', 'sd_value':'table_6_sd_value'})

In [148]:
def get_table_6_validations(df, locations, table_6_df):    
    init_df = pd.DataFrame([])
    shared_cols = ['location', 'current_prescription']
    for location in locations:
        df_loop = df[(df.location == location)]

        df_loop['% on ezetimibe'] = len(df_loop[(df_loop.ldlc_treatment_category == 'ezetimibe')])/ len(df_loop[(df_loop.ldlc_treatment_category != 'none')])
        df_loop['% on fibrates'] = len(df_loop[df_loop.ldlc_treatment_category == 'fibrates']) / len(df_loop[(df_loop.ldlc_treatment_category != 'none')])
        df_loop['% on high potency statins'] = len(df_loop[(df_loop.ldlc_treatment_category == 'high_potency_statin_low_dose')]) / len(df_loop[(df_loop.ldlc_treatment_category != 'none')])
        df_loop['% on low potency statins'] = len(df_loop[(df_loop.ldlc_treatment_category.str.contains('low_potency_statin_'))]) / len(df_loop[(df_loop.ldlc_treatment_category != 'none')])
        init_df = init_df.append(df_loop)
    
    init_df = pd.melt(init_df, id_vars=['location'])
    init_df = init_df[(init_df.variable.str.contains('%'))]
    init_df = init_df.drop_duplicates()
    init_df = init_df.rename(columns={'variable':'current_prescription', 'value':'output_mean_value'})
    init_df['output_mean_value'] = init_df.output_mean_value.astype(float)
    init_df = pd.merge(init_df, table_6_df, left_on=shared_cols, right_on=shared_cols)
    init_df= init_df.round(2)
    return init_df

In [149]:
get_table_6_validations(df, locations, table_6_df)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.ht

Unnamed: 0,location,current_prescription,output_mean_value,table_6_mean_value,table_6_sd_value
0,brazil,% on ezetimibe,0.07,0.1,0.1
1,china,% on ezetimibe,0.0,0.0,0.01
2,france,% on ezetimibe,0.03,0.04,0.04
3,italy,% on ezetimibe,0.07,0.1,0.1
4,spain,% on ezetimibe,0.06,0.1,0.09
5,brazil,% on fibrates,0.1,0.14,0.12
6,china,% on fibrates,0.09,0.09,0.05
7,france,% on fibrates,0.11,0.13,0.07
8,italy,% on fibrates,0.01,0.01,0.0
9,spain,% on fibrates,0.0,0.0,0.0


## Table 8 validation

### Taking notes from Abie's instructions:

percent_on_monotherapy = / population_treated


In [70]:
table_8_doc = pd.read_csv('dist_therapy_type.csv')
table_8_doc['location'] = table_8_doc.location.str.replace('russia', 'russian_federation')
table_8_doc = table_8_doc.rename(columns={'mean_value':'table_8_mean', 'sd_value':'table_8_sd'})

In [71]:
def get_table_8_validations(df, locations, table_8_doc):    
    init_df = pd.DataFrame([])
    shared_cols = ['location', 'therapy_type']
    for location in locations:
        df_loop = df[(df.location == location)]

        df_loop['% on monotherapy'] = len(df_loop[(df_loop.ldlc_treatment_category.str.contains(r'^(?:(?!_multi).)*$')) & (df_loop.ldlc_treatment_category.str.contains(r'^(?:(?!_fdc).)*$')) & (df_loop.ldlc_treatment_category != 'none')]) / len(df_loop[(df_loop.ldlc_treatment_category != 'none')])
        df_loop['% on multi-drugs'] = len(df_loop[((df_loop.ldlc_treatment_category.str.contains('multi')) | (df_loop.ldlc_treatment_category.str.contains('fdc')))]) / len(df_loop[(df_loop.ldlc_treatment_category != 'none')])
        df_loop['% on FDC if multi drug'] = len(df_loop[(df_loop.ldlc_treatment_category.str.contains('fdc'))]) / len(df_loop[(df_loop.ldlc_treatment_category.str.contains('multi')) | (df_loop.ldlc_treatment_category.str.contains('fdc'))])

        init_df = init_df.append(df_loop)
    init_df = pd.melt(init_df, id_vars=['location'])
    init_df = init_df[(init_df.variable.str.contains('%'))]
    init_df = init_df.drop_duplicates()
    init_df = init_df.rename(columns={'variable':'therapy_type', 'value':'output_mean_value'})
    init_df['output_mean_value'] = init_df.output_mean_value.astype(float)
    init_df = pd.merge(init_df, table_8_doc, left_on=shared_cols, right_on=shared_cols)
    init_df= init_df.round(2)
    return init_df



In [72]:
get_table_8_validations(df, locations, table_8_doc)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


Unnamed: 0,location,therapy_type,output_mean_value,table_8_mean,table_8_sd
0,brazil,% on monotherapy,0.76,0.7,0.0
1,china,% on monotherapy,0.98,0.98,0.0
2,france,% on monotherapy,0.87,0.85,0.0
3,italy,% on monotherapy,0.84,0.78,0.02
4,russian_federation,% on monotherapy,0.97,0.96,0.08
5,spain,% on monotherapy,0.76,0.68,0.0
6,brazil,% on multi-drugs,0.24,0.3,0.2
7,china,% on multi-drugs,0.02,0.02,0.01
8,france,% on multi-drugs,0.13,0.15,0.1
9,italy,% on multi-drugs,0.16,0.22,0.14


### Adherence

In [151]:
table_4_df = pd.read_csv('adherence_parameters.csv')
table_4_df = table_4_df.rename(columns={'mean_value':'table_4_mean', 'sd_value':'table_4_sd'})
table_4_df['location'] = table_4_df.location.str.replace('russia', 'russian_federation')

In [152]:
table_4_df

Unnamed: 0,adherence_parameter,location,table_4_mean,table_4_sd
0,"Adherence - one pill, no MI",brazil,0.55,0.11
1,"Adherence - one pill, no MI",china,0.49,0.1
2,"Adherence - one pill, no MI",france,0.66,0.14
3,"Adherence - one pill, no MI",italy,0.43,0.09
4,"Adherence - one pill, no MI",russian_federation,0.38,0.08
5,"Adherence - one pill, no MI",spain,0.7,0.15
6,"Adherence - multi-pill, no MI",brazil,0.29,0.06
7,"Adherence - multi-pill, no MI",china,0.23,0.05
8,"Adherence - multi-pill, no MI",france,0.4,0.09
9,"Adherence - multi-pill, no MI",italy,0.17,0.04


In [153]:
def get_table_4_validations(df, locations, table_4_df):    
    init_df = pd.DataFrame([])
    shared_cols = ['location', 'adherence_parameter']
    for location in locations:
        df_loop = df[(df.location == location)]
        
        df_loop['Adherence - one pill, no MI'] = len(df_loop[(df_loop.ldlc_treatment_category.str.contains(r'^(?:(?!_multi).)*$')) & 
                            (df_loop.ldlc_treatment_category != 'none') & (df_loop.ischemic_heart_disease == 'susceptible_to_ischemic_heart_disease') 
                            & (df_loop.ischemic_stroke == 'susceptible_to_ischemic_stroke') & (df_loop.adherent == True)]) / len(df_loop[(df_loop.ldlc_treatment_category.str.contains(r'^(?:(?!_multi).)*$') & 
                            (df_loop.ldlc_treatment_category != 'none') & (df_loop.ischemic_heart_disease == 'susceptible_to_ischemic_heart_disease') 
                            & (df_loop.ischemic_stroke == 'susceptible_to_ischemic_stroke'))])
        
        df_loop['Adherence - multi-pill, no MI'] = len(df_loop[(df_loop.ldlc_treatment_category.str.contains('multi') & (df_loop.ischemic_heart_disease == 'susceptible_to_ischemic_heart_disease') & (df_loop.ischemic_stroke == 'susceptible_to_ischemic_stroke') 
                                & (df_loop.adherent == True))]) / len(df_loop[(df_loop.ldlc_treatment_category.str.contains('multi') & (df_loop.ldlc_treatment_category != 'none') 
                                & (df_loop.ischemic_heart_disease == 'susceptible_to_ischemic_heart_disease') & (df_loop.ischemic_stroke == 'susceptible_to_ischemic_stroke'))])
        
        df_loop['Adherence after MI (one pill)'] = len(df_loop[(df_loop.ldlc_treatment_category.str.contains(r'^(?:(?!_multi).)*$')) & 
                            (df_loop.ldlc_treatment_category != 'none') & (df_loop.ischemic_heart_disease != 'susceptible_to_ischemic_heart_disease') 
                            & (df_loop.adherent == True)]) / len(df_loop[(df_loop.ldlc_treatment_category.str.contains(r'^(?:(?!_multi).)*$')) & 
                            (df_loop.ldlc_treatment_category != 'none') & (df_loop.ischemic_heart_disease != 'susceptible_to_ischemic_heart_disease')])
        pop_multi_pill_mi = len(df_loop[(df_loop.ldlc_treatment_category.str.contains('multi')) & (df_loop.ischemic_heart_disease != 'susceptible_to_ischemic_heart_disease')])

        if pop_multi_pill_mi > 0.0:
            df_loop['Adherence after MI (multi-pill)'] = len(df_loop[(df_loop.ldlc_treatment_category.str.contains('multi') & (df_loop.ischemic_heart_disease != 'susceptible_to_ischemic_heart_disease') & (df_loop.adherent == True))]) / len(df_loop[(df_loop.ldlc_treatment_category.str.contains('multi')) & (df_loop.ischemic_heart_disease != 'susceptible_to_ischemic_heart_disease')])  
        
        else:
            df_loop['Adherence after MI (multi-pill)'] = 0.0
            
        init_df = init_df.append(df_loop)
    
    init_df = pd.melt(init_df, id_vars=['location'])
    init_df = init_df[(init_df.variable.str.contains('Adherence'))]
    init_df = init_df.drop_duplicates()
    init_df = init_df.rename(columns={'variable':'adherence_parameter', 'value':'output_mean_value'})
    init_df['output_mean_value'] = init_df.output_mean_value.astype(float)
    init_df = pd.merge(init_df, table_4_df, left_on=shared_cols, right_on=shared_cols)
    init_df= init_df.round(2)
    return init_df


In [154]:
get_table_4_validations(df, locations, table_4_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: h

Unnamed: 0,location,adherence_parameter,output_mean_value,table_4_mean,table_4_sd
0,brazil,"Adherence - one pill, no MI",0.54,0.55,0.11
1,china,"Adherence - one pill, no MI",0.47,0.49,0.1
2,france,"Adherence - one pill, no MI",0.66,0.66,0.14
3,italy,"Adherence - one pill, no MI",0.42,0.43,0.09
4,russian_federation,"Adherence - one pill, no MI",0.37,0.38,0.08
5,spain,"Adherence - one pill, no MI",0.69,0.7,0.15
6,brazil,"Adherence - multi-pill, no MI",0.26,0.29,0.06
7,china,"Adherence - multi-pill, no MI",0.2,0.23,0.05
8,france,"Adherence - multi-pill, no MI",0.39,0.4,0.09
9,italy,"Adherence - multi-pill, no MI",0.14,0.17,0.04
