# Heart Failure PAF Calculations 

This workbook is designed to find the PAF for BMI to heart failure ONLY while NOT considering risk correlation to mediation. The more complex versiont that includes these considerations is in another workbook. 

In [1]:
from vivarium import InteractiveContext, Artifact

from datetime import datetime, timedelta
from pathlib import Path
import itertools
import matplotlib.pyplot as plt
import ipywidgets
import pandas as pd, numpy as np
pd.set_option('display.max_rows', 60)
import seaborn as sns

import numpy as np
import researchpy as rp
import statsmodels.api as sm
import statsmodels.formula.api as smf
from matplotlib.backends.backend_pdf import PdfPages
import scipy.stats
from typing import List, Tuple, Union
import scipy.stats as sp

In [2]:
path = Path('/ihme/homes/lutzes/vivarium_nih_us_cvd/src/vivarium_nih_us_cvd/model_specifications/nih_us_cvd.yaml')

In [3]:
sim = InteractiveContext(Path(path), setup=False)
sim.configuration.update({
                          'population':
                              {'population_size': 10_000,
                              },
                          'time':
                              {'start':
                                  {'year': 2021
                                  }
                              }
                          }
                        )
sim.setup()

2023-01-18 11:08:28.008 | DEBUG    | vivarium.framework.values:register_value_modifier:392 - Registering metrics.1.population_manager.metrics as modifier to metrics
2023-01-18 11:08:28.150 | DEBUG    | vivarium.framework.artifact.manager:_load_artifact:66 - Running simulation from artifact located at /ihme/costeffectiveness/artifacts/vivarium_nih_us_cvd/alabama.hdf.
2023-01-18 11:08:28.152 | DEBUG    | vivarium.framework.artifact.manager:_load_artifact:67 - Artifact base filter terms are ['draw == 0'].
2023-01-18 11:08:28.156 | DEBUG    | vivarium.framework.artifact.manager:_load_artifact:68 - Artifact additional filter terms are None.
2023-01-18 11:08:28.443 | DEBUG    | vivarium.framework.values:_register_value_producer:338 - Registering value pipeline cause_specific_mortality_rate
2023-01-18 11:08:28.444 | DEBUG    | vivarium.framework.values:_register_value_producer:338 - Registering value pipeline mortality_rate
2023-01-18 11:08:28.791 | DEBUG    | vivarium.framework.values:_regis

In [4]:
pop0 = sim.get_population()
pop0.head()

Unnamed: 0,tracked,alive,exit_time,age,location,entrance_time,sex,cause_of_death,years_of_life_lost,years_lived_with_disability,...,sbp_medication,ldlc_multiplier,outreach,polypill,ldlc_medication_adherence,sbp_medication_adherence,sbp_multiplier,ldlc_medication,visit_type,scheduled_date
0,True,alive,NaT,47.388882,Alabama,2020-12-04,Female,not_dead,0.0,0.0,...,no_treatment,1.0,cat2,cat2,cat3,cat3,1.0,no_treatment,none,NaT
1,True,alive,NaT,41.564194,Alabama,2020-12-04,Male,not_dead,0.0,0.0,...,no_treatment,1.0,cat2,cat2,cat3,cat3,1.0,no_treatment,none,NaT
2,True,alive,NaT,53.155826,Alabama,2020-12-04,Female,not_dead,0.0,0.0,...,one_drug_half_dose_efficacy,1.362,cat2,cat2,cat3,cat1,1.0,medium_intensity,none,NaT
3,True,alive,NaT,30.457692,Alabama,2020-12-04,Male,not_dead,0.0,0.0,...,no_treatment,1.0,cat2,cat2,cat1,cat3,1.0,no_treatment,none,NaT
4,True,alive,NaT,19.554806,Alabama,2020-12-04,Female,not_dead,0.0,0.0,...,no_treatment,1.0,cat2,cat2,cat1,cat3,1.0,no_treatment,none,NaT


In [5]:
pop1 = pop0[['sex','alive','age']]

In [6]:
#pop0.columns

In [7]:
#sim.list_values()

In [17]:
data1 = pd.concat([pop1,
                   sim.get_value('high_body_mass_index_in_adults.propensity')(pop0.index).rename('bmi_propensity'),
                   sim.get_value('high_body_mass_index_in_adults.exposure')(pop0.index).rename('bmi_exposure'),
                  ], axis=1)
data1.head()

Unnamed: 0,sex,alive,age,bmi_propensity,bmi_exposure
0,Female,alive,47.388882,0.208828,23.56485
1,Male,alive,41.564194,0.358604,25.661107
2,Female,alive,53.155826,0.531571,34.110372
3,Male,alive,30.457692,0.388049,26.473747
4,Female,alive,19.554806,0.672873,5.0


In [18]:
data1 = data1.loc[(data1.age > 25) & (data1.alive == 'alive')]

In [19]:
age_group = pd.cut(data1.age, bins=[25,30,35,40,45,50,55,60,65,70,75,80,85,90,125],labels=['25_to_30','30_to_35','35_to_40','40_to_45','45_to_50','50_to_55','55_to_60','60_to_65','65_to_70','70_to_75','75_to_80','80_to_85','85_to_90','90_to_125'])
data1.insert(5, 'age_group', age_group)
data1

Unnamed: 0,sex,alive,age,bmi_propensity,bmi_exposure,age_group
0,Female,alive,47.388882,0.208828,23.564850,45_to_50
1,Male,alive,41.564194,0.358604,25.661107,40_to_45
2,Female,alive,53.155826,0.531571,34.110372,50_to_55
3,Male,alive,30.457692,0.388049,26.473747,30_to_35
6,Male,alive,33.645113,0.199137,22.520013,30_to_35
...,...,...,...,...,...,...
9994,Male,alive,82.973183,0.491704,26.062478,80_to_85
9995,Female,alive,31.875800,0.356089,27.354193,30_to_35
9996,Male,alive,69.906989,0.013521,19.319172,65_to_70
9998,Female,alive,77.277694,0.623941,31.170490,75_to_80


## Adding RR and TMREL so I can find the individual RR for a Simulant

Note that while this method will randomly generate data, I eventually need to match the RR's to the simulant for the specific draw. As HF has not been added yet, I am not sure I can do this currently.

Also, we might just have the engineers add this to their initialization tasks instead! 

In [20]:
def get_lognorm_from_quantiles(median: float, lower: float, upper: float,
                               quantiles: Tuple[float, float] = (0.025, 0.975)) -> sp.lognorm:
    mu = np.log(median)
    stdnorm_quantiles = sp.norm.ppf(quantiles)
    norm_quantiles = np.log([lower, upper])
    sigma = (norm_quantiles[1] - norm_quantiles[0]) / (stdnorm_quantiles[1] - stdnorm_quantiles[0])
    return sp.lognorm(s=sigma, scale=median)

In [21]:
test2 = get_lognorm_from_quantiles(1.14,1.12,1.16).rvs(size=len(data1))

In [22]:
data1['RR'] = test2
data1.head()

Unnamed: 0,sex,alive,age,bmi_propensity,bmi_exposure,age_group,RR
0,Female,alive,47.388882,0.208828,23.56485,45_to_50,1.147547
1,Male,alive,41.564194,0.358604,25.661107,40_to_45,1.149934
2,Female,alive,53.155826,0.531571,34.110372,50_to_55,1.128442
3,Male,alive,30.457692,0.388049,26.473747,30_to_35,1.125159
6,Male,alive,33.645113,0.199137,22.520013,30_to_35,1.141271


In [23]:
np.random.seed(123)
test3 = np.random.uniform(20,25,len(data1))
test3

array([23.48234593, 21.43069667, 21.13425727, ..., 23.09954019,
       24.65176124, 22.11263259])

In [24]:
data1['TMREL'] = test3
data1.head()

Unnamed: 0,sex,alive,age,bmi_propensity,bmi_exposure,age_group,RR,TMREL
0,Female,alive,47.388882,0.208828,23.56485,45_to_50,1.147547,23.482346
1,Male,alive,41.564194,0.358604,25.661107,40_to_45,1.149934,21.430697
2,Female,alive,53.155826,0.531571,34.110372,50_to_55,1.128442,21.134257
3,Male,alive,30.457692,0.388049,26.473747,30_to_35,1.125159,22.756574
6,Male,alive,33.645113,0.199137,22.520013,30_to_35,1.141271,23.597345


## Finding the individual simulant's RR and PAF

In [25]:
data1['sim_RR'] = np.nan
for i in range(len(data1)):
    data1['sim_RR'].iloc[i] = data1['RR'].iloc[i]**[max((data1['bmi_exposure'].iloc[i] - data1['TMREL'].iloc[i]),0)]
data1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1['sim_RR'].iloc[i] = data1['RR'].iloc[i]**[max((data1['bmi_exposure'].iloc[i] - data1['TMREL'].iloc[i]),0)]


Unnamed: 0,sex,alive,age,bmi_propensity,bmi_exposure,age_group,RR,TMREL,sim_RR
0,Female,alive,47.388882,0.208828,23.564850,45_to_50,1.147547,23.482346,1.011419
1,Male,alive,41.564194,0.358604,25.661107,40_to_45,1.149934,21.430697,1.805804
2,Female,alive,53.155826,0.531571,34.110372,50_to_55,1.128442,21.134257,4.797101
3,Male,alive,30.457692,0.388049,26.473747,30_to_35,1.125159,22.756574,1.550142
6,Male,alive,33.645113,0.199137,22.520013,30_to_35,1.141271,23.597345,1.000000
...,...,...,...,...,...,...,...,...,...
9994,Male,alive,82.973183,0.491704,26.062478,80_to_85,1.127541,22.670537,1.502553
9995,Female,alive,31.875800,0.356089,27.354193,30_to_35,1.130486,20.679220,2.267484
9996,Male,alive,69.906989,0.013521,19.319172,65_to_70,1.136688,23.099540,1.000000
9998,Female,alive,77.277694,0.623941,31.170490,75_to_80,1.136513,24.651761,2.302893


In [31]:
data_grouped = data1.groupby(['sex', 'age_group'], as_index=False).mean()
data_grouped

Unnamed: 0,sex,age_group,age,bmi_propensity,bmi_exposure,RR,TMREL,sim_RR
0,Female,25_to_30,27.524968,0.496819,30.96483,1.140213,22.502696,10.304585
1,Female,30_to_35,32.320013,0.529948,33.170805,1.140292,22.530328,24.76963
2,Female,35_to_40,37.568942,0.525717,31.216305,1.140108,22.413765,9.009664
3,Female,40_to_45,42.542961,0.505146,30.647594,1.140671,22.466482,13.883077
4,Female,45_to_50,47.369076,0.500788,31.814019,1.140133,22.610521,13.891266
5,Female,50_to_55,52.503612,0.498852,31.294312,1.140734,22.414429,10.153443
6,Female,55_to_60,57.588265,0.485547,31.579655,1.140294,22.496061,20.909903
7,Female,60_to_65,62.442279,0.502187,33.088281,1.139513,22.371243,34.794125
8,Female,65_to_70,67.407831,0.504204,30.820834,1.14112,22.647597,12.958946
9,Female,70_to_75,72.435931,0.544055,32.437449,1.140902,22.596889,10.105654


In [32]:
data_grouped['PAF'] = (data_grouped['sim_RR'] - 1) / data_grouped['sim_RR']
data_grouped

Unnamed: 0,sex,age_group,age,bmi_propensity,bmi_exposure,RR,TMREL,sim_RR,PAF
0,Female,25_to_30,27.524968,0.496819,30.96483,1.140213,22.502696,10.304585,0.902956
1,Female,30_to_35,32.320013,0.529948,33.170805,1.140292,22.530328,24.76963,0.959628
2,Female,35_to_40,37.568942,0.525717,31.216305,1.140108,22.413765,9.009664,0.889008
3,Female,40_to_45,42.542961,0.505146,30.647594,1.140671,22.466482,13.883077,0.92797
4,Female,45_to_50,47.369076,0.500788,31.814019,1.140133,22.610521,13.891266,0.928012
5,Female,50_to_55,52.503612,0.498852,31.294312,1.140734,22.414429,10.153443,0.901511
6,Female,55_to_60,57.588265,0.485547,31.579655,1.140294,22.496061,20.909903,0.952176
7,Female,60_to_65,62.442279,0.502187,33.088281,1.139513,22.371243,34.794125,0.97126
8,Female,65_to_70,67.407831,0.504204,30.820834,1.14112,22.647597,12.958946,0.922833
9,Female,70_to_75,72.435931,0.544055,32.437449,1.140902,22.596889,10.105654,0.901045


I expect that the known issues with BMI standard deviation still in the model are causing the PAFs very close to 1. If you use the median, the PAFs are much more reasonable. I think once the model is updated, these issues will be resolved. 