# Generate state level medication data 

This notebook generates fake data for planning how to add state-level variation to the medications in the CVD sim. Additionally, the relative risks (RRs) will be calculated in this notebook once we receive the real data.

In [1]:
import numpy as np
import pandas as pd
import scipy.stats, random
import math
# Allow up to 200 rows to be shown when a DataFrame is displayed.
pd.set_option('display.max_rows', 200)

In [2]:
# Build a fake table with one row per (state, sex, age group) and random
# medication rates that increase with the age category.
sex_list = ['female', 'male']
state_list = ['alabama', 'alaska', 'arizona', 'arkansas', 'california']

# Age-group start values (kept as strings) and their ordinal category 1..12.
age_starts = ['25', '30', '35', '40', '45', '50', '55', '60', '65', '70', '75', '80']
age_cats = list(range(1, len(age_starts) + 1))

# Create every state/sex piece first and concatenate once. Growing a frame
# with pd.concat inside the loop (starting from an empty DataFrame) re-copies
# all rows on each pass, leaves duplicate index labels, and depends on how
# pandas treats empty frames when determining result dtypes.
frames = [
    pd.DataFrame({
        'age_start': age_starts,
        'age_cat': age_cats,
        'sex': sex,
        'state': state,
    })
    for state in state_list
    for sex in sex_list
]
df = pd.concat(frames, ignore_index=True)

# Draw rates from a normal distribution whose mean scales with age_cat
# (standard deviation 0.1). Negative draws are replaced with 0.1.
df['SBP_med_rate'] = np.random.normal(0.05 * df['age_cat'], 0.1, len(df))
df['SBP_med_rate'] = np.where(df['SBP_med_rate'] < 0, 0.1, df['SBP_med_rate'])
df['LDL_med_rate'] = np.random.normal(0.02 * df['age_cat'], 0.1, len(df))
df['LDL_med_rate'] = np.where(df['LDL_med_rate'] < 0, 0.1, df['LDL_med_rate'])

df.head()

Unnamed: 0,age_start,age_cat,sex,state,SBP_med_rate,LDL_med_rate
0,25,1,female,alabama,0.1,0.093995
1,30,2,female,alabama,0.073944,0.1
2,35,3,female,alabama,0.071364,0.1
3,40,4,female,alabama,0.294286,0.111265
4,45,5,female,alabama,0.292431,0.210962


In [3]:
# Average each medication rate over all rows that share an age group and sex,
# then attach those averages to every row as new columns.
group_keys = ['age_start', 'sex']
by_age_sex = df.groupby(group_keys)

sbp_med_average = by_age_sex['SBP_med_rate'].mean().rename('sbp_average').reset_index()
ldl_med_average = by_age_sex['LDL_med_rate'].mean().rename('ldl_average').reset_index()

df = df.merge(sbp_med_average, on=group_keys)
df = df.merge(ldl_med_average, on=group_keys)
df.head()

Unnamed: 0,age_start,age_cat,sex,state,SBP_med_rate,LDL_med_rate,sbp_average,ldl_average
0,25,1,female,alabama,0.1,0.093995,0.09181,0.100416
1,25,1,female,alaska,0.1,0.1,0.09181,0.100416
2,25,1,female,arizona,0.051931,0.108072,0.09181,0.100416
3,25,1,female,arkansas,0.1,0.1,0.09181,0.100416
4,25,1,female,california,0.107121,0.100011,0.09181,0.100416


In [4]:
# Ratio of each row's medication rate to its age/sex average.
df['sbp_rr'] = df['SBP_med_rate'].div(df['sbp_average'])
df['ldl_rr'] = df['LDL_med_rate'].div(df['ldl_average'])
# Combined value: the simple mean of the two ratios.
df['both_rr'] = df['sbp_rr'].add(df['ldl_rr']).div(2)

In [5]:
# Display the table, including the ratio columns, for inspection.
df

Unnamed: 0,age_start,age_cat,sex,state,SBP_med_rate,LDL_med_rate,sbp_average,ldl_average,sbp_rr,ldl_rr,both_rr
0,25,1,female,alabama,0.100000,0.093995,0.091810,0.100416,1.089202,0.936061,1.012631
1,25,1,female,alaska,0.100000,0.100000,0.091810,0.100416,1.089202,0.995862,1.042532
2,25,1,female,arizona,0.051931,0.108072,0.091810,0.100416,0.565630,1.076244,0.820937
3,25,1,female,arkansas,0.100000,0.100000,0.091810,0.100416,1.089202,0.995862,1.042532
4,25,1,female,california,0.107121,0.100011,0.091810,0.100416,1.166765,0.995972,1.081369
...,...,...,...,...,...,...,...,...,...,...,...
115,80,12,male,alabama,0.509277,0.349205,0.612565,0.296143,0.831385,1.179179,1.005282
116,80,12,male,alaska,0.703771,0.255155,0.612565,0.296143,1.148893,0.861594,1.005244
117,80,12,male,arizona,0.566389,0.279740,0.612565,0.296143,0.924618,0.944612,0.934615
118,80,12,male,arkansas,0.626703,0.295743,0.612565,0.296143,1.023081,0.998652,1.010866


In [6]:
# Write the fake table to CSV. No index argument is passed, so to_csv's
# default applies and the DataFrame index is written as the first column.
df.to_csv('/mnt/share/scratch/users/sbachmei/state_medication_FAKE_data.csv')

# This section looks at the real data from Nikki

In [3]:
# Load the smoothed BRFSS blood-pressure / cholesterol medication file.
# This replaces the fake-data `df` from the section above.
brfss_path = '/mnt/team/cvd/pub/usa_re/sim_science/brfss/smoothed_brfss_bp_chol_med_data.csv'
df = pd.read_csv(brfss_path)
df.head()

Unnamed: 0,state,year,sex,age_group,percent_on_BP_meds_among_high_BP_group,percent_on_chol_meds_among_high_chol_group,percent_high_BP,percent_high_chol
0,Alabama,2021,female,25-29,0.286823,0.026759,0.090676,0.06105
1,Alabama,2021,female,30-34,0.473624,0.178988,0.158214,0.11444
2,Alabama,2021,female,35-39,0.63039,0.315225,0.230253,0.178917
3,Alabama,2021,female,40-44,0.755099,0.434965,0.306154,0.25249
4,Alabama,2021,female,45-49,0.855823,0.538051,0.38691,0.336032


In [4]:
# Average each medication percentage over all rows that share an age group
# and sex, then attach those averages to every row as new columns.
group_keys = ['age_group', 'sex']
by_age_sex = df.groupby(group_keys)

sbp_med_average = (
    by_age_sex['percent_on_BP_meds_among_high_BP_group']
    .mean()
    .rename('sbp_average')
    .reset_index()
)
ldl_med_average = (
    by_age_sex['percent_on_chol_meds_among_high_chol_group']
    .mean()
    .rename('ldl_average')
    .reset_index()
)

df = df.merge(sbp_med_average, on=group_keys)
df = df.merge(ldl_med_average, on=group_keys)
df.head()

Unnamed: 0,state,year,sex,age_group,percent_on_BP_meds_among_high_BP_group,percent_on_chol_meds_among_high_chol_group,percent_high_BP,percent_high_chol,sbp_average,ldl_average
0,Alabama,2021,female,25-29,0.286823,0.026759,0.090676,0.06105,0.295173,0.102458
1,Alaska,2021,female,25-29,0.010039,0.03895,0.036911,0.104137,0.295173,0.102458
2,Arizona,2021,female,25-29,0.250222,0.014844,0.057299,0.208706,0.295173,0.102458
3,Arkansas,2021,female,25-29,0.591977,0.0,0.101565,0.09974,0.295173,0.102458
4,California,2021,female,25-29,0.311544,0.121723,0.075547,0.130016,0.295173,0.102458


In [5]:
# Ratio of each row's medication percentage to its age/sex average, with any
# ratio below the floor raised to the floor value.
rr_floor = 0.33

sbp_ratio = df['percent_on_BP_meds_among_high_BP_group'] / df['sbp_average']
df['sbp_rr'] = sbp_ratio.mask(sbp_ratio < rr_floor, rr_floor)

ldl_ratio = df['percent_on_chol_meds_among_high_chol_group'] / df['ldl_average']
df['ldl_rr'] = ldl_ratio.mask(ldl_ratio < rr_floor, rr_floor)

# Combined value: the simple mean of the two floored ratios.
df['both_rr'] = df['sbp_rr'].add(df['ldl_rr']).div(2)

In [6]:
# Spot-check the computed ratios for one state. Add more names to the list
# (e.g. 'California', 'Florida') to inspect several states at once.
df.loc[df.state.isin(['Pennsylvania'])]

Unnamed: 0,state,year,sex,age_group,percent_on_BP_meds_among_high_BP_group,percent_on_chol_meds_among_high_chol_group,percent_high_BP,percent_high_chol,sbp_average,ldl_average,sbp_rr,ldl_rr,both_rr
38,Pennsylvania,2021,female,25-29,0.291631,0.024008,0.083608,0.080331,0.295173,0.102458,0.987999,0.33,0.659
89,Pennsylvania,2021,female,30-34,0.441697,0.100842,0.082677,0.100983,0.434884,0.149821,1.015666,0.673082,0.844374
140,Pennsylvania,2021,female,35-39,0.572911,0.191823,0.106452,0.137805,0.555802,0.216805,1.030784,0.884774,0.957779
191,Pennsylvania,2021,female,40-44,0.675802,0.289321,0.152799,0.190196,0.657649,0.298376,1.027603,0.969652,0.998627
242,Pennsylvania,2021,female,45-49,0.776619,0.407936,0.227611,0.258536,0.740484,0.400941,1.0488,1.017447,1.033123
293,Pennsylvania,2021,female,50-54,0.833288,0.51406,0.317332,0.354113,0.799364,0.493542,1.042438,1.041572,1.042005
344,Pennsylvania,2021,female,55-59,0.833015,0.56387,0.398104,0.437264,0.84323,0.570918,0.987887,0.987654,0.98777
395,Pennsylvania,2021,female,60-64,0.855763,0.602596,0.474866,0.482877,0.878411,0.642635,0.974217,0.937696,0.955957
446,Pennsylvania,2021,female,65-69,0.876993,0.686558,0.531528,0.507493,0.907744,0.706529,0.966124,0.971734,0.968929
497,Pennsylvania,2021,female,70-74,0.901248,0.741046,0.576901,0.520195,0.927274,0.745791,0.971933,0.993637,0.982785


In [7]:
# Write the real-data table with the ratio columns to CSV. No index argument
# is passed, so to_csv's default applies and the DataFrame index is written
# as the first column.
df.to_csv('/mnt/share/scratch/users/sbachmei/state_medication_real_data_v3.csv')