# Generate state level medication data 

This notebook generates fake data to be used in planning for adding state-level variation to the medications in the CVD sim. Additionally, we will calculate the RRs in this notebook once we receive the real data.

In [1]:
import numpy as np
import pandas as pd
import scipy.stats, random
import math
#pd.set_option('display.max_rows', 200)

In [2]:
# Build a fake table with one row per (state, sex, age group) and randomly
# generated SBP / LDL medication rates.
sex_list = ['female', 'male']
state_list = ['alabama', 'alaska', 'arizona', 'arkansas', 'california']
age_start_list = ['25', '30', '35', '40', '45', '50', '55', '60', '65', '70', '75', '80']

# Collect one small frame per (state, sex) pair and concatenate them once.
# Concatenating onto an initially empty DataFrame inside the loop copies the
# growing table on every pass, and ignore_index=True gives every row a unique
# label instead of repeating 0..11 for each pair.
frames = [
    pd.DataFrame({
        'age_start': age_start_list,
        'age_cat': list(range(1, len(age_start_list) + 1)),
        'sex': sex,
        'state': state,
    })
    for state in state_list
    for sex in sex_list
]
df = pd.concat(frames, ignore_index=True)

# Rates are normal draws whose mean grows linearly with the age category
# (standard deviation 0.1). Negative draws are replaced with 0.1.
df['SBP_med_rate'] = np.random.normal((0.05 * df['age_cat']), 0.1, len(df))
df['SBP_med_rate'] = np.where(df['SBP_med_rate'] < 0, 0.1, df['SBP_med_rate'])
df['LDL_med_rate'] = np.random.normal((0.02 * df['age_cat']), 0.1, len(df))
df['LDL_med_rate'] = np.where(df['LDL_med_rate'] < 0, 0.1, df['LDL_med_rate'])

df.head()

Unnamed: 0,age_start,age_cat,sex,state,SBP_med_rate,LDL_med_rate
0,25,1,female,alabama,0.1,0.093995
1,30,2,female,alabama,0.073944,0.1
2,35,3,female,alabama,0.071364,0.1
3,40,4,female,alabama,0.294286,0.111265
4,45,5,female,alabama,0.292431,0.210962


In [3]:
# Mean SBP medication rate within each age group / sex, taken over all states.
sbp_med_average = (
    df.groupby(['age_start', 'sex'])['SBP_med_rate']
    .mean()
    .rename('sbp_average')
    .reset_index()
)

# Same averaging for the LDL medication rate.
ldl_med_average = (
    df.groupby(['age_start', 'sex'])['LDL_med_rate']
    .mean()
    .rename('ldl_average')
    .reset_index()
)

# Attach both group averages to every state-level row.
df = df.merge(sbp_med_average, on=['age_start', 'sex'])
df = df.merge(ldl_med_average, on=['age_start', 'sex'])
df.head()

Unnamed: 0,age_start,age_cat,sex,state,SBP_med_rate,LDL_med_rate,sbp_average,ldl_average
0,25,1,female,alabama,0.1,0.093995,0.09181,0.100416
1,25,1,female,alaska,0.1,0.1,0.09181,0.100416
2,25,1,female,arizona,0.051931,0.108072,0.09181,0.100416
3,25,1,female,arkansas,0.1,0.1,0.09181,0.100416
4,25,1,female,california,0.107121,0.100011,0.09181,0.100416


In [4]:
# Ratio of each row's medication rate to the average for its age group / sex.
df['sbp_rr'] = df['SBP_med_rate'].div(df['sbp_average'])
df['ldl_rr'] = df['LDL_med_rate'].div(df['ldl_average'])
# Combined value is the simple mean of the two ratios.
df['both_rr'] = df['sbp_rr'].add(df['ldl_rr']).div(2)

In [5]:
# Show the fake-data table, now including the ratio columns.
df

Unnamed: 0,age_start,age_cat,sex,state,SBP_med_rate,LDL_med_rate,sbp_average,ldl_average,sbp_rr,ldl_rr,both_rr
0,25,1,female,alabama,0.100000,0.093995,0.091810,0.100416,1.089202,0.936061,1.012631
1,25,1,female,alaska,0.100000,0.100000,0.091810,0.100416,1.089202,0.995862,1.042532
2,25,1,female,arizona,0.051931,0.108072,0.091810,0.100416,0.565630,1.076244,0.820937
3,25,1,female,arkansas,0.100000,0.100000,0.091810,0.100416,1.089202,0.995862,1.042532
4,25,1,female,california,0.107121,0.100011,0.091810,0.100416,1.166765,0.995972,1.081369
...,...,...,...,...,...,...,...,...,...,...,...
115,80,12,male,alabama,0.509277,0.349205,0.612565,0.296143,0.831385,1.179179,1.005282
116,80,12,male,alaska,0.703771,0.255155,0.612565,0.296143,1.148893,0.861594,1.005244
117,80,12,male,arizona,0.566389,0.279740,0.612565,0.296143,0.924618,0.944612,0.934615
118,80,12,male,arkansas,0.626703,0.295743,0.612565,0.296143,1.023081,0.998652,1.010866


In [6]:
# Save the fake-data table as CSV. `index` is left at its default, so the
# DataFrame index is written as the first column.
df.to_csv(
    path_or_buf='/mnt/share/scratch/users/sbachmei/state_medication_FAKE_data.csv',
)

# This section looks at the real data from Nikki

In [10]:
# Load the BRFSS medication data; this rebinds `df`, replacing the fake table.
df = pd.read_csv(
    filepath_or_buffer='/mnt/team/cvd/pub/usa_re/sim_science/brfss/brfss_bp_chol_med_data.csv',
)
df.head()

Unnamed: 0,state,year,sex,age_group,percent_on_BP_meds_among_high_BP_group,percent_on_chol_meds_among_high_chol_group,percent_high_BP,percent_high_chol
0,Alabama,2021,female,25-29,0.285488,0.0,0.080448,0.046799
1,Alabama,2021,female,30-34,0.502347,0.232783,0.178995,0.145891
2,Alabama,2021,female,35-39,0.557701,0.293663,0.229077,0.185504
3,Alabama,2021,female,40-44,0.809122,0.457763,0.291636,0.179281
4,Alabama,2021,female,45-49,0.844516,0.490117,0.366167,0.392853


In [11]:
# Mean share on BP medication within each age group / sex, over all rows in
# that group.
sbp_med_average = (
    df.groupby(['age_group', 'sex'])['percent_on_BP_meds_among_high_BP_group']
    .mean()
    .rename('sbp_average')
    .reset_index()
)

# Same averaging for the share on cholesterol medication.
ldl_med_average = (
    df.groupby(['age_group', 'sex'])['percent_on_chol_meds_among_high_chol_group']
    .mean()
    .rename('ldl_average')
    .reset_index()
)

# Attach both group averages to every row.
df = df.merge(sbp_med_average, on=['age_group', 'sex'])
df = df.merge(ldl_med_average, on=['age_group', 'sex'])
df.head()

Unnamed: 0,state,year,sex,age_group,percent_on_BP_meds_among_high_BP_group,percent_on_chol_meds_among_high_chol_group,percent_high_BP,percent_high_chol,sbp_average,ldl_average
0,Alabama,2021,female,25-29,0.285488,0.0,0.080448,0.046799,0.298296,0.116247
1,Alaska,2021,female,25-29,0.0,0.0,0.062467,0.094133,0.298296,0.116247
2,Arizona,2021,female,25-29,0.268545,0.0,0.067908,0.227291,0.298296,0.116247
3,Arkansas,2021,female,25-29,0.646522,0.0,0.077833,0.118129,0.298296,0.116247
4,California,2021,female,25-29,0.260714,0.151235,0.084964,0.125713,0.298296,0.116247


In [34]:
# Ratio of each row's medication share to the average for its age group / sex.
# A ratio of exactly 0 is replaced with 0.1.
# NOTE(review): if a group average is 0 the ratio is 0/0 = NaN, which does not
# equal 0 and is therefore left as NaN -- confirm this cannot occur in the data.
df['sbp_rr'] = df['percent_on_BP_meds_among_high_BP_group'].div(df['sbp_average'])
df['sbp_rr'] = df['sbp_rr'].mask(df['sbp_rr'].eq(0), 0.1)
df['ldl_rr'] = df['percent_on_chol_meds_among_high_chol_group'].div(df['ldl_average'])
df['ldl_rr'] = df['ldl_rr'].mask(df['ldl_rr'].eq(0), 0.1)
# Combined value is the simple mean of the two (zero-adjusted) ratios.
df['both_rr'] = df['sbp_rr'].add(df['ldl_rr']).div(2)

In [35]:
# Preview the first rows of the real-data table with the ratio columns added.
df.head()

Unnamed: 0,state,year,sex,age_group,percent_on_BP_meds_among_high_BP_group,percent_on_chol_meds_among_high_chol_group,percent_high_BP,percent_high_chol,sbp_average,ldl_average,sbp_rr,ldl_rr,both_rr
0,Alabama,2021,female,25-29,0.285488,0.0,0.080448,0.046799,0.298296,0.116247,0.957065,0.1,0.528532
1,Alaska,2021,female,25-29,0.0,0.0,0.062467,0.094133,0.298296,0.116247,0.1,0.1,0.1
2,Arizona,2021,female,25-29,0.268545,0.0,0.067908,0.227291,0.298296,0.116247,0.900262,0.1,0.500131
3,Arkansas,2021,female,25-29,0.646522,0.0,0.077833,0.118129,0.298296,0.116247,2.167385,0.1,1.133692
4,California,2021,female,25-29,0.260714,0.151235,0.084964,0.125713,0.298296,0.116247,0.87401,1.300979,1.087494


In [36]:
# Save the real-data table as CSV. `index` is left at its default, so the
# DataFrame index is written as the first column.
df.to_csv(
    path_or_buf='/mnt/share/scratch/users/sbachmei/state_medication_real_data.csv',
)