# Generate state level medication data 

This notebook generates fake data to be used in planning how to add state-level variation to the medications in the CVD sim. Once we receive the real data, we will also calculate the RRs in this notebook.

In [1]:
import numpy as np
import pandas as pd
import scipy.stats, random
import math
#pd.set_option('display.max_rows', 200)

In [2]:
# Build the fake table: one row per state x sex x age group
# (5 states x 2 sexes x 12 age groups = 120 rows).
sex_list = ['female', 'male']
state_list = ['alabama', 'alaska', 'arizona', 'arkansas', 'california']

# The age-group columns are identical for every state/sex pair, so build them once.
# 'age_start' is kept as strings; 'age_cat' numbers the age groups 1..12.
age_groups = pd.DataFrame({
    'age_start': ['25', '30', '35', '40', '45', '50', '55', '60', '65', '70', '75', '80'],
    'age_cat': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
})

# Collect the per-(state, sex) pieces and concatenate ONCE. Calling pd.concat
# inside the loop re-copies the growing frame on every iteration and leaves
# duplicate index labels (0-11 repeated for every piece); ignore_index=True
# gives a clean 0..N-1 index instead.
frames = []
for state in state_list:
    for sex in sex_list:
        df_small = age_groups.assign(sex=sex, state=state)
        frames.append(df_small)
df = pd.concat(frames, ignore_index=True)

# Fake medication rates: normal draws whose mean grows linearly with age_cat
# (0.05 per category for SBP, 0.02 for LDL) with a standard deviation of 0.1.
# Negative draws are replaced with 0.1 (they are not clipped to 0).
# NOTE(review): no random seed is set in this cell, so values differ between runs.
df['SBP_med_rate'] = np.random.normal((0.05 * df.age_cat), 0.1, len(df))
df['SBP_med_rate'] = np.where(df['SBP_med_rate'] < 0, 0.1, df['SBP_med_rate'])
df['LDL_med_rate'] = np.random.normal((0.02 * df.age_cat), 0.1, len(df))
df['LDL_med_rate'] = np.where(df['LDL_med_rate'] < 0, 0.1, df['LDL_med_rate'])

df.head()

Unnamed: 0,age_start,age_cat,sex,state,SBP_med_rate,LDL_med_rate
0,25,1,female,alabama,0.121581,0.041498
1,30,2,female,alabama,0.028934,0.1
2,35,3,female,alabama,0.1,0.233669
3,40,4,female,alabama,0.215141,0.035088
4,45,5,female,alabama,0.513903,0.1


In [3]:
# Average each medication rate across states within every age/sex group,
# then attach those group averages to each state-level row.
merge_keys = ['age_start', 'sex']

sbp_med_average = (
    df.groupby(merge_keys, as_index=False)['SBP_med_rate']
    .mean()
    .rename(columns={'SBP_med_rate': 'sbp_average'})
)

ldl_med_average = (
    df.groupby(merge_keys, as_index=False)['LDL_med_rate']
    .mean()
    .rename(columns={'LDL_med_rate': 'ldl_average'})
)

# Two successive merges on the age/sex keys (default inner join):
# SBP average first, then LDL average.
df = pd.merge(df, sbp_med_average, on=merge_keys)
df = pd.merge(df, ldl_med_average, on=merge_keys)
df.head()

Unnamed: 0,age_start,age_cat,sex,state,SBP_med_rate,LDL_med_rate,sbp_average,ldl_average
0,25,1,female,alabama,0.121581,0.041498,0.087838,0.064412
1,25,1,female,alaska,0.1,0.1,0.087838,0.064412
2,25,1,female,arizona,0.059566,0.020756,0.087838,0.064412
3,25,1,female,arkansas,0.1,0.1,0.087838,0.064412
4,25,1,female,california,0.058044,0.059804,0.087838,0.064412


In [7]:
# Ratio of each state's rate to the age/sex average computed across states.
df['sbp_rr'] = df['SBP_med_rate'].div(df['sbp_average'])
df['ldl_rr'] = df['LDL_med_rate'].div(df['ldl_average'])
# 'both_rr' is the simple arithmetic mean of the two ratios.
df['both_rr'] = df['sbp_rr'].add(df['ldl_rr']).div(2)

In [8]:
# Show the whole DataFrame for a visual check before it is written out.
df

Unnamed: 0,age_start,age_cat,sex,state,SBP_med_rate,LDL_med_rate,sbp_average,ldl_average,sbp_rr,ldl_rr,both_rr
0,25,1,female,alabama,0.121581,0.041498,0.087838,0.064412,1.384149,0.644269,1.014209
1,25,1,female,alaska,0.100000,0.100000,0.087838,0.064412,1.138454,1.552514,1.345484
2,25,1,female,arizona,0.059566,0.020756,0.087838,0.064412,0.678134,0.322240,0.500187
3,25,1,female,arkansas,0.100000,0.100000,0.087838,0.064412,1.138454,1.552514,1.345484
4,25,1,female,california,0.058044,0.059804,0.087838,0.064412,0.660808,0.928463,0.794635
...,...,...,...,...,...,...,...,...,...,...,...
115,80,12,male,alabama,0.563964,0.196660,0.573971,0.220291,0.982566,0.892729,0.937647
116,80,12,male,alaska,0.621468,0.234985,0.573971,0.220291,1.082752,1.066704,1.074728
117,80,12,male,arizona,0.535950,0.163583,0.573971,0.220291,0.933758,0.742577,0.838167
118,80,12,male,arkansas,0.535013,0.293054,0.573971,0.220291,0.932125,1.330307,1.131216


In [9]:
# Write the fake table to disk. The DataFrame index is written too
# (pandas default), so it appears as an unnamed first column in the CSV.
output_path = '/ihme/homes/lutzes/state_medication_FAKE_data.csv'
df.to_csv(output_path)