In [79]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

In [80]:
# Load both workbooks. header=1 takes the column labels from the second
# row (0-indexed row 1) of each sheet.
prev = pd.read_excel(
    'data/DiabetesPrevalence_99.xlsx',
    sheet_name='prevalence_99',
    header=1,
)
comps = pd.read_excel(
    'data/complications_hospitalizations_99.xlsx',
    sheet_name='comp_hosp_99',
    header=1,
)

In [81]:
# Preview the first five rows of the prevalence table.
prev.head(n=5)

Unnamed: 0,fips,state,year,sex,sex_cat,age,age_cat,prevalence,ll,ul,count
0,1,ALABAMA,2013,0,Overall,1,18-44,0.050086,0.036471,0.0637,85.499
1,2,ALASKA,2013,0,Overall,1,18-44,0.018118,0.009594,0.026643,5.059
2,4,ARIZONA,2013,0,Overall,1,18-44,0.027343,0.013899,0.040787,65.269
3,5,ARKANSAS,2013,0,Overall,1,18-44,0.041642,0.027736,0.055548,43.107
4,6,CALIFORNIA,2013,0,Overall,1,18-44,0.029161,0.022332,0.035989,427.085


In [82]:
# Set the 'age adjusted' rows aside in their own frame.
# Must run before `prev` is filtered below: once those rows are removed
# from `prev`, re-running this cell would select nothing.
age_adj_prev = prev.loc[prev['age_cat'].eq('age adjusted')]

In [83]:
# Keep only the rows whose age_cat is not 'age adjusted'; per the original
# note, those rows have no data in the ll, ul and count columns.
prev = prev.loc[prev['age_cat'].ne('age adjusted')]

In [84]:
# Remove the columns that carry no data in the age-adjusted frame.
# `columns=` already names the axis, so the redundant `axis=1` is omitted.
# The frame is reassigned to itself, so without errors='ignore' a second run
# of this cell would raise KeyError because the columns are already gone.
age_adj_prev = age_adj_prev.drop(columns=['ll', 'ul', 'count'], errors='ignore')
age_adj_prev.head()

Unnamed: 0,fips,state,year,sex,sex_cat,age,age_cat,prevalence
780,1,ALABAMA,2013,1,Males,9,age adjusted,0.128879
781,2,ALASKA,2013,1,Males,9,age adjusted,0.078849
782,4,ARIZONA,2013,1,Males,9,age adjusted,0.10725
783,5,ARKANSAS,2013,1,Males,9,age adjusted,0.118038
784,6,CALIFORNIA,2013,1,Males,9,age adjusted,0.102623


In [85]:
# Show the rows of the age-adjusted frame whose state is 'United States'.
age_adj_prev.loc[age_adj_prev['state'].eq('United States')]

Unnamed: 0,fips,state,year,sex,sex_cat,age,age_cat,prevalence
831,99,United States,2013,1,Males,9,age adjusted,0.09922
883,99,United States,2013,2,Females,9,age adjusted,0.09017
935,99,United States,2013,0,Overall,9,age adjusted,0.094181


In [86]:
# Three rows with the highest age-adjusted prevalence.
# No filter on sex_cat or state is applied, so every row of the frame competes.
(
    age_adj_prev
    .sort_values('prevalence', ascending=False)
    .head(3)
)

Unnamed: 0,fips,state,year,sex,sex_cat,age,age_cat,prevalence
780,1,ALABAMA,2013,1,Males,9,age adjusted,0.128879
884,1,ALABAMA,2013,0,Overall,9,age adjusted,0.126526
832,1,ALABAMA,2013,2,Females,9,age adjusted,0.125636


In [87]:
# Three rows with the highest prevalence among the age-specific rows.
# No filter on sex_cat, age_cat or state is applied.
(
    prev
    .sort_values('prevalence', ascending=False)
    .head(3)
)

Unnamed: 0,fips,state,year,sex,sex_cat,age,age_cat,prevalence,ll,ul,count
264,1,ALABAMA,2013,1,Males,4,75+,0.316128,0.244504,0.387751,37.273
403,18,INDIANA,2013,1,Males,3,65-74,0.310007,0.269624,0.350391,75.988
594,37,NORTH CAROLINA,2013,1,Males,4,75+,0.308545,0.247221,0.369869,69.586


In [88]:
# Preview the first five rows of the complications table.
comps.head(n=5)

Unnamed: 0,fips,state,year,sex,sex_cat,age,age_cat,hf_ageadjusted,hf_count,hf_rate,...,hhns_ageadjusted,hhns_count,hhns_rate,hhns_ll,hhns_ul,dka_ageadjusted,dka_count,dka_rate,dka_ll,dka_ul
0,4,ARIZONA,2013.0,0,Overall,0,18+,6.303754,5172.0,9.67974,...,1.395376,435.0,0.814131,0.690315,0.937948,19.186359,3384.0,6.33338,5.370172,7.296587
1,4,ARIZONA,2013.0,0,Overall,1,18-44,,183.0,2.803777,...,,134.0,2.053039,1.039277,3.066801,,2206.0,33.798542,17.109298,50.487785
2,4,ARIZONA,2013.0,0,Overall,2,45-64,,1466.0,5.55583,...,,186.0,0.704901,0.534238,0.875563,,914.0,3.463867,2.625236,4.302497
3,4,ARIZONA,2013.0,0,Overall,3,65-74,,1460.0,11.564541,...,,,,,,,,,,
4,4,ARIZONA,2013.0,0,Overall,4,75+,,2063.0,26.137803,...,,,,,,,,,,


In [89]:
# Drop the 'year' column from the complications table.
# `comps` is reassigned to itself, so without errors='ignore' re-running this
# cell would raise KeyError once 'year' has already been removed.
comps = comps.drop(columns='year', errors='ignore')

In [90]:
# Columns to keep for the age-adjusted view of the complications table.
aa_cols = [
    # identifier / stratification columns
    'fips', 'state', 'sex_cat', 'age', 'age_cat',
    # one *_ageadjusted column per complication prefix
    'hf_ageadjusted',
    'stroke_ageadjusted',
    'mi_ageadjusted',
    'lea_ageadjusted',
    'hypogl_ageadjusted',
    'hhns_ageadjusted',
    'dka_ageadjusted',
]

# All rows, restricted to the columns listed above.
comps_aa = comps.loc[:, aa_cols]
comps_aa.head(n=5)

Unnamed: 0,fips,state,sex_cat,age,age_cat,hf_ageadjusted,stroke_ageadjusted,mi_ageadjusted,lea_ageadjusted,hypogl_ageadjusted,hhns_ageadjusted,dka_ageadjusted
0,4,ARIZONA,Overall,0,18+,6.303754,4.957159,4.624921,3.648717,2.21626,1.395376,19.186359
1,4,ARIZONA,Overall,1,18-44,,,,,,,
2,4,ARIZONA,Overall,2,45-64,,,,,,,
3,4,ARIZONA,Overall,3,65-74,,,,,,,
4,4,ARIZONA,Overall,4,75+,,,,,,,


In [91]:
# Keep only the rows whose age_cat is '18+'.
comps_aa = comps_aa.loc[comps_aa['age_cat'].eq('18+')]
comps_aa.head(n=5)

Unnamed: 0,fips,state,sex_cat,age,age_cat,hf_ageadjusted,stroke_ageadjusted,mi_ageadjusted,lea_ageadjusted,hypogl_ageadjusted,hhns_ageadjusted,dka_ageadjusted
0,4,ARIZONA,Overall,0,18+,6.303754,4.957159,4.624921,3.648717,2.21626,1.395376,19.186359
6,4,ARIZONA,Males,0,18+,7.483192,5.591844,6.220407,5.254467,2.360439,1.905885,20.342971
12,4,ARIZONA,Females,0,18+,5.137933,4.328915,2.977141,1.94885,2.071434,0.854327,17.966154
18,5,ARKANSAS,Overall,0,18+,10.712887,6.501089,6.247409,3.465521,2.18146,0.767636,18.523463
24,5,ARKANSAS,Males,0,18+,10.561361,6.098304,6.915747,4.180016,1.673179,0.690734,15.365308
