In [31]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import json
import folium
import rtree
from branca.colormap import linear
import plotly
import plotly.plotly as py
import plotly.figure_factory as ff
import shapely
from shapely.geometry import Point
import shapefile

%matplotlib inline

In [2]:
# Load the raw inputs: two Excel workbooks (column names are on the second
# row of each sheet, hence header=1), the state polygons and a CSV of
# per-state latitude/longitude.
prev = pd.read_excel(
    'data/DiabetesPrevalence_99.xlsx',
    sheet_name='prevalence_99',
    header=1,
)
comps = pd.read_excel(
    'data/complications_hospitalizations_99.xlsx',
    sheet_name='comp_hosp_99',
    header=1,
)
states = pd.read_csv('data/statelatlong.csv')
us_states = gpd.read_file('data/states.json')

In [3]:
# Preview the first five prevalence records.
prev.head(n=5)

Unnamed: 0,fips,state,year,sex,sex_cat,age,age_cat,prevalence,ll,ul,count
0,1,ALABAMA,2013,0,Overall,1,18-44,0.050086,0.036471,0.0637,85.499
1,2,ALASKA,2013,0,Overall,1,18-44,0.018118,0.009594,0.026643,5.059
2,4,ARIZONA,2013,0,Overall,1,18-44,0.027343,0.013899,0.040787,65.269
3,5,ARKANSAS,2013,0,Overall,1,18-44,0.041642,0.027736,0.055548,43.107
4,6,CALIFORNIA,2013,0,Overall,1,18-44,0.029161,0.022332,0.035989,427.085


In [4]:
# Split the age-adjusted rows out into their own frame.
# This reads `prev` before the age-adjusted rows are removed from it.
prev_aa = prev.loc[prev['age_cat'].eq('age adjusted')]

In [5]:
# Keep only the age-specific rows in `prev`; the age-adjusted rows carry no
# ll / ul / count values.
prev = prev.loc[prev['age_cat'].ne('age adjusted')]

In [6]:
# Remove the columns that hold no data for the age-adjusted rows.
# `columns=` already selects the axis, so the redundant `axis=1` is gone, and
# errors='ignore' keeps the cell re-runnable: once the columns have been
# dropped, a second execution is a no-op instead of a KeyError.
prev_aa = prev_aa.drop(columns=['ll', 'ul', 'count'], errors='ignore')
prev_aa.head()

Unnamed: 0,fips,state,year,sex,sex_cat,age,age_cat,prevalence
780,1,ALABAMA,2013,1,Males,9,age adjusted,0.128879
781,2,ALASKA,2013,1,Males,9,age adjusted,0.078849
782,4,ARIZONA,2013,1,Males,9,age adjusted,0.10725
783,5,ARKANSAS,2013,1,Males,9,age adjusted,0.118038
784,6,CALIFORNIA,2013,1,Males,9,age adjusted,0.102623


In [7]:
# Show the national ('United States') age-adjusted rows.
prev_aa.loc[prev_aa['state'].eq('United States')]

Unnamed: 0,fips,state,year,sex,sex_cat,age,age_cat,prevalence
831,99,United States,2013,1,Males,9,age adjusted,0.09922
883,99,United States,2013,2,Females,9,age adjusted,0.09017
935,99,United States,2013,0,Overall,9,age adjusted,0.094181


In [8]:
# Three highest age-adjusted prevalence rows.
(
    prev_aa
    .sort_values(by='prevalence', ascending=False)
    .head(n=3)
)

Unnamed: 0,fips,state,year,sex,sex_cat,age,age_cat,prevalence
780,1,ALABAMA,2013,1,Males,9,age adjusted,0.128879
884,1,ALABAMA,2013,0,Overall,9,age adjusted,0.126526
832,1,ALABAMA,2013,2,Females,9,age adjusted,0.125636


In [9]:
# Three highest age-specific prevalence rows.
(
    prev
    .sort_values(by='prevalence', ascending=False)
    .head(n=3)
)

Unnamed: 0,fips,state,year,sex,sex_cat,age,age_cat,prevalence,ll,ul,count
264,1,ALABAMA,2013,1,Males,4,75+,0.316128,0.244504,0.387751,37.273
403,18,INDIANA,2013,1,Males,3,65-74,0.310007,0.269624,0.350391,75.988
594,37,NORTH CAROLINA,2013,1,Males,4,75+,0.308545,0.247221,0.369869,69.586


In [10]:
# Preview the first five complication/hospitalization records.
comps.head(n=5)

Unnamed: 0,fips,state,year,sex,sex_cat,age,age_cat,hf_ageadjusted,hf_count,hf_rate,...,hhns_ageadjusted,hhns_count,hhns_rate,hhns_ll,hhns_ul,dka_ageadjusted,dka_count,dka_rate,dka_ll,dka_ul
0,4,ARIZONA,2013.0,0,Overall,0,18+,6.303754,5172.0,9.67974,...,1.395376,435.0,0.814131,0.690315,0.937948,19.186359,3384.0,6.33338,5.370172,7.296587
1,4,ARIZONA,2013.0,0,Overall,1,18-44,,183.0,2.803777,...,,134.0,2.053039,1.039277,3.066801,,2206.0,33.798542,17.109298,50.487785
2,4,ARIZONA,2013.0,0,Overall,2,45-64,,1466.0,5.55583,...,,186.0,0.704901,0.534238,0.875563,,914.0,3.463867,2.625236,4.302497
3,4,ARIZONA,2013.0,0,Overall,3,65-74,,1460.0,11.564541,...,,,,,,,,,,
4,4,ARIZONA,2013.0,0,Overall,4,75+,,2063.0,26.137803,...,,,,,,,,,,


In [11]:
# Drop the 'year' column from the complications table.
# errors='ignore' makes the cell safe to re-run: because it reassigns its own
# input, a second execution would otherwise raise KeyError for the column
# that has already been removed.
comps = comps.drop(columns='year', errors='ignore')

In [12]:
# Columns kept for the age-adjusted view: the identifying columns followed by
# one '*_ageadjusted' column per complication.
aa_cols = [
    'fips', 'state', 'sex_cat', 'age', 'age_cat',
    'hf_ageadjusted',
    'stroke_ageadjusted',
    'mi_ageadjusted',
    'lea_ageadjusted',
    'hypogl_ageadjusted',
    'hhns_ageadjusted',
    'dka_ageadjusted',
]

comps_aa = comps.loc[:, aa_cols]
comps_aa.head(n=5)

Unnamed: 0,fips,state,sex_cat,age,age_cat,hf_ageadjusted,stroke_ageadjusted,mi_ageadjusted,lea_ageadjusted,hypogl_ageadjusted,hhns_ageadjusted,dka_ageadjusted
0,4,ARIZONA,Overall,0,18+,6.303754,4.957159,4.624921,3.648717,2.21626,1.395376,19.186359
1,4,ARIZONA,Overall,1,18-44,,,,,,,
2,4,ARIZONA,Overall,2,45-64,,,,,,,
3,4,ARIZONA,Overall,3,65-74,,,,,,,
4,4,ARIZONA,Overall,4,75+,,,,,,,


In [13]:
# Keep only the '18+' rows (in the preview these are the rows that carry the
# age-adjusted values). The explicit .copy() makes comps_aa an independent
# frame, so later column assignments act on it directly rather than on a
# slice of the previous frame (avoids pandas' chained-assignment warning).
comps_aa = comps_aa[comps_aa['age_cat'] == '18+'].copy()

In [14]:
# Title-case the state names (e.g. 'ARIZONA' -> 'Arizona').
# Work on an explicit copy and assign with [] instead of attribute access:
# attribute-style assignment on a filtered frame can trigger pandas'
# SettingWithCopyWarning, and would silently create a plain attribute if the
# column did not exist.
# NOTE(review): str.title() capitalises every word (including words such as
# 'Of'); confirm multi-word names still match the lookup table they are
# merged against.
comps_aa = comps_aa.copy()
comps_aa['state'] = comps_aa['state'].str.title()

In [15]:
# Preview the state polygons (id, name, geometry) instead of rendering the
# whole GeoDataFrame; ten rows is enough to check the schema.
us_states.head(10)

Unnamed: 0,id,name,geometry
0,AL,Alabama,"POLYGON ((-87.359296 35.00118, -85.606675 34.9..."
1,AK,Alaska,"(POLYGON ((-131.602021 55.117982, -131.569159 ..."
2,AZ,Arizona,"POLYGON ((-109.042503 37.000263, -109.04798 31..."
3,AR,Arkansas,"POLYGON ((-94.473842 36.501861, -90.152536 36...."
4,CA,California,"POLYGON ((-123.233256 42.006186, -122.378853 4..."
5,CO,Colorado,"POLYGON ((-107.919731 41.003906, -105.728954 4..."
6,CT,Connecticut,"POLYGON ((-73.053528 42.039048, -71.7993089999..."
7,DE,Delaware,"POLYGON ((-75.414089 39.804456, -75.5071970000..."
8,FL,Florida,"POLYGON ((-85.497137 30.997536, -85.004212 31...."
9,GA,Georgia,"POLYGON ((-83.109191 35.00118, -83.322791 34.7..."


In [16]:
# Attach the per-state lookup columns by matching the title-cased state name
# to the lookup table's 'City' column. A left join keeps every comps_aa row,
# so rows without a match get missing values in the lookup columns.
comps_aa_map_df = comps_aa.merge(
    states,
    how='left',
    left_on='state',
    right_on='City',
)

In [17]:
# 'City' only served as the merge key and duplicates 'state', so drop it.
# errors='ignore' lets the cell be re-run after the column is already gone
# instead of failing with KeyError.
comps_aa_map_df = comps_aa_map_df.drop(columns=['City'], errors='ignore')

In [18]:
# Inspect the merged frame: complication rates plus State / Latitude / Longitude.
comps_aa_map_df.head(n=5)

Unnamed: 0,fips,state,sex_cat,age,age_cat,hf_ageadjusted,stroke_ageadjusted,mi_ageadjusted,lea_ageadjusted,hypogl_ageadjusted,hhns_ageadjusted,dka_ageadjusted,State,Latitude,Longitude
0,4,Arizona,Overall,0,18+,6.303754,4.957159,4.624921,3.648717,2.21626,1.395376,19.186359,AZ,34.168219,-111.930907
1,4,Arizona,Males,0,18+,7.483192,5.591844,6.220407,5.254467,2.360439,1.905885,20.342971,AZ,34.168219,-111.930907
2,4,Arizona,Females,0,18+,5.137933,4.328915,2.977141,1.94885,2.071434,0.854327,17.966154,AZ,34.168219,-111.930907
3,5,Arkansas,Overall,0,18+,10.712887,6.501089,6.247409,3.465521,2.18146,0.767636,18.523463,AR,34.751928,-92.131378
4,5,Arkansas,Males,0,18+,10.561361,6.098304,6.915747,4.180016,1.673179,0.690734,15.365308,AR,34.751928,-92.131378


In [19]:
# Keep one row per state: the 'Overall' (both sexes) figures.
# .copy() detaches the result from the frame it was filtered from, so adding
# columns to it later does not operate on a slice (avoids pandas'
# chained-assignment warning).
comps_aa_map_df = comps_aa_map_df[comps_aa_map_df['sex_cat'] == 'Overall'].copy()

In [20]:
# Confirm that only the 'Overall' rows remain.
comps_aa_map_df.head(n=5)

Unnamed: 0,fips,state,sex_cat,age,age_cat,hf_ageadjusted,stroke_ageadjusted,mi_ageadjusted,lea_ageadjusted,hypogl_ageadjusted,hhns_ageadjusted,dka_ageadjusted,State,Latitude,Longitude
0,4,Arizona,Overall,0,18+,6.303754,4.957159,4.624921,3.648717,2.21626,1.395376,19.186359,AZ,34.168219,-111.930907
3,5,Arkansas,Overall,0,18+,10.712887,6.501089,6.247409,3.465521,2.18146,0.767636,18.523463,AR,34.751928,-92.131378
6,6,California,Overall,0,18+,7.474181,4.826483,4.387449,2.619858,2.188418,0.683226,10.638079,CA,37.271875,-119.270415
9,8,Colorado,Overall,0,18+,5.811875,4.583799,4.446724,3.160065,2.220475,1.113477,23.695929,CO,38.997934,-105.550567
12,12,Florida,Overall,0,18+,9.278267,6.074835,5.895808,3.675792,2.940134,1.665444,19.299155,FL,27.975728,-83.833017


In [23]:
# Build a shapely Point (x = Longitude, y = Latitude) for every row.
# The coordinates come from a left merge, so rows without a match have NaN
# latitude/longitude. Turning those into Point(nan, nan) is the likely source
# of the "invalid value encountered" warning in the later spatial join, and
# such points cannot fall within any polygon anyway, so they are dropped here.
# .copy() ensures the new column is added to an independent frame, not a slice.
comps_aa_map_df = comps_aa_map_df.dropna(subset=['Latitude', 'Longitude']).copy()
comps_aa_map_df['geom'] = [
    Point((float(lon), float(lat)))
    for lon, lat in zip(comps_aa_map_df['Longitude'], comps_aa_map_df['Latitude'])
]

In [24]:
# Check the newly added 'geom' column.
comps_aa_map_df.head(n=5)

Unnamed: 0,fips,state,sex_cat,age,age_cat,hf_ageadjusted,stroke_ageadjusted,mi_ageadjusted,lea_ageadjusted,hypogl_ageadjusted,hhns_ageadjusted,dka_ageadjusted,State,Latitude,Longitude,geom
0,4,Arizona,Overall,0,18+,6.303754,4.957159,4.624921,3.648717,2.21626,1.395376,19.186359,AZ,34.168219,-111.930907,POINT (-111.930907 34.1682185)
3,5,Arkansas,Overall,0,18+,10.712887,6.501089,6.247409,3.465521,2.18146,0.767636,18.523463,AR,34.751928,-92.131378,POINT (-92.13137840000002 34.7519275)
6,6,California,Overall,0,18+,7.474181,4.826483,4.387449,2.619858,2.188418,0.683226,10.638079,CA,37.271875,-119.270415,POINT (-119.2704153 37.2718745)
9,8,Colorado,Overall,0,18+,5.811875,4.583799,4.446724,3.160065,2.220475,1.113477,23.695929,CO,38.997934,-105.550567,POINT (-105.550567 38.9979339)
12,12,Florida,Overall,0,18+,9.278267,6.074835,5.895808,3.675792,2.940134,1.665444,19.299155,FL,27.975728,-83.833017,POINT (-83.83301659999999 27.9757279)


In [25]:
# Promote the frame to a GeoDataFrame, using the 'geom' points as geometry
# and reusing the CRS of the state polygons so the two layers line up.
comps_aa_map_geo = gpd.GeoDataFrame(
    comps_aa_map_df,
    geometry=comps_aa_map_df['geom'],
    crs=us_states.crs,
)

In [32]:
# Spatially join each state point to the polygon that contains it; the inner
# join keeps only the points that fall within some polygon.
# NOTE(review): recent geopandas releases deprecate `op` in favour of
# `predicate` -- confirm against the installed version before upgrading.
complications_geo_df = gpd.sjoin(
    comps_aa_map_geo,
    us_states,
    how='inner',
    op='within',
)


invalid value encountered in ? (vectorized)



In [34]:
# Drop the bookkeeping column added by the spatial join ('index_right') and
# the polygon layer's 'name' column.
# errors='ignore' keeps the cell re-runnable once those columns are gone.
complications_geo_df = complications_geo_df.drop(
    columns=['index_right', 'name'],
    errors='ignore',
)

In [40]:
# Base map with a fixed starting centre ([lat, lon]) and zoom level.
m = folium.Map(location=[36.161278209287914, -86.77756457127047], zoom_start=4)

# One marker per row of the joined frame; the popup shows the state name and
# its age-adjusted heart-failure value.
for _, record in complications_geo_df.iterrows():
    popup_html = '<br/>'.join([
        str(record['state']),
        'Heart Failure: ' + str(record['hf_ageadjusted']),
    ]).replace("'", "`")  # swap apostrophes for backticks in the popup text

    # A fresh Icon is created for every marker.
    hospital_icon = folium.Icon(
        color='green',
        icon='fa-hospital-o',
        icon_color='white',
        prefix='fa',
    )

    folium.Marker(
        location=[record['Latitude'], record['Longitude']],
        popup=popup_html,
        icon=hospital_icon,
    ).add_to(m)

# Render the map as the cell output.
m