In [1]:
#################################################################################################################################
#######         Create Choropleth Map of US States showing Age Adjusted Death Rates                                       #######

In [2]:
#import modules
import pandas as pd
from config import (API_Key, plotly_key)
#import gmaps

import plotly
# Register the Plotly account (username plus the API key imported from config)
# for the plotly.plotly calls made later in this notebook.
plotly.tools.set_credentials_file(
    api_key=plotly_key,
    username='eyangeric',
)
# Uploaded charts are configured as publicly shared and world-readable.
plotly.tools.set_config_file(
    sharing='public',
    world_readable=True,
)

In [3]:
# Read both input tables from the local Datasets/ folder.

# NCHS leading-causes-of-death table
Death_Rates = pd.read_csv(
    "Datasets/NCHS_-_Leading_Causes_of_Death__United_States.csv"
)

# per-state geolocation table (also supplies the State / State_Abbr pairs)
US_States_GeoLocations = pd.read_csv(
    "Datasets/US_States_GeoLocations.csv"
)

In [4]:
#restrict dataset to just the State and State abbreviations columns
#(this lookup is merged into Death_Rates further down, so it must actually be defined;
# drop_duplicates keeps one row per pair so that a left merge on 'State' cannot
# multiply rows if the geolocation file lists a state more than once)
US_State_Abbrev = US_States_GeoLocations[['State', 'State_Abbr']].drop_duplicates()

In [5]:
# Show each distinct label found in the '113 Cause Name' column
Death_Rates.loc[:, '113 Cause Name'].unique()

array(['Accidents (unintentional injuries) (V01-X59,Y85-Y86)',
       'All Causes', "Alzheimer's disease (G30)",
       'Malignant neoplasms (C00-C97)',
       'Chronic lower respiratory diseases (J40-J47)',
       'Diabetes mellitus (E10-E14)',
       'Diseases of heart (I00-I09,I11,I13,I20-I51)',
       'Influenza and pneumonia (J09-J18)',
       'Nephritis, nephrotic syndrome and nephrosis (N00-N07,N17-N19,N25-N27)',
       'Cerebrovascular diseases (I60-I69)',
       'Intentional self-harm (suicide) (*U03,X60-X84,Y87.0)'],
      dtype=object)

In [6]:
#labels exactly as they appear in the '113 Cause Name' column
HEART_DISEASE = 'Diseases of heart (I00-I09,I11,I13,I20-I51)'
CANCER = 'Malignant neoplasms (C00-C97)'

#rows that are not one of the states: the national total and District of Columbia
EXCLUDED_AREAS = ['United States', 'District of Columbia']


def _rows_for(frame, cause, year):
    """Return the rows of `frame` for one cause of death in one year.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with '113 Cause Name' and 'Year' columns.
    cause : str
        Value matched exactly against '113 Cause Name'.
    year : int
        Value matched against 'Year'.

    Returns
    -------
    pandas.DataFrame
        The matching rows, with their original index labels.
    """
    return frame[(frame['113 Cause Name'] == cause) & (frame['Year'] == year)]


#remove the overall US rows and the District of Columbia rows
#(the previous test `State != A | State != B` is true for every row, so it removed nothing)
Death_Rates = Death_Rates[~Death_Rates['State'].isin(EXCLUDED_AREAS)]

#State -> State_Abbr lookup built straight from the geolocation table;
#one row per pair so the left merge cannot multiply death-rate rows
state_abbrev_lookup = US_States_GeoLocations[['State', 'State_Abbr']].drop_duplicates()

#merge Death Rates with the State Abbreviation lookup
#(a State_Abbr column left over from an earlier run of this cell is dropped first,
# otherwise re-running would produce State_Abbr_x / State_Abbr_y instead of State_Abbr)
Death_Rates = pd.merge(
    Death_Rates.drop(columns='State_Abbr', errors='ignore'),
    state_abbrev_lookup,
    on='State',
    how='left',
)

#Diseases of Heart subsets for 2016, 2008 and 2000
HD_2016 = _rows_for(Death_Rates, HEART_DISEASE, 2016)
HD_2008 = _rows_for(Death_Rates, HEART_DISEASE, 2008)
HD_2000 = _rows_for(Death_Rates, HEART_DISEASE, 2000)

#Cancer subsets for 2016, 2008 and 2000
Cancer_2016 = _rows_for(Death_Rates, CANCER, 2016)
Cancer_2008 = _rows_for(Death_Rates, CANCER, 2008)
Cancer_2000 = _rows_for(Death_Rates, CANCER, 2000)

In [7]:
# #Separate Geolocation variable into Latitude and Longitude columns
# lats = []
# lngs = []

# #separate GeoLocation column into Latitude and Longitude Lists
# for i, row in US_States_GeoLocations.iterrows():
#     geo_loc_partition = row[3].partition(',')
#     lats.append(geo_loc_partition[0].replace('(', ''))
#     lngs.append(geo_loc_partition[2].replace(')', ''))
    
# GeoLocations = pd.DataFrame({'Latitude' : lats, 'Longitude': lngs})

# #Join Data Frames
# US_States_Geo_Cleaned = US_States_GeoLocations.join(GeoLocations) 

# #remove Guam, Puerto Rico and the Virgin Islands
# US_States_Geo_Cleaned = US_States_Geo_Cleaned[US_States_Geo_Cleaned['State'].isin(['Guam', 'Puerto Rico', 'Virgin Islands']) == False]

# #check new Data Frame
# US_States_Geo_Cleaned.head()

In [8]:
# #Merge Geolocation dataset with death rate dataset
# US_States_Death_Final = pd.merge(HD_2016, US_States_Geo_Cleaned, on=['State', 'Year'])
# US_States_Death_Final.head()

In [9]:
# Configure gmaps with API key
# gmaps.configure(api_key=API_Key)

In [10]:
# Store Geolocation columns
# locations = US_States_Death_Final[["Latitude", "Longitude"]].astype(float)

# Store Age-adjusted death rates
# Age_ADJ_Death_Rates = US_States_Death_Final['Age-adjusted Death Rate'].astype(float)

In [11]:
# Create an age-adjusted death rate Heatmap layer
# fig = gmaps.figure()

# heat_layer = gmaps.heatmap_layer(locations, weights=Age_ADJ_Death_Rates, 
#                                  dissipating=False, max_intensity=100,
#                                  point_radius = 1)

# # Adjust heat_layer setting to help with heatmap dissipating on zoom
# heat_layer.dissipating = False
# heat_layer.max_intensity = 100
# heat_layer.point_radius = 1

# fig.add_layer(heat_layer)

# fig

In [12]:
# Both colour ramps use the same six evenly spaced positions on the 0-1 scale;
# each scale entry is a [position, 'rgb(...)'] pair.
_SCALE_STOPS = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0)

# Heart Disease choropleth maps: pale pink through to pure red
HD_scl = [
    [stop, colour]
    for stop, colour in zip(_SCALE_STOPS, (
        'rgb(255,244,244)',
        'rgb(255,224,224)',
        'rgb(255,186,186)',
        'rgb(255,130,130)',
        'rgb(255,68,68)',
        'rgb(255,0,0)',
    ))
]

# Cancer choropleth maps: pale lavender through to violet-blue
Cancer_scl = [
    [stop, colour]
    for stop, colour in zip(_SCALE_STOPS, (
        'rgb(229, 219, 255)',
        'rgb(200, 178, 255)',
        'rgb(166, 130, 255)',
        'rgb(134, 84, 255)',
        'rgb(111, 50, 255)',
        'rgb(76, 0, 255)',
    ))
]

In [13]:
# #Set up text of each US state in the choropleth map
# text_list = []
# for i, row in HD_2016.iterrows():
#     text = row[3]
#     text_list.append(text)
    
# HD_2016['text'] = text_list

In [22]:
#colorbar captions shared by the three maps of each cause
HD_COLORBAR_TITLE = "Age-Adjusted Heart Disease Death Rates"
CANCER_COLORBAR_TITLE = "Age-Adjusted Cancer Death Rates"


def _choropleth_trace(frame, colorscale, colorbar_title):
    """Build the plot-input dict for one US-states choropleth map.

    Parameters
    ----------
    frame : pandas.DataFrame
        One cause/year subset. Its 'State_Abbr' column gives the map locations,
        'Age-adjusted Death Rate' (cast to float) gives the colour values and
        'State' gives the text for each location.
    colorscale : list
        List of [position, 'rgb(...)'] pairs, e.g. HD_scl or Cancer_scl.
    colorbar_title : str
        Caption shown on the colour bar.

    Returns
    -------
    dict
        A 'choropleth' trace description with white, width-2 state outlines.
    """
    return dict(
        type='choropleth',
        colorscale=colorscale,
        autocolorscale=False,  # use the scale given above, not plotly's default
        locations=frame['State_Abbr'],
        z=frame['Age-adjusted Death Rate'].astype(float),
        locationmode='USA-states',
        text=frame['State'],
        marker=dict(
            line=dict(
                color='rgb(255,255,255)',
                width=2,
            ),
        ),
        colorbar=dict(title=colorbar_title),
    )


######### List of plot inputs for Heart Disease #########
HD2016_data = [_choropleth_trace(HD_2016, HD_scl, HD_COLORBAR_TITLE)]
HD2008_data = [_choropleth_trace(HD_2008, HD_scl, HD_COLORBAR_TITLE)]
HD2000_data = [_choropleth_trace(HD_2000, HD_scl, HD_COLORBAR_TITLE)]

######### List of plot inputs for Cancer #########
Cancer2016_data = [_choropleth_trace(Cancer_2016, Cancer_scl, CANCER_COLORBAR_TITLE)]
Cancer2008_data = [_choropleth_trace(Cancer_2008, Cancer_scl, CANCER_COLORBAR_TITLE)]
Cancer2000_data = [_choropleth_trace(Cancer_2000, Cancer_scl, CANCER_COLORBAR_TITLE)]

In [15]:
# Layout shared by every map figure below.
# NOTE: the title text names 2016; it is used as-is by whichever figure receives this layout.
layout = {
    'title': '2016 US Age-Adjusted Death Rates (per 100,000 People) by State',
    'geo': {
        'scope': 'usa',
        'projection': {'type': 'albers usa'},
        'showlakes': True,
        'lakecolor': 'rgb(0, 0, 255)',
    },
}

In [16]:
# Heart disease, 2016: pair the trace list with the shared layout and send the map to plotly
HD2016_fig = {'data': HD2016_data, 'layout': layout}
plotly.plotly.iplot(HD2016_fig, filename='HD2016-cloropleth-map')

In [17]:
#2008 choropleth map of heart disease age-adjusted death rates
#The shared `layout` title names 2016, so the title is overridden on a copy here;
#otherwise this 2008 map would be published with a 2016 heading.
HD2008_fig = dict(
    data=HD2008_data,
    layout=dict(layout, title='2008 US Age-Adjusted Death Rates (per 100,000 People) by State'),
)
plotly.plotly.iplot( HD2008_fig, filename='HD2008-cloropleth-map' )

In [18]:
#2000 choropleth map of heart disease age-adjusted death rates
#The shared `layout` title names 2016, so the title is overridden on a copy here;
#otherwise this 2000 map would be published with a 2016 heading.
HD2000_fig = dict(
    data=HD2000_data,
    layout=dict(layout, title='2000 US Age-Adjusted Death Rates (per 100,000 People) by State'),
)
plotly.plotly.iplot( HD2000_fig, filename='HD2000-cloropleth-map' )

In [19]:
# Cancer, 2016: pair the trace list with the shared layout and send the map to plotly
Cancer2016_fig = {'data': Cancer2016_data, 'layout': layout}
plotly.plotly.iplot(Cancer2016_fig, filename='Cancer2016-cloropleth-map')

In [20]:
#2008 choropleth map of cancer age-adjusted death rates
#The shared `layout` title names 2016, so the title is overridden on a copy here;
#otherwise this 2008 map would be published with a 2016 heading.
Cancer2008_fig = dict(
    data=Cancer2008_data,
    layout=dict(layout, title='2008 US Age-Adjusted Death Rates (per 100,000 People) by State'),
)
plotly.plotly.iplot( Cancer2008_fig, filename='Cancer2008-cloropleth-map' )

In [23]:
#2000 choropleth map of cancer age-adjusted death rates
#The shared `layout` title names 2016, so the title is overridden on a copy here;
#otherwise this 2000 map would be published with a 2016 heading.
Cancer2000_fig = dict(
    data=Cancer2000_data,
    layout=dict(layout, title='2000 US Age-Adjusted Death Rates (per 100,000 People) by State'),
)
plotly.plotly.iplot( Cancer2000_fig, filename='Cancer2000-cloropleth-map' )