In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
%matplotlib inline
import os

In [2]:
# Raw data: load the death-rate table and clean it into `dr`, the frame that
# the rest of the notebook works from.
import pandas as pd
dr = pd.read_csv('deathrate.csv')

# Give the unnamed first CSV column a usable name
dr = dr.rename(columns={"Unnamed: 0": "Number"})

# Drop Mc Kean County and Kalawao County due to missing poverty data
COUNTIES_MISSING_POVERTY = ['Mc Kean County, PA', 'Kalawao County, HI']
dr = dr[~dr['County'].isin(COUNTIES_MISSING_POVERTY)]

# Drop duplicate rows, identified by their value in the 'Number' column
DUPLICATE_ROW_NUMBERS = [788, 3924, 7060, 10196]
dr = dr[~dr['Number'].isin(DUPLICATE_ROW_NUMBERS)]

# Extract state and county name into two new columns.
# 'County' values look like '<name>, <2-letter state>' (see the literals above),
# so the state is the last two characters and the name is the text before the
# first comma. The vectorised .str accessor replaces a row-wise apply, and
# .assign returns a new frame instead of writing into a filtered slice.
county_label = dr['County']
dr = dr.assign(State=county_label.str[-2:])
dr = dr.assign(City=county_label.str.split(',').str[0])

# Merge latitude and longitude with county
# Obtained from https://simplemaps.com/data/us-cities
cities = pd.read_csv('cities.csv')
cities = cities.drop(columns=['city_ascii', 'state_name', 'population', 'population_proper', 'density', 'source', 'incorporated', 'timezone', 'zips', 'id'])
# Average the per-city values within each county. numeric_only=True skips the
# remaining text columns explicitly; recent pandas versions raise a TypeError
# instead of silently dropping them.
cities_mean = cities.groupby('county_fips').mean(numeric_only=True)
dr = dr.merge(cities_mean, left_on='FIPS', right_on='county_fips', how='left')

# Manually insert missing latitude and longitude
# Obtained data from google
MANUAL_COORDINATES = {
    # FIPS: (lat, lng)
    44001: (41.7258, -71.3112),
    46113: (43.2437, -102.6216),
    2270: (62.1458, -162.8919),
}
for fips, (lat, lng) in MANUAL_COORDINATES.items():
    is_county = dr['FIPS'] == fips
    dr.loc[is_county, 'lat'] = lat
    dr.loc[is_county, 'lng'] = lng


In [3]:
# Install Folium (Used for maps)

!pip install --upgrade folium
import folium
print(folium.__version__, "should be at least 0.3.0")

Requirement already up-to-date: folium in /Users/JoeBorja/anaconda/lib/python3.5/site-packages
Requirement already up-to-date: branca>=0.3.0 in /Users/JoeBorja/anaconda/lib/python3.5/site-packages (from folium)
Collecting numpy (from folium)
  Using cached https://files.pythonhosted.org/packages/f4/47/9cc508af5af902609f2139a296d1056213bd12a5677f1e2b90bd5f50191b/numpy-1.16.1-cp35-cp35m-macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl
Collecting requests (from folium)
  Using cached https://files.pythonhosted.org/packages/7d/e3/20f3d364d6c8e5d2353c72a67778eb189176f08e873c9900e10c0287b84b/requests-2.21.0-py2.py3-none-any.whl
Requirement already up-to-date: six in /Users/JoeBorja/anaconda/lib/python3.5/site-packages (from folium)
Requirement already up-to-date: jinja2 in /Users/JoeBorja/anaconda/lib/python3.5/site-packages (from folium)
Requirement already up-to-date: certifi>=2017.4.17 in /Users/JoeBorja/anaconda/lib/python3.5/site-packages

In [4]:
# Map displaying the distribution of counties

import folium
import folium.plugins  # provides MarkerCluster

US_CENTER = (39, -98)
# Upper bound on the number of markers drawn (presumably chosen to match the
# number of counties -- TODO confirm).
MAX_MARKERS = 3133
# Fixed seed so that re-running the cell draws the same sample of rows.
SAMPLE_SEED = 0

# Only rows with both coordinates and a label can be plotted.
county_points = dr[['lat', 'lng', 'County']].dropna()
# Cap the sample size at the number of available rows; asking .sample() for
# more rows than exist (without replacement) raises a ValueError.
county_points = county_points.sample(
    n=min(MAX_MARKERS, len(county_points)),
    random_state=SAMPLE_SEED,
)

# One marker per sampled row, grouped into a cluster layer.
cluster = folium.plugins.MarkerCluster()
for lat, lng, county in county_points.itertuples(index=False, name=None):
    cluster.add_child(folium.Marker([float(lat), float(lng)], popup=county))

us_map = folium.Map(location=US_CENTER, zoom_start=4)
us_map.add_child(cluster)
us_map

In [6]:
# Map displaying the density of counties

import folium
import folium.plugins  # provides HeatMap

US_CENTER = (39, -98)
us_map = folium.Map(location=US_CENTER, zoom_start=4)
# Rows of [lat, lng] as floats, with incomplete rows removed.
# `.values` replaces DataFrame.as_matrix(), which was deprecated and later
# removed from pandas.
locs = dr[['lat', 'lng']].astype('float').dropna().values
heatmap = folium.plugins.HeatMap(locs.tolist(), radius=10)
# Last expression of the cell: its value is what the notebook displays.
us_map.add_child(heatmap)



In [185]:
# Map displaying the average (deathrate per county) for each state
# Change YEAR to see other years
YEAR = 1999

# Mean of every numeric column per state for the selected year, highest death
# rate first. numeric_only=True skips text columns such as 'County' explicitly;
# recent pandas versions raise a TypeError instead of silently dropping them.
dr_year = dr[dr['Year'] == YEAR]
dr_year = (
    dr_year.groupby('State')
    .mean(numeric_only=True)
    .sort_values(['Deathrate'], ascending=False)
)

# Choropleth needs 'State' as a regular column rather than the index.
# reset_index() gives that directly, without writing and re-reading a
# temporary CSV file.
state_data = dr_year.reset_index()

# GeoJSON file with the state outlines, read from the working directory.
state_geo = 'us-states.json'

m = folium.Map(location=[48, -102], zoom_start=3)
# Colour-scale boundaries for the death rate. (The original also bound this
# list to the name `list`, shadowing the built-in for the rest of the session.)
bins = [0, 4, 8, 12, 16, 20]
# NOTE(review): `reset` looks like an option from the older Map.choropleth API;
# confirm the installed folium version still accepts it here.
folium.Choropleth(
    geo_data=state_geo,
    name='choropleth',
    data=state_data,
    columns=['State', 'Deathrate'],
    key_on='feature.id',
    fill_color='OrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Deathrate',
    bins=bins,
    reset=True
).add_to(m)

folium.LayerControl().add_to(m)

m