In [1]:
# Import relevant libraries
import glob
import requests
import pandas as pd
import numpy as np
import string
import googlemaps
import gmaps
import matplotlib.pyplot as plt
import json
import warnings
import gmaps.geojson_geometries
from matplotlib.cm import viridis
from matplotlib.colors import to_hex
from matplotlib.cm import viridis

# Widget-embedding helper (not referenced in the cells visible here).
from ipywidgets.embed import embed_minimal_html

# Hide warning messages
# NOTE(review): this silences every warning category for the rest of the
# session, so deprecation and pandas warnings will not be shown either.
warnings.filterwarnings('ignore')

# Google developer API key, read from a local `config` module so the key
# itself is not written into the notebook.
from config import gkey

# Access maps with unique API key: register it once with the gmaps widget
# library and build a googlemaps client (used below for geocoding).
gmaps.configure(api_key=gkey)
gm = googlemaps.Client(key=gkey)

# Path of the county-level CSV that is loaded into `cancer_df` further down.
file = "Resources/county_export_final_fg.csv"

In [2]:
# Read Superfund locations
# The priorities list is a JSON file; it is loaded whole into a DataFrame and
# the coordinate pairs (rows with a missing latitude/longitude dropped) are
# kept separately for use as map markers.
url = "Resources/priorities_list_full.json"
with open(url) as json_handle:
    data = json.load(json_handle)
superfund_site_df = pd.DataFrame(data)
superfund_locations = superfund_site_df[['latitude', 'longitude']].dropna()

# Number of Sites per State
# Count non-null site ids within each state, expose the count under the
# column name "Sites", and order the states from most to fewest sites.
state_count_df = pd.DataFrame(
    superfund_site_df.groupby(['State_name']).count()['site_epa_id']
)
state_count = state_count_df.rename(index=str, columns={"site_epa_id": "Sites"})
sorted_state_count = state_count.sort_values(by="Sites", ascending=False)

# Persist the ranking, then show the top rows as the cell output.
sorted_state_count.to_csv("Resources/Superfund_count_by_state.csv")
sorted_state_count.head()

Unnamed: 0_level_0,Sites
State_name,Unnamed: 1_level_1
New Jersey,115
California,98
Pennsylvania,97
New York,85
Michigan,65


In [3]:
# Read County data
# Announce which file is being read, then load it (latin1-encoded CSV) into
# `cancer_df`.
print(f"Processing: {file}")
read_file = pd.read_csv(file, encoding="latin1")
cancer_df = pd.DataFrame(data=read_file)

Processing: Resources/county_export_final_fg.csv


In [4]:
# Load the combined-data CSV (latin1-encoded) into `combined_data_df`.
combined_data = "Resources/combined_data.csv"
read_file = pd.read_csv(combined_data, encoding="latin1")
combined_data_df = pd.DataFrame(data=read_file)

In [5]:
# Attach the cancer statistics to the combined table (left join on 'County')
# and discard any row that has a missing value in any column.
# NOTE(review): the join key is 'County' alone; if county names repeat across
# states this produces cross-state matches. Confirm the key is unique.
result = pd.merge(combined_data_df, cancer_df, on='County', how='left')
result = result.dropna()

# Filter out extra characters and the states that don't contain information.
# A single vectorised strip removes any trailing mix of spaces, '#' and '*'
# regardless of the order the markers appear in.
rate_col = 'Age-Adjusted Incident Rate'
result[rate_col] = result[rate_col].str.rstrip(' #*')
result = result[~result['State'].isin(['Kansas', 'Minnesota', 'Nevada'])]
# Rows whose rate was nothing but marker characters are now empty: drop them.
result = result[result[rate_col] != '']

# Separate locations by Cancer Type
# Index by cancer type once, then pick the rows whose index label contains
# the given substring.
by_cancer_type = result.set_index('Cancer Type')
breast_df      = by_cancer_type.filter(like='Breast', axis=0)
colon_df       = by_cancer_type.filter(like='Colon & Rectum', axis=0)
lung_df        = by_cancer_type.filter(like='Lung & Bronchus', axis=0)
prostate_df    = by_cancer_type.filter(like='Prostate', axis=0)

In [6]:
# do geocode for the whole mega city
# Takes the first geocoding hit for the query string.
# NOTE(review): the query is 'Russia' while the datasets loaded in this
# notebook are US state/county data -- confirm this is the intended centre.
geocode_result = gm.geocode('Russia')[0]  # change the name into your city of interest

# get the center of the city
# The hit's geometry carries a 'location' mapping with 'lat' and 'lng'.
center_point = geocode_result['geometry']['location']
center_lat, center_lng = center_point['lat'], center_point['lng']
print('center=',center_lat,center_lng)

center= 61.52401 105.318756


In [7]:
def drawHeatMap(location, val, zoom, intensity, radius):
    """Build a hybrid-map figure with a weighted heatmap plus site markers.

    Parameters
    ----------
    location : passed to ``gmaps.heatmap_layer`` as the heatmap locations.
    val : passed to ``gmaps.heatmap_layer`` as the per-location weights.
    zoom : zoom level given to ``gmaps.figure``.
    intensity : value assigned to the heatmap layer's ``max_intensity``.
    radius : value assigned to the heatmap layer's ``point_radius``.

    Returns
    -------
    The ``gmaps`` figure, containing the heatmap layer followed by a marker
    layer.

    Notes
    -----
    Reads the module-level names ``center_lat``, ``center_lng`` (map centre)
    and ``superfund_locations`` (marker positions); they must be defined
    before this function is called.
    """
    # Use the `location` argument itself; the heatmap must not depend on an
    # unrelated global named `locations`.
    heatmap_layer = gmaps.heatmap_layer(location, weights=val, dissipating=True)
    heatmap_layer.max_intensity = intensity
    heatmap_layer.point_radius = radius

    # Draw the heatmap into a figure centred on the geocoded point.
    fig = gmaps.figure(map_type='HYBRID', center=[center_lat, center_lng], zoom_level=zoom)
    fig.add_layer(heatmap_layer)

    # Overlay the Superfund sites as markers.
    fig.add_layer(gmaps.marker_layer(superfund_locations))
    return fig

# Create Counties overlay with GeoJson data
# Fill colour for the overlay: black at 0.3 alpha (4-tuple, last entry 0.3).
color = (0, 0, 0, 0.3)

# Parse the county boundary file and wrap it in a gmaps GeoJSON layer that
# the map cells below add to their figures.
with open('Resources/gz_2010_us_050_00_5m.json') as geojson_file:
    geometry = json.load(geojson_file)
fig = gmaps.figure()
geojson_layer = gmaps.geojson_layer(geometry, fill_color=color)

In [8]:
# Breast
# Heatmap of the breast-cancer rows: each point is weighted by its
# age-adjusted incidence rate divided by 15, drawn on a hybrid map with the
# county overlay on top.
# .copy() makes an independent frame, so the column assignment below writes
# to this frame rather than to a slice of `breast_df`.
breast_locations = breast_df[['County', 'Latitude', 'Longitude', 'Age-Adjusted Incident Rate']].copy()
breast_locations['Age-Adjusted Incident Rate'] = (
    breast_locations['Age-Adjusted Incident Rate'].astype('float64').div(15)
)
b_locations = breast_locations[['Latitude', 'Longitude']]
weights = breast_locations['Age-Adjusted Incident Rate']

fig = gmaps.figure(map_type='HYBRID')
fig.add_layer(gmaps.heatmap_layer(b_locations, weights=weights, max_intensity=80, point_radius=10, dissipating=True))
# Superfund markers are left off this map; uncomment to overlay them.
#marker_layer = gmaps.marker_layer(superfund_locations)
#fig.add_layer(marker_layer)
fig.add_layer(geojson_layer)
fig

Figure(layout=FigureLayout(height='420px'))

In [9]:
# Lung
# Heatmap of the lung & bronchus rows: each point is weighted by its
# age-adjusted incidence rate divided by 15, drawn on a hybrid map with the
# county overlay on top.
# .copy() makes an independent frame, so the column assignment below writes
# to this frame rather than to a slice of `lung_df`.
lung_locations = lung_df[['County', 'Latitude', 'Longitude', 'Age-Adjusted Incident Rate']].copy()
lung_locations['Age-Adjusted Incident Rate'] = (
    lung_locations['Age-Adjusted Incident Rate'].astype('float64').div(15)
)
l_locations = lung_locations[['Latitude', 'Longitude']]
weights = lung_locations['Age-Adjusted Incident Rate']

fig = gmaps.figure(map_type='HYBRID')
fig.add_layer(gmaps.heatmap_layer(l_locations, weights=weights, max_intensity=80, point_radius=10, dissipating=True))
# Superfund markers are left off this map; uncomment to overlay them.
#marker_layer = gmaps.marker_layer(superfund_locations)
#fig.add_layer(marker_layer)
fig.add_layer(geojson_layer)
fig

Figure(layout=FigureLayout(height='420px'))

In [10]:
# Colon
# Heatmap of the colon & rectum rows: each point is weighted by its
# age-adjusted incidence rate divided by 15, drawn on a hybrid map with the
# county overlay on top.
# .copy() makes an independent frame, so the column assignment below writes
# to this frame rather than to a slice of `colon_df`.
colon_locations = colon_df[['County', 'Latitude', 'Longitude', 'Age-Adjusted Incident Rate']].copy()
colon_locations['Age-Adjusted Incident Rate'] = (
    colon_locations['Age-Adjusted Incident Rate'].astype('float64').div(15)
)
c_locations = colon_locations[['Latitude', 'Longitude']]
weights = colon_locations['Age-Adjusted Incident Rate']

fig = gmaps.figure(map_type='HYBRID')
fig.add_layer(gmaps.heatmap_layer(c_locations, weights=weights, max_intensity=80, point_radius=10, dissipating=True))
# Superfund markers are left off this map; uncomment to overlay them.
#marker_layer = gmaps.marker_layer(superfund_locations)
#fig.add_layer(marker_layer)
fig.add_layer(geojson_layer)
fig


Figure(layout=FigureLayout(height='420px'))

In [11]:
# Prostate
# Heatmap of the prostate rows: each point is weighted by its age-adjusted
# incidence rate divided by 15, drawn on a hybrid map with the county overlay
# on top.
# .copy() makes an independent frame, so the column assignment below writes
# to this frame rather than to a slice of `prostate_df`.
prostate_locations = prostate_df[['County', 'Latitude', 'Longitude', 'Age-Adjusted Incident Rate']].copy()
prostate_locations['Age-Adjusted Incident Rate'] = (
    prostate_locations['Age-Adjusted Incident Rate'].astype('float64').div(15)
)
p_locations = prostate_locations[['Latitude', 'Longitude']]
weights = prostate_locations['Age-Adjusted Incident Rate']

fig = gmaps.figure(map_type='HYBRID')
fig.add_layer(gmaps.heatmap_layer(p_locations, weights=weights, max_intensity=80, point_radius=10, dissipating=True))
# Superfund markers are left off this map; uncomment to overlay them.
#marker_layer = gmaps.marker_layer(superfund_locations)
#fig.add_layer(marker_layer)
fig.add_layer(geojson_layer)
fig


Figure(layout=FigureLayout(height='420px'))

In [12]:
# All
# Heatmap over every row of `result` (all cancer types together): each point
# is weighted by its age-adjusted incidence rate divided by 15. This map also
# shows the Superfund sites as markers, plus the county overlay.
# .copy() makes an independent frame, so the column assignment below writes
# to this frame rather than to a slice of `result`.
all_locations = result[['County', 'Latitude', 'Longitude', 'Age-Adjusted Incident Rate']].copy()
all_locations['Age-Adjusted Incident Rate'] = (
    all_locations['Age-Adjusted Incident Rate'].astype('float64').div(15)
)
a_locations = all_locations[['Latitude', 'Longitude']]
weights = all_locations['Age-Adjusted Incident Rate']

fig = gmaps.figure(map_type='HYBRID')
fig.add_layer(gmaps.heatmap_layer(a_locations, weights=weights, max_intensity=80, point_radius=10, dissipating=True))
marker_layer = gmaps.marker_layer(superfund_locations)
fig.add_layer(marker_layer)
fig.add_layer(geojson_layer)
fig

Figure(layout=FigureLayout(height='420px'))