In [1]:
import pandas as pd

# COVID-19 data downloaded from the CDC
df = pd.read_csv("United_States_COVID-19_Cases_and_Deaths_by_State_over_Time.csv")

# Mapping code heavily borrowed from Shivangi Patel- https://laptrinhx.com/a-complete-guide-to-an-interactive-geographical-map-using-python-2941699295/

C:\Users\forlu\.julia\conda\3\lib\site-packages\numpy\.libs\libopenblas.IPBC74C7KURV7CB2PKT5Z5FNR3SIBV4J.gfortran-win_amd64.dll
C:\Users\forlu\.julia\conda\3\lib\site-packages\numpy\.libs\libopenblas.PYQHXLVVQ7VESDPUVUADXEVJOBGHJPAY.gfortran-win_amd64.dll
  stacklevel=1)


In [2]:
# check out the dataset
df.head()

Unnamed: 0,submission_date,state,tot_cases,conf_cases,prob_cases,new_case,pnew_case,tot_death,conf_death,prob_death,new_death,pnew_death,created_at,consent_cases,consent_deaths
0,5/4/2020,KY,5245,5245.0,0.0,115,0.0,261,261.0,0.0,8,0.0,5/5/2020 17:25,Agree,Agree
1,4/22/2020,NV,4081,,,144,0.0,179,,,9,0.0,4/22/2020 16:22,,
2,4/6/2020,WV,345,,,21,,4,,,1,,4/5/2020 16:22,Agree,Not agree
3,4/30/2020,IA,7145,,,302,0.0,162,,,14,0.0,5/1/2020 21:00,Not agree,Not agree
4,3/5/2020,GA,2,,,-5,,0,,,0,,3/26/2020 16:22,Agree,Agree


In [3]:
# sort data so we have the dates, new case numbers, and states
import numpy as np
dates = list(df.groupby(['submission_date','state']).new_case.sum().index.get_level_values(0).tolist())
#print(len(dates))
states = list(df.groupby(['submission_date','state']).new_case.sum().index.get_level_values(1).tolist())
new_case = list(df.groupby(['submission_date','state']).new_case.sum())
#print(np.max(np.array(new_case)))
#print(dates[0])
#print(dates[len(dates)-1])
df_final = pd.DataFrame(list(zip(np.array(dates), np.array(states), np.array(new_case))), columns =['submission_date', 'state', 'new_case']) 
df_final.head()

Unnamed: 0,submission_date,state,new_case
0,1/22/2020,AK,0
1,1/22/2020,AL,0
2,1/22/2020,AR,0
3,1/22/2020,AS,0
4,1/22/2020,AZ,0


In [4]:
# we will use geopandas to create the map framework

import geopandas as gpd

In [5]:
# map framework file which was downloaded from https://www.naturalearthdata.com/
shapefile2 = 'states/ne_110m_admin_1_states_provinces.shp'

In [6]:
# to create the map framework and labels we need the state names, the 'codes' for each state (ex. IL for Illinois), 
# and the map geometry 

gdf2 = gpd.read_file(shapefile2)[['name', 'iso_3166_2', 'geometry']]
print(gdf2)

                    name iso_3166_2  \
0              Minnesota      US-MN   
1                Montana      US-MT   
2           North Dakota      US-ND   
3                 Hawaii      US-HI   
4                  Idaho      US-ID   
5             Washington      US-WA   
6                Arizona      US-AZ   
7             California      US-CA   
8               Colorado      US-CO   
9                 Nevada      US-NV   
10            New Mexico      US-NM   
11                Oregon      US-OR   
12                  Utah      US-UT   
13               Wyoming      US-WY   
14              Arkansas      US-AR   
15                  Iowa      US-IA   
16                Kansas      US-KS   
17              Missouri      US-MO   
18              Nebraska      US-NE   
19              Oklahoma      US-OK   
20          South Dakota      US-SD   
21             Louisiana      US-LA   
22                 Texas      US-TX   
23           Connecticut      US-CT   
24         Massachusetts 

In [7]:
# we pull out and rename the columns of interest to us 
gdf2.columns = ['state', 'state_code', 'geometry']

# the state codes had a US- in front of each one which was unnecessary 
gdf2['state_code']  = gdf2['state_code'].str.strip('US-')
gdf2.head()

Unnamed: 0,state,state_code,geometry
0,Minnesota,MN,"POLYGON ((-89.95766 47.28691, -90.13175 47.292..."
1,Montana,MT,"POLYGON ((-116.04823 49.00037, -113.05950 49.0..."
2,North Dakota,ND,"POLYGON ((-97.22894 49.00089, -97.21414 48.902..."
3,Hawaii,HI,"MULTIPOLYGON (((-155.93665 19.05939, -155.9080..."
4,Idaho,ID,"POLYGON ((-116.04823 49.00037, -115.96780 47.9..."


In [8]:
#we Filter data for a specific date.

df_final_1222020 = df_final[df_final['submission_date']=='9/9/2020']

In [9]:
#Perform left merge to preserve every row in gdf.

# merging CDC data and map data into one dataframe
merged2 = gdf2.merge(df_final_1222020, left_on = 'state_code', right_on = 'state', how = 'left')

# how = left allows preservation of geometry data so there are no blank spaces in the map if there is missing data
print(merged2)


                 state_x state_code  \
0              Minnesota         MN   
1                Montana         MT   
2           North Dakota         ND   
3                 Hawaii         HI   
4                  Idaho         ID   
5             Washington         WA   
6                Arizona         AZ   
7             California         CA   
8               Colorado         CO   
9                 Nevada         NV   
10            New Mexico         NM   
11                Oregon         OR   
12                  Utah          T   
13               Wyoming         WY   
14              Arkansas         AR   
15                  Iowa         IA   
16                Kansas          K   
17              Missouri         MO   
18              Nebraska         NE   
19              Oklahoma         OK   
20          South Dakota          D   
21             Louisiana         LA   
22                 Texas         TX   
23           Connecticut         CT   
24         Massachusetts 

In [10]:
import json

In [11]:
#Read data to json.
merged_json = json.loads(merged2.to_json())

In [12]:
#Convert to String like object.
json_data = json.dumps(merged_json)

In [13]:
# we are first going to make a static map and then add interactivity in the next step

from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.palettes import brewer

In [14]:
#Input GeoJSON source that contains features for plotting.
geosource = GeoJSONDataSource(geojson = json_data)

In [15]:
#Define a sequential multi-hue color palette.
palette = brewer['YlGnBu'][8]

In [16]:
#Reverse color order so that dark blue is highest obesity.
palette = palette[::-1]

In [17]:
#Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors.
color_mapper = LinearColorMapper(palette = palette, low = 0, high = 5000, nan_color = '#d9d9d9')

In [18]:
#Define custom tick labels for color bar.
tick_labels = {'0': '0', '1000': '1000', '2000':'2000', '3000':'3000', '4000':'4000', '5000':'5000'}


In [19]:
#Create color bar. 
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8,width = 500, height = 20,
border_line_color=None,location = (0,0), orientation = 'horizontal', major_label_overrides = tick_labels)

In [20]:
#Create figure object.
p = figure(title = 'New coronavirus cases, 9/9/2020', plot_height = 600 , plot_width = 950, toolbar_location = None)
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

In [21]:
#Add patch renderer to figure. 
p.patches('xs','ys', source = geosource,fill_color = {'field' :'new_case', 'transform' : color_mapper},
          line_color = 'black', line_width = 0.25, fill_alpha = 1)

In [22]:
#Specify figure layout.
p.add_layout(color_bar, 'below')

In [23]:
#Display figure inline in Jupyter Notebook.
output_notebook()

In [24]:
#Display figure.
show(p)

In [26]:
# Now we will create an interactive version of the map - so we can look through the dates

from bokeh.io import curdoc, output_notebook
from bokeh.models import Slider, HoverTool, DateSlider, DateRangeSlider
from bokeh.layouts import widgetbox, row, column

In [27]:
#Define function that returns json_data for date selected by user.
    
def json_data(selectedDate):
    dt = selectedDate
    df_dt = df_final[df_final['submission_date']== dt]
    merged = gdf2.merge(df_dt, left_on = 'state_code', right_on =     'state', how = 'left')
    merged_json = json.loads(merged.to_json())
    json_data = json.dumps(merged_json)
    return json_data

In [29]:
#Input GeoJSON source that contains features for plotting.
geosource = GeoJSONDataSource(geojson = json_data('9/9/2020'))

In [30]:
import bokeh
#Define a sequential multi-hue color palette.
palette = bokeh.palettes.viridis(16)
#Reverse color order so that dark blue is highest obesity.
palette = palette[::-1]
#Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors. Input nan_color.
color_mapper = LinearColorMapper(palette = palette, low = 0, high = 5000, nan_color = '#d9d9d9')

In [31]:
#Define custom tick labels for color bar.
tick_labels = {'0': '0', '1000': '1000', '2000':'2000', '3000':'3000', '4000':'4000', '5000':'5000'}

In [32]:
#Add hover tool
hover = HoverTool(tooltips = [ ('State/region','@state_code'),('new cases', '@new_case')])

In [33]:
#Create color bar. 
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8,width = 500, height = 20,
                     border_line_color=None,location = (0,0), orientation = 'horizontal', major_label_overrides = tick_labels)

In [34]:
#Create figure object.
p = figure(title = 'Number of new coronavirus cases, 9/9/2020', plot_height = 600 , plot_width = 950, toolbar_location = None, tools = [hover])
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

In [35]:
#Add patch renderer to figure. 
p.patches('xs','ys', source = geosource,fill_color = {'field' :'new_case', 'transform' : color_mapper},
          line_color = 'black', line_width = 0.25, fill_alpha = 1)

In [36]:
#Specify layout
p.add_layout(color_bar, 'below')

In [37]:
import datetime 

# Define the callback function: update_plot
def update_plot(attr, old, new):
    dt = slider.value_as_datetime 
    dt = dt.strftime("%m/%d/%Y") # output of slider.value_as_datetime is date time object but we need string for 
                                 # comparison to our datafrome
    
    split =list(dt)              # we need to format out leading zeros from m and d to match our dataframe for dates
    if split[0] == '0':
        del split[0]
    
    if (split[2]) == '0':
        del split[2]

    if (split[3]) == '0':
        del split[3]
    
    dt = ''.join(split)
    
    
    new_data = json_data(dt)
    geosource.geojson = new_data
    p.title.text = 'Number of new coronavirus cases, %s' %dt
    
# Make a slider object: slider 
# Used a date slider
slider = DateSlider(name='Date', start=(datetime.datetime(2020, 1, 22)), end=(datetime.datetime(2020, 9, 9)), value=(datetime.datetime(2020, 9, 9)))

slider.on_change('value', update_plot)

In [38]:
# Make a column layout of widgetbox(slider) and plot, and add it to the current document
layout = column(p,widgetbox(slider))
curdoc().add_root(layout)



In [39]:
#Display plot inline in Jupyter notebook
output_notebook()

In [40]:
#Display plot
show(layout)

You are generating standalone HTML/JS output, but trying to use real Python
callbacks (i.e. with on_change or on_event). This combination cannot work.

Only JavaScript callbacks may be used with standalone output. For more
information on JavaScript callbacks with Bokeh, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/interaction/callbacks.html

Alternatively, to use real Python callbacks, a Bokeh server application may
be used. For more information on building and running Bokeh applications, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/server.html



In [None]:
# to see and utilize the interactivity open a command line window in the same directory and run the below line of code

# bokeh serve --show filename.ipynb command