In [1]:
# Import from the public IPython.display module; importing `display` from
# IPython.core.display is deprecated.
from IPython.display import display, HTML, display_html
# Widen the notebook container to 95% of the browser window.
display(HTML("<style>.container { width:95% !important; }</style>"))

In [2]:
import pandas as pd
import numpy as np
import geopandas as gpd
from datetime import datetime
import json

In [3]:
# Countries most affected by the COVID-19 pandemic.
# Note: despite the "top10" name, both lists hold 11 entries.
top10_countries = [
    'US', 'Russia', 'Brazil', 'Spain',
    'United Kingdom', 'Italy', 'France', 'Germany',
    'Turkey', 'Iran', 'India',
]
# Three-letter country codes, position-aligned with top10_countries.
top10_countrycodes = [
    'USA', 'RUS', 'BRA', 'ESP',
    'GBR', 'ITA', 'FRA', 'DEU',
    'TUR', 'IRN', 'IND',
]

In [4]:
# Load the country shapefile with Geopandas, keeping only the country name,
# the three-letter code and the geometry, under shorter column names.
shapefile = 'countries_shapedata/ne_110m_admin_0_countries.shp'
gdf = (
    gpd.read_file(shapefile)[['ADMIN', 'ADM0_A3', 'geometry']]
    .rename(columns={'ADMIN': 'country', 'ADM0_A3': 'country_code'})
)
gdf.head()

Unnamed: 0,country,country_code,geometry
0,Fiji,FJI,"(POLYGON ((180 -16.06713266364245, 180 -16.555..."
1,United Republic of Tanzania,TZA,POLYGON ((33.90371119710453 -0.950000000000000...
2,Western Sahara,SAH,POLYGON ((-8.665589565454809 27.65642588959236...
3,Canada,CAN,"(POLYGON ((-122.84 49.00000000000011, -122.974..."
4,United States of America,USA,"(POLYGON ((-122.84 49.00000000000011, -120 49...."


In [5]:
#Drop row corresponding to 'Antarctica' to save figure canvas area.
#The row is selected by country name rather than by a hard-coded position, so
#the right row is removed whatever the shapefile order is, and re-running the
#cell does not drop an unrelated country.
print(gdf[gdf['country'] == 'Antarctica'])
gdf = gdf[gdf['country'] != 'Antarctica']

        country country_code  \
159  Antarctica          ATA   

                                              geometry  
159  (POLYGON ((-48.66061601418252 -78.047018731598...  


In [6]:
# Load the COVID-19 time series (one row per date and country/region)
# and preview its last rows.
covid_df = pd.read_csv('COVID-19-time-series-clean-complete.csv')
covid_df.tail()

Unnamed: 0,Date,Country/Region,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered
22687,2020-05-22,West Bank and Gaza,423,2,346.0,75.0,0,0,0
22688,2020-05-22,Western Sahara,6,0,6.0,0.0,0,0,0
22689,2020-05-22,Yemen,209,33,0.0,176.0,12,0,0
22690,2020-05-22,Zambia,920,7,336.0,577.0,54,0,34
22691,2020-05-22,Zimbabwe,51,4,18.0,29.0,0,0,0


In [7]:
## Add Country Code column: rows whose 'Country/Region' is one of the selected
## countries get the matching code; all other rows are left as NaN.
covid_df['country_code'] = covid_df['Country/Region'].map(
    dict(zip(top10_countries, top10_countrycodes))
)

In [8]:
# Keep only the rows for the selected countries and renumber them from 0.
# 'Date' is deliberately left as 'YYYY-MM-DD' strings: json_data() filters on
# it by comparing against a string in that same format.
covid_top10df = (
    covid_df[covid_df['Country/Region'].isin(top10_countries)]
    .reset_index(drop=True)
)
covid_top10df

Unnamed: 0,Date,Country/Region,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,country_code
0,2020-01-22,Brazil,0,0,0.0,0.0,0,0,0,BRA
1,2020-01-22,France,0,0,0.0,0.0,0,0,0,FRA
2,2020-01-22,Germany,0,0,0.0,0.0,0,0,0,DEU
3,2020-01-22,India,0,0,0.0,0.0,0,0,0,IND
4,2020-01-22,Iran,0,0,0.0,0.0,0,0,0,IRN
...,...,...,...,...,...,...,...,...,...,...
1337,2020-05-22,Russia,326448,3249,99825.0,223374.0,8894,150,7144,RUS
1338,2020-05-22,Spain,234824,28628,150376.0,55820.0,1787,688,0,ESP
1339,2020-05-22,Turkey,154500,4276,116111.0,34113.0,952,27,1121,TUR
1340,2020-05-22,US,1600937,95979,350135.0,1154823.0,23790,1277,51717,USA


In [9]:
# Spot-check the rows of a single country (India) by its country code.
covid_top10df[covid_top10df['country_code'] == 'IND']

Unnamed: 0,Date,Country/Region,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,country_code
3,2020-01-22,India,0,0,0.0,0.0,0,0,0,IND
14,2020-01-23,India,0,0,0.0,0.0,0,0,0,IND
25,2020-01-24,India,0,0,0.0,0.0,0,0,0,IND
36,2020-01-25,India,0,0,0.0,0.0,0,0,0,IND
47,2020-01-26,India,0,0,0.0,0.0,0,0,0,IND
...,...,...,...,...,...,...,...,...,...,...
1290,2020-05-18,India,100328,3156,39233.0,57939.0,4630,131,2438,IND
1301,2020-05-19,India,106475,3302,42309.0,60864.0,6147,146,3076,IND
1312,2020-05-20,India,112028,3434,45422.0,63172.0,5553,132,3113,IND
1323,2020-05-21,India,118226,3584,48553.0,66089.0,6198,150,3131,IND


In [10]:
# Check column dtypes and non-null counts of the filtered frame.
covid_top10df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1342 entries, 0 to 1341
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Date            1342 non-null   object 
 1   Country/Region  1342 non-null   object 
 2   Confirmed       1342 non-null   int64  
 3   Deaths          1342 non-null   int64  
 4   Recovered       1342 non-null   float64
 5   Active          1342 non-null   float64
 6   New cases       1342 non-null   int64  
 7   New deaths      1342 non-null   int64  
 8   New recovered   1342 non-null   int64  
 9   country_code    1342 non-null   object 
dtypes: float64(2), int64(5), object(3)
memory usage: 105.0+ KB


In [11]:
# Dates of the first and last rows bound the slider range; recent_date is the
# date the map shows when it is first rendered.
first_date = covid_top10df['Date'].iloc[0]
last_date = covid_top10df['Date'].iloc[-1]
recent_date = '2020-04-03'

In [12]:
from bokeh.io import curdoc, output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar, DateSlider, HoverTool
from bokeh.palettes import brewer
from bokeh.layouts import widgetbox, row, column
# Configure Bokeh to render its output inline in notebook cells.
output_notebook()

### Make Interactive Plot with Date Slider

In [13]:
def json_data(selected_date):
    '''Return the GeoJSON string of all countries with the COVID figures for one date.

    Parameters
    ----------
    selected_date : str
        Date to select, in the same 'YYYY-MM-DD' string form as the 'Date'
        column of covid_top10df.

    Returns
    -------
    str
        GeoJSON text with one feature per row of gdf. Countries without a
        COVID row for that date carry the string 'No data' in the COVID
        columns.
    '''
    rows_for_date = covid_top10df[covid_top10df['Date'] == selected_date]
    # Left join keeps every country shape, including those with no COVID row.
    merged = gdf.merge(rows_for_date, on = 'country_code', how = 'left')
    merged = merged.fillna('No data')
    # GeoDataFrame.to_json() already returns a GeoJSON string, so there is no
    # need to decode and re-encode it.
    return merged.to_json()



def geoplot(doc):
    '''Build the confirmed-cases choropleth with a date slider and add it to doc.

    The map is coloured by the 'Confirmed' field of json_data() for the
    selected date; moving the slider reloads the data and updates the title.

    Parameters
    ----------
    doc : bokeh document
        Document that receives the layout (slider above the map) as a root.
    '''
    # Local import: the notebook only imports `datetime` from the datetime module.
    from datetime import timezone

    #Input GeoJSON source that contains features for plotting.
    source = GeoJSONDataSource(geojson = json_data(recent_date))

    #Define a sequential multi-hue color palette.
    palette = brewer['YlGnBu'][6]

    #Reverse color order so that dark blue is the highest number of confirmed cases.
    palette = palette[::-1]

    #Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors. Input nan_color.
    color_mapper = LinearColorMapper(palette = palette, low = 0, high = covid_top10df['Confirmed'].max(), nan_color = '#d9d9d9')

    #Define custom tick labels for color bar.
    #Keys must be the plain tick value (no thousands separator), otherwise the
    #override never matches a tick.
    tick_labels = {'0': '0', '10000':'10,000', '50000':'50,000', '100000':'0.1M', '250000':'0.25M','500000':'0.5M','1000000':'>1M'}

    #Add hover tool showing the country name and its case figures.
    hover = HoverTool(tooltips = [ ('Country/region','@country'),('ConfirmedCases', '@Confirmed'),('Deaths', '@Deaths'),('Recovered', '@Recovered')])

    #Create color bar.
    color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8,width = 500, height = 20, border_line_color=None,
                         location = (0,0), orientation = 'horizontal', major_label_overrides = tick_labels)

    #Create figure object.
    p = figure(title = 'Covid-19 - Cases in Top 10 Countries as on {}'.format(recent_date), plot_height = 600 , plot_width = 1200,
               toolbar_location = "right", tools = [hover,'save'])
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None

    #Add patch renderer to figure.
    p.patches('xs','ys', source = source,fill_color = {'field' :'Confirmed', 'transform' : color_mapper},
              line_color = 'black', line_width = 0.25, fill_alpha = 1)

    p.add_layout(color_bar, 'below')

    # Define the callback function: update_plot
    def update_plot(attr, old, new):
        # The slider value is in milliseconds since the epoch. Interpret it in
        # UTC: converting with the machine's local timezone shifts the result
        # to the previous day on hosts west of UTC.
        picked = datetime.fromtimestamp(slider.value / 1000, tz = timezone.utc)
        selected_date = picked.strftime('%Y-%m-%d')
        date_in_words = picked.strftime('%A, %d %B %Y')
        source.geojson = json_data(selected_date)
        p.title.text = 'Covid-19 - Cases in Top 10 Countries as on {}'.format(date_in_words)

    # Make a slider object: slider
    slider = DateSlider(start=first_date, end=last_date, value=recent_date, step=1, title="Date")
    slider.on_change('value', update_plot)

    # Make a column layout of slider and plot, and add it to the document
    doc.add_root(column(slider,p))

In [14]:
# Pass the function itself (not a figure) so Bokeh runs it as an application
# and the slider's Python callback can execute.
show(geoplot)