# Mapping the Spread of Covid-19 with Python

source: https://python-bloggers.com/2020/03/mapping-the-spread-of-covid-19-with-python/

In [2]:
!pip install panel



In [3]:
!pip install panel



In [4]:
!pip install branca



In [5]:
!pip install folium



In [6]:
!pip install pyshp



In [7]:
!pip install fiona



In [8]:
!pip install geopandas



In [9]:
!pip install panel



In [10]:
!pip install bokeh



In [11]:
import branca.colormap as cm
import folium

import shapefile as shp
import fiona

import geopandas as gpd
import numpy as np
import pandas as pd

import json
import matplotlib as mpl
import pylab as plt

from bokeh.io import output_file, show, output_notebook, export_png
from bokeh.models import ColumnDataSource, GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.plotting import figure
from bokeh.palettes import brewer

import panel as pn
import panel.widgets as pnw
# Initialise Panel's notebook extension so Panel objects can render in this notebook.
pn.extension()
# Tell Bokeh to send plot output to the notebook rather than to a file.
output_notebook()

ImportError: libcharset.so.1: cannot open shared object file: No such file or directory

In [3]:
#bokeh.__version__

In [4]:
# source: https://www.kaggle.com/sudalairajkumar/novel-corona-virus-2019-dataset/data
# Load the case counts (one row per observation date and province/state).
corona_df = pd.read_csv(filepath_or_buffer="data/covid_19_data.csv")

In [5]:
# Preview the first rows to check which columns were loaded.
corona_df.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0


In order to map our data, we need a shapefile. A shapefile is a geospatial vector data format. The shapefile format can spatially describe vector features such as points, lines, and polygons. We can read this file using GeoPandas.<br>
source: https://hub.arcgis.com/datasets/a21fdb46d23e4ef896f31475217cbb08_1/data

In [13]:
# Read the country outlines from the shapefile into a GeoDataFrame.
countries = gpd.read_file(filename='data/Countries_WGS84/Countries_WGS84.shp')

In [14]:
# Inspect the last rows of the shapefile table (OBJECTID, CNTRY_NAME, geometry).
countries.tail()

Unnamed: 0,OBJECTID,CNTRY_NAME,geometry
246,247,Western Samoa,(POLYGON ((-172.596496582031 -13.5091133117676...
247,248,Swaziland,"POLYGON ((30.8994426727295 -26.7719497680664, ..."
248,249,Yemen,"(POLYGON ((48.6863899230957 14.0374994277955, ..."
249,250,Zambia,"POLYGON ((30.2130165100098 -14.9817161560058, ..."
250,251,Zimbabwe,"POLYGON ((32.4888763427734 -21.3444480895996, ..."


In [15]:
# Fold the separately reported territories below into a single 'China' label.
china_aliases = ['Taiwan', 'Mainland China', 'Hong Kong', 'Macau']
corona_df = corona_df.replace(
    {'Country/Region': {alias: 'China' for alias in china_aliases}}
)

In [16]:
# Spell out the 'US' abbreviation; only the 'Country/Region' column is touched.
corona_df = corona_df.replace({'Country/Region': {'US': 'United States'}})

In [17]:
# Relabel the remaining countries in the case data. None of the new labels is
# itself an old label, so one combined mapping gives the same result as
# applying the replacements one after another.
corona_df = corona_df.replace({
    'Country/Region': {
        'UK': 'United Kingdom',
        'North Ireland': 'United Kingdom',
        'Republic of Ireland': 'Ireland',
        'Vatican City': 'Italy',
    }
})

# Update the outdated country names carried by the shapefile.
countries = countries.replace({
    'CNTRY_NAME': {
        'Byelarus': 'Belarus',
        'Macedonia': 'North Macedonia',
    }
})

Make sure that the country column has the same name in both files

In [18]:
# Give the shapefile's country column the same name as in the case data,
# so the two tables can later be joined on it.
countries = countries.rename({'CNTRY_NAME': 'Country/Region'}, axis='columns')

Some countries are included in the data despite having zero confirmed cases. So we remove these:

In [19]:
# Keep only the rows whose confirmed count is not zero.
has_cases = corona_df['Confirmed'].ne(0)
corona_df = corona_df.loc[has_cases]

After a quick check of the cleaned rows (and saving them to CSV), we sort our data by country and observation date and reset the index:

In [20]:
# Check that the country relabelling and the zero-case filter took effect.
corona_df.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,China,1/22/2020 17:00,1.0,0.0,0.0
5,6,01/22/2020,Guangdong,China,1/22/2020 17:00,26.0,0.0,0.0


In [None]:
# Persist the cleaned case data (with a header row, without the index column).
corona_df.to_csv(path_or_buf='corona_df.csv', header=True, index=False)

In [28]:
#compression_opts = dict(method='zip', archive_name='corona_df_13042020.csv')  
#corona_df.to_csv('corona_df_13042020.zip', index=False, compression=compression_opts)  

In [29]:
# Order the rows by country and then by observation date, renumbering from 0.
sort_keys = ['Country/Region', 'ObservationDate']
sorted_df = corona_df.sort_values(by=sort_keys)
sorted_df = sorted_df.reset_index(drop=True)

Some countries, such as China, are split into different provinces/states. Since we just want the total number of cases per country, we get the sum for each country at each date:

In [30]:
# Total the counts per country and observation date.
# `numeric_only=True` limits the sum to numeric columns. Without it, recent
# pandas versions also try to "sum" the text columns ('Province/State',
# 'Last Update'), which either concatenates strings or raises an error.
sum_df = (
    sorted_df
    .groupby(['Country/Region', 'ObservationDate'], as_index=False)
    .sum(numeric_only=True)
)

Now we can join the data and the shapefile together:

In [31]:
# Attach each country's geometry to its daily totals. This is an inner join,
# so countries present in only one of the two tables are dropped.
joined_df = pd.merge(sum_df, countries, how='inner', on='Country/Region')

We are going to plot the log of the number of confirmed cases for each country, as there are a couple of countries, such as China and Italy, with a lot more cases compared to other countries.

In [32]:
# Base-10 logarithm of the confirmed counts, used later to pick map colours.
confirmed_counts = joined_df['Confirmed']
joined_df['log_Confirmed'] = np.log10(confirmed_counts)

We also need to convert the ObservationDate to Unix time in seconds (stored as a string), which is the timestamp format used for the keys of the style dictionary below:

In [33]:
# Convert each observation date to whole seconds since the Unix epoch, stored
# as a string. Subtracting the epoch and floor-dividing by one second avoids
# two pitfalls of `astype(int) / 10**9`: `int` can be a 32-bit type on some
# platforms, and the division assumes the datetimes have nanosecond resolution.
observation_dates = pd.to_datetime(joined_df['ObservationDate'])
epoch_seconds = (observation_dates - pd.Timestamp('1970-01-01')) // pd.Timedelta(seconds=1)
joined_df['date_sec'] = epoch_seconds.astype(str)

We can now select the columns needed for the map and discard the others:

In [34]:
# Keep only the columns the map needs.
map_columns = ['Country/Region', 'date_sec', 'log_Confirmed', 'geometry']
joined_df = joined_df[map_columns]

In [39]:
# Show the last five rows of the map-ready table.
joined_df.tail(5)

Unnamed: 0,Country/Region,date_sec,log_Confirmed,geometry
6934,Zimbabwe,1586304000,1.041393,"POLYGON ((32.4888763427734 -21.3444480895996, ..."
6935,Zimbabwe,1586390400,1.041393,"POLYGON ((32.4888763427734 -21.3444480895996, ..."
6936,Zimbabwe,1586476800,1.113943,"POLYGON ((32.4888763427734 -21.3444480895996, ..."
6937,Zimbabwe,1586563200,1.146128,"POLYGON ((32.4888763427734 -21.3444480895996, ..."
6938,Zimbabwe,1586649600,1.146128,"POLYGON ((32.4888763427734 -21.3444480895996, ..."


In [40]:
# Persist the map-ready table (with a header row, without the index column).
joined_df.to_csv(path_or_buf='joined_df.csv', header=True, index=False)

## Time to map

A choropleth is a type of map where regions are shaded or patterned proportionally to a data variable. We are going to make a choropleth with a timeslider, to show the spread of COVID-19 over time. The TimeSliderChoropleth class needs at least two arguments: a GeoJSON file containing the features (in this case, the countries) and a style dictionary. The style dictionary should have the following form:

In [None]:
# Illustrative schema only (kept as comments so the cell stays runnable).
# The outer keys are feature ids, the inner keys are timestamps, and each
# value gives the colour and opacity of that feature at that time.
#
# styledict = {
#     '<feature id>': {
#         '<timestamp>': {'color': '<colour>', 'opacity': <opacity>},
#         '<timestamp>': {'color': '<colour>', 'opacity': <opacity>},
#         ...
#     },
#     ...,
#     '<feature id>': {
#         '<timestamp>': {'color': '<colour>', 'opacity': <opacity>},
#         '<timestamp>': {'color': '<colour>', 'opacity': <opacity>},
#         ...
#     },
# }

We have to first initialise the map. Folium allows the use of different map tiles. If we do not specify a map, it defaults to OpenStreetMap. Here, we will use 'cartodbpositron':

In [41]:
# World map on the 'cartodbpositron' tile set, with no zoom or bounds limits yet.
mymap = folium.Map(tiles='cartodbpositron')
# Write the map to a standalone HTML file.
mymap.save('infinite_scroll.html')

In [42]:
# Display the map as the cell's output.
mymap

Now we have a map of the world. However, there are a couple of problems: the continents are continually repeated and the map can be panned endlessly from either side. In order to prevent this from happening, we set a minimum zoom and set max_bounds=True:

In [43]:
# Same tiles as before, but with a minimum zoom level and max_bounds enabled
# so the map cannot be panned endlessly.
mymap_fix_boundary = folium.Map(
    tiles='cartodbpositron',
    min_zoom=2,
    max_bounds=True,
)
mymap_fix_boundary.save('fix_boundary.html')

You might need to change the value of min_zoom depending on your platform. Now we define a color map in terms of the log of the number of confirmed cases:

In [44]:
# Range of the colour scale. Series.min()/.max() are vectorised and skip NaN,
# unlike the builtin min()/max(). The float() cast keeps the bounds as plain
# Python floats, which is what the builtins returned.
log_cases = joined_df['log_Confirmed']
max_colour = float(log_cases.max())
min_colour = float(log_cases.min())

# Yellow-orange-red linear colour map stretched over the observed log range.
cmap = cm.linear.YlOrRd_09.scale(min_colour, max_colour)

# Give every row its colour. `assign` returns a new frame, which avoids the
# SettingWithCopyWarning raised when a column is added to a sliced frame.
joined_df = joined_df.assign(colour=log_cases.map(cmap))

Next, we construct our style dictionary:

In [45]:
# Countries in order of first appearance; the position in this list is the
# key under which that country's styles are stored.
country_list = joined_df['Country/Region'].unique().tolist()
country_idx = range(len(country_list))

# Group once, instead of filtering the whole frame again for every country.
rows_by_country = joined_df.groupby('Country/Region', sort=False)

# style_dict maps str(country position) -> {date_sec: {'color', 'opacity'}}.
# NOTE(review): these keys presumably have to match the feature ids of the
# GeoJSON passed to TimeSliderChoropleth. Confirm that the de-duplicated
# geometry rows come out in the same order as country_list.
style_dict = {}
for i, country in enumerate(country_list):
    result = rows_by_country.get_group(country)
    style_dict[str(i)] = {
        date_sec: {'color': colour, 'opacity': 0.7}
        for date_sec, colour in zip(result['date_sec'], result['colour'])
    }

Then we need to make a dataframe containing the features for each country:



In [46]:
# One row per distinct geometry, re-indexed from 0. The previous index is
# kept as an 'index' column because reset_index() is called without drop=True.
countries_df = joined_df.loc[:, ['geometry']]
countries_gdf = (
    gpd.GeoDataFrame(countries_df)
    .drop_duplicates()
    .reset_index()
)

Finally, we create our map and add a colourbar:

In [47]:
from folium.plugins import TimeSliderChoropleth

# Base map with the same tiles and zoom/bounds limits as the fixed-boundary map.
slider_map = folium.Map(tiles='cartodbpositron', min_zoom=2, max_bounds=True)

# Time-slider layer: the country shapes as GeoJSON plus the per-date styles.
time_slider = TimeSliderChoropleth(
    data=countries_gdf.to_json(),
    styledict=style_dict,
)
_ = time_slider.add_to(slider_map)

# Add the colour bar as a legend and give it a caption.
_ = cmap.add_to(slider_map)
cmap.caption = "Log of number of confirmed cases"

# Write the finished map to a standalone HTML file.
slider_map.save('TimeSliderChoropleth.html')