In [1]:
#Import Packages
import os
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.graph_objects as go
import plotly.io as pio
import math

In [2]:
#Show all outputs
# With 'all', every bare expression statement in a cell is displayed, not only the
# last one. Later cells rely on this to show several results (e.g. repeated
# `.columns` look-ups) from a single cell.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [227]:
#Data Retrieval
# Timestamp precision is the number of fractional-second digits that are stored:
# 0 means whole seconds, 3 means milliseconds and 9 means nanoseconds. pandas keeps
# the parsed date column as datetime64[ns], i.e. at nanosecond precision.
# (Written as comments rather than a bare string so the cell does not echo the text.)

# Workbook downloaded from the ECDC; the spelling of the file name is the publisher's.
DATA_FILE = 'COVID-19-geographic-disbtribution-worldwide-2020-05-29.xlsx'

# The dtype keys must match the workbook's header names exactly (they are
# camelCase: dateRep, countriesAndTerritories, geoId). Dates are requested through
# parse_dates rather than a dtype entry.
# NOTE(review): geoId has fewer non-null values than rows; check whether a literal
# 'NA' code is being read as missing before relying on that column.
DF = pd.read_excel(
    DATA_FILE,
    dtype={'countriesAndTerritories': 'str', 'geoId': 'str'},
    parse_dates=['dateRep'],
)
DF.info()
DF.dtypes
DF.columns
DF.head()

'\nTimestamp values have a precision in fractional seconds that range \nfrom 0 to 9. For example, a precision of 0 means that no fractional \nseconds are stored, 3 means that the timestamp stores milliseconds, \nand 9 means a precision of nanoseconds. 0 is the minimum precision, \nand 9 is the maximum.\n'

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20084 entries, 0 to 20083
Data columns (total 11 columns):
dateRep                    20084 non-null datetime64[ns]
day                        20084 non-null int64
month                      20084 non-null int64
year                       20084 non-null int64
cases                      20084 non-null int64
deaths                     20084 non-null int64
countriesAndTerritories    20084 non-null object
geoId                      20008 non-null object
countryterritoryCode       19808 non-null object
popData2018                19803 non-null float64
continentExp               20084 non-null object
dtypes: datetime64[ns](1), float64(1), int64(5), object(4)
memory usage: 1.7+ MB


dateRep                    datetime64[ns]
day                                 int64
month                               int64
year                                int64
cases                               int64
deaths                              int64
countriesAndTerritories            object
geoId                              object
countryterritoryCode               object
popData2018                       float64
continentExp                       object
dtype: object

Index(['dateRep', 'day', 'month', 'year', 'cases', 'deaths',
       'countriesAndTerritories', 'geoId', 'countryterritoryCode',
       'popData2018', 'continentExp'],
      dtype='object')

Unnamed: 0,dateRep,day,month,year,cases,deaths,countriesAndTerritories,geoId,countryterritoryCode,popData2018,continentExp
0,2020-05-29,29,5,2020,580,8,Afghanistan,AF,AFG,37172386.0,Asia
1,2020-05-28,28,5,2020,625,7,Afghanistan,AF,AFG,37172386.0,Asia
2,2020-05-27,27,5,2020,658,1,Afghanistan,AF,AFG,37172386.0,Asia
3,2020-05-26,26,5,2020,591,1,Afghanistan,AF,AFG,37172386.0,Asia
4,2020-05-25,25,5,2020,584,2,Afghanistan,AF,AFG,37172386.0,Asia


In [228]:
#Select only the required columns
# Keep just the fields used downstream; the column index is displayed after each
# step so the effect of the selection, the renaming and the capitalisation is visible.
needed_columns = ['dateRep', 'cases', 'deaths', 'countryterritoryCode', 'popData2018']
DF_Sel = DF[needed_columns]
DF_Sel.columns

# Shorter, friendlier names for the three awkward headers.
short_names = {
    'dateRep': 'Date',
    'countryterritoryCode': 'Country',
    'popData2018': 'Population',
}
DF_Sel = DF_Sel.rename(columns=short_names)
DF_Sel.columns

# Capitalise the remaining lower-case headers ('cases', 'deaths').
DF_Sel.columns = DF_Sel.columns.str.capitalize()
DF_Sel.columns

Index(['dateRep', 'cases', 'deaths', 'countryterritoryCode', 'popData2018'], dtype='object')

Index(['Date', 'cases', 'deaths', 'Country', 'Population'], dtype='object')

Index(['Date', 'Cases', 'Deaths', 'Country', 'Population'], dtype='object')

In [229]:
# Preview the first rows of the trimmed and renamed frame.
DF_Sel.head()

Unnamed: 0,Date,Cases,Deaths,Country,Population
0,2020-05-29,580,8,AFG,37172386.0
1,2020-05-28,625,7,AFG,37172386.0
2,2020-05-27,658,1,AFG,37172386.0
3,2020-05-26,591,1,AFG,37172386.0
4,2020-05-25,584,2,AFG,37172386.0


In [230]:
#Take off the time from date
# Vectorised conversion to plain datetime.date objects. Going through
# pd.to_datetime first keeps the cell safe to re-run: on a second run the column
# already holds date objects, which are converted back to timestamps before the
# time part is dropped again.
DF_Sel["Date"] = pd.to_datetime(DF_Sel["Date"]).dt.date

In [231]:
#Obtain Country Vs Date for Daily Cases
# One row per country code and one column per date, holding the summed case counts;
# country/date combinations with no rows are filled with 0. Because `values` and
# `aggfunc` are passed as lists, the resulting columns carry three levels
# (aggregation, value name, date).
Daily_Cases = DF_Sel.pivot_table(
    values=["Cases"],
    index=['Country'],
    columns=["Date"],
    aggfunc=[np.sum],
    fill_value=0,
    margins=False,
)

In [232]:
#Get rid of extra layers of the column names
# Only the innermost level (the dates, named 'Date' by the pivot) is kept as the
# column index; the aggregation and value-name levels are discarded.
date_level = Daily_Cases.columns.get_level_values('Date')
Daily_Cases.columns = date_level

In [233]:
#reset index and obtain it as column
# Move the 'Country' index into an ordinary first column. The guard makes the cell
# safe to re-run: once the index has been reset, a second reset would insert a
# spurious 'index' column and shift the positional column slicing used later.
if Daily_Cases.index.name == 'Country':
    Daily_Cases = Daily_Cases.reset_index()

In [234]:
# Preview the wide table: a 'Country' column followed by one column per date.
Daily_Cases.head()

Date,Country,2019-12-31,2020-01-01,2020-01-02,2020-01-03,2020-01-04,2020-01-05,2020-01-06,2020-01-07,2020-01-08,...,2020-05-20,2020-05-21,2020-05-22,2020-05-23,2020-05-24,2020-05-25,2020-05-26,2020-05-27,2020-05-28,2020-05-29
0,ABW,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,AFG,0,0,0,0,0,0,0,0,0,...,581,492,531,540,782,584,591,658,625,580
2,AGO,0,0,0,0,0,0,0,0,0,...,2,2,6,2,0,9,0,2,0,2
3,ALB,0,0,0,0,0,0,0,0,0,...,1,15,5,12,8,9,6,25,21,26
4,AND,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0


In [235]:
# A Choropleth Map is a map composed of colored polygons. It is used to represent spatial variations of a quantity. 
# go.Choropleth graph objects (like many other plotly objects) have a go.layout.Geo object which can be used to control
# the appearance of the base map onto which data is plotted


In [236]:
# Number of leading date columns to skip. In this snapshot the first date column
# is 2019-12-31, so skipping 15 starts the animation on 15 January 2020.
FIRST_FRAME_OFFSET = 15

# White-to-red scale shared by the initial trace and every animation frame, so the
# colours keep the same meaning once playback starts.
CASES_COLORSCALE = [
    [0, 'rgb(255, 255, 255)'],
    [1, 'rgb(255, 0, 0)'],
]

# Built from adjacent literals so no source indentation leaks into the link text.
DATA_SOURCE_NOTE = (
    'Data Source: <a href="https://www.ecdc.europa.eu/en/publications-data/'
    'download-todays-data-geographic-distribution-covid-19-cases-worldwide">'
    'European Centre for Disease Prevention and Control</a>'
)


def daily_cases_trace(date):
    """Return the choropleth trace for one date column of ``Daily_Cases``.

    Parameters
    ----------
    date
        A column label of ``Daily_Cases`` (one of the entries of ``Frames``).

    Returns
    -------
    plotly.graph_objects.Choropleth
        Countries located by the ISO-3 codes in the 'Country' column and coloured
        by that day's case count.
    """
    # NOTE(review): zmin/zmax are not set, so the colour range is left to plotly's
    # defaults; fix them if colours must be comparable from one day to the next.
    return go.Choropleth(
        locations=Daily_Cases['Country'],  # Spatial coordinates
        z=Daily_Cases[date],               # Data to be color-coded
        locationmode='ISO-3',
        colorscale=CASES_COLORSCALE,
        colorbar_title="Confirmed Cases",
    )


# Column 0 is 'Country'; everything after it is a date column.
Frames = Daily_Cases.columns[1:][FIRST_FRAME_OFFSET:]

# Derive the date range in the title from the data instead of hard-coding it, so it
# stays correct when a newer snapshot is loaded.
figure_title = 'COVID-19 Daily Cases Worldwide from {} to {}'.format(
    Frames[0].strftime('%b %d'), Frames[-1].strftime('%b %d'))

fig = go.Figure(
    # The first day is the static starting picture ...
    data=[daily_cases_trace(Frames[0])],
    layout=go.Layout(
        title_text=figure_title,
        geo=dict(
            showframe=True,
            showcoastlines=True,
            projection_type='equirectangular',
        ),
        annotations=[dict(
            x=0.55,
            y=0.1,
            xref='paper',
            yref='paper',
            text=DATA_SOURCE_NOTE,
            showarrow=True,
        )],
        updatemenus=[dict(
            type="buttons",
            buttons=[dict(label="Play",
                          method="animate",
                          args=[None])],
        )],
    ),
    # ... and every later day becomes one animation frame.
    frames=[go.Frame(data=[daily_cases_trace(date)]) for date in Frames[1:]],
)

fig.show()

In [237]:
# Export the animated map as a standalone HTML page and open it in the browser.
fig.write_html(file='index.html', auto_open=True)