In [15]:
import pandas as pd
import numpy as np

In [18]:
# Fetch the geo-countries data package descriptor from datahub.
# API reference: https://datahub.io/core/geo-countries#python
from datapackage import Package

package = Package('https://datahub.io/core/geo-countries/datapackage.json')

# Names of every resource the package offers ...
print(package.resource_names)

# ... followed by the download path of each resource, one per line.
for entry in package.descriptor['resources']:
    resource_path = entry['path']
    print(resource_path)

# Manual step: download
# "https://pkgstore.datahub.io/core/geo-countries/geo-countries_zip/data/b11a931a6d3f0b0f262de2c8817361e6/geo-countries_zip.zip"
# and extract the countries.geojson file from the zip.

['validation_report', 'geo-countries_zip', 'countries']
https://pkgstore.datahub.io/core/geo-countries/validation_report/data/d751713988987e9331980363e24189ce/validation_report.json
https://pkgstore.datahub.io/core/geo-countries/geo-countries_zip/data/b11a931a6d3f0b0f262de2c8817361e6/geo-countries_zip.zip
https://pkgstore.datahub.io/core/geo-countries/countries/archive/23f420f929e0e09c39d916b8aaa166fb/countries.geojson


In [2]:
# How to get the ISO_A3 code from a latitude/longitude pair?
# Use the Google Geocoding API to get the country name for the coordinates.
# Create an API key under "APIs & Services" at https://console.cloud.google.com
# and save it in API_key.txt so the secret is never typed into the notebook.
with open('API_key.txt', 'r') as key_file:
    # Strip surrounding whitespace: a trailing newline left by an editor would
    # otherwise be sent as part of the key in every request.
    API_key = key_file.read().strip()

In [375]:
import requests

def reverse_geo_encoding(loc):
    '''
    Look up the country name for a (latitude, longitude) pair.

    Queries the Google Geocoding API with the module-level API_key and takes
    the long_name of the first result's address component whose types
    include 'country'.

    Input: loc - (lat, long) pair
    Output: Country name, or None (after printing an ERROR line) when the
            request fails or the response holds no country component
    '''
    lat, long = loc
    try:
        resp = requests.get(
            'https://maps.googleapis.com/maps/api/geocode/json',
            # Let requests build and escape the query string.
            params={'latlng': '{},{}'.format(lat, long), 'key': API_key},
            # Without a timeout a stalled connection would hang the cell forever.
            timeout=10,
        )
        resp.raise_for_status()
        resp_json = resp.json()
        country = [entry['long_name'] for entry in resp_json['results'][0]['address_components'] if 'country' in entry['types']][0]
    except (requests.RequestException, ValueError, LookupError, TypeError):
        # Best effort: network/HTTP failures, undecodable JSON and responses
        # without a country entry all yield None. Programming errors (e.g. an
        # undefined API_key) and KeyboardInterrupt are deliberately not caught.
        print('ERROR: for {}'.format(loc))
        country = None

    return country
        

In [4]:
#Create a country and code dictionary
# Load the GeoJSON in this cell: `gj` is otherwise only assigned by a cell
# further down, so on a fresh kernel (Restart & Run All) the dictionary
# could not be built and a NameError was raised here.
import geojson
with open('countries.geojson', encoding='utf-8') as geojson_file:
    gj = geojson.load(geojson_file)

# Map each feature's ADMIN property (country name) to its ISO_A3 property.
Country_Code_Dict = {feature['properties']['ADMIN']: feature['properties']['ISO_A3'] for feature in gj['features']}


In [5]:
# Levenshtein distance

In [6]:
def levenshtein_dist(str1, str2):
    '''
    Case-insensitive Levenshtein (edit) distance between two strings.

    Input: Two strings; both are lower-cased before comparison
    Output: Minimum number of single-character insertions, deletions and
            substitutions that turn one string into the other
    '''
    col_word = str1.lower()
    row_word = str2.lower()
    n_rows = len(row_word)
    n_cols = len(col_word)

    # table[r, c] is the distance between the first r characters of
    # row_word and the first c characters of col_word.
    table = np.zeros(shape=(n_rows + 1, n_cols + 1), dtype=int)
    # Against an empty prefix the distance is simply the prefix length.
    table[:, 0] = np.arange(n_rows + 1)
    table[0, :] = np.arange(n_cols + 1)

    # Fill the rows one by one.
    for r, row_char in enumerate(row_word, start=1):
        for c, col_char in enumerate(col_word, start=1):
            if row_char == col_char:
                # Matching characters add no cost.
                table[r, c] = table[r - 1, c - 1]
                continue
            # Otherwise pay one edit on top of the cheapest neighbouring cell.
            table[r, c] = 1 + min(table[r, c - 1], table[r - 1, c - 1], table[r - 1, c])
    return table[n_rows, n_cols]
                

In [7]:
def closest_country(country, Dict):
    '''
    Find the entry of Dict that is nearest to the given country name.

    Input: Country name, Dictionary/List of countries
    Output: The candidate with the smallest levenshtein_dist to the input
            (the earliest candidate wins a tie)
    '''
    distance_by_name = {}
    for candidate in Dict:
        distance_by_name[candidate] = levenshtein_dist(country, candidate)
    # Stable sort by distance; the front of the ranking is the best match.
    ranked = sorted(distance_by_name, key=distance_by_name.get)
    return ranked[0]

In [8]:
#Load geojson
import geojson
# JSON text is UTF-8; name the encoding so non-ASCII country names are decoded
# the same way on every platform instead of relying on the locale default.
with open('countries.geojson', encoding='utf-8') as geojson_file:
    gj = geojson.load(geojson_file)

In [11]:
#Read data
# The report date and the two text columns get explicit dtypes.
DF = pd.read_excel(
    'COVID-19-geographic-disbtribution-worldwide-2020-03-21.xlsx',
    dtype={
        'DateRep': 'datetime64[ns]',
        'Countries and territories': 'str',
        'GeoId': 'str',
    },
)


In [12]:
# Check which dtype each loaded column ended up with.
DF.dtypes

DateRep                      datetime64[ns]
Day                                   int64
Month                                 int64
Year                                  int64
Cases                                 int64
Deaths                                int64
Countries and territories            object
GeoId                                object
dtype: object

In [13]:
# Keep the four columns used downstream and give two of them shorter names.
DF_Sel = DF[['DateRep', 'Cases', 'Deaths', 'Countries and territories']].rename(
    columns={'DateRep': 'Date', 'Countries and territories': 'Country'}
)

In [16]:
# One row per country, one column per date, holding the summed case counts
# (0 where a country has no entry for a date).
# aggfunc is passed as a list, so the resulting columns carry three levels;
# margins=True appends the "All" totals.
PIVOT = pd.pivot_table(
    DF_Sel,
    index=["Country"],
    columns=["Date"],
    values=["Cases"],
    aggfunc=[np.sum],
    fill_value=0,
    margins=True,
)

In [17]:
#PIVOT_CUMSUM = PIVOT.cumsum(axis=1)
# Work on a copy: the following cells edit PIVOT_CUMSUM in place, and a plain
# alias would apply those edits to PIVOT as well, so this section could not
# be re-run without rebuilding the pivot table first.
# NOTE: with the cumsum above disabled the values stay per-day counts,
# despite the variable's name.
PIVOT_CUMSUM = PIVOT.copy()

In [18]:
# Replace the multi-level column labels with the values of level 2 only
# (presumably the dates plus the "All" margin — verify against the pivot call).
PIVOT_CUMSUM.columns = PIVOT_CUMSUM.columns.get_level_values(2)

In [19]:
from datetime import datetime

# Turn every column label into text and keep only the part before the first
# whitespace (for a timestamp label that is the date).
A=[str(x).split()[0] for x in list(PIVOT_CUMSUM.columns)]
# A bare `sorted(A) == A` would throw its result away; assert it so that
# out-of-order columns are actually reported.
assert sorted(A) == A, 'pivot columns are not in sorted order'
PIVOT_CUMSUM.columns = A

In [20]:
# Move the row index into an ordinary column (modifies the frame in place).
# NOTE: not idempotent — re-running this cell resets the index a second time.
PIVOT_CUMSUM.reset_index(inplace=True)

In [21]:
# Display the reshaped table.
PIVOT_CUMSUM

Unnamed: 0,Country,2019-12-31,2020-01-01,2020-01-02,2020-01-03,2020-01-04,2020-01-05,2020-01-06,2020-01-07,2020-01-08,...,2020-03-13,2020-03-14,2020-03-15,2020-03-16,2020-03-17,2020-03-18,2020-03-19,2020-03-20,2020-03-21,All
0,Afghanistan,0,0,0,0,0,0,0,0,0,...,0,0,3,6,5,1,0,0,2,24
1,Albania,0,0,0,0,0,0,0,0,0,...,12,10,5,4,9,4,4,11,0,70
2,Algeria,0,0,0,0,0,0,0,0,0,...,4,2,11,11,12,0,13,9,12,94
3,Andorra,0,0,0,0,0,0,0,0,0,...,0,1,0,3,9,0,39,22,0,75
4,Antigua_and_Barbuda,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172,Venezuela,0,0,0,0,0,0,0,0,0,...,0,0,10,5,18,0,0,0,3,36
173,Vietnam,0,0,0,0,0,0,0,0,0,...,5,5,4,4,4,0,15,9,2,87
174,Zambia,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,2,0,0,2
175,Zimbabwe,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1


In [22]:
# Fuzzy-match every reported country name against the keys of Country_Code_Dict.
PIVOT_CUMSUM['Lev_Country'] = PIVOT_CUMSUM['Country'].apply(
    lambda reported_name: closest_country(reported_name, Country_Code_Dict)
)

In [23]:
# Rows whose fuzzy match differs from the reported name, for manual review.
PIVOT_CUMSUM[PIVOT_CUMSUM['Lev_Country'].ne(PIVOT_CUMSUM['Country'])]

Unnamed: 0,Country,2019-12-31,2020-01-01,2020-01-02,2020-01-03,2020-01-04,2020-01-05,2020-01-06,2020-01-07,2020-01-08,...,2020-03-14,2020-03-15,2020-03-16,2020-03-17,2020-03-18,2020-03-19,2020-03-20,2020-03-21,All,Lev_Country
4,Antigua_and_Barbuda,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,1,Antigua and Barbuda
10,Bahamas,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,2,1,4,Panama
20,Bosnia_and_Herzegovina,0,0,0,0,0,0,0,0,0,...,0,0,15,3,0,15,8,0,44,Bosnia and Herzegovina
22,Brunei_Darussalam,0,0,0,0,0,0,0,0,0,...,12,3,10,4,2,12,5,5,78,Belarus
24,Burkina_Faso,0,0,0,0,0,0,0,0,0,...,0,1,0,17,0,6,7,7,40,Burkina Faso
25,CANADA,0,0,0,0,0,0,0,0,0,...,0,0,0,0,145,121,156,0,422,Canada
29,Cape_Verde,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,2,2,Cape Verde
30,Cases_on_an_international_conveyance_Japan,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,696,Dhekelia Sovereign Base Area
31,Cayman_Islands,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,2,3,Cayman Islands
32,Central_African_Republic,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,Central African Republic


In [24]:
# Reported names that get a hand-written target name instead of the fuzzy match:
#   Bahamas, Brunei_Darussalam, Cases_on_an_international_conveyance_Japan,
#   Congo, Cote_dIvoire, Eswatini, Holy_See, Serbia, and the "All" totals row.
# NOTE(review): 'Congo' is mapped to the Democratic Republic of the Congo —
# confirm that this is the intended country.
Manual_Dict = {
    'Bahamas': 'The Bahamas',
    'Brunei_Darussalam': 'Brunei',
    'Cases_on_an_international_conveyance_Japan': 'Diamond Princess',
    'Congo': 'Democratic Republic of the Congo',
    'Cote_dIvoire': 'Ivory Coast',
    'Eswatini': 'Swaziland',
    'Holy_See': 'Vatican',
    'Serbia': 'Republic of Serbia',
    'All': 'All',
}
# Snippet for searching the known country names (needs `import re`):
#[word for word in  Country_Code_Dict if re.search('serb', word.lower())]

In [25]:
# Prefer the hand-written mapping where one exists, otherwise keep the fuzzy match.
# dict.get is used on purpose: setdefault would also insert every looked-up
# country into Manual_Dict, silently growing the manual mapping.
PIVOT_CUMSUM['Lev_Country'] = PIVOT_CUMSUM.apply(lambda row: Manual_Dict.get(row['Country'], row['Lev_Country']), axis=1)



In [26]:
#PIVOT_CUMSUM.iloc[np.where(PIVOT_CUMSUM['Lev_Country'] != PIVOT_CUMSUM['Country'])]
# Time to get the country code: look up each matched name in Country_Code_Dict.
# Names that are not keys of the dictionary end up as NaN in the CC column.
PIVOT_CUMSUM['CC'] = PIVOT_CUMSUM['Lev_Country'].map(Country_Code_Dict)

In [31]:
# With Animations

In [28]:
import plotly.graph_objects as go
import math
import datetime

# Largest single value over the date columns, leaving out the final row
# (presumably the "All" totals row) and the last three columns.
MAX = max(PIVOT_CUMSUM.iloc[:-1, 1:-3].max())
# MAX rounded up to the next multiple of its leading power of ten.
_magnitude = 10**(len(str(MAX))-1)
MAX_SCALE = math.ceil(MAX/_magnitude)*_magnitude

# One entry per day from 2020-01-21 through 2020-03-21, written as ISO date
# strings because they are used as column labels of PIVOT_CUMSUM.
_first_day = datetime.date(2020, 1, 21)
_last_day = datetime.date(2020, 3, 21)
Frames = [
    (_first_day + datetime.timedelta(days=offset)).isoformat()
    for offset in range((_last_day - _first_day).days + 1)
]


def _daily_choropleth(day, colorscale):
    """Build the choropleth trace for one day's column of PIVOT_CUMSUM."""
    return go.Choropleth(
        locations=PIVOT_CUMSUM['CC'],  # Spatial coordinates
        z=PIVOT_CUMSUM[day],  # Data to be color-coded
        locationmode='ISO-3',
        colorscale=colorscale,
        colorbar_title="Confirmed Cases",
    )


fig = go.Figure(
    # The figure starts on the first day ...
    data=[_daily_choropleth(Frames[0], 'Reds')],
    layout=go.Layout(
        title_text='COVID-19 Daily Cases Worldwide from Jan 21 to Mar 21',
        geo={
            'showframe': False,
            'showcoastlines': False,
            'projection_type': 'equirectangular',
        },
        annotations=[{
            'x': 0.55,
            'y': 0.1,
            'xref': 'paper',
            'yref': 'paper',
            'text': ('Data Source: <a href="https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide">'
                     '            European Centre for Disease Prevention and Control</a>'),
            'showarrow': False,
        }],
        # A single "Play" button that runs through all frames.
        updatemenus=[{
            'type': "buttons",
            'buttons': [{'label': "Play", 'method': "animate", 'args': [None]}],
        }],
    ),
    # ... and every later day becomes one animation frame, drawn on a
    # white-to-red scale.
    frames=[
        go.Frame(data=[_daily_choropleth(day, [[0, 'rgb(255, 255, 255)'], [1, 'rgb(255, 0, 0)']])])
        for day in Frames[1:]
    ],
)

fig.show()




In [33]:
# Save the figure as an image file named Plot.webp.
# NOTE(review): static export presumably needs plotly's image-export backend installed — confirm.
fig.write_image("Plot.webp")

In [40]:
import chart_studio

# Read the Chart Studio API key from a local file so it is not hard-coded here.
with open('API_Chart_Studio.txt', 'r') as api_cs:
    # Strip surrounding whitespace (e.g. a trailing newline) so that only the
    # key itself is passed on as the credential.
    api_key = api_cs.read().strip()
    

In [41]:
# Confirm that a key was loaded without echoing the secret into the saved
# notebook. A key that has ever been displayed in a shared notebook should be
# regenerated (profile > settings > regenerate key).
bool(api_key)

True

In [42]:
# Hand the Chart Studio user name and API key to chart_studio's credentials file.
username = 'bnarath' # your username
# api_key is the value read from API_Chart_Studio.txt; to issue a new one
# go to profile > settings > regenerate key
chart_studio.tools.set_credentials_file(username=username, api_key=api_key)

In [43]:
import chart_studio.plotly as py
# Send the figure to Chart Studio under the file name 'fig'; auto_open=True
# asks for the result to be opened, and the call's return value (the plot
# URL) is shown as the cell output.
py.plot(fig, filename = 'fig', auto_open=True)

'https://plot.ly/~bnarath/1/'

In [50]:
import plotly.io as pio
# Write the figure to index.html; auto_open=True asks for the file to be opened afterwards.
pio.write_html(fig, file='index.html', auto_open=True)