In [1]:
import numpy as np
import pandas as pd
import os
import urllib
%matplotlib inline
import plotly as py
import plotly.express as px
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

In [2]:
url = "https://covid19.who.int/WHO-COVID-19-global-data.csv"
file_path = os.path.join("dataset","covid")

In [3]:
os.makedirs(file_path, exist_ok=True)
csv_path = os.path.join(file_path, "WHO-COVID-19-global-data.csv")
urllib.request.urlretrieve(url,csv_path)

('dataset\\covid\\WHO-COVID-19-global-data.csv',
 <http.client.HTTPMessage at 0x2435790dc48>)

In [4]:
df = pd.read_csv(csv_path)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33821 entries, 0 to 33820
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Date_reported       33821 non-null  object
 1    Country_code       33672 non-null  object
 2    Country            33821 non-null  object
 3    WHO_region         33821 non-null  object
 4    New_cases          33821 non-null  int64 
 5    Cumulative_cases   33821 non-null  int64 
 6    New_deaths         33821 non-null  int64 
 7    Cumulative_deaths  33821 non-null  int64 
dtypes: int64(4), object(4)
memory usage: 2.1+ MB


In [6]:
df.columns

Index(['Date_reported', ' Country_code', ' Country', ' WHO_region',
       ' New_cases', ' Cumulative_cases', ' New_deaths', ' Cumulative_deaths'],
      dtype='object')

In [7]:
df.columns = [col.strip() for col in df.columns] #removes extra white spaces in the column names

In [8]:
df1 = df.groupby(["Date_reported", "Country"]).sum().reset_index()

In [9]:
df1["Death_rate"] = (df.Cumulative_deaths/df.Cumulative_cases).round(3)

In [10]:
df1.tail()

Unnamed: 0,Date_reported,Country,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths,Death_rate
33816,2020-08-09,Viet Nam,23,812,0,10,0.019
33817,2020-08-09,Yemen,1,1801,0,513,0.019
33818,2020-08-09,Zambia,739,7903,4,203,0.022
33819,2020-08-09,Zimbabwe,124,4575,0,102,0.023
33820,2020-08-09,"occupied Palestinian territory, including east...",426,18374,6,103,0.022


In [11]:
fig = px.choropleth(df1, 
                    locations="Country", 
                    locationmode = "country names",
                    color="Cumulative_cases",
                    animation_frame="Date_reported",
                    hover_name="Country",
                    hover_data=["Cumulative_cases","Cumulative_deaths","Death_rate"],
                    color_continuous_scale="spectral",
                   )
fig.update_layout(
    title_text = 'Global Spread of Coronavirus over Time',
    title_x = 0.5,
    geo=dict(
        showframe = False,
        showcoastlines = False,
        showocean=True,
        showlakes=True,
        lakecolor = "rgb(17,61,166)",
        oceancolor = 'rgb(154,209,237)',
        projection = {'type':'kavrayskiy7'}
    ))
    
fig.show()