In [1]:
import numpy as np
import pandas as pd
import os
import urllib
%matplotlib inline
import plotly as py
import plotly.express as px
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

In [2]:
url = "https://covid19.who.int/WHO-COVID-19-global-data.csv"
file_path = os.path.join("dataset","covid")

In [3]:
os.makedirs(file_path, exist_ok=True)
csv_path = os.path.join(file_path, "WHO-COVID-19-global-data.csv")
urllib.request.urlretrieve(url,csv_path)

('dataset\\covid\\WHO-COVID-19-global-data.csv',
 <http.client.HTTPMessage at 0x29b6ed53b08>)

In [4]:
df = pd.read_csv(csv_path)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31226 entries, 0 to 31225
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Date_reported       31226 non-null  object
 1    Country_code       31089 non-null  object
 2    Country            31226 non-null  object
 3    WHO_region         31226 non-null  object
 4    New_cases          31226 non-null  int64 
 5    Cumulative_cases   31226 non-null  int64 
 6    New_deaths         31226 non-null  int64 
 7    Cumulative_deaths  31226 non-null  int64 
dtypes: int64(4), object(4)
memory usage: 1.9+ MB


In [6]:
df.columns

Index(['Date_reported', ' Country_code', ' Country', ' WHO_region',
       ' New_cases', ' Cumulative_cases', ' New_deaths', ' Cumulative_deaths'],
      dtype='object')

In [7]:
df.columns = [col.strip() for col in df.columns] #removes extra white spaces in the column names

In [8]:
df.head()

Unnamed: 0,Date_reported,Country_code,Country,WHO_region,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths
0,2020-02-24,AF,Afghanistan,EMRO,1,1,0,0
1,2020-02-25,AF,Afghanistan,EMRO,0,1,0,0
2,2020-02-26,AF,Afghanistan,EMRO,0,1,0,0
3,2020-02-27,AF,Afghanistan,EMRO,0,1,0,0
4,2020-02-28,AF,Afghanistan,EMRO,0,1,0,0


In [9]:
df1 = df.groupby(["Date_reported", "Country"]).sum().reset_index()

In [10]:
df1.head()

Unnamed: 0,Date_reported,Country,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths
0,2020-01-11,China,41,41,1,1
1,2020-01-12,China,0,41,0,1
2,2020-01-13,China,0,41,0,1
3,2020-01-13,Thailand,1,1,0,0
4,2020-01-14,China,0,41,0,1


In [11]:
df1["Death_rate"] = (df.Cumulative_deaths/df.Cumulative_cases).round(3)

In [12]:
df1.tail()

Unnamed: 0,Date_reported,Country,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths,Death_rate
31221,2020-07-28,Viet Nam,11,431,0,0,0.013
31222,2020-07-28,Yemen,10,1695,4,484,0.014
31223,2020-07-28,Zambia,71,4552,1,140,0.014
31224,2020-07-28,Zimbabwe,192,2704,2,36,0.014
31225,2020-07-28,"occupied Palestinian territory, including east...",332,13457,1,80,0.013


In [13]:
fig = px.choropleth(df1, 
                    locations="Country", 
                    locationmode = "country names",
                    color="Cumulative_cases",
                    animation_frame="Date_reported",
                    hover_name="Country",
                    hover_data=["Cumulative_cases","Cumulative_deaths","Death_rate"],
                    color_continuous_scale="spectral",
                   )
fig.update_layout(
    title_text = 'Global Spread of Coronavirus over Time',
    title_x = 0.5,
    geo=dict(
        showframe = False,
        showcoastlines = False,
        showocean=True,
        showlakes=True,
        lakecolor = "rgb(17,61,166)",
        oceancolor = 'rgb(154,209,237)',
        projection = {'type':'kavrayskiy7'}
    ))
    
fig.show()