## <center>CORONAVIRUS - CONFIRMED CASES TIMELINE</center>

In [2]:
# Libraries used in this notebook: numpy / pandas for data handling and
# plotly for the interactive map.
import numpy as np
import pandas as pd
# Lift pandas' display truncation so every row and column of a displayed frame is shown.
# NOTE(review): with max_rows unlimited, displaying a full frame renders every row;
# the dataset loaded below has ~206k rows, so keep previews to .head()/.sample().
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

import plotly.express as px
import plotly.graph_objects as go

# Initialise plotly for notebook rendering; connected=True loads plotly.js from
# the online CDN rather than embedding the library in the notebook.
from plotly.offline import init_notebook_mode
init_notebook_mode(connected = True)

In [3]:
# Load the raw COVID-19 observations from disk and preview the first five rows
# as a quick sanity check of the columns and values.
path = './data/covid_19_data.csv'
raw_data = pd.read_csv(filepath_or_buffer=path)
raw_data.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0


In [4]:
# Summarise the frame: row count, per-column non-null counts and dtypes, and memory usage.
# A column whose non-null count is below the total row count contains missing values.
raw_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205951 entries, 0 to 205950
Data columns (total 8 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   SNo              205951 non-null  int64  
 1   ObservationDate  205951 non-null  object 
 2   Province/State   150574 non-null  object 
 3   Country/Region   205951 non-null  object 
 4   Last Update      205951 non-null  object 
 5   Confirmed        205951 non-null  float64
 6   Deaths           205951 non-null  float64
 7   Recovered        205951 non-null  float64
dtypes: float64(3), int64(1), object(4)
memory usage: 12.6+ MB


In [5]:
# Data manipulation: build one row per (Country, Date) holding the summed
# numeric columns, ordered chronologically.

# Shorter names for the two columns used as grouping keys.
data = raw_data.rename(columns={'ObservationDate': 'Date', 'Country/Region': 'Country'})
# Parse Date as datetime so that sorting is chronological rather than the
# lexicographic order of the original date strings.
data['Date'] = pd.to_datetime(data['Date'])

df_country_timeline = (
    # Keep only observations that report at least one confirmed case.
    data[data['Confirmed'] > 0]
    .groupby(['Country', 'Date'])
    # numeric_only=True restricts the aggregation to numeric columns. Without it,
    # pandas >= 2.0 also tries to "sum" the text columns (Province/State,
    # Last Update), which is slow and meaningless or fails outright.
    # Note: the summed SNo column is a serial number total and carries no meaning.
    .sum(numeric_only=True)
    # Sort on both keys so the row order is fully deterministic (sorting on Date
    # alone leaves the order of countries within a day unspecified).
    .sort_values(by=['Date', 'Country'], ascending=True)
    .reset_index()
)
# Convert the dates back to ISO 'YYYY-MM-DD' strings using the vectorised
# .dt accessor; ISO strings keep the chronological order established above.
df_country_timeline['Date'] = df_country_timeline['Date'].dt.strftime('%Y-%m-%d')

In [6]:
# Build an animated world choropleth with Plotly Express: countries are matched
# by name, coloured by their 'Confirmed' value, with one animation frame per 'Date'.
# NOTE(review): the data contains the country label 'Mainland China'; confirm that
# plotly's 'country names' location mode resolves it, otherwise map it to a
# recognised name before plotting.
fig = px.choropleth(
    data_frame=df_country_timeline,
    locations='Country',
    locationmode='country names',
    color='Confirmed',
    hover_name='Country',
    animation_frame='Date',
    color_continuous_scale='viridis',
)

# Bold title positioned at x = 0.5; hide the map frame and the coastlines.
fig.update_layout(
    title=dict(
        text='<b>Global Spread of Coronavirus</b>',
        x=0.5,
        font=dict(size=25, color='black'),
    ),
    geo={'showframe': False, 'showcoastlines': False},
)

fig.show()