In [2]:
#Importing the required libraries
import numpy as np
import pandas as pd
from functools import reduce

In [5]:
#### Part 1 : Preparing the data
# 1.1 Load the three time-series CSV files into DataFrames.
# The files are read from the current working directory, in the order
# confirmed -> deaths -> recovered.
df_confirmed, df_deaths, df_recovered = map(
    pd.read_csv,
    (
        "time_series_covid19_confirmed_global.csv",
        "time_series_covid19_deaths_global.csv",
        "time_series_covid19_recovered_global.csv",
    ),
)

In [6]:
# Preview the first rows (head() shows 5 rows by default).
df_confirmed.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/23/20,3/24/20,3/25/20,3/26/20,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,40,74,84,94,110,110,120,170,174,237
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,104,123,146,174,186,197,212,223,243,259
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,230,264,302,367,409,454,511,584,716,847
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,133,164,188,224,267,308,334,370,376,390
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,3,3,3,4,4,5,7,7,7,8


In [11]:
# 1.2 Tidying the data
# Reshape each wide table (one column per date) into long form with melt():
# the first four columns of df_confirmed are kept as identifiers and every
# remaining column becomes a (Date, value) pair.
# NOTE(review): the same column lists, taken from df_confirmed, are applied
# to df_deaths and df_recovered - this assumes all three files share the
# same column layout; confirm.
id_list = list(df_confirmed.columns[:4])
vars_list = list(df_confirmed.columns[4:])

confirmed_tidy = df_confirmed.melt(
    id_vars=id_list,
    value_vars=vars_list,
    var_name='Date',
    value_name='Confirmed',
)
deaths_tidy = df_deaths.melt(
    id_vars=id_list,
    value_vars=vars_list,
    var_name='Date',
    value_name='Deaths',
)
recovered_tidy = df_recovered.melt(
    id_vars=id_list,
    value_vars=vars_list,
    var_name='Date',
    value_name='recovered',
)

In [None]:
# 1.3 Merging the three dataframes into one
# Outer-join the tidy frames left to right on the identifier columns plus
# 'Date', so each row carries Confirmed, Deaths and recovered side by side.
# NOTE(review): the join keys include the float Lat/Long columns; rows whose
# keys do not match exactly across files will be kept with missing values
# by the outer join - confirm this is intended.
data_frames = [confirmed_tidy, deaths_tidy, recovered_tidy]
merge_keys = id_list + ['Date']

df_corona = data_frames[0]
for frame in data_frames[1:]:
    df_corona = df_corona.merge(frame, on=merge_keys, how='outer')

In [None]:
# 1.4 Each row should only represent one observation
# Stack the three measure columns into a single 'Count' column labelled by
# 'type', then parse the 'Date' strings (month/day/two-digit-year) into
# datetimes; errors='raise' makes any unparsable date fail loudly.
# The first five columns of the merged frame are used as identifiers.
id_vars = df_corona.columns[:5]
data_type = ['Confirmed', 'Deaths', 'recovered']

df_corona = (
    df_corona
    .melt(
        id_vars=id_vars,
        value_vars=data_type,
        var_name='type',
        value_name='Count',
    )
    .assign(
        Date=lambda frame: pd.to_datetime(
            frame['Date'], format='%m/%d/%y', errors='raise'
        )
    )
)

In [12]:
# Preview the first rows of the long-format frame (5 rows by default).
df_corona.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,type,Count
0,,Afghanistan,33.0,65.0,2020-01-22,Confirmed,0.0
1,,Albania,41.1533,20.1683,2020-01-22,Confirmed,0.0
2,,Algeria,28.0339,1.6596,2020-01-22,Confirmed,0.0
3,,Andorra,42.5063,1.5218,2020-01-22,Confirmed,0.0
4,,Angola,-11.2027,17.8739,2020-01-22,Confirmed,0.0


In [21]:
# Remove the 'Province/State' column.
# drop(..., errors='ignore') is used instead of `del` so the cell can be
# re-executed safely: `del` raises KeyError once the column is already gone.
df_corona = df_corona.drop(columns=['Province/State'], errors='ignore')

In [42]:
# Preview the last rows (tail() shows 5 rows by default).
df_corona.tail()

Unnamed: 0,Country/Region,Lat,Long,Date,type,Count
55375,Timor-Leste,-8.8742,125.7275,2020-03-31,recovered,0.0
55376,Canada,56.1304,-106.3468,2020-04-01,recovered,1324.0
55377,Mozambique,-18.6657,35.5296,2020-04-01,recovered,0.0
55378,Syria,34.8021,38.9968,2020-04-01,recovered,0.0
55379,Timor-Leste,-8.8742,125.7275,2020-04-01,recovered,0.0


In [23]:
# Total Count per (type, Date) pair, summed over all remaining rows.
# as_index=False keeps 'type' and 'Date' as ordinary columns in the result.
grouped_by_type_and_date = df_corona.groupby(['type', 'Date'], as_index=False)
corona_sums = grouped_by_type_and_date['Count'].sum()

In [47]:
# Display the aggregated totals (one row per type and date).
corona_sums

Unnamed: 0,type,Date,Count
0,Confirmed,2020-01-22,555.0
1,Confirmed,2020-01-23,654.0
2,Confirmed,2020-01-24,941.0
3,Confirmed,2020-01-25,1434.0
4,Confirmed,2020-01-26,2118.0
...,...,...,...
208,recovered,2020-03-28,139415.0
209,recovered,2020-03-29,149082.0
210,recovered,2020-03-30,164566.0
211,recovered,2020-03-31,178034.0


In [55]:
import plotly_express as px
def plot_timeseries(df):
    """Build a line chart of ``Count`` over ``Date``, one line per ``type``.

    Parameters
    ----------
    df
        Data handed to ``px.line``; it must provide the columns
        ``Date``, ``Count`` and ``type``.

    Returns
    -------
    The figure produced by ``px.line`` (dark template), with the legend
    laid out horizontally.
    """
    line_chart = px.line(
        df,
        x='Date',
        y='Count',
        color='type',
        template='plotly_dark',
    )

    # Horizontal legend orientation.
    line_chart.update_layout(legend_orientation="h")
    return line_chart

# Build the chart from the per-type, per-date totals and render it.
fig = plot_timeseries(corona_sums)
fig.show()