In [1]:
# Custom code to set path
import os
import sys
import time
from functools import reduce
from pathlib import Path

def add_module_path_to_system():
    """
    Make the parent of the current working directory importable.

    Resolves '..' to an absolute path and appends it to ``sys.path`` unless it is already listed.

    :return: absolute path of the parent directory
    :rtype: str
    """
    parent_directory = os.path.abspath('..')
    already_registered = parent_directory in sys.path
    if not already_registered:
        sys.path.append(parent_directory)
    return parent_directory


module_path = add_module_path_to_system()

import pandas as pd # not required to set path 
from plotly.subplots import make_subplots
import plotly.graph_objects as go 
import requests
from plotly.offline import init_notebook_mode, iplot

# Prepare the notebook so that figures passed to iplot() render inline.
# NOTE(review): per the plotly documentation, connected=True loads plotly.js from a CDN, so viewing
# the figures presumably needs network access -- confirm this matches the "offline" intent below.
init_notebook_mode(connected=True)  # initiate notebook for offline plot

In [2]:
# User-defined helper functions that make plotting easier and more customizable
def create_plotly_figure(rows=1, columns=1, title='', x_axis_title='', y_axis_title='', template='gridon'):
    """
    Build an empty Plotly subplot figure with its title, axis titles and template already applied.
    The returned figure is a starting point that callers extend with their own traces.

    :param int rows: number of subplot rows, defaults to 1
    :param int columns: number of subplot columns, defaults to 1
    :param str title: title shown at the top of the figure
    :param str x_axis_title: title of the x-axis
    :param str y_axis_title: title of the y-axis
    :param str template: name of the plotly template, acceptable arguments include
     'ggplot2', 'seaborn', 'simple_white', 'plotly','plotly_white', 'plotly_dark', 'presentation', 'xgridoff',
     'ygridoff', 'gridon', 'none'. Defaults to 'gridon'
    :return: the figure returned by ``make_subplots`` after the layout update
    """
    # Collect every layout setting in one place so the figure is styled with a single update call.
    layout_options = {
        'template': template,
        'title_text': title,
        'xaxis_title': x_axis_title,
        'yaxis_title': y_axis_title,
    }
    figure = make_subplots(rows=rows, cols=columns)
    figure.update_layout(**layout_options)
    return figure

def plot_multiple_fields_time_series(input_df, x_axis_columns, y_axis_columns, range_slider=True, rows=1,
                                     columns=1, title='', x_axis_title='', y_axis_title='', template='gridon'):
    """
    This method creates a time-series plot where multiple fields can be plotted.
    Each column in ``y_axis_columns`` is added as its own trace to the plot.

    :param pd.DataFrame input_df: input data frame with all the data to be used in the plot
    :param x_axis_columns: column names to be used in x-axis. Only the first entry is used; a single
     column name given as a plain string is also accepted
    :type x_axis_columns: list or str
    :param y_axis_columns: column names to be plotted on y-axis (any iterable of names, or a single
     column name given as a plain string)
    :type y_axis_columns: list or str
    :param bool range_slider: determines whether to have a range slider to filter the dates. Defaults to True
    :param int rows: number of rows in plotly figure, defaults to 1
    :param int columns: number of columns in plotly figure, defaults to 1
    :param str title: name of the figure
    :param str x_axis_title: name of the x-axis
    :param str y_axis_title: name of the y-axis
    :param str template: template of the plotly figure, acceptable arguments include
     'ggplot2', 'seaborn', 'simple_white', 'plotly','plotly_white', 'plotly_dark', 'presentation', 'xgridoff',
     'ygridoff', 'gridon', 'none'. Defaults to 'gridon'
    :return: figure with one trace per y-axis column
    :raises ValueError: if ``input_df`` is None or empty
    """
    # Validate the input first so no figure is built for data that cannot be plotted.
    if input_df is None or input_df.empty:
        raise ValueError('\n Input Data Frame provided is empty')

    # A bare string would otherwise be indexed / iterated one character at a time.
    if isinstance(x_axis_columns, str):
        x_axis_columns = [x_axis_columns]
    if isinstance(y_axis_columns, str):
        y_axis_columns = [y_axis_columns]

    fig = create_plotly_figure(rows=rows, columns=columns, title=title, x_axis_title=x_axis_title,
                               y_axis_title=y_axis_title, template=template)

    # Add one trace per requested column; every trace shares the first x-axis column.
    for column in y_axis_columns:
        fig.add_trace(go.Scatter(x=input_df[x_axis_columns[0]], y=input_df[column], name=column))

    # Update layout with slider
    if range_slider:
        fig.update_layout(xaxis_rangeslider_visible=True)

    return fig

In [3]:
### Import COVID-19 data sets - demonstration of pandas' ability to import CSV files
# Paths are built with os.path.join so the cell works on any OS, not only where '\\' is the separator.
current_directory = os.getcwd()
raw_data_folder = os.path.join(current_directory, 'covid-19_data')
# os.listdir returns entries in arbitrary order; sort so the resulting dict order is reproducible.
contents = sorted(os.listdir(raw_data_folder))
csv_files = [f for f in contents if f.endswith('.csv')]

# Map "file name without extension" -> DataFrame, skipping files that contain no rows.
all_data_dict = {}
for f in csv_files:
    # splitext strips only the final extension, so names containing extra dots stay distinct.
    f_name = os.path.splitext(f)[0]
    file_path = os.path.join(raw_data_folder, f)
    df = pd.read_csv(file_path)
    if not df.empty:
        all_data_dict[f_name] = df
all_data_dict.keys()

dict_keys(['time_series_covid19_confirmed_global', 'time_series_covid19_confirmed_global_iso3_regions', 'time_series_covid19_deaths_global', 'time_series_covid19_deaths_global_iso3_regions', 'time_series_covid19_recovered_global', 'time_series_covid19_recovered_global_iso3_regions'])

In [4]:
# Begin exploring the available data with the worldwide confirmed-case table:
# pull it out of the loaded data sets and preview its first five rows.
confirmed_cases_global = all_data_dict['time_series_covid19_confirmed_global']
confirmed_cases_global.head(n=5)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/24/20,3/25/20,3/26/20,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20,4/2/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,74,84,94,110,110,120,170,174,237,273
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,123,146,174,186,197,212,223,243,259,277
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,264,302,367,409,454,511,584,716,847,986
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,164,188,224,267,308,334,370,376,390,428
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,3,3,4,4,5,7,7,7,8,8


In [5]:
# Let us say we want to explore the rate of change or spread in different countries
# In order to do that, there are some columns in the original data that we can't use - enter COPY
# By copying a dataframe, a new data frame is created which has no reference to the old one. Any changes
# made to this new data frame are not reflected in the original data frame object
# The copy and the column removal are chained (no inplace=True), so the original frame is never mutated.
confirmed_cases_global_copy = (
    confirmed_cases_global
    .copy(deep=True)
    .drop(columns=["Lat", "Long", "Province/State"])
)
# Rows are grouped by default; the explicit axis=0 keyword is deprecated in recent pandas releases.
confirmed_cases_global_by_country = confirmed_cases_global_copy.groupby(by="Country/Region").sum()
confirmed_cases_global_by_country

Unnamed: 0_level_0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,3/24/20,3/25/20,3/26/20,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20,4/2/20
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,0,0,0,0,0,0,0,0,0,0,...,74,84,94,110,110,120,170,174,237,273
Albania,0,0,0,0,0,0,0,0,0,0,...,123,146,174,186,197,212,223,243,259,277
Algeria,0,0,0,0,0,0,0,0,0,0,...,264,302,367,409,454,511,584,716,847,986
Andorra,0,0,0,0,0,0,0,0,0,0,...,164,188,224,267,308,334,370,376,390,428
Angola,0,0,0,0,0,0,0,0,0,0,...,3,3,4,4,5,7,7,7,8,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Venezuela,0,0,0,0,0,0,0,0,0,0,...,84,91,107,107,119,119,135,135,143,146
Vietnam,0,2,2,2,2,2,2,2,2,2,...,134,141,153,163,174,188,203,212,218,233
West Bank and Gaza,0,0,0,0,0,0,0,0,0,0,...,59,59,84,91,98,109,116,119,134,161
Zambia,0,0,0,0,0,0,0,0,0,0,...,3,12,16,22,28,29,35,35,36,39


In [6]:
# Reshape the per-country totals so that every row is a date and every column a country,
# then plot one trace per country.
# NOTE: this cell overwrites confirmed_cases_global_by_country with its own transpose, so it is meant to
# be run once after the aggregation cell; re-running it on its own transposes the reshaped frame again.
desired_columns = confirmed_cases_global_by_country.index  # country labels become the columns after .T
confirmed_cases_global_by_country = (
    confirmed_cases_global_by_country
    .T
    .rename_axis(index='Date')
    .reset_index(drop=False)
)
figure_1 = plot_multiple_fields_time_series(
    input_df=confirmed_cases_global_by_country,
    x_axis_columns=['Date'],
    y_axis_columns=desired_columns,
    title='COVID-19 Confirmed Cases by Country',
    x_axis_title='Date',
    y_axis_title='Total Confirmed Cases By Country',
    template='gridon',
    range_slider=True,
    rows=1,
    columns=1,
)
iplot(figure_1)

In [7]:
# Let us say we want to understand the rate of death and rate of recovery and how it differs by countries
# Please note that these calculations are not valid as a measure because recoveries and deaths are still
# ongoing and not an event that has completed. This is being calculated strictly for demonstration purposes
_GEO_COLUMNS = ["Lat", "Long", "Province/State"]


def _sum_by_country_over_time(cases_without_geo):
    """
    Sum all rows belonging to the same country and transpose the result.

    :param pd.DataFrame cases_without_geo: frame with a "Country/Region" column plus one column per date
    :return: frame with one row per date (index named 'Date') and one column per country
    :rtype: pd.DataFrame
    """
    return cases_without_geo.groupby(by="Country/Region").sum().T.rename_axis(index="Date")


# Move 'Date' into the index only while it is still a column, so re-running this cell does not fail.
if 'Date' in confirmed_cases_global_by_country.columns:
    confirmed_cases_global_by_country = confirmed_cases_global_by_country.set_index('Date')

# Deaths: drop() returns a new frame, so the loaded data set is left untouched.
confirmed_death_global = all_data_dict['time_series_covid19_deaths_global']
confirmed_death_global_copy = confirmed_death_global.drop(columns=_GEO_COLUMNS)
confirmed_death_by_country = _sum_by_country_over_time(confirmed_death_global_copy)
desired_columns_death = confirmed_death_by_country.columns
# div() aligns on both date and country; dates with zero confirmed cases do not yield a finite rate.
rate_of_death_by_country = confirmed_death_by_country.div(confirmed_cases_global_by_country).reset_index()

# Recoveries: same pipeline as for deaths.
confirmed_recovery_global = all_data_dict['time_series_covid19_recovered_global']
confirmed_recovery_global_copy = confirmed_recovery_global.drop(columns=_GEO_COLUMNS)
confirmed_recovery_by_country = _sum_by_country_over_time(confirmed_recovery_global_copy)
desired_columns_recovery = confirmed_recovery_by_country.columns
rate_of_recovery_by_country = confirmed_recovery_by_country.div(confirmed_cases_global_by_country).reset_index()

In [8]:
# Plot the death rate (deaths divided by confirmed cases) over time, one trace per country.
figure_2 = plot_multiple_fields_time_series(
    input_df=rate_of_death_by_country,
    x_axis_columns=['Date'],
    y_axis_columns=desired_columns,
    title='COVID-19 Rate of Death by Country',
    x_axis_title='Date',
    y_axis_title='Rate of Death with time By Country',
    template='gridon',
    range_slider=True,
    rows=1,
    columns=1,
)
iplot(figure_2)

In [10]:
# Plot the recovery rate (recoveries divided by confirmed cases) over time, one trace per country.
figure_3 = plot_multiple_fields_time_series(
    input_df=rate_of_recovery_by_country,
    x_axis_columns=['Date'],
    y_axis_columns=desired_columns,
    title='COVID-19 Rate of Recovery by Country',
    x_axis_title='Date',
    y_axis_title='Rate of Recovery with time By Country',
    template='gridon',
    range_slider=True,
    rows=1,
    columns=1,
)
iplot(figure_3)