In [45]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

plt.style.use('ggplot')

from model import read_data
import plotly.express as px

# Retrieve data

In [46]:
# Raw-file URLs of the time-series CSVs in the CSSEGISandData/COVID-19 GitHub
# repository. All three files live in the same directory, so the common prefix
# is spelled out once.
# NOTE(review): upstream appears to have renamed these files since this was
# written (time_series_covid19_*_global.csv) — confirm the URLs still resolve.
_CSSE_TIME_SERIES_DIR = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
    "csse_covid_19_data/csse_covid_19_time_series"
)

url_confirmed = f"{_CSSE_TIME_SERIES_DIR}/time_series_19-covid-Confirmed.csv"
url_deaths    = f"{_CSSE_TIME_SERIES_DIR}/time_series_19-covid-Deaths.csv"
url_recovered = f"{_CSSE_TIME_SERIES_DIR}/time_series_19-covid-Recovered.csv"

In [47]:
# Load the three series with the project-local `read_data` helper (imported
# from `model`), in the order confirmed -> deaths -> recovered.
# NOTE(review): later cells treat each result as a wide frame indexed by
# 'Date' with one column per 'Country/Region' — confirm against `read_data`.
confirmed_df, deaths_df, recovered_df = [
    read_data(series_url)
    for series_url in (url_confirmed, url_deaths, url_recovered)
]

In [48]:
def melt_me(df, value_name):
    """Reshape a wide frame indexed by 'Date' into long format.

    The index is first moved into a regular column; that column must end up
    being called 'Date' (i.e. ``df.index.name`` must be 'Date'), otherwise
    ``melt`` raises ``KeyError``.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table; it is not modified.
    value_name : str
        Name given to the column holding the melted cell values.

    Returns
    -------
    pandas.DataFrame
        One row per (Date, original column) pair.
    """
    # reset_index() already returns a new frame, so the caller's `df` is safe.
    return pd.melt(df.reset_index(), id_vars='Date', value_name=value_name)

## Melt to long format

In [43]:
# Reshape each wide table into long format, one value column per series.
# NOTE: the wide frames are rebound to their long versions here, so this cell
# must run exactly once after the data is loaded (re-running it would try to
# melt frames that are already long).
confirmed_df = melt_me(confirmed_df, value_name='Confirmed')
deaths_df    = melt_me(deaths_df,    value_name='Deaths')
# Bug fix: this used to melt `deaths_df` (already long at this point), which
# produced a mislabelled table instead of the recovered series.
recovered_df = melt_me(recovered_df, value_name='Recovered')

## Countries by region

In [4]:
# Build the country lookup table from the tab-separated 'countries.csv'.
# The parsed index is turned back into an ordinary column before relabelling,
# so the frame must have exactly three columns at that point.
# NOTE(review): this presumably relies on pandas using the first field as the
# index when data rows have one more field than the header — confirm against
# the actual file layout.
countries_raw = pd.read_csv('countries.csv', header=0, sep='\t', engine='python')

countries_df = countries_raw.reset_index()
countries_df.columns = ['Country', 'Region', 'Hemisphere']

# Index by country name so the frame can be used as a country -> region map.
countries_df = countries_df.set_index('Country')

# By country since X-th case

In [21]:
# Case-count threshold: only observations above it are plotted.
EPSILON = 1000

# Keep the rows above the threshold and number them per country, so that every
# country's curve starts at day 0 on a shared x-axis. Rows are counted in their
# existing order (no re-sorting), i.e. the order the long frame already has.
since_epsilon_df = confirmed_df.query(f'Confirmed > {EPSILON}').copy()
since_epsilon_df['Days'] = since_epsilon_df.groupby('Country/Region').cumcount()

# `x` is passed explicitly: without it the x-axis is whatever Plotly falls back
# to (NOTE(review): recent Plotly Express releases appear to use the DataFrame
# index, here the long frame's row number, which would stagger the countries
# instead of aligning them — confirm against the installed version).
px.line(data_frame=since_epsilon_df,
        x='Days', y='Confirmed', color='Country/Region',
        log_y=True,
        labels={'Days': f'Days since the {EPSILON}-th case'},
        title=f'Confirmed cases by country since the {EPSILON}-th case')

# Regional statistics

Group by geographical region.

In [30]:
# Back to wide format: one row per Date, one column per Country/Region.
_df = confirmed_df.pivot(values='Confirmed', columns='Country/Region', index='Date')

# Group the country columns by the region recorded for them in `countries_df`.
# NOTE(review): `DataFrame.groupby(axis=1)` is deprecated in recent pandas
# releases; migrating means grouping the transposed frame and transposing the
# aggregated result in the cell that consumes `by_regions_gp`.
by_regions_gp = _df.groupby(countries_df['Region'], axis=1)

## Total confirmed

In [40]:
# Sum the grouped columns per region, make sure the index is called 'Date',
# then reshape to long format: one row per (Date, region) pair with the summed
# value in 'Confirmed'.
by_regions_total_df = (
    by_regions_gp
    .sum()
    .rename_axis('Date')
    .reset_index()
    .melt(id_vars='Date', value_name='Confirmed')
)

In [41]:
# One line per region over time, on a logarithmic y-axis.
px.line(
    data_frame=by_regions_total_df,
    x='Date',
    y='Confirmed',
    color='Region',
    log_y=True,
    title='Confirmed by Region',
)