In [90]:
import pandas as pd
import hvplot.pandas
import holoviews as hv
from holoviews import opts
import panel as pn
import numpy as np
from datetime import timedelta  
from scipy.optimize import curve_fit
from bokeh.models.formatters import DatetimeTickFormatter
import matplotlib.colors as mcolors
# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

# Load the Panel notebook extension and make DataFrame.plot() use the HoloViews backend
pn.extension()
pd.set_option('plotting.backend', 'holoviews')

# Graph style
hv.renderer('bokeh').theme = 'light_minimal'

# Default options applied to every Scatter and Curve element
def_opts = dict(
    width=500,
    height=400,
    padding=0.1,
    shared_axes=False,
    yformatter='%d',
    xlabel='Days since Feb 24',
    xformatter=DatetimeTickFormatter(days='%b %d'),
)
opts.defaults(opts.Scatter(**def_opts), opts.Curve(**def_opts))

In [91]:
cmap = list(mcolors.TABLEAU_COLORS.values())  # List of hexadecimal colour strings from matplotlib's Tableau palette

def Plot_All(df, log = True, lin = True, kind='scatter', ylabel = 'Number of cases', yformatter = '%d'):
    '''
    Plots the data in the following way:
    One tab for each country and each tab can have two tabs for linear and log plot.
    The first tab is the plot of all Countries together

    ARGUMENTS:
    - df: Dataframe containing all data (one column per country)
    - log: set to False if you don't want the logarithmic scale tab
    - lin: set to False if you don't want the linear scale tab
    - kind: you can choose the plot kind (scatter by default)
    - ylabel: label of y axis
    - yformatter: tick format of the y axis, applied to the logarithmic plots only

    RETURNS:
    - a pn.Tabs object whose first tab is 'All' followed by one tab per column of df

    NOTE: if both log and lin are False, only the linear plot is produced.
    '''
    xformatter = DatetimeTickFormatter(days = '%b %d')   # Formatter for date time axis, so that date is e.g. 'Jan 25'

    # Options in common between linear and log plots
    opts_comm = {'title': '', 'width': 600, 'height': 400, 'padding': 0.1,
                 'kind': kind, 'xformatter': xformatter, 'xlabel': '', 'ylabel': ylabel}
    # Extra options for log plots; the lower y limit keeps the log axis bounded
    opts_log = {**opts_comm, 'logy': True, 'yformatter': yformatter, 'ylim': [2e-1, None]}

    def _lin_log_view(data, **extra):
        # Build the linear plot, the log plot, or a two-tab view of both for `data`
        if not log:
            return data.plot(**opts_comm, **extra)
        if not lin:
            return data.plot(**opts_log, **extra)
        return pn.Tabs(('Linear', data.plot(**opts_comm, **extra)),
                       ('Logarithmic', data.plot(**opts_log, **extra)))

    # Create the first tab with all countries in the same plot
    tab_states = pn.Tabs(('All', _lin_log_view(df)))

    # Create other tabs, one for each country; the palette is cycled so that
    # frames with more columns than colours do not raise an IndexError
    for i, Country in enumerate(df.columns):
        colour = cmap[i % len(cmap)]
        tab_states.append((Country, _lin_log_view(df[Country], color = colour)))
    return tab_states

# COVID-19 Data analysis
**Data source**: 
https://ourworldindata.org/coronavirus-source-data.
Data come directly from World Health Organization daily reports.

**Source code that you can execute**:
https://mybinder.org/v2/gh/gioarma/covid-19_analysis/b5e55e36aa6ddaf4a797740d2fdfbb707ce901a1?filepath=Covid_19.ipynb

## Countries with most infected people

These are the 10 countries with most Coronavirus cases in the world.

In [3]:
data = 'https://covid.ourworldindata.org/data/ecdc/total_cases.csv'

# Cumulative cases: one row per date, one column per country.
# The 'World' aggregate is dropped so that it does not dominate the ranking,
# and missing values are treated as 0 cases before converting from float to int.
df = (
    pd.read_csv(data, index_col='date')
    .drop(columns=['World'])
    .fillna(0.0)
    .astype(int)
)

# Change index to DateTime for plotting. The format is inferred instead of being
# hard-coded: a fixed '%Y/%m/%d' pattern is rejected by strict parsers when the
# file uses ISO 'YYYY-MM-DD' dates.
df.index = pd.to_datetime(df.index)

# Ten countries with the highest totals on the most recent date
pn.pane.DataFrame(df.iloc[-1].sort_values(ascending = False).head(10), header = False, width = 300)

## Total Cases

Among all countries, we show the data for the following ones:

* China
* Italy
* France
* Germany
* Spain
* United States
* United Kingdom

The plot below shows the number of total cases in the selected countries.
You can view all of them in the same plot, or one by one by switching tabs.
For each plot you can also view it in linear or logarithmic scale.

In [4]:
Countries = ['Italy', 'China', 'France', 'Germany', 'Spain', 'United States', 'United Kingdom']

# Keep only the selected countries, ordered from the highest to the lowest peak value
TotCases = df.filter(items=Countries)
sorted_columns = TotCases.max().sort_values(ascending=False).index.tolist()
TotCases = TotCases.loc[:, sorted_columns]

# Table with the most recent rows of the selection
last_days_table = pn.pane.DataFrame(TotCases.tail(), width=800, max_cols=10)
pn.Column('## Data of the last 5 days for your selected countries', last_days_table).servable()

# Tabbed plots next to a short usage tip
tip_total = pn.pane.Markdown(""" 
        <br><br><br>
        ### Tip:
        You can activate the items in the side bar to move the plots, zoom and save the image.""")
pn.Row(Plot_All(TotCases), pn.Spacer(width=50), tip_total)

## New cases

Here you can see how many *new* cases are recorded each day in the selected countries

In [5]:
# New cases are the difference between successive rows of the cumulative totals;
# NaNs (e.g. the first row, which has no predecessor) become 0 before the cast to int
NewCases = TotCases.diff().fillna(0.0).astype(int)

tip_new = pn.pane.Markdown(""" 
        <br><br><br>
        ### Tip:
        In the "All" tab you can deactivate specific plots by clicking on the corresponding country in the side bar.""")
new_cases_tabs = Plot_All(NewCases, log=False, kind='line', ylabel='Number of new cases')
pn.Row(new_cases_tabs, pn.Spacer(width=50), tip_new)

# Growth Factor

The growth factor is given by the number of new cases on one day $N_d$ divided by the number of new cases on the previous day $N_{d-1}$:

$$G = \frac{N_d}{N_{d-1}}$$

When $G$ reaches the value of **1**, we are probably at the inflection point, where the number of new cases every day starts to decrease.

In [6]:
# Growth factor: each day's new cases divided by the previous day's new cases
# (shift() moves every row one day forward, so row d lines up with day d-1)
G = NewCases / NewCases.shift()

# Discard values that cannot be shown on a log axis: divisions by zero give
# +inf or -inf (the latter when a negative correction follows a zero day),
# and a ratio of exactly 0 has no logarithm
G = G.replace([np.inf, -np.inf, 0], np.nan)

# dropna() keeps only the days on which every selected country has a valid ratio
panel_G = Plot_All(G.dropna(), log = True, ylabel = 'Growth factor',
                   kind = 'scatter', lin = False, yformatter = '%.1f')
panel_G

# Italy

For this part we take data from Protezione Civile, which are more reliable.

Source: https://github.com/pcm-dpc/COVID-19/tree/master/dati-andamento-nazionale

In [92]:
dati_italia = 'https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-andamento-nazionale/dpc-covid19-ita-andamento-nazionale.csv'
columns = ['data', 'totale_casi', 'deceduti', 'dimessi_guariti', 'totale_attualmente_positivi', 'nuovi_attualmente_positivi']

# Explicit source-name -> short-name mapping. read_csv(usecols=...) returns the
# columns in *file* order, not in the order of `columns`, so assigning a list of
# names positionally would silently mislabel the data if the CSV layout changed.
short_names = {'totale_casi': 'tot',
               'dimessi_guariti': 'rec',
               'deceduti': 'dead',
               'totale_attualmente_positivi': 'tot_pos',
               'nuovi_attualmente_positivi': 'new_pos'}

Italia = pd.read_csv(dati_italia, usecols = columns, index_col = 'data')
Italia.index = pd.to_datetime(Italia.index)
Italia = (
    Italia
    .rename(columns = short_names)
    [['tot', 'rec', 'dead', 'tot_pos', 'new_pos']]
    .assign(new = lambda d: d['tot'].diff())      # New daily cases
)

# Named italia_opts (not `opts`) so that holoviews.opts, imported in the first cell, is not shadowed
italia_opts = {'width': 450, 'height': 400, 'xlabel': '', 'shared_axes': False, 'padding': 0.1}
plt_TotItalia = Italia['tot'].plot(label = 'Total Cases', color = cmap[0], **italia_opts)
plt_NewItalia = Italia['new'].plot(label = 'New cases', color = cmap[1], **italia_opts)
plt_RecItalia = Italia['rec'].plot(label = 'Total Recovered', color = cmap[2], **italia_opts)
plt_DeadItalia = Italia['dead'].plot(label = 'Total Dead', color = cmap[3], **italia_opts)
plt_TotPosItalia = Italia['tot_pos'].plot(label = 'Total currently infected', color = cmap[4], **italia_opts)
plt_NewPosItalia = Italia['new_pos'].plot(label = 'New daily infected', color = cmap[5], **italia_opts)

# Two-column grid with one panel per quantity
pltList_Italia = [plt_TotItalia, plt_NewItalia, plt_RecItalia,
                  plt_DeadItalia, plt_TotPosItalia, plt_NewPosItalia]
hv.Layout(pltList_Italia).cols(2)

## Predictions

In [106]:
def sigmoid(t, K, A, r):
    """Logistic curve K / (1 + A * exp(-r * t)) evaluated at t (scalar or array)."""
    decay = np.exp(-r * t)
    denominator = 1 + A * decay
    return K / denominator

extra_days = 30    # How much we extend the fit beyond the current date for prediction
m = 10             # Spacing (in days) between the three points used for the initial guesses

N = Italia.index.size
if N <= 2*m:
    raise ValueError('At least ' + str(2*m + 1) + ' days of data are needed to build the initial guesses')

# Initial guesses from three EQUALLY spaced points ending at the last observation
# (days N-1-2m, N-1-m and N-1): the closed-form expressions below, including the
# (N-1-m)/m exponent, assume equal spacing.
# Positional access (.iloc) is used because the index holds datetimes, and the
# values are cast to float so that the triple products cannot overflow int64.
tot = Italia['tot']
C2m, Cm, C = float(tot.iloc[N-1-2*m]), float(tot.iloc[N-1-m]), float(tot.iloc[N-1])
K_guess = Cm*(C2m*Cm-2*C2m*C+Cm*C)/(Cm**2-C*C2m)
r_guess = 1/m*np.log(C*(Cm-C2m)/(C2m*(C-Cm)))
A_guess = (C-Cm)*(Cm-C2m)/(Cm**2-C*C2m)*(C/C2m*(Cm-C2m)/(C-Cm))**((N-1-m)/m)
init_guesses = [K_guess, A_guess, r_guess]

# The index is in datetime format, with which I can't fit
# -> I consider days from beginning (0,1,2,3,...) as index
popt, pcov = curve_fit(sigmoid, np.arange(N), tot.values, p0 = init_guesses)

# Evaluate the fitted curve on the observed days plus `extra_days` more.
# Using `periods` guarantees that the index and the data have the same length
# regardless of the time-of-day stored in the timestamps.
n_fit = N + extra_days
fit = pd.Series(index = pd.date_range(Italia.index[0], periods = n_fit, freq = 'D'),
                data = sigmoid(np.arange(n_fit), *popt))

# Named pred_opts (not `opts`) so that holoviews.opts, imported in the first cell, is not shadowed
pred_opts = {'width': 600, 'height': 400, 'yformatter' : '%d', 'xlabel': ''}

# Total cases: data, fitted curve and a horizontal line at the fitted plateau K = popt[0]
plt_TotPred = Italia['tot'].plot(kind = 'scatter', label = 'Data', **pred_opts) * fit.plot(color = 'red', label = 'Prediction', **pred_opts) * \
              hv.HLine(popt[0]).opts(color = 'black', line_width=1)* \
              hv.Text(text='Predicted final size: '+str(int(popt[0])), y =popt[0]+5e3, x = fit.index[12], fontsize=11)
# New cases: data against the day-to-day difference of the fitted curve
plt_NewPred = Italia['new'].plot(kind = 'scatter', label = 'Data', c = cmap[1], **pred_opts)*\
              fit.diff().plot(color = 'red', label = 'Prediction')
(plt_TotPred.opts(title = 'Total cases') + plt_NewPred.opts(title = 'New cases')).cols(1)

In [8]:
%%html
<!-- Loads code_toggle.js from a pinned gist revision via an external CDN; presumably it adds a show/hide toggle for code cells (TODO confirm). Needs network access when the notebook is rendered. -->
<script src="https://cdn.rawgit.com/parente/4c3e6936d0d7a46fd071/raw/65b816fb9bdd3c28b4ddf3af602bfd6015486383/code_toggle.js"></script>