# Table of contents:
* [1 - Import library](#import_library)
* [2 - Import and wrangle data](#import_data)
    * [2.1 - Import confirmed and deaths](#import_CSSE_data)
    * [2.2 - Enrich with cartopy](#enrich_with_cartopy)
    * [2.3 - Smooth, derive and more](#smooth-derive)
    * [2.4 - Enrich with patient zero and confinement dates](#patient_zero_confinement_dates)
    * [2.5 - Display some data](#display_data)
* [3 - Data analysis](#data_analysis)
    * [3.1 -  Geographical evolution in the world](#geographical_evolution)
    * [3.2 - Analysis by continent](#analysis_continent)
    * [3.3 - Analysis by country](#analysis_country)
    * [3.4 - Comparison between countries](#comparison_countries)
* [4 - Model](#model)

# 1 - Import library <a name="import_library"></a>

In [1]:
import os
import datetime
import pandas as pd
import numpy as np
from scipy.signal import savgol_filter
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib import cm
import math
%matplotlib inline

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy.io.shapereader as shpreader

from IPython.display import IFrame

# 2 - Import and wrangle data <a name="import_data"></a>
Data are aggregated by the Johns Hopkins University Center for Systems Science and Engineering (JHU CSSE). <br>
More details on the origin and description of the data are available in the README of the GitHub repository.<br>
https://github.com/CSSEGISandData/COVID-19

In [2]:
# Root URL of the JHU CSSE time-series CSV files; the trailing slash is kept
# so that a file name can be appended directly.
BASE_URL = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/'

## 2.1 - Import confirmed and deaths <a name="import_CSSE_data"></a>

In [3]:
# Build one long-format frame (country, date, value) per metric.
df_dict = {}
for value_name in ('confirmed', 'deaths'):
    csv_name = 'time_series_covid19_{}_global.csv'.format(value_name)
    raw = pd.read_csv(os.path.join(BASE_URL, csv_name))

    # Collapse provinces/states into a single row per country.
    per_country = raw.drop(columns=['Province/State', 'Lat', 'Long'])
    per_country = per_country.groupby(by='Country/Region').sum().reset_index()

    # Every remaining column except the country name is a date.
    date_columns = [col for col in per_country.columns.values.tolist()
                    if col != 'Country/Region']
    long_format = per_country.melt(id_vars=['Country/Region'],
                                   value_vars=date_columns,
                                   var_name='date',
                                   value_name=value_name)
    long_format['date'] = pd.to_datetime(long_format['date'])

    df_dict[value_name] = long_format
    

### Merge and order data by country and date

In [4]:
# Join confirmed and deaths on (country, date), then order the rows by
# country and date with a fresh 0..n-1 index.
df = (
    df_dict['confirmed']
    .merge(df_dict['deaths'], on=['Country/Region', 'date'])
    .sort_values(by=['Country/Region', 'date'])
    .reset_index(drop=True)
)

## 2.2 - Enrich with cartopy  <a name="enrich_with_cartopy"></a>

### Rename some countries to match cartopy naming

In [5]:
# CSSE country name -> cartopy (Natural Earth) ``NAME`` attribute.
# The mapping has its own name because the next cell uses ``country`` as a
# loop variable at module level, which would replace a dict called
# ``country`` and break ``rename_country`` when this cell is re-run.
CSSE_TO_CARTOPY_NAME = {
    'US': 'United States of America',
    'Western Sahara': 'W. Sahara',
    'Congo (Kinshasa)': 'Dem. Rep. Congo',
    'Congo (Brazzaville)': 'Congo',
    'Dominican Republic': 'Dominican Rep.',
    "Cote d'Ivoire": "Côte d'Ivoire",
    'Central African Republic': 'Central African Rep.',
    'Korea, South': 'South Korea',
    'Taiwan*': 'Taiwan',
    'South Sudan': 'S. Sudan',
    'Bosnia and Herzegovina': 'Bosnia and Herz.',
    'Equatorial Guinea': 'Eq. Guinea',
    'Burma': 'Myanmar',
    'Eswatini': 'eSwatini',
    'North Macedonia': 'Macedonia',
    'West Bank and Gaza': 'Palestine',
}


def rename_country(df):
    """Return the cartopy name for a CSSE country name.

    Despite its name, ``df`` is a single country name (one cell of the
    'Country/Region' column), not a DataFrame. Names without an entry in
    the mapping are returned unchanged.
    """
    return CSSE_TO_CARTOPY_NAME.get(df, df)


df['Country/Region'] = df['Country/Region'].apply(rename_country)

### Get population / continent from cartopy

In [6]:
# Read the Natural Earth country records shipped through cartopy.
shpfilename = shpreader.natural_earth(resolution='110m',
                                      category='cultural',
                                      name='admin_0_countries')
reader = shpreader.Reader(shpfilename)
countries = reader.records()

population = {}             # country name -> POP_EST attribute
continent = {}              # country name -> CONTINENT attribute
countries_not_in_CSSE = []  # shapefile names with no row in the CSSE data

# Computed once: the set of CSSE names does not change while looping.
csse_names = set(df['Country/Region'].unique())

# The loop variable is deliberately not called ``country`` so that it does
# not overwrite the module-level rename mapping defined in the previous cell.
for record in countries:
    record_name = record.attributes['NAME']
    population[record_name] = record.attributes['POP_EST']
    continent[record_name] = record.attributes['CONTINENT']
    if record_name not in csse_names:
        countries_not_in_CSSE.append(record_name)

### Print cartopy countries that are not available in CSSE data

In [7]:
# Shapefile country names that have no matching row in the CSSE frame.
print(countries_not_in_CSSE)

['Falkland Is.', 'Greenland', 'Fr. S. Antarctic Lands', 'Lesotho', 'Puerto Rico', 'Vanuatu', 'North Korea', 'Tajikistan', 'Turkmenistan', 'New Caledonia', 'Solomon Is.', 'Antarctica', 'N. Cyprus', 'Somaliland']


### Enrich with population / continent

In [8]:
# Singapore is filled in by hand before the lookup tables are applied.
population.update({'Singapore': 5600000})
continent.update({'Singapore': 'Asia'})

# Attach both attributes to every row through the country name.
for column_name, lookup in (('population', population), ('continent', continent)):
    df[column_name] = df['Country/Region'].map(lookup)

### Print CSSE countries that are not available in cartopy

In [9]:
# Rows whose continent lookup failed belong to countries unknown to cartopy.
without_continent = df['continent'].isnull()
countries_not_in_cartopy = df.loc[without_continent, 'Country/Region'].unique()
print(countries_not_in_cartopy)

['Andorra' 'Antigua and Barbuda' 'Bahrain' 'Barbados' 'Cabo Verde'
 'Diamond Princess' 'Dominica' 'Grenada' 'Holy See' 'Liechtenstein'
 'MS Zaandam' 'Maldives' 'Malta' 'Mauritius' 'Monaco'
 'Saint Kitts and Nevis' 'Saint Lucia' 'Saint Vincent and the Grenadines'
 'San Marino' 'Sao Tome and Principe' 'Seychelles']


## 2.3 - Smooth, derive and more  <a name="smooth-derive"></a>

In [10]:
def smooth_data(df, param):
    """Smooth column ``param`` in place with a Savitzky-Golay filter.

    The filter uses a 7-point window and a polynomial of order 3.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to smooth; it is modified in place.
    param : str
        Name of the column to smooth.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object. The column is left untouched when the
        filter rejects the input with ``ValueError`` (e.g. fewer rows than
        the window length).

    Raises
    ------
    KeyError
        If ``param`` is not a column of ``df``.
    """
    try:
        df[param] = savgol_filter(df[param], 7, 3)
    except ValueError:
        # Best effort: a series the filter cannot handle is kept as is.
        # Any other error (e.g. a missing column) is a real bug and must
        # not be silenced.
        pass
    return df

In [11]:
def derive_data(df, param):
    """Add a ``<param>_per_day`` column holding the daily variation.

    The derivative is taken from a Savitzky-Golay filter (7-point window,
    polynomial order 3, first derivative). When the filter rejects the
    input with ``ValueError`` (e.g. fewer rows than the window length), the
    plain difference with the previous row is used instead, which leaves
    NaN in the first row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to derive; it is modified in place.
    param : str
        Name of the source column.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the new column added.

    Raises
    ------
    KeyError
        If ``param`` is not a column of ``df``.
    """
    values = df[param]
    target = param + '_per_day'
    try:
        df[target] = savgol_filter(values, 7, 3, deriv=1)
    except ValueError:
        # Fallback for series the filter cannot handle.
        df[target] = values.diff()
    return df

In [12]:
def confirmed_computed(df, lag_days=6, mortality_rate=0.01):
    """Estimate the number of cases from the deaths reported later on.

    ``confirmed_computed`` is the number of deaths ``lag_days`` rows later
    divided by ``mortality_rate``. Rows where this gives 0 (in particular
    the last ``lag_days`` rows, which have no future deaths yet) are
    extrapolated from the previous row: the previous estimate plus the
    row's ``confirmed_per_day`` scaled by the previous row's ratio
    estimate / confirmed. The ratio used is stored in ``confirmed_ratio``
    (10.0 on rows that were not extrapolated).

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of one country in chronological order, with the columns
        'deaths', 'confirmed' and 'confirmed_per_day'. Modified in place.
    lag_days : int, optional
        Number of rows between a case and the matching death (default 6).
    mortality_rate : float, optional
        Assumed deaths / cases ratio (default 0.01, i.e. 1%).

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the two new columns.
    """
    df['confirmed_computed'] = df['deaths'].shift(-lag_days, fill_value=0) / mortality_rate
    df['confirmed_ratio'] = 10.

    # Walk consecutive (previous, current) labels instead of computing
    # ``i - 1``, so the index does not need to be made of contiguous integers.
    labels = df.index
    for previous, current in zip(labels[:-1], labels[1:]):
        if df.loc[current, 'confirmed_computed'] != 0:
            continue  # a death-based estimate already exists for this row
        if df.loc[previous, 'confirmed'] == 0:
            continue  # no ratio can be computed from the previous row
        ratio = df.loc[previous, 'confirmed_computed'] / df.loc[previous, 'confirmed']
        df.loc[current, 'confirmed_ratio'] = ratio
        df.loc[current, 'confirmed_computed'] = (
            df.loc[previous, 'confirmed_computed']
            + df.loc[current, 'confirmed_per_day'] * ratio
        )
    return df

In [13]:
# For each raw metric, smooth it per country first, then derive the daily
# variation from the smoothed values.
for param in ('confirmed', 'deaths'):
    for transform in (smooth_data, derive_data):
        df = df.groupby('Country/Region').apply(lambda group: transform(group, param))

A new number of confirmed cases with 1% mortality rate is computed to get a better estimate of the actual number of cases across the world and remove the test bias between countries.

In [14]:
# Estimate the cases per country, then smooth and derive the estimate the
# same way as the raw metrics.
df = df.groupby('Country/Region').apply(confirmed_computed)
for transform in (smooth_data, derive_data):
    df = df.groupby('Country/Region').apply(
        lambda group: transform(group, 'confirmed_computed')
    )

Mortality rate in %

In [15]:
# Deaths as a percentage of confirmed cases, row by row.
# NOTE(review): rows with 0 confirmed cases presumably yield NaN/inf rather
# than raising, since this is a pandas Series division — confirm.
df['mortality'] = df['deaths'] / df['confirmed'] * 100

Data (confirmed and deaths, overall and per day) per 10,000 inhabitants

In [16]:
# Scale every cumulative and daily metric to a rate per 10,000 inhabitants.
# Cumulative metrics come first so the new columns are appended in the same
# order as before.
for source_column in ('confirmed', 'confirmed_computed', 'deaths',
                      'confirmed_per_day', 'confirmed_computed_per_day', 'deaths_per_day'):
    df[source_column + '_per_cap'] = df[source_column] / df['population'] * 10000

## 2.4 - Enrich with patient zero and confinement dates for some countries  <a name="patient_zero_confinement_dates"></a>

In [17]:
# Date assumed for "patient zero" in each country.
patient_zero = {
    name: pd.to_datetime(day)
    for name, day in [
        ('China', "2020-01-06"),
        ('Italy', "2020-02-11"),
        ('Iran', "2020-02-12"),
        ('South Korea', "2020-02-09"),
        ('Spain', "2020-02-18"),
        ('France', "2020-02-22"),
        ('Germany', "2020-03-01"),
        ('United States of America', "2020-02-24"),
        ('United Kingdom', "2020-02-26"),
        ('Netherlands', "2020-03-01"),
        ('Belgium', "2020-03-02"),
        ('Switzerland', "2020-03-01"),
        ('Brazil', "2020-03-06"),
        ('Turkey', "2020-03-06"),
        ('Sweden', "2020-03-06"),
        ('Indonesia', "2020-03-06"),
        ('Portugal', "2020-03-07"),
        ('Philippines', "2020-03-06"),
        ('Austria', "2020-03-08"),
        ('Denmark', "2020-03-08"),
    ]
}

# Date on which confinement started, for the countries where one is listed.
confinement = {
    name: pd.to_datetime(day)
    for name, day in [
        ('China', "2020-01-23"),
        ('Italy', "2020-03-08"),
        ('Spain', "2020-03-14"),
        ('France', "2020-03-16"),
        ('Germany', "2020-03-22"),
        ('United States of America', "2020-03-20"),
        ('United Kingdom', "2020-03-23"),
        ('Netherlands', "2020-03-23"),
        ('Belgium', "2020-03-18"),
        ('Switzerland', "2020-03-16"),
        ('Turkey', "2020-03-27"),
        ('Indonesia', "2020-03-29"),
        ('Portugal', "2020-03-20"),
        ('Philippines', "2020-03-16"),
        ('Austria', "2020-03-16"),
        ('Denmark', "2020-03-11"),
    ]
}

# Countries missing from a table get NaT in the matching column.
for column_name, date_by_country in (('patient_zero', patient_zero),
                                     ('confinement', confinement)):
    df[column_name] = df['Country/Region'].map(date_by_country)

# Express both durations as a (float) number of days since patient zero.
one_day = pd.to_timedelta(1, unit='D')
df['contagion_days'] = (df['date'] - df['patient_zero']) / one_day
df['contagion_days_confinement'] = (df['confinement'] - df['patient_zero']) / one_day

## 2.5 - Display some data <a name="display_data"></a>

In [18]:
# Preview the first rows of the enriched frame.
df.head()

Unnamed: 0,Country/Region,date,confirmed,deaths,population,continent,confirmed_per_day,deaths_per_day,confirmed_computed,confirmed_ratio,...,confirmed_per_cap,confirmed_computed_per_cap,deaths_per_cap,confirmed_per_day_per_cap,confirmed_computed_per_day_per_cap,deaths_per_day_per_cap,patient_zero,confinement,contagion_days,contagion_days_confinement
0,Afghanistan,2020-01-22,0.0,0.0,34124811.0,Asia,0.0,0.0,0.0,10.0,...,0.0,0.0,0.0,0.0,0.0,0.0,NaT,NaT,,
1,Afghanistan,2020-01-23,0.0,0.0,34124811.0,Asia,0.0,0.0,0.0,10.0,...,0.0,0.0,0.0,0.0,0.0,0.0,NaT,NaT,,
2,Afghanistan,2020-01-24,0.0,0.0,34124811.0,Asia,0.0,0.0,0.0,10.0,...,0.0,0.0,0.0,0.0,0.0,0.0,NaT,NaT,,
3,Afghanistan,2020-01-25,0.0,0.0,34124811.0,Asia,0.0,0.0,0.0,10.0,...,0.0,0.0,0.0,0.0,0.0,0.0,NaT,NaT,,
4,Afghanistan,2020-01-26,0.0,0.0,34124811.0,Asia,0.0,0.0,0.0,10.0,...,0.0,0.0,0.0,0.0,0.0,0.0,NaT,NaT,,


### Print list of countries

In [19]:
# Distinct country names, after the renaming to cartopy conventions.
print(df['Country/Region'].unique())

['Afghanistan' 'Albania' 'Algeria' 'Andorra' 'Angola'
 'Antigua and Barbuda' 'Argentina' 'Armenia' 'Australia' 'Austria'
 'Azerbaijan' 'Bahamas' 'Bahrain' 'Bangladesh' 'Barbados' 'Belarus'
 'Belgium' 'Belize' 'Benin' 'Bhutan' 'Bolivia' 'Bosnia and Herz.'
 'Botswana' 'Brazil' 'Brunei' 'Bulgaria' 'Burkina Faso' 'Myanmar'
 'Burundi' 'Cabo Verde' 'Cambodia' 'Cameroon' 'Canada'
 'Central African Rep.' 'Chad' 'Chile' 'China' 'Colombia' 'Congo'
 'Dem. Rep. Congo' 'Costa Rica' "Côte d'Ivoire" 'Croatia' 'Cuba' 'Cyprus'
 'Czechia' 'Denmark' 'Diamond Princess' 'Djibouti' 'Dominica'
 'Dominican Rep.' 'Ecuador' 'Egypt' 'El Salvador' 'Eq. Guinea' 'Eritrea'
 'Estonia' 'eSwatini' 'Ethiopia' 'Fiji' 'Finland' 'France' 'Gabon'
 'Gambia' 'Georgia' 'Germany' 'Ghana' 'Greece' 'Grenada' 'Guatemala'
 'Guinea' 'Guinea-Bissau' 'Guyana' 'Haiti' 'Holy See' 'Honduras' 'Hungary'
 'Iceland' 'India' 'Indonesia' 'Iran' 'Iraq' 'Ireland' 'Israel' 'Italy'
 'Jamaica' 'Japan' 'Jordan' 'Kazakhstan' 'Kenya' 'South Korea' 'Ko

### Print list of countries with max deaths

In [20]:
# Highest death count reached by each country, ranked in decreasing order.
# The second reset_index keeps the pre-sort position as an 'index' column.
(
    df.groupby(by='Country/Region')
    .agg({'deaths': 'max'})
    .reset_index()
    .sort_values(by='deaths', ascending=False)
    .reset_index()
    .head(10)
)

Unnamed: 0,index,Country/Region,deaths
0,175,United States of America,32847.214286
1,82,Italy,22184.166667
2,156,Spain,19313.619048
3,59,France,18039.357143
4,174,United Kingdom,13764.238095
5,78,Iran,4868.404762
6,16,Belgium,4848.785714
7,63,Germany,4070.0
8,35,China,3346.119048
9,119,Netherlands,3335.238095


# 3 - Data analysis <a name="data_analysis"></a>

## 3.1 -  Geographical evolution in the world <a name="geographical_evolution"></a>

In [21]:
class MplColorHelper:
    """Map scalar values onto the colors of a named matplotlib colormap."""

    def __init__(self, cmap_name, start_val, stop_val):
        """Prepare a 10-level colormap normalized over [start_val, stop_val].

        cmap_name: name of the matplotlib colormap to use.
        start_val, stop_val: values mapped to the two ends of the colormap.
        """
        colormap = plt.get_cmap(cmap_name, 10)
        normalizer = mpl.colors.Normalize(vmin=start_val, vmax=stop_val)

        self.cmap_name = cmap_name
        self.cmap = colormap
        self.norm = normalizer
        # Exposed so callers can hand it to plt.colorbar.
        self.scalarMap = cm.ScalarMappable(norm=normalizer, cmap=colormap)

    def get_rgb(self, val):
        """Return the RGBA color associated with ``val``."""
        rgba = self.scalarMap.to_rgba(val)
        return rgba


In [22]:
def plot_contagion_map(region, date_string, param):
    """Draw a map where each country is colored by ``param`` on one day.

    Parameters
    ----------
    region : str
        'World' or 'Europe'; selects the map extent. Any other value keeps
        the default extent of the axes.
    date_string : str
        Day to display, formatted as ' %d %b %Y ' (with the surrounding
        spaces), i.e. the labels produced for the date slider.
    param : str
        Name of the column of the global ``df`` used for the colors.
    """
    fig = plt.figure(figsize=(18, 20))
    ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())

    if region == 'World':
        ax.set_extent([-180, 180, -60, 90], crs=ccrs.PlateCarree())
    elif region == 'Europe':
        ax.set_extent([-30, 50, 34, 72], crs=ccrs.PlateCarree())

    ax.add_feature(cfeature.LAND)
    ax.add_feature(cfeature.OCEAN)
    ax.add_feature(cfeature.COASTLINE)
    ax.add_feature(cfeature.BORDERS, linestyle=':')
    ax.add_feature(cfeature.LAKES, alpha=0.5)
    ax.add_feature(cfeature.RIVERS)

    selected_day = datetime.datetime.strptime(date_string, ' %d %b %Y ')
    ax.text(0.5, 1.05, date_string,
            transform=ax.transAxes, fontsize=20,
            horizontalalignment='center', verticalalignment='center',
            )

    # Color scale from 0 to y*10^x, the max over ALL dates rounded up on its
    # leading digit, so the scale stays the same while the date changes.
    overall_max = df[param].max()
    if overall_max > 0:
        x = math.floor(math.log10(overall_max))
        y = math.floor(overall_max / 10**x) + 1
        scale_max = y * 10**x
    else:
        # log10 is undefined for a non-positive (or NaN) maximum.
        scale_max = 1
    COL = MplColorHelper('Reds', 0, scale_max)

    # Build the country -> value lookup for the selected day once, instead
    # of filtering the whole frame again for every shapefile record. When a
    # country has several rows for that day, the first one is used.
    day_rows = df[df['date'] == selected_day]
    value_by_country = day_rows.drop_duplicates(subset='Country/Region') \
                               .set_index('Country/Region')[param]

    shpfilename = shpreader.natural_earth(resolution='110m',
                                          category='cultural',
                                          name='admin_0_countries')
    reader = shpreader.Reader(shpfilename)

    for record in reader.records():
        name = record.attributes['NAME']
        if name not in value_by_country.index:
            continue
        # add_geometries expects a collection of geometries, so the single
        # country geometry is wrapped in a list.
        ax.add_geometries([record.geometry], ccrs.PlateCarree(),
                          facecolor=COL.get_rgb(value_by_country[name]),
                          edgecolor=(0, 0, 0)
                          )

    sm = COL.scalarMap
    plt.colorbar(sm, ax=ax, orientation='vertical', pad=0.02, shrink=0.32)

    plt.show()

# Map extent selector.
selection_region = widgets.Dropdown(
    options=['World', 'Europe'],
    value='World',
    description='Region',
    disabled=False,
)

# One slider stop per day covered by the data. The label format (with its
# surrounding spaces) is the one parsed back by plot_contagion_map.
dates = pd.date_range(df['date'].min(), df['date'].max(), freq='D').tolist()
date_labels = [day.strftime(' %d %b %Y ') for day in dates]

selection_slider = widgets.SelectionSlider(
    options=date_labels,
    value=date_labels[0],
    description='Date',
    orientation='horizontal',
    layout={'width': '500px'}
)

# Three base metrics, each available raw, per day, per capita and per day
# per capita.
options = [
    metric + suffix
    for suffix in ('', '_per_day', '_per_cap', '_per_day_per_cap')
    for metric in ('confirmed', 'confirmed_computed', 'deaths')
]

selection_param = widgets.Dropdown(
    options=options,
    value=options[0],
    description='Param',
    disabled=False,
)

interact(plot_contagion_map,
         region=selection_region,
         date_string=selection_slider,
         param=selection_param)


interactive(children=(Dropdown(description='Region', options=('World', 'Europe'), value='World'), SelectionSli…

<function __main__.plot_contagion_map(region, date_string, param)>

## 3.2 - Analysis by continent <a name="analysis_continent"></a>
### Plot type
    - Area --> lasagna analysis, data are added on top of each other (show overall sum and split)
    - Line --> raw display of the data
    
### Analysis
    - Continent --> data are aggregated by continent
    - World --> countries with top 10 number of deaths
    - Asia, Africa ... --> countries with top 10 number of deaths in the given continent

In [23]:
def plot_contagion_by_continent(plot_type, analysis, param):
    """Plot ``param`` over time, split by continent or by top-10 country.

    Parameters
    ----------
    plot_type : str
        'Area' (stacked) or 'Line'. Any other value draws nothing.
    analysis : str
        'Continent' aggregates the global ``df`` by continent; 'World'
        keeps the 10 countries with the highest death count; a continent
        name keeps the 10 countries of that continent with the highest
        death count.
    param : str
        Name of the column to plot (summed per date and group).

    Raises
    ------
    ValueError
        If ``analysis`` is not one of the supported values.
    """
    continents = ['Asia', 'Europe', 'North America', 'South America', 'Africa', 'Oceania']

    if analysis == 'Continent':
        group = 'continent'
        # Every row takes part; no mask is built, so nothing here depends on
        # the frame having a 0..n-1 index.
        selected = df
    elif analysis == 'World' or analysis in continents:
        group = 'Country/Region'
        candidates = df if analysis == 'World' else df[df['continent'] == analysis]
        country_list = candidates.groupby(by='Country/Region') \
                                 .agg({'deaths': 'max'}) \
                                 .sort_values(by='deaths', ascending=False) \
                                 .head(10) \
                                 .index
        selected = df[df['Country/Region'].isin(country_list)]
    else:
        raise ValueError('Unknown analysis: {!r}'.format(analysis))

    df_tmp = selected.groupby(by=['date', group]) \
                     .agg({param: 'sum'}).reset_index() \
                     .pivot(index='date', columns=group, values=param)
    # Anything that is not strictly positive (negative values produced by
    # smoothing, NaN) is drawn as 0.
    df_tmp = df_tmp.where(df_tmp > 0, 0)

    if plot_type == 'Area':
        df_tmp.plot(kind='area', grid=True)
    elif plot_type == 'Line':
        df_tmp.plot(kind='line', grid=True)

    
# Stacked area or plain lines.
plot_choices = ['Area', 'Line']
selection_plot = widgets.Dropdown(
    options=plot_choices,
    value=plot_choices[0],
    description='Plot type',
    disabled=False,
)

# Aggregation level: by continent, world top 10, or top 10 of one continent.
analysis_choices = ['Continent', 'World',
                    'Asia', 'Europe', 'North America', 'South America', 'Africa', 'Oceania']
selection_group = widgets.Dropdown(
    options=analysis_choices,
    value=analysis_choices[0],
    description='Analysis',
    disabled=False,
)

# Three base metrics, each available raw, per day, per capita and per day
# per capita.
options = [
    metric + suffix
    for suffix in ('', '_per_day', '_per_cap', '_per_day_per_cap')
    for metric in ('confirmed', 'confirmed_computed', 'deaths')
]

selection_param = widgets.Dropdown(
    options=options,
    value=options[0],
    description='Param',
    disabled=False,
)

interact(plot_contagion_by_continent,
         plot_type=selection_plot,
         analysis=selection_group,
         param=selection_param)

interactive(children=(Dropdown(description='Plot type', options=('Area', 'Line'), value='Area'), Dropdown(desc…

<function __main__.plot_contagion_by_continent(plot_type, analysis, param)>

## 3.3 - Analysis by country <a name="analysis_country"></a>

In [24]:
def plot_contagion_by_country(country, param_list):
    """Plot the selected columns of the global ``df`` for one country.

    'mortality' is always drawn as a dashed black line on a secondary
    y-axis; every other selected column goes on the primary axis.

    country: value of 'Country/Region' to plot.
    param_list: iterable of column names, possibly containing 'mortality'.
    """
    country_rows = df[df['Country/Region'] == country]
    columns = list(param_list)

    if 'mortality' not in columns:
        main_axis = country_rows.plot(x='date', y=columns, grid=True)
        main_axis.legend(loc='upper left')
        return

    columns.remove('mortality')
    if columns:
        main_axis = country_rows.plot(x='date', y=columns, grid=True)
        main_axis.legend(loc='upper left')
    else:
        # Only mortality was selected: an invisible 'deaths' line provides
        # the date axis the secondary axis is attached to.
        main_axis = country_rows.plot(x='date', y='deaths', linewidth=0, grid=True)
        main_axis.legend().remove()

    mortality_axis = main_axis.twinx()
    country_rows.plot(x='date', y=['mortality'], c='k', linestyle='--', ax=mortality_axis)
    mortality_axis.legend(loc='upper right')


# Country selector (a dropdown, despite the variable name).
selection_slider = widgets.Dropdown(
    options=df['Country/Region'].unique(),
    value='France',
    description='Country',
    disabled=False,
)

# Same metric list as the other views, plus the mortality rate.
options = [
    metric + suffix
    for suffix in ('', '_per_day', '_per_cap', '_per_day_per_cap')
    for metric in ('confirmed', 'confirmed_computed', 'deaths')
] + ['mortality']

selection_list = widgets.SelectMultiple(
    options=options,
    value=['confirmed', 'deaths', 'mortality'],
    description='Param',
    rows=len(options),  # show every entry without scrolling
    disabled=False
)

interact(plot_contagion_by_country,
         country=selection_slider,
         param_list=selection_list)

interactive(children=(Dropdown(description='Country', index=61, options=('Afghanistan', 'Albania', 'Algeria', …

<function __main__.plot_contagion_by_country(country, param_list)>

## 3.4 - Comparison between countries <a name="comparison_countries"></a>

In [25]:
def plot_compare_contagion(confinement_date, paramx, param1, param2, countries):
    """Compare two metrics between countries on two stacked plots.

    Parameters
    ----------
    confinement_date : bool
        When true, a dashed vertical line marks the start of confinement
        for each country that has one, in the color of that country.
    paramx : str
        'date' (last 60 days up to today) or 'contagion_days' (first 60
        days since patient zero).
    param1, param2 : str
        Columns of the global ``df`` drawn on the top and bottom plot.
    countries : iterable of str
        Values of 'Country/Region' to draw.

    Raises
    ------
    ValueError
        If ``paramx`` is not one of the supported values.
    """
    if paramx == 'date':
        end_date = datetime.date.today()
        xlim = (end_date + datetime.timedelta(-60), end_date)
        marker_column = 'confinement'
    elif paramx == 'contagion_days':
        xlim = (0, 60)
        marker_column = 'contagion_days_confinement'
    else:
        raise ValueError('Unknown paramx: {!r}'.format(paramx))

    fig, ax = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(8, 6))
    for country in countries:
        country_rows = df[df['Country/Region'] == country]
        country_rows.plot(x=paramx, y=param1, label=country, xlim=xlim, grid=True, ax=ax[0])
        country_rows.plot(x=paramx, y=param2, label=country, grid=True, ax=ax[1])

        if not confinement_date:
            continue

        # Kept in its own variable: reusing ``confinement_date`` here would
        # replace the checkbox flag with a Timestamp/NaT for the countries
        # that follow.
        marker = country_rows[marker_column].min()
        if pd.isna(marker):
            continue  # no confinement date known for this country

        # Reuse the color of the line just drawn for this country.
        color = ax[1].lines[-1].get_color()
        ax[0].axvline(x=marker, color=color, linestyle='--')
        ax[1].axvline(x=marker, color=color, linestyle='--')

    # One legend for the whole figure instead of one per plot.
    handles, labels = ax[0].get_legend_handles_labels()
    ax[0].legend().remove()
    ax[1].legend().remove()
    ax[0].set_ylabel(param1)
    ax[1].set_ylabel(param2)
    fig.legend(handles, labels, bbox_to_anchor=(1.3, 0.9))
    plt.tight_layout()
    
# Toggle for the vertical confinement markers.
selection_confinement = widgets.Checkbox(
    value=False,
    description='Confinement date',
    indent=True
)

# Calendar date or number of days since patient zero on the x-axis.
selection_param_x = widgets.Dropdown(
    options=['date', 'contagion_days'],
    value='contagion_days',
    description='Paramx',
    disabled=False,
)

# Three base metrics, each available raw, per day, per capita and per day
# per capita.
options = [
    metric + suffix
    for suffix in ('', '_per_day', '_per_cap', '_per_day_per_cap')
    for metric in ('confirmed', 'confirmed_computed', 'deaths')
]

# Top plot defaults to the first metric, bottom plot to the third one.
selection_param_1 = widgets.Dropdown(
    options=options,
    value=options[0],
    description='Param1',
    disabled=False,
)
selection_param_2 = widgets.Dropdown(
    options=options,
    value=options[2],
    description='Param2',
    disabled=False,
)

# Preselect the 10 countries with the highest death count.
country_list = (
    df.groupby(by='Country/Region')
    .agg({'deaths': 'max'})
    .sort_values(by='deaths', ascending=False)
    .head(10)
    .index
)

selection_list = widgets.SelectMultiple(
    options=df['Country/Region'].unique(),
    value=list(country_list),
    description='Countries',
    rows=10,
    disabled=False
)

interact(plot_compare_contagion,
         confinement_date=selection_confinement,
         paramx=selection_param_x,
         param1=selection_param_1,
         param2=selection_param_2,
         countries=selection_list)


interactive(children=(Checkbox(value=False, description='Confinement date'), Dropdown(description='Paramx', in…

<function __main__.plot_compare_contagion(confinement_date, paramx, param1, param2, countries)>

# 4 - Model <a name="model"></a>

Hypotheses <br>
    - simple model with a variable reproduction number R0
    - R0 multiplies the number of new cases every 3 days
    - R0 is reduced (with some delay) when confinement is enforced
    - 1% mortality rate

The model is parametric and can be adapted to best fit the data for each country. <br>

Be careful: the model is rough and only holds during the exponential growth phase of the epidemic. It is more a way to assess the spread in each country and to compare the evolution between countries. <br>

For information on reproduction number, see wikipedia
https://en.wikipedia.org/wiki/Basic_reproduction_number

A video made by three American researchers also explains why R0 is so important in epidemics in general and for Covid-19 in particular.

In [26]:
# Embed the Wikimedia Commons player for the video on R0 and the case
# fatality rate.
IFrame(src="https://commons.wikimedia.org/wiki/File:COVID19_in_numbers-_R0,_the_case_fatality_rate_and_why_we_need_to_flatten_the_curve.webm?embedplayer=yes", width=512, height=288)

In [27]:
def plot_with_model(country, m1, m2, delta_days):
    """Plot the data of one country against a simple growth model.

    The model advances in steps of 3 days over 90 days from the patient
    zero date. It starts at 300 then 600 cases; each later step adds the
    previous increment multiplied by a growth factor. Modelled deaths are
    1% of the modelled cases two steps earlier.

    Parameters
    ----------
    country : str
        Key of the global ``patient_zero`` table (and optionally of
        ``confinement``).
    m1 : float
        Growth factor per step before confinement takes effect.
    m2 : float
        Growth factor per step once confinement takes effect.
    delta_days : int
        Days between the confinement date and the switch from m1 to m2.

    Notes
    -----
    For a country without a confinement date, the "with confinement" curve
    only holds its two starting points; the remaining steps are NaN.
    """
    n = 3  # days per model step
    patient_zero_date = patient_zero[country]
    confinement_date = confinement.get(country)
    # Integer division: date_range expects an integer number of periods.
    periods = 90 // n

    df_tmp = pd.DataFrame({
        'date': pd.date_range(patient_zero_date, periods=periods, freq='{}D'.format(n)),
    })

    if confinement_date is not None:
        switch_date = confinement_date + datetime.timedelta(delta_days)

    # Build both curves in plain lists and assign them once, instead of
    # writing the frame cell by cell.
    free_growth = [300.0, 600.0]
    with_confinement = [300.0, 600.0]
    for step_date in df_tmp['date'].iloc[2:]:
        free_growth.append(
            (free_growth[-1] - free_growth[-2]) * m1 + free_growth[-1]
        )
        if confinement_date is None:
            with_confinement.append(float('nan'))
            continue
        factor = m1 if step_date < switch_date else m2
        with_confinement.append(
            (with_confinement[-1] - with_confinement[-2]) * factor + with_confinement[-1]
        )
    df_tmp['confirmed_model'] = free_growth
    df_tmp['confirmed_model_with_confinement'] = with_confinement

    df_tmp['deaths_model'] = df_tmp['confirmed_model'].shift(2) * 0.01
    df_tmp['deaths_model_with_confinement'] = df_tmp['confirmed_model_with_confinement'].shift(2) * 0.01

    country_rows = df[df['Country/Region'] == country]
    fig, ax = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(8, 6))

    # Top: estimated cases against both model curves.
    country_rows.plot(x='date', y=['confirmed_computed'], grid=True, ax=ax[0])
    df_tmp.plot(kind='line',
                x='date',
                y=['confirmed_model', 'confirmed_model_with_confinement'],
                grid=True, ax=ax[0])
    # The y-range follows the data so the growing model does not flatten it.
    ax[0].set_ylim(0, 1.5 * country_rows['confirmed_computed'].max())

    # Bottom: reported deaths against both modelled death curves.
    country_rows.plot(x='date', y='deaths', grid=True, ax=ax[1])
    df_tmp.plot(kind='line',
                x='date',
                y=['deaths_model', 'deaths_model_with_confinement'],
                grid=True, ax=ax[1])
    ax[1].set_ylim(0, 1.5 * country_rows['deaths'].max())

# Only countries with a patient zero date can be modelled.
options = sorted(patient_zero)

selection_slider = widgets.Dropdown(
    options=options,
    value='France',
    description='Country',
    disabled=False,
)

# Layout and style shared by the three sliders; each widget gets its own copy.
slider_layout = {'width': '400px'}
slider_style = {'description_width': 'initial'}

selection_slider_m1 = widgets.FloatSlider(
    value=2.0,
    min=1.0,
    max=3.0,
    step=0.1,
    description='R0 before confinement:',
    layout=dict(slider_layout),
    style=dict(slider_style),
)

selection_slider_m2 = widgets.FloatSlider(
    value=1.2,
    min=0.5,
    max=3.0,
    step=0.1,
    description='R0 after confinement:',
    layout=dict(slider_layout),
    style=dict(slider_style),
)

# Moves in steps of 3 days, the length of one model step.
selection_slider_delta_days = widgets.IntSlider(
    value=6,
    min=0,
    max=15,
    step=3,
    description='confinement delay:',
    layout=dict(slider_layout),
    style=dict(slider_style),
)

interact(plot_with_model,
         country=selection_slider,
         m1=selection_slider_m1,
         m2=selection_slider_m2,
         delta_days=selection_slider_delta_days)


interactive(children=(Dropdown(description='Country', index=5, options=('Austria', 'Belgium', 'Brazil', 'China…

<function __main__.plot_with_model(country, m1, m2, delta_days)>