# Table of contents:
* [1 - Import library](#import_library)
* [2 - Import and wrangle data](#import_data)
    * [2.1 - Import deces, reanimation, hospitalisation and gueris](#import_opencovid19-fr_data)
    * [2.2 - Enrich with french departement data](#enrich_with_departement)
    * [2.3 - Smooth, derive and more](#smooth-derive)
    * [2.4 - Display some data](#display_data)
* [3 - Data analysis](#data_analysis)
    * [3.1 -  Geographical evolution in France](#geographical_evolution)
    * [3.2 - Comparison between departements](#comparison_departements)

# 1 - Import library <a name="import_library"></a>

In [1]:
import os
import datetime
import pandas as pd
import numpy as np
from scipy.signal import savgol_filter
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib import cm
import math
%matplotlib inline

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy.io.shapereader as shpreader
import shapefile
from shapely.geometry import Polygon
from shapely.ops import cascaded_union, unary_union

from IPython.display import IFrame

import urllib.request
from zipfile import ZipFile

# 2 - Import and wrangle data <a name="import_data"></a>
Data are aggregated by the opencovid19-fr team from various sources (ministère de la Santé, Santé publique France, préfectures ...). <br>
More details on the origin and description of the data are available in the README of the GitHub repository.<br>
https://github.com/opencovid19-fr/data

## 2.1 - Import deces, reanimation, hospitalisation and gueris <a name="import_opencovid19-fr_data"></a>

In [2]:
# Download the consolidated key-figures CSV and convert its 'date' column
# to pandas datetimes in the same expression.
URL = 'https://raw.githubusercontent.com/opencovid19-fr/data/master/dist/chiffres-cles.csv'
df = pd.read_csv(URL).assign(date=lambda frame: pd.to_datetime(frame['date']))

In [3]:
# Keep only the rows published by 'sante-publique-france-data', restrict the
# frame to the indicators used below, and order it by area code then date.
df = (
    df.loc[df['source_type'] == 'sante-publique-france-data',
           ['date', 'maille_code', 'deces', 'reanimation', 'hospitalises', 'gueris']]
      .sort_values(by=['maille_code', 'date'])
      .reset_index(drop=True)
)

## 2.2 - Enrich with french departement data  <a name="enrich_with_departement"></a>

In [4]:
def rename_departement(df):
    """Build the 'DEP-xx' area code matching a departement number.

    Despite its name, ``df`` is a single departement identifier (one cell of
    the 'NUMÉRO' column), not a DataFrame.

    Parameters
    ----------
    df : int or str
        Departement number. Values convertible to an integer are zero-padded
        to two digits (1 -> 'DEP-01'); anything else (e.g. '2A') is used
        verbatim.

    Returns
    -------
    str
        The identifier prefixed with 'DEP-'.
    """
    try:
        return 'DEP-{:02d}'.format(int(df))
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not a numeric code"; anything else
        # (KeyboardInterrupt, SystemExit, ...) must propagate.
        return 'DEP-{}'.format(df)

In [5]:
# Tab-separated reference table of French departements; the last two lines of
# the file are skipped (skipfooter needs the python parsing engine).
URL = 'https://www.regions-et-departements.fr/fichiers/departements-francais.csv'
df_departement = pd.read_csv(URL, sep='\t', engine='python', skipfooter=2)

# Join key in the same 'DEP-xx' format as the 'maille_code' column of `df`.
df_departement['maille_code_dep'] = df_departement['NUMÉRO'].apply(rename_departement)

# Lower-case column names, then drop the raw number now encoded in the key.
df_departement = df_departement.rename(
    columns={
        'NOM': 'departement',
        'POPULATION': 'population',
        'SUPERFICIE (km²)': 'superficie',
        'DENSITE (habitants/km2)': 'densite',
        'REGION': 'region',
        'CHEF LIEU': 'chef lieu',
    }
).drop(columns=['NUMÉRO'])
df_departement.head()

Unnamed: 0,departement,region,chef lieu,superficie,population,densite,maille_code_dep
0,Ain,Auvergne-Rhône-Alpes,Bourg-en-Bresse,5762,631877,109.7,DEP-01
1,Aisne,Hauts-de-France,Laon,7369,538659,73.1,DEP-02
2,Allier,Auvergne-Rhône-Alpes,Moulins,7340,341613,46.5,DEP-03
3,Alpes-de-Haute-Provence,Provence-Alpes-Côte d'Azur,Digne,6925,161799,23.4,DEP-04
4,Hautes-Alpes,Provence-Alpes-Côte d'Azur,Gap,5549,140916,25.4,DEP-05


In [6]:
# Attach the departement attributes to every daily record. The default inner
# join keeps only rows whose 'maille_code' matches a departement key; the
# duplicated key column is dropped afterwards.
df = (
    df.merge(df_departement, left_on='maille_code', right_on='maille_code_dep')
      .drop(columns=['maille_code_dep'])
)

## 2.3 - Smooth, derive and more  <a name="smooth-derive"></a>

In [7]:
def smooth_data(df, param, window_length=7, polyorder=3):
    """Smooth one column of ``df`` with a Savitzky-Golay filter.

    The column is overwritten in ``df`` itself and the same frame is returned,
    so the function can be used on one group at a time.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the series to smooth (rows in chronological order).
    param : str
        Name of the column to smooth.
    window_length : int, optional
        Filter window size passed to ``savgol_filter`` (default 7).
    polyorder : int, optional
        Polynomial order passed to ``savgol_filter`` (default 3).

    Returns
    -------
    pandas.DataFrame
        ``df``, with ``param`` smoothed when the filter could be applied and
        left untouched otherwise (e.g. fewer rows than ``window_length``).

    Raises
    ------
    KeyError
        If ``param`` is not a column of ``df``.
    """
    try:
        df[param] = savgol_filter(df[param], window_length, polyorder)
    except (ValueError, TypeError, np.linalg.LinAlgError):
        # Best effort: the filter rejects series shorter than the window and
        # may fail on unusable values; keep the raw data in that case. Other
        # errors (wrong column name, missing import, interrupts) propagate.
        pass
    return df

In [8]:
def derive_data(df, param, window_length=7, polyorder=3):
    """Add a ``<param>_per_day`` column holding the daily variation of ``param``.

    The derivative is estimated with a first-order Savitzky-Golay filter; when
    the filter cannot be applied, it falls back to the difference between
    consecutive rows (whose first value is NaN).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the series to differentiate (rows in chronological
        order). It is modified in place.
    param : str
        Name of the column to differentiate.
    window_length : int, optional
        Filter window size passed to ``savgol_filter`` (default 7).
    polyorder : int, optional
        Polynomial order passed to ``savgol_filter`` (default 3).

    Returns
    -------
    pandas.DataFrame
        ``df`` with the extra column ``param + '_per_day'``.
    """
    try:
        df[param + '_per_day'] = savgol_filter(df[param], window_length, polyorder, deriv=1)
    except (ValueError, TypeError, np.linalg.LinAlgError):
        # The filter rejects series shorter than the window; use a plain
        # row-to-row difference instead. Unrelated errors propagate.
        df[param + '_per_day'] = df[param].diff()
    return df

In [9]:
# Smooth every indicator inside its own departement, then derive its daily
# variation from the smoothed values.
# A per-column transform is used instead of DataFrame groupby.apply: it always
# returns a result aligned on the original row index, so 'maille_code' stays a
# plain column (recent pandas versions otherwise prepend the group key to the
# index, which makes the next groupby('maille_code') ambiguous).
for param in ['deces', 'reanimation', 'hospitalises', 'gueris']:
    # transform renames each group after its key, hence the explicit
    # to_frame(name=param) to rebuild the column the helpers expect.
    df[param] = df.groupby('maille_code')[param].transform(
        lambda x: smooth_data(x.to_frame(name=param), param)[param]
    )
    df[param + '_per_day'] = df.groupby('maille_code')[param].transform(
        lambda x: derive_data(x.to_frame(name=param), param)[param + '_per_day']
    )

Data (deces, reanimation, hospitalises, gueris, both cumulative and per day) per 10,000 inhabitants

In [10]:
# Express every indicator (cumulative first, then daily) per 10,000
# inhabitants of the departement, in a new '<name>_per_cap' column.
for param in [base + suffix
              for suffix in ('', '_per_day')
              for base in ('deces', 'reanimation', 'hospitalises', 'gueris')]:
    df[param + '_per_cap'] = df[param].div(df['population']).mul(10000)

## 2.4 - Display some data <a name="display_data"></a>

In [11]:
# Preview the first rows of the merged frame, including the derived
# '_per_day' and '_per_cap' columns added above.
df.head()

Unnamed: 0,date,maille_code,deces,reanimation,hospitalises,gueris,departement,region,chef lieu,superficie,...,hospitalises_per_day,gueris_per_day,deces_per_cap,reanimation_per_cap,hospitalises_per_cap,gueris_per_cap,deces_per_day_per_cap,reanimation_per_day_per_cap,hospitalises_per_day_per_cap,gueris_per_day_per_cap
0,2020-03-18,DEP-01,0.0,0.166667,2.428571,1.119048,Ain,Auvergne-Rhône-Alpes,Bourg-en-Bresse,5762,...,-3.068216,-0.679989,0.0,0.002638,0.038434,0.01771,0.00192,-0.010772,-0.048557,-0.010761
1,2020-03-19,DEP-01,0.0,-0.309524,1.119048,0.857143,Ain,Auvergne-Rhône-Alpes,Bourg-en-Bresse,5762,...,-0.273715,-0.316327,0.0,-0.004898,0.01771,0.013565,0.000215,-0.002433,-0.004332,-0.005006
2,2020-03-20,DEP-01,0.0,-0.166667,1.904762,0.714286,Ain,Auvergne-Rhône-Alpes,Bourg-en-Bresse,5762,...,2.040627,0.321145,0.0,-0.002638,0.030145,0.011304,-0.000736,0.004839,0.032295,0.005082
3,2020-03-21,DEP-01,0.0,0.428571,4.619048,1.190476,Ain,Auvergne-Rhône-Alpes,Bourg-en-Bresse,5762,...,3.874811,1.232426,0.0,0.006783,0.0731,0.01884,-0.000933,0.011043,0.061322,0.019504
4,2020-03-22,DEP-01,0.0,1.047619,9.142857,3.142857,Ain,Auvergne-Rhône-Alpes,Bourg-en-Bresse,5762,...,5.250567,2.794974,0.0,0.016579,0.144694,0.049738,-0.001083,0.016213,0.083095,0.044233


### Print list of departements

In [12]:
# Distinct departement names, in order of first appearance in the frame.
print(pd.unique(df['departement']))

['Ain' 'Aisne' 'Allier' 'Alpes-de-Haute-Provence' 'Hautes-Alpes'
 'Alpes-Maritimes' 'Ardèche' 'Ardennes' 'Ariège' 'Aube' 'Aude' 'Aveyron'
 'Bouches-du-Rhône' 'Calvados' 'Cantal' 'Charente' 'Charente-Maritime'
 'Cher' 'Corrèze' "Côte-d'Or" "Côtes d'Armor" 'Creuse' 'Dordogne' 'Doubs'
 'Drôme' 'Eure' 'Eure-et-Loir' 'Finistère' 'Corse-du-Sud' 'Haute-Corse'
 'Gard' 'Haute-Garonne' 'Gers' 'Gironde' 'Hérault' 'Ille-et-Vilaine'
 'Indre' 'Indre-et-Loire' 'Isère' 'Jura' 'Landes' 'Loir-et-Cher' 'Loire'
 'Haute-Loire' 'Loire-Atlantique' 'Loiret' 'Lot' 'Lot-et-Garonne' 'Lozère'
 'Maine-et-Loire' 'Manche' 'Marne' 'Haute-Marne' 'Mayenne'
 'Meurthe-et-Moselle' 'Meuse' 'Morbihan' 'Moselle' 'Nièvre' 'Nord' 'Oise'
 'Orne' 'Pas-de-Calais' 'Puy-de-Dôme' 'Pyrénées-Atlantiques'
 'Hautes-Pyrénées' 'Pyrénées-Orientales' 'Bas-Rhin' 'Haut-Rhin' 'Rhône'
 'Haute-Saône' 'Saône-et-Loire' 'Sarthe' 'Savoie' 'Haute-Savoie' 'Paris'
 'Seine-Maritime' 'Seine-et-Marne' 'Yvelines' 'Deux-Sèvres' 'Somme' 'Tarn'
 'Tarn-et-Garo

### Print the 10 departements with the highest death counts

In [13]:
# Highest (smoothed) cumulative death count reached in each departement,
# ranked in decreasing order; the 'index' column keeps the alphabetical rank.
(
    df.groupby('departement', as_index=False)['deces']
      .max()
      .sort_values(by='deces', ascending=False)
      .reset_index()
      .head(10)
)

Unnamed: 0,index,departement,deces
0,76,Paris,1090.380952
1,93,Val-de-Marne,680.952381
2,36,Haut-Rhin,626.690476
3,46,Hauts-de-Seine,621.97619
4,86,Seine-St-Denis,609.97619
5,71,Moselle,542.619048
6,92,Val-D'Oise,422.619048
7,11,Bas-Rhin,397.833333
8,81,Rhône,370.166667
9,73,Nord,332.285714


# 3 - Data analysis <a name="data_analysis"></a>

## 3.1 -  Geographical evolution in France <a name="geographical_evolution"></a>

In [14]:
# Get shapefile of French departement
url = 'https://www.data.gouv.fr/fr/datasets/r/3096e551-c68d-40ce-8972-a228c94c0ad1'
urllib.request.urlretrieve(url, 'shapefile_departement.zip')

if os.path.isfile('shapefile'):
    os.mkdir('shapefile')

with ZipFile('shapefile_departement.zip', 'r') as zipObj:
   zipObj.extractall('shapefile')

In [15]:
class MplColorHelper:
    """Translate scalar values into RGBA colours of a named matplotlib colormap.

    Attributes set by the constructor: ``cmap_name``, ``cmap``, ``norm`` and
    ``scalarMap`` (the ScalarMappable also handed to ``plt.colorbar``).
    """

    def __init__(self, cmap_name, start_val, stop_val):
        """Prepare the colormap and the value range it spans.

        cmap_name -- name understood by ``plt.get_cmap``
        start_val -- value mapped to the low end of the colormap
        stop_val  -- value mapped to the high end of the colormap
        """
        # 10 is passed as the second argument of get_cmap (number of colour
        # levels requested).
        palette = plt.get_cmap(cmap_name, 10)
        scale = mpl.colors.Normalize(vmin=start_val, vmax=stop_val)

        self.cmap_name = cmap_name
        self.cmap = palette
        self.norm = scale
        self.scalarMap = cm.ScalarMappable(norm=scale, cmap=palette)

    def get_rgb(self, val):
        """Return the RGBA colour associated with ``val``."""
        mappable = self.scalarMap
        return mappable.to_rgba(val)


In [16]:
def _colorbar_bound(value):
    """Round a positive ``value`` up to the next multiple of its power of ten.

    For instance 1090.4 -> 2000 and 0.083 -> 0.09. Returns 0 when ``value`` is
    not a finite, strictly positive number (no order of magnitude exists).
    """
    if not math.isfinite(value) or value <= 0:
        return 0
    exponent = math.floor(math.log10(value))
    return (math.floor(value / 10**exponent) + 1) * 10**exponent


def plot_contagion_map(date_string, param):
    """Draw a choropleth map of France for one indicator on one day.

    The main axes show the whole country; a smaller inset zooms on Paris and
    its three neighbouring departements (75, 92, 93, 94).

    Relies on notebook-level names: ``df`` (the indicator frame),
    ``departments`` / ``shapes_dep`` (shapefile records and shapes) and
    ``MplColorHelper``.

    Parameters
    ----------
    date_string : str
        Day to display, formatted as ' %d %b %Y ' (with the surrounding
        spaces), as produced for the date slider.
    param : str
        Name of the column of ``df`` to display.
    """
    fig = plt.figure(figsize=(12, 12))

    # Main map: whole country.
    ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())
    ax.set_extent([-7, 10, 40, 54], crs=ccrs.PlateCarree())
    for feature in (cfeature.BORDERS, cfeature.LAKES, cfeature.LAND,
                    cfeature.OCEAN, cfeature.RIVERS, cfeature.COASTLINE):
        ax.add_feature(feature)

    # Inset map; it is resized and moved at the end of the function.
    ax2 = fig.add_subplot(1, 2, 2, projection=ccrs.PlateCarree())
    ax2.set_extent([2.1, 2.65, 48.65, 49.05], crs=ccrs.PlateCarree())
    ax2.add_feature(cfeature.LAND)

    ax.text(0.5, 1.05, date_string,
            transform=ax.transAxes, fontsize=20,
            horizontalalignment='center', verticalalignment='center',
            )

    # Colour scale, computed over ALL dates so colours stay comparable when
    # the slider moves. Bounds are rounded up to y*10^x.
    upper = _colorbar_bound(df[param].max())
    if param in ['reanimation_per_day', 'hospitalises_per_day',
                 'reanimation_per_day_per_cap', 'hospitalises_per_day_per_cap']:
        # Signed indicators: diverging colormap, kept symmetric around zero
        # so that white always means "no variation".
        lower = _colorbar_bound(abs(df[param].min()))
        limit = max(upper, lower) or 1  # 1 when the column has no usable extreme
        COL = MplColorHelper('bwr', -limit, limit)
    else:
        COL = MplColorHelper('Reds', 0, upper or 1)

    # Values of the selected day, indexed by area code. Only the first row of
    # each code is kept, and the lookup is built once rather than per shape.
    selected_day = datetime.datetime.strptime(date_string, ' %d %b %Y ')
    day_rows = df.loc[df['date'] == selected_day].drop_duplicates('maille_code')
    value_by_code = day_rows.set_index('maille_code')[param]

    for department, shape in zip(departments, shapes_dep):
        code = 'DEP-' + department[0]
        if code not in value_by_code.index:
            # No data for this departement on that day: leave it uncoloured.
            continue
        val = value_by_code[code]

        if len(shape.parts) == 1:
            # A single polygon
            poly = Polygon(shape.points)
        else:
            # Several parts: build one polygon per part and merge them
            ind = list(shape.parts) + [len(shape.points)]
            pols = [Polygon(shape.points[ind[i]:ind[i+1]]) for i in range(0, len(shape.parts))]
            poly = unary_union(pols)

        ax.add_geometries([poly], ccrs.PlateCarree(),
                          facecolor=COL.get_rgb(val),
                          edgecolor=(0, 0, 0)
                          )
        if code in ['DEP-75', 'DEP-92', 'DEP-93', 'DEP-94']:
            ax2.add_geometries([poly], ccrs.PlateCarree(),
                               facecolor=COL.get_rgb(val),
                               edgecolor=(0, 0, 0)
                               )

    sm = COL.scalarMap
    plt.colorbar(sm, ax=ax, orientation='vertical', pad=0.05, shrink=0.5)

    # Shrink the inset and shift it inside the figure.
    pos = ax2.get_position()
    pos = [pos.x0 + 0.05, pos.y0 + 0.25, pos.width / 2.5, pos.height / 2.5]
    ax2.set_position(pos)
    plt.show()
    
# One slider position per calendar day covered by the data; labels use the
# ' %d %b %Y ' format that plot_contagion_map parses back.
dates = list(pd.date_range(df['date'].min(), df['date'].max(), freq='D'))
options = list(map(lambda day: day.strftime(' %d %b %Y '), dates))

selection_slider = widgets.SelectionSlider(
    description='Date',
    options=options,
    value=options[0],
    orientation='horizontal',
    layout={'width': '500px'},
)

# Indicators selectable on the map: the four base series with each derived
# suffix, followed by the static departement attributes.
options = [base + suffix
           for suffix in ('', '_per_day', '_per_cap', '_per_day_per_cap')
           for base in ('deces', 'reanimation', 'hospitalises', 'gueris')
           ] + ['population', 'superficie', 'densite']

selection_param = widgets.Dropdown(
    description='Param',
    options=options,
    value=options[0],
    disabled=False,
)

# Departement outlines read from the extracted shapefile; both lists are used
# as globals by plot_contagion_map.
shpfilename = 'shapefile/departements-20140306-100m.shp'
reader = shapefile.Reader(shpfilename, encoding='ISO8859-1')
shapes_dep, departments = reader.shapes(), reader.records()


interact(plot_contagion_map, date_string=selection_slider, param=selection_param)



interactive(children=(SelectionSlider(description='Date', layout=Layout(width='500px'), options=(' 18 Mar 2020…

<function __main__.plot_contagion_map(date_string, param)>

## 3.2 - Comparison between departements <a name="comparison_departements"></a>

In [17]:
def plot_compare_contagion(param1, param2, departements):
    """Plot two indicators over time for a selection of departements.

    param1 is drawn on the upper axes and param2 on the lower one; both share
    their x axis, whose limits are set to the 90 days ending today. A single
    legend, placed outside the axes, lists the departements.

    Reads the notebook-level frame ``df``.
    """
    today = datetime.date.today()
    xlim = (today - datetime.timedelta(days=90), today)

    fig, (top_ax, bottom_ax) = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(8, 6))
    for departement in departements:
        subset = df[df['departement'] == departement]
        subset.plot(x='date', y=param1, label=departement, xlim=xlim, grid=True, ax=top_ax)
        subset.plot(x='date', y=param2, label=departement, grid=True, ax=bottom_ax)

    # Collect the entries once, then replace the per-axes legends with a
    # figure-level one.
    handles, labels = top_ax.get_legend_handles_labels()
    for axis, ylabel in ((top_ax, param1), (bottom_ax, param2)):
        axis.legend().remove()
        axis.set_ylabel(ylabel)
    fig.legend(handles, labels, bbox_to_anchor=(1.3, 0.9))
    plt.tight_layout()
    

# Indicators offered in both dropdowns: the four base series combined with
# each derived suffix.
options = [base + suffix
           for suffix in ('', '_per_day', '_per_cap', '_per_day_per_cap')
           for base in ('deces', 'reanimation', 'hospitalises', 'gueris')]

selection_param_1 = widgets.Dropdown(
    description='Param1',
    options=options,
    value=options[0],
    disabled=False,
)

selection_param_2 = widgets.Dropdown(
    description='Param2',
    options=options,
    value=options[1],
    disabled=False,
)

# Pre-select the ten departements whose maximum 'deces' value is highest.
country_list = (
    df.groupby(by='departement')
      .agg({'deces': 'max'})
      .sort_values(by='deces', ascending=False)
      .head(10)
      .index
)

selection_list = widgets.SelectMultiple(
    description='departement',
    options=df['departement'].unique(),
    value=list(country_list),
    rows=10,
    disabled=False,
)

interact(
    plot_compare_contagion,
    param1=selection_param_1,
    param2=selection_param_2,
    departements=selection_list,
)



interactive(children=(Dropdown(description='Param1', options=('deces', 'reanimation', 'hospitalises', 'gueris'…

<function __main__.plot_compare_contagion(param1, param2, departements)>