# Table of contents:
* [1 - Import library](#import_library)
* [2 - Import and wrangle data](#import_data)
    * [2.1 - Death by day](#import_death_by_day)
    * [2.2 - Death by sex, age, place](#import_death_by_sex_age_place)
* [3 - Data analysis](#data_analysis)
    * [3.1 - Geographical evolution in France](#geographical_evolution)
    * [3.2 - Evolution by departement](#evolution_departement)

# 1 - Import library <a name="import_library"></a>

In [1]:
import os
import datetime
import pandas as pd
import numpy as np
from scipy.signal import savgol_filter
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib import cm
import math
%matplotlib inline

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy.io.shapereader as shpreader
import shapefile
from shapely.geometry import Polygon
from shapely.ops import cascaded_union, unary_union

from IPython.display import IFrame

import urllib.request
from zipfile import ZipFile

# 2 - Import and wrangle data <a name="import_data"></a>
The data are aggregated and published by INSEE (the French national statistics institute): <br>
https://www.insee.fr/fr/information/4470857

In [2]:
# Base URL of the INSEE file repository. No trailing slash: every download URL
# in this notebook is built as '{base}/{file}', so a trailing slash here would
# produce a double slash in the request path.
insee_path = 'https://www.insee.fr/fr/statistiques/fichier/4470857'
# Publication date; it is the file-name prefix of every INSEE archive/CSV read below.
insee_date = '2020-05-07'

In [3]:
def comp_avg_2018_2019(df, param):
    """Add a ``<param>_variation`` column: percent deviation from the pre-2020 mean.

    The baseline is the mean of ``df[param]`` over the rows whose ``annee`` is
    strictly below 2020. Every row (2020 included) then receives
    ``(value / baseline - 1) * 100``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``annee`` and ``param``.
    param : str
        Name of the column to compare against its pre-2020 mean.

    Returns
    -------
    pandas.DataFrame
        A new frame with the ``<param>_variation`` column appended. ``df`` itself
        is left untouched: this function is called through ``groupby().apply()``,
        where pandas does not support mutating the group that is passed in.
    """
    baseline = df.loc[df['annee'] < 2020, param].mean()
    # NaN/inf values appear when a group has no pre-2020 row or a zero baseline.
    variation = (df[param] / baseline - 1) * 100
    return df.assign(**{'{}_variation'.format(param): variation})

## 2.1 - Death by day <a name="import_death_by_day"></a>

In [4]:
# Daily deaths per zone: a ';'-separated CSV read straight from the zip archive URL.
URL='{0}/{1}_deces_quotidiens_departement_csv.zip'.format(insee_path, insee_date)
df = pd.read_csv(URL, delimiter=';', encoding = "ISO-8859-1")
df['date'] = pd.to_datetime(df['Date_evenement'], format='%d/%m/%Y')

# Reshape from one 'Total_deces_<year>' column per year to a long table with a
# single 'deces' column plus an 'annee' column identifying the year.
yearly_frames = []
for year in (2018, 2019, 2020):
    source_col = 'Total_deces_{}'.format(year)
    yearly_frames.append(
        df[['date', 'Zone', source_col]]
        .rename(columns={source_col: 'deces'})
        .assign(annee=year)
    )
# Per-year frames stay available under their historical names.
df_2018, df_2019, df_2020 = yearly_frames

df_date = pd.concat(yearly_frames, axis=0).reset_index(drop=True)

# group_keys=False keeps the flat index of df_date. Since pandas 2.0 the default
# (True) would prepend 'date' and 'Zone' as index levels while they are also columns.
for param in ['deces']:
    df_date = df_date.groupby(['date', 'Zone'], group_keys=False) \
                     .apply(lambda x: comp_avg_2018_2019(x, param))

In [5]:
df_date.head()

Unnamed: 0,date,Zone,deces,annee,deces_variation
0,2020-03-01,France,2136.0,2018,6.586826
1,2020-03-02,France,4327.0,2018,6.720927
2,2020-03-03,France,6455.0,2018,7.717981
3,2020-03-04,France,8545.0,2018,7.113757
4,2020-03-05,France,10746.0,2018,7.331203


## 2.2 - Death by sex, age, place <a name="import_death_by_sex_age_place"></a>

In [6]:
# Download and unpack the INSEE archive holding deaths by sex, age and place
url = '{0}/{1}_deces_sexe_age_lieu_csv.zip'.format(insee_path, insee_date)
urllib.request.urlretrieve(url, 'insee_stat.zip')

# Create the extraction directory if it is missing; no error when it already exists
os.makedirs('insee_stat', exist_ok=True)

with ZipFile('insee_stat.zip', 'r') as zipObj:
    zipObj.extractall('insee_stat')

In [7]:
ls insee_stat

2020-04-30_deces_parage_regdept.csv
2020-04-30_deces_parlieu_jour_France.csv
2020-04-30_deces_parlieu_regdept.csv
2020-04-30_deces_parsexe_age_jour_France.csv
2020-04-30_deces_parsexe_regdept.csv
2020-05-07_deces_parage_regdept.csv
2020-05-07_deces_parlieu_jour_France.csv
2020-05-07_deces_parlieu_regdept.csv
2020-05-07_deces_parsexe_age_jour_France.csv
2020-05-07_deces_parsexe_regdept.csv
Documentation.pdf


In [8]:
path = 'insee_stat/{}_deces_parsexe_regdept.csv'.format(insee_date)
df_sexe = pd.read_csv(path, delimiter=';', encoding = "ISO-8859-1")
# Relabel zone code 'FE' as 'France' (presumably to match the label used in df_date)
df_sexe['Zone'] = np.where(df_sexe['Zone']=='FE', 'France', df_sexe['Zone'])

# Reshape from '<measure>_Deces<year>' columns to one row set per year with
# year-independent column names and an 'annee' column.
sexe_measures = ['Total', 'Femmes', 'Hommes']
yearly_frames = []
for year in (2018, 2019, 2020):
    rename_map = {'{}_Deces{}'.format(measure, year): measure for measure in sexe_measures}
    yearly_frames.append(
        df_sexe[['Zone', 'LibelleZone'] + list(rename_map)]
        .rename(columns=rename_map)
        .assign(annee=year)
    )
# Per-year frames stay available under their historical names.
df_sexe_2018, df_sexe_2019, df_sexe_2020 = yearly_frames

df_sexe = pd.concat(yearly_frames, axis=0).reset_index(drop=True)

# group_keys=False keeps the flat index. Since pandas 2.0 the default (True)
# would add 'Zone' as an index level, making the next groupby('Zone') ambiguous.
for param in sexe_measures:
    df_sexe = df_sexe.groupby('Zone', group_keys=False) \
                     .apply(lambda x: comp_avg_2018_2019(x, param))

In [9]:
path = 'insee_stat/{}_deces_parage_regdept.csv'.format(insee_date)
df_age = pd.read_csv(path, delimiter=';', encoding = "ISO-8859-1")
# Relabel zone code 'FE' as 'France' (presumably to match the label used in df_date)
df_age['Zone'] = np.where(df_age['Zone']=='FE', 'France', df_age['Zone'])

# Reshape from per-year columns ('Total_Deces<year>', 'Total_<band>_Deces<year>')
# to one row set per year with year-independent names and an 'annee' column.
age_bands = ['0_24ans', '25_49ans', '50_64ans', '65_74ans', '75_84ans', '85ans_plus']
yearly_frames = []
for year in (2018, 2019, 2020):
    # source column -> short name; insertion order is also the selected column order
    rename_map = {'Total_Deces{}'.format(year): 'Total'}
    for band in age_bands:
        rename_map['Total_{}_Deces{}'.format(band, year)] = band
    yearly_frames.append(
        df_age[['Zone', 'LibelleZone'] + list(rename_map)]
        .rename(columns=rename_map)
        .assign(annee=year)
    )
# Per-year frames stay available under their historical names.
df_age_2018, df_age_2019, df_age_2020 = yearly_frames

df_age = pd.concat(yearly_frames, axis=0).reset_index(drop=True)

# group_keys=False keeps the flat index. Since pandas 2.0 the default (True)
# would add 'Zone' as an index level, making the next groupby('Zone') ambiguous.
for param in ['Total'] + age_bands:
    df_age = df_age.groupby('Zone', group_keys=False) \
                   .apply(lambda x: comp_avg_2018_2019(x, param))

In [10]:
path = 'insee_stat/{}_deces_parlieu_regdept.csv'.format(insee_date)
df_lieu = pd.read_csv(path, delimiter=';', encoding = "ISO-8859-1")
# Relabel zone code 'FE' as 'France' (presumably to match the label used in df_date)
df_lieu['Zone'] = np.where(df_lieu['Zone']=='FE', 'France', df_lieu['Zone'])

# Reshape from '<measure>_Deces<year>' columns to one row set per year with
# year-independent column names and an 'annee' column.
lieu_measures = ['Total', 'Hopital_Clinique', 'Maison_retraite', 'Domicile']
yearly_frames = []
for year in (2018, 2019, 2020):
    rename_map = {'{}_Deces{}'.format(measure, year): measure for measure in lieu_measures}
    yearly_frames.append(
        df_lieu[['Zone', 'LibelleZone'] + list(rename_map)]
        .rename(columns=rename_map)
        .assign(annee=year)
    )
# Per-year frames stay available under their historical names.
df_lieu_2018, df_lieu_2019, df_lieu_2020 = yearly_frames

df_lieu = pd.concat(yearly_frames, axis=0).reset_index(drop=True)

# group_keys=False keeps the flat index. Since pandas 2.0 the default (True)
# would add 'Zone' as an index level, making the next groupby('Zone') ambiguous.
for param in lieu_measures:
    df_lieu = df_lieu.groupby('Zone', group_keys=False) \
                     .apply(lambda x: comp_avg_2018_2019(x, param))

In [11]:
# Columns that df_sexe already provides under the same names; they are left out
# of the other two frames so the merges do not create '_x'/'_y' duplicates.
shared_cols = ['LibelleZone', 'Total', 'Total_variation']

# Merge the place-of-death columns onto the by-sex frame (inner join on Zone + year)
merge_list = [col for col in df_lieu.columns if col not in shared_cols]
df = df_sexe.merge(df_lieu[merge_list], on=['Zone', 'annee'])

# Then the age-band columns
merge_list = [col for col in df_age.columns if col not in shared_cols]
df = df.merge(df_age[merge_list], on=['Zone', 'annee'])

# Keep only the zones that also appear in the daily series
df = df[df['Zone'].isin(df_date['Zone'].unique())]

In [12]:
df.head()

Unnamed: 0,Zone,LibelleZone,Total,Femmes,Hommes,annee,Total_variation,Femmes_variation,Hommes_variation,Hopital_Clinique,...,50_64ans,65_74ans,75_84ans,85ans_plus,0_24ans_variation,25_49ans_variation,50_64ans_variation,65_74ans_variation,75_84ans_variation,85ans_plus_variation
0,Dept_01,Ain,782,418,364,2018,4.475618,4.630788,4.297994,296,...,64,99,154,429,0.0,19.298246,14.285714,10.0,1.650165,2.021403
1,Dept_02,Aisne,1034,528,506,2018,9.129288,9.657321,8.583691,641,...,109,143,224,519,0.0,19.298246,-0.909091,-4.347826,8.21256,15.977654
2,Dept_03,Allier,790,385,405,2018,2.530824,2.666667,2.402023,458,...,77,117,182,391,33.333333,35.483871,1.986755,6.849315,4.297994,-0.761421
3,Dept_04,Alpes-de-Haute-Provence,356,171,185,2018,9.3702,4.587156,14.197531,152,...,29,38,69,213,-100.0,0.0,28.888889,5.555556,-1.428571,12.401055
4,Dept_05,Hautes-Alpes,243,115,128,2018,0.621118,-4.958678,6.224066,140,...,19,34,46,133,0.0,20.0,-5.0,7.936508,-14.018692,4.724409


# 3 - Data analysis <a name="data_analysis"></a>

## 3.1 -  Geographical evolution in France <a name="geographical_evolution"></a>

In [13]:
# Download and unpack the shapefile of the French departments
url = 'https://www.data.gouv.fr/fr/datasets/r/3096e551-c68d-40ce-8972-a228c94c0ad1'
urllib.request.urlretrieve(url, 'shapefile_departement.zip')

# Create the extraction directory if it is missing; no error when it already exists
os.makedirs('shapefile', exist_ok=True)

with ZipFile('shapefile_departement.zip', 'r') as zipObj:
    zipObj.extractall('shapefile')

In [14]:
class MplColorHelper:
    """Map scalar values to RGBA colours on a discretised matplotlib colormap."""

    def __init__(self, cmap_name, start_val, stop_val, n_colors=10):
        """Build the colormap, the normalisation and the ScalarMappable.

        cmap_name: name of a matplotlib colormap (e.g. 'bwr', 'Reds').
        start_val, stop_val: data values mapped to the two ends of the colormap.
        n_colors: number of discrete colour levels the colormap is resampled to
            (defaults to 10, the value that used to be hard-coded).
        """
        self.cmap_name = cmap_name
        self.cmap = plt.get_cmap(cmap_name, n_colors)
        self.norm = mpl.colors.Normalize(vmin=start_val, vmax=stop_val)
        # Also used by callers to draw the matching colorbar
        self.scalarMap = cm.ScalarMappable(norm=self.norm, cmap=self.cmap)

    def get_rgb(self, val):
        """Return the colour for ``val`` as produced by ``ScalarMappable.to_rgba``
        (an RGBA value, despite the method name)."""
        return self.scalarMap.to_rgba(val)

In [15]:
def plot_deces_map(variation, param):
    """Draw a department-level choropleth of ``param`` with an inset map.

    Reads the module-level ``df_tmp`` (one value per ``Zone``), plus
    ``departments`` and ``shapes_dep`` (shapefile records and shapes).

    Parameters
    ----------
    variation : bool
        When true, plot the ``<param>_variation`` column instead of ``param``.
    param : str
        Name of the column of ``df_tmp`` to display.
    """
    if variation:
        param = param+'_variation'

    fig = plt.figure(figsize=(12, 12))
    ax = fig.add_subplot(1, 1, 1, projection = ccrs.PlateCarree())
    ax.set_extent([-7, 10, 40, 54], crs=ccrs.PlateCarree())
    ax.add_feature(cfeature.BORDERS)
    ax.add_feature(cfeature.LAKES)
    ax.add_feature(cfeature.LAND)
    ax.add_feature(cfeature.OCEAN)
    ax.add_feature(cfeature.RIVERS)
    ax.add_feature(cfeature.COASTLINE)

    # Inset axes zoomed on Dept_75/92/93/94; moved and shrunk at the end
    ax2 = fig.add_subplot(1, 2, 2, projection = ccrs.PlateCarree())
    ax2.set_extent([2.1, 2.65, 48.65, 49.05], crs=ccrs.PlateCarree())
    ax2.add_feature(cfeature.LAND)

    # Fixed (not data-driven) colour scales: diverging for variations in percent,
    # sequential for raw counts. No log10-based bound is computed, so zero or
    # negative extrema in the data cannot raise here.
    if 'variation' in param:
        COL = MplColorHelper('bwr', -200, 200)
    else:
        COL = MplColorHelper('Reds', 0, 5000)

    known_zones = set(df_tmp['Zone'].unique())
    inset_zones = {'Dept_75', 'Dept_92', 'Dept_93', 'Dept_94'}

    for department, shape in zip(departments, shapes_dep):
        # The first record field is used as the department code of the 'Dept_<code>' zones
        zone = 'Dept_'+department[0]
        if zone not in known_zones:
            continue

        if len(shape.parts) == 1:
            # A single polygon
            poly = Polygon(shape.points)
        else:
            # Several parts: build one polygon per part and merge them
            ind = list(shape.parts) + [len(shape.points)]
            pols = [Polygon(shape.points[ind[i]:ind[i+1]]) for i in range(0, len(shape.parts))]
            poly = unary_union(pols)

        val = df_tmp.loc[df_tmp['Zone'] == zone, param].values[0]
        ax.add_geometries([poly], ccrs.PlateCarree(),
                          facecolor=COL.get_rgb(val),
                          edgecolor=(0,0,0)
                         )
        if zone in inset_zones:
            ax2.add_geometries([poly], ccrs.PlateCarree(),
                               facecolor=COL.get_rgb(val),
                               edgecolor=(0,0,0)
                              )

    sm = COL.scalarMap
    plt.colorbar(sm,ax=ax, orientation='vertical', pad=0.05, shrink=0.5)

    # Shift and shrink the inset relative to its default position
    pos=ax2.get_position()
    pos = [pos.x0 + 0.05, pos.y0 + 0.25,  pos.width / 2.5, pos.height / 2.5]
    ax2.set_position(pos)
    plt.show()

# Rows of year 2020 for every zone except 'France'; rows containing NaN are dropped
mask = (df['annee'] == 2020) & (df['Zone'] != 'France')
df_tmp = df[mask].copy().dropna().reset_index()

# Checkbox switching between raw counts and the '_variation' columns
selection_variation = widgets.Checkbox(value=False, description='variation', indent=True)

# Columns offered in the dropdown: total, age bands, places, sexes
options = [
    'Total',
    '0_24ans', '25_49ans', '50_64ans', '65_74ans', '75_84ans', '85ans_plus',
    'Hopital_Clinique', 'Maison_retraite', 'Domicile',
    'Femmes', 'Hommes',
]

selection_param = widgets.Dropdown(options=options, value=options[0],
                                   description='Param', disabled=False)

# Department shapes and their attribute records, read by plot_deces_map
shpfilename = 'shapefile/departements-20140306-100m.shp'
reader = shapefile.Reader(shpfilename, encoding='ISO8859-1')
shapes_dep = reader.shapes()
departments = reader.records()


interact(plot_deces_map, variation=selection_variation, param=selection_param)

interactive(children=(Checkbox(value=False, description='variation'), Dropdown(description='Param', options=('…

<function __main__.plot_deces_map(variation, param)>

## 3.2 - Evolution by departement <a name="evolution_departement"></a>

In [16]:
def plot_departement(variation, departement):
    """Plot four panels for one zone: daily deaths, then deaths by age, place and sex.

    Reads the module-level ``df_date`` (daily series) and ``df`` (per-year totals).

    Parameters
    ----------
    variation : bool
        When true, plot the ``*_variation`` columns instead of the raw counts.
    departement : str
        Value of the ``Zone`` column to display.
    """
    # Anchor the 70-day window on the last date present in the data rather than
    # on today's date, so the daily curve stays inside the axis on any re-run.
    end_date = df_date['date'].max()
    start_date = end_date - datetime.timedelta(days=70)
    xlim=(start_date, end_date)

    fig, ax = plt.subplots(nrows=4, ncols=1, figsize=(8, 10))

    values = 'deces'
    y_age = ['0_24ans', '25_49ans', '50_64ans', '65_74ans', '75_84ans', '85ans_plus']
    y_lieu = ['Hopital_Clinique', 'Maison_retraite', 'Domicile']
    y_sexe = ['Femmes', 'Hommes']

    if variation:
        values = values + '_variation'
        y_age = [val + '_variation' for val in y_age]
        y_lieu = [val + '_variation' for val in y_lieu]
        y_sexe = [val + '_variation' for val in y_sexe]

    # Panel 0: one line per year over the shared date axis
    mask = df_date['Zone'] == departement
    df_date[mask].pivot(index='date', columns='annee', values=values) \
                 .plot(xlim=xlim, grid=True, ax=ax[0])

    # Panels 1-3: bars per year, grouped by age band, place and sex
    mask = df['Zone']==departement
    by_year = df[mask].set_index('annee')
    by_year.plot(kind='bar', y=y_age, grid=True, ax=ax[1])
    by_year.plot(kind='bar', y=y_lieu, grid=True, ax=ax[2])
    by_year.plot(kind='bar', y=y_sexe, grid=True, ax=ax[3])

    for i in range(0,4):
        ax[i].set_ylabel(values)
        # Legend outside the axes, on the right
        ax[i].legend(loc='center left', bbox_to_anchor=(1, 0.5))

    plt.tight_layout()

# Checkbox switching between raw counts and the '_variation' columns
selection_variation = widgets.Checkbox(value=False, description='variation', indent=True)

# One dropdown entry per distinct zone of df; the first one is preselected
zone_choices = df['Zone'].unique()
selection_list = widgets.Dropdown(options=zone_choices,
                                  value=zone_choices[0],
                                  description='departement',
                                  rows=10,
                                  disabled=False)

interact(plot_departement, variation=selection_variation, departement=selection_list)

interactive(children=(Checkbox(value=False, description='variation'), Dropdown(description='departement', opti…

<function __main__.plot_departement(variation, departement)>