https://www.data.gouv.fr/fr/datasets/chiffres-cles-concernant-lepidemie-de-covid19-en-france/

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# load_timeseries

In [2]:
#hide
import requests
import io
import os
os.environ['NO_PROXY'] = 'raw.githubusercontent.com'

def load_timeseries( 
                    base_url='https://raw.githubusercontent.com/opencovid19-fr/data/master/dist/'):
    # Thanks to kasparthommen for the suggestion to directly downloadCSSEGISandData
    url = f'{base_url}/chiffres-cles.csv'
    print(url)
    csv = requests.get(url).text
    df = pd.read_csv(io.StringIO(csv))
    return df


In [3]:
df_france = load_timeseries()

https://raw.githubusercontent.com/opencovid19-fr/data/master/dist//chiffres-cles.csv


# Recuperation du fichier brut

In [4]:
df_france

Unnamed: 0,date,granularite,maille_code,maille_nom,cas_confirmes,deces,reanimation,hospitalises,gueris,depistes,source_nom,source_url,source_type
0,2020-01-24,departement,DEP-16,Charente,0.0,,,,,,ARS Nouvelle-Aquitaine,https://www.nouvelle-aquitaine.ars.sante.fr/co...,agences-regionales-sante
1,2020-01-24,departement,DEP-17,Charente-Maritime,0.0,,,,,,ARS Nouvelle-Aquitaine,https://www.nouvelle-aquitaine.ars.sante.fr/co...,agences-regionales-sante
2,2020-01-24,departement,DEP-19,Corrèze,0.0,,,,,,ARS Nouvelle-Aquitaine,https://www.nouvelle-aquitaine.ars.sante.fr/co...,agences-regionales-sante
3,2020-01-24,departement,DEP-23,Creuse,0.0,,,,,,ARS Nouvelle-Aquitaine,https://www.nouvelle-aquitaine.ars.sante.fr/co...,agences-regionales-sante
4,2020-01-24,departement,DEP-24,Dordogne,0.0,,,,,,ARS Nouvelle-Aquitaine,https://www.nouvelle-aquitaine.ars.sante.fr/co...,agences-regionales-sante
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3554,2020-03-25,region,REG-84,Auvergne-Rhône-Alpes,2093.0,,,,,,Santé publique France,https://www.santepubliquefrance.fr/maladies-et...,sante-publique-france
3555,2020-03-25,region,REG-93,Provence-Alpes-Côte d'Azur,1927.0,,,,,,Santé publique France,https://www.santepubliquefrance.fr/maladies-et...,sante-publique-france
3556,2020-03-25,region,REG-94,Corse,233.0,15.0,9.0,39.0,,,ARS Corse,https://www.corse.ars.sante.fr/informations-ut...,agences-regionales-sante
3557,2020-03-25,region,REG-94,Corse,225.0,,,,,,Santé publique France,https://www.santepubliquefrance.fr/maladies-et...,sante-publique-france


# Region

In [5]:
df_region = df_france[df_france.granularite == 'region']

## Nettoyage region

In [6]:
df_region_clean = df_region.drop(['granularite', 'maille_code', 'source_nom', 'source_url', 'source_type'], axis=1)

df_region_clean['date']=pd.to_datetime(df_region_clean['date'])
df_region_clean.info()
df_region_clean

<class 'pandas.core.frame.DataFrame'>
Int64Index: 865 entries, 15 to 3557
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   date           865 non-null    datetime64[ns]
 1   maille_nom     865 non-null    object        
 2   cas_confirmes  721 non-null    float64       
 3   deces          341 non-null    float64       
 4   reanimation    229 non-null    float64       
 5   hospitalises   232 non-null    float64       
 6   gueris         201 non-null    float64       
 7   depistes       0 non-null      float64       
dtypes: datetime64[ns](1), float64(6), object(1)
memory usage: 60.8+ KB


Unnamed: 0,date,maille_nom,cas_confirmes,deces,reanimation,hospitalises,gueris,depistes
15,2020-01-24,Île-de-France,2.0,,,2.0,,
16,2020-01-24,Île-de-France,2.0,,,2.0,,
17,2020-01-24,Nouvelle-Aquitaine,1.0,,,,,
18,2020-01-24,Nouvelle-Aquitaine,1.0,,,1.0,,
19,2020-01-24,Nouvelle-Aquitaine,1.0,,,1.0,,
...,...,...,...,...,...,...,...,...
3553,2020-03-25,Occitanie,1082.0,,,,,
3554,2020-03-25,Auvergne-Rhône-Alpes,2093.0,,,,,
3555,2020-03-25,Provence-Alpes-Côte d'Azur,1927.0,,,,,
3556,2020-03-25,Corse,233.0,15.0,9.0,39.0,,


In [7]:
df_region_clean=df_region_clean[['maille_nom', 'date', 'cas_confirmes', 'deces', 'reanimation']]

## Enlever les - dans les noms de regions pour eviter les doublons (meme region mais 2 noms differents)

In [8]:
df_region_clean['maille_nom']=df_region_clean.maille_nom.str.replace('-', ' ', regex=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_region_clean['maille_nom']=df_region_clean.maille_nom.str.replace('-', ' ', regex=False)


## Passage en colonne, index sont les dates

In [9]:
df_region_clean.drop_duplicates(subset=['date', 'maille_nom'], keep='last', inplace=True)
df_region_clean_indexed = df_region_clean.set_index(['date', 'maille_nom'])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_region_clean.drop_duplicates(subset=['date', 'maille_nom'], keep='last', inplace=True)


In [10]:
df_region_colonnes = df_region_clean_indexed.unstack()
df_region_colonnes

Unnamed: 0_level_0,cas_confirmes,cas_confirmes,cas_confirmes,cas_confirmes,cas_confirmes,cas_confirmes,cas_confirmes,cas_confirmes,cas_confirmes,cas_confirmes,...,reanimation,reanimation,reanimation,reanimation,reanimation,reanimation,reanimation,reanimation,reanimation,reanimation
maille_nom,Auvergne Rhône Alpes,Bourgogne Franche Comté,Bretagne,Centre Val de Loire,Corse,Grand Est,Guadeloupe,Guyane,Hauts de France,La Réunion,...,Hauts de France,La Réunion,Martinique,Mayotte,Normandie,Nouvelle Aquitaine,Occitanie,Pays de la Loire,Provence Alpes Côte d'Azur,Île de France
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2020-01-24,,,,,,,,,,,...,,,,,,,,,,
2020-01-25,,,,,,,,,,,...,,,,,,,,,,
2020-01-26,,,,,,,,,,,...,,,,,,,,,,
2020-01-27,,,,,,,,,,,...,,,,,,,,,,
2020-01-28,,,,,,,,,,,...,,,,,,,,,,
2020-01-29,,,,,,,,,,,...,,,,,,,,,,
2020-01-30,,,,,,,,,,,...,,,,,,,,,,
2020-01-31,,,,,,,,,,,...,,,,,,,,,,
2020-02-03,,,,,,,,,,,...,,,,,,,,,,
2020-02-05,,,,,,,,,,,...,,,,,,,,,,


In [11]:
df_region_colonnes.columns


MultiIndex([('cas_confirmes',       'Auvergne Rhône Alpes'),
            ('cas_confirmes',    'Bourgogne Franche Comté'),
            ('cas_confirmes',                   'Bretagne'),
            ('cas_confirmes',        'Centre Val de Loire'),
            ('cas_confirmes',                      'Corse'),
            ('cas_confirmes',                  'Grand Est'),
            ('cas_confirmes',                 'Guadeloupe'),
            ('cas_confirmes',                     'Guyane'),
            ('cas_confirmes',            'Hauts de France'),
            ('cas_confirmes',                 'La Réunion'),
            ('cas_confirmes',                 'Martinique'),
            ('cas_confirmes',                    'Mayotte'),
            ('cas_confirmes',                  'Normandie'),
            ('cas_confirmes',         'Nouvelle Aquitaine'),
            ('cas_confirmes',                  'Occitanie'),
            ('cas_confirmes',           'Pays de la Loire'),
            ('cas_confir