In [1]:
%matplotlib inline

import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import time
import json

In [2]:
fname = 'dep-name.json'
with open(fname, 'r') as file:
    dep_name = json.loads(file.read())

In [3]:
tic = time.time()

In [4]:
fname = 'sp-pos-quot-dep-2021-03-28-19h20.csv'
d = pd.read_csv(fname, sep = ';', parse_dates = ['jour'], dtype = {'dep': str})

In [5]:
d.sample(10)

Unnamed: 0,dep,jour,P,T,cl_age90,pop
83433,25,2021-03-02,2,27,90,6624.0
5475,2,2020-11-09,12,106,89,25627.0
355870,977,2020-05-30,0,0,90,35.0
329036,94,2020-09-04,42,754,49,189650.0
300280,86,2020-06-18,1,13,29,51459.0
215638,61,2021-02-03,10,178,59,38069.0
245241,70,2020-08-25,0,40,79,23536.0
168955,48,2020-10-03,0,3,69,11069.0
319683,91,2020-12-14,29,452,19,178366.0
27258,8,2021-01-27,6,41,9,28385.0


In [6]:
d.dtypes

dep                 object
jour        datetime64[ns]
P                    int64
T                    int64
cl_age90             int64
pop                float64
dtype: object

In [7]:
d.describe()

Unnamed: 0,P,T,cl_age90,pop
count,362648.0,362648.0,362648.0,362648.0
mean,23.628599,343.060985,48.272727,117333.9
std,91.030944,1052.507968,30.265807,235177.3
min,0.0,0.0,0.0,35.0
25%,0.0,22.0,19.0,23259.5
50%,3.0,85.0,49.0,50706.0
75%,15.0,258.0,79.0,106621.5
max,4365.0,45740.0,90.0,2588988.0


In [8]:
d['incidence'] = d['P'] / d['pop'] * 100000
d['positivite'] = d['P'] / d['T'] * 100

In [9]:
d.sample(10)

Unnamed: 0,dep,jour,P,T,cl_age90,pop,incidence,positivite
110929,31,2021-01-25,112,1396,59,174175.0,64.303143,8.022923
335438,971,2020-07-14,0,5,49,48928.0,0.0,0.0
176237,50,2020-10-31,7,108,69,69331.0,10.096494,6.481481
8402,3,2020-09-19,0,7,90,7107.0,0.0,0.0
294019,84,2020-08-22,1,22,9,63998.0,1.562549,4.545455
55196,16,2021-01-30,13,52,90,6701.0,194.000895,25.0
175033,50,2020-07-14,0,0,19,56234.0,0.0,
209536,60,2020-06-10,1,51,89,31232.0,3.201844,1.960784
320677,91,2021-03-14,28,204,59,169548.0,16.514497,13.72549
236956,67,2021-03-11,45,942,59,155713.0,28.899321,4.77707


In [10]:
deps = d['dep'].unique()
ages = d['cl_age90'].unique()

In [11]:
tic1 = time.time()
d['incidence hebdo'] = np.zeros(len(d))
for dep in deps:
    for age in ages:
        d2 = d[(d['dep'] == dep) & (d['cl_age90'] == age)]
        d['temp'] = d2.apply(lambda x : (d2[ (d2['jour'] <= x['jour']) 
                                                        & (d2['jour'] > x['jour'] - np.timedelta64(1,'W'))]
                                                    ['incidence']
                                                    .sum()), axis = 1)
        d['incidence hebdo'] = d['incidence hebdo'] + d['temp'].fillna(0)
    toc1 = time.time()
    print('{:.2f} s : département {} ({})'.format(-tic1+toc1, dep, dep_name[dep]))

2.34 s : département 01 (Ain)
4.66 s : département 02 (Aisne)
6.98 s : département 03 (Allier)
9.30 s : département 04 (Alpes-de-Haute-Provence)
11.61 s : département 05 (Hautes-Alpes)
13.94 s : département 06 (Alpes-Maritimes)
16.25 s : département 07 (Ardèche)
18.57 s : département 08 (Ardennes)
20.88 s : département 09 (Ariège)
23.20 s : département 10 (Aube)
25.51 s : département 11 (Aude)
27.84 s : département 12 (Aveyron)
30.17 s : département 13 (Bouches-du-Rhône)
32.49 s : département 14 (Calvados)
34.80 s : département 15 (Cantal)
37.15 s : département 16 (Charente)
39.49 s : département 17 (Charente-Maritime)
41.81 s : département 18 (Cher)
44.13 s : département 19 (Corrèze)
46.47 s : département 21 (Côte-d'Or)
48.79 s : département 22 (Côtes-d'Armor)
51.12 s : département 23 (Creuse)
53.47 s : département 24 (Dordogne)
55.80 s : département 25 (Doubs)
58.13 s : département 26 (Drôme)
60.47 s : département 27 (Eure)
62.83 s : département 28 (Eure-et-Loir)
65.20 s : départemen

In [12]:
d = d.drop(columns = ['temp'])
fname2 = fname[:-4] + '-processed.csv'
d.to_csv(fname2, sep = ';')

In [13]:
d.sample(10)

Unnamed: 0,dep,jour,P,T,cl_age90,pop,incidence,positivite,incidence hebdo
46605,14,2020-09-05,0,17,90,10772.0,0.0,0.0,0.0
266829,76,2020-10-25,28,233,29,142048.0,19.711647,12.017167,508.278892
251799,72,2020-07-18,0,12,90,9356.0,0.0,0.0,0.0
167085,47,2021-02-27,2,116,69,47036.0,4.252062,1.724138,70.159027
136880,39,2020-08-01,1,12,79,26160.0,3.82263,8.333333,7.64526
198143,56,2021-01-29,5,87,9,76812.0,6.5094,5.747126,32.546998
179170,51,2020-09-11,16,386,29,68195.0,23.462131,4.145078,108.512354
27061,8,2021-01-09,4,101,19,31952.0,12.518778,3.960396,262.894342
56019,17,2020-06-02,0,15,79,77944.0,0.0,0.0,0.0
193196,55,2020-09-18,2,75,39,20573.0,9.72148,2.666667,48.607398


In [14]:
toc = time.time()
print('notebook run in {:.2f} minutes.'.format((-tic+toc)/60))

notebook run in 4.09 minutes.
