In [1]:
# Set root directory as working directory
import os

# Remember the directory the kernel started in the first time this cell runs.
# A bare relative chdir('../') climbs one more level on every re-run of the
# cell; anchoring to the remembered start directory makes the cell idempotent
# while behaving exactly the same on a fresh kernel.
if 'NOTEBOOK_DIR' not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_DIR, os.pardir))

In [2]:
import json
import numpy as np
import pandas as pd
from scipy.stats import gamma
from matplotlib import pyplot as plt

# Data preprocessing

In [3]:
# Read the raw case records from the CSV.
raw_cases = pd.read_csv('data/raw/casos_hosp_uci_def_sexo_edad_provres.csv')

# Total cases for every (age group, date) pair.
cases_by_age_date = raw_cases.groupby(['grupo_edad', 'fecha'], as_index=False)['num_casos'].sum()

# Reshape to one row per date and one column per age group, then tidy up:
# 'fecha' becomes a regular datetime column, the 'NC' age-group column is
# discarded and the leftover columns-axis name is cleared.
data = (
    cases_by_age_date
    .pivot_table(values='num_casos', index='fecha', columns='grupo_edad')
    .reset_index()
    .assign(fecha=lambda frame: pd.to_datetime(frame['fecha']))
    .drop(columns=['NC'])
    .rename_axis(None, axis=1)
)
data

Unnamed: 0,fecha,0-9,10-19,20-29,30-39,40-49,50-59,60-69,70-79,80+
0,2020-01-01,0,0,0,0,0,0,0,0,0
1,2020-01-02,0,0,0,0,0,0,0,0,0
2,2020-01-03,0,0,0,0,0,0,0,0,0
3,2020-01-04,0,0,0,0,0,0,0,0,0
4,2020-01-05,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
812,2022-03-23,1196,2424,2639,3793,4455,3144,2324,1782,1372
813,2022-03-24,984,1830,1947,2993,3697,2683,1990,1691,1213
814,2022-03-25,982,1587,1673,2588,3248,2547,1858,1698,1289
815,2022-03-26,437,777,648,1187,1565,1162,1048,903,701


In [4]:
# Wave definition (source: "Informe nº 162. Situación de COVID-19 en España.")
# Last day (inclusive) of each wave, in chronological order. Each boundary is
# listed once; the per-wave masks below are derived from consecutive pairs.
wave_end_dates = pd.to_datetime([
    '2020-06-21',
    '2020-12-06',
    '2021-03-14',
    '2021-06-19',
    '2021-10-13',
    '2022-03-27',
])

# One boolean mask per wave: the first wave has no lower bound, every later
# wave covers the half-open interval (previous end, own end].
waves_date = [data['fecha'] <= wave_end_dates[0]] + [
    (data['fecha'] > previous_end) & (data['fecha'] <= current_end)
    for previous_end, current_end in zip(wave_end_dates[:-1], wave_end_dates[1:])
]

waves_name = ['wave_1', 'wave_2', 'wave_3', 'wave_4', 'wave_5', 'wave_6']

# np.select's implicit default is the integer 0, which cannot be combined with
# string choices on recent NumPy releases (TypeError: no common dtype). Pass
# the string '0' explicitly: it is the label older NumPy versions produced for
# rows matching no wave, so existing outputs are unchanged.
data['wave'] = np.select(waves_date, waves_name, default='0')
data

Unnamed: 0,fecha,0-9,10-19,20-29,30-39,40-49,50-59,60-69,70-79,80+,wave
0,2020-01-01,0,0,0,0,0,0,0,0,0,wave_1
1,2020-01-02,0,0,0,0,0,0,0,0,0,wave_1
2,2020-01-03,0,0,0,0,0,0,0,0,0,wave_1
3,2020-01-04,0,0,0,0,0,0,0,0,0,wave_1
4,2020-01-05,0,0,0,0,0,0,0,0,0,wave_1
...,...,...,...,...,...,...,...,...,...,...,...
812,2022-03-23,1196,2424,2639,3793,4455,3144,2324,1782,1372,wave_6
813,2022-03-24,984,1830,1947,2993,3697,2683,1990,1691,1213,wave_6
814,2022-03-25,982,1587,1673,2588,3248,2547,1858,1698,1289,wave_6
815,2022-03-26,437,777,648,1187,1565,1162,1048,903,701,wave_6


In [5]:
# Output: persist the per-age-group daily cases with their wave label as CSV,
# leaving the row index out of the file.
cases_age_wave_path = 'data/cases_age_wave.csv'
data.to_csv(cases_age_wave_path, index=False)