In [None]:
from display.display_figures import display_data, display_REstim, display_REstim_by_dpt

# For data reading purposes
from include.load_data.get_counts import get_real_counts_by_county
from include.load_data import get_counts

# For independent denoising
from include.optim_tools.sliding_median import sliding_median

# For Rt estimation
from include.estim.Rt_MLE import Rt_MLE
from include.estim.Rt_Gamma import Rt_Gamma
from include.estim.Rt_Univariate import Rt_U
from include.estim.Rt_UnivariateOutliers import Rt_U_O
from include.estim.Rt_Multivariate import Rt_M

# Aliases of the estimators whose names are later rebound to their own results
# (`Rt_MLE`, `Rt_Gamma`, `Rt_U`), so that the estimation cells can be re-run safely
from include.estim.Rt_MLE import Rt_MLE as estimate_Rt_MLE
from include.estim.Rt_Gamma import Rt_Gamma as estimate_Rt_Gamma
from include.estim.Rt_Univariate import Rt_U as estimate_Rt_U

## Univariate reproduction number estimations on real-world infection counts
### Data fetching

`dataBasis` is either `SPF` (Santé Publique France) or `JHU` (Johns Hopkins University).
&nbsp;
`country` must be:
   * `France` for `SPF`
   * any country available at https://coronavirus.jhu.edu/data for `JHU`

In [None]:
# Data source, country and time period to analyse
dataBasis, country = 'JHU', 'France'
firstDay, lastDay = '2021-11-01', '2022-08-03'  # first and last day of the chosen time period

# Fetch the counts over that period, along with the accompanying `options`
# (reused by every estimation and display call below)
ZData, options = get_counts.get_real_counts(country, firstDay, lastDay, dataBasis)

# Plot the fetched data
display_data(ZData, options=options);

### Maximum Likelihood Estimator
$$\mathsf{R}_t^{\mathsf{MLE}} = \frac{\mathsf{Z}_t}{(\Phi\star\mathsf{Z})_t}$$

In [None]:
# Analytic computation.
# The estimator is called through its `estimate_Rt_MLE` alias (bound in the import cell):
# the assignment below rebinds the name `Rt_MLE` to the estimate, so calling `Rt_MLE(...)`
# here would fail as soon as this cell is run a second time.
# `Rt_MLE` still ends up holding the estimate, as expected by the comparison cell.
Rt_MLE, options_MLE = estimate_Rt_MLE(ZData, options=options)

# Display figures
display_REstim(Rt_MLE, options_MLE);

### Bayesian Estimator

Following the article "A New Framework and Software to Estimate Time-Varying Reproduction Numbers During Epidemics", Cori et al., 2013, Am. Journal of Epidemiology, the estimation of R is carried out in a Bayesian framework, resulting in:

$$\mathsf{R}_t^\Gamma = \frac{1 + \sum_{s=t-\tau}^{t}\mathsf{Z}_s}{5 + \sum_{s=t-\tau}^{t}(\Phi\star\mathsf{Z})_s}$$

Here $\tau = 15$

In [None]:
# Choice of the hyperparameter (the tau of the formula above)
tau = 15

# Analytic computation.
# The estimator is called through its `estimate_Rt_Gamma` alias (bound in the import cell):
# the assignment below rebinds the name `Rt_Gamma` to the estimate, so calling
# `Rt_Gamma(...)` here would fail as soon as this cell is run a second time.
# `Rt_Gamma` still ends up holding the estimate, as expected by the comparison cell.
Rt_Gamma, options_Gamma = estimate_Rt_Gamma(ZData, tau=tau, options=options)

# Display results
display_REstim(Rt_Gamma, options_Gamma);

### Univariate estimation

1. $\mathsf{Z}^{\mathsf{denoised}} =$ `sliding_median`$(\mathsf{Z}, \alpha)$
    &nbsp;

    where the sliding median is tuned through $\alpha \geq 0$:
    - small $\alpha$: many data points are denoised
    - large $\alpha$: few data points are denoised

2. Minimizing the penalized likelihood functional with the Chambolle-Pock algorithm

    $$ \boldsymbol{\mathsf{R}}^{\mathsf{U}} = \underset{\boldsymbol{\mathsf{R}} \in \mathbb{R}_+^{ T}}{\mathrm{argmin}} \,\mathrm{D}_{\mathsf{KL}}(\boldsymbol{\mathsf{Z}}^{\mathsf{denoised}} | \boldsymbol{\mathsf{p}}) + \lambda_{\mathsf{L}}^{\mathsf{U}} \lVert \mathsf{L} \boldsymbol{\mathsf{R}}\rVert_1 , \quad \mathsf{p}_t = \mathsf{R}_t \Phi^{\boldsymbol{\mathsf{Z}}}_t$$

In [None]:
# Choice of denoising parameter alpha and regularization parameter muR
alpha = 0.1
muR = 50

# Denoising with sliding median then solving with Chambolle-Pock algorithm.
# The estimator is called through its `estimate_Rt_U` alias (bound in the import cell):
# the assignment below rebinds the name `Rt_U` to the estimate, so calling `Rt_U(...)`
# here would fail as soon as this cell is run a second time.
# `Rt_U` still ends up holding the estimate, as expected by the comparison cell.
Z_denoised = sliding_median(ZData, alpha)
Rt_U, options_U = estimate_Rt_U(Z_denoised, muR=muR, options=options)

# Display results
display_REstim(Rt_U, options_U);

### Univariate Outliers estimation

The univariate variational estimator below combines temporal regularization with an explicit model of the misreported counts $\mathsf{O}$; it is solved with the Chambolle-Pock algorithm:

$$\boldsymbol{\mathsf{R}}^{\mathsf{U-O}}, \boldsymbol{\mathsf{O}}^{\mathsf{U-O}} = \, \underset{\boldsymbol{\mathsf{R}} \in \mathbb{R}_+^{ T}, \boldsymbol{\mathsf{O}} \in \mathbb{R}^{ T}}{\mathrm{argmin}} \,\mathrm{D}_{\mathsf{KL}}(\boldsymbol{\mathsf{Z}} | \boldsymbol{\mathsf{p}}) + \lambda_{\mathsf{L}}^{\mathsf{U}} \lVert \mathsf{L} \boldsymbol{\mathsf{R}}\rVert_1 + \lambda_\mathsf{O}^\mathsf{U} \lVert \mathsf{O}\rVert_1 , \quad \mathsf{p}_t = \mathsf{R}_t \Phi^{\boldsymbol{\mathsf{Z}}}_t + \mathsf{O}_t$$

* A greater $\lambda_\mathsf{L}^\mathsf{U}$ means fewer slope changes in the estimated $\mathsf{R}$
* A greater $\lambda_\mathsf{O}^\mathsf{U}$ means less denoising (fewer counts are treated as misreported)

For more examples, see [include/settings.RegularizationSettings](include/settings.py)

In [None]:
# Regularization parameters of the Univariate Outliers estimator
lambdaR = 3.5   # weight of the temporal regularization term on R
lambdaO = 0.03  # weight of the penalty on the misreported counts O

# Joint estimation of R and O with the Chambolle-Pock algorithm
Rt_UO, Ot_UO, options_UO = Rt_U_O(
    ZData,
    lambdaR=lambdaR,
    lambdaO=lambdaO,
    options=options,
)

# Plot the estimated reproduction number
display_REstim(Rt_UO, options=options_UO);

In [None]:
# Collect the four univariate estimates and their options under common method labels
methodLabels = ('MLE', 'Gamma', 'U', 'U-O')
REstimations = dict(zip(methodLabels, (Rt_MLE, Rt_Gamma, Rt_U, Rt_UO)))
optionsAll = dict(zip(methodLabels, (options_MLE, options_Gamma, options_U, options_UO)))

# Show all methods together for comparison
display_REstim(REstimations, optionsAll, comparison=True);

## Multivariate reproduction number estimations on real-world infection counts

In [None]:
# Time period for the multivariate study (overrides the univariate period chosen earlier)
firstDay, lastDay = '2021-11-03', '2022-03-04'

# Load the per-county infection counts over that period, with their options
ZData_multi, optionsSpat = get_real_counts_by_county(firstDay, lastDay)

In [None]:
# Regularization parameters of the multivariate estimator
# (note: `lambdaR` replaces the value set for the Univariate Outliers estimation)
lambdaS = 0.005  # spatial regularization
lambdaR = 50  # temporal regularization

In [None]:
# Warning: long-running cell, this computation takes up to 18 minutes!
REstimate, options_M = Rt_M(
    ZData_multi,
    lambdaR,
    lambdaS,
    optionsSpat,
)

In [None]:
# Counties (identified by their codes) whose estimates are displayed
selectedCounties = [
    '2A',
    '2B',
    '23',
    '75',
]
display_REstim_by_dpt(REstimate, selectedCounties, options_M);

In [None]:
# # For debug usage only --------------------
# from scipy.io import loadmat
# # savemat('test_demo.mat', {'REstimate': REstimate,
# #                           'datesUpdated': datesUpdated,
# #                           'dataCrop': dataCrop,
# #                           'counties': optionsSpat['counties']})
#
# file = loadmat('test_demo.mat', squeeze_me=True)
# REstimate = file['REstimate']
# datesUpdated = file['datesUpdated']
# dataCrop = file['dataCrop']
# counties = file['counties'] #  optionsSpat['counties']
# options_M = {'dates': datesUpdated, 'data': dataCrop, 'counties': counties}
#
# selectedCounties = ['2A', '2B',  '23', '75']
# display_REstim_by_dpt(REstimate, selectedCounties, options_M);