In [1]:
import pymc3 as pm
import theano
import theano.tensor as tt
import numpy as np
import pandas as pd
import arviz as az
import subprocess

# Summary statistics passed to az.summary(..., stat_funcs=func_dict): each key
# becomes a column name, each value reduces an array of draws to one number
# (posterior mean and the bounds of the central 95% interval).
func_dict = dict([
    ("mean", np.mean),
    ("q2.5", lambda draws: np.percentile(draws, 2.5)),
    ("q97.5", lambda draws: np.percentile(draws, 97.5)),
])

output_dir = "../../results/Scenario-2/sens_CUTOFF_TIME"
!rm -rf {output_dir}
!mkdir -p {output_dir}
output_data_dir = output_dir + "/datasets"
!mkdir -p {output_data_dir}

from scipy.integrate import quad

class Integrate(theano.Op):
    """Theano Op that evaluates a definite integral numerically.

    The integrand is a symbolic Theano expression in the variable of
    integration ``var`` and, optionally, further symbolic inputs
    ``extra_vars``. It is compiled once in the constructor and integrated
    with ``scipy.integrate.quad`` whenever the Op is executed.

    Parameters
    ----------
    expr : theano variable
        Symbolic integrand.
    var : theano variable
        Variable of integration appearing in ``expr``.
    *extra_vars : theano variables
        Remaining symbolic inputs of ``expr``. When the Op is applied, one
        value must be supplied for each of them, in the same order.

    Usage: ``Integrate(expr, var, *extra_vars)(start, stop, *extra_values)``.
    """

    def __init__(self, expr, var, *extra_vars):
        super().__init__()
        self._expr = expr
        self._var = var
        self._extra_vars = extra_vars
        # Compiled integrand f(var, *extra_vars); inputs that do not appear in
        # expr are tolerated rather than raising.
        self._func = theano.function(
            [var] + list(extra_vars),
            self._expr,
            on_unused_input='ignore')

    def make_node(self, start, stop, *extra_vars):
        """Build the Apply node for integrating from ``start`` to ``stop``.

        Raises
        ------
        ValueError
            If the number of extra values differs from the number of extra
            variables the Op was constructed with.
        """
        if len(extra_vars) != len(self._extra_vars):
            raise ValueError(
                "Integrate expected %d extra input(s), got %d"
                % (len(self._extra_vars), len(extra_vars)))
        # Convert every input to a tensor variable so that plain Python/NumPy
        # numbers are accepted as well as symbolic variables. No per-call state
        # is stored on the Op, since the same instance may be applied many times.
        inputs = [tt.as_tensor_variable(v) for v in (start, stop) + extra_vars]
        return theano.Apply(self, inputs, [tt.dscalar().type()])

    def perform(self, node, inputs, out):
        """Numerically integrate the compiled integrand over [start, stop]."""
        start, stop, *args = inputs
        # quad returns (value, abs_error_estimate); only the value is kept.
        val = quad(self._func, start, stop, args=tuple(args))[0]
        out[0][0] = np.array(val)

    def grad(self, inputs, grads):
        """Symbolic gradient with respect to start, stop and each extra input.

        Uses the Leibniz rule: the derivative with respect to the upper limit
        is the integrand evaluated at ``stop``, with respect to the lower limit
        it is minus the integrand evaluated at ``start``, and with respect to
        each extra input it is the integral of the integrand's partial
        derivative (evaluated by a nested ``Integrate`` Op).
        """
        start, stop, *args = inputs
        out, = grads
        replace = dict(zip(self._extra_vars, args))

        # -f(start, *args)
        at_start = replace.copy()
        at_start[self._var] = start
        dstart = out * theano.clone(-self._expr, replace=at_start)

        # f(stop, *args)
        at_stop = replace.copy()
        at_stop[self._var] = stop
        dstop = out * theano.clone(self._expr, replace=at_stop)

        # One nested integral per extra input: integral of d expr / d extra_var.
        partials = tt.grad(self._expr, self._extra_vars)
        dargs = []
        for partial in partials:
            integrate = Integrate(partial, self._var, *self._extra_vars)
            dargs.append(out * integrate(start, stop, *args))

        return [dstart, dstop] + dargs

In [4]:
%%time

!rm -rf data_tmp_CUTOFF_TIME_2
!mkdir -p data_tmp_CUTOFF_TIME_2

start = theano.shared(0.)
stop = theano.shared(250.)
μ = theano.shared(2.838)
σ = theano.shared(0.520)

for idx, day in enumerate(np.arange(17,26,1)):
    t0 = '2019-12-08'
    CUTOFF_TIME = '2020-01-%02d'%day
    
    print(CUTOFF_TIME)
    subprocess.call(['Rscript', 'prepare_data.R', './data_tmp_CUTOFF_TIME_2', t0, CUTOFF_TIME])
    
    df = pd.read_csv("data_tmp_CUTOFF_TIME_2/data.csv")
    df_onset2death = pd.read_csv("data_tmp_CUTOFF_TIME_2/data_onset2death.csv")
    df_onset2report = pd.read_csv("data_tmp_CUTOFF_TIME_2/data_onset2report.csv")

    for idx0, flnm in enumerate(['data.csv', 'data_onset2death.csv', 'data_onset2report.csv']):
        !cp data_tmp_CUTOFF_TIME_2/{flnm} {output_data_dir}/{CUTOFF_TIME}_{flnm}
            
    # module for onset2report
    with pm.Model() as model_reporting_delay:
        a_delay = pm.HalfNormal('a_delay', sd=5)
        b_delay = pm.HalfCauchy('b_delay', 2.5)
        timeOnsetToDeath = df_onset2report.dist.values
        pm.Gamma('likelihood_delay', a_delay, b_delay, observed=timeOnsetToDeath)
        pm.Deterministic('mean_delay', a_delay/b_delay);
        pm.Deterministic('sd_delay', np.sqrt(a_delay)/b_delay);
        trace_reporting_delay = pm.sample(10000, tune=5000, cores=10, target_accept=.85, init='advi')

    res_delay = pm.summary(trace_reporting_delay, var_names=['a_delay', 'b_delay', 'mean_delay'])['mean']
    df_res = az.summary(trace_reporting_delay, var_names=['mean_delay', 'sd_delay', 'a_delay', 'b_delay'], stat_funcs=func_dict, extend=False, round_to=5).reset_index().rename(columns={'index': 'var'})
    df_res.rename(columns={'q2.5': 'lower', 'q97.5': 'upper'}).loc[:,['var','mean','lower','upper']].\
        to_csv(output_dir+'/'+CUTOFF_TIME+'_onset2report.csv', index=False)

    # main module
    inci_idx = np.min(df.loc[lambda d: d.exports>0].index)
    inci_tmin = df.loc[inci_idx,'time']
    len_p = len(df.loc[lambda d: d['time']>=inci_tmin,'prob_travel'])
    death_idx = np.min(df.loc[lambda d: d['deaths']>0].index)
    T0 = df['time'].values[inci_idx]
    with pm.Model() as model:  
        ## main data and priors ##
        K = df['exports'].shape[0]
        exported_cases = df['exports'].values
        p = df.loc[0,'prob_travel']

        neglogr = pm.HalfNormal('neglogr', testval=-np.log(0.1))
        r = pm.Deterministic('r',np.exp(-neglogr))
        logi0 = pm.HalfNormal('logi0', sd=np.log(1000), testval=np.log(500))
        i0 = pm.Deterministic('i0', tt.exp(logi0))

        t = tt.arange(1,K+1,1)
        Incidence = pm.Deterministic('Incidence',i0*(tt.exp(r*(t-T0))-tt.exp(-r*T0))/r)

        ## implementing numerical integration 
        s = tt.dscalar('s')
        s.tag.test_value = np.zeros(()) #variable of integration
        r_ = tt.dscalar('r_')
        r_.tag.test_value = np.ones(())*0.14
        func = tt.exp(-r_*s)/s/σ/((2.0*np.pi)**0.5)*tt.exp(-((tt.log(s)-μ)**2)/2/(σ**2))
        integrate = Integrate(func, s, r_)

        ## calculating us ##
        u_delay = pm.Deterministic('u_delay', (1 + r*res_delay['mean_delay']/res_delay['a_delay'])**(-res_delay['a_delay']))
        u_death = pm.Deterministic('u_death', integrate(start, stop, r))
        ##############################

        ## reconstructed incidence from exportation events ##
        mu = (u_delay*Incidence*p/(1-p))[inci_idx:K]
        alpha = (1.0/(1-p))
        pm.Gamma('likelihood_incidence', mu, alpha, shape=K-inci_idx, 
                 observed=exported_cases[inci_idx:K])
        ##############################

        ## CFR ##
        death = df['deaths'].values
        neglogq = pm.Gamma('neglogq', 2, .5, shape=K-death_idx, testval=-np.log(.06))
        q = pm.Deterministic('q',np.exp(-neglogq))

        shape_death = u_death*Incidence[death_idx:K]*q/(1-q)
        invscale_death = 1.0/(1-q)
        pm.Gamma('likelihood_death', shape_death, invscale_death, observed=death[death_idx:K])
        ##############################

        pm.Deterministic('predictedDeath', u_death*Incidence[death_idx:K]*q)

        sample = pm.sample(4250, cores=8, tune=2500, target_accept=.92, init='advi')

    df_res = az.summary(sample, 
                        var_names=['r','Incidence','q','u_delay','u_death', 'predictedDeath'], 
                        stat_funcs=func_dict, extend=False, round_to=6).reset_index().rename(columns={'index': 'var'})
    df_res['time'] = df_res['var'].apply(lambda st: st[st.find("[")+1:st.find("]")])
    df_res['time'] = ['NA' if "[" not in y else int(x)+1 for x,y in zip(df_res['time'],df_res['var'])]
    df_res['var'] = df_res['var'].apply(lambda st: st[:st.find("[")] if "[" in st else st)
    df_res.loc[lambda d: d['var']=='q', 'var'] = 'CFR'
    df_res.rename(columns={'q2.5': 'lower', 'q97.5': 'upper'}).loc[:,['var','time','mean','lower','upper']].\
        to_csv(output_dir+'/'+CUTOFF_TIME+'_incidence.csv', index=False)
    
!rm -rf data_tmp_CUTOFF_TIME_2

2020-01-17


Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 34.808:   5%|▌         | 10551/200000 [00:02<00:46, 4050.72it/s]
Convergence achieved at 10900
Interrupted at 10,899 [5%]: Average Loss = 72.071
Multiprocess sampling (10 chains in 10 jobs)
NUTS: [b_delay, a_delay]
Sampling 10 chains, 0 divergences: 100%|██████████| 150000/150000 [00:37<00:00, 4000.58draws/s]
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 8.3814e+19:   0%|          | 885/200000 [00:05<21:08, 157.01it/s]
Convergence achieved at 900
Interrupted at 899 [0%]: Average Loss = 2.6677e+15
Multiprocess sampling (8 chains in 8 jobs)
NUTS: [neglogq, logi0, neglogr]
Sampling 8 chains, 0 divergences: 100%|██████████| 54000/54000 [1:05:12<00:00, 13.80draws/s]
The number of effective samples is smaller than 25% for some parameters.


2020-01-18


Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 40.436:   5%|▌         | 10071/200000 [00:02<00:51, 3714.52it/s]
Convergence achieved at 10500
Interrupted at 10,499 [5%]: Average Loss = 78.796
Multiprocess sampling (10 chains in 10 jobs)
NUTS: [b_delay, a_delay]
Sampling 10 chains, 0 divergences: 100%|██████████| 150000/150000 [00:36<00:00, 4081.52draws/s]
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 4,316.2:   5%|▌         | 10886/200000 [01:07<19:29, 161.73it/s]  
Convergence achieved at 10900
Interrupted at 10,899 [5%]: Average Loss = 8.6218e+18
Multiprocess sampling (8 chains in 8 jobs)
NUTS: [neglogq, logi0, neglogr]
Sampling 8 chains, 0 divergences: 100%|██████████| 54000/54000 [50:38<00:00, 17.77draws/s]  
The number of effective samples is smaller than 25% for some parameters.


2020-01-19


Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 47.75:   5%|▍         | 9521/200000 [00:01<00:35, 5333.07it/s] 
Convergence achieved at 9800
Interrupted at 9,799 [4%]: Average Loss = 92.103
Multiprocess sampling (10 chains in 10 jobs)
NUTS: [b_delay, a_delay]
Sampling 10 chains, 0 divergences: 100%|██████████| 150000/150000 [00:35<00:00, 4264.20draws/s]
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 7.5218e+18:   0%|          | 885/200000 [00:05<18:51, 176.04it/s]
Convergence achieved at 900
Interrupted at 899 [0%]: Average Loss = 9.6799e+10
Multiprocess sampling (8 chains in 8 jobs)
NUTS: [neglogq, logi0, neglogr]
Sampling 8 chains, 0 divergences: 100%|██████████| 54000/54000 [1:02:14<00:00, 14.46draws/s]
The number of effective samples is smaller than 25% for some parameters.


2020-01-20


Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 64.657:   6%|▌         | 11033/200000 [00:02<00:35, 5333.80it/s]
Convergence achieved at 11100
Interrupted at 11,099 [5%]: Average Loss = 122.89
Multiprocess sampling (10 chains in 10 jobs)
NUTS: [b_delay, a_delay]
Sampling 10 chains, 0 divergences: 100%|██████████| 150000/150000 [00:38<00:00, 3852.29draws/s]
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 605.91:   4%|▍         | 8695/200000 [00:55<20:19, 156.90it/s]    
Convergence achieved at 8700
Interrupted at 8,699 [4%]: Average Loss = 9.2752e+19
Multiprocess sampling (8 chains in 8 jobs)
NUTS: [neglogq, logi0, neglogr]
Sampling 8 chains, 0 divergences: 100%|██████████| 54000/54000 [53:27<00:00, 16.84draws/s]  
The number of effective samples is smaller than 25% for some parameters.


2020-01-21


Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 77.121:   6%|▌         | 11131/200000 [00:02<00:40, 4677.22it/s]
Convergence achieved at 11200
Interrupted at 11,199 [5%]: Average Loss = 140.43
Multiprocess sampling (10 chains in 10 jobs)
NUTS: [b_delay, a_delay]
Sampling 10 chains, 0 divergences: 100%|██████████| 150000/150000 [00:35<00:00, 4213.68draws/s]
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 384.8:   4%|▍         | 8997/200000 [00:56<19:51, 160.31it/s]     
Convergence achieved at 9000
Interrupted at 8,999 [4%]: Average Loss = 2.7535e+23
Multiprocess sampling (8 chains in 8 jobs)
NUTS: [neglogq, logi0, neglogr]
Sampling 8 chains, 0 divergences: 100%|██████████| 54000/54000 [59:55<00:00, 15.02draws/s]  
The number of effective samples is smaller than 25% for some parameters.


2020-01-22


Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 89.923:   5%|▍         | 9958/200000 [00:02<00:49, 3838.87it/s]
Convergence achieved at 10400
Interrupted at 10,399 [5%]: Average Loss = 162
Multiprocess sampling (10 chains in 10 jobs)
NUTS: [b_delay, a_delay]
Sampling 10 chains, 0 divergences: 100%|██████████| 150000/150000 [00:36<00:00, 4117.95draws/s]
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 5.3653e+23:   0%|          | 396/200000 [00:02<21:10, 157.14it/s]
Convergence achieved at 400
Interrupted at 399 [0%]: Average Loss = 5.2577e+23
Multiprocess sampling (8 chains in 8 jobs)
NUTS: [neglogq, logi0, neglogr]
Sampling 8 chains, 0 divergences: 100%|██████████| 54000/54000 [1:20:47<00:00, 11.14draws/s]
The number of effective samples is smaller than 25% for some parameters.


2020-01-23


Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 125.08:   6%|▌         | 11378/200000 [00:03<00:49, 3787.71it/s]
Convergence achieved at 11700
Interrupted at 11,699 [5%]: Average Loss = 208.58
Multiprocess sampling (10 chains in 10 jobs)
NUTS: [b_delay, a_delay]
Sampling 10 chains, 0 divergences: 100%|██████████| 150000/150000 [00:34<00:00, 4336.66draws/s]
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 1.9997e+05:   3%|▎         | 6399/200000 [00:39<19:43, 163.64it/s]
Convergence achieved at 6400
Interrupted at 6,399 [3%]: Average Loss = 5.8387e+15
Multiprocess sampling (8 chains in 8 jobs)
NUTS: [neglogq, logi0, neglogr]
Sampling 8 chains, 0 divergences: 100%|██████████| 54000/54000 [1:19:41<00:00, 11.29draws/s]
The number of effective samples is smaller than 25% for some parameters.


2020-01-24


Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 153.39:   6%|▋         | 12521/200000 [00:02<00:34, 5437.61it/s]
Convergence achieved at 12700
Interrupted at 12,699 [6%]: Average Loss = 256.24
Multiprocess sampling (10 chains in 10 jobs)
NUTS: [b_delay, a_delay]
Sampling 10 chains, 0 divergences: 100%|██████████| 150000/150000 [00:32<00:00, 4555.72draws/s]
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 1.418e+09:   2%|▏         | 4782/200000 [00:27<18:32, 175.52it/s] 
Convergence achieved at 4800
Interrupted at 4,799 [2%]: Average Loss = 5.407e+19
Multiprocess sampling (8 chains in 8 jobs)
NUTS: [neglogq, logi0, neglogr]
Sampling 8 chains, 0 divergences:  24%|██▎       | 12820/54000 [23:15<1:14:42,  9.19draws/s]


ValueError: Not enough samples to build a trace.

(Note: the run was interrupted manually at the end, which produced the ValueError above, because 2020-01-25 was not used in the manuscript.)