In [5]:
import pymc3 as pm
import theano
import theano.tensor as tt
import numpy as np
import pandas as pd
import arviz as az
import subprocess

func_dict = {"mean": np.mean, 
             "q2.5": lambda x: np.percentile(x, 2.5), 
             "q97.5": lambda x: np.percentile(x, 97.5)}


output_dir = "../../results/Scenario-1/sens_CUTOFF_TIME"
#!rm -rf {output_dir}
!mkdir -p {output_dir}
output_data_dir = output_dir + "/datasets"
!mkdir -p {output_data_dir}

from scipy.integrate import quad

class Integrate(theano.Op):
    """Theano Op computing a definite integral of a symbolic expression.

    The integrand ``expr`` is compiled once into a Python callable and then
    integrated numerically with ``scipy.integrate.quad`` every time the Op is
    evaluated.  Gradients are supplied symbolically so the Op can be used
    inside gradient-based samplers.

    Parameters
    ----------
    expr : theano variable
        Symbolic integrand.
    var : theano variable
        Variable of integration appearing in ``expr``.
    *extra_vars : theano variables
        Further symbolic parameters of ``expr``.  Their actual values are
        passed, in the same order, when the Op is applied.
    """

    def __init__(self, expr, var, *extra_vars):
        super().__init__()
        self._expr = expr
        self._var = var
        self._extra_vars = extra_vars
        # Compiled integrand f(var, *extra_vars); parameters that do not occur
        # in `expr` are tolerated thanks to on_unused_input='ignore'.
        self._func = theano.function(
            [var] + list(extra_vars),
            self._expr,
            on_unused_input='ignore')

    def make_node(self, start, stop, *extra_vars):
        """Build the Apply node for ``integral_{start}^{stop} expr d(var)``.

        ``extra_vars`` must contain exactly one value per extra parameter
        given to the constructor.  The node has a single float64 scalar
        output.
        """
        assert len(self._extra_vars) == len(extra_vars), \
            "expected %d extra variable(s), got %d" % (
                len(self._extra_vars), len(extra_vars))
        # Coerce every input to a Theano variable (the usual make_node
        # convention) so plain Python/NumPy numbers are accepted as well.
        # Inputs that already are Theano variables pass through unchanged.
        # No per-call state is stored on `self`: an Op instance may be applied
        # several times and such state would go stale.
        node_inputs = [tt.as_tensor_variable(v)
                       for v in (start, stop) + tuple(extra_vars)]
        return theano.Apply(self, node_inputs, [tt.dscalar()])

    def perform(self, node, inputs, out):
        """Evaluate the integral numerically with ``scipy.integrate.quad``."""
        start, stop, *args = inputs
        # quad returns (value, abs_error_estimate); only the value is kept.
        val = quad(self._func, start, stop, args=tuple(args))[0]
        out[0][0] = np.array(val)

    def grad(self, inputs, grads):
        """Return symbolic gradients w.r.t. ``start``, ``stop`` and each extra input.

        * d/d(start) is ``-expr`` evaluated at ``var = start``;
        * d/d(stop) is ``+expr`` evaluated at ``var = stop``;
        * d/d(extra_i) is the integral of ``d(expr)/d(extra_i)`` over the same
          interval, expressed with a new ``Integrate`` Op.
        """
        start, stop, *args = inputs
        out, = grads
        # Map the symbolic parameters of `expr` onto the actual node inputs.
        param_map = dict(zip(self._extra_vars, args))

        at_start = dict(param_map)
        at_start[self._var] = start
        dstart = out * theano.clone(-self._expr, replace=at_start)

        at_stop = dict(param_map)
        at_stop[self._var] = stop
        dstop = out * theano.clone(self._expr, replace=at_stop)

        # Differentiate under the integral sign for every extra parameter.
        expr_grads = tt.grad(self._expr, self._extra_vars)
        dargs = []
        for expr_grad in expr_grads:
            integrate_grad = Integrate(expr_grad, self._var, *self._extra_vars)
            dargs.append(out * integrate_grad(start, stop, *args))

        return [dstart, dstop] + dargs

In [11]:
%%time

!rm -rf data_tmp_CUTOFF_TIME
!mkdir -p data_tmp_CUTOFF_TIME

start = theano.shared(0.)
stop = theano.shared(250.)
μ = theano.shared(2.838)
σ = theano.shared(0.520)

for idx, day in enumerate(np.arange(17,26,1)):
    t0 = '2019-12-08'
    CUTOFF_TIME = '2020-01-%02d'%day
    
    print(CUTOFF_TIME)
    subprocess.call(['Rscript', 'prepare_data.R', './data_tmp_CUTOFF_TIME', t0, CUTOFF_TIME])
    
    df = pd.read_csv("data_tmp_CUTOFF_TIME/data.csv")
    df_onset2death = pd.read_csv("data_tmp_CUTOFF_TIME/data_onset2death.csv")
    df_onset2report = pd.read_csv("data_tmp_CUTOFF_TIME/data_onset2report.csv")

    for idx0, flnm in enumerate(['data.csv', 'data_onset2death.csv', 'data_onset2report.csv']):
        !cp data_tmp_CUTOFF_TIME/{flnm} {output_data_dir}/{CUTOFF_TIME}_{flnm}
            
    # module for onset2report
    with pm.Model() as model_reporting_delay:
        a_delay = pm.HalfNormal('a_delay', sd=5)
        b_delay = pm.HalfCauchy('b_delay', 2.5)
        timeOnsetToDeath = df_onset2report.dist.values
        pm.Gamma('likelihood_delay', a_delay, b_delay, observed=timeOnsetToDeath)
        pm.Deterministic('mean_delay', a_delay/b_delay);
        pm.Deterministic('sd_delay', np.sqrt(a_delay)/b_delay);
        trace_reporting_delay = pm.sample(20000, tune=5000, cores=5, target_accept=.85, init='advi')

    res_delay = pm.summary(trace_reporting_delay, var_names=['a_delay', 'b_delay', 'mean_delay'])['mean']
    df_res = az.summary(trace_reporting_delay, var_names=['mean_delay', 'sd_delay', 'a_delay', 'b_delay'], stat_funcs=func_dict, extend=False, round_to=5).reset_index().rename(columns={'index': 'var'})
    df_res.rename(columns={'q2.5': 'lower', 'q97.5': 'upper'}).loc[:,['var','mean','lower','upper']].\
        to_csv(output_dir+'/'+CUTOFF_TIME+'_onset2report.csv', index=False)

    # main module
    inci_idx = np.min(df.loc[lambda d: d.exports>0].index)
    inci_tmin = df.loc[inci_idx,'time']
    len_p = len(df.loc[lambda d: d['time']>=inci_tmin,'prob_travel'])
    death_idx = np.min(df.loc[lambda d: d['deaths']>0].index)
    with pm.Model() as model:  
        ## main data and priors ##
        K = df['exports'].shape[0]
        exported_cases = df['exports'].values
        p = df.loc[0,'prob_travel']

        neglogr = pm.HalfNormal('neglogr', testval=-np.log(0.1))
        r = pm.Deterministic('r',np.exp(-neglogr))
        i0 = 1.0

        t = tt.arange(1,K+1,1)
        Incidence = pm.Deterministic('Incidence',i0*(np.exp(r*t)-1.0)/r)

        ## implementing numerical integration 
        s = tt.dscalar('s')
        s.tag.test_value = np.zeros(()) #variable of integration
        r_ = tt.dscalar('r_')
        r_.tag.test_value = np.ones(())*0.14
        func = tt.exp(-r_*s)/s/σ/((2.0*np.pi)**0.5)*tt.exp(-((tt.log(s)-μ)**2)/2/(σ**2))
        integrate = Integrate(func, s, r_)

        ## calculating us ##
        u_delay = pm.Deterministic('u_delay', (1 + r*res_delay['mean_delay']/res_delay['a_delay'])**(-res_delay['a_delay']))
        u_death = pm.Deterministic('u_death', integrate(start, stop, r))
        ##############################

        ## reconstructed incidence from exportation events ##
        mu = (u_delay*Incidence*p/(1-p))[inci_idx:K]
        alpha = (1.0/(1-p))
        pm.Gamma('likelihood_incidence', mu, alpha, shape=K-death_idx, observed=exported_cases[inci_idx:K])
        ##############################

        ## CFR ##
        death = df['deaths'].values
        neglogq = pm.Gamma('neglogq', 2, .5, shape=K-death_idx, testval=-np.log(.06))
        q = pm.Deterministic('q',np.exp(-neglogq))

        shape_death = u_death*Incidence[death_idx:K]*q/(1-q)
        invscale_death = 1.0/(1-q)
        pm.Gamma('likelihood_death', shape_death, invscale_death, observed=death[death_idx:K])
        ##############################

        pm.Deterministic('predictedDeath', u_death*Incidence[death_idx:K]*q)

        sample = pm.sample(4000, cores=10, tune=2500, target_accept=.92, init='advi')

    df_res = az.summary(sample, 
                        var_names=['r','Incidence','q','u_delay','predictedDeath'], 
                        stat_funcs=func_dict, extend=False, round_to=6).reset_index().rename(columns={'index': 'var'})
    df_res['time'] = df_res['var'].apply(lambda st: st[st.find("[")+1:st.find("]")])
    df_res['time'] = ['NA' if "[" not in y else int(x)+1 for x,y in zip(df_res['time'],df_res['var'])]
    df_res['var'] = df_res['var'].apply(lambda st: st[:st.find("[")] if "[" in st else st)
    df_res.loc[lambda d: d['var']=='q', 'var'] = 'CFR'
    df_res.rename(columns={'q2.5': 'lower', 'q97.5': 'upper'}).loc[:,['var','time','mean','lower','upper']].\
        to_csv(output_dir+'/'+CUTOFF_TIME+'_incidence.csv', index=False)
    
!rm -rf data_tmp_CUTOFF_TIME

2020-01-17


Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 34.774:   5%|▌         | 10620/200000 [00:02<00:43, 4311.23it/s]
Convergence achieved at 10800
Interrupted at 10,799 [5%]: Average Loss = 72.242
Multiprocess sampling (5 chains in 5 jobs)
NUTS: [b_delay, a_delay]
Sampling 5 chains, 0 divergences: 100%|██████████| 125000/125000 [00:36<00:00, 3385.50draws/s]
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 62.01:   5%|▍         | 9887/200000 [01:10<22:41, 139.68it/s]     
Convergence achieved at 9900
Interrupted at 9,899 [4%]: Average Loss = 1.466e+07
Multiprocess sampling (10 chains in 10 jobs)
NUTS: [neglogq, neglogr]
Sampling 10 chains, 0 divergences: 100%|██████████| 65000/65000 [17:04<00:00, 63.45draws/s]


2020-01-18


Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 40.797:   5%|▌         | 10098/200000 [00:02<00:48, 3876.25it/s]
Convergence achieved at 10300
Interrupted at 10,299 [5%]: Average Loss = 79.052
Multiprocess sampling (5 chains in 5 jobs)
NUTS: [b_delay, a_delay]
Sampling 5 chains, 0 divergences: 100%|██████████| 125000/125000 [00:37<00:00, 3341.10draws/s]
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 67.382:   5%|▌         | 10595/200000 [01:15<22:27, 140.53it/s]   
Convergence achieved at 10600
Interrupted at 10,599 [5%]: Average Loss = 1.2693e+08
Multiprocess sampling (10 chains in 10 jobs)
NUTS: [neglogq, neglogr]
Sampling 10 chains, 0 divergences: 100%|██████████| 65000/65000 [18:19<00:00, 59.13draws/s]


2020-01-19


Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 46.165:   5%|▌         | 10564/200000 [00:02<00:49, 3816.26it/s]
Convergence achieved at 10600
Interrupted at 10,599 [5%]: Average Loss = 87.258
Multiprocess sampling (5 chains in 5 jobs)
NUTS: [b_delay, a_delay]
Sampling 5 chains, 0 divergences: 100%|██████████| 125000/125000 [00:37<00:00, 3372.25draws/s]
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 71.028:   6%|▌         | 11086/200000 [01:18<22:11, 141.91it/s]   
Convergence achieved at 11100
Interrupted at 11,099 [5%]: Average Loss = 3.0135e+08
Multiprocess sampling (10 chains in 10 jobs)
NUTS: [neglogq, neglogr]
Sampling 10 chains, 0 divergences: 100%|██████████| 65000/65000 [20:56<00:00, 51.74draws/s]


2020-01-20


Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 64.278:   6%|▌         | 11278/200000 [00:03<00:51, 3681.73it/s]
Convergence achieved at 11400
Interrupted at 11,399 [5%]: Average Loss = 120.92
Multiprocess sampling (5 chains in 5 jobs)
NUTS: [b_delay, a_delay]
Sampling 5 chains, 0 divergences: 100%|██████████| 125000/125000 [00:34<00:00, 3631.70draws/s]
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 96.801:   6%|▌         | 11295/200000 [01:20<22:21, 140.67it/s]   
Convergence achieved at 11300
Interrupted at 11,299 [5%]: Average Loss = 2.3773e+08
Multiprocess sampling (10 chains in 10 jobs)
NUTS: [neglogq, neglogr]
Sampling 10 chains, 0 divergences: 100%|██████████| 65000/65000 [23:23<00:00, 46.32draws/s]


2020-01-21


Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 79.165:   5%|▌         | 10120/200000 [00:02<00:50, 3793.13it/s]
Convergence achieved at 10200
Interrupted at 10,199 [5%]: Average Loss = 147.18
Multiprocess sampling (5 chains in 5 jobs)
NUTS: [b_delay, a_delay]
Sampling 5 chains, 0 divergences: 100%|██████████| 125000/125000 [00:30<00:00, 4057.28draws/s]
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 114.13:   6%|▌         | 12496/200000 [01:28<22:01, 141.87it/s]   
Convergence achieved at 12500
Interrupted at 12,499 [6%]: Average Loss = 6.5796e+08
Multiprocess sampling (10 chains in 10 jobs)
NUTS: [neglogq, neglogr]
Sampling 10 chains, 0 divergences: 100%|██████████| 65000/65000 [25:55<00:00, 41.80draws/s]


2020-01-22


Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 86.652:   6%|▌         | 11904/200000 [00:03<00:50, 3692.18it/s]
Convergence achieved at 12100
Interrupted at 12,099 [6%]: Average Loss = 151.35
Multiprocess sampling (5 chains in 5 jobs)
NUTS: [b_delay, a_delay]
Sampling 5 chains, 0 divergences: 100%|██████████| 125000/125000 [00:30<00:00, 4153.42draws/s]
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 149.97:   6%|▌         | 11689/200000 [01:22<22:02, 142.36it/s]   
Convergence achieved at 11700
Interrupted at 11,699 [5%]: Average Loss = 1.0131e+10
Multiprocess sampling (10 chains in 10 jobs)
NUTS: [neglogq, neglogr]
Sampling 10 chains, 0 divergences: 100%|██████████| 65000/65000 [33:18<00:00, 32.53draws/s]


2020-01-23


Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 122.92:   6%|▌         | 12309/200000 [00:03<00:51, 3623.39it/s]
Convergence achieved at 12500
Interrupted at 12,499 [6%]: Average Loss = 209.66
Multiprocess sampling (5 chains in 5 jobs)
NUTS: [b_delay, a_delay]
Sampling 5 chains, 0 divergences: 100%|██████████| 125000/125000 [00:29<00:00, 4268.84draws/s]
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 814.28:   5%|▍         | 9890/200000 [01:09<22:22, 141.58it/s]    
Convergence achieved at 9900
Interrupted at 9,899 [4%]: Average Loss = 4.4964e+09
Multiprocess sampling (10 chains in 10 jobs)
NUTS: [neglogq, neglogr]
Sampling 10 chains, 0 divergences: 100%|██████████| 65000/65000 [46:42<00:00, 23.19draws/s] 


2020-01-24


Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 155.97:   6%|▌         | 11924/200000 [00:03<00:49, 3783.83it/s]
Convergence achieved at 12000
Interrupted at 11,999 [5%]: Average Loss = 261.21
Multiprocess sampling (5 chains in 5 jobs)
NUTS: [b_delay, a_delay]
Sampling 5 chains, 0 divergences: 100%|██████████| 125000/125000 [00:29<00:00, 4303.68draws/s]
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 424.1:   6%|▌         | 11494/200000 [01:20<22:03, 142.44it/s]    
Convergence achieved at 11500
Interrupted at 11,499 [5%]: Average Loss = 2.2498e+09
Multiprocess sampling (10 chains in 10 jobs)
NUTS: [neglogq, neglogr]
Sampling 10 chains, 0 divergences: 100%|██████████| 65000/65000 [50:59<00:00, 21.25draws/s]  


2020-01-25


Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average Loss = 170.39:   7%|▋         | 13249/200000 [00:03<00:50, 3680.66it/s]
Convergence achieved at 13400
Interrupted at 13,399 [6%]: Average Loss = 280.4
Multiprocess sampling (5 chains in 5 jobs)
NUTS: [b_delay, a_delay]
Sampling 5 chains, 0 divergences:  96%|█████████▌| 120193/125000 [00:26<00:01, 4516.06draws/s]


KeyboardInterrupt: 

(Note: the run was interrupted manually during the final iteration because the 2020-01-25 cutoff was not used in the manuscript.)