In [1]:
# Attach the packages used throughout the notebook. library() stops with an
# error when a package is missing, so a broken environment is detected here
# rather than at the first call that needs the package.
libraries = c("dplyr","magrittr","tidyr","ggplot2","rstan","readxl")
for (pkg in libraries) {
    library(pkg, character.only=TRUE, warn.conflicts=FALSE, quietly=TRUE)
}

# library() instead of require(): require() only returns FALSE on failure and
# would let the notebook continue without the package.
library(zoo)
library(lubridate)

base_sz = 12 # base_size parameter
theme_set(theme_bw())

# Infix string concatenation: "a" %&% "b" gives "ab".
'%&%' = function(x,y) paste0(x,y)

# rstan settings: one core per chain, and cache compiled models on disk.
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)

# Record the versions of the Stan toolchain in the notebook output.
packageVersion("rstan")
packageVersion("StanHeaders")
rstan::stan_version()

rstan (Version 2.19.2, GitRev: 2e1f913d3ca3)

For execution on a local, multicore CPU with excess RAM we recommend calling
options(mc.cores = parallel::detectCores()).
To avoid recompilation of unchanged Stan programs, we recommend calling
rstan_options(auto_write = TRUE)

Loading required package: zoo


Attaching package: ‘zoo’


The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric


Loading required package: lubridate


Attaching package: ‘lubridate’


The following object is masked from ‘package:base’:

    date




[1] ‘2.19.2’

[1] ‘2.21.0.1’

In [2]:
# Base names (without the .csv extension) of the datasets fitted below; the
# cells pick a dataset by its position in this vector.
filenames = c(
    "data_incper",
    "data_incper_inclwuhan",
    "data_ons_hosp",
    "dthdata_hosp_dth",
    "dthdata_ons_dth",
    "dthdata_ons_hosp"
)

# <font color="maroon">Lognormal distribution</font>

## Without truncation for *data_incper* dataset

In [41]:
idx = 1
## main dir for Stan simulations (deleted and recreated on every run)
standirname = filenames[idx]%&%"-lognormal-no_truncation"
unlink(standirname, recursive=TRUE)
dir.create(standirname)

## Read the records of the selected dataset, keeping only the columns used below.
## EL/ER and SL/SR are presumably the left/right bounds of the exposure and
## symptom-onset windows -- TODO confirm against the data dictionary.
datafilename = "../../data/"%&%filenames[idx]%&%".csv"
df = read.table(datafilename, sep=",", header=TRUE) %>% select(EL,ER,SL,SR,tstar)

# Dumping data (names must match the `data` block of the Stan program)
N = nrow(df)
E_L = df$EL
E_R = df$ER
S_L = df$SL
S_R = df$SR
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N'), file=standirname%&%"/Data.R")

# Dumping initial conditions (names must match the `parameters` block)
s_raw = rep(.9, N)
logmean_SI = 1.0
logsd_SI = 1.0
t = rep(5.0, N)
stan_rdump(c('t', 's_raw', 'logmean_SI', 'logsd_SI'), file=standirname%&%"/Init.R")

# Stan program
# The string below is written verbatim to <standirname>/fit.stan.
#   * logmean_SI / logsd_SI are the sampled parameters; param1 and param2 are the
#     lognormal location and scale derived from them.
#   * s is placed inside [S_L, S_R] through the unit-interval parameter s_raw.
#   * Each record contributes lognormal_lcdf(s - E_L) and, only when s > E_R,
#     lognormal_lccdf(s - E_R).
#   * t enters only through its own lognormal_lpdf term (see the in-model comment).
#   * generated quantities report mean_SI, sd_SI and the per-record log_likelihood.
# NOTE(review): param1 is declared `real<lower = 0>` although
# logmean_SI - param2^2/2 can be negative; Stan rejects draws that violate a
# declared bound -- confirm this restriction is intended.
"data {
    int<lower = 0> N; // number of records
    vector<lower = 0>[N] E_L;
    vector<lower = 0>[N] E_R;
    vector<lower = 0>[N] S_L;
    vector<lower = 0>[N] S_R;
}

parameters {
    real logmean_SI;
    real logsd_SI;

    vector<lower = 0>[N] t;
    vector<lower = 0, upper = 1>[N] s_raw;
}

transformed parameters {
    real<lower = 0> param2 = sqrt(log((exp(2*(logsd_SI-logmean_SI))+1.0)));
    real<lower = 0> param1 = logmean_SI - param2^2/2.0;
    vector<lower = min(S_L), upper = max(S_R)>[N] s;

    s = S_L + (S_R - S_L) .* s_raw;
}

model {
    logmean_SI ~ std_normal();
    logsd_SI ~ std_normal();

    t ~ normal(0, 10.0);
    s_raw ~ normal(0.5, 0.5);

    for (k in 1:N) {
        // here the first term for t[k] is used for predictions, not for the inference of params
        target += lognormal_lpdf(t[k] | param1, param2) 
                    + lognormal_lcdf(s[k] - E_L[k] | param1, param2);
        if (s[k] > E_R[k])
            target += lognormal_lccdf(s[k] - E_R[k] | param1, param2);
    }
}

generated quantities {
    real<lower = 0> mean_SI = exp(param1 + param2^2/2);
    real<lower = 0> sd_SI = sqrt((exp(param2^2)-1)*exp(2*param1+param2^2));

    vector[N] log_likelihood;
    for (k in 1:N) {
        log_likelihood[k] = lognormal_lpdf(t[k] | param1, param2)
                                + lognormal_lcdf(s[k] - E_L[k] | param1, param2);
        if (s[k] > E_R[k])
            log_likelihood[k] += lognormal_lccdf(s[k] - E_R[k] | param1, param2);
    }
}" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)

# Paths used inside the generated script: the CmdStan checkout (relative to the
# notebook directory) and the model directory as seen from that checkout.
standistribdir = "../../../CmdStan"
stanscriptdir = "../Hokkaido_Wuhan_IncubationPeriod_2020/scripts/Andrei/"%&%standirname
## bash file
## Builds the model with CmdStan's makefile, then starts four chains in the
## background. The `wait` after the loop blocks until every chain has exited, so
## the final message and the return of system() really mean sampling is done
## (without it the script returned while the samplers were still running).
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..4}
do
    echo Running ${i}
    SEEDNUMBER=$((1+$i))
    ./fit \\
        method=sample num_samples=5000 num_warmup=10000 save_warmup=0 \\
            adapt delta=0.92 \\
            algorithm=hmc \\
                engine=nuts \\
        random seed=${SEEDNUMBER} \\
        id=$i \\
        data file=Data.R \\
        init=Init.R \\
        output file=trace-$i.csv \\
            diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
wait
echo Finished sampling haha!
" %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)

## running the bash script (returns the script's stdout as a character vector)
system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)

## Without truncation for *data_incper_inclwuhan* dataset

Note: the following code could be used for all the other datasets as well, but to keep things simple I wrote separate scripts for the different types of datasets.

In [42]:
idx = 2
## main dir for Stan simulations (deleted and recreated on every run)
standirname = filenames[idx]%&%"-lognormal-no_truncation"
unlink(standirname, recursive=TRUE)
dir.create(standirname)

## Read the records of the selected dataset, keeping only the columns used below.
datafilename = "../../data/"%&%filenames[idx]%&%".csv"
df = read.table(datafilename, sep=",", header=TRUE) %>% select(EL,ER,SL,SR,tstar)
df1 = df %>% filter(EL>0) # E_L is defined
df2 = df %>% filter(EL==0) # E_L is missing
df = rbind(df1,df2) # we move all incomplete records to the end of the dataframe

# Dumping data (names must match the `data` block of the Stan program)
N = nrow(df)
N2 = nrow(df2) # number of records whose E_L has to be estimated
E_L = df$EL
E_R = df$ER
S_L = df$SL
S_R = df$SR
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N', 'N2'), file=standirname%&%"/Data.R")

# Dumping initial conditions (names must match the `parameters` block)
s_raw = rep(.5, N)
logmean_SI = 1.0
logsd_SI = 1.0
E_L_est_raw = rep(.5, N2)
t = rep(5.0, N)
stan_rdump(c('s_raw', 'E_L_est_raw', 't', 'logmean_SI', 'logsd_SI'), file=standirname%&%"/Init.R")

# Stan program
# The string below is written verbatim to <standirname>/fit.stan.
#   * The data are ordered so that the first N1 = N - N2 records have a known E_L
#     and the last N2 records do not.
#   * For the last N2 records E_L_est = E_R * E_L_est_raw, with E_L_est_raw in
#     [0, 1], replaces E_L in the lognormal_lcdf term.
#   * s is placed inside [S_L, S_R] through the unit-interval parameter s_raw.
#   * lognormal_lccdf(s - E_R) is added only when s > E_R.
# NOTE(review): param1 is declared `real<lower = 0>` although
# logmean_SI - param2^2/2 can be negative; Stan rejects draws that violate a
# declared bound -- confirm this restriction is intended.
"data {
    int<lower = 0> N; // number of records
    int<lower = 0> N2; // number of incomplete records
    vector<lower = 0>[N] E_L;
    vector<lower = 0>[N] E_R;
    vector<lower = 0>[N] S_L;
    vector<lower = 0>[N] S_R;
}

transformed data {
    int<lower = 0> N1 = N - N2;
}

parameters {
    real logmean_SI;
    real logsd_SI;

    vector<lower = 0>[N] t;
    vector<lower = 0, upper = 1>[N] s_raw;
    vector<lower = 0, upper = 1>[N2] E_L_est_raw;
}

transformed parameters {
    real<lower = 0> param2 = sqrt(log((exp(2*(logsd_SI-logmean_SI))+1.0)));
    real<lower = 0> param1 = logmean_SI - param2^2/2.0;

    vector<lower = min(S_L), upper = max(S_R)>[N] s;
    vector<lower = 0, upper = max(E_R)>[N2] E_L_est;

    s = S_L + (S_R - S_L) .* s_raw;
    for (k in 1:N2) 
        E_L_est[k] = E_R[N1 + k] * E_L_est_raw[k];
}

model {
    logmean_SI ~ std_normal();
    logsd_SI ~ std_normal();

    t ~ normal(0, 10.0);
    s_raw ~ normal(0.5, 0.5);
    E_L_est_raw ~ normal(0.5, 0.5);

    for (k in 1:N) {
        // here the first term for t[k] is used for predictions, not for the inference of params
        target += lognormal_lpdf(t[k] | param1, param2);
        if (s[k] > E_R[k])
            target += lognormal_lccdf(s[k] - E_R[k] | param1, param2);
        if (k <= N1) 
            target += lognormal_lcdf(s[k] - E_L[k] | param1, param2);
        else
            target += lognormal_lcdf(s[k] - E_L_est[k - N1] | param1, param2);
    }
}

generated quantities {
    real<lower = 0> mean_SI = exp(param1 + param2^2/2);
    real<lower = 0> sd_SI = sqrt((exp(param2^2)-1)*exp(2*param1+param2^2));

    vector[N] log_likelihood;
    for (k in 1:N) {
        log_likelihood[k] = lognormal_lpdf(t[k] | param1, param2);
        if (s[k] > E_R[k])
            log_likelihood[k] += lognormal_lccdf(s[k] - E_R[k]| param1, param2);
        if (k <= N1) 
            log_likelihood[k] += lognormal_lcdf(s[k] - E_L[k] | param1, param2);
        else
            log_likelihood[k] += lognormal_lcdf(s[k] - E_L_est[k - N1] | param1, param2);
    }
}" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)

# Paths used inside the generated script: the CmdStan checkout (relative to the
# notebook directory) and the model directory as seen from that checkout.
standistribdir = "../../../CmdStan"
stanscriptdir = "../Hokkaido_Wuhan_IncubationPeriod_2020/scripts/Andrei/"%&%standirname
## bash file
## Builds the model with CmdStan's makefile, then starts four chains in the
## background. The `wait` after the loop blocks until every chain has exited, so
## the final message and the return of system() really mean sampling is done
## (without it the script returned while the samplers were still running).
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..4}
do
    echo Running ${i}
    SEEDNUMBER=$((12345+$i))
    ./fit \\
        method=sample num_samples=5000 num_warmup=10000 save_warmup=0 \\
            adapt delta=0.92 \\
            algorithm=hmc \\
                engine=nuts \\
        random seed=${SEEDNUMBER} \\
        id=$i \\
        data file=Data.R \\
        init=Init.R \\
        output file=trace-$i.csv \\
            diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
wait
echo Finished sampling haha!
" %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)

## running the bash script (returns the script's stdout as a character vector)
system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)

## Without truncation for the rest of the data

In [45]:
# Fit the untruncated lognormal model to every dataset from the third one
# onwards. seq_along(...)[-(1:2)] yields 3, 4, ... and is empty (rather than
# counting down 3, 2) if fewer than three datasets are listed.
for (idx in seq_along(filenames)[-(1:2)]) {
    ## main dir for Stan simulations (deleted and recreated on every run)
    standirname = filenames[idx]%&%"-lognormal-no_truncation"
    unlink(standirname, recursive=TRUE)
    dir.create(standirname)

    ## Read the records of the selected dataset, keeping only the columns used below.
    datafilename = "../../data/"%&%filenames[idx]%&%".csv"
    df = read.table(datafilename, sep=",", header=TRUE) %>% select(EL,ER,SL,SR,tstar)

    # Dumping data (names must match the `data` block of the Stan program)
    N = nrow(df)
    E_L = df$EL
    E_R = df$ER
    S_L = df$SL
    S_R = df$SR
    stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N'), file=standirname%&%"/Data.R")

    # Dumping initial conditions (names must match the `parameters` block)
    E_raw = rep(.3, N)
    S_raw = rep(.7, N)
    logmean_SI = log(5.0)
    logsd_SI = log(3.0)
    stan_rdump(c('E_raw', 'S_raw', 'logmean_SI', 'logsd_SI'), file=standirname%&%"/Init.R")

    # Stan program (written verbatim to <standirname>/fit.stan)
    #   * E is placed inside [E_L, E_R] through E_raw.
    #   * S is placed between S_L (or E, when E > S_L) and S_R through S_raw.
    #   * t = S - E is given a lognormal(param1, param2) distribution.
    # NOTE(review): param1 is declared `real<lower = 0>` although
    # logmean_SI - param2^2/2 can be negative -- confirm this is intended.
    "data {
        int<lower = 0> N; // number of records
        vector<lower = 0>[N] E_L;
        vector<lower = 0>[N] E_R;
        vector<lower = 0>[N] S_L;
        vector<lower = 0>[N] S_R;
    }

    parameters {
        real logmean_SI;
        real logsd_SI;

        vector<lower = 0, upper = 1>[N] E_raw;
        vector<lower = 0, upper = 1>[N] S_raw;
    }

    transformed parameters {
        real<lower = 0> param2 = sqrt(log((exp(2*(logsd_SI-logmean_SI))+1.0)));
        real<lower = 0> param1 = logmean_SI - param2^2/2.0;

        vector<lower = min(E_L), upper = max(E_R)>[N] E;
        vector<lower = min(S_L), upper = max(S_R)>[N] S;
        vector<lower = 0>[N] t;

        E = E_L + (E_R - E_L) .* E_raw;
        for (k in 1:N) {
            if (E[k] > S_L[k]) 
                S[k] = E[k] + (S_R[k] - E[k]) * S_raw[k];
            else 
                S[k] = S_L[k] + (S_R[k] - S_L[k]) * S_raw[k];
        }
        t = S - E;
    }

    model {
        logmean_SI ~ std_normal();
        logsd_SI ~ std_normal();

        E_raw ~ normal(0.5, 1.0);
        S_raw ~ normal(0.5, 1.0);

        t ~ lognormal(param1, param2);
    }

    generated quantities {
        real<lower = 0> mean_SI = exp(param1 + param2^2/2);
        real<lower = 0> sd_SI = sqrt((exp(param2^2)-1)*exp(2*param1+param2^2));

        vector[N] log_likelihood;
        for (k in 1:N)
             log_likelihood[k] = lognormal_lpdf(t[k] | param1, param2);
    }" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)

    # Paths used inside the generated script: the CmdStan checkout (relative to
    # the notebook directory) and the model directory as seen from that checkout.
    standistribdir = "../../../CmdStan"
    stanscriptdir = "../Hokkaido_Wuhan_IncubationPeriod_2020/scripts/Andrei/"%&%standirname
    ## bash file
    ## Builds the model, then starts five chains in the background. The `wait`
    ## after the loop blocks until every chain has exited; without it system()
    ## returned at once and the chains of all datasets (plus the next `make`)
    ## ran on top of each other.
    "#!/bin/bash
    cwd=$(pwd)
    cd "%&%standistribdir%&%"
    make -j6 "%&%stanscriptdir%&%"/fit
    cd "%&%stanscriptdir%&%"
    mkdir -p diagnostics
    for i in {1..5}
    do
        echo Running ${i}
        SEEDNUMBER=$((12345+$i))
        ./fit \\
            method=sample num_samples=5000 num_warmup=10000 save_warmup=0 \\
                adapt delta=0.92 \\
                algorithm=hmc \\
                    engine=nuts \\
            random seed=${SEEDNUMBER} \\
            id=$i \\
            data file=Data.R \\
            init=Init.R \\
            output file=trace-$i.csv \\
                diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
    done
    wait
    echo Finished sampling haha!
    " %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)

    ## running the bash script (returns the script's stdout as a character vector)
    system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)
}

## <font color="red">With truncation</font>

**ONLY DONE FOR BEST-FIT MODELS ACCORDING TO TABLE 1**

## With truncation for *data_incper* dataset

In [79]:
idx = 1
## main dir for Stan simulations (deleted and recreated on every run)
standirname = filenames[idx]%&%"-lognormal-truncated"
unlink(standirname, recursive=TRUE)
dir.create(standirname)

## Read the records of the selected dataset, keeping only the columns used below.
datafilename = "../../data/"%&%filenames[idx]%&%".csv"
df = read.table(datafilename, sep=",", header=TRUE) %>% select(EL,ER,SL,SR,tstar)

# Dumping data (names must match the `data` block of the Stan program)
N = nrow(df)
E_L = df$EL
E_R = df$ER
S_L = df$SL
S_R = df$SR
r = 0.14 # presumably the exponential growth rate used in the truncation term -- TODO confirm
upper_bound = df$tstar[1] # only the first tstar value is used; assumes the column is constant -- TODO confirm
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N', 'upper_bound', 'r'), file=standirname%&%"/Data.R")

# Dumping initial conditions (names must match the `parameters` block)
s_raw = rep(.9, N)
logmean_SI = log(5.0)
logsd_SI = log(3.0)
t = rep(5.0, N)
stan_rdump(c('t', 's_raw', 'logmean_SI', 'logsd_SI'), file=standirname%&%"/Init.R")

# Stan program
# The string below is written verbatim to <standirname>/fit.stan.
#   * fstar is the integrand handed to integrate_1d; it reads param1, param2, s
#     and t from theta, and upper_bound and r from x_r.
#   * Each record's log density is reduced by the log of the integral of fstar
#     over [0.01, 1.0]; the trailing 1e-8 is passed as the last integrate_1d
#     argument. The integrand's denominator 1 - exp(-r*tstar*x) is zero at
#     x = 0, which the lower limit 0.01 avoids.
#   * Apart from that term the likelihood has the same lcdf/lccdf structure as
#     the untruncated model for this dataset.
#   * mean_SI and sd_SI are reported as exp(logmean_SI) and exp(logsd_SI).
# NOTE(review): param1 is declared `real<lower = 0>` although
# logmean_SI - param2^2/2 can be negative; Stan rejects draws that violate a
# declared bound -- confirm this restriction is intended.
"functions {
    real fstar(real x,          // Function argument
               real xc,
               real[] theta,    // parameters
               real[] x_r,      // data (real)
               int[] x_i) {     // data (integer)

        real param1 = theta[1];
        real param2 = theta[2];
        real s = theta[3];
        real t = theta[4];
        real upper_bound = x_r[1];
        real r = x_r[2];

        real tstar = upper_bound - (s - t);

        return exp(lognormal_lcdf(tstar*(1.0-x) | param1, param2)) * r * tstar* exp(-r*tstar*x) / (1.0 - exp(-r*tstar*x));
    }
}

data {
    int<lower = 0> N; // number of records
    vector<lower = 0>[N] E_L;
    vector<lower = 0>[N] E_R;
    vector<lower = 0>[N] S_L;
    vector<lower = 0>[N] S_R;
    real<lower = 0> upper_bound;
    real<lower = 0> r;
}

transformed data {
    int X_i[0]; //empty array
}


parameters {
    real logmean_SI;
    real logsd_SI;

    vector<lower = 0>[N] t;
    vector<lower = 0, upper = 1>[N] s_raw;
}

transformed parameters {
    real<lower = 0> param2 = sqrt(log((exp(2*(logsd_SI-logmean_SI))+1.0)));
    real<lower = 0> param1 = logmean_SI - param2^2/2.0;

    vector<lower = min(S_L), upper = max(S_R)>[N] s;

    s = S_L + (S_R - S_L) .* s_raw;
}

model {
    logmean_SI ~ std_normal();
    logsd_SI ~ std_normal();

    t ~ normal(0, 5.0);
    s_raw ~ normal(0.5, 0.5);

    for (k in 1:N) {
        // here the first term for t[k] is used for predictions, not for the inference of params
        target += lognormal_lpdf(t[k] | param1, param2) 
                    + lognormal_lcdf(s[k] - E_L[k] | param1, param2)
                    - log(integrate_1d(fstar, 0.01, 1.0, {param1, param2, s[k], t[k]}, {upper_bound, r}, X_i, 1e-8));
        if (s[k] > E_R[k])
            target += lognormal_lccdf(s[k] - E_R[k] | param1, param2);
    }
}

generated quantities {
    real<lower = 0> mean_SI = exp(logmean_SI);
    real<lower = 0> sd_SI = exp(logsd_SI);

    vector[N] log_likelihood;
    for (k in 1:N) {
        log_likelihood[k] = lognormal_lpdf(t[k] | param1, param2)
                                + lognormal_lcdf(s[k] - E_L[k] | param1, param2)
                                - log(integrate_1d(fstar, 0.01, 1.0, {param1, param2, s[k], t[k]}, {upper_bound, r}, X_i, 1e-8));

        if (s[k] > E_R[k])
            log_likelihood[k] += lognormal_lccdf(s[k] - E_R[k] | param1, param2);
    }
}" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)

# Paths used inside the generated script: the CmdStan checkout (relative to the
# notebook directory) and the model directory as seen from that checkout.
standistribdir = "../../../CmdStan"
stanscriptdir = "../Hokkaido_Wuhan_IncubationPeriod_2020/scripts/Andrei/"%&%standirname
## bash file
## Builds the model with CmdStan's makefile, then starts four chains in the
## background. The `wait` after the loop blocks until every chain has exited, so
## the final message and the return of system() really mean sampling is done
## (without it the script returned while the samplers were still running).
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..4}
do
    echo Running ${i}
    SEEDNUMBER=$((1+$i))
    ./fit \\
        method=sample num_samples=2500 num_warmup=4000 save_warmup=0 \\
            adapt delta=0.92 \\
            algorithm=hmc \\
                engine=nuts \\
        random seed=${SEEDNUMBER} \\
        id=$i \\
        data file=Data.R \\
        init=Init.R \\
        output file=trace-$i.csv \\
            diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
wait
echo Finished sampling haha!
" %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)

## running the bash script (returns the script's stdout as a character vector)
system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)

## With truncation for *data_incper_inclwuhan* dataset

In [78]:
idx = 2
## main dir for Stan simulations (deleted and recreated on every run)
standirname = filenames[idx]%&%"-lognormal-truncated"
unlink(standirname, recursive=TRUE)
dir.create(standirname)

## Read the records of the selected dataset, keeping only the columns used below.
datafilename = "../../data/"%&%filenames[idx]%&%".csv"
df = read.table(datafilename, sep=",", header=TRUE) %>% select(EL,ER,SL,SR,tstar)
df1 = df %>% filter(EL>0) # E_L is defined
df2 = df %>% filter(EL==0) # E_L is missing
df = rbind(df1,df2) # we move all incomplete records to the end of the dataframe

# Dumping data (names must match the `data` block of the Stan program)
N = nrow(df)
N2 = nrow(df2) # number of records whose E_L has to be estimated
E_L = df$EL
E_R = df$ER
S_L = df$SL
S_R = df$SR
r = 0.14 # presumably the exponential growth rate used in the truncation term -- TODO confirm
upper_bound = df$tstar[1] # only the first tstar value is used; assumes the column is constant -- TODO confirm
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N', 'N2', 'r', 'upper_bound'), file=standirname%&%"/Data.R")

# Dumping initial conditions (names must match the `parameters` block)
s_raw = rep(.9, N)
logmean_SI = log(4.0)
logsd_SI = log(3.0)
E_L_est_raw = rep(.5, N2)
t = rep(5.0, N)
stan_rdump(c('s_raw', 'E_L_est_raw', 't', 'logmean_SI', 'logsd_SI'), file=standirname%&%"/Init.R")

# Stan program
# The string below is written verbatim to <standirname>/fit.stan.
#   * fstar is the integrand handed to integrate_1d; it reads param1, param2, s
#     and t from theta, and upper_bound and r from x_r.
#   * Each record's log density is reduced by the log of the integral of fstar
#     over [0.01, 1.0]; no tolerance argument is passed to integrate_1d here.
#     The integrand's denominator 1 - exp(-r*tstar*x) is zero at x = 0, which
#     the lower limit 0.01 avoids.
#   * The last N2 records use E_L_est = E_R * E_L_est_raw in place of E_L.
#   * mean_SI, sd_SI and median_SI are computed in `transformed parameters`.
# NOTE(review): param1 is declared `real<lower = 0>` although
# logmean_SI - param2^2/2 can be negative; Stan rejects draws that violate a
# declared bound -- confirm this restriction is intended.
"functions {
    real fstar(real x,          // Function argument
               real xc,
               real[] theta,    // parameters
               real[] x_r,      // data (real)
               int[] x_i) {     // data (integer)

        real param1 = theta[1];
        real param2 = theta[2];
        real s = theta[3];
        real t = theta[4];
        real upper_bound = x_r[1];
        real r = x_r[2];

        real tstar = upper_bound - (s - t);

        return exp(lognormal_lcdf(tstar*(1.0-x) | param1, param2)) * r * tstar* exp(-r*tstar*x) / (1.0 - exp(-r*tstar*x));
    }
}

data {
    int<lower = 0> N; // number of records
    int<lower = 0> N2; // number of incomplete records
    vector<lower = 0>[N] E_L;
    vector<lower = 0>[N] E_R;
    vector<lower = 0>[N] S_L;
    vector<lower = 0>[N] S_R;
    real<lower = 0> upper_bound;
    real<lower = 0> r;
}

transformed data {
    int X_i[0]; //empty array
    int<lower = 0> N1 = N - N2;
}

parameters {
    real logmean_SI;
    real logsd_SI;

    vector<lower = 0>[N] t;
    vector<lower = 0, upper = 1>[N] s_raw;
    vector<lower = 0, upper = 1>[N2] E_L_est_raw;
}

transformed parameters {
    real<lower = 0> param2 = sqrt(log((exp(2*(logsd_SI-logmean_SI))+1.0)));
    real<lower = 0> param1 = logmean_SI - param2^2/2.0;
    real<lower = 0> mean_SI = exp(logmean_SI);
    real<lower = 0> sd_SI = exp(logsd_SI);
    real median_SI = exp(param1);
    vector<lower = min(S_L), upper = max(S_R)>[N] s;
    vector<lower = 0, upper = max(E_R)>[N2] E_L_est;

    s = S_L + (S_R - S_L) .* s_raw;
    for (k in 1:N2) 
        E_L_est[k] = E_R[N1 + k] * E_L_est_raw[k];
}

model {
    logmean_SI ~ std_normal();
    logsd_SI ~ std_normal();

    t ~ normal(0, 10.0);
    s_raw ~ normal(0.5, 0.5);
    E_L_est_raw ~ normal(0.5, 0.5);

    for (k in 1:N) {
        // here the first term for t[k] is used for predictions, not for the inference of params
        target += lognormal_lpdf(t[k] | param1, param2) 
                    - log(integrate_1d(fstar, 0.01, 1.0, {param1, param2, s[k], t[k]}, {upper_bound, r}, X_i));
        if (s[k] > E_R[k])
            target += lognormal_lccdf(s[k] - E_R[k] | param1, param2);
        if (k <= N1) 
            target += lognormal_lcdf(s[k] - E_L[k] | param1, param2);
        else
            target += lognormal_lcdf(s[k] - E_L_est[k - N1] | param1, param2);
    }
}

generated quantities {
    vector[N] log_likelihood;
    for (k in 1:N) {
        log_likelihood[k] = lognormal_lpdf(t[k] | param1, param2) 
                                - log(integrate_1d(fstar, 0.01, 1.0, {param1, param2, s[k], t[k]}, {upper_bound, r}, X_i));
        if (s[k] > E_R[k])
            log_likelihood[k] += lognormal_lccdf(s[k] - E_R[k]| param1, param2);
        if (k <= N1) 
            log_likelihood[k] += lognormal_lcdf(s[k] - E_L[k] | param1, param2);
        else
            log_likelihood[k] += lognormal_lcdf(s[k] - E_L_est[k - N1] | param1, param2);
    }
}" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)

# Paths used inside the generated script: the CmdStan checkout (relative to the
# notebook directory) and the model directory as seen from that checkout.
standistribdir = "../../../CmdStan"
stanscriptdir = "../Hokkaido_Wuhan_IncubationPeriod_2020/scripts/Andrei/"%&%standirname
## bash file
## Builds the model with CmdStan's makefile, then starts four chains in the
## background. The `wait` after the loop blocks until every chain has exited, so
## the final message and the return of system() really mean sampling is done
## (without it the script returned while the samplers were still running).
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..4}
do
    echo Running ${i}
    SEEDNUMBER=$((12345+$i))
    ./fit \\
        method=sample num_samples=2500 num_warmup=4000 save_warmup=0 \\
            adapt delta=0.92 \\
            algorithm=hmc \\
                engine=nuts \\
        random seed=${SEEDNUMBER} \\
        id=$i \\
        data file=Data.R \\
        init=Init.R \\
        output file=trace-$i.csv \\
            diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
wait
echo Finished sampling haha!
" %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)

## running the bash script (returns the script's stdout as a character vector)
system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)

# <font color="maroon">Gamma distribution</font>

## Without truncation for *data_incper* dataset

In [32]:
idx = 1
## main dir for Stan simulations (deleted and recreated on every run)
standirname = filenames[idx]%&%"-gamma-no_truncation"
unlink(standirname, recursive=TRUE)
dir.create(standirname)

## Read the records of the selected dataset, keeping only the columns used below.
datafilename = "../../data/"%&%filenames[idx]%&%".csv"
df = read.table(datafilename, sep=",", header=TRUE) %>% select(EL,ER,SL,SR,tstar)

# Dumping data (names must match the `data` block of the Stan program)
N = nrow(df)
E_L = df$EL
E_R = df$ER
S_L = df$SL
S_R = df$SR
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N'), file=standirname%&%"/Data.R")

# Dumping initial conditions (names must match the `parameters` block)
s_raw = rep(.5, N)
param1 = 3.0
param2 = 2.0
t = rep(4.0, N)
stan_rdump(c('t', 's_raw', 'param1', 'param2'), file=standirname%&%"/Init.R")

# Stan program
# The string below is written verbatim to <standirname>/fit.stan.
#   * param1 and param2 are the two gamma parameters, sampled directly; the
#     program reports mean_SI = param1/param2 and sd_SI = sqrt(param1)/param2.
#   * s is placed inside [S_L, S_R] through the unit-interval parameter s_raw.
#   * Each record contributes gamma_lcdf(s - E_L) and, only when s > E_R,
#     gamma_lccdf(s - E_R).
#   * t enters only through its own gamma_lpdf term (see the in-model comment).
"data {
    int<lower = 0> N; // number of records
    vector<lower = 0>[N] E_L;
    vector<lower = 0>[N] E_R;
    vector<lower = 0>[N] S_L;
    vector<lower = 0>[N] S_R;
}

parameters {
    real<lower = 0> param1;
    real<lower = 0> param2;

    vector<lower = 0>[N] t;
    vector<lower = 0, upper = 1>[N] s_raw;
}

transformed parameters {
    vector<lower = min(S_L), upper = max(S_R)>[N] s;

    s = S_L + (S_R - S_L) .* s_raw;
}

model {
    param1 ~ std_normal();
    param2 ~ cauchy(0, 5.0);

    t ~ normal(5, 5);
    s_raw ~ normal(0.5, 0.5);

    for (k in 1:N) {
        // here the first term for t[k] is used for predictions, not for the inference of params
        target += gamma_lpdf(t[k] | param1, param2) + gamma_lcdf(s[k] - E_L[k] | param1, param2);
        if (s[k]>E_R[k])
            target += gamma_lccdf(s[k] - E_R[k] | param1, param2);
    }
}

generated quantities {
    real<lower = 0> mean_SI = param1/param2;
    real<lower = 0> sd_SI = sqrt(param1)/param2;

    vector[N] log_likelihood;
    for (k in 1:N) {
        log_likelihood[k] = gamma_lpdf(t[k] | param1, param2) + gamma_lcdf(s[k] - E_L[k] | param1, param2);
        if (s[k]>E_R[k])
            log_likelihood[k] += gamma_lccdf(s[k] - E_R[k] | param1, param2);
    }
}" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)

# Paths used inside the generated script: the CmdStan checkout (relative to the
# notebook directory) and the model directory as seen from that checkout.
standistribdir = "../../../CmdStan"
stanscriptdir = "../Hokkaido_Wuhan_IncubationPeriod_2020/scripts/Andrei/"%&%standirname
## bash file
## Builds the model with CmdStan's makefile, then starts four chains in the
## background. The `wait` after the loop blocks until every chain has exited, so
## the final message and the return of system() really mean sampling is done
## (without it the script returned while the samplers were still running).
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..4}
do
    echo Running ${i}
    SEEDNUMBER=$((12345+$i))
    ./fit \\
        method=sample num_samples=5000 num_warmup=10000 save_warmup=0 \\
            adapt delta=0.92 \\
            algorithm=hmc \\
                engine=nuts \\
        random seed=${SEEDNUMBER} \\
        id=$i \\
        data file=Data.R \\
        init=Init.R \\
        output file=trace-$i.csv \\
            diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
wait
echo Finished sampling haha!
" %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)

## running the bash script (returns the script's stdout as a character vector)
system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)

## Without truncation for *data_incper_inclwuhan* dataset

In [33]:
idx = 2

## main dir for Stan simulations (deleted and recreated on every run)
standirname = filenames[idx]%&%"-gamma-no_truncation"
unlink(standirname, recursive=TRUE)
dir.create(standirname)

## Read the records of the selected dataset, keeping only the columns used below.
datafilename = "../../data/"%&%filenames[idx]%&%".csv"
df = read.table(datafilename, sep=",", header=TRUE) %>% select(EL,ER,SL,SR,tstar)
df1 = df %>% filter(EL>0) # E_L is defined
df2 = df %>% filter(EL==0) # E_L is missing
df = rbind(df1,df2) # we move all incomplete records to the end of the dataframe

# Dumping data (names must match the `data` block of the Stan program)
N = nrow(df)
N2 = nrow(df2) # number of records whose E_L has to be estimated
E_L = df$EL
E_R = df$ER
S_L = df$SL
S_R = df$SR
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N', 'N2'), file=standirname%&%"/Data.R")

# Dumping initial conditions (names must match the `parameters` block)
s_raw = rep(.9, N)
param1 = 3.0
param2 = 2.0
E_L_est_raw = rep(.5, N2)
t = rep(5.0, N)
stan_rdump(c('s_raw', 'E_L_est_raw', 't', 'param1', 'param2'), file=standirname%&%"/Init.R")

# Stan program
# The string below is written verbatim to <standirname>/fit.stan.
#   * param1 and param2 are the two gamma parameters, sampled directly; the
#     program reports mean_SI = param1/param2 and sd_SI = sqrt(param1)/param2.
#   * The data are ordered so that the first N1 = N - N2 records have a known E_L
#     and the last N2 records do not.
#   * For the last N2 records E_L_est = E_R * E_L_est_raw, with E_L_est_raw in
#     [0, 1], replaces E_L in the gamma_lcdf term.
#   * gamma_lccdf(s - E_R) is added only when s > E_R.
"data {
    int<lower = 0> N; // number of records
    int<lower = 0> N2; // number of incomplete records
    vector<lower = 0>[N] E_L;
    vector<lower = 0>[N] E_R;
    vector<lower = 0>[N] S_L;
    vector<lower = 0>[N] S_R;
}

transformed data {
    int<lower = 0> N1 = N - N2;
}

parameters {
    real<lower = 0> param1;
    real<lower = 0> param2;

    vector<lower = 0>[N] t;
    vector<lower = 0, upper = 1>[N] s_raw;
    vector<lower = 0, upper = 1>[N2] E_L_est_raw;
}

transformed parameters {
    vector<lower = min(S_L), upper = max(S_R)>[N] s;
    vector<lower = 0, upper = max(E_R)>[N2] E_L_est;

    s = S_L + (S_R - S_L) .* s_raw;
    for (k in 1:N2) 
        E_L_est[k] = E_R[N1 + k] * E_L_est_raw[k];
}

model {
    param1 ~ std_normal();
    param2 ~ cauchy(0, 5.0);

    t ~ normal(0, 10.0);
    s_raw ~ normal(0.5, 0.5);
    E_L_est_raw ~ normal(0.5, 0.5);

    for (k in 1:N) {
        // here the first term for t[k] is used for predictions, not for the inference of params
        target += gamma_lpdf(t[k] | param1, param2);
        if (s[k]>E_R[k])
            target += gamma_lccdf(s[k] - E_R[k]| param1, param2);
        if (k <= N1) 
            target += gamma_lcdf(s[k] - E_L[k] | param1, param2);
        else
            target += gamma_lcdf(s[k] - E_L_est[k - N1] | param1, param2);
    }
}

generated quantities {
    real<lower = 0> mean_SI = param1/param2;
    real<lower = 0> sd_SI = sqrt(param1)/param2;

    vector[N] log_likelihood;
    for (k in 1:N) {
        log_likelihood[k] = gamma_lpdf(t[k] | param1, param2);
        if (s[k]>E_R[k])
            log_likelihood[k] += gamma_lccdf(s[k] - E_R[k] | param1, param2);
        if (k <= N1) 
            log_likelihood[k] += gamma_lcdf(s[k] - E_L[k] | param1, param2);
        else
            log_likelihood[k] += gamma_lcdf(s[k] - E_L_est[k - N1] | param1, param2);
    }
}" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)

# Paths used inside the generated script: the CmdStan checkout (relative to the
# notebook directory) and the model directory as seen from that checkout.
standistribdir = "../../../CmdStan"
stanscriptdir = "../Hokkaido_Wuhan_IncubationPeriod_2020/scripts/Andrei/"%&%standirname
## bash file
## Builds the model with CmdStan's makefile, then starts four chains in the
## background. The `wait` after the loop blocks until every chain has exited, so
## the final message and the return of system() really mean sampling is done
## (without it the script returned while the samplers were still running).
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..4}
do
    echo Running ${i}
    SEEDNUMBER=$((12345+$i))
    ./fit \\
        method=sample num_samples=5000 num_warmup=10000 save_warmup=0 \\
            adapt delta=0.92 \\
            algorithm=hmc \\
                engine=nuts \\
        random seed=${SEEDNUMBER} \\
        id=$i \\
        data file=Data.R \\
        init=Init.R \\
        output file=trace-$i.csv \\
            diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
wait
echo Finished sampling haha!
" %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)

## running the bash script (returns the script's stdout as a character vector)
system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)

## Without truncation for the rest of the data

In [34]:
# Fit the untruncated gamma model to every dataset from the third one onwards.
# seq_along(...)[-(1:2)] yields 3, 4, ... and is empty (rather than counting
# down 3, 2) if fewer than three datasets are listed.
for (idx in seq_along(filenames)[-(1:2)]) {
    ## main dir for Stan simulations (deleted and recreated on every run)
    standirname = filenames[idx]%&%"-gamma-no_truncation"
    unlink(standirname, recursive=TRUE)
    dir.create(standirname)

    ## Read the records of the selected dataset, keeping only the columns used below.
    datafilename = "../../data/"%&%filenames[idx]%&%".csv"
    df = read.table(datafilename, sep=",", header=TRUE) %>% select(EL,ER,SL,SR,tstar)

    # Dumping data (names must match the `data` block of the Stan program)
    N = nrow(df)
    E_L = df$EL
    E_R = df$ER
    S_L = df$SL
    S_R = df$SR
    stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N'), file=standirname%&%"/Data.R")

    # Dumping initial conditions (names must match the `parameters` block)
    E_raw = rep(.1, N)
    S_raw = rep(.9, N)
    param1 = 3.0
    param2 = 2.0
    stan_rdump(c('E_raw', 'S_raw', 'param1', 'param2'), file=standirname%&%"/Init.R")

    # Stan program (written verbatim to <standirname>/fit.stan)
    #   * E is placed inside [E_L, E_R] through E_raw.
    #   * S is placed between S_L (or E, when E > S_L) and S_R through S_raw.
    #   * t = S - E is given a gamma(param1, param2) distribution.
    "data {
        int<lower = 0> N; // number of records
        vector<lower = 0>[N] E_L;
        vector<lower = 0>[N] E_R;
        vector<lower = 0>[N] S_L;
        vector<lower = 0>[N] S_R;
    }

    parameters {
        real<lower = 0> param1;
        real<lower = 0> param2;

        vector<lower = 0, upper = 1>[N] E_raw;
        vector<lower = 0, upper = 1>[N] S_raw;
    }

    transformed parameters {
        vector<lower = min(E_L), upper = max(E_R)>[N] E;
        vector<lower = min(S_L), upper = max(S_R)>[N] S;
        vector<lower = 0>[N] t;

        E = E_L + (E_R - E_L) .* E_raw;
        for (k in 1:N) {
            if (E[k]>S_L[k]) 
                S[k] = E[k] + (S_R[k] - E[k]) * S_raw[k];
            else 
                S[k] = S_L[k] + (S_R[k] - S_L[k]) * S_raw[k];
        }
        t = S - E;
    }

    model {
        param1 ~ std_normal();
        param2 ~ cauchy(0, 5.0);

        E_raw ~ normal(0.5, 1.0);
        S_raw ~ normal(0.5, 1.0);

        t ~ gamma(param1, param2);
    }

    generated quantities {
        real mean_SI = param1/param2;
        real sd_SI = sqrt(param1)/param2;

        vector[N] log_likelihood;
        for (k in 1:N)
             log_likelihood[k] = gamma_lpdf(t[k] | param1, param2);
    }" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)

    # Paths used inside the generated script: the CmdStan checkout (relative to
    # the notebook directory) and the model directory as seen from that checkout.
    standistribdir = "../../../CmdStan"
    stanscriptdir = "../Hokkaido_Wuhan_IncubationPeriod_2020/scripts/Andrei/"%&%standirname
    ## bash file
    ## Builds the model, then starts four chains in the background. The `wait`
    ## after the loop blocks until every chain has exited; without it system()
    ## returned at once and the chains of all datasets (plus the next `make`)
    ## ran on top of each other.
    "#!/bin/bash
    cwd=$(pwd)
    cd "%&%standistribdir%&%"
    make -j6 "%&%stanscriptdir%&%"/fit
    cd "%&%stanscriptdir%&%"
    mkdir -p diagnostics
    for i in {1..4}
    do
        echo Running ${i}
        SEEDNUMBER=$((12345+$i))
        ./fit \\
            method=sample num_samples=5000 num_warmup=10000 save_warmup=0 \\
                adapt delta=0.92 \\
                algorithm=hmc \\
                    engine=nuts \\
            random seed=${SEEDNUMBER} \\
            id=$i \\
            data file=Data.R \\
            init=Init.R \\
            output file=trace-$i.csv \\
                diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
    done
    wait
    echo Finished sampling haha!
    " %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)

    ## running the bash script (returns the script's stdout as a character vector)
    system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)
}

## <font color="red">With truncation</font>

**ONLY DONE FOR BEST-FIT MODELS ACCORDING TO TABLE 1**

In [75]:
# Gamma model with right truncation, fitted with CmdStan.
# For every selected dataset index this cell
#   1. recreates the working directory `<dataset>-gamma-truncated`,
#   2. dumps the data (Data.R) and the initial values (Init.R) in Rdump format,
#   3. writes the Stan program (fit.stan),
#   4. writes fit.sh, which builds the model with CmdStan's makefile and starts
#      four chains in the background, and runs it with bash.
for (idx in c(3)) {
    ## main dir for Stan simulations (wiped and recreated on every run)
    standirname = filenames[idx]%&%"-gamma-truncated"
    unlink(standirname, recursive=TRUE)
    dir.create(standirname)
    
    datafilename = "../../data/"%&%filenames[idx]%&%".csv"
    read.table(datafilename, sep=",", header=TRUE) %>% select(EL,ER,SL,SR,tstar) -> df

    # Dumping data
    N = nrow(df)
    E_L = df$EL
    E_R = df$ER
    S_L = df$SL
    S_R = df$SR
    upper_bound = df$tstar[1] # only the first tstar entry is passed to Stan
    r = 0.14                  # enters fstar through the exp(-r*tstar*x) terms
    stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N', 'upper_bound', 'r'), file=standirname%&%"/Data.R") 

    # Dumping initial conditions (names must match the Stan `parameters` block)
    E_raw = rep(.1, N)
    S_raw = rep(.9, N)
    param1 = 3.0
    param2 = 2.0
    stan_rdump(c('E_raw', 'S_raw', 'param1', 'param2'), file=standirname%&%"/Init.R")   

    # Stan program
    # E and S are sampled inside their observed intervals through E_raw/S_raw,
    # and t = S - E receives a gamma(param1, param2) density divided by the
    # integral of fstar. The model block uses gamma_lpdf so that the sampled
    # posterior agrees with the gamma CDF inside fstar, with the gamma moments
    # (mean_SI, sd_SI) and with log_likelihood in generated quantities.
    "functions {
        real fstar(real x,          // Function argument
                   real xc,
                   real[] theta,    // parameters
                   real[] x_r,      // data (real)
                   int[] x_i) {     // data (integer)

            real param1 = theta[1];
            real param2 = theta[2];
            real E = theta[3];
            real upper_bound = x_r[1];
            real r = x_r[2];

            real tstar = upper_bound - E;

            return exp(gamma_lcdf(tstar*(1.0-x) | param1, param2)) * r * tstar* exp(-r*tstar*x) / (1.0 - exp(-r*tstar*x));
        }
    }

    data {
        int<lower = 0> N; // number of records
        vector<lower = 0>[N] E_L;
        vector<lower = 0>[N] E_R;
        vector<lower = 0>[N] S_L;
        vector<lower = 0>[N] S_R;
        real<lower = 0> upper_bound;
        real<lower = 0> r;
    }

    transformed data {
        int X_i[0]; //empty array
    }

    parameters {
        real<lower = 0> param1;
        real<lower = 0> param2;

        vector<lower = 0, upper = 1>[N] E_raw;
        vector<lower = 0, upper = 1>[N] S_raw;
    }

    transformed parameters {
        vector<lower = min(E_L), upper = max(E_R)>[N] E;
        vector<lower = min(S_L), upper = max(S_R)>[N] S;
        vector<lower = 0>[N] t;

        E = E_L + (E_R - E_L) .* E_raw;
        for (k in 1:N) {
            if (E[k]>S_L[k]) 
                S[k] = E[k] + (S_R[k] - E[k]) * S_raw[k];
            else 
                S[k] = S_L[k] + (S_R[k] - S_L[k]) * S_raw[k];
        }
        t = S - E;
    }

    model {
        param1 ~ std_normal();
        param2 ~ cauchy(0, 5.0);

        E_raw ~ normal(0.5, 1.0);
        S_raw ~ normal(0.5, 1.0);

        for (k in 1:N) 
            target += gamma_lpdf(t[k] | param1, param2)
                    - log(integrate_1d(fstar, 0.2, 1.0, {param1, param2, E[k]}, {upper_bound, r}, X_i));

    }

    generated quantities {
        real mean_SI = param1/param2;
        real sd_SI = sqrt(param1)/param2;

        vector[N] log_likelihood;
        for (k in 1:N)
             log_likelihood[k] = gamma_lpdf(t[k] | param1, param2)
                    - log(integrate_1d(fstar, 0.2, 1.0, {param1, param2, E[k]}, {upper_bound, r}, X_i));
    }" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)

    # Paths are relative: standistribdir is the CmdStan checkout as seen from
    # this notebook's directory, stanscriptdir is the model directory as seen
    # from inside the CmdStan checkout (make is run from there).
    standistribdir = "../../../CmdStan"
    stanscriptdir = "../Hokkaido_Wuhan_IncubationPeriod_2020/scripts/Andrei/"%&%standirname
    ## bash file
    "#!/bin/bash
    cwd=$(pwd)
    cd "%&%standistribdir%&%"
    make -j6 "%&%stanscriptdir%&%"/fit
    cd "%&%stanscriptdir%&%"
    mkdir -p diagnostics
    for i in {1..4}
    do
        echo Running ${i}
        SEEDNUMBER=$((12345+$i))
        ./fit \\
            method=sample num_samples=2500 num_warmup=4000 save_warmup=0 \\
                adapt delta=0.92 \\
                algorithm=hmc \\
                    engine=nuts \\
            random seed=${SEEDNUMBER} \\
            id=$i \\
            data file=Data.R \\
            init=Init.R \\
            output file=trace-$i.csv \\
                diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
    done
    echo Finished sampling haha!
    " %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)

    ## running the bash script
    # The chains are started with `&`, so the script (and this call) returns
    # while sampling is still running in the background.
    system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)
}

In [40]:
# Right-truncated gamma fit for the first entry of `filenames`.
# Prepares the CmdStan working directory, the Rdump inputs, the Stan model
# and the bash launcher, then starts the launcher.
idx <- 1

# Working directory for this model; any previous content is discarded
standirname <- paste0(filenames[idx], "-gamma-truncated")
unlink(standirname, recursive = TRUE)
dir.create(standirname)

# Exposure / symptom-onset interval bounds plus the tstar column
datafilename <- paste0("../../data/", filenames[idx], ".csv")
df <- read.table(datafilename, sep = ",", header = TRUE) %>%
    select(EL, ER, SL, SR, tstar)

# Data passed to Stan
N <- nrow(df)
E_L <- df$EL
E_R <- df$ER
S_L <- df$SL
S_R <- df$SR
r <- 0.14
upper_bound <- df$tstar[1]
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N', 'upper_bound', 'r'),
           file = file.path(standirname, "Data.R"))

# Starting point of every chain
s_raw <- rep(.5, N)
param1 <- 1.0
param2 <- 1.0
t <- rep(5.0, N)
stan_rdump(c('t', 's_raw', 'param1', 'param2'),
           file = file.path(standirname, "Init.R"))

# Stan model, written verbatim to fit.stan
cat("functions {
    real fstar(real x,          // Function argument
               real xc,         // Complement of function argument on the domain (defined later)
               real[] theta,    // parameters
               real[] x_r,      // data (real)
               int[] x_i) {     // data (integer)

        real param1 = theta[1];
        real param2 = theta[2];
        real tstar = theta[3];
        real r = x_r[1];

        return (1.0 - exp(gamma_lccdf(tstar*(1.0-x) | param1, param2))) * r * tstar* exp(-r*tstar*x) / (1.0 - exp(-r*tstar*x));
    }
}

data {
    int<lower = 0> N; // number of records
    vector<lower = 0>[N] E_L;
    vector<lower = 0>[N] E_R;
    vector<lower = 0>[N] S_L;
    vector<lower = 0>[N] S_R;
    real<lower = 0> upper_bound;
    real<lower = 0> r;
}

transformed data {
    int X_i[0]; //empty array
}

parameters {
    real<lower = 0> param1;
    real<lower = 0> param2;

    vector<lower = 0>[N] t;
    vector<lower = 0, upper = 1>[N] s_raw;
}

transformed parameters {
    vector<lower = min(S_L), upper = max(S_R)>[N] s;
    vector<lower = 0>[N] tstar;

    s = S_L + (S_R - S_L) .* s_raw;
    tstar = upper_bound - (s - t);
}

model {
    param1 ~ std_normal();
    param2 ~ cauchy(0, 5.0);

    t ~ normal(5, 5);
    s_raw ~ normal(0.5, 0.5);

    for (k in 1:N) {
        // here the first term for t[k] is used for predictions, not for the inference of params
        target += gamma_lpdf(t[k] | param1, param2) + gamma_lcdf(s[k] - E_L[k] | param1, param2) 
                    - log(integrate_1d(fstar, 0.001, 1.0, {param1, param2, tstar[k]}, {r}, X_i, 1e-6));
        if (s[k]>E_R[k])
            target += gamma_lccdf(s[k] - E_R[k] | param1, param2);
    }
}

generated quantities {
    real<lower = 0> mean_SI = param1/param2;
    real<lower = 0> sd_SI = sqrt(param1)/param2;

    vector[N] log_likelihood;
    for (k in 1:N) {
        log_likelihood[k] = gamma_lpdf(t[k] | param1, param2) + gamma_lcdf(s[k] - E_L[k] | param1, param2) 
                    - log(integrate_1d(fstar, 0.001, 1.0, {param1, param2, tstar[k]}, {r}, X_i, 1e-6));
        if (s[k]>E_R[k])
            log_likelihood[k] += gamma_lccdf(s[k] - E_R[k] | param1, param2);
    }
}", file = file.path(standirname, "fit.stan"), sep = "", fill = TRUE)

# CmdStan checkout (relative to this directory) and the model directory as
# seen from inside that checkout
standistribdir <- "../../../CmdStan"
stanscriptdir <- paste0("../Hokkaido_Wuhan_IncubationPeriod_2020/scripts/Andrei/", standirname)

# Launcher: build the executable, then start four background chains
cat(paste0("#!/bin/bash
cwd=$(pwd)
cd ", standistribdir, "
make -j6 ", stanscriptdir, "/fit
cd ", stanscriptdir, "
mkdir -p diagnostics
for i in {1..4}
do
    echo Running ${i}
    SEEDNUMBER=$((12345+$i))
    ./fit \\
        method=sample num_samples=2500 num_warmup=5000 save_warmup=0 \\
            adapt delta=0.92 \\
            algorithm=hmc \\
                engine=nuts \\
        random seed=${SEEDNUMBER} \\
        id=$i \\
        data file=Data.R \\
        init=Init.R \\
        output file=trace-$i.csv \\
            diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
echo Finished sampling haha!
"), file = file.path(standirname, "fit.sh"), sep = "", fill = TRUE)

# Run the launcher and show what it printed
system(paste("bash", file.path(standirname, "fit.sh")), intern = TRUE)

# <font color="maroon">Weibull distribution</font>

## Without truncation for *data_incper* dataset

In [9]:
# Weibull model without truncation for the first entry of `filenames`.
# Prepares the CmdStan working directory, dumps data and initial values,
# writes the Stan program and the bash launcher, and starts the launcher.
idx = 1
## main dir for Stan simulations (wiped and recreated on every run)
standirname = filenames[idx]%&%"-weibull-no_truncation"
unlink(standirname, recursive=TRUE)
dir.create(standirname)

datafilename = "../../data/"%&%filenames[idx]%&%".csv"
read.table(datafilename, sep=",", header=TRUE) %>% select(EL,ER,SL,SR,tstar) -> df

# Dumping data
N = nrow(df)
E_L = df$EL
E_R = df$ER
S_L = df$SL
S_R = df$SR
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N'), file=standirname%&%"/Data.R") 

# Dumping initial conditions
# The dumped names have to match the Stan `parameters` block below
# (logmean_SI, logparam1, t, s_raw); otherwise the start value is not applied
# to the parameter it was meant for.
s_raw = rep(.9, N)
logmean_SI = log(5.0)
logparam1 = log(3.0)
t = rep(5.0, N)
stan_rdump(c('s_raw', 't', 'logmean_SI', 'logparam1'), file=standirname%&%"/Init.R")   

# Stan program
# The Weibull is parameterised by log(mean) and log(shape); the scale param2
# is derived from them in transformed parameters.
"data {
    int<lower = 0> N; // number of records
    vector<lower = 0>[N] E_L;
    vector<lower = 0>[N] E_R;
    vector<lower = 0>[N] S_L;
    vector<lower = 0>[N] S_R;
}

parameters {
    real logmean_SI;
    real logparam1;

    vector<lower = 0>[N] t;
    vector<lower = 0, upper = 1>[N] s_raw;
}

transformed parameters {
    real<lower=0> mean_SI = exp(logmean_SI);
    real<lower=0> param1 = exp(logparam1);
    real<lower=0> param2 = mean_SI/tgamma(1.0+1.0/param1);

    vector<lower = min(S_L), upper = max(S_R)>[N] s;

    s = S_L + (S_R - S_L) .* s_raw;
}

model {
    logmean_SI ~ std_normal();
    logparam1 ~ std_normal();

    t ~ normal(0, 10.0);
    s_raw ~ normal(0.5, 0.5);

    for (k in 1:N) {
        // here the first term for t[k] is used for predictions, not for the inference of params
        target += weibull_lpdf(t[k] | param1, param2) + weibull_lcdf(s[k] - E_L[k] | param1, param2);
        if (s[k]>E_R[k])
            target += weibull_lccdf(s[k] - E_R[k] | param1, param2);
    }
}

generated quantities {
    real median_SI = param2*log(2)^(1/param1);
    real sd_SI = param2*sqrt(tgamma(1.0+2.0/param1)-(tgamma(1.0+1.0/param1))^2);

    vector[N] log_likelihood;
    for (k in 1:N) {
        log_likelihood[k] = weibull_lpdf(t[k] | param1, param2) + weibull_lcdf(s[k] - E_L[k] | param1, param2);
        if (s[k] > E_R[k])
            log_likelihood[k] += weibull_lccdf(s[k] - E_R[k] | param1, param2);
    }
}" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)

# standistribdir: CmdStan checkout relative to this directory;
# stanscriptdir: the model directory as seen from inside that checkout.
standistribdir = "../../../CmdStan"
stanscriptdir = "../Hokkaido_Wuhan_IncubationPeriod_2020/scripts/Andrei/"%&%standirname
## bash file: builds the model, then starts four chains in the background
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..4}
do
    echo Running ${i}
    SEEDNUMBER=$((12345+$i))
    ./fit \\
        method=sample num_samples=5000 num_warmup=10000 save_warmup=0 \\
            adapt delta=0.92 \\
            algorithm=hmc \\
                engine=nuts \\
        random seed=${SEEDNUMBER} \\
        id=$i \\
        data file=Data.R \\
        init=Init.R \\
        output file=trace-$i.csv \\
            diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
echo Finished sampling haha!
" %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)

## running the bash script
system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)

## Without truncation for *data_incper_inclwuhan* dataset

In [10]:
# Weibull model without truncation for the second entry of `filenames`.
# Records whose EL equals 0 are treated as having no lower exposure bound:
# they are moved to the end of the data frame and their lower bound is
# estimated in Stan (E_L_est) instead of being read from the data.
idx = 2
## main dir for Stan simulations (wiped and recreated on every run)
standirname = filenames[idx]%&%"-weibull-no_truncation"
unlink(standirname, recursive=TRUE)
dir.create(standirname)

datafilename = "../../data/"%&%filenames[idx]%&%".csv"
read.table(datafilename, sep=",", header=TRUE) %>% select(EL,ER,SL,SR,tstar) -> df
df %>% filter(EL>0) -> df1 # E_L is defined
df %>% filter(EL==0) -> df2 # E_L is missing
df = rbind(df1,df2) # we move all incomplete records to the end of the dataframe

# Dumping data
N = nrow(df)
N2 = nrow(df2)
E_L = df$EL
E_R = df$ER
S_L = df$SL
S_R = df$SR
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N', 'N2'), file=standirname%&%"/Data.R") 

# Dumping initial conditions
# The dumped names have to match the Stan `parameters` block below
# (logmean_SI, logparam1, t, s_raw, E_L_est_raw); otherwise the start value
# is not applied to the parameter it was meant for.
s_raw = rep(.9, N)
logmean_SI = log(5.0)
logparam1 = log(3.0)
E_L_est_raw = rep(.5, N2)
t = rep(5.0, N)
stan_rdump(c('s_raw', 'E_L_est_raw', 't', 'logmean_SI', 'logparam1'), file=standirname%&%"/Init.R")   

# Stan program
# The first N1 = N - N2 records use the observed E_L; the last N2 records use
# E_L_est, which is E_R scaled by a factor in [0, 1].
"data {
    int<lower = 0> N; // number of records
    int<lower = 0> N2; // number of incomplete records
    vector<lower = 0>[N] E_L;
    vector<lower = 0>[N] E_R;
    vector<lower = 0>[N] S_L;
    vector<lower = 0>[N] S_R;
}

transformed data {
    int<lower = 0> N1 = N - N2;
}

parameters {
    real logmean_SI;
    real logparam1;

    vector<lower = 0>[N] t;
    vector<lower = 0, upper = 1>[N] s_raw;
    vector<lower = 0, upper = 1>[N2] E_L_est_raw;
}

transformed parameters {
    real<lower=0> mean_SI = exp(logmean_SI);
    real<lower=0> param1 = exp(logparam1);
    real<lower=0> param2 = mean_SI/tgamma(1.0+1.0/param1);

    vector<lower = min(S_L), upper = max(S_R)>[N] s;
    vector<lower = 0, upper = max(E_R)>[N2] E_L_est;

    s = S_L + (S_R - S_L) .* s_raw;
    for (k in 1:N2) 
        E_L_est[k] = E_R[N1 + k] * E_L_est_raw[k];
}

model {
    logmean_SI ~ std_normal();
    logparam1 ~ std_normal();

    t ~ normal(0, 10.0);
    s_raw ~ normal(0.5, 0.5);
    E_L_est_raw ~ normal(0.5, 0.5);

    for (k in 1:N) {
        // here the first term for t[k] is used for predictions, not for the inference of params
        target += weibull_lpdf(t[k] | param1, param2);
        if (s[k] > E_R[k])
            target += weibull_lccdf(s[k] - E_R[k] | param1, param2);
        if (k <= N1) 
            target += weibull_lcdf(s[k] - E_L[k] | param1, param2);
        else
            target += weibull_lcdf(s[k] - E_L_est[k - N1] | param1, param2);
    }
}

generated quantities {
    real median_SI = param2*log(2)^(1/param1);
    real sd_SI = param2*sqrt(tgamma(1.0+2.0/param1)-(tgamma(1.0+1.0/param1))^2);

    vector[N] log_likelihood;
    for (k in 1:N) {
        log_likelihood[k] = weibull_lpdf(t[k] | param1, param2);
        if (s[k] > E_R[k])
            log_likelihood[k] += weibull_lccdf(s[k] - E_R[k] | param1, param2);
        if (k <= N1) 
            log_likelihood[k] += weibull_lcdf(s[k] - E_L[k] | param1, param2);
        else
            log_likelihood[k] += weibull_lcdf(s[k] - E_L_est[k - N1] | param1, param2);
    }
}" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)

# standistribdir: CmdStan checkout relative to this directory;
# stanscriptdir: the model directory as seen from inside that checkout.
standistribdir = "../../../CmdStan"
stanscriptdir = "../Hokkaido_Wuhan_IncubationPeriod_2020/scripts/Andrei/"%&%standirname
## bash file: builds the model, then starts five chains in the background
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..5}
do
    echo Running ${i}
    SEEDNUMBER=$((12345+$i))
    ./fit \\
        method=sample num_samples=5000 num_warmup=10000 save_warmup=0 \\
            adapt delta=0.92 \\
            algorithm=hmc \\
                engine=nuts \\
        random seed=${SEEDNUMBER} \\
        id=$i \\
        data file=Data.R \\
        init=Init.R \\
        output file=trace-$i.csv \\
            diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
echo Finished sampling haha!
" %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)

## running the bash script
system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)

## Without truncation for the rest of the data

In [11]:
# Weibull model without truncation for datasets 3..length(filenames).
# Each iteration rebuilds the CmdStan working directory, dumps data and
# initial values, writes the Stan model and the launcher, and starts it.
for (idx in 3:length(filenames)) {
    # Fresh working directory for this dataset
    standirname <- paste0(filenames[idx], "-weibull-no_truncation")
    unlink(standirname, recursive = TRUE)
    dir.create(standirname)

    # Interval bounds; for dataset 2 a zero EL is replaced by SL - 28
    datafilename <- paste0("../../data/", filenames[idx], ".csv")
    df <- read.table(datafilename, sep = ",", header = TRUE) %>%
        select(EL, ER, SL, SR, tstar)
    if (idx == 2) {
        df <- df %>% mutate(EL = ifelse(EL==0, SL - 28, EL))
    }

    # Data passed to Stan
    N <- nrow(df)
    E_L <- df$EL
    E_R <- df$ER
    S_L <- df$SL
    S_R <- df$SR
    stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N'),
               file = file.path(standirname, "Data.R"))

    # Starting point of every chain
    E_raw <- rep(.2, N)
    S_raw <- rep(.8, N)
    logmean_SI <- log(5.0)
    logparam1 <- log(3.0)
    stan_rdump(c('E_raw', 'S_raw', 'logmean_SI', 'logparam1'),
               file = file.path(standirname, "Init.R"))

    # Stan model, written verbatim to fit.stan
    cat("data {
        int<lower = 0> N; // number of records
        vector<lower = 0>[N] E_L;
        vector<lower = 0>[N] E_R;
        vector<lower = 0>[N] S_L;
        vector<lower = 0>[N] S_R;
    }

    parameters {
        real logmean_SI;
        real logparam1;

        vector<lower = 0, upper = 1>[N] E_raw;
        vector<lower = 0, upper = 1>[N] S_raw;
    }

    transformed parameters {
        real<lower=0> mean_SI = exp(logmean_SI);
        real<lower=0> param1 = exp(logparam1);
        real<lower=0> param2 = mean_SI/tgamma(1.0+1.0/param1);

        vector<lower = min(E_L), upper = max(E_R)>[N] E;
        vector<lower = min(S_L), upper = max(S_R)>[N] S;
        vector<lower = 0>[N] t;

        E = E_L + (E_R - E_L) .* E_raw;
        for (k in 1:N) {
            if (E[k]>S_L[k]) 
                S[k] = E[k] + (S_R[k] - E[k]) * S_raw[k];
            else 
                S[k] = S_L[k] + (S_R[k] - S_L[k]) * S_raw[k];
        }
        t = S - E;
    }

    model {
        logmean_SI ~ std_normal();
        logparam1 ~ std_normal();

        E_raw ~ normal(0.5, 1.0);
        S_raw ~ normal(0.5, 1.0);

        t ~ weibull(param1, param2);
    }

    generated quantities {
        real median_SI = param2*log(2)^(1/param1);
        real sd_SI = param2*sqrt(tgamma(1.0+2.0/param1)-(tgamma(1.0+1.0/param1))^2);

        vector[N] log_likelihood;
        for (k in 1:N)
             log_likelihood[k] = weibull_lpdf(t[k] | param1, param2);
    }", file = file.path(standirname, "fit.stan"), sep = "", fill = TRUE)

    # CmdStan checkout (relative to this directory) and the model directory
    # as seen from inside that checkout
    standistribdir <- "../../../CmdStan"
    stanscriptdir <- paste0("../Hokkaido_Wuhan_IncubationPeriod_2020/scripts/Andrei/", standirname)

    # Launcher: build the executable, then start four background chains
    cat(paste0("#!/bin/bash
    cwd=$(pwd)
    cd ", standistribdir, "
    make -j6 ", stanscriptdir, "/fit
    cd ", stanscriptdir, "
    mkdir -p diagnostics
    for i in {1..4}
    do
        echo Running ${i}
        SEEDNUMBER=$((12345+$i))
        ./fit \\
            method=sample num_samples=5000 num_warmup=10000 save_warmup=0 \\
                adapt delta=0.92 \\
                algorithm=hmc \\
                    engine=nuts \\
            random seed=${SEEDNUMBER} \\
            id=$i \\
            data file=Data.R \\
            init=Init.R \\
            output file=trace-$i.csv \\
                diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
    done
    echo Finished sampling haha!
    "), file = file.path(standirname, "fit.sh"), sep = "", fill = TRUE)

    # Run the launcher
    system(paste("bash", file.path(standirname, "fit.sh")), intern = TRUE)
}

## With truncation

In [40]:
# Weibull model with right truncation for every entry of `filenames`.
# Each iteration rebuilds the CmdStan working directory, dumps data and
# initial values, writes the Stan model and the launcher, and starts it.
for (idx in 1:length(filenames)) {
    # Fresh working directory for this dataset
    standirname <- paste0(filenames[idx], "-weibull-trunc")
    unlink(standirname, recursive = TRUE)
    dir.create(standirname)

    # Interval bounds; for dataset 2 a zero EL is replaced by SL - 28
    datafilename <- paste0("../../data/", filenames[idx], ".csv")
    df <- read.table(datafilename, sep = ",", header = TRUE) %>%
        select(EL, ER, SL, SR, tstar)
    if (idx == 2) {
        df <- df %>% mutate(EL = ifelse(EL==0, SL - 28, EL))
    }

    # Data passed to Stan; upper_bound is the first tstar entry
    N <- nrow(df)
    E_L <- df$EL
    E_R <- df$ER
    S_L <- df$SL
    S_R <- df$SR
    upper_bound <- df$tstar[1]
    r <- 0.14
    stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'upper_bound', 'r', 'N'),
               file = file.path(standirname, "Data.R"))

    # Starting point of every chain
    E_raw <- rep(.2, N)
    S_raw <- rep(.8, N)
    logmean_SI <- log(5.0)
    logparam1 <- log(3.0)
    stan_rdump(c('E_raw', 'S_raw', 'logmean_SI', 'logparam1'),
               file = file.path(standirname, "Init.R"))

    # Stan model, written verbatim to fit.stan
    cat("functions {
        real fstar(real x,          // Function argument
                   real xc,         // Complement of function argument on the domain (defined later)
                   real[] theta,    // parameters
                   real[] x_r,      // data (real)
                   int[] x_i) {     // data (integer)

            real param1 = theta[1];
            real param2 = theta[2];
            real tstar = theta[3];
            real r = x_r[1];

            return exp(weibull_lcdf(tstar-x | param1, param2)) * r * exp(-r*x) / (1.0 - exp(-r*x));
        }
    }

    data {
        int<lower = 0> N; // number of records
        vector<lower = 0>[N] E_L;
        vector<lower = 0>[N] E_R;
        vector<lower = 0>[N] S_L;
        vector<lower = 0>[N] S_R;
        real<lower = 0> upper_bound;
        real<lower = 0> r;
    }

    transformed data {
        int X_i[0]; //empty array
    }

    parameters {
        real logmean_SI;
        real logparam1;

        vector<lower = 0, upper = 1>[N] E_raw;
        vector<lower = 0, upper = 1>[N] S_raw;
    }

    transformed parameters {
        real<lower=0> mean_SI = exp(logmean_SI);
        real<lower=0> param1 = exp(logparam1);
        real<lower=0> param2 = mean_SI/tgamma(1.0+1.0/param1);

        vector<lower = min(E_L), upper = max(E_R)>[N] E;
        vector<lower = min(S_L), upper = max(S_R)>[N] S;
        vector<lower = 0>[N] t;
        vector<lower = 0>[N] tstar;

        E = E_L + (E_R - E_L) .* E_raw;
        for (k in 1:N) {
            if (E[k]>S_L[k]) 
                S[k] = E[k] + (S_R[k] - E[k]) * S_raw[k];
            else 
                S[k] = S_L[k] + (S_R[k] - S_L[k]) * S_raw[k];
        }
        t = S - E;
        tstar = upper_bound - E;
    }

    model {
        logmean_SI ~ std_normal();
        logparam1 ~ std_normal();

        E_raw ~ normal(0.5, 1.0);
        S_raw ~ normal(0.5, 1.0);

        for (k in 1:N) 
            target += weibull_lpdf(t[k] | param1, param2)
                        - log(integrate_1d(fstar, 0.001, tstar[k] - 0.001, {param1, param2, tstar[k]}, {r}, X_i));
    }

    generated quantities {
        vector[N] log_likelihood;
        for (k in 1:N)
             log_likelihood[k] = weibull_lpdf(t[k] | param1, param2) - log(integrate_1d(fstar, .001, tstar[k] - 0.001, {param1, param2, tstar[k]}, {r}, X_i));
    }", file = file.path(standirname, "fit.stan"), sep = "", fill = TRUE)

    # CmdStan checkout (relative to this directory) and the model directory
    # as seen from inside that checkout
    standistribdir <- "../../../CmdStan"
    stanscriptdir <- paste0("../Hokkaido_Wuhan_IncubationPeriod_2020/scripts/Andrei/", standirname)

    # Launcher: build the executable, then start four background chains
    cat(paste0("#!/bin/bash
    cwd=$(pwd)
    cd ", standistribdir, "
    make -j6 ", stanscriptdir, "/fit
    cd ", stanscriptdir, "
    mkdir -p diagnostics
    for i in {1..4}
    do
        echo Running ${i}
        SEEDNUMBER=$((12345+$i))
        ./fit \\
            method=sample num_samples=2500 num_warmup=5000 save_warmup=0 \\
                adapt delta=0.92 \\
                algorithm=hmc \\
                    engine=nuts \\
            random seed=${SEEDNUMBER} \\
            id=$i \\
            data file=Data.R \\
            init=Init.R \\
            output file=trace-$i.csv \\
                diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
    done
    echo Finished sampling haha!
    "), file = file.path(standirname, "fit.sh"), sep = "", fill = TRUE)

    # Run the launcher
    system(paste("bash", file.path(standirname, "fit.sh")), intern = TRUE)
}