In [8]:
## Packages used throughout the notebook. Everything is attached with
## library() so that a missing package raises an error immediately;
## require() would only return FALSE and let the failure surface later.
libraries <- c("dplyr", "magrittr", "tidyr", "ggplot2", "rstan", "readxl",
               "zoo", "lubridate")
for (x in libraries) {
  library(x, character.only = TRUE, warn.conflicts = FALSE, quietly = TRUE)
}

base_sz <- 12 # base_size parameter
theme_set(theme_bw())

## Infix shorthand for paste0(): "a" %&% "b" gives "ab".
`%&%` <- function(x, y) paste0(x, y)

## Settings recommended by rstan's start-up message: use all detected cores
## and avoid recompiling unchanged Stan programs.
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)

## Record the tool versions used for this run.
packageVersion("rstan")
packageVersion("StanHeaders")
rstan::stan_version()

rstan (Version 2.19.2, GitRev: 2e1f913d3ca3)

For execution on a local, multicore CPU with excess RAM we recommend calling
options(mc.cores = parallel::detectCores()).
To avoid recompilation of unchanged Stan programs, we recommend calling
rstan_options(auto_write = TRUE)

Loading required package: zoo


Attaching package: ‘zoo’


The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric


Loading required package: lubridate


Attaching package: ‘lubridate’


The following object is masked from ‘package:base’:

    date




[1] ‘2.19.2’

[1] ‘2.21.0.1’

# <font color="purple">Preprocessing of the data</font>

Preprocessing is done by a separate R script (`prepare_data.R`), which is invoked below via *Rscript*.

In [34]:
### day zero
t0_str <- "2019-12-09"
t0 <- as.Date(t0_str)

#### Cut-off time for our analysis
CUTOFF_DATE_str <- "2020-01-24"
CUTOFF_DATE <- as.Date(CUTOFF_DATE_str)

## Output directory handed to prepare_data.R. It is created from R, so no shell
## `mkdir` is needed; nothing is reported if the directory already exists.
dir.create("data_tmp", showWarnings = FALSE, recursive = TRUE)

## Build the command once so that the string echoed at the end of the cell is
## exactly the command that was executed.
prepare_cmd <- paste("Rscript prepare_data.R ./data_tmp", t0_str, CUTOFF_DATE_str, sep = " ")
prepare_status <- system(prepare_cmd)

## Stop here on failure; otherwise the next cell could silently read files
## left over from an earlier run.
if (prepare_status != 0) {
  stop("Preprocessing failed (exit status ", prepare_status, "): ", prepare_cmd,
       call. = FALSE)
}
prepare_cmd

In [16]:
## Load the two comma-separated tables from data_tmp/ (the directory that was
## passed to prepare_data.R), keeping the header row as column names.
df <- read.table(file = file.path("data_tmp", "data.csv"),
                 header = TRUE, sep = ",")
df_onset2report <- read.table(file = file.path("data_tmp", "data_onset2report.csv"),
                              header = TRUE, sep = ",")

# <font color="maroon">Stan part</font>

In [17]:
## Root directory that holds all Stan runs; it is created if missing, and no
## warning is shown when it already exists.
standirname <- "stan-sims"
dir.create(path = standirname, showWarnings = FALSE)

# <font color="green">Fit of a Gamma distribution to the time delay between illness onset and confirmation (reporting)</font>

## <font color="orange">Main dir for Stan simulations</font>

In [18]:
## Working directory for the onset-to-report fit. Results from any previous
## run are removed so the fit starts from an empty directory.
dirOnsetToReport <- standirname %&% "/OnsetToReport"
## TRUE rather than T: T is an ordinary variable that can be reassigned.
unlink(dirOnsetToReport, recursive = TRUE)
dir.create(dirOnsetToReport)

In [19]:
## Attach interval bounds to every record: the onset interval is fixed at
## [0, 1] and the report interval is [dist, dist + 1].
df_onset2report <- df_onset2report %>%
  mutate(
    onset_left = 0,
    onset_right = 1,
    report_left = dist,
    report_right = dist + 1
  )
df_onset2report

Onset,distUpper,dist,onset_left,onset_right,report_left,report_right
<fct>,<int>,<int>,<dbl>,<dbl>,<int>,<dbl>
2020-01-03,22,13,0,1,13,14
2020-01-05,20,8,0,1,8,9
2020-01-18,7,2,0,1,2,3
2020-01-15,10,6,0,1,6,7
2020-01-17,8,6,0,1,6,7
2020-01-20,5,3,0,1,3,4
2020-01-14,11,10,0,1,10,11
2020-01-21,4,2,0,1,2,3
2020-01-10,15,14,0,1,14,15
2020-01-21,4,3,0,1,3,4


## <font color="orange">Dumping data</font>

In [20]:
## Copy the columns into top-level variables (stan_rdump() is given object
## names, not values) and write them to Data.R in the fit directory.
N <- nrow(df_onset2report)
onset_left <- df_onset2report[["onset_left"]]
onset_right <- df_onset2report[["onset_right"]]
report_left <- df_onset2report[["report_left"]]
report_right <- df_onset2report[["report_right"]]
upper_bound <- df_onset2report[["distUpper"]] + 1
stan_rdump(
  c("onset_left", "onset_right", "report_left", "report_right", "upper_bound", "N"),
  file = paste0(dirOnsetToReport, "/Data.R")
)

## <font color="orange">Dumping initial conditions</font>

In [21]:
## Starting values for the sampler: the two Gamma parameters plus one raw
## value of 0.5 per record for the onset and report positions.
shapeOnsetToReport <- 4.0
invscaleOnsetToReport <- 1.0
onset_raw <- rep(0.5, times = N)
report_raw <- rep(0.5, times = N)
stan_rdump(
  c("shapeOnsetToReport", "invscaleOnsetToReport", "onset_raw", "report_raw"),
  file = paste0(dirOnsetToReport, "/Init.R")
)

## <font color="orange">Stan program</font>

In [22]:
## Sampler sizes; both values are pasted into the command lines of the
## generated fit.sh scripts (num_samples / num_warmup).
numsamples <- 5000
numwarmup <- 10000

In [23]:
## Write the Stan program for the onset-to-report delay to fit.stan in the
## fit directory.
##
## Model as written: each onset/report time is placed inside its
## [left, right] interval through a raw parameter in [0, 1]; the delay
## t = report - onset gets a Gamma(shape, inverse scale) log-density; shape
## has a std_normal() prior and inverse scale a cauchy(0, 5) prior, both
## parameters being constrained to be non-negative.
##
## NOTE(review): `upper_bound` is declared in the data block, and `X_i` in
## transformed data, but neither is referenced anywhere else in the program.
cat("data {
    int<lower = 0> N; // number of records
    real<lower = 0> onset_left[N];
    real<lower = 0> onset_right[N];
    real<lower = 0> report_left[N];
    real<lower = 0> report_right[N];
    real<lower = 0> upper_bound[N];
}

transformed data {
    int X_i[0];
}

parameters {
    real<lower = 0> shapeOnsetToReport;
    real<lower = 0> invscaleOnsetToReport;

    real<lower = 0, upper = 1> onset_raw[N];
    real<lower = 0, upper = 1> report_raw[N];
}

transformed parameters {
    real meanOnsetToReport = shapeOnsetToReport/invscaleOnsetToReport;
    real sdOnsetToReport = sqrt(shapeOnsetToReport)/invscaleOnsetToReport;

    real<lower = min(onset_left), upper = max(onset_right)> onset[N];
    real<lower = min(report_left), upper = max(report_right)> report[N];
    real<lower = 0> t[N];
    
    for (k in 1:N) {
        onset[k] = onset_left[k] + (onset_right[k]-onset_left[k])*onset_raw[k];
        report[k] = report_left[k] + (report_right[k]-report_left[k])*report_raw[k];
        t[k] = report[k] - onset[k];
    }
}

model {
    shapeOnsetToReport ~ std_normal();
    invscaleOnsetToReport ~ cauchy(0, 5.0);

    for (k in 1:N) 
        target += gamma_lpdf(t[k] | shapeOnsetToReport, invscaleOnsetToReport);
}", file = paste0(dirOnsetToReport, "/fit.stan"), sep = "", fill = TRUE)

## Paths as used inside the generated shell script: the script first changes
## into `standistribdir` (resolved from the directory it is launched in) and
## then refers to `stanscriptdir` from inside that CmdStan directory.
standistribdir <- "../../../CmdStan"
stanscriptdir <- "../Hokkaido_Wuhan_CFR_2020/scripts/Andrei/"%&%dirOnsetToReport
## bash file
## Builds the model with CmdStan's make, then starts five chains in the
## background, each with its own seed and id. The `wait` after the loop keeps
## the script (and therefore the system() call that runs it) from returning
## until every chain has finished, so the final message is true and the trace
## files are complete before postprocessing starts.
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..5}
do
    echo Running ${i}
    SEEDNUMBER=$((12345+$i))
    ./fit \\
        method=sample num_samples="%&%numsamples%&%" num_warmup="%&%numwarmup%&%" save_warmup=0 \\
            adapt delta=0.85 \\
            algorithm=hmc \\
                engine=nuts \\
        random seed=${SEEDNUMBER} \\
        id=$i \\
        data file=Data.R \\
        init=Init.R \\
        output file=trace-$i.csv \\
            diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
wait
echo Finished sampling haha!" %>% cat(file=dirOnsetToReport%&%"/fit.sh", sep="", fill=TRUE)

In [24]:
## Execute the generated fit.sh with bash; intern = TRUE returns the script's
## standard output as a character vector.
system(paste0("bash ", dirOnsetToReport, "/fit.sh"), intern = TRUE)

In [30]:
## postprocessing
## Run the ArviZ helper script on the fit directory; it is expected to leave
## summary.csv there (that file is copied below).
system("python postprocessing_with_arviz.py ./"%&%dirOnsetToReport%&%" summary.csv", intern = TRUE)

## Copy the summary next to the other results. The target path is built from
## standirname rather than a hard-coded "stan-sims", and a failed copy stops
## the run instead of letting a stale file from an earlier run be read below.
summaryOnsetToReport <- standirname %&% "/OnsetToReport.csv"
if (!file.copy(dirOnsetToReport %&% "/summary.csv", summaryOnsetToReport, overwrite = TRUE)) {
  stop("Could not copy ", dirOnsetToReport, "/summary.csv to ", summaryOnsetToReport,
       call. = FALSE)
}

## read.table() already returns a data.frame, so no further conversion is needed.
res_OnsetToReport <- read.table(summaryOnsetToReport, sep = ",", header = TRUE,
                                stringsAsFactors = FALSE)
## Show only the rows without a time index.
res_OnsetToReport %>% filter(is.na(time))

var,time,mean,hpd2.5,hpd25,hpd75,hpd97.5,q2.5,q25,median,q75,q97.5,ess_bulk,ess_tail,r_hat
<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
shapeOnsetToReport,,1.357,0.636,0.98,1.503,2.147,0.687,1.071,1.319,1.603,2.231,27959,18403,1
invscaleOnsetToReport,,0.267,0.1,0.172,0.295,0.457,0.113,0.199,0.258,0.326,0.478,28974,18661,1
meanOnsetToReport,,5.325,3.185,4.195,5.696,7.926,3.384,4.438,5.15,5.989,8.318,28162,18543,1
sdOnsetToReport,,4.725,2.619,3.351,4.811,7.466,2.875,3.77,4.454,5.361,8.184,29219,20221,1


# <font color="green">Main module</font>

In [31]:
## main dir for Stan simulations
## Working directory for the CFR fit; results from any previous run are
## removed so the fit starts from an empty directory.
dirCFR <- standirname %&% "/CFR"
## TRUE rather than T: T is an ordinary variable that can be reassigned.
unlink(dirCFR, recursive = TRUE)
dir.create(dirCFR)

In [36]:
## Dumping data
## Everything the CFR model reads is placed in top-level variables and written
## to Data.R by name.
K <- nrow(df)
t <- df[["time"]]
Exportations <- df[["exports"]]
Deaths <- df[["deaths"]]
prob <- df[["prob_travel"]][[1]]

## Time of the first row with a positive export count / death count.
## NOTE(review): the Stan program below uses these values as 1-based array
## positions, which presumes df$time runs 1..K - confirm against prepare_data.R.
time_firstcase_exports <- df[["time"]][df[["exports"]] > 0][[1]]
time_firstcase_deaths <- df[["time"]][df[["deaths"]] > 0][[1]]

## Posterior means of the onset-to-report delay from the previous fit.
meanDelayReport <- res_OnsetToReport[["mean"]][res_OnsetToReport[["var"]] == "meanOnsetToReport"]
sdDelayReport <- res_OnsetToReport[["mean"]][res_OnsetToReport[["var"]] == "sdOnsetToReport"]
uDeath <- 0.216236387215998 #using Linton et al
stan_rdump(
  c("K", "t", "time_firstcase_exports", "time_firstcase_deaths", "Exportations", "Deaths", "prob",
    "meanDelayReport", "sdDelayReport", "uDeath"),
  file = paste0(dirCFR, "/Data.R")
)

## Dumping initial conditions
## All standardised parameters start at zero; CFR_std has one entry per time
## point from the first death onwards.
r_std <- 0.0
CFR_std <- rep(0.0, times = K - time_firstcase_deaths + 1)
stan_rdump(c("r_std", "CFR_std"), file = paste0(dirCFR, "/Init.R"))

## Stan program
## Written to fit.stan in the CFR directory. As coded: the growth rate is
## r = 0.1 + 0.05 * r_std and each CFR is 0.04 + 0.06 * CFR_std, with standard
## normal priors on the *_std parameters; Incidence is (exp(r t) - 1) / r;
## cumulative exportations and deaths each enter through a gamma_lpdf term
## whose shape and inverse scale are built from Incidence, prob / CFR and the
## uReport / uDeath factors.
cat("data {
    int<lower = 0> K; // number of time points
    int<lower = 0> t[K]; // time points
    int<lower = 0> time_firstcase_exports;
    int<lower = 0> time_firstcase_deaths;
    real<lower = 0> Exportations[K]; // cumulative number of exportation events by day t
    real<lower = 0> Deaths[K]; // cumulative number of death cases by day t
    real<lower = 0, upper = 1> prob; // probability of travelling
    
    real<lower = 0> meanDelayReport;
    real<lower = 0> sdDelayReport;
    real<lower = 0> uDeath;
}

parameters {
    real r_std;
    real CFR_std[K-time_firstcase_deaths+1];
}

transformed parameters {
    real<lower = 0> r = 0.1 + r_std*0.05;

    real shapeDelayReport = (meanDelayReport/sdDelayReport)^2;
    real uReport = (1.0 + r*meanDelayReport/shapeDelayReport)^(-shapeDelayReport);

    real<lower = 0> Incidence[K];
    real<lower = 0, upper = 1> CFR[K-time_firstcase_deaths+1];
    real<lower = 0> ExpectedDeaths[K-time_firstcase_deaths+1];

    for (k in 1:K) 
        Incidence[k] = (exp(r*t[k])-1.0)/r;

    for (k in 1:(K-time_firstcase_deaths+1)) {
        CFR[k] = 0.04 + CFR_std[k]*0.06;
        ExpectedDeaths[k] = uDeath*Incidence[time_firstcase_deaths+k-1]*CFR[k];
    }
}

model {
    r_std ~ std_normal();
    CFR_std ~ std_normal();

    real shapeReport;
    real invscaleReport;
    real shapeDeath;
    real invscaleDeath;

    for (k in time_firstcase_exports:K) {
        shapeReport = uReport*Incidence[k]*prob/(1.0-prob);
        invscaleReport = 1.0/(1.0-prob);
        target += gamma_lpdf(Exportations[k] | shapeReport, invscaleReport);
    }

    for (k in time_firstcase_deaths:K) {
        shapeDeath = uDeath*Incidence[k]*CFR[k-time_firstcase_deaths+1]/(1.0-CFR[k-time_firstcase_deaths+1]);
        invscaleDeath = 1.0/(1.0-CFR[k-time_firstcase_deaths+1]);
        target += gamma_lpdf(Deaths[k] | shapeDeath, invscaleDeath);
    }
}
", file = paste0(dirCFR, "/fit.stan"), sep = "", fill = TRUE)

## Model directory as seen from inside the CmdStan directory (the script
## changes into `standistribdir` before using this path).
stanscriptdir <- "../Hokkaido_Wuhan_CFR_2020/scripts/Andrei/"%&%standirname%&%"/CFR"
## bash file
## Removes any old binary, rebuilds the model with CmdStan's make, and starts
## five chains in the background, each with its own seed and id.
##  * The old binary is removed only after `cd` into the CmdStan directory,
##    because `stanscriptdir` is relative to it; `-f` keeps a missing binary
##    from producing an error.
##  * `wait` keeps the script (and the system() call that runs it) from
##    returning until every chain has finished, so the trace files are
##    complete before postprocessing starts.
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
rm -f "%&%stanscriptdir%&%"/fit
make -j4 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..5}
do
    echo Running ${i}
    SEEDNUMBER=$((12345+$i))
    ./fit \\
        method=sample num_samples="%&%numsamples%&%" num_warmup="%&%numwarmup%&%" save_warmup=0 \\
            adapt delta=0.85 \\
            algorithm=hmc \\
                engine=nuts \\
        random seed=${SEEDNUMBER} \\
        id=$i \\
        data file=Data.R \\
        init=Init.R \\
        output file=trace-$i.csv \\
            diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
wait
echo Finished sampling haha!
" %>% cat(file=dirCFR%&%"/fit.sh", sep="", fill=TRUE)

In [37]:
## Execute the generated CFR fit.sh with bash; intern = TRUE returns the
## script's standard output as a character vector.
system(paste0("bash ", dirCFR, "/fit.sh"), intern = TRUE)

In [38]:
## Pause for 15 seconds to give the chains that fit.sh started in the
## background time to write their traces. Sys.sleep() is the R call for this;
## the earlier system("time.sleep(15)") handed Python syntax to the shell and
## therefore never paused.
Sys.sleep(15)
## postprocessing
## Run the ArviZ helper script on the CFR directory; it is expected to leave
## summary.csv there (that file is copied below).
system("python postprocessing_with_arviz.py ./"%&%dirCFR%&%" summary.csv", intern = TRUE)
## Copy the summary to <standirname>/main.csv, stopping if the copy fails so a
## stale main.csv from an earlier run is never mistaken for this run's result.
if (!file.copy(dirCFR %&% "/summary.csv", standirname %&% "/main.csv", overwrite = TRUE)) {
  stop("Could not copy ", dirCFR, "/summary.csv to ", standirname, "/main.csv",
       call. = FALSE)
}