In [1]:
!date

Fri Feb  7 03:49:52 JST 2020


In [3]:
import pandas as pd
import numpy as np
import scipy as sp
import scipy.stats as ss
import arviz as az

# Make inline plots raster graphics
from IPython.display import set_matplotlib_formats
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

from IPython.display import set_matplotlib_formats
set_matplotlib_formats('retina')

import seaborn as sns
clrs = sns.color_palette("Spectral", 6)
def set_plot_style(usetex=False):
    """Apply the seaborn/matplotlib styling used by the figures in this notebook.

    Parameters
    ----------
    usetex : bool, default False
        Value stored under matplotlib's ``text.usetex`` rc key.
    """
    # rc overrides handed to seaborn together with the 'white' style.
    seaborn_rc = {
        'xtick.major.size': 6,
        'ytick.major.size': 6,
        'legend.fontsize': 14,
        'text.usetex': usetex,
        'font.family': 'serif',
        'font.serif': ['Verdana'],
        'text.latex.preamble': r"\usepackage{type1cm}",
    }
    sns.set_style('white', {'axes.linewidth': 0.5})
    sns.set(style='white', font_scale=1.1, rc=seaborn_rc)

    # Tick size, width and visibility are written straight into matplotlib's
    # rcParams after the seaborn calls above.
    plt.rcParams.update({
        'xtick.major.size': 6,
        'xtick.major.width': 1,
        'ytick.major.size': 6,
        'ytick.major.width': 1,
        'xtick.bottom': True,
        'ytick.left': True,
    })
set_plot_style()
    
import warnings
warnings.filterwarnings('ignore')

def _percentile_at(q):
    """Return a one-argument function that computes the q-th percentile of its input."""
    return lambda x: np.percentile(x, q)


# Named percentile reducers: each key is a label and each value is a callable
# that takes an array-like and returns the corresponding percentile.
func_dict = {
    label: _percentile_at(q)
    for label, q in [("q2.5", 2.5), ("q25", 25), ("median", 50), ("q75", 75), ("q97.5", 97.5)]
}

# MLE

In [4]:
!ls ../../results/Andrei/*csv

../../results/Andrei/output.csv  ../../results/Andrei/output-final.csv


In [4]:
# Build the maximum-likelihood summary table from the fitted-distribution CSV:
# every estimate is rendered as "point (lower, upper)", datasets get readable
# labels, and the result is pivoted to one column per dataset.

# Raw dataset key -> column label. The insertion order of this dict is also
# the left-to-right column order of the final table.
dataset_labels = {
    'data_incper': 'Incubation period excluding WR (days)',
    'data_incper_inclwuhan': 'Incubation period including WR (days)',
    'data_ons_hosp': 'Onset to hospitalization - living (days)',
    'dthdata_ons_hosp': 'Onset to hospitalization - deceased (days)',
    'dthdata_ons_dth': 'Onset to death (days)',
    'dthdata_hosp_dth': 'Hospitalization to death (days)',
}
# Row order of the 'variable' level in the final table.
summary_vars = ['mean', 'sd', 'q5', 'median', 'q95', 'q99', 'AIC']

df = pd.read_csv("../../results/Andrei/output-final.csv")
df['AIC'] = df['AIC'].round(1)

for var in ['mean', 'sd', 'q5', 'q50', 'q95', 'q99']:
    is_quantile = var.startswith('q')
    # Quantile point estimates are stored in '<var>_median'; mean and sd use
    # the bare column name.
    point_col = var + '_median' if is_quantile else var
    bound_cols = [var + '_lower', var + '_upper']
    df[var] = ['%.1f (%.1f, %.1f)' % triple
               for triple in zip(df[point_col], df[bound_cols[0]], df[bound_cols[1]])]
    # Remove the numeric inputs once formatted. For mean/sd the point column
    # *is* the formatted column, so only the bounds are dropped. `columns=` is
    # passed by keyword because pandas >= 2.0 rejects a positional axis.
    df = df.drop(columns=bound_cols + ([point_col] if is_quantile else []))

df = df.drop(columns='negloglk')
df['filename'] = df['filename'].replace(dataset_labels)
df = df.rename(columns={'q50': 'median'})

df = pd.melt(df, id_vars=['filename', 'distribution', 'truncated'], value_vars=summary_vars)
# Categoricals fix the row ordering of the pivoted index.
df['variable'] = pd.Categorical(df['variable'], categories=summary_vars)
df['distribution'] = pd.Categorical(df['distribution'], categories=['Lognormal', 'Weibull', 'Gamma'])
df = df.pivot_table(index=['distribution', 'truncated', 'variable'], columns='filename',
                    values='value', aggfunc='first')

# Order the columns by label rather than by position in the alphabetically
# sorted pivot output. `.loc` replaces `.ix`, which pandas 1.0 removed.
df = df.loc[:, list(dataset_labels.values())]

# Fits with truncated == 'no', with that index level removed for display.
df_no = df.query("truncated == 'no'").droplevel(level=['truncated'])
df_no

Unnamed: 0_level_0,filename,Incubation period excluding WR (days),Incubation period including WR (days),Onset to hospitalization - living (days),Onset to hospitalization - deceased (days),Onset to death (days),Hospitalization to death (days)
distribution,variable,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Lognormal,mean,"5.0 (4.3, 5.7)","6.4 (5.8, 7.0)","3.8 (3.3, 4.2)","6.4 (5.5, 7.3)","14.9 (13.2, 16.6)","9.0 (7.7, 10.3)"
Lognormal,sd,"3.0 (2.3, 4.0)","3.8 (3.1, 4.7)","7.0 (5.7, 9.0)","4.3 (3.3, 6.0)","6.8 (5.4, 9.0)","7.0 (5.5, 9.4)"
Lognormal,q5,"1.6 (1.0, 2.5)","2.1 (1.5, 2.9)","0.2 (0.1, 0.4)","1.8 (1.0, 2.9)","6.3 (4.4, 8.6)","2.1 (1.2, 3.5)"
Lognormal,median,"4.2 (3.4, 5.1)","5.4 (4.7, 6.2)","1.7 (1.3, 2.3)","5.2 (4.0, 6.4)","13.4 (11.4, 15.3)","6.9 (5.4, 8.6)"
Lognormal,q95,"10.9 (9.1, 12.8)","13.8 (12.1, 15.5)","13.4 (11.9, 15.1)","15.0 (12.5, 17.7)","28.5 (24.5, 32.6)","22.7 (19.1, 26.4)"
Lognormal,q99,"16.3 (12.6, 20.1)","20.3 (16.9, 23.8)","31.6 (26.3, 36.5)","23.5 (18.0, 29.1)","39.1 (31.9, 46.5)","37.3 (29.0, 45.5)"
Lognormal,AIC,103.7,104.6,656.1,174.4,429.7,477
Weibull,mean,"5.1 (4.2, 6.3)","6.6 (5.7, 7.8)","3.3 (2.7, 3.9)","6.3 (5.2, 7.8)","15.1 (13.0, 17.8)","8.8 (7.4, 10.6)"
Weibull,sd,"3.4 (2.4, 5.1)","3.7 (2.7, 5.1)","3.7 (2.7, 5.2)","3.8 (2.6, 5.9)","7.4 (5.4, 10.9)","5.2 (3.6, 8.0)"
Weibull,q5,"0.9 (0.5, 1.3)","1.6 (1.1, 2.2)","0.1 (0.1, 0.2)","1.3 (0.7, 2.0)","4.3 (2.8, 6.0)","1.8 (1.0, 2.7)"


In [5]:
# Save the non-truncated MLE summary table (df_no) as an Excel workbook.
df_no.to_excel("../../results/Andrei/not_truncated.xlsx")

In [6]:
# Keep only the rows whose 'truncated' index level equals 'yes', then remove
# that level so the table is indexed by the remaining levels only.
is_truncated = df.index.get_level_values('truncated') == 'yes'
df_yes = df[is_truncated].droplevel('truncated')
df_yes

Unnamed: 0_level_0,filename,Incubation period excluding WR (days),Incubation period including WR (days),Onset to hospitalization - living (days),Onset to hospitalization - deceased (days),Onset to death (days),Hospitalization to death (days)
distribution,variable,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Lognormal,mean,"5.5 (4.7, 6.2)","7.3 (6.6, 8.0)",,,,
Lognormal,sd,"3.6 (2.7, 5.1)","5.0 (4.2, 6.2)",,,,
Lognormal,q5,"1.5 (0.9, 2.5)","2.1 (1.5, 2.9)",,,,
Lognormal,median,"4.4 (3.4, 5.4)","6.0 (5.1, 6.9)",,,,
Lognormal,q95,"12.8 (10.4, 15.0)","17.0 (15.0, 19.0)",,,,
Lognormal,q99,"19.8 (14.9, 24.9)","26.2 (21.9, 30.6)",,,,
Lognormal,AIC,332.9,782.9,,,,
Weibull,mean,"5.7 (4.5, 7.6)",,,,,
Weibull,sd,"3.9 (2.5, 6.7)",,,,,
Weibull,q5,"0.9 (0.4, 1.5)",,,,,


In [7]:
# Save the truncated-fit summary table (df_yes) as an Excel workbook.
df_yes.to_excel("../../results/Andrei/truncated.xlsx")

In [8]:
# NOTE(review): this re-displays df_yes, which is already shown by the cell
# that creates it; this cell can likely be removed.
df_yes

Unnamed: 0_level_0,filename,Incubation period excluding WR (days),Incubation period including WR (days),Onset to hospitalization - living (days),Onset to hospitalization - deceased (days),Onset to death (days),Hospitalization to death (days)
distribution,variable,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Lognormal,mean,"5.5 (4.7, 6.2)","7.3 (6.6, 8.0)",,,,
Lognormal,sd,"3.6 (2.7, 5.1)","5.0 (4.2, 6.2)",,,,
Lognormal,q5,"1.5 (0.9, 2.5)","2.1 (1.5, 2.9)",,,,
Lognormal,median,"4.4 (3.4, 5.4)","6.0 (5.1, 6.9)",,,,
Lognormal,q95,"12.8 (10.4, 15.0)","17.0 (15.0, 19.0)",,,,
Lognormal,q99,"19.8 (14.9, 24.9)","26.2 (21.9, 30.6)",,,,
Lognormal,AIC,332.9,782.9,,,,
Weibull,mean,"5.7 (4.5, 7.6)",,,,,
Weibull,sd,"3.9 (2.5, 6.7)",,,,,
Weibull,q5,"0.9 (0.4, 1.5)",,,,,


# MCMC simulations

In [12]:
# Build the MCMC summary table: relabel the datasets, reshape to long form,
# then pivot to one column per dataset with (Distribution, variable) rows.

# Raw dataset key -> column label. The insertion order of this dict is also
# the left-to-right column order of the final table.
dataset_labels = {
    'data_incper': 'Incubation period excluding WR (days)',
    'data_incper_inclwuhan': 'Incubation period including WR (days)',
    'data_ons_hosp': 'Onset to hospitalization - living (days)',
    'dthdata_ons_hosp': 'Onset to hospitalization - deceased (days)',
    'dthdata_ons_dth': 'Onset to death (days)',
    'dthdata_hosp_dth': 'Hospitalization to death (days)',
}
# Row order of the 'variable' level in the final table.
value_variables = ['mean', 'SD', 'q5', 'median', 'q95', 'q99', 'waic', 'weight']

df = pd.read_excel("../../results/Andrei/MCMC-no_truncation.xlsx")
df['Dataset'] = df['Dataset'].replace(dataset_labels)

# DataFrame.drop returns a new frame, so the result has to be assigned for the
# call to have any effect. `columns=` is passed by keyword because
# pandas >= 2.0 rejects a positional axis argument.
# NOTE(review): 'Unnamed: 0' is presumably a saved index column - confirm.
df = df.drop(columns=['Unnamed: 0', 'd_waic'])

df = pd.melt(df, id_vars=['Dataset', 'Distribution'], value_vars=value_variables)
# Categoricals fix the row ordering of the pivoted index.
df['variable'] = pd.Categorical(df['variable'], categories=value_variables)
df['Distribution'] = pd.Categorical(df['Distribution'], categories=['lognormal', 'weibull', 'gamma'])
df = df.pivot_table(index=['Distribution', 'variable'], columns='Dataset',
                    values='value', aggfunc='first')

# Order the columns by label rather than by position in the alphabetically
# sorted pivot output.
df = df.loc[:, list(dataset_labels.values())]
df

Unnamed: 0_level_0,Dataset,Incubation period excluding WR (days),Incubation period including WR (days),Onset to hospitalization - living (days),Onset to hospitalization - deceased (days),Onset to death (days),Hospitalization to death (days)
Distribution,variable,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
lognormal,mean,"5.0 (4.1, 6.0)","6.4 (5.5, 7.5)","3.8 (2.9, 5.1)","6.2 (5.0, 7.8)","14.5 (12.5, 17.0)","8.5 (6.9, 10.7)"
lognormal,SD,"3.0 (2.1, 4.3)","3.8 (2.7, 5.2)","8.3 (5.0, 14.3)","4.3 (2.9, 6.7)","6.7 (4.9, 9.5)","6.7 (4.6, 10.1)"
lognormal,q5,"1.7 (1.2, 2.3)","2.3 (1.8, 2.7)","0.2 (0.1, 0.3)","1.9 (1.2, 2.5)","6.5 (4.9, 7.9)","2.2 (1.5, 3.0)"
lognormal,median,"4.2 (3.5, 5.1)","5.5 (4.8, 6.3)","1.6 (1.3, 2.0)","5.1 (4.1, 6.3)","13.2 (11.3, 15.3)","6.7 (5.4, 8.3)"
lognormal,q95,"10.4 (8.3, 13.7)","13.3 (10.8, 17.1)","13.6 (10.1, 19.3)","13.9 (10.7, 19.9)","26.8 (22.2, 34.4)","20.5 (15.6, 28.5)"
lognormal,q99,"15.0 (11.5, 21.6)","19.3 (14.7, 26.4)","33.1 (22.3, 53.2)","21.2 (15.1, 33.6)","36.0 (28.6, 49.5)","32.5 (23.3, 49.6)"
lognormal,waic,264.7,945.1,693.8,184,221.9,240.1
lognormal,weight,0.968,0.076,0,0.396,0.741,0.119
weibull,mean,"5.1 (4.1, 6.3)","6.9 (5.9, 8.1)","3.3 (2.7, 4.1)","6.2 (5.0, 7.7)","14.8 (12.3, 17.6)","8.7 (7.1, 10.5)"
weibull,SD,"3.5 (2.7, 4.6)","3.9 (3.1, 5.0)","4.2 (3.2, 5.7)","4.0 (3.1, 5.3)","7.7 (6.2, 9.9)","5.4 (4.2, 7.2)"


In [13]:
# Save the pivoted MCMC summary table (df) as an Excel workbook.
df.to_excel("../../results/Andrei/MCMC-not_truncated.xlsx")