In [1]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
import pymc as pm
%matplotlib inline

Notebook of non-simulation calcs for CRN paper:

In [2]:
n_draws = 1000 * 1000
n_simulants = 1000   ### 1000*1000
n_days = 365

In [3]:
incidence = 1.72  # per person-year
mu_incidence = incidence
sigma_incidence = .12

special_frac = .056  # ETEC fraction
sigma_special_frac = .018  # ETEC

# special_frac = .25  # Rota fraction
# sigma_special_frac = .076518 # Rota

mu_special_frac = special_frac


Direct calculation of expected cases averted:

In [4]:
def cases_averted(val_dict):
    incidence, special_frac = val_dict['incidence'], val_dict['special_frac']
    pr_diarrhea_actual = 1 - np.exp(-incidence/365.)
    pr_diarrhea_counterfactual = (1-special_frac)*pr_diarrhea_actual

    all_cases = pr_diarrhea_actual * n_simulants * n_days
    special_cases = all_cases * special_frac

    cases_averted = special_cases
    return cases_averted

print('E[cases_averted]:', cases_averted({'incidence':incidence,
                                          'special_frac':special_frac}))

E[cases_averted]: 96.093410308


Monte Carlo calculation of 95% UI of expected cases averted (Parameter Uncertainty):

In [5]:
def my_truncated_normal(mu, sigma, lower=0, upper=np.inf):
    vals = np.random.normal(mu, sigma, size=n_draws)
    vals = np.clip(vals, lower, upper)
    return vals    

def parameter_distribution(seed=12345):
    np.random.seed(seed) # set seed for reproducibility
    df = pd.DataFrame(index=range(n_draws))
    df['incidence'] = my_truncated_normal(mu_incidence, sigma_incidence)
    df['special_frac'] = my_truncated_normal(mu_special_frac, sigma_special_frac)

    return df
df = parameter_distribution()

t = df.apply(cases_averted, axis=1)
t.describe(percentiles=[.025,.975]).loc[['mean', '2.5%', '97.5%']]

mean      96.133561
2.5%      35.077931
97.5%    159.615687
dtype: float64

Monte Carlo calculation of variance of difference between actual and counterfactual cases (without CRN):

In [6]:
def cases_averted_predicted_no_crn(val_dict):
    incidence, special_frac = val_dict['incidence'], val_dict['special_frac']
    pr_diarrhea_actual = 1 - np.exp(-incidence/365.)
    pr_diarrhea_counterfactual = (1-special_frac)*pr_diarrhea_actual

    actual_cases         = np.random.binomial(n_simulants * n_days,
                                   pr_diarrhea_actual)
    counterfactual_cases = np.random.binomial(n_simulants * n_days,
                                   pr_diarrhea_counterfactual)
    cases_averted = actual_cases - counterfactual_cases
    return cases_averted

print('predicted cases_averted without crn:',
      cases_averted_predicted_no_crn({'incidence':incidence,
                     'special_frac':special_frac}))

predicted cases_averted without crn: 184


Only Simulation Uncertainty:

In [7]:
df = pd.DataFrame(index=range(n_draws))
df['incidence'] = incidence
df['special_frac'] = special_frac

t = df.apply(cases_averted_predicted_no_crn, axis=1)
t.describe(percentiles=[.025,.975]).loc[['mean', '2.5%', '97.5%']]

mean      96.071417
2.5%     -17.000000
97.5%    209.000000
dtype: float64

Both Simulation and Parameter Uncertainty:

In [8]:
df = parameter_distribution()

t = df.apply(cases_averted_predicted_no_crn, axis=1)
t.describe(percentiles=[.025,.975]).loc[['mean', '2.5%', '97.5%']]

mean      96.113645
2.5%     -32.000000
97.5%    225.000000
dtype: float64

Monte Carlo calculation of uncertainty with CRN:

In [9]:
def cases_averted_predicted_crn(val_dict):
    incidence, special_frac = val_dict['incidence'], val_dict['special_frac']
    pr_diarrhea_actual = 1 - np.exp(-incidence/365.)

    actual_cases         = np.random.binomial(n_simulants * n_days,
                                   pr_diarrhea_actual)
    counterfactual_cases = np.random.binomial(actual_cases,
                                   1 - special_frac)
    cases_averted = actual_cases - counterfactual_cases
    return cases_averted

# not worth looking at a single value, want to know about distribution
# print('predicted cases_averted with crn:',
#       cases_averted_predicted_crn({'incidence':mu_incidence,
#                      'special_frac':mu_special_frac}))

Only simulation uncertainty (with CRN):

In [10]:
df = pd.DataFrame(index=range(n_draws))
df['incidence'] = incidence
df['special_frac'] = special_frac

t = df.apply(cases_averted_predicted_crn, axis=1)
t.describe(percentiles=[.025,.975]).loc[['mean', '2.5%', '97.5%']]

mean      96.089715
2.5%      77.000000
97.5%    116.000000
dtype: float64

In [11]:
# percent reduction in uncertainty interval width:
(1 - (116 - 77) / 230.) * 100.

83.04347826086956

Both simulation and parameter uncertainty (with CRN):

In [12]:
df = parameter_distribution()

t = df.apply(cases_averted_predicted_crn, axis=1)
t.describe(percentiles=[.025,.975]).loc[['mean', '2.5%', '97.5%']]

mean      96.12569
2.5%      34.00000
97.5%    164.00000
dtype: float64