In [None]:
import os
from os import mkdir

import numpy as np
import pandas as pd

release = 16  # release identifier; not referenced again in the visible cells
root_dir = f"FILEPATH"

# Create the output directory together with any missing parent directories.
# A plain mkdir() raises FileNotFoundError when an intermediate directory is
# absent. If root_dir exists but is not a directory, makedirs still raises
# FileExistsError, which surfaces the problem here instead of at write time.
if os.path.isdir(root_dir):
    print("dir already exists")
else:
    os.makedirs(root_dir, exist_ok=True)

  
# TYPHOID: beta-distribution parameters for the severity split.
# Each value below is the mean proportion of cases in one severity state.
abdom_sev = 0.17
inf_mod   = 0.35
# 5% of cases have gastric bleeding for 2 days. The short duration is folded
# into the proportion rather than the duration (the original author notes the
# two are mathematically equivalent).
# NOTE(review): 52/6 presumably rescales to a ~6-week illness -- confirm.
bleeding  = 0.05 * (2/365) * (52/6)
# For the rest of their illness the bleeding cases are counted as inf_sev.
inf_sev   = 0.43 + 0.05 - bleeding

# alpha + beta for every state (beta is defined as SAMPLE - alpha below).
SAMPLE = 100

typh = (
    pd.DataFrame(
        [
            ("inf_mod", inf_mod),
            ("inf_sev", inf_sev),
            ("abdom_sev", abdom_sev),
            ("gastric_bleeding", bleeding),
        ],
        columns=["state", "mu"],
    )
    .assign(
        alpha=lambda d: SAMPLE * d["mu"],
        beta=lambda d: SAMPLE - d["alpha"],
        cause="typhoid",
    )
    .drop(columns="mu")
)

print(typh)

In [None]:
  
# PARATYPHOID: beta-distribution parameters for the severity split.
# abdom_mod takes 5% of cases; the remaining share is divided 30/55/15
# between mild, moderate and severe infection.
abdom_mod = 0.05
inf_mild  = 0.30 * (1 - abdom_mod)
inf_mod   = 0.55 * (1 - abdom_mod)
inf_sev   = 0.15 * (1 - abdom_mod)

para = (
    pd.DataFrame(
        [
            ("inf_mild", inf_mild),
            ("inf_mod", inf_mod),
            ("inf_sev", inf_sev),
            ("abdom_mod", abdom_mod),
        ],
        columns=["state", "mu"],
    )
    # The standard deviation is set to a quarter of the mean.
    .assign(sigma=lambda d: d["mu"] / 4)
    # Convert (mu, sigma) to beta shape parameters.
    .assign(
        alpha=lambda d: d["mu"] * (d["mu"] - d["mu"]**2 - d["sigma"]**2) / d["sigma"]**2,
        beta=lambda d: d["alpha"] * (1 - d["mu"]) / d["mu"],
        cause="paratyphoid",
    )
    .drop(columns=["mu", "sigma"])
)
print(para)

In [None]:
# CREATE DURATION DISTRIBUTION PARAMETERS
"""
This code was used to determine the parameters of a PERT distribution that would yield the correct point estimate and CI bounds
for the duration of typhoid/paratyphoid symptoms.  It works via brute force grid search, adjusting min and max values until  
parameters yield the correct 95% CI limits.

The first block was used to determine the parameters for severe illness with a mean duration of 28 days and 95%CI of 14 - 49 (2-7 weeks)
The second block was used for moderate illness with a mean duration of 14 days and 95%CI of 7-21 (1-3 weeks)
"""

def get_beta_parms(mn, mx, u, l):
    """Return beta parameters for a PERT distribution with a given mean.

    The PERT mean is ``u = (mn + l * mode + mx) / (l + 2)``. Substituting the
    implied mode into the usual shape formulas gives expressions that depend
    only on the mean::

        v = (l + 2) * (u - mn) / (mx - mn)
        w = (l + 2) * (mx - u) / (mx - mn)

    These are algebraically identical to the mode-based formulas, but they
    need no exact float comparison between ``u`` and the mode and no division
    by ``mode - u``, which is numerically unstable when the two nearly agree.

    Parameters
    ----------
    mn, mx : float
        Lower and upper bounds of the distribution.
    u : float
        Desired mean; must lie strictly between ``mn`` and ``mx``.
    l : float
        PERT shape (lambda) parameter; must be positive.

    Returns
    -------
    list
        ``[v, w, mx - mn, mn]``: the two beta shape parameters followed by
        the scale and offset that map a Beta(v, w) draw onto ``[mn, mx]``.

    Raises
    ------
    ValueError
        If ``l`` is not positive or ``u`` is not strictly inside ``(mn, mx)``.
    """
    if not l > 0:
        raise ValueError(f"l must be positive, got {l!r}")
    if not mn < u < mx:
        raise ValueError(
            f"mean u={u!r} must lie strictly between mn={mn!r} and mx={mx!r}"
        )

    # Named `span` so the builtin `range` is not shadowed.
    span = mx - mn
    v = (l + 2) * (u - mn) / span
    w = (l + 2) * (mx - u) / span

    return [v, w, span, mn]

# PERT parameters for symptom duration in days, lambda fixed at 4.
# Severe illness: mean 28 days, bounds 12.05 and 67.
sev_parms = get_beta_parms(mn=12.05, mx=67, u=28, l=4)
# Moderate illness: mean 14 days, bounds 4.2 and 24.
mod_parms = get_beta_parms(mn=4.2, mx=24, u=14, l=4)

In [None]:
# Combine both causes, attach the PERT duration parameters to every state and
# draw a proportion and a duration for each state x draw combination.
SEVERE_STATES = ["inf_sev", "abdom_sev", "gastric_bleeding"]
PARM_COLS = ["v", "w", "range", "min"]  # same order as get_beta_parms' return
N_DRAWS = 1000
SEED = 12345  # fixed so that Restart & Run All reproduces the same draws

full = pd.concat([typh, para], ignore_index = True)

# Severe states take sev_parms; every other state takes mod_parms.
is_severe = full["state"].isin(SEVERE_STATES)
full = full.assign(**{
    col: np.where(is_severe, sev_val, mod_val)
    for col, sev_val, mod_val in zip(PARM_COLS, sev_parms, mod_parms)
})

# Cross join states with draw labels through a constant key.
draw_df = pd.DataFrame({'draw': [f'draw_{i}' for i in range(N_DRAWS)], 'key': 0})
n_states = len(full)

full['key'] = 0
full = full.merge(draw_df, on = 'key', how = 'outer')
assert len(full) == n_states * N_DRAWS, "cross join produced an unexpected row count"

rng = np.random.default_rng(SEED)
full['pr'] = rng.beta(full['alpha'], full['beta'])
# Rescale the unit-interval beta draw to [min, min + range], then divide by
# 365.25 (days to years, given that the PERT bounds are in days).
full['duration'] = (rng.beta(full['v'], full['w']) * full['range'] + full['min']) / 365.25

full = full[['state', 'cause', 'draw', 'pr', 'duration']]
full.head()

In [None]:
# Stack two copies of the table, labelled measure_id 5 and 6; the temporary
# row_id index level is discarded. Rows for measure_id 6 get duration 1.
full = (
    pd.concat({5: full, 6: full}, names = ['measure_id', 'row_id'])
    .reset_index(level = 'measure_id')
    .reset_index(drop = True)
)
full.loc[full['measure_id'].eq(6), 'duration'] = 1
full.head()

In [None]:
# Sanity check: average duration within each measure_id.
full.groupby('measure_id')['duration'].agg('mean')

In [None]:
# Build one 'parent' row per measure_id x draw whose duration is the
# proportion-weighted sum of the state durations, then append those rows.
#
# Only non-parent rows feed the calculation, so re-running this cell does not
# fold earlier parent rows back into the sum or append duplicates.
# NOTE(review): the sum runs over the states of both causes together --
# confirm this is the intended weighting for the combined parent.
sequela_rows = full.loc[full['state'] != 'parent']

parent = (
    sequela_rows
    .assign(wt_duration = sequela_rows['duration'] * sequela_rows['pr'])
    .groupby(['measure_id', 'draw'])['wt_duration']
    .sum()
    .reset_index()
    .rename(columns = {'wt_duration': 'duration'})
    .assign(cause = 'intest', state = 'parent', pr = 1)
)
# Measure 6 rows carry a fixed duration of 1, as in the state-level rows.
parent.loc[parent['measure_id'] == 6, 'duration'] = 1

full = pd.concat([sequela_rows, parent], ignore_index = True)
full.groupby('measure_id')['duration'].mean()

In [None]:
# Write the final table into root_dir. os.path.join inserts the path
# separator when root_dir lacks a trailing one; plain string concatenation
# would instead write to a sibling path named "<root_dir>sequela_splits.csv".
out = os.path.join(root_dir, "sequela_splits.csv")
print(out)

full.to_csv(out, index = False)