In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

Generate synthetic trace, screening, and genomic data for a phylodynamic simulation model

In [None]:
# Global simulation parameters used by the cells below.
T = 52    # number of discrete time steps; every per-patient trace has length T
g = 0.05  # per-step probability that a patient who is present is discharged and replaced
a = 0.1   # probability that a patient screens positive at the step they are admitted
# No global seed is set here: the trace and screening cells each call
# np.random.seed(...) with their own fixed value before drawing.

#### trace data

In [None]:
M = 300  # initial census: number of patients present at t = 0

# trace[k] is a length-T vector of 0/1 flags: 1 where patient k is in the facility.
trace = dict()
for i in range(M):
    trace[i] = np.zeros(T)
    trace[i][0] = 1

np.random.seed(88)
N = M  # next unused patient id; equals the total number of patients once the loop ends
for t in range(1, T):
    # Iterate over a snapshot of the keys: replacements created during step t
    # must not be visited (and possibly discharged) until step t + 1.
    for k in list(trace):
        if not trace[k][t-1]:
            continue  # patient k was not in the facility at the previous step
        if np.random.uniform() < g:
            # Discharge: k stays absent from t onwards and a brand-new patient
            # is admitted in its place. Ids are handed out in (t, k) order; the
            # floor/room cell relies on this ordering to match replacements.
            trace[N] = np.zeros(T)
            trace[N][t] = 1
            N += 1
        else:
            trace[k][t] = 1

In [None]:
# One row per patient, one column per time step (transpose of the dict-of-arrays layout).
df_trace = pd.DataFrame(trace).transpose()
# Average number of steps (out of T) that the M initial patients are present.
df_trace.iloc[:M].sum(axis=1).mean()

#### screen data

In [None]:
# Build the screening table. A patient is screened exactly once, at the step
# they enter the facility (t == 0 for the initial cohort, otherwise the first
# step at which they are present). Every other cell stays NaN.
df_screen = pd.DataFrame(index = np.arange(N), columns=np.arange(T))

np.random.seed(91)

for t in range(T):
    for n in range(N):
        if df_trace.loc[n, t] != 1:
            continue  # not in the facility at this step
        # `or` short-circuits, so column t-1 is never looked up when t == 0.
        admitted_now = (t == 0) or (df_trace.loc[n, t-1] == 0)
        if admitted_now:
            # One uniform draw per admission: positive with probability a.
            df_screen.loc[n, t] = 1 if np.random.uniform() < a else 0

In [None]:
# Total number of positive admission screens, i.e. how many index patients there are.
df_screen.sum(axis=0).sum()

In [None]:
# Track, per time step, how many index patients (positive admission screens)
# are currently in the facility, and record them in order of appearance.
n_clusters = np.zeros(T)
clusters = set()        # index patients present at the current step
cluster_lookup = []     # every index patient, ordered by (admission step, patient id)
for t in range(T):
    for n in range(N):
        present = df_trace.loc[n, t] == 1
        if not present:
            # Absent patients are dropped; discard is a no-op for non-members.
            clusters.discard(n)
        elif df_screen.loc[n, t] == 1:
            clusters.add(n)
            cluster_lookup.append(n)
    n_clusters[t] = len(clusters)

In [None]:
# Display the number of distinct *index patients* (patients who screened
# positive on admission) present in the facility at each time step.
# Note: clusters may persist after an index patient is discharged, so this
# series may undercount the clusters that are actually still active.
n_clusters

In [None]:
# Re-package the ordered list of index patients as a Series labelled 1..K
# (presumably the label is the cluster id -- confirm with the consumer).
cluster_lookup = pd.Series(
    np.array(cluster_lookup),
    index=np.arange(1, 1 + len(cluster_lookup)),
)

#### floor and room trace data

In [None]:
# Assumed layout: five floors and M // 2 rooms (150 rooms when M = 300).

n_floors = 5
n_rooms = M // 2

# Location tables with one row per patient and one column per time step.
# They start out empty (all NaN) and are filled in by a later cell.
df_floor = pd.DataFrame(index = np.arange(N), columns = np.arange(T))
df_room = pd.DataFrame(index = np.arange(N), columns = np.arange(T))

In [None]:
# Place the M initial patients by cycling through the floors and rooms in id order.
floor = {i: i % n_floors for i in range(M)}
room = {i: i % n_rooms for i in range(M)}

In [None]:
# Fill the location tables. A patient keeps one floor/room for the whole stay;
# when a patient leaves, the next not-yet-placed patient id inherits that slot.
m = M  # id of the next patient that still needs a floor/room
for t in range(T):
    for n in range(N):
        if df_trace.loc[n, t]:
            df_floor.loc[n, t] = floor[n]
            df_room.loc[n, t] = room[n]
            continue
        # Absent now but present one step earlier: n was discharged at step t.
        just_left = (t > 0) and (df_trace.loc[n, t-1] == 1)
        if just_left:
            # Discharges are visited in the same (t, n) order in which the trace
            # simulation created replacements, so id m is n's replacement.
            floor[m], room[m] = floor[n], room[n]
            m += 1

#### write out

In [None]:
# Write every generated table to ../sim_data as CSV, without the row index.
csv_targets = [
    (df_trace, "../sim_data/facility_trace2.csv"),
    (df_screen, "../sim_data/screening2.csv"),
    (df_floor, "../sim_data/floor_trace2.csv"),
    (df_room, "../sim_data/room_trace2.csv"),
    (cluster_lookup, "../sim_data/cluster_lookup2.csv"),
]
for table, csv_path in csv_targets:
    table.to_csv(csv_path, index=False)