In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Root directory of the analysis tree; the input CSVs are read from "<epi_dir>/preprocessed".
# NOTE(review): absolute path on a mounted volume — assumes the share is mounted on this machine.
epi_dir = '/Volumes/umms-esnitkin/Project_KPC_LTACH/Analysis/LTACH_transmission_modeling'

In [None]:
def _read_daily_table(csv_path):
    """Read one preprocessed CSV (first column as index) and relabel its columns 0..366.

    Assigning exactly 367 labels also makes pandas raise if the file does not
    contain 367 data columns.
    """
    table = pd.read_csv(csv_path, index_col=0)
    table.columns = np.arange(367)
    return table


infections_cleaned = _read_daily_table(f"{epi_dir}/preprocessed/infections.csv")

df_screen = _read_daily_table(f"{epi_dir}/preprocessed/screening.csv")

df_floor = _read_daily_table(f"{epi_dir}/preprocessed/floor_trace.csv")

df_room = _read_daily_table(f"{epi_dir}/preprocessed/room_trace.csv")

In [None]:
# Per-row flag: True where the row's values (NaN ignored) sum to more than zero.
np.nansum(infections_cleaned, axis=1) > 0

In [None]:
# Partition the daily column labels 0..n_days-1 into consecutive fixed-length windows.
# Each window is an inclusive (start, stop) pair of column labels, suitable for
# label-based slicing such as `df.loc[row, start:stop]`.
n_days = 367
window_length = 7

window_starts = list(range(0, n_days, window_length))

# Stops are inclusive. The final window is shorter than the others, so clamp its
# stop to the last valid label (n_days - 1) instead of pointing past the columns.
window_stops = [min(start + window_length - 1, n_days - 1) for start in window_starts]

windows = list(zip(window_starts, window_stops))

In [None]:
# Collapse the daily infection table to one value per patient per window:
# NaN when every day in the window is NaN, otherwise the largest non-NaN daily value.
infections_rs = pd.DataFrame(
    index=infections_cleaned.index,
    columns=np.arange(len(windows)),
)
for patient in infections_cleaned.index:
    for week, (first_day, last_day) in enumerate(windows):
        # Label-based slice: both endpoints are included.
        daily = infections_cleaned.loc[patient, first_day:last_day]
        all_missing = np.isnan(daily).all()
        infections_rs.loc[patient, week] = np.nan if all_missing else np.nanmax(daily)

In [None]:
# Per-window head counts, leaving out the final window.
NN = infections_rs.notna().sum(axis=0).iloc[:-1]  # rows with any value in the window
II = infections_rs.eq(1).sum(axis=0).iloc[:-1]    # rows whose window value equals 1

fig, ax = plt.subplots()
sns.lineplot(data=NN, color="green", linestyle="--", label="Total", ax=ax)
sns.lineplot(data=II, label="Infected", ax=ax)
ax.set_ylim(bottom=0)
ax.set_ylabel("Number of Patients")
ax.set_xlabel("Weeks")
# plt.savefig("images/crkp.png")
plt.show()

In [None]:
# Facility trace: 1 where the resampled infection table has a non-missing value, else 0.
df_facility_rs = (~infections_rs.isna()).astype(int)

In [None]:
# Mean of the per-window non-missing counts held in NN.
NN.mean()

#### screening data

In [None]:
# Screening table: a row keeps its resampled infection value only in windows where it
# is "newly admitted" (has a value in this window but none in the previous one).
# Every other cell stays NaN.
df_screen_rs = pd.DataFrame(index=infections_cleaned.index, columns=np.arange(len(windows)))
# Window 0 has no predecessor, so its values are copied as-is.
df_screen_rs[0] = infections_rs[0]
for t in range(1, len(windows)):
    newly_admitted = infections_rs[t].notna() & infections_rs[t - 1].isna()

    # Assign through a single .loc call. Chained indexing (`df[t][mask] = ...`) writes
    # to a temporary under pandas Copy-on-Write and would leave df_screen_rs unchanged.
    df_screen_rs.loc[newly_admitted, t] = infections_rs.loc[newly_admitted, t]
    assert df_screen_rs.loc[newly_admitted, t].notna().all()

In [None]:
# Per-window summaries of the screening table.
B = df_screen_rs.notna().sum(axis=0)  # number of non-missing screening entries
A = df_screen_rs.sum(axis=0)          # sum of the screening values

In [None]:
# Overlay the two per-window screening series (A first, then B) on the same axes.
for series in (A, B):
    sns.lineplot(data=series)
plt.show()

In [None]:
# Mean per-window count of screening entries, skipping window 0
# (window 0 was copied wholesale from the infection table rather than filtered).
B.iloc[1:].mean()

In [None]:
# Mean per-window sum of screening values, skipping window 0
# (window 0 was copied wholesale from the infection table rather than filtered).
A.iloc[1:].mean()

### floor and room trace data

In [None]:
# Resample the daily floor trace to one value per row per window: the first positive
# daily value in the window, or the window's first-day value when none is positive.
df_floor_rs = pd.DataFrame(index=df_floor.index, columns=np.arange(len(windows)))
for n in df_floor.index:
    for t, (a, b) in enumerate(windows):
        # Label-based slice: both endpoints are included.
        week_data = np.array(df_floor.loc[n, a:b])
        # argmax on a boolean array gives the index of the first True, or 0 if there is none.
        df_floor_rs.loc[n, t] = week_data[np.argmax(week_data > 0)]

In [None]:
# Resample the daily room trace to one value per row per window: the first positive
# daily value in the window, or the window's first-day value when none is positive.
df_room_rs = pd.DataFrame(index=df_room.index, columns=np.arange(len(windows)))
for n in df_room.index:
    for t, (a, b) in enumerate(windows):
        # Label-based slice: both endpoints are included.
        week_data = np.array(df_room.loc[n, a:b])
        # argmax on a boolean array gives the index of the first True, or 0 if there is none.
        df_room_rs.loc[n, t] = week_data[np.argmax(week_data > 0)]

## "observed" data

In [None]:
# Column-wise sum of the resampled infection table, as a plain array (one entry per window).
total_count = infections_rs.sum(axis=0).to_numpy()

In [None]:
X = infections_rs.values
F = df_floor_rs.values
# big change: ignore floor 5
# One array per floor: NaN-ignoring column sums of X restricted to cells whose floor value matches.
floor_counts = [np.nansum(X * (F == floor), axis=0) for floor in (1, 2, 3, 4, 6)]

In [None]:
T = X.shape[1]
R = df_room_rs.values
weekly_counts = []
for week in range(T):
    rooms = R[:, week]
    infected = (X[:, week] == 1).astype(int)
    # same_room[i, j] is 1 when rows i and j carry equal room values
    # (this includes i == j, except where the value is NaN).
    same_room = (rooms[np.newaxis, :] == rooms[:, np.newaxis]).astype(int)
    # For each row: how many infected rows share its room value (itself included if infected).
    infected_in_room = (same_room * infected).sum(axis=1)
    # Count the rows whose room value is shared by more than one infected row.
    weekly_counts.append((infected_in_room > 1).sum())
room_count = np.array(weekly_counts, dtype=float)

In [None]:
# Display the per-window room counts computed in the previous cell.
room_count

In [None]:
# Stack the per-window series row-wise: total_count first, then each entry of
# floor_counts, then room_count last.
# total_count and floor_counts are derived from object-dtype frames, so the stacked
# array would otherwise be object dtype and np.save would pickle it. Cast to float so
# the saved file is a plain numeric array.
observed_data = np.stack([total_count] + floor_counts + [room_count]).astype(float)

### sanity checks

In [None]:
# Cell-by-cell masks that the resampled tables are expected to agree on.
present = infections_rs.notna().values
on_floor = (df_floor_rs > 0).values

# Floor and room traces are positive in exactly the same cells.
assert (on_floor == (df_room_rs > 0).values).all()
# The facility trace is positive exactly where an infection value exists.
assert (present == (df_facility_rs > 0).values).all()
# Every non-missing screening value equals the infection value in the same cell.
for patient, screen_row in df_screen_rs.iterrows():
    for week, screened in screen_row.items():
        if np.isnan(screened):
            continue
        assert infections_rs.loc[patient, week] == screened
# An infection value exists exactly where the floor trace is positive.
assert (present == on_floor).all()

#### write out data

In [None]:
# takeaway from sanity check: each week sees about 20 new patients admitted

# Write the resampled tables next to the preprocessed inputs. The output folder is
# derived from epi_dir so the analysis root is spelled out in only one place.
output_dir = f"{epi_dir}/preprocessed/resampled"
infections_rs.to_csv(f"{output_dir}/infections.csv")
df_screen_rs.to_csv(f"{output_dir}/screening.csv")
df_facility_rs.to_csv(f"{output_dir}/facility_trace.csv")
df_floor_rs.to_csv(f"{output_dir}/floor_trace.csv")
df_room_rs.to_csv(f"{output_dir}/room_trace.csv")

In [None]:
# Persist the stacked observed-data array in NumPy's .npy format.
# np.save opens the target itself when given a path that already ends in ".npy".
np.save(f"{output_dir}/observed_data.npy", observed_data)