In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.append("..")
from src.dataset import SIModel
from src.approx_bc import abc_rejection_sampler, abc_rejection_sampler2
import seaborn as sns
from scipy.stats import norm, lognorm, multivariate_normal
from src.utils import get_results

In [None]:
# Plot styling: seaborn theme, white background, "deep" colour palette.
sns.set_theme()
sns.set_style("white")
sns.set_palette("deep")

# --- Simulation configuration shared by the rest of the notebook ---
# Seven transmission rates; the summary table further down labels them
# Facility, Floor 1-5 and Room.
beta_true = [0.05, .02, .04, .06, .08, .1, .05]
alpha = 0.1
gamma = 0.05
eta = 0.1 # interpretation: after one week, a colonized patient will show symptoms with probability .5
heterogeneous = True
N = 300 # consider increasing
T = 52
K = 30
data_seed = 31
prior_mu = [-3] * 7
prior_sigma = [1] * 7

# Run with eta as configured above; plotted as "Observed".
si_model = SIModel(
    alpha, gamma, beta_true,
    heterogeneous, prior_mu, prior_sigma,
    N, T,
    summarize=False, flatten=False,
    observed_seed=data_seed, eta=eta,
)
y_o = si_model.get_observed_data()

# Same configuration and seed but eta=.999; plotted as "Actual".
si_model_alt = SIModel(
    alpha, gamma, beta_true,
    heterogeneous, prior_mu, prior_sigma,
    N, T,
    summarize=False, flatten=False,
    observed_seed=data_seed, eta=.999,
)
x_o = si_model_alt.get_observed_data()

# Overlay row 0 of both runs on one axis.
sns.lineplot(y_o[0, :], label="Observed")
sns.lineplot(x_o[0, :], label="Actual", linestyle="--")

plt.ylim(0, 1)
plt.xlabel("Time Steps")
plt.ylabel("Proportion Infected")
plt.savefig("images/partial_obs.png")
plt.show()

In [None]:
# Mean of the observed data along axis 1 (one value per row of y_o).
y_o.mean(1)

In [None]:
# First six rows of the observed data, transposed so each becomes one column.
floor_columns = ["Facility"] + [f"Floor {i}" for i in range(1,6)]
data = pd.DataFrame(data=y_o[:6, :].T, columns=floor_columns)

# One line per column, plotted against the row index.
sns.lineplot(data)
plt.ylim(0, 1)
plt.xlabel("Time Steps")
plt.ylabel("Proportion Infected")
plt.savefig("images/partial_obs_floor_viz.png")
plt.show()

# Partial Observation

In [None]:
# Load the results stored under this multirun directory and order them by
# the "val_loss" column (sort_values default order, i.e. lowest first).
# NOTE(review): the run directory is hard-coded; confirm it exists on a fresh checkout.
df_partial = get_results("../multirun/2024-11-04/15-42-44").sort_values("val_loss")

In [None]:
# df_partial is sorted by "val_loss", so row 0 is the lowest-loss run.
best_run = df_partial.iloc[0]
# "mu" is stored with an extra leading level; keep its first entry.
mu = best_run["mu"][0]
sigma = best_run["sigma"]

In [None]:
# j = 1
# mu = eval(df_partial["mu"].values[j])[0]
# # mu

# sigma = eval(df_partial["sigma"].values[j])
# sds = [sigma[i][i]**0.5 for i in range(7)]
# # sds

# npe_rates = [np.exp(mu[i] + sds[i]**2/2) for i in range(7)]

# ABC

In [None]:
# Model used for ABC. Unlike the first model in the notebook it does not pass
# flatten=False, so SIModel's default for that option applies here.
# eta and the observation seed come from the configuration cell instead of
# repeating the literals 0.1 and 31.
si_model = SIModel(alpha, gamma, beta_true, heterogeneous,
                  prior_mu, prior_sigma, N=N, T=T, summarize=False, eta=eta)


def prior_sampler():
    """Draw one parameter vector via ``si_model.sample_logbeta(1)``."""
    return si_model.sample_logbeta(1)


def simulator(theta, seed):
    """Simulate one data set from ``theta`` with the given seed."""
    return si_model.SI_simulator(theta, seed)


# NOTE: this rebinds x_o, which previously held the eta=.999 ("Actual") run.
x_o = si_model.get_observed_data(data_seed)

# Tolerance passed to the rejection sampler.
epsilon = 1.3

# Number of posterior samples requested from the sampler.
S = 100
print(f"epsilon: {epsilon}")
posterior_sample, errors = abc_rejection_sampler(
    S, epsilon, prior_sampler, simulator, x_o, max_attempts=30000,
    print_every=1000, summarize=False
    )

In [None]:
# 0.5% quantile of the strictly positive ABC errors.
np.quantile(errors[errors > 0], 0.005)

In [None]:
# Exponentiate the ABC draws once and reuse them for every summary.
abc_exp_draws = np.exp(posterior_sample)

# Mean of the exponentiated draws, per parameter (axis 0 runs over draws).
abc_rates = abc_exp_draws.mean(0)

# 5% and 95% quantiles per parameter, giving a 90% interval.
a, b = np.quantile(abc_exp_draws, [0.05, 0.95], axis=0)

# One (lower, upper) pair per parameter.
abc_cis = list(zip(a, b))

In [None]:
# Strictly positive ABC errors in ascending order.
# NOTE(review): `a` is a scratch name that other cells also bind (interval
# bounds before and after this cell), so its meaning depends on run order.
a = np.sort(errors[errors > 0])

In [None]:
# Log empirical CDF of the strictly positive ABC errors.
# The sorted errors are recomputed here rather than read from the scratch
# variable `a`, which other cells rebind to unrelated values; this keeps the
# cell correct regardless of which cell ran last.
x = np.sort(errors[errors > 0])
y = np.arange(len(x))/float(len(x))
# Drop index 0: y[0] == 0, and log(0) is -inf.
plt.plot(x[1:], np.log(y[1:]))
plt.xlabel("ABC error")
plt.ylabel("log empirical CDF")

# NOTE (author): arguably the wrong way to visualize this.

## Build Table

In [None]:
# 90% credible intervals: 5% and 95% quantiles of each marginal
# Normal(mu[i], sqrt(sigma[i][i])) of the NPE posterior.
lower_q, upper_q = 0.05, 0.95
npe_cis = []
for i in range(7):
    # Marginal standard deviation from the covariance diagonal.
    sd = sigma[i][i] ** 0.5
    dist = norm(loc=mu[i], scale=sd)
    a, b = (dist.ppf(q) for q in (lower_q, upper_q))
    npe_cis.append((a, b))
    # print(np.round(np.exp(a), 3), np.round(np.exp(b), 3))

In [None]:
# Display the NPE 90% intervals, one (lower, upper) pair per parameter.
npe_cis

In [None]:
# Display the ABC 5%-95% quantile intervals, one (lower, upper) pair per parameter.
abc_cis

In [None]:
# Summary table: true rate next to the NPE and ABC posterior summaries,
# one row per transmission rate.
rate_labels = ["Facility"] + [f"Floor {i}" for i in range(1, 6)] + ["Room"]
table = pd.DataFrame({
    "Transmission Rate": rate_labels,
    # Taken from the simulation config so the table cannot drift from the
    # values the data were actually generated with.
    "Value": list(beta_true),
    # NOTE(review): the NPE columns summarise the Gaussian (mu, sigma) directly,
    # whereas the posterior-predictive cells additionally reject non-positive
    # draws; confirm that is the intended comparison.
    "NPE Mean": mu,
    "NPE CI": npe_cis,
    "ABC Mean": abc_rates,
    "ABC CI": abc_cis
})

In [None]:
# TODO: possibly need to show posterior predictive checks to show that NPE does a better job...
# Print the summary table as LaTeX source.
print(table.to_latex())

## Posterior Predictive Checks

In [None]:
# Draw K samples from the Gaussian NPE posterior, keeping only draws whose
# seven components are all strictly positive (rejection sampling in batches).
neural_posterior = multivariate_normal(mu, sigma)
K = 30
seed = 2
accepted_batches = [np.empty((0, 7))]
n_accepted = 0
while n_accepted < K:
    # Each batch uses its own seed so the whole procedure is reproducible.
    draw = neural_posterior.rvs(size=K, random_state=seed)
    seed += 1
    accepted = draw[np.all(draw > 0, axis=1)]
    accepted_batches.append(accepted)
    n_accepted += accepted.shape[0]
# Keep the first K accepted draws, in the order they were generated.
npe_sample = np.concatenate(accepted_batches, axis=0)[:K]

In [None]:
def posterior_predictive(sample, K=30):
    """Simulate one replicated data set per posterior draw.

    Parameters
    ----------
    sample : sequence of rate vectors
        ``sample[i]`` is passed to ``SIModel`` as the transmission rates for
        replicate ``i``. Must contain at least ``K`` entries; only the first
        ``K`` are used.
    K : int, default 30
        Number of replicates to simulate.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(K, 7, T)``; slice ``i`` is the data simulated from
        ``sample[i]`` using seed ``i``.

    Notes
    -----
    All other model settings (``alpha``, ``gamma``, ``N``, ``T``, ``eta``, the
    prior parameters) are read from the notebook's configuration globals.
    The previous version also simulated the observed data set here but never
    used it; that dead work has been removed.
    """
    replicates = np.empty((K, 7, T))
    for i in range(K):
        rep_model = SIModel(alpha, gamma, sample[i],
                            heterogeneous, prior_mu, prior_sigma,
                            N, T, summarize=False, flatten=False,
                            eta=eta)
        # Seed each replicate with its index so results are reproducible.
        replicates[i] = np.array(rep_model.get_observed_data(i))
    return replicates

In [None]:
# Posterior predictive replicates from the ABC draws. The draws are
# exponentiated before being used as rates; with the default K=30 only the
# first 30 draws are simulated.
pp_abc = posterior_predictive(np.exp(posterior_sample))

In [None]:
def ppc_plotter(npe_sample, j, save_tag = None):
    """Plot a posterior predictive check for series ``j``.

    Draws the observed trajectory, one faint line per NPE posterior
    predictive replicate, the NPE replicate mean, and the ABC replicate mean
    (taken from the notebook global ``pp_abc``).

    Parameters
    ----------
    npe_sample : sequence of rate vectors
        NPE posterior draws; each entry is used as the transmission rates for
        one replicate. At most ``K`` (notebook global) entries are used.
    j : int
        Index of the series to plot (first axis of the simulated data).
        ``j == 6`` gets the "Roommates" y-axis label.
    save_tag : str, optional
        If given, the figure is saved to ``images/{save_tag}.png``.
    """
    # Observed data: same configuration and seed as the notebook's first model.
    observed_model = SIModel(alpha, gamma, beta_true,
                             heterogeneous, prior_mu, prior_sigma,
                             N, T, summarize=False, observed_seed=data_seed,
                             flatten=False, eta=eta)
    I_o = np.array(observed_model.get_observed_data())[j]

    # Never index past the end of npe_sample, even if the global K has been
    # rebound to a larger value by a later cell.
    n_draws = min(K, len(npe_sample))

    # Reuse the shared helper instead of repeating the replicate loop.
    npe_pp = posterior_predictive(npe_sample, n_draws)
    pp_mean = npe_pp.mean(0)[j]
    abc_mean = pp_abc.mean(0)[j]

    # Label only the first replicate so the legend has a single entry for them.
    labels = ["Post. Pred. Draw (NPE)"] + [None] * (n_draws - 1)
    plt.plot(I_o, label="Observed", color="k")
    plt.plot(npe_pp[:, j, :].T,
             label=labels, color="b",
             alpha=0.2)
    plt.plot(pp_mean, label="Post. Pred. Mean (NPE)", linestyle="--",
             color="orange")
    plt.plot(abc_mean, label="Post. Pred. Mean (ABC)", linestyle=":", color="green")
    plt.legend()
    # plt.title("NPE Posterior Predictive Check")
    if j == 6:
        plt.ylabel("Proportion of Infected Roommates")
    else:
        plt.ylabel("Proportion Infected")
    plt.xlabel("Time Steps")
    # plt.ylim(0, 1)
    if save_tag is not None:
        plt.savefig(f"images/{save_tag}.png")

    plt.show()

In [None]:
# Posterior predictive check for series index 4; the figure is not saved.
ppc_plotter(npe_sample, 4)

In [None]:
# Posterior predictive check for series index 0, saved as images/partial-ppc-a.png.
ppc_plotter(npe_sample, 0, "partial-ppc-a")

In [None]:
# Posterior predictive check for series index 5, saved as images/partial-ppc-b.png.
ppc_plotter(npe_sample, 5, "partial-ppc-b")

In [None]:
# Posterior predictive check for series index 6 (the "Roommates" axis label),
# saved as images/partial-ppc-c.png.
ppc_plotter(npe_sample, 6, "partial-ppc-c")

In [None]:
# Turn the NPE covariance into a correlation matrix: corr = D @ sigma @ D,
# where D is diagonal with the reciprocal marginal standard deviations.
marginal_sd = np.sqrt(np.diag(sigma))
D = np.diag(1 / marginal_sd)
corr = D @ sigma @ D

plt.matshow(corr, cmap="rocket")
plt.colorbar()
plt.xlabel(r"$\beta_j$")
plt.ylabel(r"$\beta_j$")
# plt.savefig("images/corr.png")
plt.show()

In [None]:
# Correlation between parameters across the exponentiated ABC draws
# (rows are draws, columns are parameters, hence rowvar=False).
abc_corr = np.corrcoef(np.exp(posterior_sample), rowvar=False)
plt.matshow(abc_corr, cmap="rocket")
plt.xlabel(r"$\beta_j$")
plt.ylabel(r"$\beta_j$")
plt.colorbar()
plt.savefig("images/partial-corr-abc.png")

## Modified NPE Estimates

In [None]:
# Larger sample (100 draws) from the Gaussian NPE posterior, again keeping
# only draws whose seven components are all strictly positive.
neural_posterior = multivariate_normal(mu, sigma)
# Use a dedicated name for this sample size. Rebinding the notebook-wide K to
# 100 here would make ppc_plotter (which reads K) index past the end of the
# 30-row npe_sample if any of its cells were re-run afterwards.
n_npe_draws = 100
accepted_batches = [np.empty((0, 7))]
n_accepted = 0
seed = 2
while n_accepted < n_npe_draws:
    # Each batch uses its own seed so the whole procedure is reproducible.
    draw = neural_posterior.rvs(size=n_npe_draws, random_state=seed)
    seed += 1
    accepted = draw[(draw > 0).all(axis=1)]
    accepted_batches.append(accepted)
    n_accepted += accepted.shape[0]
# Keep the first n_npe_draws accepted draws, in generation order.
npe_sample2 = np.concatenate(accepted_batches, axis=0)[:n_npe_draws]

In [None]:
# Empirical correlation between parameters across the positive-only NPE draws
# (rows are draws, columns are parameters, hence rowvar=False).
npe_corr = np.corrcoef(npe_sample2, rowvar=False)
plt.matshow(npe_corr, cmap="rocket")
plt.xlabel(r"$\beta_j$")
plt.ylabel(r"$\beta_j$")
plt.colorbar()
plt.savefig("images/partial-corr-npe.png")