In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import sys
sys.path.append("..")
from src.dataset import SIModel
from src.approx_bc import abc_rejection_sampler
import seaborn as sns
from scipy.stats import norm, lognorm, multivariate_normal
from src.utils import lognormal_sd, get_results

In [None]:
# Plot styling: theme first, then the style and palette overrides.
sns.set_theme()
sns.set_style("white")
sns.set_palette("deep")

# Gate for the ABC rejection-sampling cell further down.
run_abc = True

# Arguments forwarded to SIModel (their exact meaning is defined in src.dataset).
alpha, gamma = 0.1, 0.05
beta_true = [1e-8, 0.01, 0.5, 0.01, 0.01, 0.01, 1e-8]  # [0.05, .02, .04, .06, .08, .1, .05]
heterogeneous = True
prior_mu, prior_sigma = -3, 1
N = 300  # consider increasing
T = 52
data_seed = 31  # passed to SIModel as observed_seed

# Number of posterior draws used by the posterior predictive checks.
K = 30

In [None]:
# Build the simulator with the fixed data seed and fetch the observed dataset.
si_model = SIModel(alpha, gamma, beta_true,
                   heterogeneous, prior_mu, prior_sigma,
                   N, T, summarize=False, flatten=False,
                   observed_seed=data_seed)
x_o = si_model.get_observed_data()

# Row 0 of x_o is plotted as the infected proportion and its complement as susceptible.
plt.plot(x_o[0], label="infected")
plt.plot(1 - x_o[0], label="susceptible")
plt.xlabel("Time Steps")
plt.ylabel("Proportion")
# Both curves carry labels, so draw the legend that makes them visible.
plt.legend()
plt.show()

In [None]:
# First six rows of x_o, transposed so that each series becomes one column.
series_names = ["Facility"] + [f"Floor {i}" for i in range(1, 6)]
data = pd.DataFrame(data=x_o[:6, :].T, columns=series_names)

ax = sns.lineplot(data)
ax.set_ylim((0, 1))
ax.set_xlabel("Time Steps")
ax.set_ylabel("Proportion Infected")
# plt.savefig("images/hetero_infection_viz.png")
plt.show()

# Likelihood-based

In [None]:
# Load the reference ("exact") posterior sample from disk. Later cells exponentiate it
# and reduce over axis 0, so presumably rows are draws of log transmission rates — TODO confirm.
exact_sample = np.load("posterior_sample.npy")

In [None]:
# Column-wise summaries of the exact sample, on the stored scale ...
exact_means = exact_sample.mean(axis=0)
exact_sds = exact_sample.std(axis=0)
# ... and the mean after exponentiating every draw.
exact_rates = np.exp(exact_sample).mean(axis=0)

In [None]:
# Display the column-wise mean of the exponentiated exact sample.
exact_rates

In [None]:
# Column-wise standard deviation of the exponentiated exact sample.
np.exp(exact_sample).std(0)

In [None]:
# ax = sns.pairplot(pd.DataFrame(np.exp(exact_sample), columns = [rf"$\beta_{k}$" for k in range(7)]))
# plt.savefig("images/pairsplot.png")

# NPE

In [None]:
# The "mu" and "sigma" columns are stored as text and parsed back into Python objects.
# NOTE(review): eval() executes whatever the CSV contains. Only use it on a file you
# produced yourself; if the cells are plain list literals, ast.literal_eval is the
# safer parser — confirm the stored format before switching.
df_npe = pd.read_csv("df_het.csv").assign(
    mu=lambda frame: frame["mu"].apply(eval),
    sigma=lambda frame: frame["sigma"].apply(eval),
)

In [None]:
# "sigma" entry of the last row of df_npe, as an array.
# NOTE(review): `sigma` is assigned again in the posterior-predictive section below.
sigma = np.array(df_npe.iloc[-1]["sigma"])

In [None]:
# Rows fitted without the mean-field restriction.
is_full_cov = df_npe["mean_field"] == False
df_npe_full = df_npe[is_full_cov]

# One row per fit: the mean vector, and the square roots of the "sigma" diagonal.
npe_means = np.array(list(df_npe_full["mu"]))
npe_sds = np.array(
    [[cov[k][k] ** .5 for k in range(7)] for cov in df_npe_full["sigma"]]
)

In [None]:
# Rows fitted with the mean-field restriction.
is_mean_field = df_npe["mean_field"] == True
df_mf = df_npe[is_mean_field]

# One row per fit: the mean vector, and the square roots of the "sigma" diagonal.
mf_means = np.array(list(df_mf["mu"]))
mf_sds = np.array(
    [[cov[k][k] ** .5 for k in range(7)] for cov in df_mf["sigma"]]
)

In [None]:
# Keep, for every n_sample, the run with the smallest val_loss.
df_ln = (
    get_results("../multirun/2024-11-14/14-50-37")
    .sort_values(["n_sample", "val_loss"])
    .drop_duplicates("n_sample")
)

# Each "mu" entry is indexed with [0] to get the mean vector.
ln_means = np.array([mu_entry[0] for mu_entry in df_ln["mu"]])
ln_sds = np.array(
    [[cov[k][k] ** .5 for k in range(7)] for cov in df_ln["sigma"]]
)
# Mean of a log-normal variable: exp(mu + sd^2 / 2).
ln_rates = np.exp(ln_means + ln_sds ** 2 / 2)

In [None]:
# Inspect the selected runs (one row per n_sample after drop_duplicates).
df_ln

## Posterior Predictive Checks

In [None]:
K = 30

# Gaussian posterior taken from the last row of df_npe.
last_fit = df_npe.iloc[-1]
mu, sigma = last_fit["mu"], last_fit["sigma"]
neural_posterior = multivariate_normal(mu, sigma)

# Draw batches of K, keep only the rows whose entries are all positive, and
# repeat with the next seed until at least K rows have been collected.
seed = 2
npe_sample = np.empty((0, 7))
while len(npe_sample) < K:
    draw = neural_posterior.rvs(size=K, random_state=seed)
    seed += 1
    all_positive = np.all(draw > 0, axis=1)
    accepted = draw[all_positive]
    npe_sample = np.vstack((npe_sample, accepted))
npe_sample = npe_sample[:K]

In [None]:
# Posterior predictive check: simulate one dataset per row of npe_sample and
# compare row j of each simulation with the same row of the observed data.
posterior_predictive = np.empty((K, 7, T))
j = 0  # row of the simulator output that is plotted
si_model = SIModel(alpha, gamma, beta_true,
                   heterogeneous, prior_mu, prior_sigma,
                   N, T, summarize=False, observed_seed=data_seed,
                   flatten=False)
x_o = si_model.get_observed_data()

for i in range(K):
    # Clip to a tiny positive floor before handing the draw to the simulator.
    si_model = SIModel(alpha, gamma, np.clip(npe_sample[i], 1e-8, None),
                       heterogeneous, prior_mu, prior_sigma,
                       N, T, summarize=False, flatten=False)
    # The draw index is passed to get_observed_data (presumably as the seed — confirm).
    x_rep = si_model.get_observed_data(i)
    posterior_predictive[i] = np.array(x_rep)
pp_mean = posterior_predictive.mean(0)[j]

# Only the first replicate line gets a legend entry.
labels = ["Post. Pred. Draw"] + [None for _ in range(K - 1)]
I_o = np.array(x_o)[j]
plt.plot(I_o, label="Observed", color="k")
plt.plot(posterior_predictive[:, j, :].T,
         label=labels, color="b",
         alpha=0.2)
plt.plot(pp_mean, label="Post. Pred. Mean", linestyle="--",
         color="orange")
plt.legend()
plt.ylabel("Proportion Infected")
plt.xlabel("Time Steps")
# Set the axis limits before saving so the PNG matches the figure shown inline.
plt.ylim(0, 1)
plt.savefig("images/ppc_het.png")
plt.show()

#### PPC with the log-normal NPE posterior

In [None]:
# Posterior predictive check for the fit stored in the last row of df_ln.
mu2 = df_ln.iloc[-1]["mu"][0]
sigma2 = df_ln.iloc[-1]["sigma"]
neural_posterior2 = multivariate_normal(mu2, sigma2)
# Draws are exponentiated before being used as simulator parameters.
# NOTE(review): `seed` is whatever value the truncated-sampling loop above left behind.
npe_sample2 = np.exp(neural_posterior2.rvs(size=K, random_state=seed))

# Results are stored under *2 names so that posterior_predictive / pp_mean from
# the Gaussian check are not overwritten: the intervention plot further down
# draws pp_mean as its "No Intervention" baseline next to curves simulated from
# npe_sample, so the baseline has to come from that same sample.
posterior_predictive2 = np.empty((K, 7, T))
j = 0  # row of the simulator output that is plotted
si_model = SIModel(alpha, gamma, beta_true,
                   heterogeneous, prior_mu, prior_sigma,
                   N, T, summarize=False, observed_seed=data_seed,
                   flatten=False)
x_o = si_model.get_observed_data()

for i in range(K):
    si_model = SIModel(alpha, gamma, np.clip(npe_sample2[i], 1e-8, None),
                       heterogeneous, prior_mu, prior_sigma,
                       N, T, summarize=False, flatten=False)
    x_rep = si_model.get_observed_data(i)
    posterior_predictive2[i] = np.array(x_rep)
pp_mean2 = posterior_predictive2.mean(0)[j]

# Only the first replicate line gets a legend entry.
labels = ["Post. Pred. Draw"] + [None for _ in range(K - 1)]
I_o = np.array(x_o)[j]
plt.plot(I_o, label="Observed", color="k")
plt.plot(posterior_predictive2[:, j, :].T,
         label=labels, color="b",
         alpha=0.2)
plt.plot(pp_mean2, label="Post. Pred. Mean", linestyle="--",
         color="orange")
plt.legend()
plt.ylabel("Proportion Infected")
plt.xlabel("Time Steps")
# plt.savefig("images/ppc_het.png")
plt.ylim(0, 1)
plt.show()

In [None]:
# Turn the covariance sigma2 into a correlation matrix: scale rows and columns
# by the reciprocal standard deviations.
inv_sd = 1 / np.sqrt(np.diag(sigma2))
D = np.diag(inv_sd)
corr = D @ sigma2 @ D

plt.matshow(corr, cmap="rocket")
plt.colorbar()
plt.xlabel(r"$\beta_j$")
plt.ylabel(r"$\beta_j$")
# plt.savefig("images/corr.png")
plt.show()

In [None]:
# One `pi` vector per intervention scenario, forwarded to SIModel(pi=...).
pis = [
    [.1, 1, 1, 1, 1, 1, 1],
    [.75, .75, .75, .75, .75, .75, .75],
    [1, 1, 1, 1, 1, 1, 1e-8]
]

# Size the buffer from the sample itself so a later change of K cannot
# desynchronise the loop from npe_sample.
n_draws = npe_sample.shape[0]
intervention_draws = np.empty((n_draws, len(pis), T))
for scenario, pi in enumerate(pis):
    for i in range(n_draws):
        # Same 1e-8 floor as the posterior predictive check, so every scenario
        # is simulated from identical parameter draws.
        si_model = SIModel(alpha, gamma, np.clip(npe_sample[i], 1e-8, None),
                           heterogeneous, prior_mu, prior_sigma,
                           N, T, summarize=False, flatten=False,
                           pi=pi)
        x_rep = si_model.get_observed_data(i)
        # Keep row 0 of the simulated output.
        intervention_draws[i, scenario] = np.array(x_rep)[0]
# Average over draws: one curve per scenario.
intervention = intervention_draws.mean(0)

# I_o and pp_mean are produced by an earlier posterior-predictive cell.
plt.plot(I_o, label="Observed", color="k", linestyle=":")
plt.plot(pp_mean, label="No Intervention", linestyle="--")
plt.plot(intervention[0], label="Floor Isolation", linestyle="--")
plt.plot(intervention[1], label="25% Reduction", linestyle="--")
plt.plot(intervention[2], label="Room Isolation", linestyle="--")
# plt.plot(intervention[3], label="?", linestyle=":")
plt.legend()
plt.ylim(0, 1)
plt.xlabel("Time Steps")
plt.ylabel("Proportion Infected")
plt.savefig("images/intervention.png")
plt.show()


## ABC

In [None]:
summarize = False
if run_abc:
    S = 100  # first argument to abc_rejection_sampler
    epsilons = [2.8, 2.5, 2.3, 2.1, 1.9, 1.8, 1.75]
    abc_samples, n_simulations = [], []
    for epsilon in epsilons:
        # A fresh model is built for every tolerance.
        si_model = SIModel(alpha, gamma, beta_true,
                           heterogeneous, prior_mu, prior_sigma,
                           N, T, summarize, observed_seed=31, flatten=False)
        x_o = si_model.get_observed_data()
        prior_sampler = lambda: si_model.sample_logbeta(1)
        simulator = lambda theta, seed: si_model.SI_simulator(theta, seed)
        print(f"epsilon: {epsilon}")
        posterior_sample, errors = abc_rejection_sampler(
            S, epsilon, prior_sampler, simulator, x_o,
            max_attempts=40000, summarize=False,
        )
        print("***")
        abc_samples.append(posterior_sample)
        # Count the entries of `errors` that are strictly positive.
        n_simulations.append((errors > 0).sum())

In [None]:
# Persist the ABC output when it was just computed; otherwise reload it, so the
# notebook still runs top to bottom with run_abc = False.
if run_abc:
    abc_posterior = np.array(abc_samples)
    np.save("abc_results", abc_posterior)
    # The simulation counts are needed by the error plots, so cache them too.
    np.save("abc_n_simulations", np.array(n_simulations))
else:
    # np.save appends ".npy" to the names used above.
    abc_posterior = np.load("abc_results.npy")
    # Back to a plain list: later cells concatenate it with other lists using `+`.
    n_simulations = np.load("abc_n_simulations.npy").tolist()

# Approximation Error

In [None]:
# Use the "deep" palette without its third colour (index 2).
original_pal = sns.color_palette('deep')
new_pal = [colour for idx, colour in enumerate(original_pal) if idx != 2]
sns.set_palette(new_pal)

### Total Square Error

In [None]:
# Simulation budgets, read from the same rows each method's estimates came from,
# so budgets and errors stay aligned row by row.
npe_sims = list(df_npe_full["n_sample"].values)
mf_sims = list(df_mf["n_sample"].values)
ln_sims = list(df_ln["n_sample"].values)

# Total squared error of each method's posterior-mean rates against the exact ones.
sq_errors = {
    "NPE": ((npe_means - exact_rates) ** 2).sum(1),
    "ABC": ((np.exp(abc_posterior).mean(1) - exact_rates) ** 2).sum(1),
    "NPE-MF": ((mf_means - exact_rates) ** 2).sum(1),
    "NPE-LN": ((ln_rates - exact_rates) ** 2).sum(1),
}
budgets = {
    "NPE": npe_sims,
    "ABC": list(n_simulations),
    "NPE-MF": mf_sims,
    "NPE-LN": ln_sims,
}

# Label counts follow the data instead of being hard-coded.
data = {
    "Method": [name for name, err in sq_errors.items() for _ in range(len(err))],
    "Simulations": [n for name in sq_errors for n in budgets[name]],
    "Error": np.concatenate(list(sq_errors.values())),
}
mean_mse = pd.DataFrame(data)

In [None]:
# Total squared error against simulation budget, one line per method.
ax = sns.lineplot(mean_mse, x="Simulations", y="Error", hue="Method", marker="o")
ax.set_xscale("log")
ax.set_xlabel("Simulations")
ax.set_ylabel("Total Square Error")
ax.legend(title=None)
ax.set_ylim(0, None)
# Plain-number ticks on the log axis.
ax.set_xticks([500, 1000, 2000, 4000])
ax.xaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter())
# plt.savefig("images/het-error-ln.png")
plt.show()

In [None]:
def get_errors(j):
    """Collect each method's posterior-mean estimate of parameter ``j``.

    Reads the notebook-level ``npe_means``, ``mf_means``, ``abc_posterior``,
    ``npe_sims`` and ``n_simulations``.

    Parameters
    ----------
    j : int
        Column index of the parameter.

    Returns
    -------
    pandas.DataFrame
        Long-format frame with columns "Method", "Simulations" and "Error".
        Despite its name, "Error" holds the posterior-mean estimate itself;
        the name is kept because the plotting cells reference it.
    """
    estimates = {
        "NPE": npe_means[:, j],
        "ABC": np.exp(abc_posterior).mean(1)[:, j],
        "NPE-MF": mf_means[:, j],
    }
    budgets = {"NPE": npe_sims, "ABC": n_simulations, "NPE-MF": npe_sims}
    # Repeat each label once per estimate rather than hard-coding group sizes.
    data = {
        "Method": [name for name, vals in estimates.items() for _ in range(len(vals))],
        "Simulations": [n for name in estimates for n in budgets[name]],
        "Error": np.concatenate(list(estimates.values())),
    }
    return pd.DataFrame(data)

In [None]:
# Posterior-mean estimates of parameter 0 against simulation budget.
beta_0 = get_errors(0)
ax = sns.lineplot(beta_0, x="Simulations", y="Error", hue="Method", marker="o")
ax.set_xscale("log")
ax.set_xlabel("Simulations")
ax.axhline(y=exact_rates[0], color="k", label=r"Exact Mean", linestyle="--")
ax.set_ylabel("Posterior Mean")
ax.legend(title=None)
ax.set_xticks([500, 1000, 2000, 4000])
ax.set_ylim(0, 0.07)
ax.xaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter())
plt.savefig("images/het-error-b.png")
plt.show()

In [None]:
# Posterior-mean estimates of parameter 5 against simulation budget.
# Named beta_5 so the variable matches the parameter it holds.
beta_5 = get_errors(5)
ax = sns.lineplot(beta_5, x="Simulations", y="Error", hue="Method", marker="o")
plt.xscale("log")
plt.xlabel("Simulations")
plt.axhline(y=exact_rates[5], color="k", label=r"Exact Mean", linestyle="--")
plt.ylabel("Posterior Mean")
plt.legend(title=None)
ax.set_xticks([500, 1000, 2000, 4000])
plt.ylim(0, 0.12)
ax.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
plt.savefig("images/het-error-c.png")
plt.show()

In [None]:
# Posterior-mean estimates of parameter 6 against simulation budget.
# Named beta_6 so the variable matches the parameter it holds.
beta_6 = get_errors(6)
ax = sns.lineplot(beta_6, x="Simulations", y="Error", hue="Method", marker="o")
plt.xscale("log")
plt.xlabel("Simulations")
plt.axhline(y=exact_rates[6], color="k", label=r"Exact Mean", linestyle="--")
plt.ylabel("Posterior Mean")
plt.legend(title=None)
ax.set_xticks([500, 1000, 2000, 4000])
plt.ylim(0, .10)
ax.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
plt.savefig("images/het-error-d.png")
plt.show()

In [None]:
# Same figure for parameters 1-4, one PNG each.
for j in [1, 2, 3, 4]:
    beta_j = get_errors(j)
    ax = sns.lineplot(beta_j, x="Simulations", y="Error", hue="Method", marker="o")
    ax.set_xscale("log")
    ax.set_xlabel("Simulations")
    ax.axhline(y=exact_rates[j], color="k", label=r"Exact Mean", linestyle="--")
    ax.set_ylabel("Posterior Mean")
    ax.legend(title=None)
    ax.set_xticks([500, 1000, 2000, 4000])
    ax.set_ylim(0, .10)
    ax.xaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter())
    plt.savefig(f"images/het-error-misc-{j}.png")
    plt.show()

In [None]:
# Turn the covariance `sigma` into a correlation matrix: scale rows and columns
# by the reciprocal standard deviations.
inv_sd = 1 / np.sqrt(np.diag(sigma))
D = np.diag(inv_sd)
corr = D @ sigma @ D

plt.matshow(corr, cmap="rocket")
plt.colorbar()
plt.xlabel(r"$\beta_j$")
plt.ylabel(r"$\beta_j$")
plt.savefig("images/corr.png")
plt.show()

### Master Table

In [None]:
# Reference values for the table below: mean of the exponentiated exact sample.
exact_rates

In [None]:
# Per-epsilon counts recorded by the ABC loop (number of entries of `errors` that are > 0).
n_simulations

In [None]:
# Posterior-mean table: configured rates next to each method's estimate.
rate_names = ["Facility"] + [f"Floor {i}" for i in range(1, 6)] + ["Room"]
het_table = pd.DataFrame({
    "Transmission Rate": rate_names,
    # Report the rates the observed data were actually simulated with, not a
    # hand-typed copy that can drift from the configuration cell.
    "Value": beta_true,
    "Exact": exact_rates,
    # Last row of the full-covariance NPE fits and of the ABC runs.
    "NPE": npe_means[-1],
    "ABC": np.exp(abc_posterior[-1]).mean(0)
})

In [None]:
# Posterior-standard-deviation table, laid out like the posterior-mean table.
rate_names = ["Facility"] + [f"Floor {i}" for i in range(1, 6)] + ["Room"]
het_table2 = pd.DataFrame({
    "Transmission Rate": rate_names,
    # Report the rates the observed data were actually simulated with.
    "Value": beta_true,
    "Exact": np.exp(exact_sample).std(0),
    # Take the sd from the same fit as npe_means[-1] rather than from the
    # notebook-level `sigma`, which several cells reassign.
    "NPE": npe_sds[-1],
    "ABC": np.exp(abc_posterior[-1]).std(0)
})

In [None]:
# Scratch calculation: natural log of 0.107 (about -2.23).
np.log(0.107)

In [None]:
# Scratch calculation: the eight values 0, 1/8, ..., 7/8.
np.arange(1, step=1/8)

In [None]:
# Scratch calculation: CDF of Normal(-3, 1) evaluated at -2.23.
# NOTE(review): -3 and 1 match prior_mu / prior_sigma and -2.23 is about log(0.107),
# so this is presumably the prior probability of a rate below 0.107 — confirm.
norm(-3, 1).cdf(-2.23)

In [None]:
# Emit the posterior-mean table as LaTeX source.
# 4000 vs 7,646 simulations, ABC vs NPE
print(het_table.to_latex())

In [None]:
# Emit the posterior-standard-deviation table as LaTeX source.
print(het_table2.to_latex())

### Credible Intervals

In [None]:
# 5% and 95% quantiles of the exponentiated exact sample, paired per column.
exact_rate_draws = np.exp(exact_sample)
exact_lower, exact_upper = np.quantile(exact_rate_draws, [0.05, 0.95], axis=0)
exact_cis = list(zip(exact_lower, exact_upper))

In [None]:
# Display the (5%, 95%) quantile pairs of the exact sample.
exact_cis

In [None]:
# Mean and standard deviation of the last full-covariance NPE fit.
m = npe_means[-1]
s = npe_sds[-1]

# Per-parameter (5%, 95%) quantiles of the marginal Normal(m, s).
npe_cis = [
    tuple(norm(m[k], s[k]).ppf(q) for q in (0.05, 0.95))
    for k in range(7)
]

In [None]:
# Display the (5%, 95%) quantile pairs of the NPE marginals.
npe_cis

In [None]:
# 5% and 95% quantiles of the exponentiated draws from the last ABC run, paired per column.
abc_rate_draws = np.exp(abc_posterior[-1])
abc_lower, abc_upper = np.quantile(abc_rate_draws, [0.05, 0.95], axis=0)
abc_cis = list(zip(abc_lower, abc_upper))

In [None]:
# Display the (5%, 95%) quantile pairs of the last ABC run.
abc_cis

In [None]:
# Scratch check of built-in round() without ndigits: returns the nearest integer (3).
round((3.1415))

In [None]:
def _round_cis(cis, ndigits=5):
    """Round every (lower, upper) pair in ``cis`` to ``ndigits`` decimals.

    Built-in round() cannot be applied to a list of tuples (it raises
    TypeError), so each bound is rounded individually.
    """
    return [(round(float(lo), ndigits), round(float(hi), ndigits)) for lo, hi in cis]


# Interval table: configured rates next to each method's (5%, 95%) interval.
het_cis = pd.DataFrame({
    "Transmission Rate": ["Facility"] + [f"Floor {i}" for i in range(1, 6)] + ["Room"],
    # Report the rates the observed data were actually simulated with.
    "Value": beta_true,
    "Exact": _round_cis(exact_cis),
    "NPE": _round_cis(npe_cis),
    "ABC": _round_cis(abc_cis)
})
print(het_cis.to_latex())

### ABC

In [None]:
# sns.pairplot(pd.DataFrame(np.exp(abc_posterior[-1])))

## Model Identifiability

In [None]:
# Sort runs by validation loss and renumber the rows, so that the `[0]` lookups
# in the following cells select the lowest-val_loss run. Without the reset,
# `[0]` is a label lookup and returns whichever run had index 0 before sorting.
df_ident = (
    get_results("../multirun/2024-11-12/17-36-17")
    .sort_values("val_loss")
    .reset_index(drop=True)
)

In [None]:
# Mean vector of the run with index label 0 (the "mu" entry is indexed with [0] once more).
# NOTE(review): the first [0] is a label lookup on the frame's index; it is the
# lowest-val_loss run only if the index follows the sorted order — confirm.
ident_mu = np.array(df_ident["mu"][0][0])

In [None]:
# Correlation matrix of the identifiability run's covariance.
ident_sigma = np.array(df_ident["sigma"][0])
D = np.diag(1 / np.sqrt(np.diag(ident_sigma)))
# Scale ident_sigma itself: D is built from its diagonal, so applying it to the
# unrelated notebook-level `sigma` would not give a correlation matrix.
corr = D @ ident_sigma @ D
plt.matshow(corr, cmap="rocket")
plt.colorbar()
plt.xlabel(r"$\beta_j$")
plt.ylabel(r"$\beta_j$")
# plt.savefig("images/corr.png")
plt.show()

In [None]:
# Print the 5% and 95% quantiles of each marginal Normal(ident_mu[i], sqrt(ident_sigma[i][i])).
for i in range(7):
    dist = norm(loc=ident_mu[i], scale=ident_sigma[i][i] ** 0.5)
    lower, upper = dist.ppf(0.05), dist.ppf(0.95)
    print(lower, upper)

#### modified NPE estimates

In [None]:
K = 100
# Gaussian built from the notebook-level mu / sigma.
neural_posterior = multivariate_normal(mu, sigma)

# Draw batches of K, keep only the rows whose entries are all positive, and
# repeat with the next seed until at least K rows have been collected.
seed = 2
npe_sample2 = np.empty((0, 7))
while len(npe_sample2) < K:
    draw = neural_posterior.rvs(size=K, random_state=seed)
    seed += 1
    all_positive = np.all(draw > 0, axis=1)
    accepted = draw[all_positive]
    npe_sample2 = np.vstack((npe_sample2, accepted))
npe_sample2 = npe_sample2[:K]

In [None]:
# Column-wise mean of the positivity-truncated draws.
npe_sample2.mean(0)

In [None]:
# Exact posterior means again, for comparison with the truncated NPE means above.
exact_rates