### Libraries

In [1]:
!pip install dill numpyro

Collecting dill
  Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)
Collecting numpyro
  Downloading numpyro-0.18.0-py3-none-any.whl.metadata (37 kB)
Downloading dill-0.3.9-py3-none-any.whl (119 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.4/119.4 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpyro-0.18.0-py3-none-any.whl (365 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m365.8/365.8 kB[0m [31m26.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dill, numpyro
Successfully installed dill-0.3.9 numpyro-0.18.0


In [2]:
import arviz as az
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import jax.numpy as jnp
from jax import random
import geopandas as gpd
import dill
import numpyro
import numpyro.distributions as dist
from numpyro.infer import MCMC, NUTS
import pandas as pd

import os

import torch
import time

import itertools
import jax
import jax.numpy as jnp
from jax import random, lax, jit, ops
from jax.example_libraries import stax

import numpyro
from numpyro.infer import SVI, MCMC, NUTS, init_to_median, Predictive, RenyiELBO, log_likelihood
import numpyro.distributions as dist

import geopandas as gpd
import plotly.express as px

from termcolor import colored

import pickle

### Load the final_combined_divisions data to get the observed prevalence counts

In [4]:
df = gpd.read_file("final_combined_divisions.shp")

In [5]:
true_values = df["Cases"]/df['Population']  # observed prevalence values
total_cases = df['Cases']  # your total cases
total_population = df['Population']  # your total population

### Load the mcmc data from all 4 different sigma priors (1, 0.01, 0.0001, 0.000001)

In [6]:
#for sigma 1e-2
with open("aggVAEPrev_nsamples_2000_tt0min_sigma0.01_hdim50_zdim20.pkl", "rb") as file:
        sigma_1 = dill.load(file)

#for sigma 1e-1
with open("aggVAEPrev_nsamples_2000_tt0min_sigma0.1_hdim50_zdim20.pkl", "rb") as file:
        sigma_2 = dill.load(file)

#for sigma 1
with open("aggVAEPrev_nsamples_2000_tt1min_sigma1_hdim50_zdim20.pkl", "rb") as file:
        sigma_3 = dill.load(file)

#for sigma 5
with open("aggVAEPrev_nsamples_2000_tt2min_sigma5_hdim50_zdim20.pkl", "rb") as file:
        sigma_4 = dill.load(file)

#for sigma 10
with open("aggVAEPrev_nsamples_2000_tt2min_sigma10_hdim50_zdim20.pkl", "rb") as file:
        sigma_5 = dill.load(file)

#for sigma 50
with open("aggVAEPrev_nsamples_2000_tt2min_sigma50_hdim50_zdim20.pkl", "rb") as file:
        sigma_6 = dill.load(file)

In [7]:
idata_1 = az.from_dict(posterior = sigma_1.get_samples())
idata_2 = az.from_dict(posterior = sigma_2.get_samples())
idata_3 = az.from_dict(posterior = sigma_3.get_samples())
idata_4 = az.from_dict(posterior = sigma_4.get_samples())
idata_5 = az.from_dict(posterior = sigma_5.get_samples())
idata_6 = az.from_dict(posterior = sigma_6.get_samples())



### Load the model and parameters for aggVAEPrev

In [8]:
#define the necessary functions
def dist_euclid(x, z):
    """
    Computes Eucledian Distance Between Regions. This function is used by
    exp_sq_kernel function (kernel function for gaussian processes)
    """
    x = jnp.array(x) # (ngrid_pts, lat/lon) <- i.e (7304,2)
    z = jnp.array(z) # (ngrid_pts, lat/lon) <- i.e (7304,2)
    if len(x.shape)==1:
        x = x.reshape(x.shape[0], 1) #(2618,) -> (7304,1)
    if len(z.shape)==1:
        z = x.reshape(x.shape[0], 1) #(2618,) -> (7304,1)
    n_x, m = x.shape # 7304 , 2
    n_z, m_z = z.shape # 7304 , 2
    assert m == m_z
    delta = jnp.zeros((n_x,n_z)) #(ngrid_pts,ngrid_pts) <- i.e (7304,7304)
    for d in jnp.arange(m):
        x_d = x[:,d] #(ngrid_pts-lat/lon,) <- (7304,)
        z_d = z[:,d] #(ngrid_pts-lat/lon,) <- (7304,)
        delta += (x_d[:,jnp.newaxis] - z_d)**2 # (7304,7304)

    return jnp.sqrt(delta) #(7304,7304)
def exp_sq_kernel(x, z, var, length, noise, jitter=1.0e-4):
    dist = dist_euclid(x, z) #(7304, 7304)
    deltaXsq = jnp.power(dist/ length, 2.0)
    k = var * jnp.exp(-0.5 * deltaXsq)
    k += (noise + jitter) * jnp.eye(x.shape[0])
    return k # (ngrid_pts, ngrid_pts) <- (7304,7304)

def vae_decoder(hidden_dim, out_dim):
    return stax.serial(
        # (num_samples, z_dim) -> (num_samples, hidden_dim): (5,40) -> (5,50)
        stax.Dense(hidden_dim, W_init = stax.randn()),
        stax.Elu,
        # (num_samples, hidden_dim) -> (num_samples, num_regions) : (5,50) -> (5, 58)
        stax.Dense(out_dim, W_init = stax.randn())
    )

In [9]:
def prev_model_vae_aggr(args):

    x = args["x"]
    out_dims = args["out_dims"]
    pop_density = args["pop_density"]
    hdi = args["hdi"]
    total_cases = args["total_cases"]
    total_population = args["total_population"]
    n = args["sigma"]

    # random effect
    decoder_params =args["decoder_params"]
    z_dim, hidden_dim = decoder_params[0][0].shape
    z = numpyro.sample("z", dist.Normal(jnp.zeros(z_dim), jnp.ones(z_dim)))
    _, decoder_apply = vae_decoder(hidden_dim, out_dims) #Instantiate decoder
    vae_aggr = numpyro.deterministic("vae_aggr", decoder_apply(decoder_params, z))
    s = numpyro.sample("sigma", dist.HalfNormal(n))
    vae = numpyro.deterministic("vae", s * vae_aggr)

    ## Fixed effects
    b0 = numpyro.sample("b0", dist.Normal(0, 1))  #Intercept
    b_pop_density = numpyro.sample("b_pop_density", dist.Normal(0, 1))  #Effect of population density
    b_hdi = numpyro.sample("b_hdi", dist.Normal(0, 1))  #Effect of HDI

    #scale pop_density and hdi (normalise)
    # Standardize covariates
    pop_density = (pop_density - jnp.mean(pop_density)) / (jnp.std(pop_density))
    hdi = (hdi - jnp.mean(hdi)) / (jnp.std(hdi))

    # Linear predictor
    lp = b0 + vae + b_pop_density * pop_density + b_hdi * hdi  # (num_districts,)

    # Prevalence probability
    theta = numpyro.deterministic("theta", jax.nn.sigmoid(lp)*1e-2)  # (num_districts,)

    # Binomial likelihood
    observed_cases = numpyro.sample(
        "observed_cases",
        dist.Binomial(total_count=total_population, probs = theta),
        obs=total_cases
    )

In [12]:
# Lat/Lon Values of artificial grid
x = np.load("lat_lon_x_all.npy")

# combined regional data
pol_pts_all = np.load("pol_pts_all.npy")
pt_which_pol_all = np.load("pt_which_pol_all.npy")

#combine the dataframes
df_combined = gpd.read_file("final_combined_divisions.shp")

In [13]:
M = pol_pts_all
out_dims = df_combined.shape[0]

In [14]:
args = {
        "total_cases" : jnp.array(df_combined["Cases"]),
        "total_population" : jnp.array(df_combined["Population"]),
        "hdi" : jnp.array(df_combined["HDI"]),
        "pop_density" : jnp.array(df_combined["Pop_den"]),
        "x" : jnp.array(x),
        "gp_kernel" : exp_sq_kernel,
        "jitter" : 1e-4,
        "noise" : 1e-4,
        "M" : M,
        # VAE training
        "rng_key": random.PRNGKey(5),
        "num_epochs": 20,
        "learning_rate": 0.0005,
        "batch_size": 100,
        "out_dims" : out_dims,
        "num_train": 100,
        "num_test":100,
        "vae_var": 1,
        #default optimal sigma
        "sigma": 50
    }

In [15]:
#change the specific file name under the folder model_weights
with open("aggVAE_e20_h50_z20", "rb") as file:
        vae_params = pickle.load(file)

encoder_params = vae_params["encoder$params"]
decoder_params = vae_params["decoder$params"]
args["decoder_params"] = decoder_params

### Append all the theta estimates correctly to df

In [16]:
# Assuming you have posterior samples for different priors
# e.g., theta samples for different priors: sigma_1, sigma_1e-2, sigma_1e-3, sigma_1e-5

# Extract posterior samples for each prior (as per your example)
theta_samps_sigma_1 = sigma_1.get_samples()["theta"]
theta_samps_sigma_2 = sigma_2.get_samples()["theta"]
theta_samps_sigma_3 = sigma_3.get_samples()["theta"]
theta_samps_sigma_4 = sigma_4.get_samples()["theta"]
theta_samps_sigma_5 = sigma_5.get_samples()["theta"]
theta_samps_sigma_6 = sigma_6.get_samples()["theta"]

# Compute mean and quantiles (if needed) for theta
theta_mean_sigma_1 = theta_samps_sigma_1.mean(axis=(0, 1))
theta_mean_sigma_2 = theta_samps_sigma_2.mean(axis=(0, 1))
theta_mean_sigma_3 = theta_samps_sigma_3.mean(axis=(0, 1))
theta_mean_sigma_4 = theta_samps_sigma_4.mean(axis=(0, 1))
theta_mean_sigma_5 = theta_samps_sigma_5.mean(axis=(0, 1))
theta_mean_sigma_6 = theta_samps_sigma_6.mean(axis=(0, 1))

# Append the theta estimates for each prior to the DataFrame
df["theta_vae_aggr_sigma_1"] = theta_mean_sigma_1
df["theta_vae_aggr_sigma_2"] = theta_mean_sigma_2
df["theta_vae_aggr_sigma_3"] = theta_mean_sigma_3
df["theta_vae_aggr_sigma_4"] = theta_mean_sigma_4
df["theta_vae_aggr_sigma_5"] = theta_mean_sigma_5
df["theta_vae_aggr_sigma_6"] = theta_mean_sigma_6

# Ensure that the observed prevalence (obs_prev) is also included
df["obs_prev"] = df["Cases"] / df["Population"]
theta_observed = df["obs_prev"]

In [17]:
# Extract log likelihood for each prior
# Assume observed_data is the actual data you used for inference
# Update args with the observed data for log_likelihood calculation

args_with_obs_1 = args.copy()
args_with_obs_1["observed_cases"] = np.asarray(true_values) # Assuming 'true_values' is the observed data
args_with_obs_1["sigma"] = 0.01
log_likelihood_1 = log_likelihood(prev_model_vae_aggr, sigma_1.get_samples(), args_with_obs_1)
print(log_likelihood_1)


args_with_obs_2 = args.copy()
args_with_obs_2["observed_cases"] = np.asarray(true_values) # Assuming 'true_values' is the observed data
args_with_obs_2["sigma"] = 0.1
log_likelihood_2 = log_likelihood(prev_model_vae_aggr, sigma_2.get_samples(), args_with_obs_2)
print(log_likelihood_2)


args_with_obs_3 = args.copy()
args_with_obs_3["observed_cases"] = np.asarray(true_values) # Assuming 'true_values' is the observed data
args_with_obs_3["sigma"] = 1
log_likelihood_3 = log_likelihood(prev_model_vae_aggr, sigma_3.get_samples(), args_with_obs_3)
print(log_likelihood_3)


args_with_obs_4 = args.copy()
args_with_obs_4["observed_cases"] = np.asarray(true_values) # Assuming 'true_values' is the observed data
args_with_obs_4["sigma"] = 5
log_likelihood_4 = log_likelihood(prev_model_vae_aggr, sigma_4.get_samples(), args_with_obs_4)
print(log_likelihood_4)


args_with_obs_5 = args.copy()
args_with_obs_5["observed_cases"] = np.asarray(true_values) # Assuming 'true_values' is the observed data
args_with_obs_5["sigma"] = 10
log_likelihood_5 = log_likelihood(prev_model_vae_aggr, sigma_5.get_samples(), args_with_obs_5)
print(log_likelihood_5)


args_with_obs_6 = args.copy()
args_with_obs_6["observed_cases"] = np.asarray(true_values) # Assuming 'true_values' is the observed data
args_with_obs_6["sigma"] = 50
log_likelihood_6 = log_likelihood(prev_model_vae_aggr, sigma_6.get_samples(), args_with_obs_6)
print(log_likelihood_6)

#update the idata object with log-likelihood
idata_1 = az.from_dict(posterior=sigma_1.get_samples(), log_likelihood={"obs": log_likelihood_1})
idata_2 = az.from_dict(posterior=sigma_2.get_samples(), log_likelihood={"obs": log_likelihood_2})
idata_3 = az.from_dict(posterior=sigma_3.get_samples(), log_likelihood={"obs": log_likelihood_3})
idata_4 = az.from_dict(posterior=sigma_4.get_samples(), log_likelihood={"obs": log_likelihood_4})
idata_5 = az.from_dict(posterior=sigma_5.get_samples(), log_likelihood={"obs": log_likelihood_5})
idata_6 = az.from_dict(posterior=sigma_6.get_samples(), log_likelihood={"obs": log_likelihood_6})

{'observed_cases': Array([[ -476.125   ,   -79.72412 , -4713.639   , ..., -1681.7864  ,
         -428.10693 , -1769.157   ],
       [ -443.68506 ,   -66.55957 , -4574.645   , ..., -1699.165   ,
         -438.13135 , -1793.2725  ],
       [ -474.81445 ,   -83.197266, -4703.9146  , ..., -1688.1946  ,
         -428.28345 , -1776.8489  ],
       ...,
       [ -474.44678 ,   -79.43701 , -4688.0864  , ..., -1675.5502  ,
         -421.87866 , -1762.2217  ],
       [ -468.30176 ,   -76.28076 , -4662.1743  , ..., -1684.3828  ,
         -428.9812  , -1775.2847  ],
       [ -483.4702  ,   -82.07422 , -4729.5093  , ..., -1659.873   ,
         -414.37427 , -1751.571   ]], dtype=float32)}
{'observed_cases': Array([[  -22.356445,  -449.7627  , -2796.5098  , ...,  -323.72455 ,
           -9.378418,  -346.41235 ],
       [  -25.254883,  -425.73438 , -2800.287   , ...,  -324.51312 ,
           -9.050049,  -341.03162 ],
       [  -25.983398,  -417.70508 , -2828.9443  , ...,  -339.67218 ,
           -8.68



In [18]:
idata_dict = {
    "Sigma 1e-2": idata_1,
    "Sigma 1e-1": idata_2,
    "Sigma 1": idata_3,
    "Sigma 5": idata_4,
    "Sigma 10": idata_5,
    "Sigma 50": idata_6,
}

In [19]:
df.head()

Unnamed: 0,District,x,y,Year,Area_sq_km,HDI,Cases,Population,Pop_den,geometry,theta_vae_aggr_sigma_1,theta_vae_aggr_sigma_2,theta_vae_aggr_sigma_3,theta_vae_aggr_sigma_4,theta_vae_aggr_sigma_5,theta_vae_aggr_sigma_6,obs_prev
0,BANDUNG,107.610841,-7.099969,2020,1767.96,72.39,9180,14495160,8198.805403,"POLYGON ((107.73309 -6.814, 107.73354 -6.81427...",0.0008217926,0.0009509845,0.0009286254,0.00093148515,0.0009252074,0.0009305632,0.000633
1,BANDUNG,107.610841,-7.099969,2021,1767.96,72.73,8008,14662620,8293.52474,"POLYGON ((107.73309 -6.814, 107.73354 -6.81427...",0.0008217926,0.0009509845,0.0009286254,0.00093148515,0.0009252074,0.0009305632,0.000546
2,BANDUNG,107.610841,-7.099969,2022,1767.96,73.16,16764,14830092,8388.250865,"POLYGON ((107.73309 -6.814, 107.73354 -6.81427...",0.0008217926,0.0009509845,0.0009286254,0.00093148515,0.0009252074,0.0009305632,0.00113
3,BANDUNG,107.610841,-7.099969,2023,1767.96,73.74,4020,14997564,8482.97699,"POLYGON ((107.73309 -6.814, 107.73354 -6.81427...",0.0008217926,0.0009509845,0.0009286254,0.00093148515,0.0009252074,0.0009305632,0.000268
4,BANDUNG BARAT,107.414953,-6.897056,2020,1305.77,68.08,3864,7153344,5478.257273,"POLYGON ((107.40945 -6.68851, 107.40986 -6.688...",0.0008217926,0.0009509845,0.0009286254,0.00093148515,0.0009252074,0.0009305632,0.00054


### Define Functions to plot different posterior data using different priors

In [20]:
def compute_model_comparison(idata_dict):
    loo_scores = {}
    waic_scores = {}
    rhat_values = {}
    avg_rhat_values = {}

    for prior_name, idata in idata_dict.items():
        # Compute LOO using stored log-likelihood
        loo_result = az.loo(idata, pointwise=True)
        loo_scores[prior_name] = (
            loo_result["elpd_loo"].item(),
            loo_result["se"].item()
        )

        # Compute WAIC
        waic_result = az.waic(idata, pointwise=True)
        waic_scores[prior_name] = (
            waic_result["elpd_waic"].item(),
            waic_result["se"].item()
        )

        # Compute R-hat
        rhat = az.rhat(idata)
        rhat_values[prior_name] = rhat
        avg_rhat_values[prior_name] = np.mean(rhat.to_array().values)

        print(f"{prior_name} - LOO: {loo_scores[prior_name][0]:.2f} ± {loo_scores[prior_name][1]:.2f}")
        print(f"{prior_name} - WAIC: {waic_scores[prior_name][0]:.2f} ± {waic_scores[prior_name][1]:.2f}")
        print(f"{prior_name} - Average R-hat: {avg_rhat_values[prior_name]:.3f}")

    best_model = min(avg_rhat_values, key=avg_rhat_values.get)
    print(f"\nBest model based on R-hat: {best_model} with average R-hat of {avg_rhat_values[best_model]:.3f}")

    return {
        "LOO": loo_scores,
        "WAIC": waic_scores,
        "R-hat": rhat_values,
        "Avg R-hat": avg_rhat_values,
        "Best Model": best_model
    }


### Compute the metrics for different samples

In [21]:
# Step 2: Compute Model Comparison Metrics (LOO, WAIC, R-hat, Log-Likelihood)
model_comparison_results = compute_model_comparison(idata_dict)

Array contains NaN-value.
  b_ary /= prior_bs * ary[int(n / 4 + 0.5) - 1]
  k_ary = np.log1p(-b_ary[:, None] * ary).mean(axis=1)  # pylint: disable=no-member
  sigma = -k_post / b_post
See http://arxiv.org/abs/1507.04544 for details
            the Observed RV in your model to make sure it returns element-wise logp.
            


Sigma 1e-2 - LOO: nan ± nan
Sigma 1e-2 - WAIC: -562532.33 ± 0.00
Sigma 1e-2 - Average R-hat: nan


  b_ary /= prior_bs * ary[int(n / 4 + 0.5) - 1]
  k_ary = np.log1p(-b_ary[:, None] * ary).mean(axis=1)  # pylint: disable=no-member
  sigma = -k_post / b_post
See http://arxiv.org/abs/1507.04544 for details
            the Observed RV in your model to make sure it returns element-wise logp.
            


Sigma 1e-1 - LOO: nan ± nan
Sigma 1e-1 - WAIC: -239658.13 ± 0.00
Sigma 1e-1 - Average R-hat: nan


  b_ary /= prior_bs * ary[int(n / 4 + 0.5) - 1]
  k_ary = np.log1p(-b_ary[:, None] * ary).mean(axis=1)  # pylint: disable=no-member
  sigma = -k_post / b_post
See http://arxiv.org/abs/1507.04544 for details
            the Observed RV in your model to make sure it returns element-wise logp.
            


Sigma 1 - LOO: nan ± nan
Sigma 1 - WAIC: -165749.63 ± 0.00
Sigma 1 - Average R-hat: nan


  k_ary = np.log1p(-b_ary[:, None] * ary).mean(axis=1)  # pylint: disable=no-member
  sigma = -k_post / b_post
See http://arxiv.org/abs/1507.04544 for details
            the Observed RV in your model to make sure it returns element-wise logp.
            


Sigma 5 - LOO: nan ± nan
Sigma 5 - WAIC: -122643.59 ± 0.00
Sigma 5 - Average R-hat: nan


  b_ary /= prior_bs * ary[int(n / 4 + 0.5) - 1]
  k_ary = np.log1p(-b_ary[:, None] * ary).mean(axis=1)  # pylint: disable=no-member
  sigma = -k_post / b_post
See http://arxiv.org/abs/1507.04544 for details
            the Observed RV in your model to make sure it returns element-wise logp.
            


Sigma 10 - LOO: nan ± nan
Sigma 10 - WAIC: -104673.51 ± 0.00
Sigma 10 - Average R-hat: nan


  k_ary = np.log1p(-b_ary[:, None] * ary).mean(axis=1)  # pylint: disable=no-member
  sigma = -k_post / b_post
See http://arxiv.org/abs/1507.04544 for details
            the Observed RV in your model to make sure it returns element-wise logp.
            


Sigma 50 - LOO: nan ± nan
Sigma 50 - WAIC: -122266.17 ± 0.00
Sigma 50 - Average R-hat: nan

Best model based on R-hat: Sigma 1e-2 with average R-hat of nan
