### Description: This notebook produces the values reported in Table 11. (The time taken is not computed here; it is read directly from the file names of the MCMC samples stored in the `aggGP` or `aggVAEPrev` folders under `model weights`.)

# Load Libraries

In [1]:
import os
import pandas as pd
import jax
import jax.nn as nn
from jax import lax, random
import jax.numpy as jnp
from jax.random import PRNGKey
import numpy as np
import numpyro
import pickle
import numpyro.distributions as dist
from numpyro.infer import SVI, Trace_ELBO, RenyiELBO
import arviz as az
import sys
from pyprojroot import here
import time
sys.path.append(str(here() / "src"))
# Import our modular components
from kernels import exp_sq_kernel, M_g
from loader_jkt import load_data
from gp import gp_aggr, gp_aggr_count
from vae import vae_model, vae_guide, vae_decoder
from plotting import plot_process, plot_incidence_map
import matplotlib.pyplot as plt
from numpyro.infer import NUTS, MCMC, Predictive, init_to_median, init_to_uniform, init_to_sample, init_to_mean, init_to_value

  from .autonotebook import tqdm as notebook_tqdm


## Load all the MCMC objects for all the years


In [2]:
# Fitted MCMC objects for 2020, 2021, 2022 and 2023, keyed by year.
# The dict order is the order in which the pickles are read.
mcmc_pickle_paths = {
    2023: "../model weights/aggGP/mcmc_jkt_2023_25min_targetprob0.9.pkl",
    2022: "../model weights/aggGP/mcmc_jkt_2022_8min_targetprob0.95.pkl",
    2020: "../model weights/aggGP/mcmc_jkt_2020_28min_targetprob0.95.pkl",
    2021: "../model weights/aggGP/mcmc_jkt_2021_30min_targetprob0.9.pkl",
}

mcmc_by_year = {}
for year, pickle_path in mcmc_pickle_paths.items():
    with open(pickle_path, "rb") as f:
        mcmc_by_year[year] = pickle.load(f)

# Expose the per-year names that the cells below refer to.
mcmc_2023 = mcmc_by_year[2023]
mcmc_2022 = mcmc_by_year[2022]
mcmc_2020 = mcmc_by_year[2020]
mcmc_2021 = mcmc_by_year[2021]

In [3]:
# n_lo: number of provinces (DKI Jakarta itself).
# n_hi: number of districts in DKI Jakarta, leaving out Kep. Seribu.
n_lo, n_hi = 1, 5

# Yearly Inferences (target acceptance probability 0.95 for 2020 and 2022, 0.9 for 2021 and 2023; max tree depth = 20)

### 2023 Inference

In [6]:
# Read the processed 2023 CSV into a DataFrame.
csv_path_2023 = "../data/processed/df_hi_jkt_w_pred_2023.csv"
df_2023 = pd.read_csv(csv_path_2023)

In [4]:
# Split a fixed-seed PRNG key and fetch the posterior draws of the 2023 run.
# NOTE(review): neither the keys nor `posterior_samples` are used by the cells
# visible in this notebook; they are kept in case later code relies on them.
rng_key_pr, rng_key_po = random.split(random.PRNGKey(4))
posterior_samples = mcmc_2023.get_samples()

# print_summary() writes the table to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
mcmc_2023.print_summary()


                     mean       std    median      5.0%     95.0%     n_eff     r_hat
             b0    100.08     10.27    100.30     83.21    116.74   1298.51      1.00
          b_hdi     -0.03      0.97     -0.01     -1.65      1.55    874.05      1.00
  b_pop_density      0.03      0.97      0.04     -1.66      1.50   1448.10      1.01
        b_urban      0.00      0.99     -0.00     -1.55      1.66    560.93      1.01
  kernel_length      0.50      0.26      0.43      0.17      0.84    580.94      1.01
     kernel_var      0.37      0.02      0.37      0.34      0.40   1347.99      1.00
       log_f[0]      3.06      0.59      3.09      2.04      3.95     90.34      1.05
       log_f[1]      3.06      0.59      3.10      2.10      4.01     81.43      1.06
       log_f[2]      3.06      0.59      3.09      2.07      3.95     72.73      1.07
       log_f[3]      3.05      0.59      3.09      2.09      3.98     49.11      1.07
       log_f[4]      3.05      0.58      3.08      2.

In [5]:
# Effective sample size (n_eff) and r_hat for the `gp_aggr` site of the 2023 run.
# Samples are fetched with the chain dimension kept (group_by_chain=True).
ss = numpyro.diagnostics.summary(mcmc_2023.get_samples(group_by_chain=True))
gp_stats = ss["gp_aggr"]

r = np.mean(gp_stats["n_eff"])
print(f"Average ESS for all aggGP effects : {round(r)}")

# The first n_lo entries are reported as "low", the following n_hi as "high".
lo_slice = slice(0, n_lo)
hi_slice = slice(n_lo, n_lo + n_hi)

ess_lo = np.mean(gp_stats["n_eff"][lo_slice])
r_hat_lo = np.max(gp_stats["r_hat"][lo_slice])

ess_hi = np.mean(gp_stats["n_eff"][hi_slice])
r_hat_hi = np.max(gp_stats["r_hat"][hi_slice])

# r_hat is formatted with ':.2f' instead of round(x, 2): rounding a float32
# scalar keeps it float32, and the f-string then shows its float64 expansion
# (e.g. 1.059999942779541 instead of 1.06).
print(f"Average ESS for all aggGP-low effects : {round(ess_lo)}")
print(f"Max r_hat for all aggGP-low : {float(r_hat_lo):.2f}")

print(f"Average ESS for all aggGP-high effects : {round(ess_hi)}")
print(f"Max r_hat for all aggGP-high : {float(r_hat_hi):.2f}")

Average ESS for all aggGP effects : 81
Average ESS for all aggGP-low effects : 81
Max r_hat for all aggGP-low : 1.059999942779541
Average ESS for all aggGP-high effects : 81
Max r_hat for all aggGP-high : 1.0700000524520874


### 2022 Inference

In [39]:
# Read the processed 2022 CSV into a DataFrame.
csv_path_2022 = "../data/processed/df_hi_jkt_w_pred_2022.csv"
df_2022 = pd.read_csv(csv_path_2022)

In [6]:
# Split a fixed-seed PRNG key and fetch the posterior draws of the 2022 run.
# NOTE(review): neither the keys nor `posterior_samples` are used by the cells
# visible in this notebook; they are kept in case later code relies on them.
rng_key_pr, rng_key_po = random.split(random.PRNGKey(4))
posterior_samples = mcmc_2022.get_samples()

# print_summary() writes the table to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
mcmc_2022.print_summary()


                     mean       std    median      5.0%     95.0%     n_eff     r_hat
             b0    100.12     10.21    100.13     83.53    116.50   1222.95      1.00
          b_hdi      0.02      1.00      0.03     -1.53      1.70    826.60      1.00
  b_pop_density      0.03      0.99      0.03     -1.54      1.76   1158.81      1.00
        b_urban     -0.01      1.00      0.00     -1.68      1.62    456.56      1.02
  kernel_length      0.48      0.29      0.41      0.17      0.80    568.81      1.01
     kernel_var      0.37      0.02      0.37      0.34      0.40   1342.08      1.00
       log_f[0]      2.92      0.66      2.88      1.84      4.00     12.26      1.15
       log_f[1]      2.91      0.65      2.88      1.88      4.03     12.24      1.15
       log_f[2]      2.90      0.66      2.88      1.87      4.00     12.19      1.15
       log_f[3]      2.90      0.66      2.88      1.88      3.97     12.26      1.15
       log_f[4]      2.90      0.65      2.90      1.

In [7]:
# Effective sample size (n_eff) and r_hat for the `gp_aggr` site of the 2022 run.
# Samples are fetched with the chain dimension kept (group_by_chain=True).
ss = numpyro.diagnostics.summary(mcmc_2022.get_samples(group_by_chain=True))
gp_stats = ss["gp_aggr"]

r = np.mean(gp_stats["n_eff"])
print(f"Average ESS for all aggGP effects : {round(r)}")

# The first n_lo entries are reported as "low", the following n_hi as "high".
lo_slice = slice(0, n_lo)
hi_slice = slice(n_lo, n_lo + n_hi)

ess_lo = np.mean(gp_stats["n_eff"][lo_slice])
r_hat_lo = np.max(gp_stats["r_hat"][lo_slice])

ess_hi = np.mean(gp_stats["n_eff"][hi_slice])
r_hat_hi = np.max(gp_stats["r_hat"][hi_slice])

# r_hat is formatted with ':.2f' instead of round(x, 2): rounding a float32
# scalar keeps it float32, and the f-string then shows its float64 expansion
# (e.g. 1.149999976158142 instead of 1.15).
print(f"Average ESS for all aggGP-low effects : {round(ess_lo)}")
print(f"Max r_hat for all aggGP-low : {float(r_hat_lo):.2f}")

print(f"Average ESS for all aggGP-high effects : {round(ess_hi)}")
print(f"Max r_hat for all aggGP-high : {float(r_hat_hi):.2f}")

Average ESS for all aggGP effects : 23
Average ESS for all aggGP-low effects : 21
Max r_hat for all aggGP-low : 1.149999976158142
Average ESS for all aggGP-high effects : 23
Max r_hat for all aggGP-high : 1.159999966621399


### 2021 Inference

In [43]:
# Read the processed 2021 CSV into a DataFrame.
csv_path_2021 = "../data/processed/df_hi_jkt_w_pred_2021.csv"
df_2021 = pd.read_csv(csv_path_2021)

In [8]:
# Split a fixed-seed PRNG key and fetch the posterior draws of the 2021 run.
# NOTE(review): neither the keys nor `posterior_samples` are used by the cells
# visible in this notebook; they are kept in case later code relies on them.
rng_key_pr, rng_key_po = random.split(random.PRNGKey(4))
posterior_samples = mcmc_2021.get_samples()

# print_summary() writes the table to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
mcmc_2021.print_summary()


                     mean       std    median      5.0%     95.0%     n_eff     r_hat
             b0     99.97     10.19     99.98     83.26    116.48   1976.06      1.00
          b_hdi     -0.03      1.00     -0.05     -1.71      1.62   1344.53      1.00
  b_pop_density      0.01      1.00      0.00     -1.58      1.67   1755.23      1.00
        b_urban     -0.02      1.02     -0.01     -1.71      1.63   1149.78      1.00
  kernel_length      0.48      0.26      0.42      0.17      0.81    721.77      1.01
     kernel_var      0.37      0.02      0.37      0.34      0.40   1512.52      1.00
       log_f[0]      3.02      0.62      3.06      1.97      4.00     56.82      1.10
       log_f[1]      3.03      0.62      3.04      2.04      4.07     57.28      1.10
       log_f[2]      3.02      0.62      3.04      2.02      4.04     62.21      1.09
       log_f[3]      3.03      0.61      3.03      2.11      4.11     70.60      1.09
       log_f[4]      3.03      0.60      3.03      2.

In [9]:
# Effective sample size (n_eff) and r_hat for the `gp_aggr` site of the 2021 run.
# Samples are fetched with the chain dimension kept (group_by_chain=True).
ss = numpyro.diagnostics.summary(mcmc_2021.get_samples(group_by_chain=True))
gp_stats = ss["gp_aggr"]

r = np.mean(gp_stats["n_eff"])
print(f"Average ESS for all aggGP effects : {round(r)}")

# The first n_lo entries are reported as "low", the following n_hi as "high".
lo_slice = slice(0, n_lo)
hi_slice = slice(n_lo, n_lo + n_hi)

ess_lo = np.mean(gp_stats["n_eff"][lo_slice])
r_hat_lo = np.max(gp_stats["r_hat"][lo_slice])

ess_hi = np.mean(gp_stats["n_eff"][hi_slice])
r_hat_hi = np.max(gp_stats["r_hat"][hi_slice])

# r_hat is formatted with ':.2f' instead of round(x, 2): rounding a float32
# scalar keeps it float32, and the f-string then shows its float64 expansion
# (e.g. 1.0700000524520874 instead of 1.07).
print(f"Average ESS for all aggGP-low effects : {round(ess_lo)}")
print(f"Max r_hat for all aggGP-low : {float(r_hat_lo):.2f}")

print(f"Average ESS for all aggGP-high effects : {round(ess_hi)}")
print(f"Max r_hat for all aggGP-high : {float(r_hat_hi):.2f}")

Average ESS for all aggGP effects : 98
Average ESS for all aggGP-low effects : 86
Max r_hat for all aggGP-low : 1.0700000524520874
Average ESS for all aggGP-high effects : 100
Max r_hat for all aggGP-high : 1.0800000429153442


### 2020 Inference

In [47]:
# Read the processed 2020 CSV into a DataFrame.
csv_path_2020 = "../data/processed/df_hi_jkt_w_pred_2020.csv"
df_2020 = pd.read_csv(csv_path_2020)

In [10]:
# Split a fixed-seed PRNG key and fetch the posterior draws of the 2020 run.
# NOTE(review): neither the keys nor `posterior_samples` are used by the cells
# visible in this notebook; they are kept in case later code relies on them.
rng_key_pr, rng_key_po = random.split(random.PRNGKey(4))
posterior_samples = mcmc_2020.get_samples()

# print_summary() writes the table to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
mcmc_2020.print_summary()


                     mean       std    median      5.0%     95.0%     n_eff     r_hat
             b0    100.07     10.05    100.09     84.29    117.05    889.94      1.01
          b_hdi     -0.01      0.99      0.03     -1.64      1.54    890.12      1.00
  b_pop_density      0.04      1.01      0.04     -1.57      1.70    715.79      1.01
        b_urban      0.01      0.95      0.02     -1.54      1.55   1041.21      1.00
  kernel_length      0.50      0.29      0.42      0.17      0.85    577.36      1.01
     kernel_var      0.37      0.02      0.37      0.34      0.40    923.41      1.00
       log_f[0]      2.99      0.58      2.99      2.03      3.92    107.51      1.03
       log_f[1]      2.99      0.58      2.98      2.02      3.93    105.62      1.03
       log_f[2]      2.99      0.58      2.99      2.09      3.93    102.26      1.03
       log_f[3]      2.99      0.58      2.98      2.00      3.90    106.22      1.04
       log_f[4]      2.98      0.58      2.97      2.

In [11]:
# Effective sample size (n_eff) and r_hat for the `gp_aggr` site of the 2020 run.
# Samples are fetched with the chain dimension kept (group_by_chain=True).
ss = numpyro.diagnostics.summary(mcmc_2020.get_samples(group_by_chain=True))
gp_stats = ss["gp_aggr"]

r = np.mean(gp_stats["n_eff"])
print(f"Average ESS for all aggGP effects : {round(r)}")

# The first n_lo entries are reported as "low", the following n_hi as "high".
lo_slice = slice(0, n_lo)
hi_slice = slice(n_lo, n_lo + n_hi)

ess_lo = np.mean(gp_stats["n_eff"][lo_slice])
r_hat_lo = np.max(gp_stats["r_hat"][lo_slice])

ess_hi = np.mean(gp_stats["n_eff"][hi_slice])
r_hat_hi = np.max(gp_stats["r_hat"][hi_slice])

# r_hat is formatted with ':.2f' instead of round(x, 2): rounding a float32
# scalar keeps it float32, and the f-string then shows its float64 expansion
# (e.g. 1.059999942779541 instead of 1.06).
print(f"Average ESS for all aggGP-low effects : {round(ess_lo)}")
print(f"Max r_hat for all aggGP-low : {float(r_hat_lo):.2f}")

print(f"Average ESS for all aggGP-high effects : {round(ess_hi)}")
print(f"Max r_hat for all aggGP-high : {float(r_hat_hi):.2f}")

Average ESS for all aggGP effects : 52
Average ESS for all aggGP-low effects : 44
Max r_hat for all aggGP-low : 1.059999942779541
Average ESS for all aggGP-high effects : 54
Max r_hat for all aggGP-high : 1.059999942779541
