In [1]:
%reload_ext autoreload
%autoreload 2

import os
import pickle
import logging
import multiprocessing
from pathlib import Path

import pandas as pd
import numpy as np
import jax
import jax.numpy as jnp

import arviz as az
import numpyro

from hbmep.config import Config
from hbmep.model.utils import Site as site

# Pin both JAX and NumPyro to the same backend.
PLATFORM = "cpu"
jax.config.update("jax_platforms", PLATFORM)
numpyro.set_platform(PLATFORM)

# Expose host CPU cores as separate XLA devices, keeping two cores free for the
# rest of the system. Clamp to at least one device: `cpu_count() - 2` is zero or
# negative on machines with one or two cores.
cpu_count = max(1, multiprocessing.cpu_count() - 2)
numpyro.set_host_device_count(cpu_count)
numpyro.enable_x64()
numpyro.enable_validation()

logger = logging.getLogger(__name__)


In [2]:
import numpyro.distributions as dist
from hbmep.model import Baseline


class Logistic5(Baseline):
    """Hierarchical model with a five-parameter logistic link.

    The expected response at intensity ``x`` is

        mu = L + H / (1 + v * exp(-b * (x - a))) ** (1 / v)

    and observations are Gamma distributed with concentration ``mu * beta`` and
    rate ``beta``, where ``beta = g_1 + g_2 * (1 / (mu + 1)) ** p``.

    Plate layout (right-most dimension last):
      * dim -1: response
      * dim -2: first feature ("n_feature0") for parameters, data rows for the
        likelihood
      * dim -3: subject
    """
    LINK = "Logistic5"

    def __init__(self, config: Config):
        """Initialise the base model from ``config``.

        ``combination_columns`` is set to all feature columns followed by the
        subject column.
        """
        super().__init__(config=config)
        self.combination_columns = self.features + [self.subject]

    def _model(self, subject, features, intensity, response_obs=None):
        """NumPyro model definition.

        Parameters
        ----------
        subject : tuple
            ``(subject_index, n_subject)``; the index is used to look up
            per-subject parameters for every data row.
        features : tuple
            ``(feature_indices, n_features)``; only the first feature
            (``feature_indices[0]`` / ``n_features[0]``) is used.
        intensity : tuple
            ``(intensity_values, n_data)``; the values are reshaped to a column
            and repeated once per response.
        response_obs : array-like, optional
            Observed responses passed as ``obs`` to the likelihood site. When
            ``None`` the site is sampled instead of conditioned.
        """
        subject, n_subject = subject
        features, n_features = features
        intensity, n_data = intensity

        # Broadcast intensity to one column per response.
        # jnp.tile (rather than np.tile) keeps this usable when the model
        # arguments are JAX tracers, e.g. MCMC(..., jit_model_args=True).
        intensity = intensity.reshape(-1, 1)
        intensity = jnp.tile(intensity, (1, self.n_response))

        feature0 = features[0].reshape(-1,)
        n_feature0 = n_features[0]

        with numpyro.plate(site.n_response, self.n_response, dim=-1):
            # Global scales, one per response, shared across all feature levels.
            global_sigma_b = numpyro.sample("global_sigma_b", dist.HalfNormal(100))
            global_sigma_v = numpyro.sample("global_sigma_v", dist.HalfNormal(100))

            global_sigma_L = numpyro.sample("global_sigma_L", dist.HalfNormal(1))
            global_sigma_H = numpyro.sample("global_sigma_H", dist.HalfNormal(5))

            global_sigma_g_1 = numpyro.sample("global_sigma_g_1", dist.HalfNormal(100))
            global_sigma_g_2 = numpyro.sample("global_sigma_g_2", dist.HalfNormal(100))

            global_sigma_p = numpyro.sample("global_sigma_p", dist.HalfNormal(100))

            with numpyro.plate("n_feature0", n_feature0, dim=-2):
                # Hyper-priors: each scale is a unit HalfNormal draw multiplied
                # by its global scale (non-centred parameterisation).
                mu_a = numpyro.sample(site.mu_a, dist.HalfNormal(scale=5))
                sigma_a = numpyro.sample(site.sigma_a, dist.HalfNormal(scale=1))

                sigma_b_raw = numpyro.sample("sigma_b_raw", dist.HalfNormal(scale=1))
                sigma_b = numpyro.deterministic(site.sigma_b, global_sigma_b * sigma_b_raw)

                sigma_v_raw = numpyro.sample("sigma_v_raw", dist.HalfNormal(scale=1))
                sigma_v = numpyro.deterministic(site.sigma_v, global_sigma_v * sigma_v_raw)

                sigma_L_raw = numpyro.sample("sigma_L_raw", dist.HalfNormal(scale=1))
                sigma_L = numpyro.deterministic(site.sigma_L, global_sigma_L * sigma_L_raw)

                sigma_H_raw = numpyro.sample("sigma_H_raw", dist.HalfNormal(scale=1))
                sigma_H = numpyro.deterministic(site.sigma_H, global_sigma_H * sigma_H_raw)

                sigma_g_1_raw = numpyro.sample("sigma_g_1_raw", dist.HalfNormal(scale=1))
                sigma_g_1 = numpyro.deterministic("sigma_g_1", global_sigma_g_1 * sigma_g_1_raw)

                sigma_g_2_raw = numpyro.sample("sigma_g_2_raw", dist.HalfNormal(scale=1))
                sigma_g_2 = numpyro.deterministic("sigma_g_2", global_sigma_g_2 * sigma_g_2_raw)

                sigma_p_raw = numpyro.sample("sigma_p_raw", dist.HalfNormal(scale=1))
                sigma_p = numpyro.deterministic("sigma_p", global_sigma_p * sigma_p_raw)

                with numpyro.plate(site.n_subject, n_subject, dim=-3):
                    # Priors: per-subject curve parameters, again sampled on a
                    # unit scale and rescaled deterministically.
                    # a, g_1 and g_2 are divided by their sigma; the others are
                    # multiplied by it.
                    a_raw = numpyro.sample("a_raw", dist.Gamma(concentration=mu_a, rate=1))
                    a = numpyro.deterministic(site.a, (1 / sigma_a) * a_raw)

                    b_raw = numpyro.sample("b_raw", dist.HalfNormal(scale=1))
                    b = numpyro.deterministic(site.b, sigma_b * b_raw)

                    v_raw = numpyro.sample("v_raw", dist.HalfNormal(scale=1))
                    v = numpyro.deterministic(site.v, sigma_v * v_raw)

                    L_raw = numpyro.sample("L_raw", dist.HalfNormal(scale=1))
                    L = numpyro.deterministic(site.L, sigma_L * L_raw)

                    H_raw = numpyro.sample("H_raw", dist.HalfNormal(scale=1))
                    H = numpyro.deterministic(site.H, sigma_H * H_raw)

                    g_1_raw = numpyro.sample("g_1_raw", dist.Exponential(rate=1))
                    g_1 = numpyro.deterministic(site.g_1, (1 / sigma_g_1) * g_1_raw)

                    g_2_raw = numpyro.sample("g_2_raw", dist.Exponential(rate=1))
                    g_2 = numpyro.deterministic(site.g_2, (1 / sigma_g_2) * g_2_raw)

                    p_raw = numpyro.sample("p_raw", dist.HalfNormal(scale=1))
                    p = numpyro.deterministic("p", sigma_p * p_raw)

        with numpyro.plate(site.n_response, self.n_response, dim=-1):
            with numpyro.plate(site.data, n_data, dim=-2):
                # Model: indexing with [subject, feature0] selects, for every
                # data row, the parameters of that row's subject and feature
                # level (one value per response).
                mu = numpyro.deterministic(
                    site.mu,
                    L[subject, feature0]
                    + (
                        H[subject, feature0]
                        / jnp.power(
                            1 + v[subject, feature0] * jnp.exp(-b[subject, feature0] * (intensity - a[subject, feature0])),
                            1 / v[subject, feature0]
                        )
                    )
                )
                beta = numpyro.deterministic(
                    site.beta,
                    g_1[subject, feature0] + g_2[subject, feature0] * ((1 / (mu + 1)) ** p[subject, feature0])
                )

                # Observation: Gamma with concentration mu * beta and rate
                # beta, i.e. mean mu.
                numpyro.sample(
                    site.obs,
                    dist.Gamma(concentration=mu * beta, rate=beta),
                    obs=response_obs
                )


In [3]:
# Location of the model configuration (a single-argument os.path.join would
# return the same string, so the path is assigned directly).
toml_path = "/home/vishu/repos/hbmep-paper/configs/paper/tms/link-comparison/Logistic5.toml"
config = Config(toml_path=toml_path)

# Optional overrides for a longer sampling run (disabled):
# config.MCMC_PARAMS["num_warmup"] = 40000
# config.MCMC_PARAMS["num_samples"] = 60000
# config.MCMC_PARAMS["thinning"] = 4

model = Logistic5(config=config)


2023-10-17 15:50:12,194 - hbmep.config - INFO - Verifying configuration ...
2023-10-17 15:50:12,194 - hbmep.config - INFO - Success!


2023-10-17 15:50:12,394 - hbmep.model.baseline - INFO - Initialized model with Logistic5 link


In [4]:
# Read the raw table under its own name, then let the model prepare it.
src = "/home/vishu/data/hbmep-processed/human/tms/data_pkpk_auc.csv"
raw_df = pd.read_csv(src)

# Optional: restrict to a subset of subjects (disabled).
# model.mep_matrix_path = None
# subset = ["SCA01"]
# ind = raw_df[model.subject].isin(subset)
# raw_df = raw_df[ind].reset_index(drop=True).copy()

df, encoder_dict = model.load(df=raw_df)


2023-10-17 15:50:12,479 - hbmep.dataset.core - INFO - Artefacts will be stored here - /home/vishu/repos/hbmep-paper/reports/paper/tms/link-comparison/Logistic5
2023-10-17 15:50:12,480 - hbmep.dataset.core - INFO - Copied config to /home/vishu/repos/hbmep-paper/reports/paper/tms/link-comparison/Logistic5
2023-10-17 15:50:12,482 - hbmep.dataset.core - INFO - Processing data ...
2023-10-17 15:50:12,484 - hbmep.utils.utils - INFO - func:load took: 0.01 sec


In [5]:
# Run inference on the loaded data. `mcmc` is summarised and converted with
# ArviZ in later cells; `posterior_samples` feeds prediction and plotting.
# The recorded run of this cell took about 11 minutes.
mcmc, posterior_samples = model.run_inference(df=df)


2023-10-17 15:50:12,835 - hbmep.model.baseline - INFO - Running inference with Logistic5 ...


  0%|          | 0/3000 [00:00<?, ?it/s]

  0%|          | 0/3000 [00:00<?, ?it/s]

  0%|          | 0/3000 [00:00<?, ?it/s]

  0%|          | 0/3000 [00:00<?, ?it/s]

2023-10-17 16:01:31,546 - hbmep.utils.utils - INFO - func:run_inference took: 11 min and 18.71 sec


In [6]:
# Posterior summary with 95% intervals.
# NOTE(review): the recorded output shows r_hat up to ~1.36 and n_eff as low
# as ~8 for some sites, i.e. the chains had not converged in that run. Treat
# downstream numbers as preliminary until the sampler is run longer.
mcmc.print_summary(prob=.95)



                         mean       std    median      2.5%     97.5%     n_eff     r_hat
       H_raw[0,0,0]      0.80      0.58      0.69      0.02      1.92     21.42      1.15
       H_raw[0,0,1]      0.51      0.53      0.35      0.00      1.64     10.57      1.19
       H_raw[0,0,2]      0.82      0.61      0.69      0.01      1.97     39.93      1.12
       H_raw[0,0,3]      0.80      0.68      0.67      0.00      2.09     27.20      1.13
       H_raw[0,0,4]      0.78      0.56      0.69      0.03      1.85     27.78      1.15
       H_raw[0,0,5]      0.70      0.56      0.61      0.01      1.71     50.16      1.04
       H_raw[0,1,0]      0.85      0.21      0.83      0.50      1.26     18.20      1.27
       H_raw[0,1,1]      0.43      0.11      0.42      0.21      0.66     23.18      1.22
       H_raw[0,1,2]      0.36      0.49      0.12      0.00      1.49      9.27      1.20
       H_raw[0,1,3]      0.68      0.16      0.64      0.45      1.03      8.28      1.36
       H_

In [6]:
# NOTE(review): this cell repeats the preceding summary cell (same execution
# count, In [6]) but its recorded numbers differ, so the two outputs come from
# different runs. The output here shows r_hat up to ~1.52 and n_eff as low as
# ~5. Keep only one copy once the notebook is re-run from a fresh kernel.
mcmc.print_summary(prob=.95)



                         mean       std    median      2.5%     97.5%     n_eff     r_hat
       H_raw[0,0,0]      0.92      0.64      0.84      0.01      2.13     37.49      1.12
       H_raw[0,0,1]      0.68      0.52      0.60      0.01      1.68     47.42      1.06
       H_raw[0,0,2]      0.65      0.54      0.46      0.03      1.79     43.74      1.15
       H_raw[0,0,3]      0.91      0.65      0.76      0.00      2.14     17.42      1.19
       H_raw[0,0,4]      0.78      0.58      0.65      0.03      1.96     38.24      1.14
       H_raw[0,0,5]      0.79      0.66      0.66      0.00      2.13     22.65      1.26
       H_raw[0,1,0]      0.75      0.21      0.74      0.40      1.15      6.18      1.40
       H_raw[0,1,1]      0.47      0.10      0.47      0.28      0.64      5.09      1.52
       H_raw[0,1,2]      0.06      0.04      0.05      0.01      0.14      7.48      1.47
       H_raw[0,1,3]      0.63      0.16      0.63      0.37      0.91      7.17      1.29
       H_

In [7]:
# Build the prediction grid and draw from the posterior predictive on it.
prediction_df = model.make_prediction_dataset(df=df)
posterior_predictive = model.predict(df=prediction_df, posterior_samples=posterior_samples)

# Keyword arguments common to both rendering calls.
render_kwargs = dict(
    df=df,
    encoder_dict=encoder_dict,
    prediction_df=prediction_df,
    posterior_predictive=posterior_predictive,
)
model.render_recruitment_curves(posterior_samples=posterior_samples, **render_kwargs)
model.render_predictive_check(**render_kwargs)


2023-10-17 16:01:32,244 - hbmep.utils.utils - INFO - func:make_prediction_dataset took: 0.01 sec
2023-10-17 16:01:47,175 - hbmep.utils.utils - INFO - func:predict took: 14.93 sec
2023-10-17 16:01:47,268 - hbmep.model.baseline - INFO - Rendering ...
2023-10-17 16:02:04,946 - hbmep.model.baseline - INFO - Saved to /home/vishu/repos/hbmep-paper/reports/paper/tms/link-comparison/Logistic5/recruitment_curves.pdf
2023-10-17 16:02:04,948 - hbmep.utils.utils - INFO - func:render_recruitment_curves took: 17.77 sec
2023-10-17 16:02:05,074 - hbmep.model.baseline - INFO - Rendering Posterior Predictive Check ...
2023-10-17 16:02:20,429 - hbmep.model.baseline - INFO - Saved to /home/vishu/repos/hbmep-paper/reports/paper/tms/link-comparison/Logistic5/posterior_predictive_check.pdf
2023-10-17 16:02:20,435 - hbmep.utils.utils - INFO - func:_render_predictive_check took: 15.49 sec
2023-10-17 16:02:20,436 - hbmep.utils.utils - INFO - func:render_predictive_check took: 15.49 sec


In [8]:
# Wrap the NumPyro MCMC result for use with ArviZ.
numpyro_data = az.from_numpyro(mcmc)

# Model evaluation
logger.info("Evaluating model ...")


2023-10-17 16:02:24,583 - __main__ - INFO - Evaluating model ...


In [10]:
# NOTE(review): stale cell. Its execution count (In [10]) is out of order and
# its logged timestamp (13:05) predates the inference run recorded above
# (15:50-16:01), so the ELPD shown here belongs to an earlier run. The next
# cell contains the same code with the current result.
score = az.loo(numpyro_data)
logger.info(f"ELPD LOO (Log): {score.elpd_loo:.2f}")

  weights = 1 / np.exp(len_scale - len_scale[:, None]).sum(axis=1)
2023-10-17 13:05:10,994 - __main__ - INFO - ELPD LOO (Log): 8996.47


In [9]:
# Leave-one-out cross-validation estimate via ArviZ; logs the resulting ELPD.
# `score` is rebound by the WAIC cell that follows.
score = az.loo(numpyro_data)
logger.info(f"ELPD LOO (Log): {score.elpd_loo:.2f}")

  weights = 1 / np.exp(len_scale - len_scale[:, None]).sum(axis=1)
2023-10-17 16:02:51,746 - __main__ - INFO - ELPD LOO (Log): 8594.84


In [10]:
# WAIC estimate via ArviZ; logs the resulting ELPD.
# Note: this rebinds `score`, replacing the LOO result from the previous cell.
score = az.waic(numpyro_data)
logger.info(f"ELPD WAIC (Log): {score.elpd_waic:.2f}")

See http://arxiv.org/abs/1507.04544 for details
2023-10-17 16:02:52,190 - __main__ - INFO - ELPD WAIC (Log): 8528.41


In [None]:
import pickle

# Persist the model, the sampler object and the posterior draws as one tuple
# inside the model's build directory.
dest = os.path.join(model.build_dir, "inference.pkl")
artefacts = (model, mcmc, posterior_samples)
with open(dest, "wb") as fh:
    pickle.dump(artefacts, fh)


In [None]:
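# Reload the saved artefacts (disabled). Only unpickle files you created
# yourself: pickle.load can execute arbitrary code.
# import pickle

# with open(dest, "rb") as g:
#     model_, mcmc_, posterior_samples_ = pickle.load(g)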
