In [1]:
%reload_ext autoreload
%autoreload 2

import os
from pathlib import Path
import multiprocessing

import jax
import jax.numpy as jnp
import pandas as pd
import numpy as np
import numpyro
import numpyro.distributions as dist

from hbmep.config import Config
from hbmep.model import Baseline
from hbmep.model.utils import Site as site
from hbmep.utils.constants import RECTIFIED_LOGISTIC

# Run everything on the CPU backend; the same value is given to both JAX and NumPyro.
PLATFORM = "cpu"
jax.config.update("jax_platforms", PLATFORM)
numpyro.set_platform(PLATFORM)

# Leave two cores free for the rest of the system, but never request fewer than
# one host device: on machines with <= 2 cores the raw difference is 0 or negative.
cpu_count = max(1, multiprocessing.cpu_count() - 2)
numpyro.set_host_device_count(cpu_count)
numpyro.enable_x64()


#### Load config

In [2]:
# The project root is taken to be three directory levels above the current
# working directory; the TOML config is resolved relative to it.
root_path = Path.cwd().parent.parent.parent.absolute()
toml_path = os.path.join(root_path, "configs/fix.toml")

# Build the hbmep configuration object from the TOML file.
config = Config(toml_path=toml_path)


2023-07-18 10:43:00,769 - hbmep.config - INFO - Verifying configuration ...
2023-07-18 10:43:00,769 - hbmep.config - INFO - Success!


#### Define the model, then load and preprocess data

In [3]:
class RectifiedLogistic(Baseline):
    """Rectified-logistic model built on top of ``Baseline``.

    Overrides ``_model`` with a NumPyro model in which every curve parameter
    is sampled with shape (n_feature0, n_subject, n_response) and the
    observations follow a Gamma likelihood.
    """

    def __init__(self, config: Config):
        """Initialise the base model and tag it with the rectified-logistic link."""
        super().__init__(config=config)
        self.link = RECTIFIED_LOGISTIC

    def _model(self, subject, features, intensity, response_obs=None):
        """NumPyro model: rectified-logistic mean with a Gamma likelihood.

        Args:
            subject: Integer codes used to index the subject axis of every
                sampled parameter; the number of subjects is the number of
                unique values.
            features: Sequence whose first entry holds the feature-0 codes
                (flattened to 1-D and used as an index).
            intensity: Intensity values; reshaped to a column and tiled to
                ``self.n_response`` columns.
            response_obs: Observed responses handed to the likelihood site as
                ``obs``; ``None`` leaves the site unobserved.

        Returns:
            The value of the ``site.obs`` sample site.
        """
        # One intensity column per response.
        # NOTE(review): self.n_response is presumably set by Baseline -- confirm.
        intensity = np.tile(intensity.reshape(-1, 1), (1, self.n_response))
        feature0 = features[0].reshape(-1,)

        n_data = intensity.shape[0]
        n_subject = np.unique(subject).shape[0]
        n_feature0 = np.unique(feature0).shape[0]

        with numpyro.plate(site.n_response, self.n_response, dim=-1):
            with numpyro.plate(site.n_subject, n_subject, dim=-2):
                # Hyper-priors: one draw per (subject, response).
                mu_a = numpyro.sample(
                    site.mu_a, dist.TruncatedNormal(100, 100, low=0)
                )
                sigma_a = numpyro.sample(site.sigma_a, dist.HalfCauchy(20))

                sigma_b = numpyro.sample(site.sigma_b, dist.HalfCauchy(0.5))

                sigma_L = numpyro.sample(site.sigma_L, dist.HalfCauchy(0.05))
                sigma_H = numpyro.sample(site.sigma_H, dist.HalfCauchy(5))
                sigma_v = numpyro.sample(site.sigma_v, dist.HalfCauchy(5))

                with numpyro.plate("n_feature0", n_feature0, dim=-3):
                    # Priors: one draw per (feature0, subject, response).
                    a = numpyro.sample(
                        site.a, dist.TruncatedNormal(mu_a, sigma_a, low=0)
                    )
                    b = numpyro.sample(site.b, dist.HalfNormal(sigma_b))

                    L = numpyro.sample(site.L, dist.HalfNormal(sigma_L))
                    H = numpyro.sample(site.H, dist.HalfNormal(sigma_H))
                    v = numpyro.sample(site.v, dist.HalfNormal(sigma_v))

                    p = numpyro.sample(site.p, dist.HalfNormal(10))

                    g_1 = numpyro.sample(site.g_1, dist.HalfCauchy(20))
                    g_2 = numpyro.sample(site.g_2, dist.HalfCauchy(20))

        # Gather each parameter for every data row -> (n_data, n_response).
        row = (feature0, subject)
        a_row, b_row = a[row], b[row]
        L_row, H_row, v_row = L[row], H[row], v[row]

        # Model: offset L plus a generalised-logistic rise that is clipped at zero.
        decay = (jnp.power(1 + H_row, v_row) - 1) * jnp.exp(
            -b_row * (intensity - a_row)
        )
        rise = -1 + (H_row + 1) / jnp.power(1 + decay, 1 / v_row)
        mu = numpyro.deterministic(site.mu, L_row + jnp.maximum(0, rise))

        # The Gamma's second parameter grows with (1 / mu) ** p.
        beta = numpyro.deterministic(
            site.beta,
            g_1[row] + g_2[row] * jnp.power(1 / mu, p[row])
        )

        # Gamma(mu * beta, beta); the response axis is folded into the event shape.
        likelihood = dist.Gamma(mu * beta, beta).to_event(1)
        with numpyro.plate(site.data, n_data):
            return numpyro.sample(site.obs, likelihood, obs=response_obs)


model = RectifiedLogistic(config=config)

In [4]:
df = pd.read_csv(model.csv_path)

# Filter: keep only the rows whose `model.columns` values form one of the
# tuples listed in `subset`.
# (Alternative from the original cell: df.compound_position.isin(["-C5M"]))
subset = [("amap01", "-C5M"), ("amap02", "-C5M")]
ind = df[model.columns].apply(tuple, axis=1).isin(subset)

# Boolean selection yields a new frame; renumber its rows from zero.
df = df[ind].reset_index(drop=True)

# Load data: returns the processed frame together with the encoder mapping.
df, encoder_dict = model.load(df=df)

2023-07-18 10:43:00,923 - hbmep.dataset.core - INFO - Artefacts will be stored here - /home/vishu/reports/fix
2023-07-18 10:43:00,924 - hbmep.dataset.core - INFO - Copied config to /home/vishu/reports/fix
2023-07-18 10:43:00,924 - hbmep.dataset.core - INFO - Processing data ...
2023-07-18 10:43:00,925 - hbmep.utils.utils - INFO - func:load took: 0.00 sec


In [5]:
# Run inference on the filtered data. The call returns a pair; judging by the
# names and later use, an MCMC object and the posterior samples.
mcmc, posterior_samples = model.run_inference(df=df)


2023-07-18 10:43:00,969 - hbmep.model.baseline - INFO - Running inference with rectified_logistic ...


  0%|          | 0/10000 [00:00<?, ?it/s]

  0%|          | 0/10000 [00:00<?, ?it/s]

  0%|          | 0/10000 [00:00<?, ?it/s]

  0%|          | 0/10000 [00:00<?, ?it/s]

2023-07-18 10:45:34,165 - hbmep.utils.utils - INFO - func:run_inference took: 2 min and 33.20 sec


In [9]:
# Posterior summary table with 95% intervals.
mcmc.print_summary(prob=0.95)



                mean       std    median      2.5%     97.5%     n_eff     r_hat
  H[0,0,0]     14.13    112.06      2.52      0.74     37.30   4956.91      1.00
  H[0,0,1]      7.19    372.43      0.68      0.40     11.25  12342.72      1.00
  H[0,1,0]     34.54    403.78      6.48      0.59     72.79   6411.11      1.00
  H[0,1,1]     46.28    984.65      3.54      0.13     58.33   6871.97      1.00
  L[0,0,0]      0.04      0.00      0.04      0.03      0.04  12763.06      1.00
  L[0,0,1]      0.02      0.00      0.02      0.02      0.02  14438.52      1.00
  L[0,1,0]      0.02      0.00      0.02      0.02      0.02  13147.25      1.00
  L[0,1,1]      0.02      0.00      0.02      0.02      0.03   7935.47      1.00
  a[0,0,0]    163.39      4.30    163.53    154.32    171.72   8798.85      1.00
  a[0,0,1]    171.52      2.75    171.62    164.75    176.01   5915.58      1.00
  a[0,1,0]    198.19      3.36    199.15    191.76    202.65   4480.74      1.00
  a[0,1,1]    266.38      9

In [7]:
# Render the recruitment curves from the posterior samples.
model.render_recruitment_curves(
    df=df,
    encoder_dict=encoder_dict,
    posterior_samples=posterior_samples,
)


2023-07-18 10:45:34,514 - hbmep.model.baseline - INFO - Rendering recruitment curves ...
2023-07-18 10:45:39,964 - hbmep.model.baseline - INFO - Saved to /home/vishu/reports/fix/recruitment_curves.pdf
2023-07-18 10:45:39,965 - hbmep.utils.utils - INFO - func:render_recruitment_curves took: 5.45 sec


In [8]:
# Render the posterior predictive check from the same posterior samples.
model.render_predictive_check(
    df=df,
    encoder_dict=encoder_dict,
    posterior_samples=posterior_samples,
)


2023-07-18 10:45:40,279 - hbmep.model.baseline - INFO - Rendering Posterior Predictive Check ...
2023-07-18 10:45:46,511 - hbmep.model.baseline - INFO - Saved to /home/vishu/reports/fix/posterior_predictive_check.pdf
2023-07-18 10:45:46,515 - hbmep.utils.utils - INFO - func:render_predictive_check took: 6.24 sec
