In [1]:
import time
start_time = time.time()

In [2]:
import logging
import multiprocessing

import numpy as np

from scipy.integrate import solve_ivp
from scipy.stats import gaussian_kde
from scipy.stats import multivariate_normal as mvn

import lotka_volterra
import utils.caching
from utils.caching import make_cached_s3
from utils.parallel import apply_along_axis_parallel, get_map_parallel_joblib
from utils.paths import S3_BUCKET_NAME

In [3]:
logging.basicConfig()
logging.getLogger(utils.caching.__name__).setLevel(logging.DEBUG)

In [4]:
cached = make_cached_s3(S3_BUCKET_NAME)

In [5]:
map_parallel = get_map_parallel_joblib(multiprocessing.cpu_count())

Read-only input datasets:

In [6]:
@cached(item_type=np.ndarray, batch_size=lotka_volterra.n_chains, read_only=True)
def rw_samples() -> list[np.ndarray]:
    raise NotImplementedError

In [7]:
kde_scipy = gaussian_kde(rw_samples[0].T, bw_method='silverman')

DEBUG:utils.caching:Reading from disk cache: rw_samples_0
DEBUG:utils.caching:Reading from S3 gradient-free-mcmc-postprocessing/rw_samples_0.npy


In [8]:
%%timeit
mvn.pdf(rw_samples[0], mean=rw_samples[0][0], cov=kde_scipy.covariance)

42.6 ms ± 1.45 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [14]:
def evaluate_for_row(row):
    return np.log(np.average(mvn.pdf(rw_samples[0], mean=row, cov=kde_scipy.covariance)))

In [15]:
chunk_size = 1000

In [None]:
@cached(batch_size=len(lotka_volterra.theta_inits))
def rw_kde_log_q(i: int) -> np.ndarray:
    return apply_along_axis_parallel(evaluate_for_row, 1, rw_samples[i], chunk_size, map_parallel)

In [None]:
[rw_kde_log_q(i) for i in range(lotka_volterra.n_chains)];

Notebook execution took:

In [12]:
time.time() - start_time

32.254340171813965