In [49]:
import aesara
import aesara.tensor as at
import arviz as az
import IPython
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pymc as pm
from datetime import datetime
import os
import json
import tellurium as te
import scipy

# report the installed PyMC version in the cell output
print(f"Running on PyMC v{pm.__version__}")

Running on PyMC v4.0.0b6


In [50]:
### log-likelihood functions

def calc_norm_log_like(mu,sigma,X):
    ''' Calculate the Normal log-likelihood of the observations X.

    Evaluates -[(n/2)ln(2pi*sigma^2)]-[sum((X-mu)^2)/(2*sigma^2)] with n = len(X).
    ref: https://www.statlect.com/fundamentals-of-statistics/normal-distribution-maximum-likelihood

    Parameters
    ----------
    mu: array_like
        Predicted mean for each observation. Only the first len(X) entries
        are used, so mu may be longer than X.
    sigma: float
        Standard deviation shared by every observation.
    X: array_like
        Observed values.

    Returns
    -------
    log_likelihood: float
        The Normal log-likelihood of X given mu and sigma.

    Raises
    ------
    IndexError
        If mu has fewer entries than X.
    '''
    n = len(X)
    observed = np.asarray(X)
    predicted = np.asarray(mu)
    if predicted.shape[0] < n:
        # an element-wise loop over X would run off the end of mu here; fail
        # loudly instead of letting NumPy broadcast a length-1 mu
        raise IndexError(f"mu has {predicted.shape[0]} entries but X has {n}")

    # vectorized sum of squared residuals over the first n predictions
    residuals = observed - predicted[:n]
    sum_sq = np.sum(residuals**2, axis=0)

    f1 = -1*(n/2)*np.log(2*np.pi*sigma**2)
    f2 = (-1/(2*sigma**2))*sum_sq
    log_likelihood = f1+f2
    return log_likelihood


def calc_log_like(K,y_obs,m):
    '''Calculate the log likelihood of a transporter tellurium ODE model m, given data y_obs and parameters K.

    Parameters
    ----------
    K: array_like
        At least 12 log10-scaled values. K[0]..K[9] are the forward/reverse
        rate constant pairs k1..k5, K[10] is k6_f and K[11] is the noise
        standard deviation sigma. k6_r is not sampled; it is derived from the
        other rate constants (cycle constraint).
    y_obs: array_like
        Observed data compared against the simulated 'rxn4' values.
    m:
        Tellurium (libroadrunner) ODE model. It is reset and modified in place.

    Returns
    -------
    log_like_tmp: float
        The Normal log-likelihood of y_obs given the simulated 'rxn4' values,
        or -np.inf if the simulation or likelihood evaluation raises or
        produces NaN.
    '''
    idx_list = [0,2,4,6,8]  # index of rate pairs used to set attribute, last rate omitted - fix this later
    m.resetToOrigin()
    m.H_out = 5e-7
    m.integrator.absolute_tolerance = 1e-18
    m.integrator.relative_tolerance = 1e-12

    # update tellurium model parameter values (rate constants)
    for i, idx in enumerate(idx_list):
        setattr(m, f'k{i+1}_f', 10**K[idx])
        setattr(m, f'k{i+1}_r', 10**K[idx+1])

    # last rate constant (k6_r) has cycle constraint
    m.k6_f = 10**K[10]
    m.k6_r = (m.k1_f*m.k2_f*m.k3_f*m.k4_f*m.k5_f*m.k6_f)/(m.k1_r*m.k2_r*m.k3_r*m.k4_r*m.k5_r)

    try:
        D_tmp = m.simulate(0, 5, 50, selections=['time', 'rxn4'])
        y_tmp = D_tmp['rxn4'][1:]  # remove first point
        sigma = 10**K[11]
        log_like_tmp = calc_norm_log_like(y_tmp,sigma,y_obs)
    except Exception:
        # Any failure to compute the flux --> no probability. Catch Exception
        # rather than everything so Ctrl-C (KeyboardInterrupt) and SystemExit
        # still stop a long sampling run.
        log_like_tmp = -np.inf

    # a NaN likelihood (e.g. from NaNs in the simulated values) is also "no probability"
    if np.isnan(log_like_tmp):
        log_like_tmp = -np.inf
    return log_like_tmp

In [51]:
### PyMC v4 / Aesara functions (finite-difference gradient and custom Ops)

def normal_gradients(theta, data, m):
    """
    Numerically estimate the gradient of calc_log_like with respect to theta.

    The partial derivatives are approximated by scipy.optimize.approx_fprime
    (finite differences) using a single fixed step size for every parameter.

    Parameters
    ----------
    theta: array_like
        The parameter values at which to evaluate the gradient; passed as the
        first argument of calc_log_like.
    data:
        The observed data, forwarded unchanged to calc_log_like.
    m:
        Tellurium (libroadrunner) ODE model, forwarded unchanged to
        calc_log_like.

    Returns
    -------
    grads: array_like
        The approximate partial derivative of the log-likelihood with respect
        to each entry of theta.
    """
    # import the submodule explicitly: a bare `import scipy` is not guaranteed
    # to make `scipy.optimize` available as an attribute
    from scipy.optimize import approx_fprime

    step = 1.4901161193847656e-08  # sqrt of float64 machine epsilon

    grads = approx_fprime(theta, calc_log_like, step, data, m)

    return grads

# define an Aesara Op for our likelihood function
class LogLikeWithGrad(at.Op):

    """
    Aesara Op wrapping a black-box log-likelihood function.

    The Op takes one vector of parameter values and produces one scalar, the
    log-likelihood. Its gradient is delegated to a companion LogLikeGrad Op
    created at construction time.
    """

    itypes = [at.dvector]  # input: a single vector of parameter values
    otypes = [at.dscalar]  # output: a single scalar (the log likelihood)

    def __init__(self, loglike, data, m):
        """
        Store everything the log-likelihood function needs besides the
        parameter vector, and build the gradient Op.

        Parameters
        ----------
        loglike:
            Callable invoked as loglike(theta, data, m)
        data:
            The "observed" data handed to loglike
        m:
            Tellurium (libroadrunner) ODE model handed to loglike
        """

        self.likelihood = loglike
        self.data = data
        self.m = m

        # companion Op that evaluates the gradient for the same data and model
        self.logpgrad = LogLikeGrad(self.data, self.m)

    def perform(self, node, inputs, outputs):
        # evaluate the Op: the only input is the parameter vector
        [params] = inputs

        value = self.likelihood(params, self.data, self.m)

        # store the log-likelihood in the first output's storage cell
        result_cell = outputs[0]
        result_cell[0] = np.array(value)

    def grad(self, inputs, g):
        # returns the vector-Jacobian product: the incoming gradient g[0]
        # multiplied by the gradient vector produced by the companion Op
        [params] = inputs
        upstream = g[0]
        return [upstream * self.logpgrad(params)]


class LogLikeGrad(at.Op):

    """
    Aesara Op that maps a vector of parameter values to a vector of gradients,
    one per parameter, computed by normal_gradients.
    """

    itypes = [at.dvector]
    otypes = [at.dvector]

    def __init__(self, data, m):
        """
        Store the fixed arguments forwarded to normal_gradients on every
        evaluation.

        Parameters
        ----------
        data:
            The "observed" data that the log-likelihood function takes in
        m:
            Tellurium (libroadrunner) ODE model
        """

        self.data = data
        self.m = m

    def perform(self, node, inputs, outputs):
        # the only input is the parameter vector
        [params] = inputs

        # write the gradient vector into the first output's storage cell
        result_cell = outputs[0]
        result_cell[0] = normal_gradients(params, self.data, self.m)

In [52]:
### utility functions
def parse_p_info(p_info, near_global_min=True):
    '''Split parameter settings into reference values, labels and bounds.

    p_info[i] = [parameter name, lower bound, upper bound, reference value]

    When near_global_min == True each bound pair is a +/-0.1% window around
    the reference value (ordered so the lower bound comes first for negative
    references); otherwise the stored lower/upper bounds are used.
    NOTE: a reference value of exactly 0 yields the zero-width window (0, 0).

    Returns (p_ref, p_labels, p_bounds) with p_bounds as a numpy array.
    '''
    p_ref = []
    p_labels = []
    for entry in p_info:
        p_ref.append(entry[3])
        p_labels.append(entry[0])

    if near_global_min==True:
        # near global min: tight window around each reference value
        p_bounds = []
        for entry in p_info:
            ref = entry[3]
            shrunk, grown = ref*0.999, ref*1.001
            if ref > 0:
                p_bounds.append((shrunk, grown))
            else:
                p_bounds.append((grown, shrunk))
    else:
        # default: bounds as stored in the parameter settings
        p_bounds = [(entry[1], entry[2]) for entry in p_info]

    return p_ref, p_labels, np.array(p_bounds)

In [53]:
### run configuration: input files and sampler settings
# NOTE(review): these are absolute paths on one machine; adjust before running elsewhere
model_file = "/Users/georgeau/Desktop/GitHub/Bayesian_Transporter/transporter_model/antiporter_12D_model.txt"
obs_data_file = "/Users/georgeau/Desktop/GitHub/Bayesian_Transporter/synthetic_data/synth_data_1exp_a_trunc_50s.csv"
parameter_file = "/Users/georgeau/Desktop/GitHub/Bayesian_Transporter/transporter_model/12D_transporter_w_full_priors.json"

seed = 42
near_global_min = False
n_dim = 12
n_steps = int(1e4)
np.random.seed(seed)

### output location: a timestamped run directory under the current working directory
date_string = datetime.today().strftime('%Y%m%d_%H%M%S')
out_fname=f'run_pymc4_d{date_string}_nd{n_dim}_ngm{near_global_min}_ns{n_steps}_r{seed}'
current_directory = os.getcwd()
final_directory = os.path.join(current_directory, out_fname)
if not os.path.exists(final_directory):
    os.makedirs(final_directory)

### inputs: antimony model string and parameter settings
with open(model_file, "r") as model_fh:
    antimony_string_SS = model_fh.read()
with open(parameter_file, 'rb') as param_fh:
    p_info = json.load(param_fh)
p_ref, p_labels, p_bounds = parse_p_info(p_info, near_global_min=near_global_min)
_, _, p_bounds2 = parse_p_info(p_info, near_global_min=False)  # full-range bounds, kept for plotting

### log likelihood arguments: observed data and the tellurium model
y_obs= np.genfromtxt(obs_data_file)
m = te.loada(antimony_string_SS)

### record the run settings in a log file inside the run directory
log_path = os.path.join(final_directory, f'{out_fname}_log.txt')
with open(log_path, "a") as log_fh:
    log_fh.writelines([
        f"date: {date_string}\n",
        f"model file: {model_file}\n",
        f"parameter file: {parameter_file}\n",
        f"data file: {obs_data_file}\n",
        f"seed: {seed}\n",
        f"n dim: {n_dim}\n",
        f"n steps: {n_steps}\n",
        f"near global min: {near_global_min}\n",
        f"out fname: {out_fname}\n",
        f"parameter ref: {p_ref}\n",
        f"parameter labels: {p_labels}\n",
        f"parameter boundaries: {p_bounds}\n",
    ])

In [55]:
### PyMC v4 sampling

# create our Op: wraps calc_log_like together with the observed data and the ODE model
logl = LogLikeWithGrad(calc_log_like, y_obs, m)

# use PyMC to sample from the log-likelihood
with pm.Model() as opmodel:
    # one uniform prior per model parameter, named and bounded from the
    # parsed parameter settings (first n_dim entries)
    p_list = [
        pm.Uniform(f"{label}", lower=lower, upper=upper)
        for label, (lower, upper) in zip(p_labels[:n_dim], p_bounds[:n_dim])
    ]
    if len(p_list) != n_dim:
        raise ValueError(f"expected {n_dim} parameters, got {len(p_list)}")

    # stack the individual priors into a single tensor vector for the Op
    theta = at.as_tensor_variable(p_list)

    # use a Potential to "call" the Op and include it in the logp computation
    pm.Potential("likelihood", logl(theta))

    # n_steps and seed are the values written to the run log and output name;
    # pass them to the sampler so the log describes the run that actually happened
    idata_grad = pm.sample(draws=n_steps, chains=1, random_seed=seed)

# plot the traces
az.plot_trace(idata_grad)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...

The parameter 'updates' of aesara.function() expects an OrderedDict, got <class 'dict'>. Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.

Sequential sampling (1 chains in 1 job)
NUTS: [log_k1_f, log_k1_r, log_k2_f, log_k2_r, log_k3_f, log_k3_r, log_k4_f, log_k4_r, log_k5_f, log_k5_r, log_k6_f, log_sigma]



invalid value encountered in subtract

[35mError: CVODE Error: CV_ERR_FAILURE, Module: CVODES, Function: CVode, Message: At t = 0 and h = 1.86597e-29, the error test failed repeatedly or with |h| = hmin.[0m
[35mError: CVODE Error: CV_ERR_FAILURE, Module: CVODES, Function: CVode, Message: At t = 0 and h = 1.86597e-29, the error test failed repeatedly or with |h| = hmin.[0m
[35mError: CVODE Error: CV_ERR_FAILURE, Module: CVODES, Function: CVode, Message: At t = 0 and h = 1.86597e-29, the error test failed repeatedly or with |h| = hmin.[0m
[35mError: CVODE Error: CV_ERR_FAILURE, Module: CVODES, Function: CVode, Message: At t = 0 and h = 1.86597e-29, the error test failed repeatedly or with |h| = hmin.[0m
[35mError: CVODE Error: CV_ERR_FAILURE, Module: CVODES, Function: CVode, Message: At t = 0 and h = 1.86597e-29, the error test failed repeatedly or with |h| = hmin.[0m
[35mError: CVODE Error: CV_ERR_FAILURE, Module: CVODES, Function: CVode, Message: At t = 0 and h = 1.86597e-2