In [1]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import petab
from scipy.special import gamma
from scipy.special import gammaln

import pypesto
import pypesto.petab
import pypesto.optimize as optimize
import pypesto.sample as sample
import pypesto.visualize as visualize

# Which data set to load: "original", "switch", or anything else for "loss"
datatype = "original"

# import to petab: pick the YAML file for the chosen data set; any
# unrecognised value falls back to the "loss" file
petab_problem = petab.Problem.from_yaml(
    {
        "original": "corrupted_data/SS_conversion_reaction_original.yaml",
        "switch": "corrupted_data/SS_conversion_reaction_switch.yaml",
    }.get(datatype, "corrupted_data/SS_conversion_reaction_loss.yaml")
)

In [2]:
# Tab-separated measurement table; the `measurement` and `time` columns are
# read by the objective function and the plotting cell further down.
dataframe = pd.read_csv(
    "data/SS_measurements_scaling.tsv",
    sep="\t",
)

In [3]:
def analytical_b(t, a0, b0, k1, k2):
    """Closed-form value of the observable at time ``t``.

    Evaluates ``(k2 - k2 * exp(-(k1 + k2) * t)) / (k1 + k2)``.

    Parameters
    ----------
    t : float or array-like
        Time point(s) at which to evaluate.
    a0, b0 :
        Accepted for interface compatibility; the formula does not use them.
    k1, k2 : float
        Rate constants.

    Returns
    -------
    Same shape as ``t``.
    """
    rate_sum = k2 + k1
    decay = np.exp(-rate_sum * t)
    return (k2 - k2 * decay) / rate_sum

def simulate_model(x, tvec):
    """Evaluate ``analytical_b`` at every time point in ``tvec``.

    Parameters
    ----------
    x : sequence of length 3
        ``(k1, k2, <ignored>)``; the third entry is unpacked but not used.
    tvec : iterable
        Time points to simulate.

    Returns
    -------
    list
        One simulated value per entry of ``tvec``, in the same order.
    """
    # assign parameters (exactly three entries are required by the unpacking)
    k1, k2, _ignored = x
    # fixed initial conditions
    a0, b0 = 1, 0
    # simulate model, one time point after another
    trajectory = []
    for time_point in tvec:
        trajectory.append(analytical_b(time_point, a0, b0, k1, k2))
    return trajectory

After importing the model, we need to define the objective function. This time we define it as an external Python function that pyPESTO will then use, instead of relying on one of the built-in objectives.

For numerical reasons we will implement the log likelihood and log prior.

In [4]:
def negative_log_posterior(x):
    """Negative log posterior evaluated at the parameter vector ``x``.

    ``np.exp(x)`` is handed to ``simulate_model`` (which uses its first two
    entries as ``k1`` and ``k2``), while ``x[2]`` is used directly as
    ``shape``. Measurements and time points are read from the module-level
    ``dataframe`` (columns ``measurement`` and ``time``), and ``lamda`` is
    read from a module-level global that must be defined before the first
    call.

    The value is assembled from closed-form exponential terms: one for each
    interval between consecutive positive ``data / simulation`` ratios,
    starting at 0, plus a tail term reaching to infinity.
    NOTE(review): this looks like an analytic integration over a
    multiplicative scaling factor with an exponential prior of rate
    ``lamda`` and Laplace-type noise of scale ``shape`` -- confirm against
    the derivation.

    Parameters
    ----------
    x : sequence of length 3
        ``(log k1, log k2, shape)``.

    Returns
    -------
    float
        The negative log posterior (required by pyPESTO). ``np.inf`` is
        returned when ``shape`` is not positive or when the computed value
        is NaN or infinite, so that the point is treated as infeasible
        instead of propagating a non-finite number to the caller.
    """
    shape = x[2]

    # log(shape) and the divisions by shape below are undefined otherwise
    if not shape > 0:
        return np.inf

    # experimental data
    data = np.asarray(dataframe.measurement, dtype=float)
    # time vector
    tvec = np.asarray(dataframe.time)

    N = len(tvec)

    # simulate model
    simulation = np.asarray(simulate_model(np.exp(x), tvec))

    # Ratio data / simulation per data point; entries with a zero simulation
    # get the ratio 0.
    ratios = np.divide(data, simulation, out=np.zeros(N), where=simulation != 0)

    # A stable argsort orders tied ratios by their original index, which is
    # the order the previous slot-by-slot placement produced, without
    # needing a sentinel value.
    order = np.argsort(ratios, kind="stable")
    ratios_sorted = ratios[order]

    # Number of non-positive ratios. Counting them on the sorted array also
    # gives the right answer (N) when no ratio is positive at all.
    zero_amnt = int(np.searchsorted(ratios_sorted, 0, side="right"))
    b_vector = ratios_sorted[zero_amnt:]
    N_prime = N - zero_amnt

    # Positive-ratio entries first (ascending ratio), then the non-positive
    # ones moved to the end.
    data_sorted = np.roll(data[order], -zero_amnt)
    simulation_sorted = np.roll(simulation[order], -zero_amnt)

    # interval boundaries: 0, every positive ratio, infinity
    bounds = np.concatenate(([0.0], b_vector, [np.inf]))

    d_vector = -np.sum(data_sorted)
    q_vector = np.sum(simulation_sorted)

    l_llh = 0
    check = lamda * shape

    for i in range(N_prime):
        aux1 = np.exp(d_vector / shape)

        if q_vector != check:
            aux2 = (shape / (q_vector - check))
            aux3 = np.exp(bounds[i + 1] * (q_vector - check) / shape) \
                - np.exp(bounds[i] * (q_vector - check) / shape)
        else:
            # the exponent vanishes, so the term reduces to the interval width
            aux2 = 1
            aux3 = bounds[i + 1] - bounds[i]

        l_llh += aux1 * aux2 * aux3
        # crossing the i-th ratio flips the contribution of that data point
        d_vector = d_vector + 2 * data_sorted[i]
        q_vector = q_vector - 2 * simulation_sorted[i]

    # tail term from the largest ratio to infinity
    l_llh = l_llh - np.exp(d_vector / shape) * (shape / (q_vector - check)) \
        * np.exp(bounds[N_prime] * ((q_vector - check) / shape))

    # return NEGATIVE log posterior (required for pyPESTO)
    neg_log_post = -(np.log(lamda) - N * (np.log(2) + np.log(shape)) + np.log(l_llh))

    # Overflow/underflow in the exponentials or a non-positive l_llh yields
    # NaN or +/-inf here; report such points as infeasible.
    # NOTE(review): the recorded sampling run aborted with "Array must not
    # contain infs or NaNs"; non-finite objective values are one possible
    # source -- confirm.
    if not np.isfinite(neg_log_post):
        return np.inf
    return neg_log_post

Now that we have the objective function defined, we need to create a pyPESTO problem.

In [5]:
def standard_sampling():
    """Build the pyPESTO problem wrapping ``negative_log_posterior``.

    The problem has three parameters, ``k1``, ``k2`` and ``shape``. The two
    rates are declared on log scale with bounds [-5, 5]; ``shape`` is on
    linear scale with lower bound 0 and no finite upper bound.

    Returns
    -------
    pypesto.Problem
    """
    parameter_names = ['k1', 'k2', 'shape']
    parameter_scales = ['log', 'log', 'lin']
    lower_bounds = [-5, -5, 0]
    upper_bounds = [5, 5, np.inf]

    return pypesto.Problem(
        objective=pypesto.Objective(fun=negative_log_posterior),
        lb=lower_bounds,
        ub=upper_bounds,
        x_names=parameter_names,
        x_scales=parameter_scales,
    )

In [6]:
### Prior-dependent parameters

# read as a module-level global inside negative_log_posterior
lamda = 0.01

# the sampler of choice
sampler = sample.AdaptiveMetropolisSampler()

# MCMC chain length
n_samples = 100000

# the estimation problem
problem = standard_sampling()

In [7]:
# Define number of runs
runs = 1

save_results = False  # for testing just set to False

# Loop over n runs
for n in range(runs):
    # seed NumPy's global random generator with the run index
    np.random.seed(n)
    # perform MCMC sampling
    result = sample.sample(problem, n_samples=n_samples, sampler=sampler,
                           x0=np.array([-1.2741, -0.6160, 0.3684]))
    # calculate effective sample size
    sample.effective_sample_size(result=result)

    # save the results as a pickle object, one file per run
    if save_results:
        results = result.sample_result
        with open('Results/Full_parameter/' + str(n) + '.pickle', 'wb') as result_file:
            # The third positional argument of pickle.dump is the pickle
            # *protocol*, not a label. Passing the run index there selected
            # a different protocol per run and raises ValueError once the
            # index exceeds the highest supported protocol, so the default
            # protocol is used instead.
            pickle.dump(results, result_file)

 14%|█▍        | 14128/100000 [00:05<00:32, 2661.84it/s]


LinAlgError: Array must not contain infs or NaNs

There are some built-in visualization functions that one can use.

In [None]:
# Effective sample size, and the same value divided by the recorded time
print(result.sample_result.effective_sample_size)
print(result.sample_result.effective_sample_size / result.sample_result.time)

# Visualize the parameter trace
ax = visualize.sampling.sampling_parameters_trace(
    result,
    use_problem_bounds=False,
    full_trace=True,
    size=(12, 5),
)

# Visualize the one-dimensional marginals --> Important!
ax = visualize.sampling_1d_marginals(result, size=(12, 5))

In [None]:
# Toggle for the MAP plot below
plot_MAP = True

if plot_MAP:
    # Sample with the smallest negative log posterior after burn-in
    # (first chain only); argmin picks the same index as the argmax of the
    # negated trace.
    MAP_index = np.argmin(
        result.sample_result.trace_neglogpost[0, result.sample_result.burn_in:])
    MAP = result.sample_result.trace_x[
        0, result.sample_result.burn_in + MAP_index, :]

    # experimental data
    data = np.array(dataframe.measurement)
    # time vector
    tvec = np.array(dataframe.time)

    # dense grid between the first and last entry of the time vector
    # (assumes the time column is non-empty and ordered -- TODO confirm)
    tvec_for_simulation = np.linspace(tvec[0], tvec[-1], 100)

    # NOTE(review): the last parameter is named 'shape' in the problem
    # definition, yet it is used here as a multiplicative factor on the
    # simulation -- confirm this is intended.
    scale = MAP[-1]

    # simulate model
    _simulation = simulate_model(np.exp(MAP), tvec_for_simulation)
    simulation = (scale * np.asarray(_simulation))

    # matplotlib is imported as `plt` in the notebook's import cell
    plt.figure()
    plt.plot(tvec, data, 'or', label='Corrupted data')
    plt.plot(tvec_for_simulation, simulation, 'k', label='MAP simulation')
    plt.xlabel('Time [a.u.]')
    plt.ylabel('Signal [a.u.]')
    plt.ylim([0, 2])
    plt.legend()
    plt.show()