In [1]:
import numpy as np
from kronvec import kron_diag as get_diag_paired
from src.model import bits_fixed_n, MetMHN
import itertools
from scipy.linalg.blas import dcopy, dscal, daxpy
import pandas as pd

# Load the fitted parameters. The "Sampling" column is split off from the theta
# matrix; its first two entries are exponentiated to obtain the rates of the
# first and second observation.
theta_df = pd.read_csv(
    "../../results/paad/paad_mixed_08_003.csv", index_col=0)
tau1, tau2 = np.exp(theta_df["Sampling"][:2])
# Everything except the "Sampling" column is the logarithmic theta matrix.
log_theta = theta_df.drop(columns=["Sampling"]).to_numpy()
mmhn = MetMHN(log_theta=log_theta, tau1=tau1, tau2=tau2)


# n = 2
# np.random.seed(2)
# log_theta = 2 * np.random.random(size=(n + 1, n + 1)) - 1
# tau1, tau2 = np.random.random(2) * 2
# mmhn = MetMHN(log_theta=log_theta, tau1=tau1, tau2=tau2)

In [2]:
def get_diag_unpaired(log_theta, state: np.array) -> np.array:
    """Diagonal of the state-space-restricted rate matrix of the metMHN's Markov chain.

    Entry x of the result is minus the sum, over all events i that have not
    happened in sub-state x, of exp(theta_ii) * prod_j exp(theta_ij), where j
    runs over the events that have happened in x. Events that are 0 in
    ``state`` never happen inside the restricted space, but they still
    contribute their rate to every entry.

    Args:
        log_theta (np.array): Logarithmic theta matrix. Rows and columns
        0 .. log_theta.shape[0] - 1 are read.
        state (np.array): Binary unpaired state vector with integer (or boolean) entries.
        This is the vector according to which state space restriction is performed.
        Shape (n,) with n the number of events including seeding.

    Returns:
        np.array: Diagonal of the restricted rate matrix. Shape (2^k,) with k the number of 1s
        in state. The b-th active event of state corresponds to bit b (least significant first)
        of the index.
    """
    num_events = log_theta.shape[0]
    size = 1 << state.sum()
    diag = np.zeros(size)
    # Scratch buffer holding the contribution of one event at a time. It is
    # fully overwritten for every event, so it needs no reset in between.
    term = np.zeros(size)

    for i in range(num_events):
        term[0] = 1
        filled = 1  # number of leading entries of `term` that are valid so far

        for j in range(num_events):
            active = bool(state[j])
            if not active and i != j:
                # event j is never present in the restricted space: no factor
                continue

            factor = np.exp(log_theta[i, j])
            head = term[:filled]

            if not active:
                # i == j and event i cannot happen inside the restricted
                # space: its base rate applies (negated) to every sub-state
                head *= -factor
                continue

            tail = term[filled:2 * filled]
            if i == j:
                # event i still outstanding -> negative base rate;
                # event i already happened -> no contribution
                head *= -factor
                tail[:] = 0
            else:
                # event j absent -> factor 1 (head stays as it is);
                # event j present -> multiply by exp(theta_ij)
                np.multiply(head, factor, out=tail)
            filled *= 2

        # accumulate the contribution of event i
        diag += term
    return diag

In [74]:
def likelihood(order_1, order_2, log_theta, tau1, tau2):
    """ Compute the likelihood of two orders of events happening before the first
    and the second observation

    Args:
        order_1 (array-like of int): Order of events (2i and 2i+1 encode the ith events happening in PT and Met
        respectively, 2n encodes the seeding) that have happened when the first observation has been made. Note that
        these do not correspond to the actual PT observation, as it is possible that events have happened in the
        metastasis that are not visible in the PT observation. Before the seeding, events are expected to come as
        adjacent (2i, 2i+1) pairs.
        order_2 (array-like of int): Order of events (same encoding as order_1) that have happened between the first
        and the second observation. Every entry is mapped to its event index with `// 2`. Note that these do not
        correspond to the actual Met observation, as it is possible that events have happened in the primary tumor
        that are not visible in the Met observation. May be empty.
        log_theta (np.array): Logarithmic theta values, indexed as an (n + 1) x (n + 1) matrix whose last
        row/column belongs to the seeding.
        tau1 (float): rate of first observation
        tau2 (float): rate of second observation

    Returns:
        float: likelihood of these two orders happening
    """
    # Accept plain Python sequences as well as arrays. Without this, a
    # non-empty list for order_2 fails at `order_2 // 2`.
    order_1 = np.asarray(order_1)
    order_2 = np.asarray(order_2)

    # translate first observation to state
    n = log_theta.shape[0] - 1
    state = np.zeros(2 * n + 1, dtype=int)
    # guard: an empty array created from [] has float dtype and cannot be used as an index
    if len(order_1) > 0:
        state[order_1] = 1
    # NOTE(review): get_diag_paired comes from kronvec; presumably the paired
    # counterpart of get_diag_unpaired - confirm against its documentation.
    diag = get_diag_paired(log_theta=log_theta, n=n, state=state)

    # bit value of every event of order_1 inside the restricted state space
    event_to_bin = {e: 1 << i for i, e in enumerate(np.sort(order_1))}

    # factor for the initial (empty) state
    p = tau1 / (tau1 - diag[0])

    st = np.zeros(2 * n + 1)
    st_bin = 0  # binary state
    seeded = False
    for i, e in enumerate(order_1):
        if not seeded:
            if i % 2:  # if the seeding has not happened yet, every second event is just the second part of the joint development
                continue
            if e == 2 * n:  # seeding
                seeded = True
                st[-1] = 1
                st_bin += event_to_bin[2 * n]
                p *= (np.exp(log_theta[n, st[::2].astype(bool)
                                       ].sum()) / (tau1 - diag[st_bin]))
            else:
                # joint development: the event happens in PT and Met at once
                st[[e, e + 1]] = 1
                st_bin += (event_to_bin[e] + event_to_bin[e + 1])
                p *= (np.exp(log_theta[e // 2, st[::2].astype(bool)
                                       ].sum()) / (tau1 - diag[st_bin]))
        else:
            st[e] = 1
            st_bin += event_to_bin[e]
            if not e % 2:  # PT event
                # the seeding column is masked out for PT events
                p *= (np.exp(log_theta[e//2, np.append(st[:-1:2].astype(bool), False)
                                       ].sum()) / (tau1 - diag[st_bin]))
            else:  # Met event
                # the seeding column is always included for Met events
                p *= (np.exp(log_theta[e//2, np.append(st[1::2].astype(bool), True)].sum()) / (
                    tau1 - diag[st_bin]))

    st = np.append(state[1::2], [1])  # reduce to met events
    k = len(order_2) + st.sum()
    state = st.copy()
    if len(order_2) > 0:
        state[order_2 // 2] = 1
    event_to_bin = {e: 1 << i for i, e in enumerate(np.nonzero(state)[0])}
    # index of the state at the first observation within the restricted space
    st_bin = (st[state.astype(bool)] << np.arange(k)).sum()
    diag = get_diag_unpaired(log_theta=log_theta, state=state)
    p *= tau2 / (tau2 - diag[st_bin])

    for e in order_2 // 2:
        st[e] = 1
        st_bin += event_to_bin[e]
        p *= (np.exp(log_theta[e, st.astype(bool)].sum()
                     ) / (tau2 - diag[st_bin]))

    return p

# n = log_theta.shape[0] - 1
# p1 = likelihood(order_1=np.array([0, 1, 2*n]), order_2=np.array(
#     [3]), log_theta=log_theta, tau1=tau1, tau2=tau2)
# p2 = likelihood(order_1=np.array([0, 1, 2*n, 3]), order_2=np.array(
#     []), log_theta=log_theta, tau1=tau1, tau2=tau2)
# print(p1, p2, p1+p2, sep="\n")


# Total likelihood of the order [0, 1, 2, 3, 70, 20, 8, 4, 5], summed over
# whether the last event (5) happened before or after the first observation.
sum(
    likelihood(order_1=before, order_2=after,
               log_theta=log_theta, tau1=tau1, tau2=tau2)
    for before, after in (
        (np.array([0, 1, 2, 3, 70, 20, 8, 4, 5]), []),
        (np.array([0, 1, 2, 3, 70, 20, 8, 4]), np.array([5])),
    )
)

9.33568477382706e-09

In [4]:
# import matplotlib.pyplot as plt
# from matplotlib import cm

# f,a = plt.subplots(ncols=2, sharey=True)

# for i in range(65,85):
#     a[0].plot([0,1], [x[chr(i)], y[chr(i)]], label=chr(i), color=cm.hsv((i-65)/20))
#     if chr(i) in x_hat.keys():
#         a[1].plot([0,1], [x[chr(i)], y[chr(i)]], label=chr(i), color=cm.hsv((i-65)/20))

Function to transform an order of events into all the different ways it can be split between the first and the second observation

In [86]:
def get_combos(order: np.array, n: int = None) -> list[list[np.array]]:
    """Enumerate the ways an order of events can be split between two observations.

    Starting with "everything happened before the first observation", the
    split point is moved towards the seeding one event at a time. The
    enumeration stops after the first split whose last first-phase event has
    an even index (a PT event or the seeding), so only trailing events with
    odd index (Met events) are ever moved to the second part.

    Args:
        order (np.array): Order of events, where 2i and 2i+1 encode the ith
        event happening in PT and Met respectively and 2n encodes the seeding.
        Must contain the seeding.
        n (int, optional): Number of events excluding the seeding. Defaults to
        log_theta.shape[0] - 1, using the notebook-level log_theta.

    Raises:
        ValueError: If order does not contain the seeding event 2n.

    Returns:
        list[list[np.array]]: One [order_1, order_2] pair per split, as
        returned by np.split.
    """
    if n is None:
        # Same convention as likelihood(): one row of log_theta per event
        # plus one for the seeding.
        n = log_theta.shape[0] - 1

    order = np.asarray(order)
    seeding = np.where(order == 2 * n)[0]
    if seeding.size == 0:
        raise ValueError(
            f"order does not contain the seeding event {2 * n}")

    combos = list()
    for i in range(len(order) - seeding[0]):
        # the last i events form the second part
        combos.append(np.split(order, [len(order) - i]))
        # the next event to be moved has an even index: stop here
        if not order[-i - 1] % 2:
            break
    return combos

In [90]:
# Paired state vector: entries 2i / 2i+1 are event i in PT / Met, the last
# entry is the seeding. Active here: events 0-2 in both PT and Met (0..5),
# PT entries 8 and 20, Met entry 23, and the seeding.
state = np.zeros(2 * mmhn.n + 1, dtype=int)
state[[*range(6), 8, 20, 23, -1]] = 1
likeliest = mmhn._likeliest_order_paired(state)
print(likeliest)

(array([[ 0,  1,  2,  3,  4,  5, 70, 20,  8, 23],
       [ 0,  1,  2,  3, 70, 20,  8,  4, 23,  5],
       [ 0,  1,  2,  3, 70, 20,  8, 23,  4,  5],
       [ 0,  1,  2,  3, 70, 20,  8, 23,  5,  4]]), [5.017845375430524e-12, 8.77775417890597e-12, 5.806851340961197e-12, 3.1807056392086498e-12])


In [91]:
# Sum the likelihood over every split of this order between the first and the
# second observation, printing each split with its individual likelihood.
order = np.array([0, 1, 2, 3, 70, 20, 8, 4, 23, 5], dtype=int)
s = 0
for c1, c2 in get_combos(order):
    l = likelihood(c1, c2, log_theta, tau1, tau2)
    print(c1, c2, l)
    s += l
s

[ 0  1  2  3 70 20  8  4 23  5] [] 1.6863043307471983e-12
[ 0  1  2  3 70 20  8  4 23] [5] 2.6267928928929393e-12
[ 0  1  2  3 70 20  8  4] [23  5] 4.464656955265833e-12


8.77775417890597e-12