In [None]:
import metmhn.simulations as simul
import metmhn.Utilityfunctions as utils
import pandas as pd
import numpy as np
import jax as jax
from jax import numpy as jnp
# Switch JAX to 64-bit types; this is set right after importing jax, before
# any jnp arrays are created further down.
jax.config.update("jax_enable_x64", True)

import matplotlib.pyplot as plt
# Render figure text through LaTeX.
# NOTE(review): this needs a working LaTeX installation on the machine running
# the notebook -- confirm, or set it to False.
plt.rcParams['text.usetex'] = True

Read in the metMHN that was learned on the reduced LUAD dataset in `examples/data_analysis.ipynb`:

In [None]:
# Load the learned model from disk. The first CSV column is used as the row
# index; the first two rows are kept as separate vectors and every remaining
# row forms the matrix `th`.
# NOTE(review): d_p / d_m are presumably the PT / MT observation effects --
# confirm against the notebook that wrote this file.
params = pd.read_csv("../results/luad/luad_g14_20muts.csv", index_col=0)
d_p, d_m = (params.iloc[row].to_numpy() for row in (0, 1))
th = params.iloc[2:].to_numpy()

# Column labels double as event names; `n` is the number of columns of `th`.
events = params.columns
_, n = th.shape

# One shared generator, consumed in order by the sampling cells below.
rng = np.random.default_rng(42)

# Plot the full parameter table (both leading rows and the matrix).
utils.plot_theta(params.to_numpy(), events, 0.2, True, 7.5)

Calculate $P(t_{mut} < t_{seed}| met[mut] = 1)$, i.e. the probability that a mutation happened prior to the seeding, for both primary tumors and metastases:

In [None]:
# Pass fresh NumPy copies of the model parameters to the sampler.
# The call returns three values; the next cell divides the first by each of
# the other two.
# NOTE(review): 50000 is presumably the number of simulated samples -- confirm
# against metmhn.simulations.p_any_mut_pre_seed.
model_arrays = (np.array(th), np.array(d_p), np.array(d_m))
mut_ps, mut_total_prim, mut_total_met = simul.p_any_mut_pre_seed(*model_arrays, 50000, rng)

In [None]:
# Ratio of the pre-seeding count to each total, one row per event except the
# last entry of `events`; rows are ordered by the "MT" ratio.
pre_seed_ratios = np.column_stack((mut_ps / mut_total_prim, mut_ps / mut_total_met))
psp = pd.DataFrame(
    pre_seed_ratios, index=events[:-1], columns=["PT", "MT"]
).sort_values(by=["MT"])

# A4 portrait figure (size in inches).
fig, ax = plt.subplots(figsize=(8.27, 11.69))

plt.style.use("default")
ax.set_xlim(left=0, right=1)
# Dashed reference line at a ratio of 0.5.
ax.axvline(0.5, color="#D55E00", linestyle="dashed")
# Only the "MT" column is drawn.
psp["MT"].plot.barh(ax=ax, color="#D55E00")
ax.legend()

Simulate synthetic data using Gillespie's algorithm and plot the marginal event frequencies:

In [None]:
# Draw a synthetic dataset with a fixed seed and convert the result to NumPy.
# NOTE(review): 10000 is presumably the number of simulated samples -- confirm.
dat = np.array(
    simul.simulate_dat_jax(
        jnp.array(th), jnp.array(d_p), jnp.array(d_m), 10000, seed=4209090908979
    )
)

# The second-to-last column splits the rows into two groups (flag 0 / flag 1);
# the last column is dropped from both groups.
# NOTE(review): that flag looks like the seeding / metastasis indicator -- confirm.
split_flag = dat[:, -2]
dat_prim_nomet = dat[split_flag == 0, :-1]
dat_coupled = dat[split_flag == 1, :-1]

# Keep all columns of the rows whose last column is not 2.
dat_pt_first = dat[dat[:, -1] != 2]

In [None]:
# Marginal event frequencies of the simulated data, one column per event.
#
# The weighting below treats `dat_coupled` as alternating column pairs per
# mutation followed by one trailing column.
# NOTE(review): the pairs are presumably (PT, MT) bits -- confirm against
# simulate_dat_jax.
n_mut = th.shape[0] - 1
n_tot = n_mut + 1

# Weight the first column of every pair by 1 and the second by 2, then add
# each pair into its even column:
#   arr[:, 2k] = dat_coupled[:, 2k] + 2 * dat_coupled[:, 2k + 1]
# The trailing column is carried over unchanged and the odd columns become 0.
pair_weights = np.array([1, 2] * n_mut + [1])
pair_fold = np.diag([1, 0] * n_mut + [1]) + np.diag([1, 0] * n_mut, -1)
arr = (dat_coupled * pair_weights) @ pair_fold

# Every second column, starting at 0, holds one event.
event_cols = slice(0, 2 * n_tot, 2)
n_coupled = dat_coupled.shape[0]
n_nomet = dat_prim_nomet.shape[0]

# Rows 0-2: fraction of coupled samples whose combined code is 1, 2 or 3
# (labelled PT-Private, MT-Private and Shared below).
# Row 3: fraction of the non-coupled samples carrying the event.
counts = np.zeros((4, n_tot))
for code in (1, 2, 3):
    counts[code - 1] = np.count_nonzero(arr[:, event_cols] == code, axis=0) / n_coupled
counts[3] = dat_prim_nomet[:, event_cols].sum(axis=0) / n_nomet

# Two index levels of four entries each. The original second level had six
# entries and only lined up because zip() silently dropped the surplus.
coupled_label = f"Coupled ({n_coupled})"
nomet_label = f"NM ({n_nomet})"
labels = [[coupled_label] * 3 + [nomet_label],
          ["PT-Private", "MT-Private", "Shared", "Present"]]

inds = pd.MultiIndex.from_tuples(list(zip(*labels)))
counts = pd.DataFrame(np.around(counts, 2), columns=events, index=inds).T
counts

Calculate $P(t_{mut} < t_{seed} | prim[mut] = met[mut] = 1)$, i.e. the probability that a **shared** mutation between the primary tumor and the metastasis happened prior to the seeding:

In [None]:
# Sample with the shared generator, then form the ratio of the two returned
# values per event (all entries of `events` except the last).
# NOTE(review): 10000 is presumably the number of simulated samples -- confirm.
mut_ps, mut_total = simul.p_shared_mut_pre_seed(th, d_p, d_m, 10000, rng)
psp = pd.DataFrame(
    mut_ps / mut_total, index=events[:-1], columns=["Shared"]
).sort_values(by=["Shared"])

# A4 portrait figure (size in inches).
fig, ax = plt.subplots(figsize=(8.27, 11.69))
plt.style.use("default")
ax.set_xlim(left=0, right=1)
# Dashed reference line at a ratio of 0.5.
ax.axvline(0.5, color="#D55E00", linestyle="dashed")
psp["Shared"].plot.barh(color="#56B4E9", ax=ax)
ax.legend()