In [None]:
import matplotlib.pyplot as plt
import json
import numpy as np
import pandas as pd
import socket
import seaborn as sns
from scipy import stats
from pathlib import Path
from ecdna_figures import dynamics, parse_version

# Directory holding the compiled `hsc` binary; the bash cells run `<PATH2BIN>/hsc`.
PATH2BIN = Path("~").expanduser() / "hsc/target/release"
# Fail early when the binary directory is missing.
assert PATH2BIN.is_dir()
# Figure size passed as `figsize` to `plt.subplots`.
FIGSIZE = [7, 3]
# Selects the extension used for saved figures: ".pdf" when True, ".png" otherwise.
PDF = True
EXTENSION = ".pdf" if PDF else ".png"
# When True, figures are also written to disk under PATH2SAVE / "figures".
SAVE = False

In [None]:
# passed to hsc as `-c` (presumably the number of cells simulated -- TODO confirm)
NCELLS = 200_000
# passed to hsc as `-r`; also used as the length of the run axis when the
# simulation output is loaded back
RUNS = 24

# division rate for the wild-type, aka alpha (hsc `--b0`)
B0 = 1
# avg number of fit mutations arising in 1 year (hsc `--mu0`)
MU0 = 2
# avg number of neutral mutations per proliferative event (hsc `--neutral-rate`)
NEUTRAL_RATE = 1

# proliferative advantage conferred by fit mutations (hsc `-s`)
S = 0.15
# probability of an asymmetric division per proliferative event
# (hsc `--p-asymmetric`)
P_ASYMMETRIC = 0

In [None]:
# Select the reference-simulation CSV, the simulated time span and the x values
# of the ABM snapshots according to the machine the notebook runs on.
if socket.gethostname() == "5X9ZYD3":
    PATH2SIMS = Path(
        "/mnt/c/Users/terenz01/Documents/SwitchDrive/PhD/variantFractionTime_s0.15_sigma0.03_mu2.csv"
    )
    # need + 1 to save the last timepoint
    YEARS = 50 + 1
    # x values (in years) at which the 10 ABM snapshots are plotted
    # NOTE(review): presumably the times at which hsc saves its snapshots for a
    # 50-year run -- confirm against the hsc output
    x = [
        0.1,
        5.6555552,
        11.211111,
        16.766666,
        22.322222,
        27.877777,
        33.433334,
        38.98889,
        44.54445,
        50.0,
    ]
else:
    # on any other host the CSV is expected in the home directory
    PATH2SIMS = Path("~").expanduser() / Path(
        "variantFractionTime_s0.15_sigma0.03_mu2.csv"
    )
    # need + 1 to save the last timepoint
    YEARS = 100 + 1
    # x values (in years) at which the 10 ABM snapshots are plotted
    # NOTE(review): presumably the times at which hsc saves its snapshots for a
    # 100-year run -- confirm against the hsc output
    x = [
        0.1,
        11.211111,
        22.322222,
        33.433334,
        44.544445,
        55.655556,
        66.76666,
        77.87778,
        88.98889,
        100.0,
    ]
# Fail early when the reference simulations are missing.
assert PATH2SIMS.is_file()

In [None]:
%%bash -s "$PATH2BIN" --out version
# $1: directory containing the hsc binary. Stdout is captured into the Python
# variable `version`. Quoted so that a path containing spaces is not word-split.
"$1"/hsc --version

In [None]:
# `version` holds the stdout of `hsc --version`, captured by the previous bash cell.
VERSION = parse_version(version)
# Output directory, named after the binary version: hsc receives it as its last
# positional argument and the cells below read the results back from it.
PATH2SAVE = Path(f"./{VERSION}")
print("Running hsc with version:", VERSION)

In [None]:
%%bash -s "$PATH2BIN" "$PATH2SAVE" "$B0" "$MU0" "$NEUTRAL_RATE" "$S" "$P_ASYMMETRIC" "$RUNS" "$NCELLS" "$YEARS"
# Positional arguments:
#   $1 PATH2BIN   $2 PATH2SAVE   $3 B0     $4 MU0     $5 NEUTRAL_RATE
#   $6 S          $7 P_ASYMMETRIC $8 RUNS  $9 NCELLS  ${10} YEARS
# Remove the output of any previous run. The expansion is quoted and guarded:
# an empty $2 aborts the cell instead of running `rm -rf` with no target, and
# `--` stops a path starting with `-` from being parsed as an option.
rm -rf -- "${2:?output directory must not be empty}"
"$1"/hsc -c "$9" -y "${10}" -r "$8" --b0 "$3" --mu0 "$4" --neutral-rate "$5" -s "$6" --p-asymmetric "$7" "$2"

## Entropy
Compute the entropy of the neutral SFS (taken over all neutral lineages) for each patient, i.e. each simulation run.

In [None]:
def load_sfs_neutral(path2dir: Path):
    """Load the neutral SFS of every run stored under `path2dir / "sfs_neutral"`.

    Each file in that directory is a JSON object: its values are taken as the
    SFS entries of one run and its stem (an integer) as the run index. Runs
    with fewer entries are right-padded with zeros so that all rows have the
    same length.

    Args:
        path2dir: directory containing the `sfs_neutral` sub-directory.

    Returns:
        A tuple `(sfs, idx)` where `sfs` is a float array of shape
        `(n_runs, max_entries)` and `idx` is the list of run indices, one per
        row, in increasing order.

    Raises:
        AssertionError: if `path2dir` is not a directory.
        ValueError: if the `sfs_neutral` directory contains no file.
    """
    assert path2dir.is_dir(), "must be dir"

    # read from the directory that was passed in, not from a notebook global
    path2sfs = path2dir / "sfs_neutral"
    runs = []
    for file in path2sfs.iterdir():
        with open(file, "r") as f:
            runs.append((int(file.stem), list(json.load(f).values())))
    if not runs:
        raise ValueError(f"no neutral sfs found in {path2sfs}")
    # sort by run index so the row order does not depend on the filesystem
    runs.sort(key=lambda run: run[0])

    pad = max(len(sfs) for _, sfs in runs)
    print(f"max number of neutral lineages found in all patients: {pad}")
    data = [sfs + [0] * (pad - len(sfs)) for _, sfs in runs]
    idx = [run_idx for run_idx, _ in runs]
    # the number of rows comes from the files actually read
    return np.array(data, dtype=float).reshape(len(runs), pad), idx

In [None]:
# Normalise the neutral SFS of every run so that each row sums to one, then
# show the entropy of each run as a bar.
pk, idx = load_sfs_neutral(PATH2SAVE)
pk /= pk.sum(axis=1, keepdims=True)
run_entropy = stats.entropy(pk, axis=1)

fig, ax = plt.subplots(1, 1, tight_layout=True, figsize=FIGSIZE)
ax.bar(idx, run_entropy)
ax.set(ylabel="entropy", xlabel="run idx", ylim=[0.95, 1.05])
ax.set_xticks(range(0, RUNS))
plt.show()

## Total variant
The total variant fraction is the combined fraction of all selected clones, i.e. everything except the wild type, averaged over all patients.

In [None]:
def load_variant_fractions(path2dir: Path):
    """Load the variant fractions saved under `path2dir / "variant_fraction"`.

    The directory holds one sub-directory per snapshot, named with an integer,
    and each snapshot holds one comma-separated file per run. The first field
    of every file (the wild-type clone) is dropped.

    Args:
        path2dir: directory containing the `variant_fraction` sub-directory.

    Returns:
        A float array of shape `(timepoints, runs, subclones - 1)`. Snapshots
        are ordered by decreasing directory name and, within a snapshot, runs
        are ordered by file name, so the run axis lines up across timepoints
        as long as every snapshot holds the same file names.

    Raises:
        AssertionError: if `path2dir` is not a directory.
        ValueError: if nothing is found, or if the snapshots do not all have
            the same number of runs and clones.
    """
    assert path2dir.is_dir(), "must be dir"

    def _run_order(file: Path):
        # integer stems sort numerically, anything else lexicographically after
        stem = file.stem
        return (0, int(stem), "") if stem.isdigit() else (1, 0, stem)

    snapshots = sorted(
        (path2dir / "variant_fraction").iterdir(),
        key=lambda path2name: int(path2name.name),
        reverse=True,
    )  # need to reverse because rust saves from the last timepoint

    data = []
    for path2snapshot in snapshots:
        runs = []
        # iterdir() yields files in arbitrary order: sort them so that a given
        # position on the run axis is the same run at every timepoint
        for file in sorted(path2snapshot.iterdir(), key=_run_order):
            with open(file, "r") as f:
                fields = f.read().split(",")
            # remove wild type clone (first field) and skip empty fields
            runs.append([float(ele) for ele in fields[1:] if ele.strip()])
        data.append(runs)

    # infer the shape from the files instead of hard-coding it
    runs_per_snapshot = {len(runs) for runs in data}
    clones_per_run = {len(run) for runs in data for run in runs}
    if len(runs_per_snapshot) != 1 or len(clones_per_run) != 1:
        raise ValueError(
            "variant fractions are missing or inconsistent: found "
            f"{sorted(runs_per_snapshot)} runs per snapshot and "
            f"{sorted(clones_per_run)} clones per run"
        )
    # timepoints x runs x MAX_SUBCLONES - 1 (rm wildtype)
    return np.array(data, dtype=float)

In [None]:
# Compare the total variant fraction of the ABM (mean and std over the runs)
# with the theory curve and the reference simulations read from PATH2SIMS.
fig, ax = plt.subplots(1, 1, tight_layout=True, figsize=FIGSIZE)
# keep the rows whose index label is <= YEARS (`.loc` slices are inclusive)
other_sims = pd.read_csv(PATH2SIMS).loc[:YEARS, :]

ax.set_xlabel("time [years]")
ax.set_ylabel("avg total variant fraction")
variant_fraction = load_variant_fractions(PATH2SAVE)
# sum over the clone axis -> timepoints x runs
total_fraction = variant_fraction.sum(axis=-1)
ax.errorbar(
    x,
    total_fraction.mean(axis=-1),
    yerr=total_fraction.std(axis=-1),
    fmt="o",
    alpha=0.8,
    label=f"ABM, avg of {RUNS} runs",
)
# columns are accessed by key: attribute access is fragile for names such as "_t"
ax.plot(other_sims["_t"], other_sims["theory"], label="theory")
ax.plot(
    other_sims["_t"],
    other_sims["simsAverage"],
    linestyle="--",
    label="sims, avg of 500 runs",
)
ax.legend(loc="upper left")
if SAVE:
    path2figure = PATH2SAVE / "figures"
    # create the directory up front instead of retrying after a failed save
    path2figure.mkdir(parents=True, exist_ok=True)
    fig.savefig(path2figure / f"total_variant{EXTENSION}")
plt.show()

In [None]:
# One figure per run: the variant fraction of every clone at each timepoint.
# The x values are truncated to integers to be used as bar labels.
int_x = [int(x_) for x_ in x]
for run in range(RUNS):
    fig, ax = plt.subplots(1, 1, tight_layout=True, figsize=FIGSIZE)
    for clone in range(variant_fraction.shape[-1]):
        sns.barplot(x=int_x, y=variant_fraction[:, run, clone], ax=ax)
    # `run` is the position along the run axis of `variant_fraction`
    ax.set_title(f"run {run}")
    ax.set_ylabel("variant fraction")
    ax.set_xlabel("time [years]")
    plt.show()
    # release the figure, otherwise all RUNS figures stay open until the end
    plt.close(fig)