This notebook generates a file `parameters.txt` containing all the parameters required to run the simulations with the `MY_BIN` Rust binary [`hsc`](https://github.com/fraterenz/hsc/releases).

Some parameters are sampled from prior distributions while others are estimated from the literature.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from enum import Enum, auto
from itertools import repeat
from pathlib import Path
from typing import List
from typing import Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from futils import parse_version
from hscpy import parameters, mitchell
from more_itertools import interleave, collapse

# Number of parameter sets to draw; one command line per run is written to
# `parameters.txt`.
RUNS = 65_001
# Infer tau from ABC (4D ABC): when True tau is sampled from a prior,
# otherwise it is fixed to 1 for every run.
ABC_4D = True
# When True mu is sampled uniformly, otherwise it is picked from powers of two.
MU_UNIFORM = True
# Number of cells, identical for every run.
N = 100_000


class SampleConfig(Enum):
    """Which cell subsamples to save for each donor of `mitchell.donors`."""

    # For each donor, save only a sample with the number of cells
    # listed in mitchell.donors.
    AS_DATA = auto()
    # For each donor, save a sample with the number of cells listed in
    # mitchell.donors AND the full population (100'000 cells).
    FULL_POP = auto()
    # For each donor, save a sample with the number of cells listed in
    # mitchell.donors AND the full population (100'000 cells)
    # AND a sample of 5'000 cells.
    FULL_POP_AND_5000 = auto()


# Subsampling scheme used for every generated command line.
SAMPLING = SampleConfig.FULL_POP_AND_5000

## BINARY file that will be used to generate the simulations
PATH2BIN = Path("~").expanduser() / "hsc-results/"
assert PATH2BIN.is_dir(), f"{PATH2BIN} is not a directory"
# ADD bin that will be used to generate sims
# example hsc-v4.3.12-x86_64-unknown-linux-gnu
MY_BIN = "hsc-v4.3.13-x86_64-unknown-linux-gnu"
assert MY_BIN, "MY_BIN must name the binary to use"
MY_BIN = PATH2BIN / MY_BIN
# `is_file` must be *called*: the bare bound method is always truthy, which
# let a missing binary slip through unnoticed.
assert MY_BIN.is_file(), f"{MY_BIN} is not a file"
MY_BIN

In [None]:
class Subsamples:
    """Snapshot ages paired with the number of cells to subsample at each.

    `ages[i]` and `cells[i]` describe the same subsample, so both lists must
    have the same length.
    """

    def __init__(self, ages: List[int], cells: List[int]):
        assert len(ages) == len(cells), f"{ages} vs {cells}"
        self.cells = cells
        self.ages = ages

    def to_params(self) -> str:
        """Render the `--subsamples` and `--snapshots` command-line flags."""
        subsamples = ",".join(str(n_cells) for n_cells in self.cells)
        snapshots = ",".join(str(age) for age in self.ages)
        return f"--subsamples={subsamples} --snapshots={snapshots}"


def interleave_dealing_with_newborns(
    ages: List[int], cells: List[int], interleaved_cells: List[int]
) -> Tuple[List[int], List[int]]:
    """Add extra subsamples after every entry except the first one.

    The first entry is kept as a single subsample to take care of the
    duplication at 0 yo (i.e. 2 newborns). Every following entry is kept and
    followed by one additional subsample per value of `interleaved_cells`,
    taken at the same age.

    Args:
        ages: age of each original subsample; not modified.
        cells: number of cells of each original subsample; not modified.
        interleaved_cells: extra sample sizes to add after each entry.

    Returns:
        The expanded `(ages, cells)` lists, both of the same length when the
        inputs have the same length.

    Raises:
        IndexError: if `ages` or `cells` is empty.
    """
    # Index and slice instead of `pop(0)` so the caller's lists are left
    # untouched; an empty input still raises IndexError.
    age_newborn, cells_newborn = ages[0], cells[0]
    extra_cells = list(interleaved_cells)
    copies = 1 + len(extra_cells)

    new_ages = [age_newborn]
    for age in ages[1:]:
        # one snapshot for the original sample plus one per extra sample
        new_ages.extend([age] * copies)

    new_cells = [cells_newborn]
    for n_cells in cells[1:]:
        new_cells.append(n_cells)
        new_cells.extend(extra_cells)

    return new_ages, new_cells


def subsamples_from_mitchell_donors(how_to_sample: SampleConfig) -> Subsamples:
    """Build the subsampling plan from the ages and cell counts of the donors.

    `AS_DATA` keeps the donors' ages and cell counts as they are; the other
    configurations additionally interleave the full population (`N` cells)
    and, for `FULL_POP_AND_5000`, a sample of 5'000 cells.

    Raises:
        ValueError: if `how_to_sample` is not a known configuration.
    """
    table = mitchell.donors()
    ages, cells = table.age.tolist(), table.cells.tolist()

    if how_to_sample == SampleConfig.AS_DATA:
        return Subsamples(ages, cells)

    if how_to_sample == SampleConfig.FULL_POP:
        extra_cells = [N]
    elif how_to_sample == SampleConfig.FULL_POP_AND_5000:
        extra_cells = [5_000, N]
    else:
        raise ValueError(f"how_to_sample is wrong {how_to_sample}")

    expanded_ages, expanded_cells = interleave_dealing_with_newborns(
        ages, cells, extra_cells
    )
    return Subsamples(expanded_ages, expanded_cells)

In [None]:
%%bash -s "$MY_BIN" --out version
# $1 is the path to the binary (MY_BIN, passed through `-s`).
# Make it executable, then print its version; stdout is captured into the
# Python variable `version` by `--out`.
chmod +x "$1"
"$1" --version

In [None]:
# Parse the `--version` output captured from the binary.
VERSION = parse_version(version)
# Output directory handed to the binary; its last component is the version.
PATH2SAVE = Path(f"/data/scratch/hfx923/hsc-draft/{VERSION}")
PATH2SAVE

In [None]:
# RANGES!!
"""
Up-to-date ranges Feb 13:
    s (i.e. old s/tau) : 0.01 to 0.4
    sigma (i.e. old sigma/tau) : 0.001 to 0.1
    mu: 0.1 to 30
    tau: 0.1 to 10
    N: 25'000 to 900'000
"""
# TRY something new, increase mu space
"""
Up-to-date ranges Feb 13:
    s (i.e. old s/tau) : 0.01 to 0.4
    sigma (i.e. old sigma/tau) : 0.001 to 0.1
    mu: 2, 4, 8, 16, 32, 64, 128, 
    tau: 0.1 to 10
    N: 25'000 to 900'000
"""
# A single seeded generator drives every draw below, so the order of the
# `rng` calls (tau, mu, eta, sigma) determines the sampled values.
rng = np.random.default_rng(seed=42)
# Same population size for every run.
cells = np.full(RUNS, N, dtype=int)

# tau: sampled from a uniform prior for the 4D ABC, otherwise fixed to 1.
taus = rng.uniform(0.1, 10, RUNS) if ABC_4D else np.ones(RUNS, dtype=int)
# mu: uniform prior, or one of the powers of two 2**1, ..., 2**8.
# NOTE(review): the range notes at the top of this cell list mu up to 128 and
# s from 0.01, while the code allows mu up to 256 and eta from 0.001 -- confirm
# which one is intended.
mu0s = (
    rng.uniform(0.1, 30, RUNS)
    if MU_UNIFORM
    else rng.choice(np.exp2(np.arange(1, 9)), RUNS)
)
# eta
etas = rng.uniform(0.001, 0.4, RUNS)
# sigma
sigmas = rng.uniform(0.001, 0.1, RUNS)

# Sanity bounds on the products with tau.
assert np.max(etas * taus) < 4
assert np.max(sigmas * taus) < 1

In [None]:
# Pair plot of the sampled parameters; mu joins it only when it is sampled
# uniformly, otherwise it gets its own histogram below.
pair_columns = {"tau": taus, "eta": etas, "sigma": sigmas, "N": cells}
if MU_UNIFORM:
    pair_columns = {"mu": mu0s, **pair_columns}
sns.pairplot(
    pd.DataFrame(list(pair_columns.values()), index=list(pair_columns)).T
)
plt.show()

if not MU_UNIFORM:
    # mu takes powers of two, so use power-of-two bins on a log2 axis.
    plt.hist(mu0s, bins=np.exp2(np.arange(1, 10)), rwidth=0.5, align="left")
    plt.xscale("log", base=2)
    plt.show()

plt.hist(parameters.compute_m_background_exp(), label="mu background", bins=100)
plt.legend()
plt.show()

In [None]:
# One row per run: (tau, mu, eta, sigma, N). Stacking turns every column into
# floats, hence the `int(n)` conversions below.
rows = np.column_stack((taus, mu0s, etas, sigmas, cells))
# The subsampling flags depend only on SAMPLING, so build them once instead of
# once per run (assumes `mitchell.donors()` returns the same table on every
# call -- TODO confirm).
subsample_args = subsamples_from_mitchell_donors(SAMPLING).to_params()

# One command line per run, with fitness effects drawn from `--mean-std`.
with open("parameters.txt", "w") as f:
    for tau, mu, mean, std, n in rows:
        c = int(n)
        f.write(
            f"{MY_BIN} -c {c + 1} -y 82 -r 1 --sequential "
            f"--mean-std {parameters.compute_s_per_division_from_s_per_year(mean, tau)} "
            f"{parameters.compute_std_per_division_from_std_per_year(std, tau)} "
            f"{subsample_args} {PATH2SAVE} exp-moran "
            f"--tau-exp {parameters.tau_exp(c)} --mu-exp {mu} --mu-division-exp 1.14 "
            f"--mu-background-exp {parameters.compute_m_background_exp()} "
            f"--tau {tau} --mu {mu} --mu-division 1.14 "
            f"--mu-background {parameters.m_background(tau)}\n"
        )

# Neutral runs (`--neutral`) for the first 100 parameter sets only.
with open("parameters_neutral.txt", "w") as f:
    for tau, mu, _mean, _std, n in rows[:100]:
        # Derive the cell count from this row instead of reusing the `c`
        # left over from the previous loop.
        c = int(n)
        f.write(
            f"{MY_BIN} -c {c + 1} -y 82 -r 1 --sequential --neutral "
            f"{subsample_args} {PATH2SAVE} exp-moran "
            f"--tau-exp {parameters.tau_exp(c)} --mu-exp {mu} --mu-division-exp 1.14 "
            f"--mu-background-exp {parameters.compute_m_background_exp()} "
            f"--tau {tau} --mu {mu} --mu-division 1.14 "
            f"--mu-background {parameters.m_background(tau)}\n"
        )

In [None]:
%%bash
# Show the first generated command lines as a visual check.
head parameters.txt

In [None]:
%%bash --out runs
# Count the generated command lines (one per run); stdout is captured into
# the Python variable `runs` by `--out`.
wc -l parameters.txt

In [None]:
# `wc -l` prints "<count> <file>". Split on any whitespace so that a count
# padded with leading spaces (as some `wc` implementations print it) still
# parses instead of yielding an empty first field.
runs = int(runs.split()[0])
assert RUNS == runs, f"{RUNS} vs {runs}"

In [None]:
%%bash
# Show the first generated neutral command lines as a visual check.
head parameters_neutral.txt

In [None]:
%%bash --out runs
# Count the generated neutral command lines; stdout is captured into the
# Python variable `runs` by `--out`.
wc -l parameters_neutral.txt

In [None]:
# The neutral file holds the first 100 parameter sets, or all of them when
# fewer than 100 runs were drawn.
expected_neutral_runs = min(100, RUNS)
# Split on any whitespace so a count padded with leading spaces still parses.
runs = int(runs.split()[0])
# Report the value actually expected here, not RUNS.
assert expected_neutral_runs == runs, f"{expected_neutral_runs} vs {runs}"