## Show the SFS dynamics varying the params

**how to use this:**
1. Generate parameters by running `parameters.ipynb`
2. Generate the first batch of data by `qsub -t 1:1500 hsc-draft/simulations.sh hsc-draft/parameters_varying_eta.txt`
3. Generate the second batch of data by `qsub -t 1:1500 hsc-draft/simulations.sh hsc-draft/parameters_varying_mu.txt`
4. Generate the third batch of data by `qsub -t 1:1500 hsc-draft/simulations.sh hsc-draft/parameters_varying_tau.txt`
5. Run this notebook

Note: `qsub` is the command to submit jobs via the Univa Grid Engine available at QMUL. Another command might be needed with other job schedulers (e.g. Slurm, Apache Hadoop...).

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell is executed.
%load_ext autoreload
%autoreload 2

In [None]:
import matplotlib.pyplot as plt
from pathlib import Path
from typing import List, Dict, Set

from hscpy import mitchell, realisation, parameters
from hscpy.figures import PlotOptions, ToCellFrequency
from hscpy.figures import sfs as sfs_fig

from futils import parse_version, snapshot

# Notebook-wide settings: figure appearance, saving behaviour and data locations.
BIGLABELS = True
NCELLS = 100_000
# [6.4, 4.8] is the default matplotlib figure size; the smaller size is used with big labels.
FIGSIZE = [6.4, 4.8] if not BIGLABELS else [3.5, 3]
LATEST = True
SAVEFIG = True
EXTENSION = ".svg"
# Both locations are resolved relative to the current user's home directory.
PATH2DATA = Path.home() / "hsc-draft" / "data"
PATH2HSC = Path.home() / "hsc"
PLOT_OPTIONS = PlotOptions(figsize=FIGSIZE, extension=EXTENSION, save=SAVEFIG)

In [None]:
%%bash -s "$PATH2HSC" --out version
# $1 is the hsc directory passed through -s; it is quoted so that a path
# containing spaces is not split. stdout is captured in the Python variable `version`.
"$1"/target/release/hsc --version

In [None]:
# Use the version reported by the hsc binary when LATEST is set, otherwise a pinned one.
VERSION = parse_version(version) if LATEST else "sfs/v3.0.6"
PATH2SAVE = Path(f"./{VERSION}")

print("Running hsc with version:", VERSION)

In [None]:
# Root directory of the simulations for the selected hsc version.
PATH2SIMS = PATH2DATA.joinpath(f"./dynamics/{VERSION}")

In [None]:
# Donor table; the cells below read its `name`, `cells` and `age` columns.
donors = mitchell.donors()
donors  # shown as the cell output

In [None]:
def loading_sfs_varying_param(param: str) -> Dict:
    """Load the simulated SFS of donors KX001 and KX008 for one parameter sweep.

    `param` names the sweep ("eta", "mu" or "tau") and selects the matching
    sub-directory of `PATH2SIMS`. For each of the two donors, the SFS are
    loaded from `<sweep dir>/<donor.cells>cells/sfs` and only the entry stored
    under the donor's age is kept. Every path is printed before loading.

    Returns:
        A dict mapping the donor name to the entry loaded for that donor.

    Raises:
        ValueError: if `param` is not one of the three known sweeps.
    """
    sweep_dirs = {"eta": "etas", "mu": "mus", "tau": "taus"}
    if param not in sweep_dirs:
        raise ValueError("wrong value of param")
    folder = sweep_dirs[param]

    selected = donors[donors.name.isin({"KX001", "KX008"})]
    loaded = {}
    for row in selected.itertuples():
        sfs_dir = Path(f"{PATH2SIMS}/{folder}/{row.cells}cells/sfs")
        print(sfs_dir)
        by_age = realisation.load_all_sfs_by_age(sfs_dir)
        loaded[row.name] = by_age[row.age]
    return loaded

In [None]:
%%time
# Number of cells of donor KX008; the donor table must hold exactly one value for it.
sample_size = donors.loc[donors.name == "KX008", "cells"].unique()
assert sample_size.shape[0] == 1
sample_size = sample_size[0]
# Variants computed with the ONE_OVER_F_SQUARED correction for a population of
# NCELLS cells and KX008's sample size; drawn later as the "growth theory" curve.
correction_1_over_f2_KX008 = realisation.compute_variants(
    realisation.Correction.ONE_OVER_F_SQUARED, pop_size=NCELLS, sample_size=sample_size
)
# x-axis normalisation handed to the correction plot, built from the same sample size.
normalisation_x = ToCellFrequency(sample_size=sample_size)

In [None]:
def plot_fig_params(
    sfs_idx: List[Set], donors, expected_ss, expected_mus, expected_taus, filename: str
):
    """Plot one SFS figure per group of simulations of a parameter sweep.

    For every set of simulation indices in `sfs_idx` a new figure is created.
    It holds the correction curve `correction_1_over_f2_KX008` (labelled
    "growth theory") and, for each donor in the module-level `sfs_sims`, the
    simulated SFS whose `parameters.idx` belongs to the set. The parameters of
    the plotted simulations are written on the axes and the legend is hidden.

    Besides its arguments the function reads the module-level names
    `sfs_sims`, `correction_1_over_f2_KX008`, `normalisation_x` and
    `PLOT_OPTIONS`. Only two marker/colour styles are defined, so at most the
    first two donors of `sfs_sims` are drawn.

    Args:
        sfs_idx: one set of `parameters.idx` values per figure.
        donors: donor table; its `name`, `cells` and `age` columns are read.
        expected_ss: per figure, the value the simulated `s` must equal once
            rounded to 4 decimals.
        expected_mus: same check for `mu`.
        expected_taus: same check for `tau`.
        filename: output path prefix; `_<figure index><extension>` is appended
            when `PLOT_OPTIONS.save` is true.

    Raises:
        AssertionError: if a group mixes several values of `s`, `mu` or `tau`,
            or if the single value differs from the expected one.
    """
    # The per-donor styles do not depend on the figure, so they are defined once.
    markers, linestyles, colors = (".", "x"), ("-", "-"), ("#737373", "#bdbdbd")

    for i, (sfs_s, expected_s, expected_mu, expected_tau) in enumerate(
        zip(sfs_idx, expected_ss, expected_mus, expected_taus)
    ):
        fig, ax = plt.subplots(1, 1, layout="constrained", figsize=PLOT_OPTIONS.figsize)
        sfs_fig.plot_sfs_correction(
            ax,
            correction_1_over_f2_KX008,
            normalise=True,
            options=PLOT_OPTIONS,
            normalise_x=normalisation_x,
            linestyle="-",
            color="black",
            alpha=0.8,
            label="growth theory",
            linewidth=1.5,
        )
        for color, ls, marker, (name, sims) in zip(
            colors, linestyles, markers, sfs_sims.items()
        ):
            subset = [sfs for sfs in sims if sfs.parameters.idx in sfs_s]
            ss, mus, taus = (
                list({ele.parameters.s for ele in subset}),
                list({ele.parameters.mu for ele in subset}),
                list({ele.parameters.tau for ele in subset}),
            )
            # Every simulation of the group must share the expected parameters.
            assert len(ss) == 1
            assert round(ss[0], 4) == expected_s, f"{ss[0]} vs {expected_s}"
            assert len(mus) == 1
            assert round(mus[0], 4) == expected_mu, f"{mus[0]} vs {expected_mu}"
            assert len(taus) == 1
            assert round(taus[0], 4) == expected_tau, f"{taus[0]} vs {expected_tau}"
            cells, age = (
                donors[donors.name == name].cells.squeeze(),
                donors[donors.name == name].age.squeeze(),
            )
            print(age, marker)
            # NOTE(review): the second element of the subset is passed as the
            # single SFS argument; confirm index 1 (not 0) is intended.
            sfs_fig.plot_sfs_with_avg(
                ax,
                subset,
                subset[1],
                age,
                cells,
                marker,
                PLOT_OPTIONS,
                color=color,
                ls=ls,
            )
        # The annotation uses the parameters of the last donor's subset.
        params = subset[1].parameters.into_dict()
        s, std, mu, tau = params["s"], params["std"], params["mu"], params["tau"]
        # Raw f-strings keep the LaTeX backslashes without invalid escape sequences.
        ax.text(
            x=0.5,
            y=0.9,
            s=rf"$\eta={{{s/tau:.2f}}}, \sigma={{{std/tau:.2f}}}$",
            transform=ax.transAxes,
            fontsize=12,
        )
        ax.text(
            x=0.5,
            y=0.8,
            s=rf"$\mu={{{mu}}}, τ={{{tau}}}$",
            transform=ax.transAxes,
            fontsize=12,
        )
        ax.set_ylim([0.1 * 10 ** (-5), 1])
        ax.legend(fontsize=10, ncols=3, frameon=False).set_visible(False)
        if PLOT_OPTIONS.save:
            fig.savefig(filename + f"_{i}{PLOT_OPTIONS.extension}")
        plt.show()

In [None]:
sfs_sims = loading_sfs_varying_param("eta")
# Split the simulation indices into three groups according to `s`:
# exactly 0.05, strictly between 0.05 and 0.14, and above 0.14.
sfs_small_eta = {
    ele.parameters.idx
    for x in sfs_sims.values()
    for ele in x
    if ele.parameters.s == 0.05
}
sfs_medium_eta = {
    ele.parameters.idx
    for x in sfs_sims.values()
    for ele in x
    if 0.05 < ele.parameters.s < 0.14
}
sfs_high_eta = {
    ele.parameters.idx
    for x in sfs_sims.values()
    for ele in x
    if ele.parameters.s > 0.14
}
# One figure per group; the file name is a prefix, the index and extension
# are appended by plot_fig_params.
plot_fig_params(
    [sfs_small_eta, sfs_medium_eta, sfs_high_eta],
    donors,
    [0.05, 0.08, 0.15],
    [1, 1, 1],
    [1, 1, 1],
    "./sfs_dynamics_varying_eta",
)

In [None]:
sfs_sims = loading_sfs_varying_param("mu")
# Split the simulation indices into three groups according to `mu`: 0.5, 3 and 10.
sfs_small_mu = {
    ele.parameters.idx
    for x in sfs_sims.values()
    for ele in x
    if ele.parameters.mu == 0.5
}
sfs_medium_mu = {
    ele.parameters.idx for x in sfs_sims.values() for ele in x if ele.parameters.mu == 3
}
sfs_high_mu = {
    ele.parameters.idx
    for x in sfs_sims.values()
    for ele in x
    if ele.parameters.mu == 10
}
# The file name is only a prefix: plot_fig_params appends the figure index and
# the extension itself, so the extension must not be included here.
plot_fig_params(
    [sfs_small_mu, sfs_medium_mu, sfs_high_mu],
    donors,
    [0.8, 0.8, 0.8],
    [0.5, 3, 10],
    [1, 1, 1],
    "./sfs_dynamics_varying_mu",
)

In [None]:
sfs_sims = loading_sfs_varying_param("tau")
taus = [0.3, 1, 5]
# Expected `s` for each tau (0.8 * tau).
etas = [0.8 * tau for tau in taus]
# Split the simulation indices into three groups, one per value listed in `taus`.
sfs_small_tau = {
    ele.parameters.idx
    for x in sfs_sims.values()
    for ele in x
    if ele.parameters.tau == taus[0]
}
sfs_medium_tau = {
    ele.parameters.idx
    for x in sfs_sims.values()
    for ele in x
    if ele.parameters.tau == taus[1]
}
sfs_high_tau = {
    ele.parameters.idx
    for x in sfs_sims.values()
    for ele in x
    if ele.parameters.tau == taus[2]
}
# The file name is only a prefix: plot_fig_params appends the figure index and
# the extension itself, so the extension must not be included here.
plot_fig_params(
    [sfs_small_tau, sfs_medium_tau, sfs_high_tau],
    donors,
    etas,
    [3, 3, 3],
    taus,
    "./sfs_dynamics_varying_tau",
)