## Compare the number of clones over time from the agent-based model vs SDE model
**How to use this notebook:**
1. Generate the parameters by running `parameters.ipynb`.
2. Generate the first batch of data with `qsub -t 1:1000 hsc-draft/simulations.sh hsc-draft/parameters_sde_varying_eta1.txt`.
3. Generate the second batch of data with `qsub -t 1:1000 hsc-draft/simulations.sh hsc-draft/parameters_sde_varying_eta2.txt`.
4. Run this notebook.

SDE parameters:
```
tau=1
sigma=0.025
N=100'000
detection threshold = 0.005
```

Note: `qsub` is the command used to submit jobs to the Univa Grid Engine available at QMUL. A different command may be needed with other job schedulers (e.g. Slurm, Apache Hadoop...).

In [None]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules (e.g. hscpy) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import json
import numpy as np
from pathlib import Path

from hscpy import mitchell, realisation, parameters, variant
from hscpy.figures import PlotOptions, ToCellFrequency
from hscpy.figures import sfs as sfs_fig

from futils import parse_version, snapshot

# --- Notebook configuration -------------------------------------------------
# Figure size: a compact size when BIGLABELS is set, matplotlib's default otherwise.
BIGLABELS = False
FIGSIZE = [5, 3] if BIGLABELS else [6.4, 4.8]  # default matplotlib
# LATEST=True: use the version reported by the compiled `hsc` binary;
# LATEST=False: fall back to the version pinned in the version cell.
LATEST = True
# Whether figures are written to disk, and with which file extension.
SAVEFIG = True
EXTENSION = ".svg"
# Location of the `hsc` checkout (the binary is run from its target/release
# directory) and of the simulation output, both relative to the user's home.
PATH2HSC = Path("~").expanduser() / "hsc"
PATH2DATA = Path("~").expanduser() / "hsc-draft/data"
PLOT_OPTIONS = PlotOptions(figsize=FIGSIZE, extension=EXTENSION, save=SAVEFIG)
# Threshold passed to `variant.load_all_detected_var_counts_by_age`; matches
# the "detection threshold" listed with the SDE parameters above.
DETECT_THRESHOLD = 0.005

In [None]:
%%bash -s "$PATH2HSC" --out version
# Ask the compiled hsc binary for its version string; `--out version` stores
# the script's stdout in the Python variable `version`.
# "$1" (the hsc checkout path passed via -s) is quoted so that a path
# containing whitespace is not word-split by the shell.
"$1"/target/release/hsc --version

In [None]:
# Pick the hsc release the data belong to: the one reported by the local
# binary (captured in `version` by the bash cell) or a pinned fallback.
VERSION = parse_version(version) if LATEST else "v4.2.1"
print("Running hsc with version:", VERSION)

# Version-specific locations: a save directory relative to the notebook and
# the directory holding the SDE-comparison simulations.
PATH2SAVE = Path(f"./{VERSION}")
PATH2SIMS = PATH2DATA / f"sde/{VERSION}"

In [None]:
# Load the donors through hscpy's `mitchell` module and display them.
donors = mitchell.donors()
donors

In [None]:
def float_similar(f1: float, f2: float, tol: float = 0.001) -> bool:
    """Return True when two floats differ by strictly less than `tol`.

    Args:
        f1: first value.
        f2: second value.
        tol: absolute tolerance; the default of 0.001 reproduces the
            previously hard-coded threshold.

    Returns:
        `abs(f1 - f2) < tol`.
    """
    return abs(f1 - f2) < tol

    
def filter_variants(variants, eta: float):
    """Select, for every key of `variants`, the entries matching `eta`.

    An entry is kept when its `parameters.s` is similar to `eta` according to
    `float_similar`.

    Args:
        variants: mapping from a key to an iterable of objects exposing
            `parameters.s`.
        eta: value that `parameters.s` is compared against.

    Returns:
        A new dict with the same keys, each mapped to the list of kept
        entries (possibly empty).
    """
    return {
        key: [entry for entry in entries if float_similar(entry.parameters.s, eta)]
        for key, entries in variants.items()
    }

### Number of clones

In [None]:
# Agent-based simulations run with mu=4: load the detected variant counts,
# using DETECT_THRESHOLD as the detection threshold.
p = Path(f"{PATH2SIMS}/etas_mu4/100000cells/variant_fraction")
variants_agent_based_etas_mu4 = variant.load_all_detected_var_counts_by_age(
    p, DETECT_THRESHOLD
)

# Split the pooled runs into the two eta values that were simulated.
variants_agent_based_eta005_mu4 = filter_variants(variants_agent_based_etas_mu4, 0.05)
variants_agent_based_eta02_mu4 = filter_variants(variants_agent_based_etas_mu4, 0.2)

# Each eta is expected to contribute 500 runs under key 0.
assert len(variants_agent_based_eta005_mu4[0]) == 500 and len(variants_agent_based_eta02_mu4[0]) == 500
variants_agent_based_eta02_mu4

In [None]:
# Sanity check of the eta=0.2 / mu=4 subset: every key holds 500 runs and
# every run carries the expected parameters.
for runs in variants_agent_based_eta02_mu4.values():
    assert len(runs) == 500
    for run in runs:
        run_params = run.parameters
        assert run_params.mu == 4
        assert float_similar(run_params.s, 0.2), run_params.s

In [None]:
# Sanity check of the eta=0.05 / mu=4 subset: every key holds 500 runs and
# every run carries the expected parameters.
for runs in variants_agent_based_eta005_mu4.values():
    assert len(runs) == 500
    for run in runs:
        run_params = run.parameters
        assert run_params.mu == 4
        assert float_similar(run_params.s, 0.05), run_params.s

In [None]:
# SDE-model results for mu=4, one CSV per eta, read relative to the notebook's
# working directory. The plotting cell below uses their `t` and `nClones` columns.
variants_sde_eta005_mu4 = pd.read_csv("sde_variants/nClonesWithTime_eta0.05_mu4.csv")
variants_sde_eta02_mu4 = pd.read_csv("sde_variants/nClonesWithTime_eta0.2_mu4.csv")

In [None]:
# One figure per (eta, mu) pair: agent-based clone counts overlaid with the
# SDE-model curve. `eta` and `mu` are strings because they are only used for
# the annotations and the output file name.
for (eta, mu), (abm, sde) in zip(
    (("0.05", "4"), ("0.2", "4")),
    [
        (variants_agent_based_eta005_mu4, variants_sde_eta005_mu4),
        (variants_agent_based_eta02_mu4, variants_sde_eta02_mu4),
    ],
):
    counts = variant.variant_counts_detected_df(abm)
    fig, ax = plt.subplots(1, 1, layout="tight")
    # Agent-based model: the shaded band spans the min..max over the runs.
    sns.lineplot(
        data=counts,
        x="age",
        y="variant counts detected",
        errorbar=lambda x: (np.min(x), np.max(x)),
        ax=ax,
        # label="abm",
    )
    # SDE model.
    sns.lineplot(
        data=sde,
        x="t",
        y="nClones",
        ax=ax,
        # label="sde",
    )
    ax.set_ylabel("Expanded clones")
    ax.set_xlabel("Age (years)")
    # Raw f-strings: "\e" and "\m" are invalid escape sequences in a normal
    # string literal (SyntaxWarning on recent Python); the rendered mathtext
    # is unchanged.
    ax.text(x=0.1, y=0.8, s=rf"$\eta={eta}$", fontsize=12, transform=ax.transAxes)
    ax.text(x=0.1, y=0.65, s=rf"$\mu={mu}$", fontsize=12, transform=ax.transAxes)

    if PLOT_OPTIONS.save:
        # Save through the figure handle rather than pyplot's implicit
        # "current figure".
        fig.savefig(f"sde_eta{eta.replace('.', '')}_mu{mu.replace('.', '')}{PLOT_OPTIONS.extension}")

    plt.show()

In [None]:
# Agent-based simulations run with mu=15: load the detected variant counts,
# using DETECT_THRESHOLD as the detection threshold.
p = Path(f"{PATH2SIMS}/etas_mu15/100000cells/variant_fraction")
variants_agent_based_etas_mu15 = variant.load_all_detected_var_counts_by_age(
    p, DETECT_THRESHOLD
)

# Split the pooled runs into the two eta values that were simulated.
variants_agent_based_eta005_mu15 = filter_variants(variants_agent_based_etas_mu15, 0.05)
variants_agent_based_eta02_mu15 = filter_variants(variants_agent_based_etas_mu15, 0.2)

# Each eta is expected to contribute 500 runs under key 0.
assert len(variants_agent_based_eta005_mu15[0]) == 500 and len(variants_agent_based_eta02_mu15[0]) == 500
variants_agent_based_eta02_mu15

In [None]:
# Sanity check of the eta=0.2 / mu=15 subset: every run carries the expected
# parameters.
for runs in variants_agent_based_eta02_mu15.values():
    for run in runs:
        run_params = run.parameters
        assert run_params.mu == 15
        assert float_similar(run_params.s, 0.2), run_params.s

In [None]:
# Sanity check of the eta=0.05 / mu=15 subset: every run carries the expected
# parameters.
for runs in variants_agent_based_eta005_mu15.values():
    for run in runs:
        run_params = run.parameters
        assert run_params.mu == 15
        assert float_similar(run_params.s, 0.05), run_params.s

In [None]:
# SDE-model results for mu=15, one CSV per eta, read relative to the notebook's
# working directory. The plotting cell below uses their `t` and `nClones` columns.
variants_sde_eta005_mu15 = pd.read_csv("sde_variants/nClonesWithTime_eta0.05_mu15.csv")
variants_sde_eta02_mu15 = pd.read_csv("sde_variants/nClonesWithTime_eta0.2_mu15.csv")

In [None]:
# Preview the first 15 rows of the SDE results for eta=0.2, mu=15.
variants_sde_eta02_mu15.head(n=15)

In [None]:
# Mean of "variant counts detected" per age.
# NOTE(review): `counts` is not defined in this cell; it is whatever the most
# recently executed plotting loop left behind. On a top-to-bottom run that is
# the eta=0.2, mu=4 frame, not the mu=15 data loaded just above. Confirm this
# is the intended comparison.
counts[["variant counts detected", "age"]].groupby("age").mean()

In [None]:
# One figure per (eta, mu) pair: agent-based clone counts overlaid with the
# SDE-model curve. `eta` and `mu` are strings because they are only used for
# the annotations and the output file name.
for (eta, mu), (abm, sde) in zip(
    (("0.05", "15"), ("0.2", "15")),
    [
        (variants_agent_based_eta005_mu15, variants_sde_eta005_mu15),
        (variants_agent_based_eta02_mu15, variants_sde_eta02_mu15),
    ],
):
    counts = variant.variant_counts_detected_df(abm)
    fig, ax = plt.subplots(1, 1, layout="tight")
    # Agent-based model: the shaded band spans the min..max over the runs.
    sns.lineplot(
        data=counts,
        x="age",
        y="variant counts detected",
        errorbar=lambda x: (np.min(x), np.max(x)),
        ax=ax,
        #label="agent-based model",
    )
    # SDE model.
    sns.lineplot(
        data=sde,
        x="t",
        y="nClones",
        ax=ax,
        #label="sde",
    )
    ax.set_ylabel("Expanded clones")
    ax.set_xlabel("Age (years)")
    # Raw f-strings: "\e" and "\m" are invalid escape sequences in a normal
    # string literal (SyntaxWarning on recent Python); the rendered mathtext
    # is unchanged.
    ax.text(x=0.1, y=0.8, s=rf"$\eta={eta}$", fontsize=12, transform=ax.transAxes)
    ax.text(x=0.1, y=0.65, s=rf"$\mu={mu}$", fontsize=12, transform=ax.transAxes)
    if PLOT_OPTIONS.save:
        # Save through the figure handle rather than pyplot's implicit
        # "current figure".
        fig.savefig(f"sde_eta{eta.replace('.', '')}_mu{mu.replace('.', '')}{PLOT_OPTIONS.extension}")
    plt.show()

### Distribution of clones, a.k.a. the variant size distribution

In [None]:
# Load the variant fractions by age for the mu=15 simulations.
# The path is built from PATH2DATA instead of a hard-coded, user-specific
# absolute path so the notebook runs for other users.
# NOTE(review): the release stays pinned to v4.3.6 as in the original path,
# i.e. it does not follow VERSION / PATH2SIMS. Confirm whether that is intended.
t = variant.load_all_var_frac_by_age(
    PATH2DATA / "sde" / "v4.3.6" / "etas_mu15" / "100000cells" / "variant_fraction"
)

In [None]:
# Inspect a single simulation: element 1 of the entries stored under key 60
# (presumably age 60, given the "by_age" loader — TODO confirm), showing only
# the strictly positive variant fractions.
arr = np.array(t[60][1].variant_fractions)
arr[arr > 0.0]