# SFS
We plot the burden and the SFS for the data published in [Mitchell et al., Nature 2022](https://www.nature.com/articles/s41586-022-04786-y).

For the SFS we also show some simulations run with the parameters inferred from the ABC.

In [None]:
# Enable IPython's autoreload extension in mode 2, so that imported modules are
# reloaded before each cell is executed.
%load_ext autoreload
%autoreload 2

In [None]:
import matplotlib.pyplot as plt
from matplotlib import gridspec
from matplotlib.lines import Line2D
import numpy as np
import pandas as pd
import socket
import seaborn as sns
import subprocess
from pathlib import Path

from hscpy import mitchell, realisation, parameters
from hscpy.figures import sfs as sfs_figures
from hscpy.figures import PlotOptions, simulations
from futils import parse_version, snapshot

# --- Run settings ---
NCELLS = 100_000  # passed as `pop_size` to the sampling correction further down
SEED = 10
RUN_SIMS = False
LATEST = False  # True: use the version reported by the hsc binary; False: pinned version

# --- Figure settings ---
SAVEFIG = True
BIGLABELS = False
if BIGLABELS:
    FIGSIZE = [5, 4]
else:
    FIGSIZE = [6.4, 4.8]  # default matplotlib
EXTENSION = ".svg"
PLOT_OPTIONS = PlotOptions(figsize=FIGSIZE, extension=EXTENSION, save=SAVEFIG)

# --- Location of the hsc directory (its release binary is queried for the version) ---
PATH2HSC = Path("~").expanduser() / "hsc"
assert PATH2HSC.is_dir()

In [None]:
# Machine-specific locations of Mitchell's data, keyed by hostname; any other
# machine falls back to the user's home directory.
_MITCHELL_DIR_BY_HOST = {
    "5X9ZYD3": "/mnt/c/Users/terenz01/Documents/SwitchDrive/PhD/hsc",
    "LAPTOP-CEKCHJ4C": "/mnt/c/Users/fra_t/Documents/PhD/hsc",
}
_hostname = socket.gethostname()
PATH2MITCHELL = (
    Path(_MITCHELL_DIR_BY_HOST[_hostname])
    if _hostname in _MITCHELL_DIR_BY_HOST
    else Path("~").expanduser()
)

In [None]:
%%bash -s "$PATH2HSC" --out version
# $1 is the PATH2HSC value passed in via -s; the script's stdout is stored in
# the Python variable `version` (--out).
$1/target/release/hsc  --version

In [None]:
# Use the version reported by the hsc binary when LATEST is set, otherwise the
# pinned release.
VERSION = parse_version(version) if LATEST else "v2.2.13"
PATH2SAVE = Path(f"./{VERSION}")

print("Running hsc with version:", VERSION)

In [None]:
# Donor table; the cells below read its `name`, `age` and `cells` columns.
donors = mitchell.donors()
# Bare expression: shown as the cell output.
donors

## Plot the SFS 
Combine different data for this plot:
1. 1/f^2 sampled prediction (computed here in python)
2. Mitchell's SFS (loaded and computed here in python)
3. 1/f sampled prediction from Nate's paper (loaded from external files)
4. SFS from simulations (need to generate them)

### Generate/load/compute the data

#### 1. 1/f^2 predictions

In [None]:
%%time
# compute the correction for the sims' SFS with sampled distributions from
# https://www.biorxiv.org/content/10.1101/2022.11.07.515470v2
corrected_variants_one_over_1_squared = dict()
for donor in donors.itertuples():
    print(
        f"apply sampling correction to SFS of donor {donor.name} with age {donor.age} with sample size {donor.cells}"
    )
    corrected_variants_one_over_1_squared[donor.name] = realisation.compute_variants(
        realisation.Correction.ONE_OVER_F_SQUARED,
        pop_size=NCELLS,
        sample_size=donor.cells,
    )

#### 2. Mitchell's SFS

In [None]:
%%time
# there are two donors with the same age 0
mitchell_sfs = {
    donor.name: mitchell.sfs_donor_mitchell(
        donor.name, donor.age, PATH2MITCHELL, remove_indels=False
    )
    for donor in donors.itertuples()
}

In [None]:
# Sanity check: element 2 of every loaded entry must equal the `cells` value
# recorded for that donor in `donors`. The offending donors are collected so a
# failure says which ones are inconsistent.
donors_with_cell_mismatch = [
    name
    for name, m in mitchell_sfs.items()
    if m[2] != donors.loc[donors.name == name, "cells"].squeeze()
]
assert not donors_with_cell_mismatch, (
    "number of cells loaded for the SFS does not match those in donors "
    f"for: {donors_with_cell_mismatch}"
)

#### 3. 1/f sampled predictions

In [None]:
# Theoretical homeostatic neutral SFS data, from Nate's paper in eLife: for each
# patient (skipping the neonates) the process was evolved until their specific
# age and then sampled to the same size as in the data.
# The first unique age is dropped and file ids are numbered from 3.
ages_without_first = donors.age.unique().tolist()[1:]
mapping = {
    age: f"homeostasisSFS_pid{pid}.csv"
    for pid, age in enumerate(ages_without_first, start=3)
}
mapping

#### 4. SFS from simulations
The data have been generated with the cmd `sfs.sh parameters.txt`.

In [None]:
# For each donor, load the simulated SFS stored under the directory matching
# the donor's number of cells and keep the entry for the donor's age.
sfs_sims = {
    donor.name: realisation.load_all_sfs_by_age(
        Path(f"{VERSION}/{donor.cells}cells/sfs")
    )[donor.age]
    for donor in donors.itertuples()
}

### Plot

In [None]:
# Donors used again by the combined three-panel figure in a later cell.
selected = ["CB002", "KX002", "KX008"]

# One standalone figure per donor, drawn by
# `sfs_figures.plot_ax_sfs_predictions_data_sims`.
for name in donors.name.unique().tolist():
    age = donors.loc[donors.name == name, "age"].squeeze()
    fig, ax = plt.subplots(1, 1, layout="constrained", figsize=PLOT_OPTIONS.figsize)
    sfs_figures.plot_ax_sfs_predictions_data_sims(
        ax,
        donor=donors[donors.name == name].squeeze(),
        corrected_one_over_1_squared=corrected_variants_one_over_1_squared[name],
        sfs_sims_donor=sfs_sims[name],
        mitchell_sfs=mitchell_sfs[name][3],
        # None when no 1/f prediction file is mapped to this age.
        one_over_f_csv=mapping.get(age),
        idx_sim2plot=4,
        plot_options=PLOT_OPTIONS,
    )
    # Only donors of age 0 keep their y-axis label.
    if age:
        ax.set_ylabel("")

    ax.text(
        x=0.65,
        y=0.9,
        s=f"donor {age} y.o.",
        transform=ax.transAxes,
    )
    if PLOT_OPTIONS.save:
        fig.savefig(f"./sfs_age{age}_{name}{PLOT_OPTIONS.extension}")
    fig.show()

In [None]:
# Standalone, axis-free figure holding only a legend built from proxy artists.
fig, ax = plt.subplots(1, 1, figsize=(20, 1))
legend_elements = [
    Line2D([0], [0], color="black", alpha=0.8, lw=4, label="growth scaling law"),
    Line2D(
        [0],
        [0],
        color="black",
        ls="--",
        alpha=0.8,
        lw=4,
        label="homeostasis scaling law",
    ),
    Line2D(
        [0],
        [0],
        marker="x",
        ls="",
        mew=4,
        color="#d95f0e",
        label="data",
        markerfacecolor="g",
        markersize=13,
    ),
    Line2D(
        [0],
        [0],
        marker="o",
        ls="",
        mew=1,
        color="grey",
        label="single simulation",
        markersize=12,
    ),
    Line2D([0], [0], color="grey", alpha=0.6, lw=4, label="simulation average"),
]

# `mode` only accepts "expand" (or None); the previous value "extend" was
# silently ignored, so the legend was never stretched across the axes.
ax.legend(
    handles=legend_elements,
    mode="expand",
    ncols=5,
    handletextpad=0.4,
)
ax.axis("off")
plt.show()

In [None]:
# Combined figure: three SFS panels for the `selected` donors plus a narrow
# fourth axis that only hosts the shared legend.
fig = plt.figure(figsize=[26, 5], layout="constrained")

gs0 = gridspec.GridSpec(1, 2, figure=fig, width_ratios=[7, 1])
gs00 = gridspec.GridSpecFromSubplotSpec(1, 3, subplot_spec=gs0[0], wspace=0.1)
gs01 = gs0[1].subgridspec(1, 1)
ax1 = fig.add_subplot(gs00[0, 0])
ax2 = fig.add_subplot(gs00[0, 1])
ax3 = fig.add_subplot(gs00[0, 2])
ax4 = fig.add_subplot(gs01[0, 0])

# No `.squeeze()` before `.tolist()`: with a single matching donor, squeeze
# would return a plain string, which has no `tolist`.
selected_names = donors.loc[donors.name.isin(selected), "name"].tolist()

for i, (ax_, name) in enumerate(zip((ax1, ax2, ax3), selected_names)):
    sfs_figures.plot_ax_sfs_predictions_data_sims(
        ax_,
        donor=donors[donors.name == name].squeeze(),
        corrected_one_over_1_squared=corrected_variants_one_over_1_squared[name],
        sfs_sims_donor=sfs_sims[name],
        mitchell_sfs=mitchell_sfs[name][3],
        one_over_f_csv=mapping.get(donors.loc[donors.name == name, "age"].squeeze()),
        idx_sim2plot=-1,
        plot_options=PLOT_OPTIONS,
    )
    handles, labels = ax_.get_legend_handles_labels()
    # The panel title is taken from the legend labels; the first panel uses a
    # different index than the others and keeps its y-axis label.
    if i:
        ax_.set_ylabel("")
        ax_.set_title(labels[2])
    else:
        ax_.set_title(labels[1])

# Shared legend built from the handles/labels of the last panel drawn, with
# the entry at index 2 relabelled.
ax4.axis("off")
labels[2] = "donors"
ax4.legend(
    handles=handles,
    labels=labels,
    fontsize="medium",
    loc="upper right",
    frameon=False,
)
if PLOT_OPTIONS.save:
    fig.savefig(f"./3sfs_age{PLOT_OPTIONS.extension}")
fig.show()