**N.B.** to generate the data, have a look at the script `neutral.sh`.

In [None]:
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import numpy as np
import pandas as pd
import socket
from pathlib import Path

from hscpy import mitchell, realisation
from hscpy.figures import PlotOptions

from futils import parse_version, snapshot

# Directory expected to contain the `hsc` executable (it is invoked as
# `$1/hsc --version` by the bash cell below).
PATH2BIN = Path("~").expanduser() / "hsc/target/release"
if not PATH2BIN.is_dir():
    # Fail early with an actionable message rather than a bare AssertionError.
    raise NotADirectoryError(f"hsc binary directory not found: {PATH2BIN}")

# Sample size; selects the `{SAMPLE}cells` simulation folder
# (presumably a number of cells -- confirm against `neutral.sh`).
SAMPLE = 368
# When True the simulations are read from /data/scratch/, else from /data/home/.
USE_SCRATCH = True

# Figure output settings.
SAVEFIG = True
BIGLABELS = False
FIGSIZE = [5, 3] if BIGLABELS else [6.4, 4.8]  # default matplotlib
EXTENSION = ".png"

# NOTE(review): `options` hard-codes figsize=(7, 4) and ignores FIGSIZE above;
# confirm that this is intended.
options = PlotOptions(figsize=(7, 4), extension=EXTENSION, save=SAVEFIG)

In [None]:
%%bash -s "$PATH2BIN" --out version
# $1 is the directory passed through `-s`; stdout of the command is captured
# into the Python variable `version` via `--out`.
# Quote the expansion so a path containing whitespace is not word-split.
"$1/hsc" --version

In [None]:
# `version` is the raw stdout of `hsc --version`, captured by the bash cell.
VERSION = parse_version(version)
print("Running hsc with version:", VERSION)

# Per-version directory (relative to the working directory).
PATH2SAVE = Path(f"./{VERSION}")

# Root of the simulation output for this version of hsc.
if USE_SCRATCH:
    PATH2SIMS = Path("/data/scratch/")
else:
    PATH2SIMS = Path("/data/home/")
PATH2SIMS /= f"hfx923/hsc-draft/{VERSION}"

# Location of the Mitchell data depends on the machine running the notebook;
# unknown hosts fall back to the user's home directory.
_hostname = socket.gethostname()
if _hostname == "5X9ZYD3":
    PATH2MITCHELL = Path("/mnt/c/Users/terenz01/Documents/SwitchDrive/PhD/hsc")
elif _hostname == "LAPTOP-CEKCHJ4C":
    PATH2MITCHELL = Path("/mnt/c/Users/fra_t/Documents/PhD/hsc")
else:
    PATH2MITCHELL = Path("~").expanduser()

In [None]:
# Load the burden of the neutral simulations for the chosen sample size.
# The result is used below as a mapping from age to a collection of entries
# exposing a `.burden` attribute.
_burden_dir = PATH2SIMS / f"neutral/{SAMPLE}cells/burden/"
burden_neutral = realisation.load_all_burden_by_age(_burden_dir)

In [None]:
# Load the Mitchell summary table; donor KX007 is dropped by the loader.
summary = mitchell.load_and_process_mitchell(
    PATH2MITCHELL / "Summary_cut.csv", drop_donor_KX007=True
)
# Displayed as the cell output to sanity-check the column types.
summary.dtypes

In [None]:
# Two side-by-side subfigures: the data panel (left, wider) and a narrow
# panel on the right that only hosts the legend.
fig = plt.figure(layout="constrained", figsize=options.figsize)
subfigs = fig.subfigures(1, 2, wspace=-0.1, width_ratios=[2.4, 1])

ax = subfigs[0].subplots(1, 1)
ax3 = subfigs[1].subplots(1, 1)

# Per-age statistics of the simulated single-cell burden.
# `max_`, `min_` and `runs` are only needed by the optional min-max envelope
# (`ax.fill_between(ages, max_, min_, ...)` labelled with `runs`), which is
# currently disabled.
max_, min_, means = [], [], []
runs = len(burden_neutral[0.0])
for t, b in burden_neutral.items():
    # Pool the burden of every entry available at age `t`.
    pooled = snapshot.Uniformise.pooled_histogram([ele.burden for ele in b])
    # Debugging aid: `realisation.plot_burden(pooled, some_ax, ...)` can be
    # used here to inspect the pooled histogram of a single age.
    yo = realisation.single_cell_mutations_from_burden(pooled)
    ax.plot(
        [t] * yo.shape[0], yo, linestyle="", marker="o", alpha=0.3, color="yellowgreen"
    )
    max_.append(yo.max())
    min_.append(yo.min())
    # `var` is currently unused (it was meant for an optional error bar).
    mean, var = realisation.compute_mean_variance(pooled)
    means.append(mean)

# Mean simulated burden at every age.
ax.plot(
    list(burden_neutral.keys()),
    means,
    linestyle="",
    marker="x",
    color="grey",
    mew=2,
    label="mean",
)
ax.set_xlabel("years")
ax.set_ylabel("nb of mutations")
# Mitchell data: every measurement as a faint point, plus one mean per donor.
# NB: `means` is rebound here and from now on holds the per-donor means.
means, ts = [], []
_donor_style = dict(linestyle="", marker="o", color="purple", alpha=0.05)
for donor in summary.donor_id.unique():
    view = summary.loc[summary.donor_id == donor]
    ax.plot(view.age, view.number_mutations, **_donor_style)
    ts.append(view.age.unique()[0])
    means.append(view.number_mutations.mean())
ax.plot(ts, means, linestyle="", marker="x", color="orange", mew=2, label="Mitchell")

# take all datapoints with max one expanded and detected clone
# (selected by position in the donor order: entries 1, 2 and 4)
_selected = (1, 2, 4)
x = np.asarray([ts[i] for i in _selected])
y = [means[i] for i in _selected]

# Least-squares fit of y = m * x + c on the selected donors.
A = np.column_stack((x, np.ones(len(x))))
_solution = np.linalg.lstsq(A, y, rcond=None)[0]
m, c = _solution
ax.plot(x, m * x + c, "#329acd", linewidth=2, linestyle="--")
print(f"m={m}, c={c}")
# Proxy artists for the legend: they carry no data, only the visual style of
# the corresponding series drawn on `ax`.
sims_handle = mlines.Line2D(
    [], [], color="yellowgreen", marker="o", mew=2, linestyle="", label="sims"
)
data_handle = mlines.Line2D(
    [], [], color="purple", marker="o", mew=2, linestyle="", label="data"
)
avg_sims_handle = mlines.Line2D(
    [], [], color="grey", marker="x", mew=2, linestyle="", label="avg sims"
)
avg_data_handle = mlines.Line2D(
    [], [], color="orange", marker="x", mew=2, linestyle="", label="avg data"
)
# Empty x/y like the other handles (x and y must have matching lengths).
reg_handle = mlines.Line2D(
    [], [], color="#329acd", label=f"m={m:.1f}", linewidth=2, linestyle="--"
)

# The right-hand axes only hosts the legend: hide its ticks and frame.
ax3.legend(
    handles=[sims_handle, data_handle, avg_sims_handle, avg_data_handle, reg_handle],
    fontsize="small",
    loc=6,
    frameon=False,
)
ax3.set_xticks([])
ax3.set_yticks([])
for _side in ("right", "left", "top", "bottom"):
    ax3.spines[_side].set_visible(False)

if options.save:
    # Save through the figure object so the right figure is written even if
    # another figure has been created in the meantime.
    fig.savefig(f"./calibration{options.extension}")
plt.show()