# Burden
The simulated data analysed in this notebook were generated by running `sfs.sh parameters.txt`.

In [None]:
# IPython autoreload extension, mode 2: modules are re-imported before each
# cell runs, so edits to the imported packages are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import socket
import seaborn as sns
import subprocess
from pathlib import Path

from hscpy import mitchell, realisation, parameters
from hscpy.figures import PlotOptions, simulations, ToCellFrequency
from hscpy.figures import burden as burden_fig

from futils import parse_version, snapshot

# --- notebook configuration ---
# LATEST: take the version reported by the hsc binary (True) or the pinned
# release used further down (False).
LATEST = True
# SAVEFIG is forwarded to PlotOptions(save=...).
SAVEFIG = True
# BIGLABELS switches to the smaller figure size.
BIGLABELS = False
if BIGLABELS:
    FIGSIZE = [5, 3]
else:
    FIGSIZE = [6.4, 4.8]  # default matplotlib
EXTENSION = ".svg"
PATH2HSC = Path("~").expanduser().joinpath("hsc")
PLOT_OPTIONS = PlotOptions(save=SAVEFIG, extension=EXTENSION, figsize=FIGSIZE)

In [None]:
# Directory holding the Mitchell data: known hosts map to hard-coded
# locations, any other machine falls back to the home directory.
_MITCHELL_DIR_BY_HOST = {
    "5X9ZYD3": "/mnt/c/Users/terenz01/Documents/SwitchDrive/PhD/hsc",
    "LAPTOP-CEKCHJ4C": "/mnt/c/Users/fra_t/Documents/PhD/hsc",
}
_known_dir = _MITCHELL_DIR_BY_HOST.get(socket.gethostname())
PATH2MITCHELL = Path("~").expanduser() if _known_dir is None else Path(_known_dir)

In [None]:
%%bash -s "$PATH2HSC" --out version
# $1 is the hsc directory passed through -s; the binary's stdout is captured
# into the Python variable `version` (via --out) for the next cell.
$1/target/release/hsc  --version

In [None]:
# Label this run with the version reported by the binary, or with the pinned
# release when LATEST is switched off.
VERSION = parse_version(version) if LATEST else "v3.0.6"
PATH2SAVE = Path(f"./{VERSION}")

print("Running hsc with version:", VERSION)

In [None]:
# Donor metadata table; later cells read its `name`, `age` and `cells` columns.
donors = mitchell.donors()
donors  # bare last expression so the notebook renders the table

In [None]:
# mitchell's donors
# One burden record per donor, in the row order of `donors`. Later cells index
# each record as [0] donor name, [1] age, [3] burden histogram.
burden_donors = []
for donor in donors.itertuples():
    print("loading burden for donor", donor.name)
    donor_burden = mitchell.burden_donor_mitchell(
        donor.name, donor.age, PATH2MITCHELL, False
    )
    burden_donors.append(donor_burden)

In [None]:
# Summary statistics of the single-cell burden of the first two donors.
# The per-cell arrays are expanded once and reused for both statistics.
first_cells = snapshot.array_from_hist(burden_donors[0][3])
second_cells = snapshot.array_from_hist(burden_donors[1][3])

m1 = first_cells.mean()
m2 = second_cells.mean()
print(f"The mean single-cell mut burden of the two neoborns computed from the genotype matrix is: {m1:.2f}, {m2:.2f}")

# m1 / m2 are rebound to the variances from here on
m1 = first_cells.var()
m2 = second_cells.var()
print(f"The variance single-cell mut burden of the two neoborns computed from the genotype matrix is: {m1:.2f}, {m2:.2f}")
# average of the two variances
print(np.mean([m1, m2]))

In [None]:
import matplotlib.colors as mcolors

In [None]:
# DONORS only
# Overlay the normalised burden distribution of every donor on one axis and
# collect the per-donor mean and variance, which later cells turn into
# DataFrames (`means` -> name/age/mean, `variances` -> name/age/variance).
from itertools import cycle

fig, ax = plt.subplots(1, 1)
means, variances = [], []
# Cycle the palette: zip() stops at its shortest input, so pairing the donors
# directly with the finite colour table would silently drop every donor beyond
# the palette length from both the plot and the statistics.
for b, c in zip(burden_donors, cycle(mcolors.TABLEAU_COLORS.values())):
    name, age, hist = b[0], b[1], b[3]
    donor_row = donors.loc[donors.name == name]
    tot_cells = sum(hist.values())
    # the histogram must account for every cell reported for this donor
    assert tot_cells == donor_row.cells.iloc[0]
    array = snapshot.array_from_hist(hist)
    means.append((name, age, array.mean()))
    variances.append((name, age, array.var()))

    burden_fig.plot_burden(
        ax,
        hist,
        normalise=True,
        options=PLOT_OPTIONS,
        ls="-",
        marker=".",
        mew=3,
        alpha=0.5,
        color=c,
        label=f"{donor_row.age.iloc[0]} y.o.",
    )
ax.legend(fontsize="small", ncols=2)
plt.show()

In [None]:
# regress neutral donors
# Least-squares fit of mean burden against age on the donors without a
# detected expanded clone, drawn over every donor's single-cell burdens
# (dots) and their per-donor mean (black crosses).
fig, ax = plt.subplots(1, 1)
# Build the per-donor means table from `means` right here, so this cell does
# not depend on a DataFrame that is only created further down the notebook.
means_df = pd.DataFrame(means, columns=["name", "age", "mean"])
# neutral donors have no detected exp clone
neutral_donors = {"CB002", "KX001", "SX001"}
neutral = means_df.loc[means_df.name.isin(neutral_donors)]
x, y = neutral["age"], neutral["mean"]
# design matrix [age, 1] so that lstsq solves y = m * age + c
A = np.vstack([x, np.ones(len(x))]).T
m, c = np.linalg.lstsq(A, y, rcond=None)[0]
ax.plot(donors.age, m * donors.age + c, "black", linewidth=2, linestyle="--")
for donor in donors.itertuples():
    # burden record of this donor ([0] is the name, [1] the age, [3] the histogram)
    d_burden = next(d for d in burden_donors if d[0] == donor.name)
    array = snapshot.array_from_hist(d_burden[3])
    # one dot per cell, all at the donor's age
    ax.plot([d_burden[1]] * array.shape[0], array, ls="", marker=".", alpha=0.3)
    ax.plot([d_burden[1]], array.mean(), ls="", marker="x", mew=2, color="black")
print(m, c)
ax.set_ylabel("Single-cell burden")
ax.set_xlabel("Time [years]")
ax.text(x=1, y=1500, s=f"m={m:.2f}")
plt.show()

In [None]:
# Simulated burdens per donor: load the runs made with the donor's number of
# cells and keep only the entry at the donor's age.
burden_sims = {
    donor.name: realisation.load_all_burden_by_age(
        Path(f"{VERSION}/{donor.cells}cells/burden")
    )[donor.age]
    for donor in donors.itertuples()
}

In [None]:
# check that the burden in the sims match the data
# One figure per donor: the pooled simulated burden (grey) is drawn together
# with the donor's observed burden. The pooled mean and variance are stored
# in `means_var_s` as (name, age, mean, variance) for the summary plots.
means_var_s = []
for b in burden_donors:
    name, age, hist = b[0], b[1], b[3]
    print(name)
    fig, ax = plt.subplots(1, 1)
    # sims
    sims = burden_sims[name]
    pooled = snapshot.Uniformise.pooled_distribution([bur.burden for bur in sims])
    m_, v_ = realisation.compute_mean_variance(pooled)
    burden_fig.plot_burden(
        ax,
        pooled,
        normalise=False,
        color="grey",
        marker=".",
        alpha=0.5,
        label=f"{len(sims)} sims",
        options=PLOT_OPTIONS,
    )
    means_var_s.append((name, age, m_, v_))
    # data
    # the histogram must account for every cell reported for this donor
    tot_cells = sum(hist.values())
    assert tot_cells == donors[donors.name == name].cells.iloc[0], (
        f"cell count mismatch for donor {name}"
    )
    burden_fig.plot_burden(
        ax,
        hist,
        normalise=True,
        color="#d95f0e",
        marker=".",
        bins=10,
        alpha=0.5,
        label=f"{age} y.o.",
        options=PLOT_OPTIONS,
    )
    ax.legend(fontsize="small")
    plt.show()

In [None]:
# Mean single-cell burden against age: donor data versus pooled simulations.
means_df = pd.DataFrame(means, columns=["name", "age", "mean"])
means_var_df_s = pd.DataFrame(means_var_s, columns=["name", "age", "mean", "variance"])

fig, ax = plt.subplots(1, 1)
means_df.plot(x="age", y="mean", ax=ax, label="Mitchell data", color="#d95f0e", marker=".")
means_var_df_s.plot(x="age", y="mean", ax=ax, label="pooled sims", color="grey")
ax.set(title="Mean", xlabel="Time [years]", ylabel="Single-cell burden")
plt.show()

In [None]:
# Variance of the single-cell burden against age: donor data versus pooled
# simulations (the simulation table comes from the mean-comparison cell).
variances_df = pd.DataFrame(variances, columns=["name", "age", "variance"])

fig, ax = plt.subplots(1, 1)
variances_df.plot(x="age", y="variance", ax=ax, label="Mitchell data", color="#d95f0e", marker=".")
means_var_df_s.plot(x="age", y="variance", ax=ax, label="pooled sims", color="grey")
ax.set(title="Variance", xlabel="Time [years]", ylabel="Single-cell burden")
plt.show()