In [None]:
import os
# Make the parent directory the working directory. Presumably this is the project
# root, so that the relative "data/..." and "analysis/..." paths used further down
# (and the TravelAndMutate import) resolve from there -- TODO confirm.
# NOTE(review): this is relative to the current directory, so re-running this cell
# without restarting the kernel moves up one more level each time.
os.chdir("..")

In [None]:
import h5py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sb
from scipy.stats import erlang, expon, gamma
from scipy.integrate import quad, dblquad, tplquad
from TravelAndMutate.quickanalysis import computeDepths, computeChildren
import TravelAndMutate.datamanager as datman

See [this link](https://hackmd.io/@dariobaron/SkCdQAdMR) for background information and the formalism used in this notebook.

In [None]:
# Rates handed to mutationsPerHost: there they are multiplied by gamma_trick to
# obtain the rates of the E-stage (epsilon) and I-stage (mu) Erlang durations.
epsilon, mu = 0.1, 0.02
# Erlang shape parameter used for both stage durations.
gamma_trick = 3
# Not referenced by the other visible cells; a single-point alternative to the
# grid below would be 1 / mutation_rate.
mutation_rate = 0.02
# Scan grid: 11 log-spaced mean mutation periods between 10 and 1000 ...
mutation_periods = np.geomspace(10, 1000, num=11)
# ... and 11 log-spaced gamma shape values between 1 and 100, rounded to integers.
kmuts = np.rint(np.geomspace(1, 100, num=11)).astype(int)

In [None]:
def mutationsPerHost(n, gamma_trick, epsilon, mu, mutation_period, kM, size=int(1e6), random_state=None):
	"""Monte Carlo estimate of P(M_n >= E + I).

	Three independent samples are drawn:

	* ``E`` ~ Erlang(shape=gamma_trick, rate=gamma_trick*epsilon)
	* ``I`` ~ Erlang(shape=gamma_trick, rate=gamma_trick*mu)
	* ``M_n`` ~ Gamma(shape=n*kM, scale=mutation_period/kM)

	and the fraction of draws with ``M_n - I - E >= 0`` is returned.
	Presumably ``M_n`` is the waiting time to the n-th mutation and ``E + I`` the
	time spent in a host, so the result estimates the probability of fewer than
	``n`` mutations per host -- see the formalism linked in the notebook to confirm.

	Parameters
	----------
	n : int
		Multiplier of the gamma shape (number of summed mutation waiting times).
	gamma_trick : int
		Erlang shape used for both ``E`` and ``I``.
	epsilon, mu : float
		Rates which, multiplied by ``gamma_trick``, give the Erlang rates of
		``E`` and ``I`` respectively.
	mutation_period : float
		Mean of a single gamma-distributed waiting time (shape ``kM``).
	kM : int or float
		Shape of a single gamma-distributed waiting time.
	size : int, optional
		Number of Monte Carlo draws per variable. Defaults to 1e6, the value
		that used to be hard-coded.
	random_state : None, int or numpy.random.Generator, optional
		Seed or generator for reproducible draws. ``None`` (default) keeps the
		previous behaviour of sampling from NumPy's global random state.

	Returns
	-------
	float
		Fraction of draws for which ``M_n >= E + I``.
	"""
	lambdaE = gamma_trick * epsilon
	lambdaI = gamma_trick * mu
	thetaM = 1 / kM * mutation_period
	# Build ONE generator and share it across the three draws: handing the same
	# integer seed to each rvs() call would restart the stream every time and make
	# the samples perfectly correlated instead of independent.
	rng = None if random_state is None else np.random.default_rng(random_state)
	Es = erlang.rvs(gamma_trick, scale=1/lambdaE, size=size, random_state=rng)
	Is = erlang.rvs(gamma_trick, scale=1/lambdaI, size=size, random_state=rng)
	Mns = gamma.rvs(n*kM, scale=thetaM, size=size, random_state=rng)
	Ds = Mns - Is - Es
	return (Ds >= 0).mean()

In [None]:
# Scan the (mutation_period, k) grid and, for each cell, estimate the mean and the
# standard deviation of the per-host distribution obtained by differencing the
# successive mutationsPerHost estimates. Results are stored flat, with k varying fastest.
n_cells = mutation_periods.shape[0]*kmuts.shape[0]
muts_per_host_mean = np.empty(n_cells)
muts_per_host_std = np.empty(n_cells)
i = 0
for mutation_period in mutation_periods:
	for k in kmuts:
		# values[n] is the estimate returned for n; values[0] = 0 is a sentinel so
		# that np.diff below yields one probability mass per n = 1, 2, ...
		values = [0]
		n = 1
		# Keep increasing n until the estimate has (almost) saturated at 1.
		while values[-1] < 0.9999:
			values.append(mutationsPerHost(n, gamma_trick, epsilon, mu, mutation_period, k))
			n += 1
		values = np.diff(values)
		# Mass number j (0-based) is weighted with the count j.
		counts = np.arange(values.shape[0])
		# Dividing by values.sum() renormalises the truncated (and noisy) masses.
		mean = (counts * values).sum() / values.sum()
		second_moment = (counts**2 * values).sum() / values.sum()
		# Var[X] = E[X^2] - E[X]^2 ; the mean must be squared before subtracting.
		std = np.sqrt(second_moment - mean**2)
		muts_per_host_mean[i] = mean
		muts_per_host_std[i] = std
		i += 1
		print(f"{i*100//(len(mutation_periods)*len(kmuts))}%, with values up to n={n}\t\t", flush=True, end="\r")

In [None]:
# Label every flat result with its (mutation rate, k) pair; from_product enumerates
# the pairs with k varying fastest, matching the order in which the arrays were filled.
index = pd.MultiIndex.from_product([1/mutation_periods,kmuts], names=["mr","k"])
df = pd.DataFrame({"mean":muts_per_host_mean, "std":muts_per_host_std}, index=index)
# Long -> wide: one row per mutation rate, one column per k.
long_df = df.reset_index()
mean_df = long_df.pivot(index="mr", columns="k", values="mean")
std_df = long_df.pivot(index="mr", columns="k", values="std")

In [None]:
# Side-by-side annotated heatmaps of the mean and standard deviation tables,
# both on a logarithmic colour scale, saved to analysis/mutations_per_host.png.
fig, axs = plt.subplots(1,2,figsize=(15,5))
# Row labels: mutation rates in ascending order, in scientific notation.
rate_labels = [f"{m:.{2}E}" for m in 1/mutation_periods[::-1]]
for ax, table, title in zip(axs, (mean_df, std_df), ("Mean", "Stddev")):
	sb.heatmap(table, annot=True, fmt=".3f", yticklabels=rate_labels, norm=matplotlib.colors.LogNorm(), ax=ax)
	ax.set_title(title)
	ax.set_ylabel("Average mutation rate")
	# Flip both axes of the heatmap.
	ax.invert_yaxis()
	ax.invert_xaxis()
fig.suptitle("Mutations per host")
fig.tight_layout()
fig.savefig("analysis/mutations_per_host.png")

In [None]:
# Reduce every top-level group of the HDF5 results file to one row of summary
# statistics. The file is opened explicitly read-only inside a context manager so
# the handle is released even if one of the per-group computations raises.
with h5py.File("data/betaMrmeanMrk.h5", "r") as file:

	# One column per simulation parameter, as collected by datman.
	# NOTE(review): presumably indexed by group name so that it lines up with `data`
	# when the two are concatenated later -- confirm in datman.collectAttributeFromGroup.
	params = pd.concat([datman.collectAttributeFromGroup(param, file, np.unique) for param in ["betas","mutation_rate","mutation_k"]], axis=1)

	records = {}
	for i,(name,group) in enumerate(file.items()):
		# Integer percentage shown by the carriage-return progress messages.
		progress = i*100//len(params.index)

		# Only the "seed-00000" run of each group is read.
		print(f"{progress}% - loading infections", end="\r", flush=True)
		haplo_infections = group["seed-00000/infections"].fields(["mut","loc"])[:]
		print(f"{progress}% - computing infections", end="\r", flush=True)
		# Rows per "mut" value; the largest count over the total is the share of
		# infection records belonging to the most frequent haplotype.
		infectious_haplo = pd.DataFrame.from_records(haplo_infections).groupby("mut").count()
		mostinfectious_haplo = (infectious_haplo.max() / infectious_haplo.sum())["loc"]

		print(f"{progress}% - loading mutation tree", end="\r", flush=True)
		mut_tree = group["seed-00000/mutationtree"][:]
		print(f"{progress}% - computing depths\t\t", end="\r", flush=True)
		depths = computeDepths(mut_tree)
		avg_depth = depths["depth"].mean()
		print(f"{progress}% - computing children\t", end="\r", flush=True)
		children = computeChildren(mut_tree)
		best_parent = children["children"].max() / children["children"].sum()

		print(f"{progress}% - filtering", end="\r", flush=True)
		# Same two statistics, restricted to the ids that occur in the infection records.
		unique_haplos = np.unique(haplo_infections["mut"])
		filtered_depths = pd.DataFrame.from_records(depths, index="id").loc[unique_haplos]
		avg_depth_fil = filtered_depths.mean()["depth"]
		filtered_children = pd.DataFrame.from_records(children, index="id").loc[unique_haplos]
		best_parent_fil = (filtered_children.max() / filtered_children.sum())["children"]

		print(f"{progress}%\t\t\t\t\t\t\t\t", end="\r", flush=True)
		records[name] = [mostinfectious_haplo, avg_depth, best_parent, avg_depth_fil, best_parent_fil]

# One row per group name, one column per statistic.
data = pd.DataFrame.from_dict(records, orient="index", columns=["mostinfhaplo","avgdepth","bestparent","FILavgdepth","FILbestparent"])

In [None]:
# Join parameters and summary statistics column-wise on their shared index,
# order the rows by that index, and display the result.
df = pd.concat([params, data], axis=1).sort_index()
df

In [None]:
# Split the summary table by "betas" value: one sub-table per value, keyed by that
# value and with the now-constant "betas" column removed.
betafixed = {}
for beta, group in df.groupby("betas"):
	betafixed[beta] = group.drop(columns="betas")

# With $R_0 = 1.1$

In [None]:
# Three stacked heatmaps over (mutation rate, k) for the betas == 0.022 slice,
# titled R0 = 1.1 and saved to analysis/R0_1.1.png.
tags = ["mostinfhaplo","avgdepth","bestparent"]
labels = [
	"Fraction of cases by the most infectious haplotypes",
	"Average depth in mutation tree",
	"Fraction of haplotypes generated by the most prolific one"
]
mycolors = ["Reds","Greens","Blues"]
nrows = len(labels)
fig, axs = plt.subplots(nrows, 1, figsize=(8,nrows*4))
for i, (ax, tag, label, cmap) in enumerate(zip(axs, tags, labels, mycolors)):
	plotting = betafixed[0.022].pivot(index="mutation_rate", columns="mutation_k", values=tag)
	# Only the average-depth panel (second row) gets a logarithmic colour scale.
	norm_kwargs = {"norm": matplotlib.colors.LogNorm()} if i == 1 else {}
	sb.heatmap(plotting, annot=True, fmt=".2e", cmap=cmap, ax=ax, **norm_kwargs)
	ax.set_title(label)
	ax.set_xlabel("k")
	ax.set_ylabel("Avg mutation rate")
	# Flip both axes of the heatmap.
	ax.invert_xaxis()
	ax.invert_yaxis()
fig.suptitle(r"$R_0 = 1.1$", fontsize=20)
fig.tight_layout()
fig.savefig("analysis/R0_1.1.png")

In [None]:
# Two stacked heatmaps of the statistics restricted to infectious haplotypes, for
# the betas == 0.022 slice, titled R0 = 1.1 and saved to analysis/R0_1.1-reduced.png.
tags = ["FILavgdepth","FILbestparent"]
labels = [
	"Only infectious - Average depth in mutation tree",
	"Only infectious - Fraction of haplotypes generated by the most prolific one"
]
mycolors = ["Greens","Blues"]
nrows = len(labels)
fig, axs = plt.subplots(nrows, 1, figsize=(8,nrows*4))
for i, (ax, tag, label, cmap) in enumerate(zip(axs, tags, labels, mycolors)):
	plotting = betafixed[0.022].pivot(index="mutation_rate", columns="mutation_k", values=tag)
	# Only the average-depth panel (first row) gets a logarithmic colour scale.
	norm_kwargs = {"norm": matplotlib.colors.LogNorm()} if i == 0 else {}
	sb.heatmap(plotting, annot=True, fmt=".2e", cmap=cmap, ax=ax, **norm_kwargs)
	ax.set_title(label)
	ax.set_xlabel("k")
	ax.set_ylabel("Avg mutation rate")
	# Flip both axes of the heatmap.
	ax.invert_xaxis()
	ax.invert_yaxis()
fig.suptitle(r"$R_0 = 1.1$", fontsize=20)
fig.tight_layout()
fig.savefig("analysis/R0_1.1-reduced.png")

# With $R_0 = 1.5$

In [None]:
# Three stacked heatmaps over (mutation rate, k) for the betas == 0.03 slice,
# titled R0 = 1.5 and saved to analysis/R0_1.5.png.
tags = ["mostinfhaplo","avgdepth","bestparent"]
labels = [
	"Fraction of cases by the most infectious haplotypes",
	"Average depth in mutation tree",
	"Fraction of haplotypes generated by the most prolific one"
]
mycolors = ["Reds","Greens","Blues"]
nrows = len(labels)
fig, axs = plt.subplots(nrows, 1, figsize=(8,nrows*4))
for i, (ax, tag, label, cmap) in enumerate(zip(axs, tags, labels, mycolors)):
	plotting = betafixed[0.03].pivot(index="mutation_rate", columns="mutation_k", values=tag)
	# Only the average-depth panel (second row) gets a logarithmic colour scale.
	norm_kwargs = {"norm": matplotlib.colors.LogNorm()} if i == 1 else {}
	sb.heatmap(plotting, annot=True, fmt=".2e", cmap=cmap, ax=ax, **norm_kwargs)
	ax.set_title(label)
	ax.set_xlabel("k")
	ax.set_ylabel("Avg mutation rate")
	# Flip both axes of the heatmap.
	ax.invert_xaxis()
	ax.invert_yaxis()
fig.suptitle(r"$R_0 = 1.5$", fontsize=20)
fig.tight_layout()
fig.savefig("analysis/R0_1.5.png")

In [None]:
# Two stacked heatmaps of the statistics restricted to infectious haplotypes, for
# the betas == 0.03 slice, titled R0 = 1.5 and saved to analysis/R0_1.5-reduced.png.
tags = ["FILavgdepth","FILbestparent"]
labels = [
	"Only infectious - Average depth in mutation tree",
	"Only infectious - Fraction of haplotypes generated by the most prolific one"
]
mycolors = ["Greens","Blues"]
nrows = len(labels)
fig, axs = plt.subplots(nrows, 1, figsize=(8,nrows*4))
for i, (ax, tag, label, cmap) in enumerate(zip(axs, tags, labels, mycolors)):
	plotting = betafixed[0.03].pivot(index="mutation_rate", columns="mutation_k", values=tag)
	# Only the average-depth panel (first row) gets a logarithmic colour scale.
	norm_kwargs = {"norm": matplotlib.colors.LogNorm()} if i == 0 else {}
	sb.heatmap(plotting, annot=True, fmt=".2e", cmap=cmap, ax=ax, **norm_kwargs)
	ax.set_title(label)
	ax.set_xlabel("k")
	ax.set_ylabel("Avg mutation rate")
	# Flip both axes of the heatmap.
	ax.invert_xaxis()
	ax.invert_yaxis()
fig.suptitle(r"$R_0 = 1.5$", fontsize=20)
fig.tight_layout()
fig.savefig("analysis/R0_1.5-reduced.png")

# With $R_0 = 2.5$

In [None]:
# Three stacked heatmaps over (mutation rate, k) for the betas == 0.05 slice,
# titled R0 = 2.5 and saved to analysis/R0_2.5.png.
tags = ["mostinfhaplo","avgdepth","bestparent"]
labels = [
	"Fraction of cases by the most infectious haplotypes",
	"Average depth in mutation tree",
	"Fraction of haplotypes generated by the most prolific one"
]
mycolors = ["Reds","Greens","Blues"]
nrows = len(labels)
fig, axs = plt.subplots(nrows, 1, figsize=(8,nrows*4))
for i, (ax, tag, label, cmap) in enumerate(zip(axs, tags, labels, mycolors)):
	plotting = betafixed[0.05].pivot(index="mutation_rate", columns="mutation_k", values=tag)
	# Only the average-depth panel (second row) gets a logarithmic colour scale.
	norm_kwargs = {"norm": matplotlib.colors.LogNorm()} if i == 1 else {}
	sb.heatmap(plotting, annot=True, fmt=".2e", cmap=cmap, ax=ax, **norm_kwargs)
	ax.set_title(label)
	ax.set_xlabel("k")
	ax.set_ylabel("Avg mutation rate")
	# Flip both axes of the heatmap.
	ax.invert_xaxis()
	ax.invert_yaxis()
fig.suptitle(r"$R_0 = 2.5$", fontsize=20)
fig.tight_layout()
fig.savefig("analysis/R0_2.5.png")

In [None]:
# Two stacked heatmaps of the statistics restricted to infectious haplotypes, for
# the betas == 0.05 slice, titled R0 = 2.5 and saved to analysis/R0_2.5-reduced.png.
tags = ["FILavgdepth","FILbestparent"]
labels = [
	"Only infectious - Average depth in mutation tree",
	"Only infectious - Fraction of haplotypes generated by the most prolific one"
]
mycolors = ["Greens","Blues"]
nrows = len(labels)
fig, axs = plt.subplots(nrows, 1, figsize=(8,nrows*4))
for i, (ax, tag, label, cmap) in enumerate(zip(axs, tags, labels, mycolors)):
	plotting = betafixed[0.05].pivot(index="mutation_rate", columns="mutation_k", values=tag)
	# Only the average-depth panel (first row) gets a logarithmic colour scale.
	norm_kwargs = {"norm": matplotlib.colors.LogNorm()} if i == 0 else {}
	sb.heatmap(plotting, annot=True, fmt=".2e", cmap=cmap, ax=ax, **norm_kwargs)
	ax.set_title(label)
	ax.set_xlabel("k")
	ax.set_ylabel("Avg mutation rate")
	# Flip both axes of the heatmap.
	ax.invert_xaxis()
	ax.invert_yaxis()
fig.suptitle(r"$R_0 = 2.5$", fontsize=20)
fig.tight_layout()
fig.savefig("analysis/R0_2.5-reduced.png")

# With $R_0 = 3.5$

In [None]:
# Three stacked heatmaps over (mutation rate, k) for the betas == 0.07 slice,
# titled R0 = 3.5 and saved to analysis/R0_3.5.png.
tags = ["mostinfhaplo","avgdepth","bestparent"]
labels = [
	"Fraction of cases by the most infectious haplotypes",
	"Average depth in mutation tree",
	"Fraction of haplotypes generated by the most prolific one"
]
mycolors = ["Reds","Greens","Blues"]
nrows = len(labels)
fig, axs = plt.subplots(nrows, 1, figsize=(8,nrows*4))
for i, (ax, tag, label, cmap) in enumerate(zip(axs, tags, labels, mycolors)):
	plotting = betafixed[0.07].pivot(index="mutation_rate", columns="mutation_k", values=tag)
	# Only the average-depth panel (second row) gets a logarithmic colour scale.
	norm_kwargs = {"norm": matplotlib.colors.LogNorm()} if i == 1 else {}
	sb.heatmap(plotting, annot=True, fmt=".2e", cmap=cmap, ax=ax, **norm_kwargs)
	ax.set_title(label)
	ax.set_xlabel("k")
	ax.set_ylabel("Avg mutation rate")
	# Flip both axes of the heatmap.
	ax.invert_xaxis()
	ax.invert_yaxis()
fig.suptitle(r"$R_0 = 3.5$", fontsize=20)
fig.tight_layout()
fig.savefig("analysis/R0_3.5.png")

In [None]:
# Two stacked heatmaps of the statistics restricted to infectious haplotypes, for
# the betas == 0.07 slice, titled R0 = 3.5 and saved to analysis/R0_3.5-reduced.png.
tags = ["FILavgdepth","FILbestparent"]
labels = [
	"Only infectious - Average depth in mutation tree",
	"Only infectious - Fraction of haplotypes generated by the most prolific one"
]
mycolors = ["Greens","Blues"]
nrows = len(labels)
fig, axs = plt.subplots(nrows, 1, figsize=(8,nrows*4))
for i, (ax, tag, label, cmap) in enumerate(zip(axs, tags, labels, mycolors)):
	plotting = betafixed[0.07].pivot(index="mutation_rate", columns="mutation_k", values=tag)
	# Only the average-depth panel (first row) gets a logarithmic colour scale.
	norm_kwargs = {"norm": matplotlib.colors.LogNorm()} if i == 0 else {}
	sb.heatmap(plotting, annot=True, fmt=".2e", cmap=cmap, ax=ax, **norm_kwargs)
	ax.set_title(label)
	ax.set_xlabel("k")
	ax.set_ylabel("Avg mutation rate")
	# Flip both axes of the heatmap.
	ax.invert_xaxis()
	ax.invert_yaxis()
fig.suptitle(r"$R_0 = 3.5$", fontsize=20)
fig.tight_layout()
fig.savefig("analysis/R0_3.5-reduced.png")