In [None]:
from pathlib import Path
import pandas as pd
import seaborn as sns
import scanpy as sc
import anndata as ad
import matplotlib.pyplot as plt
from matplotlib import colors

In [None]:
def rank_label_drops(raw: ad.AnnData, filtered: ad.AnnData):
	'''Build the per-barcode table behind a barcode rank plot.

	Parameters
	----------
	raw
		Unfiltered data. ``raw.obs`` must already contain a ``total_counts``
		column (e.g. from ``calculate_qc_metrics()``).
	filtered
		Filtered data. Only ``filtered.obs.index`` is read, to decide which
		barcodes were kept.

	Returns
	-------
	pandas.DataFrame
		A copy of the rows of ``raw.obs`` with ``total_counts > 0``, sorted by
		``total_counts`` in descending order, with two extra columns:
		``isEmpty`` (True when the barcode is absent from ``filtered.obs.index``)
		and ``rank`` (1-based position after sorting). ``raw.obs`` itself is
		left untouched.

	Raises
	------
	AssertionError
		If ``raw.obs`` has no ``total_counts`` column.
	'''

	assert "total_counts" in raw.obs.columns, "please run calculate_qc_metrics() on the raw data"

	# Take an explicit copy of the non-empty rows: adding columns to a bare
	# .loc slice is what triggers pandas' SettingWithCopyWarning.
	df = raw.obs.loc[raw.obs["total_counts"] > 0, :].copy()
	df["isEmpty"] = ~df.index.isin(filtered.obs.index)
	df = df.sort_values(by="total_counts", ascending=False)
	df["rank"] = range(1, df.shape[0] + 1)

	return df

def read_metrics(fn: str):
	'''Read a headerless (metric name, value) CSV into a wide DataFrame.

	The first CSV column becomes the column names of the result and the
	remaining CSV column(s) become its row(s). Metric names are normalised so
	that ``GeneFull`` and ``Gene`` both read ``Genes``; names that already
	contain ``Genes`` are left as they are.

	Parameters
	----------
	fn
		Path of the CSV file to read.

	Returns
	-------
	pandas.DataFrame
		The transposed table, with the count-like metrics listed in
		``int_columns`` cast to ``int``.

	Raises
	------
	KeyError
		If any of the metrics in ``int_columns`` is missing after renaming.
	'''
	int_columns = [
		"Estimated Number of Cells",
		"Unique Reads in Cells Mapped to Genes",
		"Number of Reads",
		"UMIs in Cells",
		"Total Genes Detected",
		"Median UMI per Cell",
		"Median Genes per Cell",
		"Median Reads per Cell",
	]

	def _pluralize(name):
		# One spelling ("Genes") regardless of which feature type the file reports
		if "GeneFull" in name:
			return name.replace("GeneFull", "Genes")
		# Already plural: replacing again would produce "Geness"
		if "Genes" in name:
			return name
		return name.replace("Gene", "Genes")

	# Single rename pass instead of one in-place rename per column
	metrics = pd.read_csv(fn, header=None).set_index(0).transpose().rename(columns=_pluralize)

	missing = [c for c in int_columns if c not in metrics.columns]
	if missing:
		raise KeyError(f"{fn}: missing expected metric(s): {', '.join(missing)}")

	return metrics.astype({c: int for c in int_columns})


In [None]:
runsheet = pd.read_csv(snakemake.config['runsheet'], sep="\t")

# Index the Snakemake inputs by run once instead of rescanning every input list
# for every run. A matrix path is matched to a run through the name of its third
# parent directory, a summary path through its second parent directory.
raw_dirs = {Path(f).parent.parent.parent.name: Path(f).parent for f in snakemake.input["raw"]}
filtered_dirs = {Path(f).parent.parent.parent.name: Path(f).parent for f in snakemake.input["filtered"]}
summary_files = {Path(f).parent.parent.name: f for f in snakemake.input["summary"]}

df_list, summ_list = [], []
for r in runsheet["run_id"].unique():
	# Name the run and the missing input kind(s) instead of failing later
	# with a bare KeyError: 'raw' / 'filtered' / 'summary'.
	missing = [
		kind
		for kind, lookup in (("raw", raw_dirs), ("filtered", filtered_dirs), ("summary", summary_files))
		if r not in lookup
	]
	if missing:
		raise KeyError(f"no {', '.join(missing)} input found for run_id {r!r}")

	raw = sc.read_10x_mtx(raw_dirs[r], var_names="gene_symbols")
	# rank_label_drops() requires `total_counts` in raw.obs
	sc.pp.calculate_qc_metrics(raw, percent_top=None, log1p=False, inplace=True)
	# Only the barcode index of the filtered matrix is used, so no QC metrics are computed for it
	filtered = sc.read_10x_mtx(filtered_dirs[r], var_names="gene_symbols")

	df = rank_label_drops(raw, filtered)
	df["run_id"] = r
	df_list.append(df)
	# Drop the matrices so they can be collected before the next run is read
	del raw, filtered

	summary = read_metrics(summary_files[r])
	summary["run_id"] = r
	summ_list.append(summary)

	print(f"Loaded STARsolo from {r}")

df = pd.concat(df_list).set_index(["run_id"])
summ = pd.concat(summ_list).set_index(["run_id"])

In [None]:
def background_gradient(s, cmap="PuBu"):
    """Return one CSS ``background-color`` rule per value of ``s``.

    Values are scaled with a power-law norm (exponent 2) spanning 0 to the
    maximum of ``s`` and then looked up in ``cmap``.
    """
    scale = colors.PowerNorm(2, vmin=0, vmax=s.max())
    palette = plt.colormaps.get_cmap(cmap)
    css = []
    for rgba in palette(scale(s.values)):
        color = colors.rgb2hex(rgba)
        css.append(f'background-color: {color}')
    return css


# Summary table with two-decimal formatting, shaded with the "flare" palette
cm = sns.color_palette("flare", as_cmap=True)
summ.style.format(precision=2).apply(background_gradient, cmap=cm)

In [None]:
# Barcode rank plot: total counts against rank on log-log axes, coloured by run,
# with the line style switching on `isEmpty`.
fig = sns.relplot(
	data=df,
	x="rank",
	y="total_counts",
	style="isEmpty",
	hue="run_id",
	kind="line",
	# Ranks are unique within a run, so there is nothing to aggregate per x value:
	# draw the observations directly and skip the mean/confidence-interval step.
	estimator=None,
	# First pattern is a solid line, second a dotted one
	dashes=[(1, 0), (1, 1)],
)
fig.set(xscale="log", yscale="log", xlabel="Rank", ylabel="Total counts")
sns.despine()

In [None]:
# Directory of each filtered matrix, keyed by the name of the third parent
# directory of the input path (which is what gets compared with run_id).
filtered_dirs = {Path(f).parent.parent.parent.name: Path(f).parent for f in snakemake.input["filtered"]}

df_list = []
for r in runsheet["run_id"].unique():
	# Stop here rather than silently reusing the previous run's `adata`
	# under the wrong run_id when a run has no matching input.
	if r not in filtered_dirs:
		raise KeyError(f"no filtered input found for run_id {r!r}")

	adata = sc.read_10x_mtx(filtered_dirs[r], var_names="gene_symbols")
	# NOTE(review): the prefix match is case-sensitive, so lower-case `mt-`
	# symbols are not flagged - confirm this suits the annotation in use.
	adata.var["mito"] = adata.var_names.str.startswith("MT-")
	sc.pp.calculate_qc_metrics(adata, qc_vars=["mito"], inplace=True)

	df = adata.obs
	df["run_id"] = r
	df_list.append(df)

	print(f"Loaded STARsolo from {r}")

# One row per barcode across all runs, tagged with its run_id
df = pd.concat(df_list)

In [None]:
# Distribution of log1p(total counts) across barcodes, one violin per run
sns.catplot(
	data=df,
	x="run_id",
	y="log1p_total_counts",
	kind="violin",
).set_axis_labels("Run", "log1p(total counts)")

In [None]:
# Distribution of log1p(number of genes with counts) across barcodes, one violin per run
sns.catplot(
	data=df,
	x="run_id",
	y="log1p_n_genes_by_counts",
	kind="violin",
).set_axis_labels("Run", "log1p(genes detected)")

In [None]:
# Percentage of counts assigned to the `mito` gene set, one violin per run
sns.catplot(
	data=df,
	x="run_id",
	y="pct_counts_mito",
	kind="violin",
).set_axis_labels("Run", "Mitochondrial counts (%)")

In [None]:
# Genes detected against total counts, one panel per run, coloured by
# mitochondrial percentage. The panel grid is capped at four columns.
n_runs = df["run_id"].nunique()
sns.relplot(
	data=df,
	x="total_counts",
	y="n_genes_by_counts",
	hue="pct_counts_mito",
	col="run_id",
	col_wrap=min(n_runs, 4),
).set_axis_labels("Total counts", "Genes detected")

In [None]:
# Same comparison on the log1p scale, one panel per run, coloured by
# mitochondrial percentage. The panel grid is capped at four columns.
n_runs = df["run_id"].nunique()
sns.relplot(
	data=df,
	x="log1p_total_counts",
	y="log1p_n_genes_by_counts",
	hue="pct_counts_mito",
	col="run_id",
	col_wrap=min(n_runs, 4),
).set_axis_labels("log1p(total counts)", "log1p(genes detected)")