In [1]:
from pathlib import Path
import numpy as np
import pandas as pd
import seaborn as sns
import scanpy as sc
import matplotlib.pyplot as plt

In [None]:
# Knee (barcode-rank) plot: total counts per barcode against its rank, coloured
# by whether the barcode is missing from the filtered matrix.
raw = snakemake.input["STARsoloRaw"]
filtered = snakemake.input["STARsoloFiltered"]

# load data: each input path points at a file inside a 10x-style matrix directory
dir_10x_raw = Path(raw).parent
adataRaw = sc.read_10x_mtx(dir_10x_raw, var_names="gene_symbols")
dir_10x_filtered = Path(filtered).parent
adataFilter = sc.read_10x_mtx(dir_10x_filtered, var_names="gene_symbols")
# plot title: stems of the two directories above the raw matrix directory
name = dir_10x_raw.parent.parent.stem + "_" + dir_10x_raw.parent.stem

# per-barcode QC metrics, written into adataRaw.obs (inplace=True)
sc.pp.calculate_qc_metrics(adataRaw, percent_top=None, log1p=False, inplace=True)

# organize
df = adataRaw.obs.drop(columns=["n_genes_by_counts"])
del adataRaw  # only the per-barcode table is needed from here on
# Explicit copy: columns are added below, and assigning onto a filtered
# selection is what triggers pandas' SettingWithCopyWarning.
df = df.loc[df["total_counts"] > 0].copy()
df["isEmpty"] = ~df.index.isin(adataFilter.obs.index)
del adataFilter
df = df.sort_values(by="total_counts", ascending=False)
df["rank"] = range(1, df.shape[0] + 1)

# plot
fig, ax = plt.subplots()
sns.lineplot(
    data=df,
    x="rank",
    y="total_counts",
    hue="isEmpty",
    palette="blend:#7AB,#EDA",
    # every rank is unique, so there is nothing to aggregate; drawing the raw
    # observations skips seaborn's per-x mean/error-bar computation
    estimator=None,
    ax=ax,
)
ax.set(xscale="log", yscale="log", title=name, xlabel="Rank", ylabel="Total counts")
sns.despine(fig=fig)
plt.show()

In [None]:
# Read the headerless two-column summary CSV into a frame indexed by metric name.
summary_csv = Path(snakemake.input["STARsoloSummaries"])
df = pd.read_csv(
    summary_csv,
    header=None,
    names=["Metric", "Values"],
    index_col="Metric",
)
# Label built from the two enclosing directory stems plus the file name;
# later cells look for "Full" in it to pick the matching metric label.
name = "_".join(
    [summary_csv.parent.parent.stem, summary_csv.parent.stem, summary_csv.name]
)

In [None]:
# Sequencing metrics: the first five rows of the summary, laid out as a single
# wide row, with the read count shown as an integer.
sequencing = df.iloc[:5].T.astype({"Number of Reads": int})
print("summarizing sequencing metrics...")
sequencing

In [None]:
# Mapping metrics table: "Number of Reads" (row 0) plus summary rows 5-11,
# without "Estimated Number of Cells", laid out as a single wide row.
#
# The per-feature read-count row is labelled "Unique Reads in Cells Mapped to
# Gene" or "... GeneFull". Locate it in the table itself by its shared prefix
# rather than by looking for "Full" in the path-derived `name`, which breaks
# whenever an unrelated path component happens to contain "Full".
GENE_READS_PREFIX = "Unique Reads in Cells Mapped to "
gene_reads_metrics = [
    metric
    for metric in df.index
    if isinstance(metric, str) and metric.startswith(GENE_READS_PREFIX)
]
if len(gene_reads_metrics) != 1:
    raise KeyError(
        f"expected exactly one metric starting with {GENE_READS_PREFIX!r}, "
        f"found {gene_reads_metrics}"
    )
gene_reads_metric = gene_reads_metrics[0]

mapping = (
    pd.concat([df.iloc[0:1], df.iloc[5:12]])
    .drop(["Estimated Number of Cells"])
    .transpose()
    # read counts are whole numbers; the remaining columns keep their dtype
    .astype({gene_reads_metric: int, "Number of Reads": int})
)
print("summarizing mapping metrics...")
mapping

In [None]:
# Cell metrics table: summary rows from position 9 onward, minus the fraction
# row and the per-feature read-count row, laid out as a single wide row of
# integers.
#
# The per-feature read-count row is labelled "Unique Reads in Cells Mapped to
# Gene" or "... GeneFull". Locate it in the table itself by its shared prefix
# rather than by looking for "Full" in the path-derived `name`, which breaks
# whenever an unrelated path component happens to contain "Full".
GENE_READS_PREFIX = "Unique Reads in Cells Mapped to "
gene_reads_metrics = [
    metric
    for metric in df.index
    if isinstance(metric, str) and metric.startswith(GENE_READS_PREFIX)
]
if len(gene_reads_metrics) != 1:
    raise KeyError(
        f"expected exactly one metric starting with {GENE_READS_PREFIX!r}, "
        f"found {gene_reads_metrics}"
    )
gene_reads_metric = gene_reads_metrics[0]

cells = (
    df.iloc[9:]
    .drop(["Fraction of Unique Reads in Cells", gene_reads_metric])
    .transpose()
    .astype(int)
)
print("summarizing cells metrics...")
cells