kernel: scanpy

# Set up

In [None]:
import gc
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.backends.backend_pdf as mpdf
from matplotlib.pyplot import rc_context

import scanpy as sc
import muon as mu

import warnings
from numba.core.errors import NumbaDeprecationWarning
# Report each matching warning only once ...
warnings.filterwarnings(action='once')
warnings.simplefilter(action='once')
# ... and then silence these categories entirely (later filters take precedence).
for noisy_category in (NumbaDeprecationWarning, FutureWarning, DeprecationWarning):
    warnings.simplefilter(action="ignore", category=noisy_category)

In [None]:
# Scanpy logging level -- verbosity: errors (0), warnings (1), info (2), hints (3)
sc.settings.verbosity = 0
# Default figure appearance for every scanpy plot in this notebook.
figure_defaults = dict(dpi=100, frameon=False, figsize=(8, 7), facecolor="white")
sc.settings.set_figure_params(**figure_defaults)
# Print the package versions used for this run.
sc.logging.print_versions()

In [None]:
# Marker genes per cell type. The dict is used directly as `var_names` for the
# dotplots, so its keys become the group brackets drawn above the genes.
blood_markers_dict = {
    "HSC": ["CD34", "SPINK2", "MLLT3", "HLF", "MECOM", "CDK6", "SELL", "CD52", "PROM1",
            "RUNX1", "HOXA9", "MEIS1", "MYB", "ITGA6"],  # HSC/MPP
    "GP": ["MPO", "AZU1", "SPI1", "LYZ"],  # Granulocyte
    "MEMP": ["GATA1", "GATA2", "TESPA1", "KLF1", "CTNNB1"],  # megakaryocyte-erythroid-mast cell progenitor
    "Ery": ["TFRC", "AHSP", "ALAS2", "HBA1", "HBB", "GYPA", "BPGM"],  # Erythroid
    "MK": ["ITGA2B", "GP9", "PLEK", "MPL", "PECAM1", "CXCR4", "PPBP", "PF4"],  # Megakaryocytes
    "Mast": ["HDC", "CPA3", "LMO4", "CD63", "ENPP3", "TPSAB1", "TPSB2"],  # Mast cells
    "Mono": ["CD14", "FCGR3A", "S100A9", "CD68", "MNDA", "FCN1"],  # Monocytes
    "Kupffer": ["CD163", "MS4A7", "C1QA", "MRC1", "CTSB", "MARCO", "CD5L", "VCAM1"],  # Kupffer cells
    "cDC1": ["CLEC9A", "THBD", "XCR1", "BATF3"],
    "cDC2": ["CD1C", "CLEC4A", "CLEC10A"],
    "cDC3": ["FLT3", "VCAN"],
    "pDC": ["JCHAIN", "IRF8", "CLEC4C", "IL3RA", "MPEG1"],
    "B": ["CD79A", "CD79B", "IL7R", "PAX5", "MME", "IGLL1", "IGHM", "IGHD",
          "CD19", "MS4A1", "IRF4", "DNTT", "RAG1", "RAG2", "CD24", "CD38"],
    "B1": ["CD5", "CD27", "SPN", "CCR10"],
    "NK": ["IL2RG", "NKG7", "PRF1", "GZMA", "KLRB1", "TRBC1", "IL2RB"],
    "ILC": ["RORC", "AHR", "ID2"],  # ILC3
    "T": ["CD2", "CD7", "CD3D", "CD3E", "CD3G", "TRAC", "FOXP3", "TIGIT", "CD4", "CD8A", "CD8B"],
    "Mix": ["KIT", "GATA3", "IL1A", "IL1B", "PTPRC"],  # PTPRC = CD45
    "Hepa": ["ALB", "AFP"],  # Hepatocytes
    "Endo": ["CDH5", "KDR"],  # endothelial cells
    "LSEC": ["STAB1", "STAB2", "LYVE1"],
    "Stellate": ["DCN", "COL1A1", "COL3A1", "RBP1"],  # stellate cells
    "Epi": ["KRT19"],
    "Cycling": ["MKI67", "TOP2A"],
}

# Flat gene list in the same order as the dict. It is derived rather than typed
# out a second time so the two can never drift apart; genes that belong to more
# than one group would appear once per group.
blood_markers_lst = [gene for genes in blood_markers_dict.values() for gene in genes]

# B-lineage marker panel keyed by maturation stage.
# NOTE(review): presumably taken from Suo et al. (Science) -- confirm the source.
suo_science_b = {
    "lymphoid prog.": ["CD34", "SPINK2", "IL7R", "KIT"],
    "PreProB": ["FLT3", "CD19", "VPREB1"],
    "ProB": ["MME", "CDC45", "DHFR", "MKI67"],
    "LateProB": ["CD27", "RAG1", "DNTT", "VPREB3"],
    "Pro -> Pre": ["CD24", "TNFRSF17"],
    "Pre -> Immature": ["MME", "IDH2", "SPIB", "IL4R", "IGHM"],
    "Mature": ["IGHD", "MS4A1", "CD40", "FCER2"],
    "B1": ["CD27", "CD5", "SPN", "CCR10"],
    "PlasmaB": ["JCHAIN", "SDC1", "CD38"],
    "Cycling": ["MKI67", "TOP2A"],
}

# Megakaryocyte / erythroid marker panel keyed by stage. The keys are drawn as
# group labels on the dotplot, so they must be spelled correctly.
# NOTE(review): presumably taken from Suo et al. (Science) -- confirm the source.
suo_science_megak_ery = {
    "Prog.": ["CD34", "SPINK2", "MLLT3"],
    "MK/Ery precur.": ["TESPA1", "GATA2", "FCER1A"],  # MegaK/Ery precursors
    "Early Ery": ["KLF1", "APOE", "FAM178B"],  # key was misspelled "Earyly Ery"
    "Mid Ery": ["BLVRB", "CD36", "OAT"],
    "Late Ery": ["GYPA", "GYPB", "SLC4A1"],
    "YS Ery": ["HBZ", "HBE1"],
    "Early MK": ["HBD", "PF4"],  # early megaK
    "Late MK": ["ITGA2B", "ITGB3", "CLK1"],  # late megaK
    "Mf_Ery": ["C1QA", "CD163"],
    "Cycling": ["MKI67", "TOP2A"],
}

# Myeloid marker panel keyed by cell type / state.
# NOTE(review): presumably taken from Suo et al. (Science) -- confirm the source.
suo_science_mye = {
    "Prog.": ["CD34", "SPINK2", "MLLT3"],
    "Mye prog.": ["PRSS57", "PRTN3", "AZU1"],
    "Neutrophil": ["ELANE", "DEFA4", "LCN2", "LTF", "ORM1"],
    "Mono": ["CD52", "S100A8", "MS4A6A", "CD14", "CXCR4", "CCR2", "IL1B", "CD300E"],
    "DC prog.": ["ACY3", "TIFAB", "KIF17"],
    "pDC": ["CLEC4C", "JCHAIN", "IRF7"],
    "ASDC": ["SIGLEC6", "AXL"],
    "DC2": ["CLEC10A", "CD1C"],
    "DC1": ["CLEC9A", "BATF3"],
    "migratory DC": ["CCR7", "LAMP3"],
    "Langerhans": ["IDO1", "CD207", "CD1A"],  # Langerhans cells
    "EO_BASO_Mast": ["CLC", "KIT", "TPSAB1"],
    "Mac_LYVE1_high": ["F13A1", "LYVE1", "SPP1"],
    "Mac iron recycling": ["CD5L", "APOE", "VCAM1"],
    "Mac_MHCII_high": ["HLA-DRA", "HLA-DPA1", "CLEC7A"],
    "Mac_kupffer_like": ["ENG", "KDR", "CAV1"],
    "Mac_TREM2": ["TREM2", "P2RY12"],
    "Mac_TLF": ["TIMD4", "FOLR2"],  # mac TLF+
    "Cycling": ["MKI67", "TOP2A"],
}

# T cell / innate lymphoid marker panel keyed by developmental stage or subset.
# NOTE(review): presumably taken from Suo et al. (Science) -- confirm the source.
suo_science_t_ilc = {
    "DN(early)": ["IGLL1", "ST18"],
    "DN(P)": ["TRGC2", "TRDC", "HIVEP3"],
    "DN(Q)": ["RAG1", "RAG2", "TP53INP1", "PTCRA", "RGPD3"],
    "DP(P)": ["SMPD3"],
    "DP(Q)": ["AQP3", "RORC"],
    "abT(entry)": ["CCR9", "SATB1", "TOX2"],
    "Mature_T": ["CCR7", "CD5", "CD27", "CD8A", "CD8B", "CD4"],
    "Treg": ["FOXP3", "CTLA4"],
    "CD8aa": ["PDCD1", "GNG4", "ZNF683"],
    "innate": ["KLRB1", "ZBTB16"],
    "Type3_innate_T": ["CD40LG", "RORC", "ANXA1"],
    "ILC3": ["RORC", "IL1R1", "IL23R", "KIT"],
    "ILC2": ["PTGDR2", "HPGDS", "GATA3"],
    "Type1_innate_T": ["NKG7", "EOMES", "TBX21", "IFNG-AS1"],
    "NK": ["KLRD1", "KLRF1", "NKG7", "NCAM1"],
    "Cycling": ["MKI67", "TOP2A"],
}

# Load data

In [None]:
# Run configuration: project directory and dataset prefix used in file names.
work_dir = '/work/home/project/20231127_DevM/multiome_wnn/multiome_48FL'
dataset = "FL_wnn"
# Version tag of the object that is read (old) and of the outputs written (new).
old_file = "v00"
new_file = "v01"
# obs column holding the input clustering (old) and the annotation created here (new).
old_anno = "leiden_wnn_3"
new_anno = "anno_wnn_v1"

mdata

In [None]:
# Load the previous version of the clustered object. The path is anchored at
# work_dir, like every output path in this notebook, so the load does not
# depend on the kernel's current working directory.
mdata = mu.read(
    f"{work_dir}/data/{dataset}_clustered.{old_file}.h5mu"
)
mdata

Modify obs

In [None]:
# Copy the per-cell sample metadata from the RNA modality into the global obs table.
for meta_col in ('libraryID', 'donorID', 'PCW'):
    mdata.obs[meta_col] = mdata['rna'].obs[meta_col].copy()

# Assign new annotations

In [None]:
# Mapping from cluster ID (as a string, matching the values of the `old_anno`
# column) to the annotation label stored in `new_anno`. Several clusters may
# share one label.
# NOTE(review): labels that only repeat or prefix the cluster number (e.g. "35",
# "47-Ery?") look like clusters that are not confidently annotated yet -- confirm.
OLD2NEW = {
    "0": "NK", "1": "HSC", "2": "MK", "3": "Monocyte", "4": "MEMP", "5": "MEMP",
    "6": "HSC", "7": "B", "8": "HSC", "9": "Granulocyte", "10": "HSC", "11": "EarlyErythroid",
    "12": "cDC2", "13": "MEMP", "14": "EarlyErythroid", "15": "cDC2", "16": "B", "17": "Mast",
    "18": "pDC", "19": "19-lowQuality", "20": "HSC", "21": "EarlyErythroid", "22": "MK", "23": "23-6wProgenitor",
    "24": "24-MastMEMP", "25": "Kupffer", "26": "Monocyte", "27": "B", "28": "B", "29": "B",
    "30": "B", "31": "cDC2", "32": "MEMP", "33": "EarlyErythroid", "34": "HSC", "35": "35",
    "36": "NK", "37": "cDC2", "38": "HSC", "39": "ILC", "40": "GP", "41": "NK",
    "42": "42-MastMEMP", "43": "Monocyte", "44": "MK", "45": "T", "46": "MK", "47": "47-Ery?",
    "48": "48", "49": "49", "50": "Endothelia", "51": "B", "52": "Hepatocyte", "53": "53",
    "54": "cDC1", "55": "Erythroid", "56": "56", "57": "57", "58": "B", "59": "59", "60": "60", "61": "61"
}

In [None]:
# Translate cluster IDs into annotation labels; a cluster ID that is missing
# from OLD2NEW ends up as NaN.
anno_labels = mdata.obs[old_anno].map(OLD2NEW)
mdata.obs[new_anno] = anno_labels.astype("category")
# Displays True if at least one cell was left without a label.
any(mdata.obs[new_anno].isna())

In [None]:
# Show the labels that were actually assigned, sorted case-insensitively.
present_labels = mdata.obs[new_anno].unique()
sorted(present_labels, key=str.casefold)

In [None]:
# Preferred display order of the annotation labels (used by plots and legends).
anno_order = ["HSC", '23-6wProgenitor', "GP", "Granulocyte",
              "MEMP", "EarlyErythroid", "Erythroid", "47-Ery?", "MK", "24-MastMEMP", "42-MastMEMP", "Mast",
              "Monocyte", "Kupffer", "cDC1", "cDC2", "pDC",
              "B",
              "NK", "T", "ILC",
              "Hepatocyte", "Endothelia",
              "35", "48", "49", "53", "56", "57", "59", "60", "61",
              "19-lowQuality"]
# reorder_categories() raises unless it receives exactly the existing categories.
# Keep only the labels present in the data, and append any present label that is
# missing from anno_order, so a cluster absent from (or new in) this dataset does
# not break the cell.
present = list(mdata.obs[new_anno].cat.categories)
ordered = [label for label in anno_order if label in present]
ordered += [label for label in present if label not in anno_order]
mdata.obs[new_anno] = mdata.obs[new_anno].cat.reorder_categories(ordered)
# Displays True if any cell has no label.
any(mdata.obs[new_anno].isna())

# Dotplot

In [None]:
# Make the new annotation available on each modality's own obs table.
for modality in ('rna', 'atac'):
    mdata[modality].obs[new_anno] = mdata.obs[new_anno]

## RNA

In [None]:
# Blood marker panel across the new annotation; standard_scale="var" rescales
# each gene to the 0-1 range.
sc.pl.dotplot(
    mdata['rna'],
    var_names=blood_markers_dict,
    groupby=[new_anno],
    standard_scale="var",
)

In [None]:
# Same dotplot written to PDF; the figure height grows with the number of
# annotation categories.
n_groups = len(mdata.obs[new_anno].cat.categories)
sc.pl.dotplot(
    mdata['rna'],
    var_names=blood_markers_dict,
    groupby=[new_anno],
    standard_scale="var",
    show=False,
    figsize=(45, n_groups * 0.35),
)
dotplot_pdf = f"{work_dir}/plots/{dataset}_dotplot_rna.{new_file}.pdf"
plt.savefig(dotplot_pdf, bbox_inches="tight")
plt.close()

suo_science_megak_ery

In [None]:
# Megakaryocyte / erythroid panel across the new annotation (genes scaled 0-1).
sc.pl.dotplot(
    mdata['rna'],
    var_names=suo_science_megak_ery,
    groupby=[new_anno],
    standard_scale="var",
)

suo_science_b

In [None]:
# B-lineage panel across the new annotation (genes scaled 0-1).
sc.pl.dotplot(
    mdata['rna'],
    var_names=suo_science_b,
    groupby=[new_anno],
    standard_scale="var",
)

suo_science_mye

In [None]:
# Myeloid panel across the new annotation (genes scaled 0-1).
sc.pl.dotplot(
    mdata['rna'],
    var_names=suo_science_mye,
    groupby=[new_anno],
    standard_scale="var",
)

suo_science_t_ilc

In [None]:
# T cell / ILC panel across the new annotation (genes scaled 0-1).
sc.pl.dotplot(
    mdata['rna'],
    var_names=suo_science_t_ilc,
    groupby=[new_anno],
    standard_scale="var",
)

### Good libraries

In [None]:
# Libraries for which a separate marker dotplot is produced.
good_samples = ["FL6PCW1-A", "FL6PCW1-B", "FL7PCW1-A", "FL7PCW1-B", "FL18150-A", "FL18150-B",
                "FL2596", "FL2391", "FL2408-2", "FL17681", "FL17805", "FL16171-B"]

# One PDF page per library. PdfPages is used as a context manager so the file is
# closed (and therefore readable) even if one of the plots raises.
with mpdf.PdfPages(f"{work_dir}/plots/{dataset}_dotPlot_rna_goodLibrary.{new_file}.pdf") as pdf:
    for library_id in good_samples:
        print(library_id)
        # Work on the cells of this library only.
        mdata_x = mdata[mdata.obs["libraryID"] == library_id, :].copy()
        sc.pl.dotplot(mdata_x['rna'], var_names=blood_markers_dict, groupby=[new_anno],
                      standard_scale="var", show=False,
                      figsize=(45, len(mdata_x.obs[new_anno].cat.categories) * 0.35),
                      title=library_id)
        pdf.savefig(bbox_inches="tight")
        # Close the figure so figures do not pile up in memory across the loop.
        plt.close()

# UMAP

Random cells

In [None]:
# Reproducible random ordering of all cells (fixed seed).
# NOTE(review): presumably used so that no cluster is systematically drawn on
# top of the others in the UMAPs -- confirm.
np.random.seed(0)
n_cells = mdata.shape[0]
random_indices = np.random.permutation(list(range(n_cells)))

New anno

In [None]:
# UMAP coloured by the new annotation, cells taken in the shuffled order, saved as PDF.
umap_pdf = f"{work_dir}/plots/{dataset}_umap_cluster.{new_file}.pdf"
with rc_context({"figure.figsize": (11, 10)}):
    mu.pl.embedding(
        mdata[random_indices, :],
        basis='umap',
        color=[new_anno],
        size=1,
        show=False,
    )
    plt.savefig(umap_pdf, bbox_inches="tight")

Each cluster

In [None]:
# One UMAP panel per annotation category, with that category highlighted in red.
# The grid is sized from the same category list the loop iterates over, so the
# number of panels can never be smaller than the number of plots.
categories = list(mdata.obs[new_anno].cat.categories)
n_ctypes, ncols = len(categories), 5
nrows = int(np.ceil(n_ctypes / ncols))
sns.set_theme(style="white", font_scale=1.5)
# squeeze=False keeps `axes` two-dimensional even when there is a single row.
_, axes = plt.subplots(nrows, ncols, figsize=(ncols * 5.5, nrows * 5), squeeze=False)
for i, clust in enumerate(categories):
    row, col = divmod(i, ncols)
    mu.pl.embedding(mdata[random_indices, :], basis='umap', color=[new_anno], groups=[clust],
                    show=False, use_raw=False, ax=axes[row, col],
                    title=f"{clust}", palette=["red"], ncols=ncols, legend_loc=None, size=2)
# Hide the unused panels at the end of the last row.
for unused_ax in axes.ravel()[n_ctypes:]:
    unused_ax.set_visible(False)
plt.tight_layout()

In [None]:
# Same per-category UMAP grid as above, written to PNG instead of displayed.
# The grid is sized from the same category list the loop iterates over, so the
# number of panels can never be smaller than the number of plots.
categories = list(mdata.obs[new_anno].cat.categories)
n_ctypes, ncols = len(categories), 5
nrows = int(np.ceil(n_ctypes / ncols))
sns.set_theme(style="white", font_scale=1.5)
# squeeze=False keeps `axes` two-dimensional even when there is a single row.
_, axes = plt.subplots(nrows, ncols, figsize=(ncols * 5.5, nrows * 5), squeeze=False)
for i, clust in enumerate(categories):
    row, col = divmod(i, ncols)
    mu.pl.embedding(mdata[random_indices, :], basis='umap', color=[new_anno], groups=[clust],
                    show=False, use_raw=False, ax=axes[row, col],
                    title=f"{clust}", palette=["red"], ncols=ncols, legend_loc=None, size=2)
# Hide the unused panels at the end of the last row.
for unused_ax in axes.ravel()[n_ctypes:]:
    unused_ax.set_visible(False)
plt.tight_layout()
# File name fixed from "_uamp_" to "_umap_".
plt.savefig(f"{work_dir}/plots/{dataset}_umap_clusterSep.{new_file}.png", dpi=300,
            bbox_inches="tight", facecolor="white")
plt.close()

# Dendrogram

In [None]:
# Hierarchical clustering of the annotation groups on the Harmony embedding,
# using all of its dimensions.
harmony_rep = "X_harmony"
n_harmony_dims = mdata['rna'].obsm[harmony_rep].shape[1]
sc.tl.dendrogram(
    mdata['rna'],
    groupby=[new_anno],
    n_pcs=n_harmony_dims,
    use_rep=harmony_rep,
    cor_method="pearson",
    linkage_method="complete",
    optimal_ordering=True,
)

In [None]:
# Draw the dendrogram of the annotation groups and save it as PDF.
dendrogram_pdf = f"{work_dir}/plots/{dataset}_dendrogramOfRNA.{new_file}.pdf"
with plt.rc_context({"figure.figsize": (15, 5)}):
    sc.pl.dendrogram(mdata['rna'], groupby=new_anno, show=False)
    plt.savefig(dendrogram_pdf, bbox_inches="tight")

# Composition

In [None]:
def compo_plot(data=None, groupby=None, condition=None):
    """Draw a stacked bar chart of the percentage composition of each group.

    ``data[groupby]`` is cross-tabulated against ``data[condition]``; every row
    of the resulting count table is converted to percentages of its row total
    and drawn as one stacked bar.

    Parameters
    ----------
    data
        Table indexable by column name (e.g. a pandas DataFrame).
    groupby
        Name of the column whose values become the bars (x axis).
    condition
        Name of the column whose values become the stacked segments.

    Returns
    -------
    None
        The chart is drawn on a new axes created by pandas.
    """
    counts = pd.crosstab(data[groupby], data[condition])
    row_totals = counts.sum(axis=1)
    percent = counts.div(row_totals, axis=0) * 100.0

    ax = percent.plot(kind="bar", stacked=True, legend=False)
    ax.set_xlabel(groupby)
    ax.set_ylabel("Percentage")
    # Legend goes to the right of the axes so it does not cover the bars.
    ax.legend(loc="center left", bbox_to_anchor=(1.05, 0.5), ncol=3)

    # Tilt the tick labels when the longest group label has 5+ characters.
    longest_label = max(percent.index.astype(str), key=len)
    if len(longest_label) >= 5:
        ax.set_xticklabels(ax.get_xticklabels(), rotation=-45, ha='left')

# QC

In [None]:
# Independent copy of the global per-cell metadata, used as the table for the QC plots.
d4p = mdata.obs.copy(deep=True)

## Cluster size, gene/umi/peak count

In [None]:
# Per-annotation summary: median of each count metric plus the number of cells.
median_metrics = ["rna:nFeature_RNA", 'rna:nCount_RNA', 'atac:nCount_peaks', 'atac:nFeature_peaks']
df = d4p.groupby(new_anno).agg({metric_name: 'median' for metric_name in median_metrics})
df['count'] = d4p[new_anno].value_counts()
summary_csv = f"{work_dir}/data/{dataset}_clusterSizes_medianCounts.{new_file}.csv"
df.to_csv(summary_csv)

## Doublet

In [None]:
# Seaborn "ticks" style with axis grid lines switched on.
grid_overrides = {'axes.grid': True}
sns.set_style("ticks", rc=grid_overrides)

In [None]:
# Doublet score per annotation; boxes ordered by ascending median.
metric = "rna:scDblFinder.score"
medians = d4p.groupby(by=[new_anno])[metric].median()
my_order = medians.sort_values().index
fig_width = len(mdata.obs[new_anno].cat.categories) * 0.35
plt.figure(figsize=(fig_width, 5))
p = sns.boxplot(data=d4p, x=new_anno, y=metric, order=my_order)
p.set_xticklabels(p.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')

plt.savefig(f"{work_dir}/plots/{dataset}_check_rnaDoubletScore.{new_file}.pdf", bbox_inches="tight")
plt.show()

## RNA

nCount_RNA

In [None]:
# RNA UMI count per annotation on a log y axis; boxes ordered by ascending median.
metric = "rna:nCount_RNA"
medians = d4p.groupby(by=[new_anno])[metric].median()
my_order = medians.sort_values().index
fig_width = len(mdata.obs[new_anno].cat.categories) * 0.35
plt.figure(figsize=(fig_width, 5))
p = sns.boxplot(data=d4p, x=new_anno, y=metric, order=my_order)
plt.yscale('log')
p.set_xticklabels(p.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')
plt.savefig(f"{work_dir}/plots/{dataset}_check_rnaUMICount.{new_file}.pdf", bbox_inches="tight")
plt.show()

nFeature_RNA

In [None]:
# Detected-feature count (nFeature_RNA) per annotation on a log y axis; boxes
# ordered by ascending median.
my_order = d4p.groupby(by=[new_anno])["rna:nFeature_RNA"].median().sort_values().index
plt.figure(figsize=(len(mdata.obs[new_anno].cat.categories) * 0.35, 5))
p = sns.boxplot(data = d4p, y = "rna:nFeature_RNA", x = new_anno, order=my_order, )
plt.yscale('log')
p.set_xticklabels(p.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')
# Reference lines. The old comments claimed y=1000 and y=10000, but the lines
# have always been drawn at 300 and 500.
# NOTE(review): these look like candidate QC cut-offs -- confirm their meaning.
low_ref, high_ref = 300, 500
plt.axhline(y=low_ref, color='red', linestyle='--', linewidth=2)   # dashed line at y=300
plt.axhline(y=high_ref, color='red', linestyle='-.', linewidth=2)  # dash-dot line at y=500

plt.savefig(f"{work_dir}/plots/{dataset}_check_rnaGeneCount.{new_file}.pdf", bbox_inches="tight")
plt.show()

percent.mt

In [None]:
# rna:percent.mt per annotation; boxes ordered by ascending median.
metric = "rna:percent.mt"
medians = d4p.groupby(by=[new_anno])[metric].median()
my_order = medians.sort_values().index
fig_width = len(mdata.obs[new_anno].cat.categories) * 0.35
plt.figure(figsize=(fig_width, 5))
p = sns.boxplot(data=d4p, x=new_anno, y=metric, order=my_order)
p.set_xticklabels(p.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')

plt.savefig(f"{work_dir}/plots/{dataset}_check_rnaMtPercent.{new_file}.pdf", bbox_inches="tight")
plt.show()

percent.rb

In [None]:
# rna:percent.rb per annotation; boxes ordered by ascending median (shown, not saved).
metric = "rna:percent.rb"
medians = d4p.groupby(by=[new_anno])[metric].median()
my_order = medians.sort_values().index
fig_width = len(mdata.obs[new_anno].cat.categories) * 0.35
plt.figure(figsize=(fig_width, 5))
p = sns.boxplot(data=d4p, x=new_anno, y=metric, order=my_order)
p.set_xticklabels(p.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')
plt.show()

## ATAC

nCount_peaks

In [None]:
# atac:nCount_peaks per annotation on a log y axis; boxes ordered by ascending median.
metric = 'atac:nCount_peaks'
medians = d4p.groupby(by=[new_anno])[metric].median()
my_order = medians.sort_values().index
fig_width = len(mdata.obs[new_anno].cat.categories) * 0.35
plt.figure(figsize=(fig_width, 5))
p = sns.boxplot(data=d4p, x=new_anno, y=metric, order=my_order)
plt.yscale('log')
p.set_xticklabels(p.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')

plt.savefig(f"{work_dir}/plots/{dataset}_check_atacFragCount.{new_file}.pdf", bbox_inches="tight")
plt.show()

nFeature_peaks

In [None]:
# atac:nFeature_peaks per annotation on a log y axis; boxes ordered by ascending
# median (shown, not saved).
metric = 'atac:nFeature_peaks'
medians = d4p.groupby(by=[new_anno])[metric].median()
my_order = medians.sort_values().index
fig_width = len(mdata.obs[new_anno].cat.categories) * 0.35
plt.figure(figsize=(fig_width, 5))
p = sns.boxplot(data=d4p, x=new_anno, y=metric, order=my_order)
plt.yscale('log')

p.set_xticklabels(p.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')
plt.show()

nucleosome_signal

In [None]:
# atac:nucleosome_signal per annotation; boxes ordered by ascending median (shown, not saved).
metric = 'atac:nucleosome_signal'
medians = d4p.groupby(by=[new_anno])[metric].median()
my_order = medians.sort_values().index
fig_width = len(mdata.obs[new_anno].cat.categories) * 0.35
plt.figure(figsize=(fig_width, 5))
p = sns.boxplot(data=d4p, x=new_anno, y=metric, order=my_order)
p.set_xticklabels(p.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')
plt.show()

TSS.enrichment

In [None]:
# atac:TSS.enrichment per annotation; boxes ordered by ascending median (shown, not saved).
metric = 'atac:TSS.enrichment'
medians = d4p.groupby(by=[new_anno])[metric].median()
my_order = medians.sort_values().index
fig_width = len(mdata.obs[new_anno].cat.categories) * 0.35
plt.figure(figsize=(fig_width, 5))
p = sns.boxplot(data=d4p, x=new_anno, y=metric, order=my_order)
p.set_xticklabels(p.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')
plt.show()

# DGE

In [None]:
# Marker genes for each annotation group (Wilcoxon test) on the RNA modality,
# collected into one table covering all groups.
rna = mdata['rna']
sc.tl.rank_genes_groups(
    rna,
    groupby=new_anno,
    method="wilcoxon",
)
marker_df = sc.get.rank_genes_groups_df(rna, group=None)

In [None]:
# Full marker table. File names are built from `dataset`, like every other
# output of this notebook, instead of a hard-coded "FL_wnn".
marker_df.to_csv(f"{work_dir}/data/{dataset}_markerGenes.{new_file}.csv", index=False)
# top 100: one column per annotation group holding its top-ranked gene names
top_n = 100
result = rna.uns['rank_genes_groups']
groups = result['names'].dtype.names
marker_df_top = pd.DataFrame({group: result['names'][group] for group in groups}).head(top_n)
marker_df_top.to_csv(f"{work_dir}/data/{dataset}_markerGenes_top100.{new_file}.csv", index=False)

# Save

Save cellmeta

In [None]:
# Export the global per-cell metadata, keeping the index as the first column.
cellmeta_csv = f"{work_dir}/data/{dataset}_cellmeta.{new_file}.csv"
mdata.obs.to_csv(cellmeta_csv, index=True)

Save obj

In [None]:
# Write the annotated object under the new version tag.
annotated_h5mu = f"{work_dir}/data/{dataset}_clustered.{new_file}.h5mu"
mdata.write(annotated_h5mu)