In [None]:
# Project root on the shared filesystem; the notebook later changes into this directory.
working_dir = "/data/projects/dschaub/ANCA-GN_transcriptomics"
# Exploratory single-cell inputs live below the project root.
data_dir = working_dir + "/data/single-cell/exploratory"

In [None]:
%load_ext autoreload
%autoreload 2

import sys
import os
# Switch to the project root before the remaining imports.
# NOTE(review): presumably this is what makes the project-local `utils.*` imports below
# resolvable — confirm before moving this line.
os.chdir(working_dir)

import yaml
import anndata as ad
import matplotlib.pyplot as plt
import mudata as md
import muon as mu
import numpy as np
import scanpy as sc
import pandas as pd
import scanpy.external as sce
from scipy import sparse
import seaborn as sns
import matplotlib as mpl
from matplotlib.colors import LinearSegmentedColormap

from utils.utils import *
from utils.plotting import *

# Notebook-wide display configuration.
sc.settings.verbosity = 0  # set scanpy's verbosity level to 0
# NOTE(review): scanpy's figure params and seaborn's style presumably both write to
# matplotlib's rcParams, so the order of the next two calls may matter — confirm before reordering.
sc.set_figure_params(dpi=80)
sns.set(style="white")

## Load data

In [None]:
# Read the multimodal object from the exploratory data directory and keep
# direct handles on the two modalities used throughout the notebook.
h5mu_name = "ANCA_exploratory_27PK27PB_CD4Teff_TotalVI.h5mu"
path = os.path.join(data_dir, h5mu_name)
mudata = md.read_h5mu(path)
mod_rna, mod_cite = mudata.mod["rna"], mudata.mod["cite"]
# Bare last expression: show the container's summary as the cell output.
mudata

## RNA markers

In [None]:
# Name of the `.obs` column holding the cluster labels used by every marker analysis below.
# NOTE(review): presumably a Leiden clustering at resolution 0.8 — confirm.
cluster_key = "leiden_0.8"

In [None]:
# Run the differential-expression pipeline on the RNA modality, grouped by `cluster_key`.
# The call yields three results, unpacked below in the order they are returned.
de_outputs = run_de_pipeline(mod_rna, cluster_key, mod="rna", top_n=5, min_expression=0.2)
all_marker_results, filtered_marker_results, best_markers = de_outputs

In [None]:
def _split_markers(cell):
    """Turn one spreadsheet cell of comma-separated marker names into a list.

    Parameters
    ----------
    cell : object
        Raw value read from the marker table. Anything that is not a string
        (for example the NaN that stands in for an empty cell) means "no markers".

    Returns
    -------
    list of str
        Marker names with spaces removed, in their original order and without
        empty entries.
    """
    if not isinstance(cell, str):
        # An identity test such as `cell is not np.nan` misses NaN values that are
        # not the `np.nan` singleton; those would then crash on `.replace`.
        return []
    names = (name.strip() for name in cell.replace(" ", "").split(","))
    # Drop empty entries left behind by blank cells or stray/trailing commas.
    return [name for name in names if name]


path = os.path.join(data_dir, "..", "T_cell_markers.xlsx")
marker_df = pd.read_excel(path).set_index("Cell type")

# extract positive markers: cell type -> de-duplicated markers from both columns.
# dict.fromkeys keeps first-seen order, so the marker order (and therefore the panel
# order in the plots) is reproducible across kernel restarts, unlike list(set(...)).
marker_db = {
    cell_type: list(
        dict.fromkeys(
            _split_markers(row["positive markers 1"])
            + _split_markers(row["positive markers 2"])
        )
    )
    for cell_type, row in marker_df.iterrows()
}

marker_plotter = MarkerPlotter(mod_rna, mod_cite, marker_db, cluster_key=cluster_key)

In [None]:
# Sanity-check panel: the three T-cell lineage genes on the RNA modality.
lineage_markers = ["CD8A", "CD4", "CD3E"]
fig = marker_plotter.visualize_markers(
    markers=lineage_markers, dtype="rna", ncols=3, use_default_plot=True
)
# Options previously toggled here, kept for reference: marker_plot=False, dotplot=False

In [None]:
# One RNA marker figure per cell type listed in the marker table.
rna_plot_kwargs = dict(dtype="rna", ncols=3, use_default_plot=True)
for cell_type in marker_db:
    fig = marker_plotter.visualize_markers(cell_type=cell_type, **rna_plot_kwargs)
# Options previously toggled here, kept for reference: marker_plot=False, dotplot=False

## Protein markers

In [None]:
# Reuse the RNA clustering and UMAP embedding for the protein (CITE) modality.
# The `.obs` column assignment aligns on cell names, but an array-valued `.obsm` entry is
# attached by position: if the two modalities listed their cells in a different order,
# the coordinates would silently end up on the wrong cells. Fail loudly instead.
if not mod_cite.obs_names.equals(mod_rna.obs_names):
    raise ValueError(
        "mod_rna and mod_cite do not contain the same cells in the same order; "
        "reorder one modality before copying the UMAP embedding."
    )
mod_cite.obs[cluster_key] = mod_rna.obs[cluster_key]
mod_cite.obsm["X_umap"] = mod_rna.obsm["X_umap"]

In [None]:
# One protein marker figure per cell type listed in the marker table.
protein_plot_kwargs = dict(dtype="protein", ncols=3, use_default_plot=True)
for cell_type in marker_db:
    fig = marker_plotter.visualize_markers(cell_type=cell_type, **protein_plot_kwargs)
# Options previously toggled here, kept for reference: marker_plot=False, dotplot=False