In [None]:
# | echo: false
# | output: false
# | warning: false

from os import path
from typing import List, Union

import decoupler as dc
import pandas as pd
import scanpy as sc
import tomlkit
from os.path import join
from datetime import datetime
import panel as pn

# Register Panel's "tabulator" extension up front; pn.widgets.Tabulator is used
# further down to show the differential-expression tables.
pn.extension("tabulator")

In [None]:
# | echo: true
# | output: false
# | warning: false

# Name of the adata.obs column whose values define the groups compared in the
# differential-expression cells below. An empty string makes the notebook fall
# back to config["clustering"]["CLUSTERING_COL"].
CLUSTERING_COL: str = "scTAB_annotation_majority_voting"
# NOTE(review): not referenced in the visible cells; the name looks like a
# misspelling of "CONDITION" - confirm no other cell relies on it before renaming.
CONDIITION: Union[str, None] = None
# Forwarded unchanged as `mask_var` to sc.tl.rank_genes_groups; None presumably
# means "no mask" - TODO confirm against the scanpy version in use.
SUBSET: Union[str, List, None] = None

In [None]:
# | echo: false
# | output: false
# | warning: false

## Pipeline parameters
# Parse the pipeline configuration that lives one directory above the notebook.
# TOML documents are UTF-8 by specification, so decode explicitly rather than
# relying on the platform's locale encoding.
with open("../config.toml", "r", encoding="utf-8") as f:
    config = tomlkit.parse(f.read())

In [None]:
# | output: false
# | warning: false


# Directories (all taken from the parsed pipeline config)
ROOT_DIR = config["basic"]["ANALYSIS_DIR"]
DIR_SAVE = path.join(ROOT_DIR, config["basic"]["DIR_SAVE"])
DIR_samples = config["basic"]["DIR_SAMPLES"]

# Basic information
ORGANISM: str = config["basic"]["ORGANISM"]
NORMALIZATION_METHOD = config["normalization"]["NORMALIZATION_METHOD"]

# Resolve the grouping column. A value set in the notebook parameters wins;
# when it is empty (or None) fall back to the pipeline config. The second check
# must be a separate `if`: it has to run *after* the fallback so that a missing
# or empty config entry is reported instead of silently continuing.
if not CLUSTERING_COL:
    CLUSTERING_COL = config.get("clustering", {}).get("CLUSTERING_COL", "")
if not CLUSTERING_COL:
    raise ValueError(
        "No clustering column found. please provide a key for cell grouping"
    )

In [None]:
# | echo: false
# | output: false
# | warning: false

# Load the saved AnnData object from the analysis output directory.
adata_path = path.join(DIR_SAVE, "adata.h5ad")
adata = sc.read_h5ad(adata_path)

# Make the configured normalization layer the active matrix; copy it so that
# later changes to X do not write through to the stored layer.
normalized = adata.layers[NORMALIZATION_METHOD]
adata.X = normalized.copy()

In [None]:
# | echo: false
# | output: false
# | warning: false

# Rank genes for every group in CLUSTERING_COL with a Wilcoxon test on the
# configured normalization layer; pts=True additionally requests the fraction
# of cells expressing each gene.
# NOTE(review): in scanpy `mask_var` restricts the *genes* that are tested, not
# the cells or groups - confirm that this is what SUBSET is meant to select.
sc.tl.rank_genes_groups(
    adata,
    groupby=CLUSTERING_COL,
    mask_var=SUBSET,
    layer=NORMALIZATION_METHOD,
    method="wilcoxon",
    pts=True,
)
# NOTE(review): called with defaults only. scanpy presumably stores the filtered
# result under its own key ("rank_genes_groups_filtered") rather than replacing
# the ranking above, so cells that read the default key would still see the
# unfiltered genes - TODO confirm this is intended.
sc.tl.filter_rank_genes_groups(adata)

In [None]:
# | echo: false
# | output: false
# | warning: false

# Collect one differential-expression table per group found in CLUSTERING_COL,
# each ordered from the highest to the lowest log fold change.
cell_types = adata.obs[CLUSTERING_COL].unique().tolist()
de_df = {
    group: sc.get.rank_genes_groups_df(adata, group=group).sort_values(
        by="logfoldchanges", ascending=False
    )
    for group in cell_types
}

In [None]:
# | echo: false
# | output: false
# | warning: false

# Export the per-group DE tables to a single workbook named
# <YYYYMMDD>_DE_results.xlsx inside DIR_SAVE, one sheet per group.
excel_path = join(DIR_SAVE, f"{datetime.now():%Y%m%d}_DE_results.xlsx")

# Use the writer as a context manager so the workbook is always closed, even
# when writing one of the sheets raises.
with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
    for df_name, df in de_df.items():
        df.to_excel(writer, sheet_name=df_name)

In [None]:
# | echo: false
# | output: false
# | warning: false

# Rebuild the per-group tables for display, this time keeping only the genes
# that pass the 0.05 p-value cutoff, again sorted by descending log fold change.
cell_types = adata.obs[CLUSTERING_COL].unique().tolist()
de_df = {}
for group in cell_types:
    significant = sc.get.rank_genes_groups_df(adata, group=group, pval_cutoff=0.05)
    de_df[group] = significant.sort_values(by="logfoldchanges", ascending=False)

In [None]:
# | echo: false
# | warning: false

# Build one (tab title, table widget) pair per group. Iterating over items()
# avoids the unused enumerate index and the repeated dictionary lookup.
widget_ls = [
    (
        group,
        pn.widgets.Tabulator(
            table,
            header_filters=True,
            pagination="local",
            show_index=False,
            sortable=True,
            disabled=True,  # read-only: the tables are for browsing only
        ),
    )
    for group, table in de_df.items()
]

# Keep this as the last expression of the cell so the notebook displays the tabs.
pn.Tabs(*widget_ls, dynamic=True)

In [None]:
# | echo: false
# | warning: false

# Overview plot of the ranked genes per group, using scanpy's default settings.
# Only `adata` is passed, so this presumably reads the ranking stored by the
# earlier sc.tl.rank_genes_groups call.
sc.pl.rank_genes_groups(adata)

In [None]:
# | echo: false
# | warning: false

# Dot plot of the ranked genes per group, using scanpy's default settings.
sc.pl.rank_genes_groups_dotplot(adata)

In [None]:
# | echo: false
# | warning: false

# Heatmap of the ranked genes per group, using scanpy's default settings.
sc.pl.rank_genes_groups_heatmap(adata)

In [None]:
# | echo: false
# | warning: false

# Matrix plot of the ranked genes per group, using scanpy's default settings.
sc.pl.rank_genes_groups_matrixplot(adata)

In [None]:
# | echo: false
# | warning: false

# Stacked violin plot of the ranked genes per group, using scanpy's default settings.
sc.pl.rank_genes_groups_stacked_violin(adata)