In [None]:
# -.-|m { input: false, output: false, input_fold: show}

import subprocess
from os import path, system
from pathlib import Path
from typing import Union

import GPUtil
import numpy as np
import scanpy as sc
import seaborn as sns
import session_info
import tomlkit
from anndata import AnnData
from tomlkit.items import String, Array

from utils.util_funcs import cell_typist_annotate

In [None]:
# User-editable settings for this notebook.

# CellTypist model(s) handed to `cell_typist_annotate`; add the model(s) to use.
CELL_TYPIST_MODELS: list[str] = []
# Presumably the location of a marker-genes file (TODO confirm where it is read).
# The empty string is the "not set" default, so the annotation admits str as well.
MARKER_GENES_PATH: Union[str, Path] = ""
# Batch size forwarded to the scGPT annotation command through its `-b` option.
BATCH_SIZE: int = 128

In [None]:
# | echo: false
# | output: false
# | warning: false

## Pipeline parameters
# TOML documents are UTF-8 by specification, so decode explicitly instead of
# relying on the platform's default text encoding.
with open("../config.toml", "r", encoding="utf-8") as f:
    config = tomlkit.parse(f.read())

In [None]:
# Values pulled out of the parsed pipeline configuration.

# Analysis root directory; results are stored in DIR_SAVE underneath it.
ROOT_DIR: String = config["basic"]["ANALYSIS_DIR"]
DIR_SAVE: String = path.join(ROOT_DIR, config["basic"]["DIR_SAVE"])
# Name of the layer holding raw counts ("X" means the main matrix itself).
COUNTS_LAYER: String = config["normalization"]["COUNTS_LAYER"]
CLUSTERING_COL: String = config["clustering"]["CLUSTERING_COL"]
# Either a single method name or an array of method names.
ANNOTATION_METHODS: Union[String, Array] = config["annotation"]["ANNOTATION_METHOD"]
NORMALIZATION_LAYER: String = config["normalization"]["NORMALIZATION_METHOD"]
# Original (misspelled) name, kept so that any existing reference keeps working.
NORMAMALIZATION_LAYER: String = NORMALIZATION_LAYER

In [None]:
# Load the AnnData object stored as "adata.h5ad" inside DIR_SAVE.
adata_input_file = path.join(DIR_SAVE, "adata.h5ad")
adata: AnnData = sc.read_h5ad(adata_input_file)

In [None]:
def _run_pixi_command(environment: str, command: list[str]) -> None:
    """Run ``command`` via ``pixi run -e <environment>`` and report failures.

    The command is passed as an argument list (no shell involved), so paths
    containing spaces or shell metacharacters reach the tool unchanged. A
    failure to launch, or a non-zero exit status, is printed rather than
    raised so that the remaining annotation methods can still run.
    """
    try:
        completed = subprocess.run(["pixi", "run", "-e", environment, *command])
    except OSError as err:
        print(f"Could not launch '{command[0]}' through pixi: {err}")
        return
    if completed.returncode != 0:
        print(f"'{command[0]}' exited with status {completed.returncode}")


def _merge_new_obs_columns(adata: AnnData, adata_path: str) -> None:
    """Copy ``obs`` columns found in the file at ``adata_path`` but missing from ``adata``.

    Updates ``adata`` in place, so the caller's object sees the new columns.
    Existing columns, ``X`` and the layers of ``adata`` are left untouched.
    """
    on_disk = sc.read_h5ad(adata_path)
    for column in on_disk.obs.columns:
        if column not in adata.obs.columns:
            # Pandas aligns the assigned Series on the observation index.
            adata.obs[column] = on_disk.obs[column]


def annotation_dispatcher(method: str, adata: AnnData) -> None:
    """Run one annotation method on the dataset.

    Parameters
    ----------
    method
        ``"celltypist"``, ``"scGPT"`` or ``"scTAB"``. Any other value is
        reported and ignored.
    adata
        The in-memory dataset. It is annotated in place by CellTypist; for the
        external tools, new ``obs`` columns found in the saved file afterwards
        are copied into it.

    Notes
    -----
    * ``celltypist`` annotates ``adata`` and writes it to ``adata.h5ad`` in
      ``DIR_SAVE``.
    * ``scGPT`` and ``scTAB`` are launched in their own pixi environments on the
      saved ``adata.h5ad``. NOTE(review): this assumes the tools store their
      results as ``obs`` columns in that same file - confirm. Pulling those
      columns back into ``adata`` keeps a later ``celltypist`` write from
      overwriting them with a stale in-memory object.
    * ``scGPT`` is skipped when ``GPUtil`` reports no available GPU.
    """
    adata_path = path.join(DIR_SAVE, "adata.h5ad")
    config_path = path.join(ROOT_DIR, "config.toml")

    if method == "celltypist":
        cell_typist_annotate(adata, CELL_TYPIST_MODELS)
        adata.write(adata_path)
    elif method == "scGPT":
        if not GPUtil.getAvailable():
            print("CUDA is not available, scGPT will not be run efficiently on CPU")
            # Skip this method only: calling `exit` here would abort the whole
            # run, including every other requested annotation method.
            return
        _run_pixi_command(
            "scgpt",
            [
                "scgpt_annotate",
                "-i",
                adata_path,
                "--config",
                config_path,
                "-b",
                str(BATCH_SIZE),
            ],
        )
        _merge_new_obs_columns(adata, adata_path)
    elif method == "scTAB":
        _run_pixi_command(
            "sctab",
            ["sctab_annotate", "--input", adata_path, "--config", config_path],
        )
        # Rebinding the local name `adata` would not reach the caller's object,
        # so the new columns are merged into it in place instead.
        _merge_new_obs_columns(adata, adata_path)
    else:
        print(f"Unknown annotation method '{method}', nothing was run")


# Getting a stable counts layer to be used later, setting X to be raw count values.
if COUNTS_LAYER == "X":
    # X already holds the counts: snapshot it into a named layer and refer to
    # that layer from here on.
    adata.layers["counts"] = adata.X.copy()
    COUNTS_LAYER = "counts"
elif COUNTS_LAYER in adata.layers.keys():
    adata.X = adata.layers[COUNTS_LAYER].copy()
else:
    # f-string so the message names the layer that was actually requested.
    raise ValueError(f"{COUNTS_LAYER} layer can't be found in the object")

# The configuration may name a single method or a list of methods.
if isinstance(ANNOTATION_METHODS, list):
    for METHOD in ANNOTATION_METHODS:
        annotation_dispatcher(METHOD, adata)
elif isinstance(ANNOTATION_METHODS, str):
    annotation_dispatcher(ANNOTATION_METHODS, adata)

In [None]:
# Re-read the saved object from DIR_SAVE so the plots use what is on disk.
annotated_adata_file = path.join(DIR_SAVE, "adata.h5ad")
adata = sc.read_h5ad(annotated_adata_file)

## UMAP after annotation

In [None]:
# Key used to colour the UMAP; presumably added by the scTAB annotation step,
# so it may be absent when scTAB was not among the configured methods.
UMAP_COLOR_KEY = "scTAB_annotation_majority_voting"
if UMAP_COLOR_KEY in adata.obs.columns or UMAP_COLOR_KEY in adata.var_names:
    sc.pl.umap(adata, color=UMAP_COLOR_KEY)
else:
    # Report the missing key instead of failing the cell with a lookup error.
    print(f"'{UMAP_COLOR_KEY}' not found in the object, skipping the UMAP plot")