In [13]:
# -.-|m { input: false, output: false, input_fold: show}

import tomlkit
import scanpy as sc
from anndata import AnnData
import pandas as pd
import numpy as np
import seaborn as sns
from pandas import DataFrame

from os import path
import session_info
import logging
from tempfile import TemporaryDirectory
from os import system
import torch

# Ask the root logger to emit only records at ERROR level or above.
logging.basicConfig(level=logging.ERROR)

# Scanpy figure defaults for this notebook: 6x6 figure size, frames turned off.
sc.set_figure_params(figsize=(6, 6), frameon=False)

In [14]:
# CellTypist model name(s) to annotate with. The list is passed to
# cell_typist_annotate when ANNOTATION_METHOD is "celltypist"; it must be
# filled in for that method because an empty list raises ValueError there.
CELL_TYPIST_MODELS: list[str] = []

In [15]:
# | echo: false
# | output: false
# | warning: false

## Pipeline parameters
# TOML documents are UTF-8 by specification, so decode explicitly instead of
# relying on the platform's default text encoding.
with open("../config.toml", "r", encoding="utf-8") as f:
    config = tomlkit.parse(f.read())

In [16]:
# Pull each config table once, then read the individual settings from it.
_basic = config["basic"]
_normalization = config["normalization"]

# [basic]
ROOT_DIR = _basic["ANALYSIS_DIR"]
DIR_SAVE = path.join(ROOT_DIR, _basic["DIR_SAVE"])
TISSUE = _basic["TISSUE"]

# [normalization]
COUNTS_LAYER = _normalization["COUNTS_LAYER"]
# NOTE(review): the variable name is misspelt ("NORMAMALIZATION"); it is kept
# unchanged here because other cells may reference it.
NORMAMALIZATION_LAYER = _normalization["NORMALIZATION_METHOD"]

# [clustering] and [annotation]
CLUSTERING_COL = config["clustering"]["CLUSTERING_COL"]
ANNOTATION_METHOD = config["annotation"]["ANNOTATION_METHOD"]

In [17]:
def cell_typist_annotate(adata: AnnData, models: list[str], inplace=True):
    """Annotate cells with one or more CellTypist models.

    For every name in ``models`` two columns are written to ``.obs``:
    ``celltypist_<model>_label`` (the ``majority_voting`` prediction) and
    ``celltypist_<model>_conf_score`` (the ``conf_score`` column).

    Parameters
    ----------
    adata
        Object to annotate. Raw counts are read from
        ``adata.layers[COUNTS_LAYER]`` (``COUNTS_LAYER`` is a notebook-level
        setting).
    models
        CellTypist model names. Each must be listed by
        ``celltypist.models.models_description()``.
    inplace
        If ``True`` (default) the columns are added to ``adata`` itself and
        ``None`` is returned. If ``False`` the input is left untouched and an
        annotated copy is returned.

    Returns
    -------
    The annotated copy when ``inplace`` is ``False``, otherwise ``None``.

    Raises
    ------
    ValueError
        If ``models`` is empty or contains a name that is not a supported
        CellTypist model.
    """
    # Imported here rather than in the notebook's import cell; presumably so
    # celltypist is only required when this annotation method is selected.
    import celltypist
    from celltypist import models as ctypist_models
    from scipy.sparse import issparse

    if len(models) == 0:
        raise ValueError("The models list are empty, enter valid model names.")

    all_models = ctypist_models.models_description().model.to_list()

    for model in models:
        if model not in all_models:
            # f-string so the offending model name actually appears in the message.
            raise ValueError(f"{model} not found in supported cell typist models.")

    # force_update=True re-downloads the requested models on every call.
    ctypist_models.download_models(force_update=True, model=models)

    # Honour inplace=False: annotate a copy so the caller's object is untouched.
    if not inplace:
        adata = adata.copy()

    # Working copy whose X holds counts normalised to 10,000 per cell and
    # log1p-transformed. The layer is copied so the normalisation below can
    # never rescale the counts layer of `adata` itself.
    adata_celltypist = adata.copy()
    adata_celltypist.X = adata.layers[COUNTS_LAYER].copy()
    # normalize_total replaces the deprecated normalize_per_cell; unlike the
    # latter it does not filter out cells, so every index of `adata` is still
    # present for the `.loc` look-ups below.
    sc.pp.normalize_total(adata_celltypist, target_sum=10**4)
    sc.pp.log1p(adata_celltypist)
    # Densify only when needed: a dense array has no `.toarray()`.
    if issparse(adata_celltypist.X):
        adata_celltypist.X = adata_celltypist.X.toarray()

    for model in models:
        loaded_model = ctypist_models.Model.load(model=model)
        predictions = celltypist.annotate(
            adata_celltypist, model=loaded_model, majority_voting=True
        )
        predicted_obs = predictions.to_adata().obs
        column_prefix = "celltypist_" + model
        # Re-index by adata's own cell index so values line up with its rows.
        adata.obs[column_prefix + "_label"] = predicted_obs.loc[
            adata.obs.index, "majority_voting"
        ]
        adata.obs[column_prefix + "_conf_score"] = predicted_obs.loc[
            adata.obs.index, "conf_score"
        ]

    if not inplace:
        return adata
    return None

In [19]:
# Load the AnnData object stored as "adata.h5ad" inside DIR_SAVE.
adata = sc.read_h5ad(path.join(DIR_SAVE, "adata.h5ad"))
# Alternative input, currently disabled:
# adata = sc.read_h5ad("../save/marcelo_ref.h5ad")

In [20]:
# Keep a stable raw-counts layer for later steps and make X hold the raw counts.
if COUNTS_LAYER == "X":
    # Counts live in X: snapshot them under a named layer and point
    # COUNTS_LAYER at that layer from here on.
    adata.layers["counts"] = adata.X.copy()
    COUNTS_LAYER = "counts"
elif COUNTS_LAYER in adata.layers:
    # Counts live in a named layer: copy them into X.
    adata.X = adata.layers[COUNTS_LAYER].copy()
else:
    # f-string so the missing layer's name actually appears in the message.
    raise ValueError(f"{COUNTS_LAYER} layer can't be found in the object")


# CellTypist annotation writes its label / confidence columns into adata.obs.
if ANNOTATION_METHOD == "celltypist":
    cell_typist_annotate(adata, CELL_TYPIST_MODELS)


# scGPT is not run from this notebook; point the user to the accelerated one.
if ANNOTATION_METHOD == "scGPT":
    print(
        "please use the accelerated_annotation notebook with a GPU, TPU, or HPU present."
    )
    # NOTE(review): under an IPython/Jupyter kernel `exit` is the kernel's own
    # exit helper rather than the plain-Python `site` one, and may not accept
    # a `code=` keyword - confirm this call behaves as intended there.
    exit(code=0)

if ANNOTATION_METHOD == "scTAB":


Resources are ready
Loading genes from model


ModuleNotFoundError: No module named 'cellnet'