In [26]:
%load_ext pretty_jupyter

The pretty_jupyter extension is already loaded. To reload it, use:
  %reload_ext pretty_jupyter


In [27]:
# -.-|m { input: false, output: false, input_fold: show}

import tomlkit
import scanpy as sc
from anndata import AnnData
import pandas as pd
import numpy as np
import scvi
import seaborn as sns

from os import path
import session_info
import logging
from tempfile import TemporaryDirectory 
from os import system

logging.basicConfig(level=logging.ERROR)

scvi.settings.seed = 0
sc.set_figure_params(figsize=(6, 6), frameon=False)


Global seed set to 0


In [28]:
def scvi_cellassign_annotate(adata, marker_matrix, subsample=2000):

    import torch
    from scvi.external import CellAssign

    torch.set_float32_matmul_precision("high")

    markers_common = list(set(adata.var_names).intersection(set(marker_matrix.index)))
    adata_annotate = sc.pp.subsample(adata, n_obs = subsample, copy = True)
    
    adata_annotate = adata_annotate[:, list(markers_common)].copy()

    adata_annotate.X = adata_annotate.layers[COUNTS_LAYER]
    lib_size = adata_annotate.layers[COUNTS_LAYER].sum(1)
    adata_annotate.obs["size_factor"] = lib_size / np.mean(lib_size)

    scvi.external.CellAssign.setup_anndata(adata_annotate, size_factor_key="size_factor")
    cellassign_model = CellAssign(adata_annotate, marker_matrix)

    return cellassign_model, adata_annotate


def convert_scHCA(tissue, quantile = 0.98):
    df = pd.read_csv("../resources/scHCL_DB.csv", index_col=0)
    tissue_df = df.filter(regex=tissue)
    quantiles = tissue_df.quantile(q=quantile, axis=0)
    binary_markers = tissue_df.gt(quantiles, axis=1).astype("int")
    
    # Remove markers where it is all 0 or 1
    binary_markers = binary_markers.loc[~(binary_markers == 0).all(axis=1)]
    binary_markers = binary_markers.loc[~(binary_markers == 1).all(axis=1)]

    return binary_markers


In [29]:
## Pipeline parameters
with open("../config.toml", "r") as f:
    config = tomlkit.parse(f.read())

In [30]:
ROOT_DIR = config["basic"]["ANALYSIS_DIR"]
DIR_SAVE = path.join(ROOT_DIR, config["basic"]["DIR_SAVE"])
TISSUE = config["basic"]["TISSUE"]
ANNOTATION_METHOD = config["annotation"]["ANNOTATION_METHOD"]
COUNTS_LAYER = config["normalization"]["COUNTS_LAYER"]
NORMALIZATION_LAYER = ""

In [31]:
#adata = sc.read_h5ad(path.join(DIR_SAVE, "adata.h5ad"))
adata = sc.read_h5ad("../save/marcelo_ref.h5ad")

In [32]:
if ANNOTATION_METHOD == "scvi_cellassign":
    markers = convert_scHCA(TISSUE)
    model, adata_annotated = scvi_cellassign_annotate(adata, markers, subsample=100)
    model.train(batch_size = 8192, max_epochs=400)

    model.history["elbo_validation"].plot()

    predictions = model.predict()
    predictions.head()

    sns.clustermap(predictions, cmap="viridis")

  _verify_and_correct_data_format(adata, self.attr_name, self.attr_key)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Epoch 140/400:  35%|███▍      | 139/400 [06:22<12:01,  2.76s/it, v_num=1, train_loss_step=1.12e+3, train_loss_epoch=1.12e+3]

In [12]:
predictions

Unnamed: 0,CD4.T.cell.Adult.Peripheral.Blood1.,Proliferating..B.cell.Adult.Peripheral.Blood1.,Monocyte.Adult.Peripheral.Blood1.,CD8.T.cell.Adult.Peripheral.Blood1.,NK.cell.Adult.Peripheral.Blood1.,B.cell.Adult.Peripheral.Blood1.,Macrophage.Adult.Peripheral.Blood1.,Conventional.dendritic.cell.Adult.Peripheral.Blood1.,Plasmacytoid.dendritic.cell.Adult.Peripheral.Blood1.,Neutrophil.Adult.Peripheral.Blood1.,...,Neutrophil_ELANE.high.Cord.Blood.CD34P1.,HSPC.Cord.Blood.CD34P2.,Monocyte_G0S2.high.Cord.Blood.CD34P2.,Proliferating.cell.Cord.Blood.CD34P2.,Neutrophil_MPO.high.Cord.Blood.CD34P2.,Dendritic.cell.Cord.Blood.CD34P2.,Neutrophil_ELANE.high.Cord.Blood.CD34P2.,Erythroid.Basophil.Progenitor.Cord.Blood.CD34P2.,Megakaryocyte.Cord.Blood.CD34P2.,Blood.NK.CD16..Placenta_VentoTormo.
0,9.391683e-55,1.545073e-25,2.092852e-58,7.392063e-68,2.090923e-136,4.453038e-108,1.193182e-65,1.284167e-107,1.446403e-93,2.095643e-118,...,7.508440e-53,3.829039e-52,1.775488e-119,1.852182e-85,5.071343e-47,2.314498e-91,1.256611e-59,1.402177e-78,5.647343e-111,1.506198e-150
1,6.914583e-79,4.306767e-52,3.178547e-172,9.447382e-111,3.907121e-134,7.851720e-188,4.213991e-169,2.603107e-149,2.147090e-104,7.334868e-112,...,2.632576e-75,1.113848e-43,8.038149e-236,1.528726e-69,2.808442e-58,9.522738e-163,4.205253e-117,1.364001e-55,5.846952e-92,1.614101e-157
2,7.820145e-41,2.626970e-48,8.646041e-45,1.003000e-39,3.213592e-38,2.874538e-10,9.231551e-39,2.250895e-37,4.033745e-53,7.057055e-47,...,2.513820e-39,9.860908e-59,1.916633e-47,4.540583e-58,2.577034e-35,2.345719e-72,3.390040e-25,3.623575e-42,1.235234e-81,4.063627e-85
3,1.920329e-51,3.164002e-34,1.272872e-45,8.385363e-26,4.079663e-40,1.882244e-14,7.800564e-14,3.215724e-37,7.500365e-47,2.213410e-61,...,1.186550e-49,6.309403e-48,5.943239e-51,2.057659e-60,8.250897e-36,3.367636e-69,5.881542e-58,3.594419e-38,8.197134e-80,9.768556e-69
4,4.113877e-25,1.907305e-17,2.225814e-35,2.528084e-19,1.623473e-20,1.858309e-58,7.336534e-28,2.858679e-26,5.806880e-36,2.275824e-55,...,9.264933e-53,1.753187e-53,2.580811e-77,8.198840e-76,9.284674e-46,1.129703e-63,1.359384e-50,1.820648e-65,4.640008e-77,6.170861e-101
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,5.795703e-48,1.972558e-44,1.529314e-61,9.559054e-47,3.381127e-19,6.314538e-48,9.028583e-28,3.711886e-43,1.080481e-51,1.112520e-44,...,3.068547e-34,1.592529e-55,8.605453e-51,3.135873e-57,9.832763e-33,2.848147e-77,3.275320e-49,1.017206e-58,1.672325e-82,5.124222e-64
96,2.024547e-78,4.687045e-69,2.602789e-174,1.586155e-82,4.659958e-91,1.070892e-185,3.377939e-178,8.046005e-168,5.414669e-151,9.698734e-137,...,5.599803e-101,8.874062e-85,7.457741e-216,4.682941e-112,1.417393e-54,9.039344e-182,1.290641e-130,3.439900e-84,8.866354e-118,2.834966e-165
97,4.635550e-90,1.629555e-91,2.710210e-59,1.756549e-91,1.916643e-112,3.166816e-116,1.909213e-67,7.119607e-71,6.026791e-57,2.653562e-111,...,2.210310e-91,6.475306e-101,9.457321e-123,3.598182e-104,3.946628e-78,6.070926e-114,3.050204e-91,2.115511e-131,2.403858e-89,1.204745e-150
98,1.671050e-71,1.963192e-53,1.354768e-138,2.911753e-89,4.265716e-87,2.734853e-134,1.816031e-137,1.192548e-157,3.014946e-137,5.110559e-92,...,2.279949e-83,2.552134e-74,6.212146e-182,2.845701e-74,4.521010e-51,4.421672e-152,1.179815e-95,1.695572e-74,1.309960e-86,1.520718e-93
