In [1]:
# Directory holding the filtered per-dataset .h5ad files that are read later in this notebook.
backup_dir = '/root/datos/maestria/netopaas/luca_explore/surgeries'
# Directory for the ikarus artifacts: signatures.gmt, core_model.joblib and the exported predictions CSV.
ikarus_dir = '/root/datos/maestria/netopaas/ikarus'
# Dataset identifier; used to build the input file name (filtered_<id_>.h5ad) and the output CSV name (<id_>.csv).
id_ = 'Chen_Zhang_2020_NSCLC'

In [2]:
import gdown
import anndata as ad
import pandas as pd
import os

import scanpy as sc
from ikarus import classifier, utils, data

ikarus is a stepwise machine learning pipeline for distinguishing tumor cells from normal cells. Leveraging multiple annotated single-cell datasets, it can be used to define a gene set specific to tumor cells. First, this gene set is used to rank cells and then to train a logistic classifier for the robust classification of tumor and normal cells. Finally, sensitivity is increased by propagating the cell labels based on a custom cell-cell network.

In [3]:
# Gene signatures file (GMT); fetched from the ikarus repository only when it is
# not already present on disk.
signatures_path = f"{ikarus_dir}/signatures.gmt"
url = "https://raw.githubusercontent.com/BIMSBbioinfo/ikarus/master/tutorials/out/signatures.gmt"
if not os.path.exists(signatures_path):
    gdown.download(url, signatures_path, quiet=False)

# Parse the file once so that a missing or malformed download fails here. The
# resulting frame is not kept and is not displayed (it is not the last expression).
pd.read_csv(signatures_path, sep="\t", header=None)

# Path of the pretrained core model. It is only referenced here; nothing in this
# notebook downloads it, so it must already exist in ikarus_dir.
model_path = f"{ikarus_dir}/core_model.joblib"

In [4]:
# adapt_signatures=True is important here: we are working with a reduced gene set,
# and the model won't work if the intersection between the signatures and the
# available genes is too small. The non-overlapping genes must therefore be removed
# from the signatures; this flag does that automatically.
model = classifier.Ikarus(
    signatures_gmt=signatures_path,
    out_dir="out",
    adapt_signatures=True,
)
model.load_core_model(model_path)

In [5]:
# Read the filtered dataset for `id_`; the bare name on the last line displays its summary.
filtered_path = f'{backup_dir}/filtered_{id_}.h5ad'
adata = sc.read_h5ad(filtered_path)
adata

In [6]:
# Run the ikarus preprocessing helper on the loaded data and rebind `adata` to the result.
# NOTE(review): this cell overwrites its own input, so re-running it feeds the
# already-preprocessed object back into preprocess_adata. Re-run the loading cell
# first. What preprocess_adata does exactly is not visible here; confirm in the ikarus docs.
adata = data.preprocess_adata(adata)

In [7]:
# Copy the gene names into a 'gene_symbol' column of adata.var before predicting
# (presumably the column ikarus matches signatures against; confirm in the ikarus docs).
adata.var['gene_symbol'] = adata.var_names

# Optional keyword arguments, left disabled on purpose:
#   connectivities_path='scvi_conns.npz'  # If we use the connectivities from scVI the number of predicted cells changes a lot
#   save=True
# The return value is bound to `_` so that the cell displays nothing.
_ = model.predict(adata, "tumor")

  disp_grouped = df.groupby('mean_bin')['dispersions']
  adata.uns['hvg'] = {'flavor': flavor}
2023-12-06 05:09:52.543614: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-06 05:09:52.543710: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-06 05:09:52.546068: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-06 05:09:52.558812: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuil

converged at iteration step: 39 with 0.0008 < 0.001


In [None]:
# Pull the "final_pred" column out of the model's results table as a plain array.
# NOTE(review): model.results is presumably populated by the model.predict call in
# the previous cell, so that cell must have run first. Confirm.
preds = model.results["final_pred"].values

  disp_grouped = df.groupby('mean_bin')['dispersions']
  adata.uns['hvg'] = {'flavor': flavor}
2023-12-06 01:31:58.139399: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-06 01:31:58.139478: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-06 01:31:58.141486: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-06 01:31:58.153445: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuil

In [None]:
# Export the final predictions as a CSV with one row per cell, indexed by cell name.
# AnnData objects have no `.index` attribute (accessing it raises AttributeError);
# the per-cell labels are exposed as `adata.obs_names`.
# NOTE(review): this assumes `preds` follows the row order of `adata`. Confirm
# against how ikarus builds model.results.
assert len(preds) == adata.n_obs, "number of predictions does not match number of cells"
preds_df = pd.DataFrame({'final_pred': preds}, index=adata.obs_names)
preds_df.to_csv(f'{ikarus_dir}/{id_}.csv')