In [None]:
import scvi

import scanpy as sc
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import random
import numpy as np
import os
import torch
import sys
from datetime import datetime
import scarches as sca

from matplotlib import pyplot as plt
from datetime import datetime

# Notebook-wide display/runtime configuration.
# Render scvi-tools progress bars with tqdm.
scvi.settings.progress_bar_style = "tqdm"

# for white background of figures (only for docs rendering)
%config InlineBackend.print_figure_kwargs={'facecolor' : "w"}
%config InlineBackend.figure_format='retina'

# Render matplotlib figures inline in the notebook (needed on the HPC setup).
%matplotlib inline

# NOTE: with "all", every bare expression statement in a cell is displayed, not only
# the last one; several later cells rely on this to show intermediate objects.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all" # print multiple outputs per code cell (not just last)

In [None]:
# Scanpy figure defaults for the whole notebook: 4x4 figure size, dpi=100 for
# display and dpi_save=300 for figures written to disk.
sc.set_figure_params(figsize=(4, 4), dpi=100, dpi_save=300)

In [None]:
# Use the same core count for scanpy's parallel jobs and for scvi-tools/PyTorch threads.
# NOTE(review): 8 is hard-coded; presumably it matches the cores requested for the HPC job.
nCores = 8
sc.settings.n_jobs = nCores #nCores
scvi.settings.num_threads = nCores # nThreads for PyTorch

In [None]:
def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy and PyTorch, sets the scvi-tools seed and
    exports ``PYTHONHASHSEED``. When CUDA is available it additionally seeds the
    CUDA generators and puts cuDNN in deterministic mode with benchmarking off.

    Parameters
    ----------
    seed
        Integer seed applied to all generators.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    scvi.settings.seed = seed # scvi-tools seed
    os.environ["PYTHONHASHSEED"] = str(seed)

    # Nothing GPU-specific to configure on CPU-only machines.
    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

set_seed(123)

In [None]:
# Move the kernel into the project root so the relative paths used below resolve.
# `!cd ...` would only change directory inside a throwaway subshell and leave the
# kernel's working directory untouched, so os.chdir is used instead.
project_dir = "/scratch/gent/vo/000/gvo00027/projects/Single_Cell_Neuroblastoma/NBAtlas"
os.chdir(project_dir)
os.getcwd()

In [None]:
# Output folders for this notebook (relative to the current working directory).
# Every path keeps its trailing "/" because later cells build file paths by plain
# string concatenation (e.g. output_dir + "<file name>").
output_dir = "05c_scArches_SCANVI_NBAtlas_v2/"
# output_dir already ends with "/", so the sub-folder names must not start with one.
output_tables = output_dir + "Tables/"
output_figures = output_dir + "Figures/"

for _path in (output_dir, output_tables, output_figures):
    os.makedirs(_path, exist_ok=True)

In [None]:
# Point scanpy's figure directory at this notebook's Figures folder, so plots
# requested with `save=` below are written there.
sc.settings.figdir = output_figures

In [None]:
# Scanpy logging level; 4 corresponds to the most detailed ("debug") level in
# scanpy's verbosity scale.
sc.settings.verbosity = 4

In [None]:
# check memory by using 'mem'
# Names IPython injects into the namespace (plus this helper list itself) are skipped.
ipython_vars = ['In', 'Out', 'exit', 'quit', 'get_ipython', 'ipython_vars']
# Map every public, non-module global name to its sys.getsizeof() value in bytes,
# ordered from largest to smallest. getsizeof reports the shallow size of the object only.
mem = dict(
    sorted(
        (
            (name, sys.getsizeof(globals().get(name)))
            for name in dir()
            if not (name.startswith("_") or name in sys.modules or name in ipython_vars)
        ),
        key=lambda item: item[1],
        reverse=True,
    )
)

In [None]:
mem

# SCANVI NBAtlas

## Load scvi data

In [None]:
# Load the reference AnnData produced by the scVI step (03a).
# The path is relative, so it is resolved against the kernel's current working directory.
source_adata = sc.read("03a_scVI_NBAtlas/nb_adata_c_scVI_covSample_moreLayers20230508_NBAtlas.h5ad") #v2
source_adata

In [None]:
# Restore the trained scVI model from disk and attach it to source_adata (GPU requested).
scvi_model = scvi.model.SCVI.load(dir_path="03a_scVI_NBAtlas/03a_scVI_model_covSample_moreLayers20230508_NBAtlas/", adata=source_adata, use_gpu=True)
scvi_model

In [None]:
# add annotation
# metadata
# Per-cell metadata table exported by the post-scVI R step (03c); the first CSV
# column becomes the index (presumably the cell names - verify against source_adata.obs_names).
# NOTE(review): absolute path, whereas the other inputs in this notebook are relative.
metadata = pd.read_csv('/scratch/gent/vo/000/gvo00027/projects/Single_Cell_Neuroblastoma/NBAtlas/03c_post_scVI_R_plots/Tables/03c_post_scVI_R_plots_covSample_MetaData_AnnotationForscArches.csv', 
                       index_col=0)
metadata

In [None]:
# Attach the curated annotation to the reference cells.
# Assigning `.values` directly is purely positional: if the CSV rows are ordered
# differently from source_adata, labels end up on the wrong cells without any error.
# When the metadata index covers every cell name, align on the name instead.
_annot = metadata["annot_NBN_scarches"]
if metadata.index.is_unique and source_adata.obs_names.isin(metadata.index).all():
    source_adata.obs["annot_NBN_scarches"] = _annot.reindex(source_adata.obs_names).values
else:
    # NOTE(review): metadata index does not match the cell names, so fall back to the
    # original positional assignment (pandas raises if the lengths differ).
    # Confirm that the row order of the CSV matches source_adata.
    source_adata.obs["annot_NBN_scarches"] = _annot.values

In [None]:
# Initialise a scANVI model from the pretrained scVI model, using the curated
# annotation as labels; cells labelled "not assigned" are declared as the unlabeled category.
# NOTE(review): n_layers=4 is passed explicitly - presumably it has to match the
# architecture of the loaded scVI model; confirm.
scanvi = scvi.model.SCANVI.from_scvi_model(adata = source_adata,
                                           scvi_model = scvi_model,
                                           labels_key = "annot_NBN_scarches",
                                           unlabeled_category = "not assigned",
                                           n_layers = 4 #v2
)

In [None]:
# Sanity check: how many reference cells carry a label vs. the unlabeled category.
# NOTE: reads private attributes of the model (_labeled_indices / _unlabeled_indices).
print("Labelled Indices: ", len(scanvi._labeled_indices))
print("Unlabelled Indices: ", len(scanvi._unlabeled_indices))

In [None]:
# early_stopping_kwargs
# NOTE(review): neither `trainer_kwargs` nor `loss_kwargs` is referenced anywhere else
# in the visible notebook - the scanvi.train(...) call below does not pass them, so
# these settings currently have no effect on training. Either wire them into the
# train call or remove them.
trainer_kwargs = {
    "early_stopping_monitor": "elbo_validation", #default
    "save_best_state_metric": "elbo", 
    "early_stopping_patience": 10, #quicker stopping
    "threshold": 0, 
    "reduce_lr_on_plateau": True,
}

loss_kwargs = {
    "lr_patience": 8, 
    "lr_factor": 0.1   
}

In [None]:
# Train scANVI on the reference (at most 500 epochs, early stopping enabled, GPU
# requested) and print wall-clock start/end times around the call.
# Only max_epochs, use_gpu and early_stopping are passed; all other training
# options are left at the library defaults.
print("Start =", datetime.now().strftime("%H:%M:%S"))

scanvi.train(max_epochs = 500,  
             use_gpu = True, 
             early_stopping = True)

print("End =", datetime.now().strftime("%H:%M:%S"))

In [None]:
# Persist the trained reference scANVI model; overwrite=True replaces any earlier save.
scanvi.save(output_dir + "05c_scArches_SCANVImodel_TrainedOn_annotNBNscarches_NBAtlas/", overwrite=True)

In [None]:
# reload
# Restores the saved scANVI model against source_adata, which lets a later session
# skip the training cell.
scanvi = scvi.model.SCANVI.load(output_dir + "05c_scArches_SCANVImodel_TrainedOn_annotNBNscarches_NBAtlas/", adata = source_adata, use_gpu=True)

In [None]:
# Training curves of the reference scANVI model.
# The second curve is the validation ELBO (history key "elbo_validation"), so it is
# labelled "validation" rather than "test"; axis labels are added so the figure
# can be read on its own.
fig, ax = plt.subplots()
ax.plot(scanvi.history["elbo_train"], label="train") #elbo_train_set
ax.plot(scanvi.history["elbo_validation"], label="validation")
ax.set_title("Negative ELBO over training epochs")
ax.set_xlabel("epoch")
ax.set_ylabel("negative ELBO")
ax.legend()

In [None]:
# Store the scANVI latent representation of the reference cells in obsm["X_scANVI"].
source_adata.obsm["X_scANVI"] = scanvi.get_latent_representation(source_adata) 

In [None]:
# Neighbour graph on the scANVI latent space, followed by Leiden clustering and a
# UMAP embedding (both with scanpy defaults).
sc.pp.neighbors(source_adata, use_rep="X_scANVI")
sc.tl.leiden(source_adata)
sc.tl.umap(source_adata)

In [None]:
# Reference UMAP coloured by each metadata column of interest.
# Replaces four copy-pasted plotting cells with one loop; the file names passed to
# `save` are identical to the originals. Keys are obs columns, values are the tag
# used in the saved file name.
source_umap_colourings = {
    "Study": "colStudy",
    "Sample": "colSample",
    "Assay": "colAssay",
    "annot_NBN_scarches": "colAnnotNBNscarches",
}

for obs_column, file_tag in source_umap_colourings.items():
    sc.pl.umap(
        source_adata,
        color=[obs_column],
        save=f"05c_scArches_scANVI_UMAP_source_TrainedOn_AnnotNBNscarches_{file_tag}_NBAtlas.png",
    )

In [None]:
# Save the reference AnnData, which at this point also holds X_scANVI, the neighbour
# graph, Leiden clusters and the UMAP added in the cells above.
source_adata.write_h5ad(output_dir + "05c_scArches_scANVI_source_adata_NBAtlas.h5ad")

In [None]:
# reload
# Re-reads the file written in the previous cell (entry point for a later session).
source_adata = sc.read(output_dir + "05c_scArches_scANVI_source_adata_NBAtlas.h5ad")

In [None]:
source_adata

## Import Target

In [None]:
# Load the query ("target") AnnData created in step 05b.
# NOTE(review): absolute path; the same path is repeated in the
# "Ref + target full norm" section further down.
target_adata = sc.read("/scratch/gent/vo/000/gvo00027/projects/Single_Cell_Neuroblastoma/NBAtlas/05b_scArches_Create_AnnDataObject_NBAtlas/nb_adata_target_data_NBAtlas.h5ad")
target_adata

In [None]:
# use pretrained scanvi to prepare target_adata
# The return value is not captured, so target_adata is presumably modified in place
# to match the reference model's gene set - TODO confirm against the scvi-tools docs.
scanvi.prepare_query_anndata(adata = target_adata, 
                                       reference_model = scanvi) #return_reference_var_names = True

In [None]:
target_adata #make sure batch param is in target

In [None]:
source_adata

In [None]:
# Copy X into a "counts" layer on the query object.
# NOTE(review): this assumes target_adata.X holds raw counts at this point - confirm.
target_adata.layers["counts"] = target_adata.X.copy() # otherwise error 'counts is not a valid key in adata.layers'
target_adata

In [None]:
# Build the query ("surgery") model from the trained reference scANVI model and the
# prepared target data, with freeze_dropout=True.
# NOTE: `scarches` is the query MODEL object; the scArches package itself is imported as `sca`.
scarches = sca.models.SCANVI.load_query_data(
    target_adata,
    reference_model = scanvi, 
    freeze_dropout = True,
) 

In [None]:
# Sanity check via a private attribute: the query model should have no labelled cells.
scarches._labeled_indices # empty

In [None]:
# Fine-tune the query model on the target data (at most 500 epochs, weight decay 0,
# validation every 5 epochs) and print wall-clock start/end times.
# NOTE(review): unlike the reference training call, no use_gpu / early_stopping
# arguments are passed here - confirm that this is intended.
print("Start =", datetime.now().strftime("%H:%M:%S"))

scarches.train(
    max_epochs=500,
    plan_kwargs=dict(weight_decay=0.0),
    check_val_every_n_epoch=5)

print("End =", datetime.now().strftime("%H:%M:%S"))

In [None]:
# Persist the fine-tuned query model; overwrite=True replaces any earlier save.
scarches.save(output_dir + "05c_scArches_scArches-scANVI_SurgeryModel_target_NBAtlas/", overwrite=True)

In [None]:
# reload
# Restores the saved query model against target_adata (entry point for a later session).
scarches = sca.models.SCANVI.load(output_dir + "05c_scArches_scArches-scANVI_SurgeryModel_target_NBAtlas/", adata=target_adata, use_gpu=True)

In [None]:
# Training curves of the query (surgery) model.
# The second curve is the validation ELBO (history key "elbo_validation"), so it is
# labelled "validation" rather than "test"; axis labels are added so the figure
# can be read on its own.
fig, ax = plt.subplots()
ax.plot(scarches.history["elbo_train"], label="train") #elbo_train_set
ax.plot(scarches.history["elbo_validation"], label="validation")
ax.set_title("Negative ELBO over training epochs")
ax.set_xlabel("epoch")
ax.set_ylabel("negative ELBO")
ax.legend()

In [None]:
target_adata

In [None]:
target_adata

In [None]:
# Latent representation of the query cells from the surgery model.
# No adata argument is passed, so the model presumably uses the AnnData it was
# created/loaded with (target_adata) - TODO confirm.
target_adata.obsm["X_scANVI"] = scarches.get_latent_representation()

Predict the cell types of the target cells using the reference annotation

In [None]:
# Label transfer, timed with start/end prints:
#  - the default predict() result is stored per cell as obs['predicted_celltype'];
#  - soft=True on the model's unlabeled indices is kept in `predictions`
#    (written to CSV in the next cell).
# NOTE: predict() is called twice, and the second call reads the private
# attribute `_unlabeled_indices`.
print("Start =", datetime.now().strftime("%H:%M:%S"))

target_adata.obs['predicted_celltype'] = scarches.predict()
predictions = scarches.predict(indices = scarches._unlabeled_indices, soft = True)

print("End =", datetime.now().strftime("%H:%M:%S"))

In [None]:
# Show the soft predictions, print their type (leftover debugging aid) and export
# them to the Tables folder.
predictions
print(type(predictions))
predictions.to_csv(output_tables + "05c_scArches_scArches-scANVI_target_ProbabilityPerCelltype.csv")

Save

In [None]:
# Save the query AnnData, which now carries X_scANVI and obs['predicted_celltype'].
target_adata.write_h5ad(output_dir + "05c_scArches_scArches-scANVI_target_adata_NBAtlas.h5ad")

In [None]:
# Reload
# Re-reads the file written in the previous cell (entry point for a later session).
target_adata = sc.read(output_dir + "05c_scArches_scArches-scANVI_target_adata_NBAtlas.h5ad")

## Ref + target full norm

In [None]:
# Load the all-genes, non-integrated reference object from step 03a.
# NOTE: this REBINDS `source_adata`; the scANVI-annotated reference loaded earlier
# is no longer reachable under this name until it is re-read further down.
source_adata = sc.read("03a_scVI_NBAtlas/nb_adata_a_NoInt_norm_full_all_genes_NBAtlas.h5ad") #v2
source_adata

In [None]:
# Re-read the original query object from step 05b.
# NOTE: this REBINDS `target_adata`, dropping the in-memory copy that holds
# X_scANVI and the predictions (that copy was saved to disk above).
target_adata = sc.read("/scratch/gent/vo/000/gvo00027/projects/Single_Cell_Neuroblastoma/NBAtlas/05b_scArches_Create_AnnDataObject_NBAtlas/nb_adata_target_data_NBAtlas.h5ad")
target_adata

In [None]:
# Stack target and reference cells into one object (target cells first).
# NOTE(review): AnnData.concatenate is, to my knowledge, deprecated in recent anndata
# releases in favour of anndata.concat - confirm before upgrading anndata.
adata_full = target_adata.concatenate(source_adata)
adata_full

In [None]:
# Keep a copy of X as "counts", then library-size normalise to 1e4 and log1p-transform.
# NOTE(review): the reference file loaded for this section has "norm" in its name;
# if its X is already normalised, the "counts" layer is mislabelled and the data
# would be normalised twice - confirm that X holds raw counts for both objects.
adata_full.layers["counts"] = adata_full.X.copy() # preserve counts
sc.pp.normalize_total(adata_full, target_sum=1e4)
sc.pp.log1p(adata_full)

In [None]:
# Save the combined, normalised all-genes object.
# NOTE(review): the file name starts with "05a_" whereas every other output of this
# notebook uses "05c_" - possibly a typo; left unchanged because downstream code may
# depend on this exact name.
adata_full.write_h5ad(output_dir + "05a_scArches_scArches-scANVI_adata_full_norm_all_genes_NBAtlas.h5ad")

## Ref + target in scANVI space

In [None]:
# reload
# Back to the scANVI-annotated reference saved earlier (replaces the all-genes object).
source_adata = sc.read(output_dir + "05c_scArches_scANVI_source_adata_NBAtlas.h5ad")

In [None]:
# Reload
# Back to the query object saved earlier with X_scANVI and the predicted cell types.
target_adata = sc.read(output_dir + "05c_scArches_scArches-scANVI_target_adata_NBAtlas.h5ad")

In [None]:
# Combine query and reference again (target cells first).
# NOTE: this REBINDS `adata_full`, replacing the normalised all-genes object
# built in the previous section.
adata_full = target_adata.concatenate(source_adata)

In [None]:
# Embed reference and query cells together with the surgery model.
# NOTE: relies on `scarches` still being in memory from the surgery section above;
# it is not reloaded in this section.
adata_full.obsm["X_scANVI"] = scarches.get_latent_representation(adata_full)

In [None]:
# Neighbour graph on the joint scANVI latent space, followed by Leiden clustering
# and a UMAP embedding (both with scanpy defaults).
sc.pp.neighbors(adata_full, use_rep="X_scANVI")
sc.tl.leiden(adata_full)
sc.tl.umap(adata_full)

In [None]:
# Joint UMAP coloured by study; display only, because the save argument is commented out.
sc.pl.umap(
    adata_full, 
    color = ["Study"],
    #save = "05c_scArches_scArches-scANVI_UMAP_RefAndTarget_colStudy_NBAtlas.png"
)

In [None]:
# Build 'Study_v2': a copy of 'Study' whose categories are restricted to the study
# categories of the reference (source_adata), so reference studies keep their
# category set and studies that only occur in the target become NA.
# Assumes obs['Study'] is categorical in both objects (the .cat accessor is used).
adata_full.obs['Study_v2'] = adata_full.obs['Study'].cat.set_categories(source_adata.obs['Study'].cat.categories) #new studies get NA

In [None]:
# Joint UMAP coloured by Study_v2 (target-only studies are NA) and saved as PDF.
sc.pl.umap(
    adata_full, 
    color = ["Study_v2"],
    save = "05c_scArches_scArches-scANVI_UMAP_RefAndTarget_colStudy_v2_NBAtlas.pdf"
)

In [None]:
# highlight ref - col per study
# Reference cells = every cell whose study is not one of the two target studies.
is_reference = ~adata_full.obs['Study'].isin(['Bonine2023_nucleus', 'Bonine2023_cell'])

# Backdrop: all cells, drawn without a `color` argument; kept open for the overlay.
ax = sc.pl.umap(adata_full, show=False)

# Overlay: reference cells only, coloured by study, on the same axes.
sc.pl.umap(
    adata_full[is_reference, :],
    color=["Study"],
    ax=ax,
    alpha=0.7,
    save="05c_scArches_scArches-scANVI_UMAP_RefAndTarget_HighlightRef_Study_NBAtlas.png",
)

In [None]:
# highlight target - col per study
# Target cells = cells belonging to one of the two target studies.
is_target = adata_full.obs['Study'].isin(['Bonine2023_nucleus', 'Bonine2023_cell'])

# Backdrop: all cells, drawn without a `color` argument; kept open for the overlay.
ax = sc.pl.umap(adata_full, show=False)

# Overlay: target cells only, coloured by study, on the same axes.
sc.pl.umap(
    adata_full[is_target, :],
    color=["Study"],
    ax=ax,
    alpha=0.7,
    save="05c_scArches_scArches-scANVI_UMAP_RefAndTarget_HighlightTarget_Study_NBAtlas.png",
)

In [None]:
# Joint UMAP coloured by the manual annotation; display only, because the save
# argument is commented out.
sc.pl.umap(
    adata_full, 
    color = ["annot_NBN_scarches"],
    #save = "05c_scArches_scArches-scANVI_UMAP_RefAndTarget_colAnnotNBNscarches_NBAtlas.png"
)

In [None]:
# highlight cells that carry a manual annotation
# (any 'annot_NBN_scarches' value other than 'not assigned'; the output file is named "RefAnnot")
has_manual_label = adata_full.obs['annot_NBN_scarches'] != 'not assigned'

# Backdrop: all cells, drawn without a `color` argument; kept open for the overlay.
ax = sc.pl.umap(adata_full, show=False)

# Overlay: annotated cells only, coloured by their manual annotation.
sc.pl.umap(
    adata_full[has_manual_label, :],
    color=["annot_NBN_scarches"],
    ax=ax,
    alpha=0.7,
    save="05c_scArches_scArches-scANVI_UMAP_RefAndTarget_RefAnnot_NBAtlas.png",
)

In [None]:
# highlight target predicted annotation
# Target cells = cells belonging to one of the two target studies.
is_target = adata_full.obs['Study'].isin(['Bonine2023_nucleus', 'Bonine2023_cell'])

# Backdrop: all cells, drawn without a `color` argument; kept open for the overlay.
ax = sc.pl.umap(adata_full, show=False)

# Overlay: target cells only, coloured by the cell type predicted by the surgery model.
sc.pl.umap(
    adata_full[is_target, :],
    color=["predicted_celltype"],
    ax=ax,
    alpha=0.7,
    save="05c_scArches_scArches-scANVI_UMAP_RefAndTarget_TargetPredictedCelltype_NBAtlas.png",
)

In [None]:
adata_full

In [None]:
# Save the combined reference + target object with the joint X_scANVI embedding,
# clustering, UMAP and the Study_v2 column.
adata_full.write_h5ad(output_dir + "05c_scArches_scArches-scANVI_adata_full_NBAtlas.h5ad")

In [None]:
# reload
# Re-reads the file written in the previous cell (entry point for a later session).
adata_full = sc.read(output_dir + "05c_scArches_scArches-scANVI_adata_full_NBAtlas.h5ad")

In [None]:
# Record the versions of scvi-tools, scanpy and scArches (in that order) for reproducibility.
# scvi-tools API docs used for this notebook: https://docs.scvi-tools.org/en/0.16.4/api/user.html
for _package in (scvi, sc, sca):
    print(_package.__version__)