In [None]:
import os
os.environ["SCIPY_ARRAY_API"] = "1"
import sklearn
import scarches as sca
import muon as mu
import anndata as ad
from scarches.dataset.trvae.data_handling import remove_sparsity
import matplotlib.pyplot as plt
import numpy as np
import gdown
import scanpy as sc
import torch
import os

# Seed NumPy's global RNG once for the notebook.
np.random.seed(1234)

# Current working directory, written with forward slashes only.
cwd_posix = os.path.abspath('').replace('\\', '/')

# Root folder (with trailing '/'), plus the input and output folders below it.
path = f"{cwd_posix}/"
data_path = f"{path}dataset/"
save_path = f"{cwd_posix}/results/"

## scArches

In [None]:
import scanpy as sc
import scvi
import anndata
import numpy as np
import os
import random


datasets = ["liver_human", "liver_Nafld", "adipose", "glio"]

# Per-dataset settings: (context modality, target modality, stem of the .h5mu file).
scarches_settings = {
    "glio": ("mouse", "human", "glio"),
    "adipose": ("mouse", "human", "adipose"),
    "liver_human": ("mouse", "human", "liver"),
    "liver_Nafld": ("mouse", "mouseNafld", "liver"),
}

# Keyword arguments shared by every scVI context model trained below.
scvi_model_kwargs = dict(
    n_layers=2,
    n_hidden=250,
    dropout_rate=0.1,
    gene_likelihood='zinb',
    dispersion='gene-batch',
    encode_covariates=True,
    deeply_inject_covariates=False,
    use_layer_norm="both",
    use_batch_norm="none",
)

# Ten repetitions; repetition i uses seed i * 1234 for every RNG involved.
for i in range(10):
    SEED = 1234 * i
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(SEED)
    scvi.settings.seed = SEED

    for dataset in datasets:
        context_key, target_key, load_key = scarches_settings[dataset]

        mdata = mu.read_h5mu(f"{data_path}{load_key}.h5mu")

        # Flag each modality so the cells can be split again after concatenation.
        mdata[context_key].obs['system'] = 0
        mdata[target_key].obs['system'] = 1

        # Match genes across the two modalities through their 'human_gene_names' annotation.
        context_genes = np.array(mdata[context_key].var['human_gene_names'])
        target_genes = np.array(mdata[target_key].var['human_gene_names'])
        ret_vec, ind_a, ind_b = np.intersect1d(target_genes, context_genes, return_indices=True)

        # Restrict both modalities to the shared genes and name the variables after them.
        mdata_context = mdata[context_key][:, ind_b]
        mdata_target = mdata[target_key][:, ind_a]
        mdata_context.var_names = context_genes[ind_b]
        mdata_target.var_names = target_genes[ind_a]

        adata = ad.concat([mdata_context, mdata_target], axis=0, join='inner')

        system_flag = adata.obs['system']
        adata_context = adata[system_flag == 0].copy()
        adata_target = adata[system_flag == 1].copy()

        # Train the context model on the context cells and save it.
        scvi.model.SCVI.setup_anndata(adata_context, batch_key="batch")
        model = scvi.model.SCVI(adata_context, **scvi_model_kwargs)
        model.train(max_epochs=30)

        model_path = os.path.join(save_path, f"{dataset}_scarches_context_model_{i}")
        model.save(model_path, overwrite=True)

        # Load the saved context model together with the target cells, train, and save.
        scvi.model.SCVI.prepare_query_anndata(adata_target, model_path)
        query_model = scvi.model.SCVI.load_query_data(adata_target, model_path)
        query_model.train(max_epochs=30)

        model_path = os.path.join(save_path, f"{dataset}_scarches_target_model_{i}")
        query_model.save(model_path, overwrite=True)

        # Replace the expression matrices by the latent representations, keeping obs.
        context_latent = model.get_latent_representation()
        target_latent = query_model.get_latent_representation()
        adata_context = ad.AnnData(context_latent, obs=adata_context.obs)
        adata_target = ad.AnnData(target_latent, obs=adata_target.obs)

        # Write the joint embedding of this dataset / repetition next to the input data.
        adata_embed = sc.concat([adata_context, adata_target], axis=0, join='inner')
        adata_embed.write(os.path.join(data_path, f"{dataset}_embed_scArches_{i}.h5ad"))


## scPoli

In [None]:
import muon as mu
import numpy as np
import scanpy as sc
import scvi
import os
from scarches.models.scpoli import scPoli
import random

datasets = ["liver_human", "liver_Nafld", "adipose", "glio"]

# Per-dataset settings: (context modality, target modality, stem of the .h5mu file).
# A lookup raises KeyError for an unknown dataset name instead of silently reusing
# the keys left over from the previous loop iteration.
scpoli_settings = {
    "glio": ("mouse", "human", "glio"),
    "adipose": ("mouse", "human", "adipose"),
    "liver_human": ("mouse", "human", "liver"),
    "liver_Nafld": ("mouse", "mouseNafld", "liver"),
}

# Ten repetitions; repetition i uses seed i * 1234 for every RNG involved.
for i in range(10):
    SEED = i*1234
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    scvi.settings.seed = SEED

    for dataset in datasets:
        context_key, target_key, load_key = scpoli_settings[dataset]

        mdata = mu.read_h5mu(os.path.join(data_path, f"{load_key}.h5mu"))

        # Flag each modality so the cells can be split again after concatenation.
        mdata[context_key].obs['system'] = 0
        mdata[target_key].obs['system'] = 1

        # Match genes across the two modalities through their 'human_gene_names' annotation.
        context_genes = np.array(mdata[context_key].var['human_gene_names'])
        target_genes = np.array(mdata[target_key].var['human_gene_names'])
        ret_vec, ind_a, ind_b = np.intersect1d(target_genes, context_genes, return_indices=True)

        mdata_target = mdata[target_key][:, ind_a]
        mdata_context = mdata[context_key][:, ind_b]
        mdata_target.var_names = target_genes[ind_a]
        mdata_context.var_names = context_genes[ind_b]

        adata = sc.concat([mdata_context, mdata_target], axis=0, join='inner')

        adata_context = adata[adata.obs['system'] == 0].copy()
        adata_target = adata[adata.obs['system'] == 1].copy()

        # Early stopping is only passed to the context model's training call below.
        early_stopping_kwargs = {
            "early_stopping_metric": "val_prototype_loss",
            "mode": "min",
            "threshold": 0,
            "patience": 20,
            "reduce_lr": True,
            "lr_patience": 13,
            "lr_factor": 0.1,
        }

        # Context model: conditioned on 'batch', supervised with 'cell_type_fine' labels.
        context_model = scPoli(adata_context, condition_keys="batch", cell_type_keys='cell_type_fine',
                               hidden_layer_sizes=[300, 200],
                               use_ln=True,
                               use_bn=False,
                               dr_rate=0.1,
                               embedding_dims=10,
                               recon_loss='zinb')

        context_model.train(n_epochs=40, pretraining_epochs=30, early_stopping_kwargs=early_stopping_kwargs, eta=5)

        context_model_path = os.path.join(save_path, f"{dataset}_scPoli_context_model_{i}")
        context_model.save(context_model_path, overwrite=True)

        # Target model: built from the context model; labeled_indices=[] passes no labeled target cells.
        target_model = scPoli.load_query_data(adata=adata_target, reference_model=context_model, labeled_indices=[])
        target_model.train(n_epochs=40, pretraining_epochs=30, eta=10)

        target_model_path = os.path.join(save_path, f"{dataset}_scPoli_target_model_{i}")
        target_model.save(target_model_path, overwrite=True)

        # Densify X before classification / latent extraction (assumes X is a sparse matrix here).
        adata_context.X = adata_context.X.toarray()
        adata_target.X = adata_target.X.toarray()

        results_dict = target_model.classify(adata_target, scale_uncertainties=True)

        # Replace the expression matrices by the latent representations, keeping obs.
        adata_context = ad.AnnData(context_model.get_latent(adata_context, mean=True), obs=adata_context.obs)
        adata_target = ad.AnnData(target_model.get_latent(adata_target, mean=True), obs=adata_target.obs)

        # Predictions exist for the target cells only.
        adata_target.obs['cell_type_pred'] = results_dict['cell_type_fine']['preds'].tolist()
        adata_target.obs['cell_type_uncert'] = results_dict['cell_type_fine']['uncert'].tolist()
        # Stored as the strings 'True' / 'False' so the column shares one dtype with the
        # '-' placeholder used for context cells below.
        adata_target.obs['classifier_outcome'] = (
            adata_target.obs['cell_type_pred'] == adata_target.obs['cell_type_fine']
        ).astype(str)

        # Context cells carry no prediction. The placeholders belong on adata_context:
        # previously they overwrote the target predictions written just above, and the
        # inner join below then dropped the columns because the context object lacked
        # them. The uncertainty placeholder is NaN rather than '-' to keep that column
        # numeric; a mixed str/float column is presumably not writable to h5ad.
        adata_context.obs['cell_type_pred'] = '-'
        adata_context.obs['cell_type_uncert'] = np.nan
        adata_context.obs['classifier_outcome'] = '-'

        # Write the joint embedding, with the raw classification output kept in .uns.
        adata_embed = sc.concat([adata_context, adata_target], axis=0, join='inner')
        adata_embed.uns['results_dict'] = results_dict
        adata_embed.write(os.path.join(data_path, f"{dataset}_embed_scPoli_{i}.h5ad"))
