In [8]:
# Standard-library and third-party dependencies used throughout the notebook.
import os
import numpy as np
import scanpy as sc
import scipy.sparse as sps
from os.path import join

import sys
# Put the parent directory first on the module search path so the `spamosaic`
# import below is looked up there first (presumably the repository checkout
# rather than an installed copy — confirm against your layout).
sys.path.insert(0, '..')

from spamosaic.framework import SpaMosaic

In [9]:
# Process-wide environment configuration. The two R-related entries are
# machine-specific paths and must be adapted to the local installation.
os.environ.update({
    'R_HOME': '/disco_500t/xuhua/miniforge3/envs/Seurat5/lib/R',
    'R_USER': '/disco_500t/xuhua/miniforge3/envs/Seurat5/lib/python3.8/site-packages/rpy2',
    'CUBLAS_WORKSPACE_CONFIG': ':4096:8',  # for CuBLAS operation and you have CUDA >= 10.2
})

# Imported only after the environment variables are in place (presumably so
# that R/rpy2 and CUDA see them at import time — TODO confirm).
import spamosaic.utils as utls
from spamosaic.preprocessing import (
    RNA_preprocess,
    ADT_preprocess,
    Epigenome_preprocess,
)
# NOTE(review): this alias shadows Python's built-in `eval` for the rest of the
# notebook; the evaluation cells rely on the name, so it is kept as is.
import spamosaic.metrics as eval

In [10]:
# Root folder of the Human tonsil imputation dataset (machine-specific path).
data_dir = '/home/xuhua/xuhua_disco/gitrepo/BridgeNorm/SpaMosaic-release/data/imputation/Human_tonsil'


def _read_h5ad(rel_path):
    """Read the .h5ad file located at ``rel_path`` relative to ``data_dir``."""
    return sc.read_h5ad(join(data_dir, rel_path))


# One RNA object and one ADT object per slice, read in slice order.
ad1_rna, ad1_adt = (
    _read_h5ad('Slice1/s1_adata_rna.h5ad'),
    _read_h5ad('Slice1/s1_adata_adt.h5ad'),
)
ad2_rna, ad2_adt = (
    _read_h5ad('Slice2/s2_adata_rna.h5ad'),
    _read_h5ad('Slice2/s2_adata_adt.h5ad'),
)
ad3_rna, ad3_adt = (
    _read_h5ad('Slice3/s3_adata_rna.h5ad'),
    _read_h5ad('Slice3/s3_adata_adt.h5ad'),
)

### 1st-fold CV (cross-validation): ADT of slice 1 held out

In [11]:
# Name passed as `key=` to the preprocessing functions and as `input_key=` to
# the model (presumably where the preprocessed representation is stored and
# read back — TODO confirm).
input_key = 'dimred_bc'

# Modality name -> list with one entry per slice (slice 1, 2, 3 in that order).
# `None` marks a modality that is not provided for a slice: here the ADT of
# slice 1 is left out.
input_dict = dict()
input_dict['rna'] = [ad1_rna, ad2_rna, ad3_rna]
input_dict['adt'] = [None, ad2_adt, ad3_adt]

In [12]:
# Keyword options that both modalities share.
shared_opts = dict(batch_corr=True, batch_key='src', key=input_key)

# RNA additionally selects its flavor and the number of HVGs; ADT uses only the
# shared options.
RNA_preprocess(input_dict['rna'], favor='scanpy', n_hvg=5000, **shared_opts)
ADT_preprocess(input_dict['adt'], **shared_opts)

Use GPU mode.
	Initialization is completed.
	Completed 1 / 10 iteration(s).
	Completed 2 / 10 iteration(s).
	Completed 3 / 10 iteration(s).
	Completed 4 / 10 iteration(s).
	Completed 5 / 10 iteration(s).
Reach convergence after 5 iteration(s).
Use GPU mode.
	Initialization is completed.
	Completed 1 / 10 iteration(s).
	Completed 2 / 10 iteration(s).
	Completed 3 / 10 iteration(s).
Reach convergence after 3 iteration(s).


### training

In [13]:
# Constructor options for the model.
model_opts = dict(
    modBatch_dict=input_dict,
    input_key=input_key,
    batch_key='src',
    intra_knn=2,
    inter_knn=2,
    w_g=0.8,
    seed=1234,
    device='cuda:0',  # first CUDA device; change if no GPU is available there
)

# Optimisation settings for the training run.
train_opts = dict(net='wlgcn', lr=0.01, T=0.01, n_epochs=100)

model = SpaMosaic(**model_opts)

model.train(**train_opts)

batch0: ['rna']
batch1: ['rna', 'adt']
batch2: ['rna', 'adt']
------Calculating spatial graph...
The graph contains 8674 edges, 4337 cells.
2.0000 neighbors per cell on average.
------Calculating spatial graph...
The graph contains 9038 edges, 4519 cells.
2.0000 neighbors per cell on average.
------Calculating spatial graph...
The graph contains 9042 edges, 4521 cells.
2.0000 neighbors per cell on average.
------Calculating spatial graph...
The graph contains 9038 edges, 4519 cells.
2.0000 neighbors per cell on average.
------Calculating spatial graph...
The graph contains 9042 edges, 4521 cells.
2.0000 neighbors per cell on average.
Number of mnn pairs for rna:6092
Number of mnn pairs for adt:1501


100%|█████████████████████████████████████████| 100/100 [00:06<00:00, 16.61it/s]


In [14]:
# Run embedding inference with the trained model. 'emb' is the same key the
# imputation call later passes as `emb_key`.
ad_embs = model.infer_emb(
    input_dict,
    emb_key='emb',
    final_latent_key='merged_emb',
)

### imputation

In [15]:
# Give every available AnnData object a 'counts' layer holding a copy of its
# current X; the imputation call reads that layer via `layer_key='counts'`.
# NOTE(review): X is copied after preprocessing has run — confirm that the
# preprocessing functions leave X untouched if raw values are expected here.
for mod, ads in input_dict.items():
    for ad in ads:
        if ad is None:
            # modality not provided for this slice
            continue
        ad.layers['counts'] = ad.X.copy()  # set targeting layers

In [16]:
# Impute the missing modality from the 'counts' layers, using the 'emb'
# embeddings and imp_knn=10.
#
# format of imputed_featureDict
# {
#     'rna':  [None, None, None],
#     'adt': [array, None, None]
# }
imputed_featureDict = model.impute(
    input_dict,
    emb_key='emb',
    layer_key='counts',
    imp_knn=10,
)

impute adt-counts for batch-1


### evaluation

In [17]:
# Ground truth: the ADT matrix of slice 1, which was withheld from `input_dict`.
gt_X = ad1_adt.X
# Prediction: the imputed ADT entry for the first slice.
pr_X = imputed_featureDict['adt'][0]

# Densify sparse inputs. `.toarray()` is used instead of the `.A` shorthand:
# `.A` is deprecated on SciPy sparse containers and missing in newer SciPy
# releases, whereas `.toarray()` returns the same dense ndarray on every version.
gt_X = gt_X.toarray() if sps.issparse(gt_X) else gt_X
pr_X = pr_X.toarray() if sps.issparse(pr_X) else pr_X

# Two PCC results (by their names, presumably Pearson correlation per cell
# and per ADT feature — confirm in spamosaic.metrics).
pcc_cell, pcc_adt = eval.PCCs(gt_X, pr_X)

In [18]:
# Display the mean of each PCC result as a (cell, adt) pair.
np.mean(pcc_cell), np.mean(pcc_adt)

(0.9184238313142341, 0.5492526181686254)

In [19]:
# CMD between prediction and ground truth, once on the matrices as they are
# and once on their transposes.
# NOTE(review): arguments are passed as (prediction, ground truth) here, whereas
# eval.PCCs is called with (ground truth, prediction) — confirm CMD is symmetric
# or that this order is intended.
cmd_cell = eval.CMD(pr_X, gt_X)
cmd_adt = eval.CMD(pr_X.T, gt_X.T)

In [20]:
# Display both CMD scores as the cell output.
cmd_cell, cmd_adt

(0.012974150288178277, 0.13143264887369877)