In [1]:
import os
import scanpy as sc
import scipy.sparse as sps
from os.path import join

import sys
sys.path.insert(0, '..')

from spamosaic.framework import SpaMosaic

In [2]:
# Process-wide settings, applied before the spamosaic helper modules below are imported.
#   R_HOME / R_USER         -> R installation and rpy2 package of the Seurat5 conda env
#   CUBLAS_WORKSPACE_CONFIG -> CuBLAS workspace setting, relevant when CUDA >= 10.2
os.environ.update({
    'R_HOME': '/disco_500t/xuhua/miniforge3/envs/Seurat5/lib/R',
    'R_USER': '/disco_500t/xuhua/miniforge3/envs/Seurat5/lib/python3.8/site-packages/rpy2',
    'CUBLAS_WORKSPACE_CONFIG': ':4096:8',
})
import spamosaic.utils as utls
from spamosaic.preprocessing import RNA_preprocess, ADT_preprocess, Epigenome_preprocess
import spamosaic.metrics as eval

In [3]:
# Root folder of the Mouse_postnatal_brain imputation dataset.
# Set the SPAMOSAIC_DATA_DIR environment variable to point at your own copy of this
# folder; when it is unset, the original author's path is used as the fallback.
data_dir = os.environ.get(
    'SPAMOSAIC_DATA_DIR',
    '/home/xuhua/xuhua_disco/gitrepo/BridgeNorm/SpaMosaic-release/data/imputation/Mouse_postnatal_brain',
)

# One RNA file and one ATAC file per slice (three slices in total).
ad1_rna = sc.read_h5ad(join(data_dir, 'Slice1/s1_adata_rna.h5ad'))
ad1_atac = sc.read_h5ad(join(data_dir, 'Slice1/s1_adata_atac.h5ad'))
ad2_rna = sc.read_h5ad(join(data_dir, 'Slice2/s2_adata_rna.h5ad'))
ad2_atac = sc.read_h5ad(join(data_dir, 'Slice2/s2_adata_atac.h5ad'))
ad3_rna = sc.read_h5ad(join(data_dir, 'Slice3/s3_adata_rna.h5ad'))
ad3_atac = sc.read_h5ad(join(data_dir, 'Slice3/s3_adata_atac.h5ad'))

### 1st fold of cross-validation (Slice 1 ATAC held out)

In [4]:
# Key passed as `key=` to the preprocessing calls and as `input_key=` to the model.
input_key = 'dimred_bc'

# Per-modality lists aligned by slice (position 0 = slice 1, 1 = slice 2, 2 = slice 3).
# Slice 1's ATAC entry is None: it is withheld here and imputed/evaluated further down.
input_dict = dict(
    rna=[ad1_rna, ad2_rna, ad3_rna],
    atac=[None, ad2_atac, ad3_atac],
)

In [5]:
# Preprocess the RNA batches (5000 HVGs, batch correction on, batches keyed by 'src').
RNA_preprocess(
    input_dict['rna'],
    batch_corr=True,
    n_hvg=5000,
    batch_key='src',
    key=input_key,
)

# Same for the ATAC batches (50000 peaks). With return_hvf=True the call hands back
# two values that are reused in the evaluation cells: `hvp_idx` indexes matrix columns
# and `hvp_name` is passed on as `hvf_name` (presumably the selected peaks' names).
hvp_name, hvp_idx = Epigenome_preprocess(
    input_dict['atac'],
    batch_corr=True,
    n_peak=50000,
    batch_key='src',
    key=input_key,
    return_hvf=True,
)

Use GPU mode.
	Initialization is completed.
	Completed 1 / 10 iteration(s).
	Completed 2 / 10 iteration(s).
	Completed 3 / 10 iteration(s).
Reach convergence after 3 iteration(s).
Use GPU mode.
	Initialization is completed.
	Completed 1 / 10 iteration(s).
	Completed 2 / 10 iteration(s).
	Completed 3 / 10 iteration(s).
	Completed 4 / 10 iteration(s).
Reach convergence after 4 iteration(s).


### training

In [6]:
# Build the SpaMosaic model on the mosaic input (slice 1 has RNA only).
# The seed is fixed at 1234 and the model is placed on the first CUDA device.
model = SpaMosaic(
    modBatch_dict=input_dict,
    input_key=input_key,
    batch_key='src',
    intra_knn=10,
    inter_knn=10,
    w_g=0.8,
    seed=1234,
    device='cuda:0',
)

# Train for 100 epochs with the 'wlgcn' network.
model.train(
    net='wlgcn',
    lr=0.01,
    T=0.01,
    n_epochs=100,
)

batch0: ['rna']
batch1: ['rna', 'atac']
batch2: ['rna', 'atac']
------Calculating spatial graph...
The graph contains 23720 edges, 2372 cells.
10.0000 neighbors per cell on average.
------Calculating spatial graph...
The graph contains 24970 edges, 2497 cells.
10.0000 neighbors per cell on average.
------Calculating spatial graph...
The graph contains 92150 edges, 9215 cells.
10.0000 neighbors per cell on average.
------Calculating spatial graph...
The graph contains 24970 edges, 2497 cells.
10.0000 neighbors per cell on average.
------Calculating spatial graph...
The graph contains 92150 edges, 9215 cells.
10.0000 neighbors per cell on average.
Number of mnn pairs for rna:15799
Number of mnn pairs for atac:10046


100%|█████████████████████████████████████████| 100/100 [00:06<00:00, 16.33it/s]


In [7]:
# Infer embeddings for every batch; 'emb' is the same emb_key that the imputation
# call below passes to model.impute.
ad_embs = model.infer_emb(
    input_dict,
    emb_key='emb',
    final_latent_key='merged_emb',
)

### imputation

In [8]:
# Give every available AnnData a 'counts' layer holding a copy of its current X;
# this is the layer that imputation targets via layer_key='counts'.
# NOTE(review): assumes X still holds the values you want to impute at this point
# (i.e. preprocessing did not overwrite it) - confirm.
for ad in (
    entry
    for batch_list in input_dict.values()
    for entry in batch_list
    if entry is not None
):
    ad.layers['counts'] = ad.X.copy()  # set targeting layers

In [9]:
# Impute the missing modality of each batch from the 'counts' layer, using the
# embeddings stored under 'emb'.
imputed_featureDict = model.impute(
    input_dict,
    emb_key='emb',
    layer_key='counts',
    imp_knn=10,
)

# format of imputed_featureDict
# (same layout as input_dict; only the originally missing entry is filled)
# {
#     'rna':  [None, None, None],
#     'atac': [array, None, None]
# }

impute atac-counts for batch-1


### evaluation

In [10]:
# Ground truth: the measured ATAC matrix of slice 1, which was withheld from the model.
gt_X = ad1_atac.X
# Prediction: the imputed ATAC matrix for that same slice (position 0 of the 'atac' list).
pr_X = imputed_featureDict['atac'][0]

# Densify sparse inputs with .toarray(). The `.A` shorthand used previously was
# deprecated in SciPy 1.11 and removed from sparse matrices in SciPy 1.14.
gt_X = gt_X.toarray() if sps.issparse(gt_X) else gt_X
pr_X = pr_X.toarray() if sps.issparse(pr_X) else pr_X

# Restrict both matrices to the peak columns returned by the ATAC preprocessing step.
gt_X = gt_X[:, hvp_idx].copy()
pr_X = pr_X[:, hvp_idx].copy()

# NOTE: `eval` is spamosaic.metrics here (the import alias shadows the built-in eval).
auc = eval.eval_AUC_all(gt_X, pr_X, bin_thr=1)
auc

0.803555246513297

In [11]:
# Preprocess the held-out slice on its own (no batch correction) under the key 'dimred',
# which knn_smoothing then receives as dim_red_key.
Epigenome_preprocess(
    [ad1_atac],
    batch_corr=False,
    n_peak=50000,
    key='dimred',
)

# Smooth the ground truth over 50 neighbours, limited to the peaks in hvp_name.
smoothed_gt_X = eval.knn_smoothing(
    ad1_atac,
    hvf_name=hvp_name,
    dim_red_key='dimred',
    knn=50,
)

# Score the same prediction (pr_X) against the smoothed ground truth.
auc = eval.eval_AUC_all(smoothed_gt_X, pr_X, bin_thr=1)
auc

0.9839390106768231