# spSeudoMap implementation

## Install conda environment

In [1]:
# One-time setup: create the conda environment described in conda_env.yaml.
%conda env create --file conda_env.yaml

Collecting package metadata (repodata.json): done
Solving environment: done


  current version: 4.11.0
  latest version: 4.12.0

Please update conda by running

    $ conda update -n base conda



Downloading and Extracting Packages
libevent-2.1.10      | 1.1 MB    | ##################################### | 100% 
libpq-14.2           | 3.0 MB    | ##################################### | 100% 
mysql-common-8.0.29  | 1.8 MB    | ##################################### | 100% 
mysql-libs-8.0.29    | 1.9 MB    | ##################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: | Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m

done
#
# To activate this environment, use
#
#     $ conda activate txt
#
# To deactivate an active environment, use
#
#     $ conda deactivate


Note: you may need to restart the kernel to use updated packages.


In [None]:
# Register a Jupyter kernel (kernelspec) named "spSeudoMap" for the current user,
# so the notebook can be run with that kernel.
!python -m ipykernel install --user --name spSeudoMap --display-name spSeudoMap

## Load single-cell and spatial data

In [1]:
import os

# GPU selection via CUDA environment variables.
# NOTE(review): these are set ahead of the remaining imports, presumably so they are
# in place before any GPU-backed library is loaded — keep this ordering.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0" # run on GPU: expose only device 0
# To run on CPU instead, use: os.environ["CUDA_VISIBLE_DEVICES"]="-1"

import scanpy as sc
import pandas as pd
import numpy as np
import seaborn as sns

## Load example spatial data (10x Genomics Visium: Adult Mouse Brain, Coronal Section 1)

In [2]:
# Verbose scanpy logging; 8x8 figures on a white background.
sc.settings.verbosity = 3
sc.set_figure_params(figsize=(8, 8), facecolor="white")

# Fetch the example Visium dataset by its 10x Genomics sample id.
adata_spatial = sc.datasets.visium_sge(sample_id="V1_Adult_Mouse_Brain_Coronal_Section_1")

V1_Adult_Mouse_Brain_Coronal_Section_1_spatial.tar.gz: 0.00B [00:00, ?B/s]

HTTPError: HTTP Error 403: Forbidden

### Load single-cell data (Mouse coronal section: Sanger institute)

##### Kleshchevnikov, V., Shmatko, A., Dann, E. et al. Cell2location maps fine-grained cell types in spatial transcriptomics. Nat Biotechnol (2022). https://doi.org/10.1038/s41587-021-01139-4

##### https://cell2location.readthedocs.io/en/latest/notebooks/cell2location_short_demo.html

In [None]:
# Download the reference single-nucleus AnnData file (sc.h5ad) used in the
# cell2location tutorial into the working directory.
import subprocess

sc_h5ad_url = 'https://cell2location.cog.sanger.ac.uk/tutorial/mouse_brain_snrna/regression_model/RegressionGeneBackgroundCoverageTorch_65covariates_40532cells_12819genes/sc.h5ad'

# Skip the download when the file already exists: re-running a plain `wget`
# would fetch it again and save a duplicate as sc.h5ad.1.
if not os.path.exists('sc.h5ad'):
    # check=True raises CalledProcessError when wget fails, instead of the
    # failure being silently ignored as with os.system().
    subprocess.run(['wget', sc_h5ad_url], check=True)

In [3]:
# Read the downloaded single-cell reference (sc.h5ad) from the working directory.
adata_single = sc.read('sc.h5ad', cache=True)

#### Structure of the transcriptomes

In [4]:
# Per-cell cell-type labels: the categorical 'annotation_1' column of .obs.
adata_single.obs['annotation_1']

5705STDY8058285_GGTAGAGTCAAATAGG-1    Astro_AMY
5705STDY8058285_GACCCAGAGCAACCAG-1    Astro_AMY
5705STDY8058282_TGATGGTCAAGAGTAT-1    Astro_AMY
5705STDY8058282_GTTGCGGTCTTCGTGC-1    Astro_AMY
5705STDY8058285_AGAAGTATCGCCGAGT-1    Astro_AMY
                                        ...    
5705STDY8058285_GTGACGCGTAAGGAGA-1        Unk_2
5705STDY8058282_CGTAAGTTCAAACTGC-1        Unk_2
5705STDY8058284_TAGGTACCAATACCTG-1        Unk_2
5705STDY8058283_GGGACCTGTGCCGAAA-1        Unk_2
5705STDY8058285_CTTCGGTCACGATTCA-1        Unk_2
Name: annotation_1, Length: 40532, dtype: category
Categories (59, object): ['Astro_AMY', 'Astro_AMY_CTX', 'Astro_CTX', 'Astro_HPC', ..., 'Oligo_1', 'Oligo_2', 'Unk_1', 'Unk_2']

In [5]:
# Unique cell-type labels present in the full single-cell reference
{*adata_single.obs['annotation_1']}

{'Astro_AMY',
 'Astro_AMY_CTX',
 'Astro_CTX',
 'Astro_HPC',
 'Astro_HYPO',
 'Astro_STR',
 'Astro_THAL_hab',
 'Astro_THAL_lat',
 'Astro_THAL_med',
 'Astro_WM',
 'Endo',
 'Ext_Amy_1',
 'Ext_Amy_2',
 'Ext_ClauPyr',
 'Ext_Hpc_CA1',
 'Ext_Hpc_CA2',
 'Ext_Hpc_CA3',
 'Ext_Hpc_DG1',
 'Ext_Hpc_DG2',
 'Ext_L23',
 'Ext_L25',
 'Ext_L56',
 'Ext_L5_1',
 'Ext_L5_2',
 'Ext_L5_3',
 'Ext_L6',
 'Ext_L6B',
 'Ext_Med',
 'Ext_Pir',
 'Ext_Thal_1',
 'Ext_Thal_2',
 'Ext_Unk_1',
 'Ext_Unk_2',
 'Ext_Unk_3',
 'Inh_1',
 'Inh_2',
 'Inh_3',
 'Inh_4',
 'Inh_5',
 'Inh_6',
 'Inh_Lamp5',
 'Inh_Meis2_1',
 'Inh_Meis2_2',
 'Inh_Meis2_3',
 'Inh_Meis2_4',
 'Inh_Pvalb',
 'Inh_Sst',
 'Inh_Vip',
 'LowQ_1',
 'LowQ_2',
 'Micro',
 'Nb_1',
 'Nb_2',
 'OPC_1',
 'OPC_2',
 'Oligo_1',
 'Oligo_2',
 'Unk_1',
 'Unk_2'}

In [6]:
# Dimensions of the spatial and single-cell AnnData objects, shown side by side.
adata_spatial.shape, adata_single.shape

((2903, 32285), (40532, 12819))

### Simulation of the subpopulation single-cell dataset: select the excitatory neuron types

In [7]:
# Simulate a partial reference: keep only the excitatory-neuron cells, i.e. the
# cells whose label is prefixed with 'Ext_'.
# startswith() is a literal prefix test; str.contains() would treat the pattern
# as a regular expression and match it anywhere inside the label.
adata_sc_sub = adata_single[adata_single.obs['annotation_1'].str.startswith('Ext_')].copy()

In [8]:
# Unique cell-type labels remaining in the excitatory-only subset
{*adata_sc_sub.obs['annotation_1']}

{'Ext_Amy_1',
 'Ext_Amy_2',
 'Ext_ClauPyr',
 'Ext_Hpc_CA1',
 'Ext_Hpc_CA2',
 'Ext_Hpc_CA3',
 'Ext_Hpc_DG1',
 'Ext_Hpc_DG2',
 'Ext_L23',
 'Ext_L25',
 'Ext_L56',
 'Ext_L5_1',
 'Ext_L5_2',
 'Ext_L5_3',
 'Ext_L6',
 'Ext_L6B',
 'Ext_Med',
 'Ext_Pir',
 'Ext_Thal_1',
 'Ext_Thal_2',
 'Ext_Unk_1',
 'Ext_Unk_2',
 'Ext_Unk_3'}

### Preparation of the parameters for the training

In [9]:
# Name of the .obs (metadata) column that holds the single-cell annotation
celltype = 'annotation_1'

# How many marker genes to select for each cell type
num_markers = 20

In [10]:
# Total number of cell mixtures (modified pseudospots) to generate:
# five times the number of observations (spots) in the spatial data
npseudo = 5 * adata_spatial.n_obs
npseudo

14515

In [11]:
# Ratio of the number of single-cell markers to the number of virtual pseudotype markers
mk_ratio = 2

In [12]:
# Presumed mean fraction of the pseudotype (cell types present only in the
# spatial data) across all spatial spots.
# In this simulation it is the share of reference cells removed by the
# excitatory-only subset, i.e. the non-excitatory fraction.
pseudo_frac_m = 1 - adata_sc_sub.n_obs / adata_single.n_obs
pseudo_frac_m

0.6716421592815554

In [13]:
# Standard deviation of the distribution of the presumed pseudotype fraction
# across all spatial spots (default = 0.1)
pseudo_frac_std = 0.1

In [14]:
# Number of cells sampled from the single-cell data to build each pseudospot
nmix = 10

### Run spSeudoMap
#### cell type mapping of spatial transcriptomics using unmatched single-cell RNA-seq data

In [3]:
from pred_cellf_spSeudoMap import pred_cellf_spSeudoMap

# Predict cell fractions for the spatial spots from the excitatory-only
# single-cell subset, using the parameters prepared in the cells above.
# NOTE(review): the grouping comments below are inferred from the argument
# names — confirm against the pred_cellf_spSeudoMap documentation.
adata_spatial_cellf = pred_cellf_spSeudoMap(
    # input data
    adata_sp=adata_spatial,
    adata_sc=adata_sc_sub,
    count_from_raw=False,
    gpu=True,
    # marker selection and pseudospot generation
    celltype=celltype,
    num_markers=num_markers,
    mixture_mode='pseudotype',
    seed_num=0,
    mk_ratio_fix=True,
    mk_ratio=mk_ratio,
    pseudo_frac_m=pseudo_frac_m,
    pseudo_frac_std=pseudo_frac_std,
    nmix=nmix,
    npseudo=npseudo,
    # training settings
    alpha=0.6,
    alpha_lr=5,
    emb_dim=64,
    batch_size=512,
    n_iterations=3000,
    init_train_epoch=10,
    # output
    outdir='./output',
    return_format='anndata',
)

NameError: name 'adata_spatial' is not defined

### Save AnnData with predicted cell fraction data

In [None]:
# Write the AnnData carrying the predicted cell fractions to disk.
# NOTE(review): the file is saved into '.', while the training call uses
# outdir='./output', and the name ends in '.h5' although it is written with
# write_h5ad — confirm both are intended.
outdir = '.'
cellf_path = os.path.join(outdir, 'spatial_cellf.h5')
adata_spatial_cellf.write_h5ad(cellf_path)