#### Demo requirements
- code conda env: mefisto_env.yaml
- demo running time: 5-8 hours depending on your computation hardware
- package version: 1.12.1

In [1]:
# Set RETICULATE_PYTHON to the Python interpreter of the mefisto conda env.
# The path below is machine-specific: replace it with the python binary of
# your own mefisto_env before running the notebook.
Sys.setenv(RETICULATE_PYTHON = "/home/shaliu_fu/miniconda3/envs/mefisto_env/bin/python")


In [3]:

# Directory that receives the trained model and the latent-factor table.
mefisto_embed_path <- "../output/mefisto/"

# Input count objects. The variable names say "h5ad", but the files are .rds
# and are loaded with readRDS() further down.
RNA_h5ad_path <- "./lymph_node-CITE_seq-raw-RNA-counts.rds"
ADT_h5ad_path <- "./lymph_node-CITE_seq-raw-ADT-counts.rds"


In [4]:

library(MOFA2)
library(tidyverse)
library(cowplot)
library(magrittr)

library(dplyr)
library(ggplot2)
library(data.table)
library(Matrix)
library(stringr)

library(Seurat)
library(SeuratDisk)
library(here)


Attaching package: ‘MOFA2’


The following object is masked from ‘package:stats’:

    predict


── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.3     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.0
[32m✔[39m [34mggplot2  [39m 3.4.4     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtidyr    [39m 1.3.0
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors

Attaching package: ‘cowplot’


The following object is masked from ‘pa

In [5]:
# Report the installed MOFA2 version (the demo header lists 1.12.1).
packageVersion("MOFA2")

[1] ‘1.12.1’

In [7]:
# Load the ADT and RNA objects from their .rds files.
adt_counts <- readRDS(ADT_h5ad_path)
rna_counts <- readRDS(RNA_h5ad_path)


In [8]:
# Take the X and Y metadata columns of the RNA object as spatial coordinates
# and preview the first rows.
spatial_loc <- rna_counts@meta.data[, c("X", "Y")]
head(spatial_loc)

Unnamed: 0_level_0,X,Y
Unnamed: 0_level_1,<dbl>,<dbl>
AACACTTGGCAAGGAA-1_1,47,71
AACAGGATTCATAGTT-1_1,49,43
AACAGGTTATTGCACC-1_1,28,86
AACAGGTTCACCGAAG-1_1,51,41
AACAGTCAGGCTCCGC-1_1,24,6
AACAGTCGTGTCGCGG-1_1,44,124


In [9]:
# Print the summary of the ADT object loaded above.
adt_counts

An object of class Seurat 
31 features across 6843 samples within 1 assay 
Active assay: Spatial_ADT (31 features, 0 variable features)
 2 layers present: counts, data
 1 image present: image

In [10]:
# Assemble a single Seurat object with both modalities: the RNA counts (plus
# the RNA object's metadata) form the initial assay, and the ADT counts are
# attached as a second assay named "ADT".
obj.multi <- CreateSeuratObject(
  counts = rna_counts@assays[["Spatial_RNA"]]@counts,
  meta.data = rna_counts@meta.data
)
obj.multi[["ADT"]] <- CreateAssayObject(
  counts = adt_counts@assays[["Spatial_ADT"]]@counts,
  assay = "ADT"
)



In [11]:
# Print the summary of the combined RNA + ADT object.
obj.multi

An object of class Seurat 
18116 features across 6843 samples within 2 assays 
Active assay: RNA (18085 features, 0 variable features)
 2 layers present: counts, data
 1 other assay present: ADT

In [12]:
# RNA branch: SCTransform, then PCA, then a UMAP on the first 50 PCs stored
# under the reduction name "umap.rna".
DefaultAssay(obj.multi) <- "RNA"
obj.multi <- obj.multi %>%
  SCTransform(verbose = FALSE) %>%
  RunPCA() %>%
  RunUMAP(dims = 1:50, reduction.name = "umap.rna", reduction.key = "rnaUMAP_")

# Select 3000 variable features with the "vst" method on the active assay.
# NOTE(review): SCTransform normally switches the default assay to "SCT", so
# this presumably re-selects the variable features of SCT -- confirm.
obj.multi <- FindVariableFeatures(
  obj.multi,
  selection.method = "vst",
  nfeatures = 3000
)


PC_ 1 
Positive:  ENSG00000211772, ENSG00000277734, ENSG00000137077, ENSG00000227507, ENSG00000211751, ENSG00000008517, ENSG00000102879, ENSG00000136490, ENSG00000185811, ENSG00000134954 
	   ENSG00000019582, ENSG00000009790, ENSG00000188404, ENSG00000106948, ENSG00000068831, ENSG00000175463, ENSG00000167895, ENSG00000130592, ENSG00000168685, ENSG00000065357 
	   ENSG00000111348, ENSG00000013725, ENSG00000128340, ENSG00000107742, ENSG00000136167, ENSG00000115085, ENSG00000081059, ENSG00000179144, ENSG00000111728, ENSG00000184357 
Negative:  ENSG00000170323, ENSG00000169710, ENSG00000123689, ENSG00000166819, ENSG00000175445, ENSG00000211445, ENSG00000148671, ENSG00000148180, ENSG00000135821, ENSG00000181092 
	   ENSG00000184557, ENSG00000189058, ENSG00000197766, ENSG00000167676, ENSG00000167588, ENSG00000138207, ENSG00000135447, ENSG00000026025, ENSG00000079435, ENSG00000005249 
	   ENSG00000198886, ENSG00000128016, ENSG00000211895, ENSG00000108551, ENSG00000004776, ENSG00000132465, ENS

In [13]:
# ADT branch: CLR normalisation with margin = 2 (the recorded output reports
# "Normalizing across cells"), followed by scaling and variable-feature
# selection with default settings.
DefaultAssay(obj.multi) <- "ADT"
obj.multi <- obj.multi %>%
  NormalizeData(normalization.method = "CLR", margin = 2) %>%
  ScaleData()
obj.multi <- FindVariableFeatures(obj.multi)

Normalizing across cells



Centering and scaling data matrix



In [32]:
# Build the MOFA object from the SCT and ADT assays. No feature list is
# passed, so (per the recorded output) the Seurat variable features are used.
mofa <- create_mofa(
  obj.multi,
  assays = c("SCT", "ADT")
)

Creating MOFA object from a Seurat object...

No features specified, using variable features from the Seurat object...



In [33]:
# Copy the coordinates into a plain data.frame, rename the two columns to
# coord1/coord2 and label the rows with the RNA object's column names.
spatial_loc2 <- data.frame(spatial_loc)
colnames(spatial_loc2) <- c("coord1", "coord2")
rownames(spatial_loc2) <- colnames(rna_counts)

In [34]:
# Register the spatial coordinates as model covariates. The table is
# transposed first, so coord1/coord2 become rows and samples become columns.
mofa <- set_covariates(mofa, t(spatial_loc2))

In [35]:
# Collect the default option sets, override the number of factors and the
# iteration cap, then attach everything to the model with prepare_mofa().
data_opts <- get_default_data_options(mofa)

model_opts <- get_default_model_options(mofa)
model_opts$num_factors <- 4

train_opts <- get_default_training_options(mofa)
# NOTE(review): a cap of 1 iteration looks like a debugging leftover. The
# recorded output warns "Maximum number of iterations is very small" and
# later "All 4 factors were found to explain little or no variance", while
# the demo header quotes a 5-8 hour runtime. Confirm the intended value
# (or drop this override to use the package default) before trusting results.
train_opts$maxiter <- 1

mefisto_opts <- get_default_mefisto_options(mofa)

mofa <- prepare_mofa(mofa, model_options = model_opts,
                   mefisto_options = mefisto_opts,
                   training_options = train_opts,
                   data_options = data_opts)

Checking data options...

Checking training options...

“Maximum number of iterations is very small
”


Checking model options...

Checking inference options for mefisto covariates...



In [36]:
# Train the model and save it as an HDF5 file under mefisto_embed_path.
# use_basilisk = FALSE makes MOFA2 connect to mofapy2 through reticulate, so
# the Python interpreter must already be configured.
mofa <- run_mofa(
  mofa,
  outfile = paste0(mefisto_embed_path, "/mefisto_model.hdf5"),
  use_basilisk = FALSE
)


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'




“All 4 factors were found to explain little or no variance so remove_inactive_factors option has been disabled.”


In [19]:
# Reload the saved model, keeping inactive factors, and compute a UMAP over
# all K factors reported by get_dimensions().
mofa <- load_model(
  paste0(mefisto_embed_path, "/mefisto_model.hdf5"),
  remove_inactive_factors = FALSE
)
factors <- 1:get_dimensions(mofa)[["K"]]

mofa <- run_umap(mofa, factors = factors, n_neighbors = 15, min_dist = 0.30)


Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'

Also defined by ‘spam’

Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'

Also defined by ‘spam’



In [None]:
# Pull the Z expectations of group "group1" out of the model object.
out_tab <- mofa@expectations$Z$group1

In [None]:
# Save the latent table as comma-separated text with row and column names.
# paste0() builds only the file path; sep/row.names/col.names are arguments
# of write.table() itself.
write.table(
  out_tab,
  file = paste0(mefisto_embed_path, "/mefisto_latent.csv"),
  sep = ",",
  row.names = TRUE,
  col.names = TRUE
)