In [None]:
# Load the project-level .Rprofile that lives one directory above this
# notebook. `chdir = TRUE` makes source() switch into the profile's directory
# only while the file is evaluated (so relative paths inside it still resolve)
# and then restores the previous working directory -- even on error -- instead
# of assuming the notebook directory is literally named "evaluation".
source("../.Rprofile", chdir = TRUE)
# Print the active library search paths so the run log records which package
# libraries were in effect.
print(.libPaths())
library(Seurat)
library(tidyverse)
library(reticulate)
library(data.table)
library(yaml)

In [None]:
# parameters cell
# Placeholder defaults only: the empty strings and zeros below are not usable
# values and are presumably overridden by whatever launches this notebook
# (e.g. papermill-style parameter injection -- TODO confirm).
adata1_file <- ""      # input : adata file
adata2_file <- ""      # input : adata with noise
# Besides receiving the first embedding, the directory of emb0_file is also
# where run_time.yaml is written at the end of the notebook.
emb0_file <- ""        # output: cca embedding + pca of adata
emb1_file <- ""        # output: cca embedding + pca of adata noise

# hyperparameters
# anchor_feature: passed as `nfeatures` to FindVariableFeatures() and
#                 SelectIntegrationFeatures().
# dim           : upper bound of `dims = 1:dim` in FindIntegrationAnchors().
#                 The variable shares its name with base::dim(); calls such as
#                 dim(x) still resolve to the function because R skips
#                 non-function objects when looking up a call.
# k_anchor / k_filter / k_score: passed as `k.anchor`, `k.filter` and
#                 `k.score` to FindIntegrationAnchors().
anchor_feature <- 0
dim <- 0
k_anchor <- 0
k_filter <- 0
k_score <- 0

In [None]:
# Read the two input files named in the parameters cell.
# NOTE(review): `read.h5ad` does not appear to be provided by any package
# attached through this notebook's library() calls; presumably it is defined
# or loaded by the sourced .Rprofile -- confirm.
adata1 <- read.h5ad(adata1_file)
adata2 <- read.h5ad(adata2_file)

In [None]:
# Transpose each `X` matrix and label it in one step: rows are named after the
# row names of `var`, columns after the row names of `obs`.
mat1 <- Matrix::t(adata1$X)
dimnames(mat1) <- list(rownames(adata1$var), rownames(adata1$obs))

mat2 <- Matrix::t(adata2$X)
dimnames(mat2) <- list(rownames(adata2$var), rownames(adata2$obs))

In [None]:
# NOTE: the "slide.seq" naming is kept on purpose -- per the original author,
# Seurat only supports the Slide-seq layout for this kind of data so far.
# Each object stores the transposed matrix as counts of a "Spatial" assay and
# carries the corresponding `obs` table as cell metadata.
slide.seq.1 <- CreateSeuratObject(
    counts = mat1,
    assay = "Spatial",
    meta.data = adata1$obs
)
slide.seq.2 <- CreateSeuratObject(
    counts = mat2,
    assay = "Spatial",
    meta.data = adata2$obs
)

In [None]:
# Display how many cells the first Seurat object contains.
length(Cells(slide.seq.1))

In [None]:
# Attach the spatial coordinates of each dataset as a Seurat image named
# "image". Columns 1 and 2 of `obsm$spatial` become x and y, and the rows are
# labelled with the cell names of the matching Seurat object.
coord.df.1 <- data.frame(
    x = adata1$obsm$spatial[, 1],
    y = adata1$obsm$spatial[, 2],
    row.names = Cells(slide.seq.1),
    stringsAsFactors = FALSE
)
# The class name must stay "SlideSeq" (it cannot be changed).
slide.seq.1@images[["image"]] <- new(
    "SlideSeq",
    coordinates = coord.df.1,
    assay = "Spatial",
    key = "image_"
)

coord.df.2 <- data.frame(
    x = adata2$obsm$spatial[, 1],
    y = adata2$obsm$spatial[, 2],
    row.names = Cells(slide.seq.2),
    stringsAsFactors = FALSE
)
# The class name must stay "SlideSeq" (it cannot be changed).
slide.seq.2@images[["image"]] <- new(
    "SlideSeq",
    coordinates = coord.df.2,
    assay = "Spatial",
    key = "image_"
)

# Quick visual check: per-cell total counts drawn at the spatial coordinates.
SpatialFeaturePlot(slide.seq.1, features = "nCount_Spatial") +
    theme(legend.position = "right")
SpatialFeaturePlot(slide.seq.2, features = "nCount_Spatial") +
    theme(legend.position = "right")


In [None]:
# Start the timer; the elapsed time since this point is reported as the
# run time of the integration.
start <- proc.time()

# Tag every cell with the dataset it came from.
slide.seq.1@meta.data[["batch"]] <- "a1"
slide.seq.2@meta.data[["batch"]] <- "a2"

# Normalize each dataset and pick its variable features independently.
# NOTE(review): the original author remarked that the data "has been
# normalized", yet NormalizeData() is still applied here -- confirm that a
# second normalization is intended.
seu_list <- lapply(
    X = list(slide.seq.1, slide.seq.2),
    FUN = function(seu) {
        FindVariableFeatures(
            NormalizeData(seu),
            selection.method = "vst",
            nfeatures = anchor_feature
        )
    }
)

# Keep the features that are repeatedly variable across both datasets; they
# drive the integration.
features <- SelectIntegrationFeatures(
    object.list = seu_list,
    nfeatures = anchor_feature
)

In [None]:
# Request the "sequential" plan before each of the two integration steps.
# NOTE(review): `plan` is presumably future::plan; no library(future) call is
# visible in this notebook, so it must be attached elsewhere -- confirm.
plan("sequential")
# `dim` below is the hyperparameter from the parameters cell, not base::dim().
seu.anchors <- FindIntegrationAnchors(
    object.list = seu_list,
    anchor.features = features,
    dims = 1:dim,
    k.anchor = k_anchor,
    k.filter = k_filter,
    k.score = k_score,
    verbose = FALSE
)

plan("sequential")
seu_combined <- IntegrateData(
    anchorset = seu.anchors,
    verbose = FALSE
)

In [None]:
# Work on the "integrated" assay from here on: scale it and compute a
# 50-component PCA.
DefaultAssay(seu_combined) <- "integrated"
seu_combined <- ScaleData(
    seu_combined,
    verbose = FALSE
)
seu_combined <- RunPCA(
    seu_combined,
    npcs = 50,
    verbose = FALSE
)

# Elapsed (wall-clock) seconds since `start`, stored as a plain number.
run_time <- as.numeric((proc.time() - start)[["elapsed"]])

In [None]:
# Split the PCA embedding of the integrated object back into the two input
# datasets and write each part as a headerless CSV.
#
# Rows are selected through the "batch" label attached to every cell before
# integration ("a1" / "a2") instead of by position, so the split does not
# depend on the cell order of the integrated object. `drop = FALSE` keeps a
# matrix even if a dataset contributes a single cell.
pca_embeddings <- seu_combined@reductions$pca@cell.embeddings
cell_batch <- seu_combined@meta.data[rownames(pca_embeddings), "batch"]
embd1 <- pca_embeddings[cell_batch %in% "a1", , drop = FALSE]
embd2 <- pca_embeddings[cell_batch %in% "a2", , drop = FALSE]

# Fail loudly rather than write misaligned files if the labels do not account
# for exactly the cells of each input object.
stopifnot(
    nrow(embd1) == ncol(slide.seq.1),
    nrow(embd2) == ncol(slide.seq.2)
)

fwrite(embd1, sep = ",", file = emb0_file, col.names = FALSE)
fwrite(embd2, sep = ",", file = emb1_file, col.names = FALSE)

In [None]:
# Choose the metadata columns that describe biology and topology, based on
# which dataset the first input path names.
# Both variables start as NULL so they are always defined, even when the path
# matches none of the known datasets.
biology_meta <- NULL
topology_meta <- NULL
# `&&` is the scalar, short-circuiting operator intended for `if` conditions.
if (grepl("visium", adata1_file) && grepl("DLPFC", adata1_file)) {
    biology_meta <- "leiden"
    topology_meta <- "layer_guess"
} else if (grepl("merfish", adata1_file) && grepl("hypothalamic", adata1_file)) {
    biology_meta <- "Cell_class"
    topology_meta <- "region"
} else if (grepl("stereo", adata1_file) && grepl("embryo", adata1_file)) {
    biology_meta <- "annotation"
    topology_meta <- "region"
}

# Plot

In [None]:
# Directory that contains the first embedding file; later outputs are written
# next to it.
out_dir <- emb0_file %>%
    normalizePath() %>%
    dirname()

In [None]:
# seu_combined <- RunUMAP(seu_combined, reduction = "pca", dims = 1:30, verbose = FALSE)
# seu_combined <- FindNeighbors(seu_combined, reduction = "pca", dims = 1:30, verbose = FALSE)
# seu_combined <- FindClusters(seu_combined, resolution = 0.5, verbose = FALSE)
# options(repr.plot.width = 10, repr.plot.height = 8)
# fig1 <- DimPlot(seu_combined, reduction = "umap", group.by = "batch")
# fig2 <- DimPlot(seu_combined, reduction = "umap", group.by = biology_meta)
# fig3 <- DimPlot(seu_combined, reduction = "umap", group.by = topology_meta)


In [None]:
# out_dir <- dirname(normalizePath(emb0_file))
# ggsave(paste0(out_dir, "/batch.pdf"), plot = fig1, device = "pdf", dpi = 600, bg = "transparent")
# ggsave(paste0(out_dir, "/biology.pdf"), plot = fig2, device = "pdf", dpi = 600, bg = "transparent")
# ggsave(paste0(out_dir, "/topology.pdf"), plot = fig3, device = "pdf", dpi = 600, bg = "transparent")

# Save

In [None]:
# Store the measured run time as run_time.yaml inside the output directory.
write_yaml(
    x = list(run_time = run_time),
    file = paste0(out_dir, "/run_time.yaml")
)

In [None]:
# saveRDS(seu_combined, seurat_RDS_file)
# Show the R session details (R version, platform, attached and loaded
# packages) in the notebook output.
sessionInfo()