In [None]:
# Point CONDA_BUILD_SYSROOT at the filesystem root before any package loads.
# NOTE(review): presumably required by a conda-provided toolchain -- confirm.
Sys.setenv(CONDA_BUILD_SYSROOT = "/")

# Package dependencies
library(parallel)

library(ArchR)
library(chromVARmotifs)
library(Seurat)

# Session-wide ArchR options: switch off verbose output and the "chr"
# sequence-name prefix setting.
addArchRVerbose(verbose = FALSE)
addArchRChrPrefix(chrPrefix = FALSE)

# Turn off HDF5 file locking (both the HDF5_ and RHDF5_ variables are set).
# This works around HDF5 I/O issues on NFS; see
# https://github.com/GreenleafLab/ArchR/issues/248#issuecomment-789453997
Sys.setenv(
    HDF5_USE_FILE_LOCKING = "FALSE",
    RHDF5_USE_FILE_LOCKING = "FALSE"
)

In [None]:
# Pull the rule configuration out of the slots of the `snakemake` object.
params <- snakemake@params
input_paths <- snakemake@input
output_paths <- snakemake@output
threads <- snakemake@threads
log_paths <- snakemake@log

# Seed R's random number generator from the rule parameters.
seed <- params[["seed"]]
set.seed(seed)

# Let ArchR use the number of threads granted to the rule.
addArchRThreads(threads = threads)

In [None]:
# Load the input project, save it under the output directory, and keep
# working with the project reloaded from there (load = TRUE).
proj <- loadArchRProject(
    path = input_paths[["project_in"]],
    force = FALSE,
    showLogo = FALSE
)
proj <- saveArchRProject(
    ArchRProj = proj,
    outputDirectory = output_paths[["project_out"]],
    overwrite = TRUE,
    load = TRUE,
    logFile = log_paths[["move"]]
)

In [None]:
# --- ATAC: iterative LSI on the tile matrix -------------------------------
# Tuning arguments that are currently disabled (left at ArchR's defaults):
#     iterations = 4,
#     clusterParams = list(
#         resolution = c(0.2,0.2,0.6,0.8),
#         sampleCells = 10000,
#         n.start = 10
#     ),
#     varFeatures = 25000,
#     dimsToUse = 1:30,
proj <- addIterativeLSI(
    ArchRProj = proj,
    useMatrix = "TileMatrix",
    name = "LSI_ATAC",
    logFile = log_paths[["lsi_atac"]]
)

# --- RNA: iterative LSI on the gene expression matrix ---------------------
# Runs on non-binarized values, takes depth from the "Gex_nUMI" column and
# selects 2500 features with firstSelection = "variable".
proj <- addIterativeLSI(
    ArchRProj = proj,
    useMatrix = "GeneExpressionMatrix",
    name = "LSI_RNA",
    depthCol = "Gex_nUMI",
    varFeatures = 2500,
    firstSelection = "variable",
    binarize = FALSE,
    logFile = log_paths[["lsi_rna"]]
)

# --- Joint: combine both reductions into "LSI_Combined" -------------------
proj <- addCombinedDims(
    ArchRProj = proj,
    name = "LSI_Combined",
    reducedDims = c("LSI_ATAC", "LSI_RNA")
)

In [None]:
# Seurat conversion
#
# Wrap the two ArchR LSI embeddings in a Seurat object so that Seurat
# functions can be run on them. Only the embeddings carry information: the
# assays hold small random placeholder matrices, one column per cell.
matDR_atac <- getReducedDims(
    ArchRProj = proj,
    reducedDims = "LSI_ATAC"
)
matDR_rna <- getReducedDims(
    ArchRProj = proj,
    reducedDims = "LSI_RNA"
)

# Both embeddings must describe the same cells. Put the ATAC rows in the RNA
# row order so the two assays and reductions line up cell by cell.
stopifnot(setequal(rownames(matDR_rna), rownames(matDR_atac)))
matDR_atac <- matDR_atac[rownames(matDR_rna), , drop = FALSE]

# Build an n_features x n_cells placeholder matrix of random values
# (mean 10) with features named t1, t2, ... and cells as column names.
make_placeholder_counts <- function(cell_names, n_features = 3L) {
    matrix(
        rnorm(length(cell_names) * n_features, 10),
        nrow = n_features,
        ncol = length(cell_names),
        dimnames = list(paste0("t", seq_len(n_features)), cell_names)
    )
}

# RNA is drawn before ATAC, matching the order of the random draws used so far.
tmp_rna <- make_placeholder_counts(rownames(matDR_rna))
tmp_atac <- make_placeholder_counts(rownames(matDR_atac))

obj <- Seurat::CreateSeuratObject(
    tmp_rna,
    project = "scMultiome",
    min.cells = 0,
    min.features = 0
)
obj[["ATAC"]] <- Seurat::CreateAssayObject(counts = tmp_atac)
obj[["lsi_rna"]] <- Seurat::CreateDimReducObject(
    embeddings = matDR_rna,
    key = "lsi_rna_",
    assay = "RNA"
)
obj[["lsi_atac"]] <- Seurat::CreateDimReducObject(
    embeddings = matDR_atac,
    key = "lsi_atac_",
    assay = "ATAC"
)

# Log the embedding sizes only. Passing a whole matrix to message() pastes
# every element into the log.
message(
    "LSI_ATAC embedding: ", nrow(matDR_atac), " cells x ",
    ncol(matDR_atac), " dims"
)
message(
    "LSI_RNA embedding: ", nrow(matDR_rna), " cells x ",
    ncol(matDR_rna), " dims"
)

In [None]:
# WNN joint clustering
#
# Build the weighted nearest-neighbour graph from both LSI reductions,
# cluster on the resulting "wsnn" graph, and store the labels in the ArchR
# project as "Clusters_WNN".

# Use the components actually present in each reduction, capped at 30,
# instead of a hard-coded range. A fixed 1:30 fails when a reduction has
# fewer columns, and 1:2 ignores almost all of the embedding.
# NOTE(review): this replaces the previous dims of 1:2 -- confirm that was a
# debugging leftover.
n_dims_rna <- min(30L, ncol(Seurat::Embeddings(obj, reduction = "lsi_rna")))
n_dims_atac <- min(30L, ncol(Seurat::Embeddings(obj, reduction = "lsi_atac")))
obj <- FindMultiModalNeighbors(
    obj,
    reduction.list = list("lsi_rna", "lsi_atac"),
    dims.list = list(seq_len(n_dims_rna), seq_len(n_dims_atac))
)

obj <- FindClusters(obj, graph.name = "wsnn", algorithm = 3)

# Relabel the Seurat cluster ids as Cluster1, Cluster2, ... in order of first
# appearance, keyed by cell name.
clust <- obj@meta.data[["seurat_clusters"]]
clust <- paste0("Cluster", match(clust, unique(clust)))
names(clust) <- rownames(obj@meta.data)

# Write the per-cell labels (not a constant placeholder) to the project.
proj <- addCellColData(
    ArchRProj = proj,
    data = unname(clust),
    name = "Clusters_WNN",
    cells = names(clust),
    force = TRUE
)

# Cluster cells separately on the ATAC and the RNA LSI values, using the
# "Seurat" method at resolution 0.8. Each run stores its labels in its own
# column and writes to its own log file.
cluster_specs <- list(
    list(dims = "LSI_ATAC", column = "Clusters_ATAC", log = "cluster_atac"),
    list(dims = "LSI_RNA", column = "Clusters_RNA", log = "cluster_rna")
)
for (spec in cluster_specs) {
    proj <- addClusters(
        input = proj,
        reducedDims = spec[["dims"]],
        method = "Seurat",
        name = spec[["column"]],
        resolution = 0.8,
        logFile = log_paths[[spec[["log"]]]]
    )
}

In [None]:
# Calculate UMAP coordinates from the ATAC, RNA and Combined LSI values.
#
# The plots further down draw on "UMAP_ATAC", "UMAP_RNA" and "UMAP_Combined",
# so all three embeddings are computed here with identical parameters.
umap_sources <- c(
    UMAP_ATAC = "LSI_ATAC",
    UMAP_RNA = "LSI_RNA",
    UMAP_Combined = "LSI_Combined"
)
for (umap_name in names(umap_sources)) {
    # Keep a "UMAP_ATAC" embedding that the loaded project already carries,
    # rather than failing on the name clash.
    if (umap_name == "UMAP_ATAC" && umap_name %in% names(proj@embeddings)) {
        next
    }
    proj <- addUMAP(
        ArchRProj = proj,
        reducedDims = umap_sources[[umap_name]],
        name = umap_name,
        nNeighbors = 30,
        minDist = 0.5,
        metric = "cosine"
    )
}

In [None]:
# Plot ATAC clusters on each of the three UMAP embeddings
# (ATAC, RNA, Combined), save them to a PDF and draw them side by side.
p1 <- plotEmbedding(
    ArchRProj = proj,
    colorBy = "cellColData",
    name = "Clusters_ATAC",
    embedding = "UMAP_ATAC",
    logFile = log_paths[["umap_plot"]]
)
p2 <- plotEmbedding(
    ArchRProj = proj,
    colorBy = "cellColData",
    name = "Clusters_ATAC",
    embedding = "UMAP_RNA",
    logFile = log_paths[["umap_plot"]]
)
# The combined-embedding plot gets its own variable so it does not overwrite
# the RNA plot.
p3 <- plotEmbedding(
    ArchRProj = proj,
    colorBy = "cellColData",
    name = "Clusters_ATAC",
    embedding = "UMAP_Combined",
    logFile = log_paths[["umap_plot"]]
)
plotPDF(
    p1, p2, p3,
    name = "umap_atac_clusters.pdf",
    ArchRProj = proj,
    addDOC = FALSE,
    width = 5,
    height = 5
)
ggAlignPlots(p1, p2, p3, type = "h")

In [None]:
# Plot RNA clusters on each of the three UMAP embeddings
# (ATAC, RNA, Combined), save them to a PDF and draw them side by side.
p1 <- plotEmbedding(
    ArchRProj = proj,
    colorBy = "cellColData",
    name = "Clusters_RNA",
    embedding = "UMAP_ATAC",
    logFile = log_paths[["umap_plot"]]
)
p2 <- plotEmbedding(
    ArchRProj = proj,
    colorBy = "cellColData",
    name = "Clusters_RNA",
    embedding = "UMAP_RNA",
    logFile = log_paths[["umap_plot"]]
)
# The combined-embedding plot gets its own variable so it does not overwrite
# the RNA plot.
p3 <- plotEmbedding(
    ArchRProj = proj,
    colorBy = "cellColData",
    name = "Clusters_RNA",
    embedding = "UMAP_Combined",
    logFile = log_paths[["umap_plot"]]
)
plotPDF(
    p1, p2, p3,
    name = "umap_rna_clusters.pdf",
    ArchRProj = proj,
    addDOC = FALSE,
    width = 5,
    height = 5
)
ggAlignPlots(p1, p2, p3, type = "h")

In [None]:
# Plot WNN clusters on each of the three UMAP embeddings
# (ATAC, RNA, Combined), save them to a PDF and draw them side by side.
p1 <- plotEmbedding(
    ArchRProj = proj,
    colorBy = "cellColData",
    name = "Clusters_WNN",
    embedding = "UMAP_ATAC",
    logFile = log_paths[["umap_plot"]]
)
p2 <- plotEmbedding(
    ArchRProj = proj,
    colorBy = "cellColData",
    name = "Clusters_WNN",
    embedding = "UMAP_RNA",
    logFile = log_paths[["umap_plot"]]
)
# The combined-embedding plot gets its own variable so it does not overwrite
# the RNA plot.
p3 <- plotEmbedding(
    ArchRProj = proj,
    colorBy = "cellColData",
    name = "Clusters_WNN",
    embedding = "UMAP_Combined",
    logFile = log_paths[["umap_plot"]]
)
plotPDF(
    p1, p2, p3,
    name = "umap_wnn_clusters.pdf",
    ArchRProj = proj,
    addDOC = FALSE,
    width = 5,
    height = 5
)
ggAlignPlots(p1, p2, p3, type = "h")

In [None]:
# Persist the finished project to the output directory, overwriting what is
# there; the project is not reloaded afterwards (load = FALSE).
saveArchRProject(
    ArchRProj = proj,
    outputDirectory = output_paths[["project_out"]],
    logFile = log_paths[["save"]],
    overwrite = TRUE,
    load = FALSE
)