# Create an ArchR project for ATAC analysis

In [None]:
# load libraries
# Attach a package while suppressing its startup messages.
# Everything passed in `...` is forwarded unchanged to library().
quiet_library <- function(...) {
    suppressPackageStartupMessages(library(...))
}
quiet_library("tidyverse")
quiet_library("hise")
quiet_library("ArchR")
quiet_library("data.table")
quiet_library("jsonlite")
quiet_library("parallel")
quiet_library("Seurat")


In [None]:
# Project locations ----
# Folder holding the experiment data; it also becomes the working directory.
proj_path <- "/home/jupyter/data/preRA_teaseq/EXP-00243"
setwd(proj_path)
# Project label.
proj_name <- "Myeloid_tea_seq"
# Folder for the ATAC figures, created when it does not exist yet.
fig_path <- "/home/jupyter/figures/preRA_teaseq/ATAC"
if (!dir.exists(fig_path)) {
    # The parentheses keep the result of dir.create() visible in the cell output.
    (dir.create(fig_path, recursive = TRUE))
}


In [None]:
# Global ArchR configuration: thread count, genome, and the random seed.
addArchRThreads(55)
addArchRGenome(genome = "hg38")
set.seed(seed = 1221)

In [None]:
# Find the arrow files within the cache we just downloaded.
# `pattern` is a regular expression: the dot is escaped and the match is
# anchored to the end of the file name, so only names ending in ".arrow" are
# listed (an unanchored ".arrow" would also accept e.g. "narrow_peaks.bed").
arrows <- grep("atac_arrows/tempEXP00243Arrow",
    list.files(proj_path, pattern = "\\.arrow$", recursive = TRUE),
    value = TRUE
)
# Name each path by its file name, with the directory part and the "_archr"
# tag removed.
names(arrows) <- gsub("_archr", "", gsub(".*/", "", arrows))


In [None]:
# Display the arrow file paths for a visual check.
arrows

In [None]:
# Build the ArchR project from the arrow files, with "atac_arrows" as its
# output directory.
# The arrow file list is named because those names will become the sample
# names in the metadata.
# The standard ArchR genome is the UCSC Known Gene track. Switching to the
# UCSC Ref Gene track is suggested but not covered here; that change would be
# made when the ArchR project or the arrow files are created.
tea_atac <- ArchRProject(ArrowFiles = arrows, outputDirectory = "atac_arrows")


In [None]:
# Prepare the ArchR project for the demo ----
# Add doublet scores to the project.
tea_atac <- addDoubletScores(input = tea_atac)


In [None]:
# Load the ArchR project from its folder on disk.
tea_atac <- loadArchRProject("/home/jupyter/data/preRA_teaseq/EXP-00243/atac_arrows")


In [None]:
# Cell metadata of the ArchR project as a tibble, followed by its column names
# and the number of cells per sample.
metadf <- as_tibble(getCellColData(tea_atac))
colnames(metadf)
tally(group_by(metadf, Sample))

In [None]:
# Cell metadata keyed by cell name, plus the ratio n_mito / n_fragments.
# The ids are read from the row names of the ArchR cell table: `metadf` is
# already a tibble without row names, so converting it with
# `rownames = "cell_id"` would only produce row numbers as ids.
meta_data <- getCellColData(tea_atac) %>%
    as_tibble(rownames = "cell_id") %>%
    mutate(prec_mito = n_mito / n_fragments)
# Number of cells per PassQC value.
meta_data %>%
    group_by(PassQC) %>%
    tally()

In [None]:
# Preview the first rows of the metadata table.
head(meta_data)

In [None]:
# Save the current state of the ArchR project.
saveArchRProject(ArchRProj = tea_atac)

## Load the Seurat object and filter out low-quality cells

In [None]:
# Read the Seurat object stored in the project folder and display it.
all_so <- file.path(proj_path, "PreRA_teaseq_seurat_qc_filtered_cells.rds") %>%
    readRDS()
all_so

In [None]:
# Barcodes stored in the metadata of the QC'ed Seurat object, and how many
# there are.
seurat_barcodes <- pull(all_so@meta.data, barcodes)
length(seurat_barcodes)
# Subset the ArchR project to the cells whose barcode is found in Seurat,
# then report the number of cells left.
idxSample <- BiocGenerics::which(tea_atac$barcodes %in% seurat_barcodes)
cellsSample <- tea_atac$cellNames[idxSample]
tea_atac <- tea_atac[cellsSample, ]
nrow(getCellColData(tea_atac))


In [None]:
# Count the ArchR cells whose barcode is present in the Seurat object.
# sum() counts the TRUE entries; taking length() of the logical vector would
# report the total number of cells no matter how many of them matched.
sum(tea_atac$barcodes %in% seurat_barcodes)


In [None]:
# Iterative LSI on the tile matrix, stored under the name "IterativeLSI".
tea_atac <- addIterativeLSI(
  tea_atac,
  useMatrix = "TileMatrix",
  name = "IterativeLSI",
  iterations = 2,
  # varFeatures = 75000, # increase the viable features
  force = TRUE
)

# Cluster the cells on the LSI dimensions; results go to the "Clusters" column.
tea_atac <- addClusters(
  tea_atac,
  reducedDims = "IterativeLSI",
  method = "Seurat",
  name = "Clusters",
  resolution = 3,
  force = TRUE
)

# UMAP embedding computed from the same LSI dimensions.
tea_atac <- addUMAP(
  tea_atac,
  reducedDims = "IterativeLSI",
  force = TRUE
)


In [None]:
# Add Seurat-predicted labels to the ATAC cells.
# Load our Seurat reference first.
ref <- readRDS("/home/jupyter/data/reference/AIFI-2021-10-26T00:31:31.197552669Z/reference_atac.rds")

# Argument notes:
#   useMatrix  - can be changed to another matrix.
#   matrixName - name of the matrix generated by this function; it contains
#                RNA expression data from scATAC cell to RNA cell.
#   addToArrow - FALSE avoids HDF5 errors.
#   nameGroup  - metadata column to be created with the ATAC cell labels.
#   nameScore  - metadata column to be created with the ATAC cell label scores.
tea_atac <- addGeneIntegrationMatrix(
  tea_atac,
  useMatrix = "GeneScoreMatrix",
  matrixName = "GeneIntegrationMatrix",
  reducedDims = "IterativeLSI",
  seRNA = ref,
  groupRNA = "celltype.l1",
  addToArrow = FALSE,
  nameCell = "predictedCell_Un",
  nameGroup = "predictedGroup_Un",
  nameScore = "predictedScore_Un",
  force = TRUE
)


In [None]:
# UMAP coloured by the `well_id` metadata column.
# The plot is kept in a variable and handed to ggsave() explicitly, so the
# saved file does not depend on ggplot2's implicit "last plot" state.
umap_well_id <- plotEmbedding(tea_atac,
    embedding = "UMAP",
    colorBy = "cellColData", name = "well_id",
    labelMeans = FALSE
)
# Show the plot in the cell output.
umap_well_id
ggsave(
    file.path(fig_path, paste0(proj_name, "_atac_umap_well_id.pdf")),
    plot = umap_well_id
)
# p2 <- plotEmbedding(tea_atac, embedding = "UMAP", colorBy = "cellColData",name = "predictedGroup_Un")
# ggsave(file.path(fig_path, paste0(proj_name, '_atac_umap_predictedGroup_Un.pdf')))
# p3 <- plotEmbedding(tea_atac, embedding = "UMAP", colorBy = "cellColData",name = "DoubletEnrichment")
# ggsave(file.path(fig_path, paste0(proj_name, '_atac_umap_DoubletEnrichment.pdf')))
# p4 <- plotEmbedding(tea_atac, embedding = "UMAP", colorBy = "cellColData",name = "Clusters")
# ggsave(file.path(fig_path, paste0(proj_name, '_atac_umap_cluster.pdf')))


In [None]:
# UMAPs coloured by the `peaks_frac` and `altius_frac` metadata columns.
# Each plot is stored and passed to ggsave() explicitly, so every file is
# written from the plot it is named after instead of from ggplot2's implicit
# "last plot" state.
umap_peaks_frac <- plotEmbedding(tea_atac,
    embedding = "UMAP", colorBy = "cellColData", name = "peaks_frac",
    labelMeans = FALSE
)
umap_peaks_frac
ggsave(
    file.path(fig_path, paste0(proj_name, "_atac_umap_peaks_frac.pdf")),
    plot = umap_peaks_frac
)
umap_altius_frac <- plotEmbedding(tea_atac,
    embedding = "UMAP", colorBy = "cellColData", name = "altius_frac",
    labelMeans = FALSE
)
umap_altius_frac
ggsave(
    file.path(fig_path, paste0(proj_name, "_atac_umap_altius_frac.pdf")),
    plot = umap_altius_frac
)


In [None]:
# Scatter plot of peaks_frac against log10(n_unique), coloured by sample.
# `metadf` is already a tibble, so no further conversion is needed; the plot
# is stored and passed to ggsave() explicitly rather than relying on
# ggplot2's implicit "last plot" state.
peaks_frac_scatter <- metadf %>%
    ggplot(aes(x = log10(n_unique), y = peaks_frac, col = Sample)) +
    scattermore::geom_scattermore()
peaks_frac_scatter
ggsave(
    file.path(fig_path, paste0(proj_name, "_atac_umap_peaks_frac_nfrag.pdf")),
    plot = peaks_frac_scatter
)


In [None]:
# List the columns available in the metadata tibble.
colnames(metadf)

In [None]:
# Save the ArchR project again after the steps above.
saveArchRProject(ArchRProj = tea_atac)

## Extract ATAC information from the ArchR object and save it back into Seurat

In [None]:
# Display the project folder currently in use.
proj_path

In [None]:
# Row counts of the Seurat metadata and of the ArchR cell table.
nrow(as_tibble(all_so[[]], rownames = "cell_id"))
nrow(getCellColData(tea_atac))


In [None]:
# Fraction of the Seurat cells that are missing from the ATAC data.
# The counts are read from the objects rather than copied by hand, so the
# value stays correct when the inputs change.
# NOTE(review): the hand-copied values were 118035 and 96595, presumably the
# Seurat and ATAC cell counts of an earlier run; confirm.
n_seurat_cells <- nrow(all_so[[]])
n_atac_cells <- nrow(getCellColData(tea_atac))
(n_seurat_cells - n_atac_cells) / n_seurat_cells


In [None]:
# Alphabetical list of the columns in the ArchR cell table.
sort(colnames(getCellColData(tea_atac)))

In [None]:
# Extract the UMAP table from the ArchR project; its row names are stored in
# the `atac_cell_id` column and the column names are cleaned.
atac_umap <- tea_atac@embeddings$UMAP$df %>%
    as_tibble(rownames = "atac_cell_id") %>%
    janitor::clean_names()
# Gather the ATAC columns to carry into the Seurat object and attach the UMAP
# coordinates by cell id.
# `prec_mito` is listed in select(); without it the column would be dropped
# right after being computed.
atac_seurat <- getCellColData(tea_atac) %>%
    as_tibble(rownames = "atac_cell_id") %>%
    mutate(prec_mito = n_mito / n_fragments) %>%
    select(
        atac_cell_id, n_mito, n_fragments, peaks_frac, altius_frac, barcodes,
        DoubletScore, DoubletEnrichment, Clusters, prec_mito
    ) %>%
    left_join(atac_umap, by = "atac_cell_id")
head(atac_seurat)


In [None]:
# Check whether the ATAC barcodes and the Seurat cell ids match.
cell_id <- rownames(all_so[[]])
all(atac_seurat$barcodes %in% cell_id)
# Some cells are filtered out of the ATAC data by the ATAC pipeline.
all(cell_id %in% atac_seurat$barcodes)


In [None]:
# Add the ATAC columns to the Seurat metadata, matching rows on `barcodes`.
# The row names are saved before the join and put back on the joined table.
cell_id <- rownames(all_so@meta.data)
atac_joined_meta <- as.data.frame(
    left_join(all_so@meta.data, atac_seurat, by = "barcodes")
)
rownames(atac_joined_meta) <- cell_id
all_so@meta.data <- atac_joined_meta


In [None]:
# Keep only the Seurat cells whose barcode is present in the ATAC table.
all_so_fl <- subset(x = all_so, subset = barcodes %in% atac_seurat$barcodes)

In [None]:
# Display the Seurat object before and after the ATAC filter.
all_so
all_so_fl

#### Import the LSI from ATAC into Seurat

In [None]:
# Fetch the reduced dimensions (LSI) from the ArchR project.
lsi_data <- getReducedDims(ArchRProj = tea_atac)

In [None]:
# Replace each row name with its second "#"-separated field, then preview the
# matrix and report its number of rows.
lsi_name_fields <- str_split(rownames(lsi_data), "#", simplify = TRUE)
rownames(lsi_data) <- lsi_name_fields[, 2]
head(lsi_data)
nrow(lsi_data)

In [None]:
# Create a dummy assay matrix for the LSI: five placeholder features, one
# column per cell of the filtered Seurat object.
# The column count and the column names both come from the Seurat cells, and
# a size mismatch with the LSI matrix is reported explicitly instead of
# surfacing as a dimnames length error.
seurat_cells <- rownames(all_so_fl@meta.data)
if (nrow(lsi_data) != length(seurat_cells)) {
    stop(
        "LSI matrix has ", nrow(lsi_data), " cells but the Seurat object has ",
        length(seurat_cells), ".",
        call. = FALSE
    )
}
tile_mtx <- matrix(
    data = 1, nrow = 5, ncol = length(seurat_cells),
    dimnames = list(c("A", "B", "C", "D", "E"), seurat_cells)
)

In [None]:
# Store the dummy matrix as the "Tiles" assay, then attach the LSI matrix as
# the "lsit" reduction linked to that assay.
tiles_assay <- CreateAssayObject(counts = tile_mtx)
all_so_fl[["Tiles"]] <- tiles_assay
lsi_reduction <- CreateDimReducObject(
    embeddings = lsi_data,
    key = "lsit_",
    assay = "Tiles"
)
all_so_fl[["lsit"]] <- lsi_reduction

In [None]:
# Write the filtered Seurat object, now carrying the LSI, to the project folder.
saveRDS(
    all_so_fl,
    file.path(proj_path, "PreRA_teaseq_seurat_qc_filtered_cells_lsi.rds")
)

In [None]:
# Display the final ArchR project and the filtered Seurat object.
tea_atac
all_so_fl

In [None]:
# Record the R session details (R version and packages in use).
sessionInfo()