# Install Libraries
Install information can be found [here](https://www.archrproject.com/index.html)\
Vignette [here](https://www.archrproject.com/articles/Articles/tutorial.html)

In [None]:
# Notebook parameters. Every value in this cell is a default that is read by
# the cells below.

#ArchR parameters
atac_frag = "" # Fragment file handed to createArrowFiles as inputFiles
genome = "" #either hg38 or mm10
peak_set = "" #bed file (tab-separated; the columns are read as chr, start, end)

#ArchR QC
min_tss = 4 #The minimum numeric transcription start site (TSS) enrichment score required for a cell to pass filtering
min_frags = 1000 #The minimum number of mapped ATAC-seq fragments required per cell to pass filtering for use
add_tile_mat = TRUE #A boolean value indicating whether to add a "Tile Matrix" to each ArrowFile.
add_gene_score_mat = TRUE #A boolean value indicating whether to add a Gene-Score Matrix to each ArrowFile.

#ArchR Doublet parameters
find_doublets = FALSE #Whether to compute doublet scores and draw the doublet UMAP
doublet_k = 10 #The number of cells neighboring a simulated doublet to be considered as putative doublets.
doublet_knn_method = "UMAP" #Refers to the embedding to use for nearest neighbor search.
lsi_method = 1 #A number or string indicating the order of operations in the TF-IDF normalization. Possible values are: 1 or "tf-logidf", 2 or "log(tf-idf)", and 3 or "logtf-logidf".

copy_arrow_files = TRUE #save a copy of arrow files in the ArchR project (recommended)
iter_LSI_matrix = "TileMatrix" #The name of the data matrix to retrieve from the ArrowFiles associated with the ArchRProject. Valid options are "TileMatrix" or "PeakMatrix".
threads = 1 #Number of threads passed to addArchRThreads
prefix = "prefix" #project name; also used as sample name, output directory and file-name prefix

#ArchR Plots parameters
marker_features_test = "wilcoxon" #The name of the pairwise test method to use in comparing cell groupings to the null cell grouping during marker feature identification.
heatmap_transpose = TRUE #Boolean to transpose heatmap
heatmap_label_n = 5 #Top n genes to label per cluster in heatmap
heatmap_cutoff = "FDR <= 0.01 & Log2FC >= 0.5" #Cut-off applied to genes in heatmap

#Terra specific parameters (only read when papermill is FALSE)
table_name = "demux_BH3KTLDMXY" #Name of the data table to query
experiment_name = "gm12878_fresh_ATAC" #Value of the "<table_name>_id" column selecting the row to use

#papermill specific parameters
papermill = TRUE #When TRUE plots are written to PNG files and the Terra lookup is skipped

#jupyter notebook plot sizes
options(repr.plot.width=20, repr.plot.height=15)


In [None]:
# Normalise every boolean parameter to a real logical value.
# NOTE(review): presumably needed because injected parameters can arrive as
# strings such as "TRUE" -- confirm against how the notebook is launched.

# Flags that switch whole steps on or off
find_doublets <- as.logical(find_doublets)
papermill <- as.logical(papermill)

# Flags forwarded to ArchR / plotting calls
copy_arrow_files <- as.logical(copy_arrow_files)
add_gene_score_mat <- as.logical(add_gene_score_mat)
add_tile_mat <- as.logical(add_tile_mat)
heatmap_transpose <- as.logical(heatmap_transpose)

In [None]:
# Install every dependency that is not available yet. Nothing is attached
# here; the libraries are loaded in the next cell.
#
# Each CRAN install names its mirror explicitly. Without a configured mirror,
# install.packages() stops with "trying to use CRAN without setting a mirror"
# when the notebook is executed non-interactively.
cran_repo <- "https://cran.r-project.org"

if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager", repos = cran_repo)
}
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools", repos = cran_repo)
}

# Bioconductor packages
if (!requireNamespace("GenomeInfoDbData", quietly = TRUE)) {
  BiocManager::install("GenomeInfoDbData")
}
if (!requireNamespace("GenomicRanges", quietly = TRUE)) {
  BiocManager::install("GenomicRanges")
}
if (!requireNamespace("Rsamtools", quietly = TRUE)) {
  BiocManager::install("Rsamtools")
}

# GitHub-only dependency, installed before ArchR
if (!requireNamespace("presto", quietly = TRUE)) {
  devtools::install_github('immunogenomics/presto')
}
if (!requireNamespace("magick", quietly = TRUE)) {
  install.packages("magick", repos = cran_repo)
}
if (!requireNamespace("Seurat", quietly = TRUE)) {
  install.packages("Seurat", repos = cran_repo)
}

# ArchR is pinned to a release tag; its extra packages are installed right
# after it.
if (!requireNamespace("ArchR", quietly = TRUE)) {
  devtools::install_github('GreenleafLab/ArchR@v1.0.1', repos = BiocManager::repositories())
  ArchR::installExtraPackages()
}

# Logging and plotting helpers
if (!requireNamespace("logr", quietly = TRUE)) {
  install.packages("logr", repos = cran_repo)
}
if (!requireNamespace("grid", quietly = TRUE)) {
  install.packages("grid", repos = cran_repo)
}
if (!requireNamespace("gridExtra", quietly = TRUE)) {
  install.packages("gridExtra", repos = cran_repo)
}
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2", repos = cran_repo)
}
if (!requireNamespace("hexbin", quietly = TRUE)) {
  install.packages("hexbin", repos = cran_repo)
}
if (!requireNamespace("ggpointdensity", quietly = TRUE)) {
  install.packages("ggpointdensity", repos = cran_repo)
}

# Turn off logr's "notes" option
options("logr.notes" = FALSE)

In [None]:
suppressMessages(library(parallel))
suppressMessages(library(ArchR))
suppressMessages(library(magick))
suppressMessages(library(logr))
suppressMessages(library(GenomicRanges))
suppressMessages(library(grid))
suppressMessages(library(gridExtra))
suppressMessages(library(ggplot2))
suppressMessages(library(ggpointdensity))

# Fix the random seed so that repeated runs draw the same random numbers.
set.seed(1)
# Tell ArchR how many threads to use (taken from the `threads` parameter).
addArchRThreads(threads = threads)

In [None]:
#Function to save plots
# Directory that collects every PNG written by printPNG; the same name is
# reused later as the stem of the zip archive.
plot_filename <- paste0(prefix, ".atac.archr.plots.", genome)
# showWarnings = FALSE keeps re-runs quiet when the directory already exists.
dir.create(plot_filename, showWarnings = FALSE)
# Save a plot as PNG inside the plot directory.
#
# name       Short label embedded in the file name.
# plotObject Plot/grob handed to ggsave.
# papermill  Logical; the file is only written when this is TRUE.
# wf, hf     Width and height forwarded to ggsave.
#
# Reads the globals plot_filename, prefix and genome to build the path.
# Returns the value of ggsave when a file is written, otherwise NULL invisibly.
printPNG <- function(name, plotObject, papermill, wf=22, hf=11){
    png_name <- paste0(prefix, ".atac.archr.", name, ".", genome, ".png")
    filename <- paste0(plot_filename, "/", png_name)
    if (!papermill) {
        return(invisible(NULL))
    }
    ggsave(plot = plotObject, filename = filename, width = wf, height = hf)
}

# Stack a title, a subtitle and a row of plots into one grob.
#
# plot_list List of plots/grobs placed side by side in the bottom row.
# title     Grob spanning the whole first row.
# subtitle  Grob spanning the whole second row.
# heights   Heights of the three rows (title, subtitle, plots).
# width     Width in inches of each plot column.
#
# Returns the object built by arrangeGrob.
create_plot <- function(plot_list, title, subtitle, heights = unit(c(1, 1, 8), rep("in", 3)), width = 7) {
    n_plots <- length(plot_list)

    # Grob 1 = title, grob 2 = subtitle, grobs 3.. = the individual plots
    all_grobs <- c(list(title), list(subtitle), plot_list)

    # Rows 1 and 2 repeat one grob across every column; row 3 gives each plot
    # its own column
    layout <- rbind(rep(1, n_plots), rep(2, n_plots), (3:(n_plots + 2)))

    col_widths <- unit(rep(width, n_plots), rep("in", n_plots))

    arrangeGrob(grobs = all_grobs, layout_matrix = layout, heights = heights, widths = col_widths)
}

# Density value for every (x, y) point, used to colour scatter plots.
#
# x, y  Numeric vectors of equal length.
# ...   Further arguments forwarded to MASS::kde2d (e.g. n, the grid size).
#
# A 2-D kernel density estimate is evaluated on a grid; each point is then
# assigned the estimate of the grid cell whose lower edges it falls on.
# Returns a numeric vector with one value per input point.
get_density <- function(x, y, ...) {
  kde <- MASS::kde2d(x, y, ...)
  # One (row, column) pair per point, indexing into the density matrix
  cell <- cbind(findInterval(x, kde$x), findInterval(y, kde$y))
  kde$z[cell]
}

# Write a sparse matrix (dgCMatrix) to an HDF5 file.
# Taken from https://rdrr.io/github/AllenInstitute/scrattch.io/src/R/write_10x.R
#
# mat        Sparse matrix exposing the slots @x, @i and @p.
# cols_are   What the columns of `mat` hold; if the value contains "gene" the
#            matrix is transposed first so that columns become barcodes.
# h5_target  Path of the HDF5 file to create.
# ref_name   Name of the group that receives all datasets.
# gene_ids   Optional ids stored under "genes"; defaults to the row names.
#
# Datasets written below /<ref_name>: barcodes, gene_names, genes, shape,
# data, indices and indptr.
write_dgCMatrix_h5 <- function(mat,
                               cols_are = "gene_names",
                               h5_target,
                               ref_name = "prefix",
                               gene_ids = NULL) {

  # Full dataset path inside the group
  node <- function(leaf) {
    paste0("/", ref_name, "/", leaf)
  }

  # Pre-create an integer dataset (chunks of 1000, compression level 4) and
  # fill it. NOTE(review): storage is integer, so non-integer values would
  # presumably not be stored exactly -- confirm before writing non-count data.
  write_int_dataset <- function(values, leaf) {
    rhdf5::h5createDataset(h5_target,
                           node(leaf),
                           dims = length(values),
                           storage.mode = "integer",
                           chunk = 1000,
                           level = 4)
    rhdf5::h5write(values, h5_target, node(leaf))
  }

  if (grepl("gene", cols_are)) {
    mat <- Matrix::t(mat)
  }

  # File and group that hold everything
  rhdf5::h5createFile(h5_target)
  rhdf5::h5createGroup(h5_target, ref_name)

  # Column names are the sample ids (barcodes), row names the gene names
  rhdf5::h5write(colnames(mat), h5_target, node("barcodes"))
  rhdf5::h5write(rownames(mat), h5_target, node("gene_names"))

  if (is.null(gene_ids)) {
    gene_ids <- rownames(mat)
  }
  rhdf5::h5write(gene_ids, h5_target, node("genes"))

  # Matrix dimensions
  rhdf5::h5write(dim(mat), h5_target, node("shape"))

  # Non-zero values, then their row indices
  write_int_dataset(mat@x, "data")
  write_int_dataset(mat@i, "indices")

  # Column pointers
  rhdf5::h5write(mat@p, h5_target, node("indptr"))

}

# Create the log file.
# The name is built from the project prefix and the genome so that each run
# gets its own file; log_open() starts the log, which is closed again by
# log_close() at the end of the notebook.
logfile <- file.path(paste0(prefix,".atac.archr.logfile.",genome,".txt"))
lf <- log_open(logfile)

In [None]:
#Terra specific code block

# Copy a file from a bucket into the working directory.
#
# path  Source location handed to gsutil_cp (e.g. a gs:// URL).
#
# Returns the base name of `path`, i.e. the name of the local copy.
#
# gsutil_cp is referenced through its namespace because the AnVIL package is
# never attached by this notebook; the unqualified call could not be resolved.
get_file <- function(path) {
    dest <- getwd()
    AnVIL::gsutil_cp(path, dest)
    basename(path)
}

# Outside papermill, look the fragment file up in the Terra data table and
# copy it locally.
if (!papermill) {
    # avtable is referenced through its namespace because the AnVIL package is
    # never attached by this notebook.
    table <- AnVIL::avtable(table_name)
    # `[[` extracts the id column as a plain vector, and %in% never yields NA,
    # so rows with a missing id cannot leak into the selection.
    is_experiment <- table[[sprintf('%s_id', table_name)]] %in% experiment_name
    atac_frag <- get_file(table$atac_fragment_file_raw[is_experiment])
}

In [None]:
# Select the reference genome used by ArchR for the rest of the run.
# Failures are written to the log instead of stopping the notebook.

tryCatch(
  {
    log_print("# Download genome")

    addArchRGenome(genome)

    log_print("SUCCESSFUL: Download genome")
  },
  error = function(cond) {
    log_print("ERROR: Download genome")
    log_print(cond)
  }
)

In [None]:
# Create Arrow files from the fragment file.
#
# The value of the tryCatch() is its last expression. return() must not be
# used here: this code does not run inside a function, so return() either
# raises "no function to return from" or jumps out of the evaluating frame
# before the assignment below can happen.
# On failure the error is logged and ArrowFiles is set to NULL.

ArrowFiles <- tryCatch(
  {
    log_print("# Create arrow files")

    # minTSS / minFrags are 0 so that no barcode is dropped at this stage;
    # the min_tss / min_frags thresholds are applied later via idxPass.
    ArrowFiles <- createArrowFiles(
      inputFiles = atac_frag,
      sampleNames = prefix,
      minTSS = 0,
      minFrags = 0,
      addTileMat = add_tile_mat,
      addGeneScoreMat = add_gene_score_mat
    )

    log_print("SUCCESSFUL: Create arrow files")
    ArrowFiles
  },
  error = function(cond) {
    log_print("ERROR: Create arrow files")
    log_print(cond)
    NULL
  }
)

In [None]:
# Create the ArchR project from the Arrow files.
#
# The value of the tryCatch() is its last expression. return() must not be
# used here: this code does not run inside a function, so return() either
# raises "no function to return from" or jumps out of the evaluating frame
# before the assignment below can happen.
# On failure the error is logged and proj is set to NULL.

proj <- tryCatch(
  {
    log_print("# Create Archr project")

    proj <- ArchRProject(
      ArrowFiles = ArrowFiles,
      outputDirectory = prefix,
      copyArrows = copy_arrow_files, # Recommended: keeps an unaltered copy of the Arrow files for later use.
      showLogo = FALSE
    )

    log_print("SUCCESSFUL: Create Archr project")
    proj
  },
  error = function(cond) {
    log_print("ERROR: Create Archr project")
    log_print(cond)
    NULL
  }
)

In [None]:
# Pre-filtered QC scatter plot: TSS enrichment against unique fragments.
# xlabel_min is initialised up front so that it exists even when the plot
# code fails; the post-filtered plot reuses it as its lower x limit.
xlabel_min <- 0

tryCatch(
  {
    log_print("# Pre-filtered TSS Enrichment vs Unique Fragments QC plot")

    # Per-barcode QC metrics of the unfiltered project
    df <- as.data.frame(getCellColData(proj, select = c("nFrags", "TSSEnrichment")))

    # Point density, used only as the colour of each point
    df$density <- get_density(df$nFrags, df$TSSEnrichment, n = 100)
    xlabel_min <- min(df$nFrags)

    # Dashed lines mark the two QC thresholds applied later
    obj <- ggplot(data = df) +
      geom_point(aes(x = nFrags, y = TSSEnrichment, color = density)) +
      scale_x_continuous(
        trans = "log10",
        breaks = scales::trans_breaks("log10", function(x) 10^x),
        labels = scales::trans_format("log10", scales::math_format(10^.x))
      ) +
      expand_limits(x = xlabel_min, y = 0) +
      geom_hline(yintercept = min_tss, lty = "dashed") +
      geom_vline(xintercept = min_frags, lty = "dashed") +
      scale_colour_gradientn(colors = paletteContinuous(set = "sambaNight")) +
      xlab(label = "Unique Fragments") +
      ylab(label = "TSS Enrichment") +
      annotation_logticks(sides = "b") +
      theme(
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 10),
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 8.5)
      )

    # Title and summary header
    tg <- textGrob(
      "Pre-filtered TSS Enrichment vs Unique Fragments QC plot",
      gp = gpar(fontsize = 25, fontface = "bold", col = "red")
    )
    sg <- textGrob(
      paste0(
        "Library: ", prefix, "\n",
        "Total Barcodes: ", nCells(proj), "\n",
        "Median Frags: ", median(proj$nFrags), "\n",
        "Median TSS Enrichment: ", median(proj$TSSEnrichment), "\n",
        "------------------------------------ \n"
      ),
      gp = gpar(fontsize = 18, fontface = "bold")
    )

    # Assemble, show in the notebook, and save
    plot_list <- list(obj)
    obj <- create_plot(plot_list, tg, sg, heights = unit(c(1.5, 1.5, 8), rep("in", 3)), width = 10)
    grid.draw(obj)
    printPNG(name = "prefiltered_tss_by_uniq_frags", plotObject = obj, papermill = papermill, wf = 11, hf = 12)

    log_print("SUCCESSFUL: Pre-filtered TSS Enrichment vs Unique Fragments QC plot")
  },
  error = function(cond) {
    log_print("ERROR: Pre-filtered TSS Enrichment vs Unique Fragments QC plot")
    log_print(cond)
  }
)

In [None]:
# Fragment size distribution of the unfiltered project.
tryCatch(
  {
    log_print("# Pre-filtered Fragment Size Distribution plot")

    obj <- plotFragmentSizes(ArchRProj = proj) + theme_gray()

    # Title and summary header; the fragment total is reported in millions
    tg <- textGrob(
      "Pre-filtered Fragment Size Distribution",
      gp = gpar(fontsize = 25, fontface = "bold", col = "red")
    )
    sg <- textGrob(
      paste0(
        "Library: ", prefix, "\n",
        "Total Barcodes: ", nCells(proj), "\n",
        "Total nFrags: ", paste(format(round(sum(proj$nFrags) / 1e6, 1), trim = TRUE), "M"), "\n",
        "------------------------------------ \n"
      ),
      gp = gpar(fontsize = 18, fontface = "bold")
    )

    # Assemble, show in the notebook, and save
    plot_list <- list(obj)
    obj <- create_plot(plot_list, tg, sg, heights = unit(c(1.5, 1.5, 8), rep("in", 3)), width = 10)
    grid.draw(obj)
    printPNG(name = "prefiltered_frag_size_dist", plotObject = obj, papermill = papermill, wf = 11, hf = 12)

    log_print("SUCCESSFUL: Pre-filtered Fragment Size Distribution plot")
  },
  error = function(cond) {
    log_print("ERROR: Pre-filtered Fragment Size Distribution plot")
    log_print(cond)
  }
)

In [None]:
# Attach the user-supplied peak set to the project and count fragments in it.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, which would silently change these calls.

tryCatch(
  {
    log_print("# Adding Peak set")

    # The BED file is read without a header; its columns are named
    # chr / start / end before being turned into genomic ranges.
    peakset <- read.table(peak_set, header = FALSE, sep = "\t", stringsAsFactors = FALSE)
    colnames(peakset) <- c("chr", "start", "end")
    gr_peakset <- makeGRangesFromDataFrame(peakset)

    # Keep only peaks on chromosomes known to the project.
    # This is required due to a bug in ArchR that was fixed in a later version.
    gr_peakset <- gr_peakset[seqnames(gr_peakset) %in% seqnames(getChromSizes(proj))]

    proj <- addPeakSet(ArchRProj = proj, peakSet = gr_peakset, force = TRUE)
    proj <- addPeakMatrix(ArchRProj = proj)

    gc()
    log_print("SUCCESSFUL: Adding Peak set")
  },
  error = function(cond) {
    log_print("ERROR: Adding Peak set")
    log_print(cond)
  }
)

In [None]:
# Save the unfiltered project, its peak matrix and the per-barcode metadata.

tryCatch(
  {
    log_print("# Saving prefiltered project")

    # Indices of the cells that pass both QC thresholds. This is computed
    # before any file is written so that idxPass, which the filtering step
    # depends on, is defined even if one of the exports below fails.
    idxPass <- which(proj$TSSEnrichment >= min_tss & proj$nFrags >= min_frags)

    # Raw ArchR project as rds
    saveRDS(proj, file = paste0(prefix, ".atac.archr.raw_project.", genome, ".rds"))

    # Raw peak matrix as .h5
    # NOTE(review): row names come from proj@peakSet$idx -- confirm these ids
    # are unique across chromosomes and follow the row order of the matrix.
    mtx <- getMatrixFromProject(ArchRProj = proj, useMatrix = "PeakMatrix")
    rownames(mtx) <- proj@peakSet$idx
    write_dgCMatrix_h5(assay(mtx),
                       cols_are = "barcodes",
                       h5_target = paste0(prefix, ".atac.archr.raw_matrix.", genome, ".h5"),
                       ref_name = prefix)

    # Per-barcode metadata with a PassQC flag.
    # GC bias is intentionally not computed: nothing in the exported metadata
    # uses it, so it would only add run time and another point of failure.
    meta <- proj@cellColData
    meta$PassQC <- FALSE
    meta[idxPass, "PassQC"] <- TRUE
    meta$barcodes <- rownames(meta)

    # Metadata as tsv
    write.table(x = meta,
                file = paste0(prefix, ".atac.archr.barcode_metadata.", genome, ".tsv"),
                sep = "\t", quote = FALSE, row.names = FALSE)

    # Release the large temporaries
    rm(meta)
    rm(mtx)
    gc()

    log_print("SUCCESSFUL: Saving prefiltered project")
  },
  error = function(cond) {
    log_print("ERROR: Saving prefiltered project")
    log_print(cond)
  }
)

In [None]:
# Keep only the cells that passed QC (indices in idxPass, computed when the
# prefiltered project was saved).

tryCatch(
  {
    log_print("# Filter ArchR Project")

    proj <- proj[idxPass, ]

    log_print("SUCCESSFUL: Filter ArchR Project")
  },
  error = function(cond) {
    log_print("ERROR: Filter ArchR Project")
    log_print(cond)
  }
)

In [None]:
# Post-filtered QC scatter plot: TSS enrichment against unique fragments.
# Uses xlabel_min from the pre-filtered plot so both plots share the same
# lower x limit.
tryCatch(
  {
    log_print("# Post-filtered TSS Enrichment vs Unique Fragments QC plot")

    # Per-barcode QC metrics of the filtered project
    df <- as.data.frame(getCellColData(proj, select = c("nFrags", "TSSEnrichment")))

    # geom_pointdensity colours the points by local density. The legend
    # therefore belongs to the colour aesthetic, so its title has to be set
    # with labs(colour = ...); labs(fill = ...) would not touch it.
    obj <- ggplot(data = df, aes(x = nFrags, y = TSSEnrichment)) +
      geom_pointdensity(method = "default") +
      scale_colour_gradientn(colors = paletteContinuous(set = "sambaNight")) +
      scale_x_continuous(
        trans = "log10",
        breaks = scales::trans_breaks("log10", function(x) 10^x),
        labels = scales::trans_format("log10", scales::math_format(10^.x))
      ) +
      expand_limits(x = xlabel_min, y = 0) +
      geom_hline(yintercept = min_tss, lty = "dashed") +
      geom_vline(xintercept = min_frags, lty = "dashed") +
      xlab(label = "Unique Fragments") +
      ylab(label = "TSS Enrichment") +
      annotation_logticks(sides = "b") +
      labs(colour = "density") +
      theme(
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 10),
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 8.5)
      )

    # Title and summary header
    tg <- textGrob(
      'Post-filtered TSS Enrichment vs Unique Fragments QC plot',
      gp = gpar(fontsize = 25, fontface = 'bold', col = 'red')
    )
    sg <- textGrob(
      paste0(
        "Library: ", prefix, "\n",
        "Total Barcodes: ", nCells(proj), "\n",
        "Median Frags: ", median(proj$nFrags), "\n",
        "Median TSS Enrichment: ", median(proj$TSSEnrichment), "\n",
        "------------------------------------ \n"
      ),
      gp = gpar(fontsize = 18, fontface = 'bold')
    )

    # Assemble, show in the notebook, and save
    plot_list <- list(obj)
    obj <- create_plot(plot_list, tg, sg, heights = unit(c(1.5, 1.5, 8), rep("in", 3)), width = 10)
    grid.draw(obj)
    printPNG(name = "postfiltered_tss_by_uniq_frags", plotObject = obj, papermill = papermill, wf = 11, hf = 12)

    log_print("SUCCESSFUL: Post-filtered TSS Enrichment vs Unique Fragments QC plot")
  },
  error = function(cond) {
    log_print("ERROR: Post-filtered TSS Enrichment vs Unique Fragments QC plot")
    log_print(cond)
  }
)

In [None]:
# Fragment size distribution of the filtered project.
tryCatch(
  {
    log_print("# Post-filtered Fragment Size Distribution plot")

    obj <- plotFragmentSizes(ArchRProj = proj) + theme_gray()

    # Title and summary header; the fragment total is reported in millions
    tg <- textGrob(
      "Post-filtered Fragment Size Distribution",
      gp = gpar(fontsize = 25, fontface = "bold", col = "red")
    )
    sg <- textGrob(
      paste0(
        "Library: ", prefix, "\n",
        "Total Barcodes: ", nCells(proj), "\n",
        "Total nFrags: ", paste(format(round(sum(proj$nFrags) / 1e6, 1), trim = TRUE), "M"), "\n",
        "------------------------------------ \n"
      ),
      gp = gpar(fontsize = 18, fontface = "bold")
    )

    # Assemble, show in the notebook, and save
    plot_list <- list(obj)
    obj <- create_plot(plot_list, tg, sg, heights = unit(c(1.5, 1.5, 8), rep("in", 3)), width = 10)
    grid.draw(obj)
    printPNG(name = "postfiltered_frag_size_dist", plotObject = obj, papermill = papermill, wf = 11, hf = 12)

    log_print("SUCCESSFUL: Post-filtered Fragment Size Distribution plot")
  },
  error = function(cond) {
    log_print("ERROR: Post-filtered Fragment Size Distribution plot")
    log_print(cond)
  }
)

In [None]:
# Doublet scoring is optional and controlled by find_doublets.
if (!find_doublets) {
  log_print("#Skipping doublet detection step")
} else {
  tryCatch(
    {
      log_print("# Calculate doublet scores")

      proj <- addDoubletScores(
        input = proj,
        k = doublet_k,
        knnMethod = doublet_knn_method,
        LSIMethod = lsi_method
      )

      log_print("SUCCESSFUL: Calculate doublet scores")
    },
    error = function(cond) {
      log_print("ERROR: Calculate doublet scores")
      log_print(cond)
    }
  )
}

In [None]:
#Uncomment next line to filter doublets

#proj <- filterDoublets(ArchRProj = proj) 

In [None]:
# Dimensionality reduction: iterative LSI on the matrix named by
# iter_LSI_matrix, stored in the project as "IterativeLSI".

tryCatch(
  {
    log_print("# Add Iterative LSI")

    proj <- addIterativeLSI(
      ArchRProj = proj,
      useMatrix = iter_LSI_matrix,
      name = "IterativeLSI"
    )

    log_print("SUCCESSFUL: Add Iterative LSI")
  },
  error = function(cond) {
    log_print("ERROR: Add Iterative LSI")
    log_print(cond)
  }
)

In [None]:
# Cluster the cells on the IterativeLSI reduction.

tryCatch(
  {
    log_print("# Add clusters")

    # From the ArchR docs, IterativeLSI is the only supported option for reducedDims
    proj <- addClusters(input = proj, reducedDims = "IterativeLSI")

    log_print("SUCCESSFUL: Add clusters")
  },
  error = function(cond) {
    log_print("ERROR: Add clusters")
    log_print(cond)
  }
)

In [None]:
# Compute the UMAP embedding from the IterativeLSI reduction.

tryCatch(
  {
    log_print("# Add UMAP")

    proj <- addUMAP(ArchRProj = proj, reducedDims = "IterativeLSI")

    log_print("SUCCESSFUL: Add UMAP")
  },
  error = function(cond) {
    log_print("ERROR: Add UMAP")
    log_print(cond)
  }
)

In [None]:
# UMAP coloured by cluster assignment.

tryCatch(
  {
    log_print("# Plot UMAP")

    obj <- plotEmbedding(ArchRProj = proj, colorBy = "cellColData", name = "Clusters", embedding = "UMAP") +
      geom_point(size = 0.2) +
      theme_gray()

    # Title and summary header
    tg <- textGrob("UMAP", gp = gpar(fontsize = 25, fontface = "bold", col = "red"))
    sg <- textGrob(
      paste0(
        "Library: ", prefix, "\n",
        " Number of Barcodes: ", nCells(proj), "\n",
        " Colored by: Clusters using Louvain\n",
        "------------------------------------ \n"
      ),
      gp = gpar(fontsize = 18, fontface = "bold")
    )

    # Assemble, show in the notebook, and save
    plot_list <- list(obj)
    obj <- create_plot(plot_list, tg, sg, heights = unit(c(1, 1, 8), rep("in", 3)), width = 10)
    grid.draw(obj)
    printPNG("umap_clusters", obj, papermill, wf = 11, hf = 12)

    log_print("SUCCESSFUL: Plot UMAP")
  },
  error = function(cond) {
    log_print("ERROR: Plot UMAP")
    log_print(cond)
  }
)

In [None]:
# UMAP coloured by log10 of the number of fragments per cell.

tryCatch(
  {
    log_print("# Plot UMAP")

    # Store the log-transformed fragment count as a cell column so that it
    # can be selected by name below
    proj$lognFrags <- log10(proj$nFrags)

    obj <- plotEmbedding(
      ArchRProj = proj,
      colorBy = "cellColData",
      name = "lognFrags",
      embedding = "UMAP",
      pal = ArchRPalettes$purpleOrange
    ) +
      geom_point(size = 0.2) +
      theme_gray()

    # Title and summary header
    tg <- textGrob("UMAP", gp = gpar(fontsize = 25, fontface = "bold", col = "red"))
    sg <- textGrob(
      paste0(
        "Library: ", prefix, "\n",
        " Number of Barcodes: ", nCells(proj), "\n",
        " Colored by: log10(nFrags)\n",
        "------------------------------------ \n"
      ),
      gp = gpar(fontsize = 18, fontface = "bold")
    )

    # Assemble, show in the notebook, and save
    plot_list <- list(obj)
    obj <- create_plot(plot_list, tg, sg, heights = unit(c(1, 1, 8), rep("in", 3)), width = 10)
    grid.draw(obj)
    printPNG("umap_num_frags", obj, papermill, wf = 11, hf = 12)

    log_print("SUCCESSFUL: Plot UMAP")
  },
  error = function(cond) {
    log_print("ERROR: Plot UMAP")
    log_print(cond)
  }
)

In [None]:
# UMAP coloured by TSS enrichment.

tryCatch(
  {
    log_print("# Plot UMAP")

    obj <- plotEmbedding(
      ArchRProj = proj,
      colorBy = "cellColData",
      name = "TSSEnrichment",
      embedding = "UMAP",
      pal = ArchRPalettes$purpleOrange
    ) +
      geom_point(size = 0.2) +
      theme_gray()

    # Title and summary header
    tg <- textGrob("UMAP", gp = gpar(fontsize = 25, fontface = "bold", col = "red"))
    sg <- textGrob(
      paste0(
        "Library: ", prefix, "\n",
        " Number of Barcodes: ", nCells(proj), "\n",
        " Colored by: TSSEnrichment\n",
        "------------------------------------ \n"
      ),
      gp = gpar(fontsize = 18, fontface = "bold")
    )

    # Assemble, show in the notebook, and save
    plot_list <- list(obj)
    obj <- create_plot(plot_list, tg, sg, heights = unit(c(1, 1, 8), rep("in", 3)), width = 10)
    grid.draw(obj)
    printPNG("umap_tss_score", obj, papermill, wf = 11, hf = 12)

    log_print("SUCCESSFUL: Plot UMAP")
  },
  error = function(cond) {
    log_print("ERROR: Plot UMAP")
    log_print(cond)
  }
)

In [None]:
# UMAP coloured by FRIP.

tryCatch(
  {
    log_print("# Plot UMAP")

    obj <- plotEmbedding(
      ArchRProj = proj,
      colorBy = "cellColData",
      name = "FRIP",
      embedding = "UMAP",
      pal = ArchRPalettes$purpleOrange
    ) +
      geom_point(size = 0.2) +
      theme_gray()

    # Title and summary header
    tg <- textGrob("UMAP", gp = gpar(fontsize = 25, fontface = "bold", col = "red"))
    sg <- textGrob(
      paste0(
        "Library: ", prefix, "\n",
        " Number of Barcodes: ", nCells(proj), "\n",
        " Colored by: FRIP\n",
        "------------------------------------ \n"
      ),
      gp = gpar(fontsize = 18, fontface = "bold")
    )

    # Assemble, show in the notebook, and save
    plot_list <- list(obj)
    obj <- create_plot(plot_list, tg, sg, heights = unit(c(1, 1, 8), rep("in", 3)), width = 10)
    grid.draw(obj)
    printPNG("umap_frip", obj, papermill, wf = 11, hf = 12)

    log_print("SUCCESSFUL: Plot UMAP")
  },
  error = function(cond) {
    log_print("ERROR: Plot UMAP")
    log_print(cond)
  }
)

In [None]:
# UMAP coloured by doublet score; drawn only when doublet detection ran.

tryCatch(
  {
    log_print("# Plot UMAP")

    if (find_doublets) {
      # Missing scores count as 0; doublets are reported as detected when the
      # scores sum to more than 0
      proj$DoubletScore[is.na(proj$DoubletScore)] <- 0
      doub_flag <- sum(proj$DoubletScore) > 0

      obj <- plotEmbedding(ArchRProj = proj, colorBy = "cellColData", name = "DoubletScore", embedding = "UMAP") +
        geom_point(size = 0.1) +
        theme_gray()

      # Title and summary header
      tg <- textGrob("UMAP", gp = gpar(fontsize = 25, fontface = "bold", col = "red"))
      sg <- textGrob(
        paste0(
          "Library: ", prefix, "\n",
          " Number of Barcodes: ", nCells(proj), "\n",
          " Colored by: DoubletScore\n",
          " Doublets detected: ", doub_flag, "\n",
          "------------------------------------ \n"
        ),
        gp = gpar(fontsize = 18, fontface = "bold")
      )

      # Assemble, show in the notebook, and save
      plot_list <- list(obj)
      obj <- create_plot(plot_list, tg, sg, heights = unit(c(1.5, 1.5, 7), rep("in", 3)), width = 10)
      grid.draw(obj)
      printPNG("umap_doublets", obj, papermill, wf = 11, hf = 12)

      # Start a fresh page for whatever is drawn next
      grid.newpage()
    }

    log_print("SUCCESSFUL: Plot UMAP")
  },
  error = function(cond) {
    log_print("ERROR: Plot UMAP")
    log_print(cond)
  }
)

In [None]:
# Extract marker genes per cluster from the gene score matrix.
## single core worked, multicore breaks
#
# The value of the tryCatch() is its last expression. return() must not be
# used here: this code does not run inside a function, so return() either
# raises "no function to return from" or jumps out of the evaluating frame
# before the assignment below can happen.
# On failure the error is logged and markersGS is set to NULL.

markersGS <- tryCatch(
  {
    log_print("# Extract marker genes")

    markersGS <- getMarkerFeatures(
      ArchRProj = proj,
      useMatrix = "GeneScoreMatrix",
      groupBy = "Clusters",
      bias = c("TSSEnrichment", "log10(nFrags)"),
      testMethod = marker_features_test
    )

    log_print("SUCCESSFUL: Extract marker genes")
    markersGS
  },
  error = function(cond) {
    log_print("ERROR: Extract marker genes")
    log_print(cond)
    NULL
  }
)

In [None]:
# Heatmap of the genes that are up-regulated in each cluster.

tryCatch(
  {
    log_print("# Heatmap")

    hm <- plotMarkerHeatmap(markersGS, transpose = heatmap_transpose, nLabel = heatmap_label_n,
                            cutOff = heatmap_cutoff, plotLog2FC = TRUE)

    # draw() is referenced through its namespace: ComplexHeatmap is never
    # attached by this notebook, so an unqualified draw() may not be found.
    # grid.grabExpr captures the drawn heatmap as a grob for create_plot.
    obj <- grid.grabExpr(ComplexHeatmap::draw(hm))

    # Title and summary header
    tg <- textGrob('Heatmap', gp = gpar(fontsize = 25, fontface = 'bold', col = 'red'))
    sg <- textGrob(
      paste0(
        "Library: ", prefix, "\n",
        " Number of Barcodes: ", nCells(proj), "\n",
        "------------------------------------ \n"
      ),
      gp = gpar(fontsize = 18, fontface = 'bold')
    )

    # Assemble, show in the notebook, and save
    plot_list <- list(obj)
    obj <- create_plot(plot_list, tg, sg, heights = unit(c(1, 1, 10), rep("in", 3)), width = 10)
    grid.draw(obj)
    printPNG('heatmap', obj, papermill, wf = 11, hf = 12)

    log_print("SUCCESSFUL: Heatmap")
  },
  error = function(cond) {
    log_print("ERROR: Heatmap")
    log_print(cond)
  }
)

In [None]:
# Write the final deliverables: zipped plots, the filtered project and the
# filtered peak matrix. The log is closed afterwards.

tryCatch(
  {
    log_print("# Final output files")

    # Bundle every file of the plot directory into one archive
    files2zip <- dir(plot_filename, full.names = TRUE)
    zip(zipfile = paste0(plot_filename, ".zip"), files = files2zip)

    # Filtered ArchR project as rds
    saveRDS(proj, file = paste0(prefix, ".atac.archr.filtered_project.", genome, ".rds"))

    # Filtered peak matrix as .h5
    mtx <- getMatrixFromProject(ArchRProj = proj, useMatrix = "PeakMatrix")
    rownames(mtx) <- proj@peakSet$idx
    write_dgCMatrix_h5(
      assay(mtx),
      cols_are = "barcodes",
      h5_target = paste0(prefix, ".atac.archr.filtered_matrix.", genome, ".h5"),
      ref_name = prefix
    )
    rm(mtx)

    log_print("SUCCESSFUL: Final output files")
  },
  error = function(cond) {
    log_print("ERROR: Final output files")
    log_print(cond)
  }
)

log_close()