# Install Libraries
Installation instructions for ArchR can be found [here](https://www.archrproject.com/index.html)\
The ArchR tutorial vignette can be found [here](https://www.archrproject.com/articles/Articles/tutorial.html)

In [None]:
# ---- ArchR parameters ----
# Input fragment file (atac_frag.tsv.bgz)
atac_frag <- "~/Downloads/mod_AH_msBrain_ATAC_2K1_NoPolyA.atac.fragments.bgz"
# Genome: either hg38 or mm10
genome <- "mm10"

# ---- ArchR QC ----
# Minimum transcription start site (TSS) enrichment score a cell needs to pass filtering
min_tss <- 4
# Minimum number of mapped ATAC-seq fragments a cell needs to pass filtering
min_frags <- 1000
# Whether to add a "Tile Matrix" to each ArrowFile
add_tile_mat <- TRUE
# Whether to add a Gene-Score Matrix to each ArrowFile
add_gene_score_mat <- TRUE

# ---- ArchR doublet parameters ----
# Number of cells neighboring a simulated doublet to be considered putative doublets
doublet_k <- 10
# Embedding to use for the nearest neighbor search
doublet_knn_method <- "UMAP"
# Order of operations in the TF-IDF normalization. Possible values are:
# 1 or "tf-logidf", 2 or "log(tf-idf)", and 3 or "logtf-logidf".
lsi_method <- 1

# Save a copy of the arrow files in the ArchR project (recommended)
copy_arrow_files <- TRUE
# Name of the data matrix to retrieve from the ArrowFiles associated with the
# ArchRProject. Valid options are "TileMatrix" or "PeakMatrix".
iter_LSI_matrix <- "TileMatrix"
threads <- 8
# Project name
prefix <- "prefix"

# ---- ArchR plot parameters ----
# Pairwise test method used to compare cell groupings to the null cell grouping
# during marker feature identification
marker_features_test <- "wilcoxon"
# Whether to transpose the heatmap
heatmap_transpose <- TRUE
# Top n genes to label per cluster in the heatmap
heatmap_label_n <- 5
# Cut-off applied to genes in the heatmap
heatmap_cutoff <- "FDR <= 0.01 & Log2FC >= 0.5"

# ---- Terra specific parameters ----
table_name <- "demux_BH3KTLDMXY"
experiment_name <- "gm12878_fresh_ATAC"

# ---- papermill specific parameters ----
papermill <- TRUE

# ---- Jupyter notebook plot sizes ----
options(repr.plot.width = 15, repr.plot.height = 15)


In [None]:
# Coerce every boolean parameter to a logical value, in place.
for (flag_name in c("papermill", "add_tile_mat", "add_gene_score_mat",
                    "copy_arrow_files", "heatmap_transpose")) {
  assign(flag_name, as.logical(get(flag_name)))
}
# Drop the loop variable so no extra name is left behind in the session.
rm(flag_name)

In [None]:
# Install any missing dependency. Each package is installed only when its
# namespace cannot be loaded.
# Every install.packages() call names the CRAN repository explicitly so that
# it also works in non-interactive runs where no CRAN mirror is configured.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager", repos = "https://cran.r-project.org")
}
if (!requireNamespace("devtools", quietly = TRUE)) {
    install.packages("devtools", repos = "https://cran.r-project.org")
}
if (!requireNamespace("GenomeInfoDbData", quietly = TRUE)) {
    BiocManager::install("GenomeInfoDbData")
}
if (!requireNamespace("GenomicRanges", quietly = TRUE)) {
    BiocManager::install("GenomicRanges")
}
if (!requireNamespace("Rsamtools", quietly = TRUE)) {
    BiocManager::install("Rsamtools")
}
if (!requireNamespace("presto", quietly = TRUE)) {
    devtools::install_github('immunogenomics/presto')
}
if (!requireNamespace("magick", quietly = TRUE)) {
    install.packages("magick", repos = "https://cran.r-project.org")
}
if (!requireNamespace("ArchR", quietly = TRUE)) {
    # ArchR is pinned to the v1.0.1 tag; dependencies are resolved against the
    # repositories reported by BiocManager.
    devtools::install_github('GreenleafLab/ArchR@v1.0.1', repos = BiocManager::repositories())
    ArchR::installExtraPackages()
}

In [None]:
# Attach the required packages without their start-up messages.
suppressMessages({
  library(parallel)
  library(ArchR)
  library(magick)
})

# Fix the random seed and set the number of threads ArchR uses.
set.seed(1)
addArchRThreads(threads = threads)

In [None]:
# Function to display plots and optionally save them as PNG files.
# The "plots" directory is created up front; an existing directory is fine.
dir.create("plots", showWarnings = FALSE)

#' Print a plot in the notebook and, when requested, save it as a PNG.
#'
#' The output file is `plots/<prefix>.atac.archr.<name>.<genome>.png`, where
#' `prefix` and `genome` are read from the enclosing (global) environment.
#'
#' @param name Label inserted into the output file name.
#' @param plotObject Object that is drawn by calling `print()` on it.
#' @param papermill Logical; when TRUE the plot is also written to a PNG file.
#' @param wf Width scale factor applied to the notebook plot width (7) and the
#'   PNG width (480).
#' @param hf Height scale factor applied to the notebook plot height (7) and
#'   the PNG height (480).
#' @return `NULL`, invisibly; the function is called for its side effects.
printPNG <- function(name, plotObject, papermill, wf=1, hf=1){
    filename <- paste0(prefix, ".atac.archr.", name, ".", genome)
    # The repr options are intentionally left changed after the call so the
    # notebook renders this plot at the requested size.
    options(repr.plot.width = 7 * wf, repr.plot.height = 7 * hf)
    print(plotObject)
    if (papermill) {
        png(sprintf("plots/%s.png", filename), width = 480 * wf, height = 480 * hf)
        # Remember the PNG device and close it on exit, so that it is not left
        # open when printing the plot fails.
        png_device <- dev.cur()
        on.exit(dev.off(png_device), add = TRUE)
        print(plotObject)
    }
    invisible(NULL)
}

In [None]:
# Terra specific code block
# Copy `path` into the current working directory with gsutil_cp() and return
# the base name of the copied file.
# NOTE(review): gsutil_cp() is not defined or imported in this file; it
# presumably comes from the AnVIL package - confirm it is attached on Terra.
get_file <- function(path){
    gsutil_cp(path, getwd())
    basename(path)
}

# When not running under papermill, look up the fragment file of
# `experiment_name` in the `table_name` table and copy it locally.
if (!papermill){
    table <- avtable(table_name)
    id_column <- sprintf('%s_id', table_name)
    # which() drops NA comparisons, so rows with a missing id are never selected.
    matching_rows <- which(table[[id_column]] == experiment_name)
    # Fail early with a clear message instead of passing an empty or
    # multi-element path on to get_file().
    if (length(matching_rows) != 1) {
        stop(
            sprintf(
                "Expected exactly one row in table '%s' where %s is '%s', found %d",
                table_name, id_column, experiment_name, length(matching_rows)
            ),
            call. = FALSE
        )
    }
    atac_frag <- get_file(table$atac_fragment_file_raw[matching_rows])
}

In [None]:
# Select the genome (set in the parameters) that ArchR should use.
addArchRGenome(genome = genome)

In [None]:
# Create the Arrow file(s) from the fragment file, passing the QC thresholds
# and the matrix options set in the parameters.
ArrowFiles <- createArrowFiles(
  inputFiles      = atac_frag,
  sampleNames     = prefix,
  minFrags        = min_frags,
  minTSS          = min_tss,
  addGeneScoreMat = add_gene_score_mat,
  addTileMat      = add_tile_mat
)
# Display the value returned by createArrowFiles().
ArrowFiles

In [None]:
# Compute doublet scores for the Arrow files using the doublet parameters.
doubScores <- addDoubletScores(
  input     = ArrowFiles,
  LSIMethod = lsi_method,
  knnMethod = doublet_knn_method,
  k         = doublet_k
)

In [None]:
# List everything in the working directory, hidden files included.
list.files(path = ".", all.files = TRUE, full.names = TRUE)

In [None]:
# Create the ArchR project from the Arrow files; output goes to a directory
# named after the project prefix.
# Copying the arrow files is recommended so that an unaltered copy is kept
# for later usage.
proj <- ArchRProject(
  ArrowFiles      = ArrowFiles,
  outputDirectory = prefix,
  showLogo        = FALSE,
  copyArrows      = copy_arrow_files
)

In [None]:
#Uncomment next line to filter doublets

#proj <- filterDoublets(ArchRProj = proj) 

In [None]:
# Run iterative LSI on the configured matrix and store it as "IterativeLSI".
proj <- addIterativeLSI(
  ArchRProj = proj,
  useMatrix = iter_LSI_matrix,
  name      = "IterativeLSI"
)

In [None]:
# Cluster the cells on the "IterativeLSI" reduced dimensions.
# From the ArchR docs, IterativeLSI is the only supported option for reducedDims.
proj <- addClusters(
  input       = proj,
  reducedDims = "IterativeLSI"
)

In [None]:
# Compute the UMAP embedding from the "IterativeLSI" reduced dimensions.
proj <- addUMAP(
  ArchRProj   = proj,
  reducedDims = "IterativeLSI"
)

In [None]:
# Plot the UMAP embedding twice: colored by sample and colored by cluster.
p1 <- plotEmbedding(
  ArchRProj = proj,
  embedding = "UMAP",
  colorBy   = "cellColData",
  name      = "Sample"
)
p2 <- plotEmbedding(
  ArchRProj = proj,
  embedding = "UMAP",
  colorBy   = "cellColData",
  name      = "Clusters"
)

# Combine both panels into one object (alternative: ggAlignPlots(p1, p2, type = "h")).
# NOTE(review): `+` on two ggplot objects presumably relies on a plot-composition
# package such as patchwork being loaded; none is attached in this file - confirm.
obj <- p1 + p2
printPNG('umap', obj, papermill, wf = 2)

In [None]:
# QC plot: TSS enrichment against log10(unique fragments) for the cells in the
# project, with dashed lines marking the two filtering thresholds.
df <- getCellColData(proj, select = c("log10(nFrags)", "TSSEnrichment"))

p <- ggPoint(
    x = df[,1],
    y = df[,2],
    colorDensity = TRUE,
    continuousSet = "sambaNight",
    xlabel = "Log10 Unique Fragments",
    ylabel = "TSS Enrichment"
) +
    geom_hline(yintercept = min_tss, lty = "dashed") +
    geom_vline(xintercept = log10(min_frags), lty = "dashed") +
    ggtitle(
        label = "TSS Enrichment vs log10(Unique Fragments) after filters applied",
        subtitle = paste0("Cell count: ", nrow(df))
    )

printPNG('TSS_fragment_qc', p, papermill, wf = 2)


In [None]:
#Extract marker genes

# Note: this step worked on a single core but breaks when run on multiple cores.

#markersGS <- getMarkerFeatures(
 #   ArchRProj = proj, 
  #  useMatrix = "GeneScoreMatrix", 
   # groupBy = "Clusters",
    #bias = c("TSSEnrichment", "log10(nFrags)"),
    #testMethod = marker_features_test
#)

In [None]:
#Plot heatmap of upregulated genes in clusters

#hm = plotMarkerHeatmap(markersGS, transpose = heatmap_transpose, nLabel = heatmap_label_n, cutOff = heatmap_cutoff, plotLog2FC = TRUE)
#printPNG('heatmap', hm, papermill, wf=2)

In [None]:
# Create the final output files: a zip archive with everything in the plots
# directory and the ArchR project serialized as an RDS file.
files2zip <- list.files('plots/', full.names = TRUE)
zip('plots.zip', files = files2zip)

saveRDS(
  proj,
  file = paste0(prefix,".atac.archr.rds.",genome,".rds")
)