# Parameters

In [None]:
# Name of the workspace data table passed to avtable().
table_name <- "demux_BH3KTLDMXY"
# Value matched against the "<table_name>_id" column to pick the experiment row.
experiment_name <- "gm12878_fresh_ATAC"
# When FALSE the notebook looks up and downloads the fragment file itself;
# when TRUE plots are additionally written to plots/ as PNG files.
papermill <- FALSE
# Fragment file path; overwritten by the download step when papermill is FALSE.
frag.path <- NA

In [None]:
# Coerce the flag to logical so that `!papermill` / `if (papermill)` work
# (presumably the parameter may arrive as a string — confirm against the runner).
papermill <- as.logical(papermill)

# Install Libraries
Install information can be found [here](https://www.archrproject.com/index.html)\
Vignette [here](https://www.archrproject.com/articles/Articles/tutorial.html)

In [None]:
# Install any missing dependencies. Each package is only installed when it
# cannot already be loaded, so re-running this cell is cheap.

# CRAN mirror used for every install.packages() call; without an explicit
# mirror, install.packages() can fail in non-interactive sessions.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager", repos = "https://cran.r-project.org")
}
if (!requireNamespace("AnVIL", quietly = TRUE)) {
    BiocManager::install("AnVIL")
}
if (!requireNamespace("BSgenome.Hsapiens.UCSC.hg38", quietly = TRUE)) {
    AnVIL::install("BSgenome.Hsapiens.UCSC.hg38")
}

if (!requireNamespace("devtools", quietly = TRUE)) {
    install.packages("devtools", repos = "https://cran.r-project.org")
}
if (!requireNamespace("GenomeInfoDbData", quietly = TRUE)) {
    AnVIL::install("GenomeInfoDbData")
}
if (!requireNamespace("GenomicRanges", quietly = TRUE)) {
    AnVIL::install("GenomicRanges")
}
if (!requireNamespace("Rsamtools", quietly = TRUE)) {
    AnVIL::install("Rsamtools")
}
if (!requireNamespace("ArchR", quietly = TRUE)) {
    # ArchR is installed from GitHub, resolving its dependencies from the
    # Bioconductor/CRAN repositories reported by BiocManager.
    devtools::install_github("GreenleafLab/ArchR", ref = "master", repos = BiocManager::repositories())
    # Reload packages that may have been updated during the install above.
    devtools::reload(pkg = pkgload::inst("pillar"), quiet = FALSE)
    devtools::reload(pkg = pkgload::inst("magrittr"), quiet = FALSE)
    library(ArchR)
    ArchR::installExtraPackages()
}


In [None]:
# Attach the required packages, hiding their startup messages.
suppressMessages(library(AnVIL))
suppressMessages(library(ArchR))
suppressMessages(library(parallel))
# Fix the random number generator seed.
set.seed(1)
# Use 8 threads as ArchR's default.
addArchRThreads(threads = 8) 

# Load Workspace Tables

In [None]:
# Outside of papermill runs, fetch the workspace table named by `table_name`;
# it is used below to look up the fragment file for `experiment_name`.
if (!papermill){
    table <- avtable(table_name)
}

In [None]:
#' Copy one or more files into the current working directory with gsutil.
#'
#' @param path Character vector of source paths handed to `gsutil_cp()`.
#'   Must be non-empty and contain no `NA`; an empty vector typically means
#'   that a table lookup matched no rows.
#' @return The base name(s) of `path`, i.e. the file name(s) relative to the
#'   working directory the files were copied into.
get_file <- function(path) {
    # Fail early with a clear message instead of an obscure gsutil error.
    if (!is.character(path) || length(path) == 0L || anyNA(path)) {
        stop(
            "`path` must be a non-empty character vector without NA values",
            call. = FALSE
        )
    }
    gsutil_cp(path, getwd())
    basename(path)
}

# file_path <- get_file(table$atac_aligned_raw_bam[table$name == 'gm12878_fresh_ATAC'])
# Outside of papermill runs, find this experiment's raw fragment file in the
# workspace table and copy it into the working directory.
if (!papermill) {
    frag.path <- local({
        id_column <- sprintf('%s_id', table_name)
        row_mask <- table[, id_column] == experiment_name
        get_file(table$atac_fragment_file_raw[row_mask])
    })
}

In [None]:
# Fix file (currently disabled): re-sort the fragments, bgzip-compress them and build a tabix index
# system(sprintf('zcat %s | sort -k1,1 -k2,2n -T tmp | bgzip -c > %s && tabix -p bed %s', frag.path, 'fixed.fragments.tsv.bgz', 'fixed.fragments.tsv.bgz'))

# Generate UMAP

In [None]:
# Select hg38 as the genome for the ArchR calls that follow.
addArchRGenome("hg38")

In [None]:
# Pick the fragment file to build Arrow files from. The re-sorted/re-indexed
# file is preferred when it exists; otherwise fall back to `frag.path` (the
# downloaded or papermill-supplied fragment file), since the step that
# produces the fixed file is optional.
arrow_input <- "fixed.fragments.tsv.bgz"
if (!file.exists(arrow_input)) {
  arrow_input <- frag.path
}
if (!(is.character(arrow_input) && length(arrow_input) == 1L &&
      !is.na(arrow_input) && file.exists(arrow_input))) {
  stop(
    "No fragment file found: neither 'fixed.fragments.tsv.bgz' nor `frag.path` ",
    "points to an existing local file",
    call. = FALSE
  )
}

# Build the Arrow file(s) for a single sample named 'atac', including the
# tile matrix and the gene score matrix.
ArrowFiles <- createArrowFiles(
  inputFiles = arrow_input,
  sampleNames = 'atac',
  minTSS = 4, # Don't set this too high because you can always increase later
  minFrags = 1000,
  addTileMat = TRUE,
  addGeneScoreMat = TRUE
)
ArrowFiles

In [None]:
# Compute doublet scores for the cells in the Arrow files.
doubScores <- addDoubletScores(
  input = ArrowFiles,
  k = 10, # Refers to how many cells near a "pseudo-doublet" to count.
  knnMethod = "UMAP", # Refers to the embedding to use for nearest neighbor search.
  LSIMethod = 1
)

In [None]:
# Create the ArchR project from the Arrow files; outputs go to the "atac" directory.
proj <- ArchRProject(
  ArrowFiles = ArrowFiles, 
  outputDirectory = "atac",
  copyArrows = TRUE # This is recommended so that you maintain an unaltered copy for later usage.
)

In [None]:
# Filter doublets out of the project using filterDoublets()'s default settings.
proj <- filterDoublets(ArchRProj = proj)

In [None]:
# Run iterative LSI on the tile matrix; the result is stored as "IterativeLSI".
proj <- addIterativeLSI(ArchRProj = proj, useMatrix = "TileMatrix", name = "IterativeLSI")

In [None]:
# Cluster cells using the "IterativeLSI" reduced dimensions.
proj <- addClusters(input = proj, reducedDims = "IterativeLSI")

In [None]:
# Compute a UMAP embedding from the "IterativeLSI" reduced dimensions.
proj <- addUMAP(ArchRProj = proj, reducedDims = "IterativeLSI")

In [None]:
# Make sure the output directory for saved figures exists.
dir.create("plots", showWarnings = FALSE)

#' Display a plot and, for papermill runs, also save it as a PNG file.
#'
#' @param filename File name (without extension); the image is written to
#'   `plots/<filename>.png`.
#' @param plotObject Any object whose `print()` method draws the plot.
#' @param papermill Logical scalar; when `TRUE` the plot is additionally
#'   written to disk.
#' @param wf,hf Width and height scaling factors applied to the 7x7 inline
#'   plot size and to the 480x480 pixel PNG size.
#' @return `plotObject`, invisibly.
printPNG <- function(filename, plotObject, papermill, wf = 1, hf = 1) {
    # The repr.plot.* options are deliberately left set after the call: they
    # control the size at which the notebook renders the printed plot.
    options(repr.plot.width = 7 * wf, repr.plot.height = 7 * hf)
    print(plotObject)
    if (papermill) {
        png(sprintf("plots/%s.png", filename), width = 480 * wf, height = 480 * hf)
        # Close the device even if rendering fails, so a failed plot does not
        # leave an open PNG device capturing later graphics output.
        on.exit(dev.off(), add = TRUE)
        print(plotObject)
    }
    invisible(plotObject)
}

# UMAP embeddings colored by the "Sample" and "Clusters" cell metadata columns.
p1 <- plotEmbedding(ArchRProj = proj, colorBy = "cellColData", name = "Sample", embedding = "UMAP")
p2 <- plotEmbedding(ArchRProj = proj, colorBy = "cellColData", name = "Clusters", embedding = "UMAP")
# NOTE(review): combining two plot objects with `+` presumably relies on a
# package such as patchwork being attached — confirm. ggAlignPlots is the
# commented-out alternative.
obj <- p1 + p2 #ggAlignPlots(p1, p2, type = "h")

# Show the combined figure at double width; when papermill is TRUE it is also
# saved as plots/umap.png.
printPNG('umap', obj, papermill, wf=2)

# Create Peak-Cell Matrix

In [None]:
# wsData <- avdata()
# ccre.path <- get_file(wsData$value[wsData$key=='cCRE_300bp'])
# ccre <- import.bed(ccre.path)
# ccre

In [None]:
# proj <- addPeakSet(ArchRProj = proj, peakSet=ccre)

In [None]:
# proj <- addPeakMatrix(proj)

In [None]:
# BiocManager::install("DropletUtils")

# library(DropletUtils)

In [None]:
# tmp <- getMatrixFromProject(proj, useMatrix='PeakMatrix')

In [None]:
# names <- paste0(seqnames(ccre), ':', start(ccre), '-', end(ccre))

# write10xCounts(
#   path = "peak.cell.matrix.h5",
#   x = tmp@assays@data@listData$PeakMatrix,
#   barcodes = gsub('atac#', '', colnames(tmp)),
#   gene.id = names,
#   gene.symbol = names,
#   overwrite = FALSE
# )
