# Parameters

In [None]:
# Used to build the name of the id column ("<table_name>_id") when looking up
# the experiment row in the workspace table.
table_name <- "demux_BH3KTLDMXY"
# Value searched for in that id column to select the experiment's row.
experiment_name <- "gm12878_fresh_ATAC"
# When FALSE, later cells fetch the inputs from the workspace; when TRUE those
# cells are skipped and plots are additionally written to plots/*.png.
papermill <- FALSE
# Path to the fragment file; overwritten by a later cell when papermill is FALSE.
frag.path <- NA
# Path to the 10x HDF5 peak matrix; overwritten by a later cell when papermill is FALSE.
h5.path <- NA

In [None]:
# Coerce the flag to logical so the `if (!papermill)` checks below work;
# presumably an injected parameter may arrive as a string such as "TRUE" —
# TODO confirm against how the notebook is invoked.
papermill <- as.logical(papermill)

# Install Libraries

Installation instructions for Signac can be found [here](https://satijalab.org/signac/articles/install.html).\
The Signac overview vignette is [here](https://satijalab.org/signac/articles/overview.html).

In [None]:
# BiocManager comes from CRAN and is needed for everything Bioconductor below.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager", repos = "https://cran.r-project.org")
}

# On R 4.1.x, switch Bioconductor to release 3.14 if it is not already active.
if (BiocManager::version() != "3.14" &&
    R.version$major == "4" &&
    sub("\\..*$", "", R.version$minor) == "1") {
    BiocManager::install(version = "3.14", ask = FALSE, force = TRUE)
}

# AnVIL provides the installer used for the remaining packages.
if (!requireNamespace("AnVIL", quietly = TRUE)) {
    BiocManager::install("AnVIL")
}

# Install each missing package through AnVIL, in the listed order.
invisible(lapply(
    c("biovizBase", "EnsDb.Hsapiens.v86", "GenomeInfoDbData",
      "GenomicRanges", "Rsamtools", "Seurat"),
    function(pkg) {
        if (!requireNamespace(pkg, quietly = TRUE)) {
            AnVIL::install(pkg)
        }
    }
))

# Signac is installed with install.packages() after selecting the first two
# entries of the repository list.
if (!requireNamespace("Signac", quietly = TRUE)) {
    setRepositories(ind = 1:2)
    install.packages("Signac")
}

# hdf5r is installed from the default repositories.
if (!requireNamespace("hdf5r", quietly = TRUE)) {
    install.packages("hdf5r")
}

In [None]:
# Attach the packages used by the rest of the notebook, hiding start-up chatter.
suppressMessages(library(AnVIL))
suppressMessages(library(hdf5r))
suppressMessages(library(EnsDb.Hsapiens.v86))
suppressMessages(library(ggplot2))
suppressMessages(library(Seurat))
suppressMessages(library(Signac))
suppressMessages(library(future))

# Run future-aware steps on 8 workers. The "multiprocess" alias is deprecated
# (and defunct in recent releases of future), so pick explicitly what it used
# to resolve to: forked workers where supported, background sessions otherwise.
if (supportsMulticore()) {
    plan("multicore", workers = 8)
} else {
    plan("multisession", workers = 8)
}

# Fixed seed so the stochastic steps are reproducible between runs.
set.seed(1234)

# Load Workspace Tables

In [None]:
# Interactive runs only: load the workspace table selected by the `table_name`
# parameter (previously the name was hard-coded, so the parameter was ignored
# here while still being used to build the id column name).
if (!papermill){
    table <- avtable(table_name)
}

In [None]:
# # Load cCRE
# wsData <- avdata()
# wsData$value[wsData$key=='cCRE_300bp']

In [None]:
# Copy `path` into the current working directory with gsutil_cp() and return
# the base name of the file, i.e. its location relative to that directory.
get_file <- function(path){
    gsutil_cp(path, getwd())
    basename(path)
}

# Interactive runs only: find the row whose "<table_name>_id" equals
# `experiment_name` and download its raw ATAC fragment file.
if (!papermill){
    id.column <- sprintf('%s_id', table_name)
    # `[[` extracts the column as a plain vector; which() drops NA ids so a
    # missing id can never select an NA path.
    row.idx <- which(table[[id.column]] == experiment_name)
    if (length(row.idx) != 1) {
        stop(
            sprintf("Expected exactly one row with %s == '%s', found %d",
                    id.column, experiment_name, length(row.idx)),
            call. = FALSE
        )
    }
    frag.path <- get_file(table$atac_fragment_file_raw[row.idx])
}
# A barcode/metadata file appears to be optional, so its lookup is left disabled.
# barcode_path <- get_file(table$atac_fragment_file_raw[table$name == 'gm12878_fresh_ATAC'])

In [None]:
# Interactive runs only: read the workspace data key/value pairs and download
# the file stored under the 'cell_peak_mat' key.
if (!papermill){
    wsData <- avdata()
    h5.path <- get_file(
        wsData[["value"]][wsData[["key"]] == 'cell_peak_mat']
    )
}

In [None]:
# Fix file
# Re-sort the fragment file by column 1, then numerically by column 2,
# bgzip-compress the result to fixed.fragments.tsv.bgz and build a tabix (BED)
# index for it.
stopifnot(is.character(frag.path), length(frag.path) == 1, !is.na(frag.path))

# `sort -T tmp` writes its spill files into ./tmp, so the directory has to
# exist before sort needs it.
dir.create("tmp", showWarnings = FALSE)

# shQuote() keeps paths containing spaces or shell metacharacters intact.
fix.status <- system(sprintf('zcat %s | sort -k1,1 -k2,2n -T tmp | bgzip -c > %s && tabix -p bed %s', shQuote(frag.path), 'fixed.fragments.tsv.bgz', 'fixed.fragments.tsv.bgz'))

# Fail here instead of letting later cells read a missing or partial file.
# NOTE: only the status of bgzip/tabix is reported; a failure of zcat or sort
# earlier in the pipe is not reflected in the exit status.
if (fix.status != 0) {
    stop(
        sprintf("Sorting/indexing of '%s' failed with exit status %d",
                frag.path, fix.status),
        call. = FALSE
    )
}

# Generate UMAP

In [None]:
# Load the peak-by-cell count matrix from the 10x HDF5 file.
counts <- Read10X_h5(h5.path)

In [None]:
# metadata <- read.csv(
#   file = barcode_path,
#   header = FALSE,
# )

# Known issue: with Bioconductor 3.13, GenomeInfoDb fails when a gene
# annotation is specified, which prevents calculation of TSS enrichment.

# Build the chromatin assay from the count matrix and the sorted, indexed
# fragment file. Feature names are split on ":" and "-".
chrom_assay <- CreateChromatinAssay(
  counts = counts,
  fragments = "fixed.fragments.tsv.bgz",
  genome = "hg38",
  sep = c(":", "-"),
  min.features = 200,
  min.cells = 10
)

# Wrap the assay in a Seurat object under the assay name "peaks".
atac <- CreateSeuratObject(counts = chrom_assay, assay = "peaks")

In [None]:
# Pull gene annotations out of the EnsDb.Hsapiens.v86 database (warnings muted).
annotations <- suppressWarnings(
  GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86, verbose = FALSE)
)

# Switch the sequence names to UCSC style; presumably this matches the naming
# used by the hg38 peaks and fragments — verify against the input files.
seqlevelsStyle(annotations) <- "UCSC"

# Attach the annotations to the Seurat object.
Annotation(atac) <- annotations

In [None]:
# QC metrics

# Nucleosome signal per cell.
atac <- NucleosomeSignal(atac)

# TSS enrichment score per cell.
atac <- TSSEnrichment(atac, fast = FALSE)

# Total fragments per barcode, counted directly from the fragment file and
# looked up by cell barcode.
total_fragments <- CountFragments("fixed.fragments.tsv.bgz")
rownames(total_fragments) <- total_fragments$CB
atac$fragments <- total_fragments[colnames(atac), "frequency_count"]

# Percentage of reads in peaks: peak counts relative to total fragments.
atac$pct_reads_in_peaks <- atac$nCount_peaks / atac$fragments * 100

# Fraction of peak counts that fall inside the hg38 blacklist regions.
atac$blacklist_ratio <- FractionCountsInRegion(
  atac,
  assay = "peaks",
  regions = blacklist_hg38
)

In [None]:
# Output directory for the PNG copies of the plots.
dir.create("plots", showWarnings = FALSE)

# Show a plot inline and, when running under papermill, also save it as a PNG.
#
# filename:   base name (without extension) of the file written to plots/
# plotObject: any object whose print() method renders the plot
# papermill:  if TRUE, additionally write plots/<filename>.png
# wf, hf:     width / height scale factors applied to both the inline size
#             (7 x 7) and the PNG size (480 x 480 pixels)
#
# Returns NULL invisibly, so a call at the end of a cell produces no output
# besides the plot itself.
printPNG <- function(filename, plotObject, papermill, wf=1, hf=1){
	# Left set on purpose (not restored): these options size the inline plot.
	options(repr.plot.width = 7*wf, repr.plot.height = 7*hf)
	print(plotObject)
	if(papermill){
		png(sprintf("plots/%s.png", filename), width=480*wf, height=480*hf)
		# Close the device even if rendering fails, so a failed plot does not
		# leave an open PNG device capturing all later plots.
		on.exit(dev.off(), add = TRUE)
		print(plotObject)
	}
	invisible(NULL)
}

# Label each cell by whether its TSS enrichment exceeds 2, then plot the TSS
# enrichment profile per group without a legend.
atac$high.tss <- ifelse(atac$TSS.enrichment > 2, "High", "Low")
obj <- TSSPlot(object = atac, group.by = "high.tss") + NoLegend()

# Double-width figure.
printPNG("tss", obj, papermill, wf = 2)

In [None]:
# Split cells at a nucleosome signal of 4 and draw the fragment length
# histogram for each group.
atac$nucleosome_group <- ifelse(
  atac$nucleosome_signal > 4,
  "NS > 4",
  "NS < 4"
)
obj <- FragmentHistogram(atac, group.by = "nucleosome_group")

printPNG("fragment", obj, papermill)

In [None]:
# Violin plots of the five QC metrics, laid out side by side in one row.
obj <- VlnPlot(
  atac,
  features = c(
    "pct_reads_in_peaks",
    "nCount_peaks",
    "TSS.enrichment",
    "blacklist_ratio",
    "nucleosome_signal"
  ),
  ncol = 5,
  pt.size = 0.1
)

# Double-width figure.
printPNG("violin", obj, papermill, wf = 2)

In [None]:
# Filter cells
# Keep only cells that pass every QC threshold: peak counts strictly between
# 3000 and 20000, more than 15% of reads in peaks, blacklist ratio below 0.05,
# nucleosome signal below 4 and TSS enrichment above 2.
atac <- subset(
  atac,
  subset = TSS.enrichment > 2 &
    nucleosome_signal < 4 &
    blacklist_ratio < 0.05 &
    pct_reads_in_peaks > 15 &
    nCount_peaks < 20000 &
    nCount_peaks > 3000
)
# Print a summary of the filtered object.
atac

In [None]:
# Dimensionality reduction: TF-IDF normalisation, feature selection with the
# 'q0' cutoff, then SVD.
atac <- RunTFIDF(object = atac)
atac <- FindTopFeatures(object = atac, min.cutoff = "q0")
atac <- RunSVD(object = atac)

In [None]:
# UMAP embedding computed from components 2 to 30 of the "lsi" reduction.
atac <- RunUMAP(atac, reduction = "lsi", dims = 2:30)

In [None]:
# Neighbour graph on components 2 to 30 of the "lsi" reduction, followed by
# clustering with algorithm 3.
atac <- FindNeighbors(atac, reduction = "lsi", dims = 2:30)
atac <- FindClusters(atac, algorithm = 3, verbose = FALSE)

# Labelled cluster plot without a legend.
obj <- DimPlot(atac, label = TRUE) + NoLegend()
printPNG("umap", obj, papermill)