# Settings

In [1]:
# Load packages
pacman::p_load(dplyr, stringr, data.table, tidyr, data.table, Matrix, future, 
               hdf5r, Seurat, Signac,harmony, knitr, SoupX, 
               EnsDb.Hsapiens.v86, 
               logr, parallel, 
               ggplot2, ggpubr, ggrepel, ggbreak, gridExtra, patchwork, grid, ggh4x)

In [2]:
# Session-wide options
# Remember the current warning level in `warnLevel` (so it can be restored
# with options(warn = warnLevel)), then silence warnings for the session.
warnLevel <- getOption("warn")
options(stringsAsFactors = FALSE, warn = -1)
knitr::opts_chunk$set(tidy = TRUE)

# Parallelisation through the future framework: "multicore" plan, 4 workers
future::plan("multicore", workers = 4)

# Size threshold (in bytes) for globals handed to futures.
# 1024^2 bytes = 1 MiB, so 10000 * 1024^2 is 10000 MiB (a little under 10 GiB).
RAM.tresh <- 10000 * 1024^2
options(future.globals.maxSize = RAM.tresh)

In [3]:
# Project directories (all paths keep their trailing slash so that file names
# can be appended with paste0)
base.dir <- "/nfs/lab/projects/mega_heart/"
assets.dir <- paste0(base.dir, "Assets/")

# Folder holding the per-cell-type peak calls and the unified_peaks/ subfolder
PEAKS.dir <- paste0(
  base.dir,
  "FNIH/multiome/Analysis/1_preprocessing/PeakCalling/2_PeaksMap_Peakcalls/"
)

In [4]:
# Load the final unified peak list.
# The first column (V1) of the file is parsed as region strings of the form
# chr:start-end and converted into a GRanges object.
peak.list <- StringToGRanges(
  read.table(
    paste0(PEAKS.dir, "unified_peaks/FNIH_MultiomePeaks_FiltVariable.bed")
  )[["V1"]],
  sep = c(":", "-")
)
peak.list

GRanges object with 285873 ranges and 0 metadata columns:
           seqnames              ranges strand
              <Rle>           <IRanges>  <Rle>
       [1]     chr1       181260-181560      *
       [2]     chr1       191308-191608      *
       [3]     chr1       779626-779926      *
       [4]     chr1       807726-808026      *
       [5]     chr1       818649-818949      *
       ...      ...                 ...    ...
  [285869]     chr9   95784035-95784234      *
  [285870]     chr9 135804950-135805250      *
  [285871]     chrX       338438-338682      *
  [285872]     chrX   47573934-47574234      *
  [285873]     chrX   68652886-68653186      *
  -------
  seqinfo: 24 sequences from an unspecified genome; no seqlengths

In [5]:
# List the original per-cell-type peak call files in PEAKS.dir.
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored at the end of the name: only files ending in "_peaks.xls" are kept
# (an unescaped, unanchored "_peaks.xls" would also accept e.g. "_peaks.xlsx").
called.peaks <- list.files(PEAKS.dir, pattern = "_peaks\\.xls$")
called.peaks

In [6]:
# Peak call file(s) belonging to each cell type.
# Each variable is a character vector of file names relative to PEAKS.dir;
# add further file names to a vector to pool several peak calls into one
# cell type.
aCM         <- "aCM_peaks.xls"
vCM         <- "vCM_peaks.xls"
Adipocyte   <- "Adipocyte_peaks.xls"
Fibroblast  <- "Fibroblast_peaks.xls"
Endothelial <- "Endothelial_peaks.xls"
Endocardial <- "Endocardial_peaks.xls"
Epicardial  <- "Epicardial_peaks.xls"
Lymphoid    <- "Lymphoid_peaks.xls"
Myeloid     <- "Myeloid_peaks.xls"
Neuronal    <- "Neuronal_peaks.xls"
Pericyte    <- "Pericyte_peaks.xls"
SM          <- "SM_peaks.xls"

In [7]:
# Reminder about naming the per-cell-type lists; as a bare string expression
# it is simply echoed as the cell's output.
'to simplify your life, make sure the names of the lists are equal to cell labels in adata'

In [8]:
# Concatenate peaks together
# Map every cell type label to its peak call file(s). Labels and values are
# declared side by side so a label cannot drift out of step with its files;
# the label is reused below as the prefix of the output file name.
allPeakCalls <- list(
  aCM = aCM, vCM = vCM, Adipocyte = Adipocyte, Fibroblast = Fibroblast,
  Endothelial = Endothelial, Endocardial = Endocardial,
  Epicardial = Epicardial, Lymphoid = Lymphoid, Myeloid = Myeloid,
  Neuronal = Neuronal, Pericyte = Pericyte, SM = SM
)

# For each cell type: pool its peak calls, keep the unified peaks
# (`peak.list`) that overlap at least one of them, and write the result to
# <PEAKS.dir>/unified_peaks/<label>_UnifiedFiltVariable.bed.
# After the loop, `PeakCalls_gr_overlaps` holds the result of the last label.
for (i.a in seq_along(allPeakCalls)) {
  PeakCalls.list.use <- allPeakCalls[[i.a]]

  # Read all files of this cell type into a preallocated list and bind them
  # once afterwards (avoids re-copying the table on every rbind)
  PeakCalls.parts <- vector("list", length(PeakCalls.list.use))
  n.cat <- 0L
  for (i.b in seq_along(PeakCalls.list.use)) {
    # Load peaks
    PeakCalls.use <- PeakCalls.list.use[[i.b]]
    message("Processing :", PeakCalls.use)
    in.dir <- paste0(PEAKS.dir, PeakCalls.use)
    PeakCalls <- read.table(in.dir, header = TRUE)
    PeakCalls.parts[[i.b]] <- PeakCalls
    # Running total of rows pooled so far
    n.cat <- n.cat + nrow(PeakCalls)
    message(" - Concatenated peaks :", n.cat)
  }
  PeakCalls.cat <- do.call(rbind, PeakCalls.parts)

  # Fail loudly instead of silently writing an empty/incorrect file when no
  # peaks were read or the expected coordinate columns are absent
  required.cols <- c("chr", "start", "end")
  if (is.null(PeakCalls.cat) ||
      !all(required.cols %in% colnames(PeakCalls.cat))) {
    stop("Peak calls for '", names(allPeakCalls)[[i.a]],
         "' are empty or lack the required columns: ",
         paste(required.cols, collapse = ", "), call. = FALSE)
  }

  # Convert to granges ([[ ]] selects columns by exact name)
  PeakCalls_gr <- GRanges(
    seqnames = PeakCalls.cat[["chr"]],
    ranges = IRanges(start = PeakCalls.cat[["start"]],
                     end = PeakCalls.cat[["end"]])
  )

  # Equivalent of a bedtools intersect: hits between unified peaks (query)
  # and this cell type's peak calls (subject)
  overlaps <- findOverlaps(peak.list, PeakCalls_gr)
  # A unified peak can hit several calls, so de-duplicate after indexing
  PeakCalls_gr_overlaps <- unique(peak.list[queryHits(overlaps)])

  # Save final peaks list
  message(" - Final peaks: ", length(PeakCalls_gr_overlaps))
  out.dir <- paste0(PEAKS.dir,
                    "unified_peaks/",
                    names(allPeakCalls)[[i.a]],
                    "_UnifiedFiltVariable.bed")
  print(paste0(" - Saving in: ", out.dir))
  # NOTE(review): as.data.frame() on a GRanges yields the columns
  # seqnames, start, end, width, strand, so the file has five tab-separated
  # columns with width and "*" in positions 4 and 5 -- not the standard BED
  # name/score layout. Kept as-is; confirm downstream readers expect this.
  write.table(as.data.frame(PeakCalls_gr_overlaps), out.dir,
              col.names = FALSE, row.names = FALSE, quote = FALSE, sep = "\t")
}

Processing :aCM_peaks.xls

 - Concatenated peaks :175499

 - Final peaks: 139616



[1] " - Saving in: /nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/PeakCalling/2_PeaksMap_Peakcalls/unified_peaks/aCM_UnifiedFiltVariable.bed"


Processing :vCM_peaks.xls

 - Concatenated peaks :205726

 - Final peaks: 162084



[1] " - Saving in: /nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/PeakCalling/2_PeaksMap_Peakcalls/unified_peaks/vCM_UnifiedFiltVariable.bed"


Processing :Adipocyte_peaks.xls

 - Concatenated peaks :65594

 - Final peaks: 35501



[1] " - Saving in: /nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/PeakCalling/2_PeaksMap_Peakcalls/unified_peaks/Adipocyte_UnifiedFiltVariable.bed"


Processing :Fibroblast_peaks.xls

 - Concatenated peaks :174333

 - Final peaks: 129296



[1] " - Saving in: /nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/PeakCalling/2_PeaksMap_Peakcalls/unified_peaks/Fibroblast_UnifiedFiltVariable.bed"


Processing :Endothelial_peaks.xls

 - Concatenated peaks :131222

 - Final peaks: 89791



[1] " - Saving in: /nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/PeakCalling/2_PeaksMap_Peakcalls/unified_peaks/Endothelial_UnifiedFiltVariable.bed"


Processing :Endocardial_peaks.xls

 - Concatenated peaks :96526

 - Final peaks: 62275



[1] " - Saving in: /nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/PeakCalling/2_PeaksMap_Peakcalls/unified_peaks/Endocardial_UnifiedFiltVariable.bed"


Processing :Epicardial_peaks.xls

 - Concatenated peaks :94892

 - Final peaks: 49313



[1] " - Saving in: /nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/PeakCalling/2_PeaksMap_Peakcalls/unified_peaks/Epicardial_UnifiedFiltVariable.bed"


Processing :Lymphoid_peaks.xls

 - Concatenated peaks :80530

 - Final peaks: 46008



[1] " - Saving in: /nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/PeakCalling/2_PeaksMap_Peakcalls/unified_peaks/Lymphoid_UnifiedFiltVariable.bed"


Processing :Myeloid_peaks.xls

 - Concatenated peaks :143460

 - Final peaks: 98275



[1] " - Saving in: /nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/PeakCalling/2_PeaksMap_Peakcalls/unified_peaks/Myeloid_UnifiedFiltVariable.bed"


Processing :Neuronal_peaks.xls

 - Concatenated peaks :52911

 - Final peaks: 29658



[1] " - Saving in: /nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/PeakCalling/2_PeaksMap_Peakcalls/unified_peaks/Neuronal_UnifiedFiltVariable.bed"


Processing :Pericyte_peaks.xls

 - Concatenated peaks :104004

 - Final peaks: 65897



[1] " - Saving in: /nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/PeakCalling/2_PeaksMap_Peakcalls/unified_peaks/Pericyte_UnifiedFiltVariable.bed"


Processing :SM_peaks.xls

 - Concatenated peaks :81624

 - Final peaks: 52412



[1] " - Saving in: /nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/PeakCalling/2_PeaksMap_Peakcalls/unified_peaks/SM_UnifiedFiltVariable.bed"


In [9]:
# Inspect the overlap set left in the workspace by the loop; it holds the
# result of the last cell type processed (SM).
PeakCalls_gr_overlaps

GRanges object with 52412 ranges and 0 metadata columns:
          seqnames              ranges strand
             <Rle>           <IRanges>  <Rle>
      [1]     chr1       181260-181560      *
      [2]     chr1       191308-191608      *
      [3]     chr1       779626-779926      *
      [4]     chr1       858669-858969      *
      [5]     chr1       869758-870058      *
      ...      ...                 ...    ...
  [52408]     chr8 143771112-143771412      *
  [52409]     chr9   33180913-33181213      *
  [52410]     chr9   34589894-34590194      *
  [52411]     chr9 135804950-135805250      *
  [52412]     chrX   47573934-47574234      *
  -------
  seqinfo: 24 sequences from an unspecified genome; no seqlengths

In [10]:
# Preview the data.frame form of the same object (columns seqnames, start,
# end, width, strand) -- this is the table that write.table() exports.
as.data.frame(PeakCalls_gr_overlaps)

seqnames,start,end,width,strand
<fct>,<int>,<int>,<int>,<fct>
chr1,181260,181560,301,*
chr1,191308,191608,301,*
chr1,779626,779926,301,*
chr1,858669,858969,301,*
chr1,869758,870058,301,*
chr1,910044,910344,301,*
chr1,923737,924037,301,*
chr1,925537,925773,237,*
chr1,941651,941896,246,*
chr1,960408,960708,301,*
