# Settings

In [1]:
# Configure reticulate to use the dedicated conda environment.
# RETICULATE_PYTHON is set before the package is attached.
reticulate.python <- "/home/luca/anaconda3/envs/reticulate/bin/python"
Sys.setenv(RETICULATE_PYTHON = reticulate.python)
library(reticulate)
reticulate::use_python(reticulate.python)
reticulate::use_condaenv("/home/luca/anaconda3/envs/reticulate")

# Sanity checks: each availability test should return TRUE and each
# import should complete without an error.
reticulate::py_module_available(module = "anndata")
reticulate::import("anndata")
reticulate::py_module_available(module = "leidenalg")
reticulate::import("leidenalg")

Module(anndata)

Module(leidenalg)

In [2]:
# Load packages
pacman::p_load(dplyr, stringr, data.table, tidyr, data.table, Matrix,
               hdf5r, Seurat, Signac,harmony, knitr, SoupX,
               EnsDb.Hsapiens.v86, 
               logr, parallel, 
               ggplot2, ggpubr, ggrepel, ggbreak, gridExtra)

In [3]:
# Set global options
options(stringsAsFactors = FALSE)
# Keep the current warning level in warnLevel (it can be restored with
# options(warn = warnLevel)), then silence warnings.
warnLevel <- getOption("warn")
options(warn = -1)
opts_chunk$set(tidy = TRUE)
# Large scipen penalty so numbers print in fixed rather than scientific notation
options(scipen = 999)

# Set the future plan: multicore backend with 4 workers.
# Called as future::plan() because `future` is not among the packages
# attached by the p_load() call in this notebook.
future::plan("multicore", workers = 4)
# Set RAM threshold for globals exported to future workers.
## The value is in bytes: 1024^2 bytes = 1 MiB, so 10000 * 1024^2 is ~10 GB.
RAM.tresh <- 10000 * 1024^2
options(future.globals.maxSize = RAM.tresh)

In [4]:
# Seq info downloaded from: https://github.com/broadinstitute/ichorCNA/issues/84
seq.info <- readRDS("/nfs/lab/Luca/Assets/references/Cellranger/seqinfo_hg38_ucsc.rds")

# Load the annotation object and label it with UCSC-style sequence
# names and the hg38 genome
annotations <- readRDS("/nfs/lab/Luca/Assets/references/Cellranger/hg38.annotations.rds")
seqlevelsStyle(annotations) <- "UCSC"
genome(annotations) <- "hg38"

In [5]:
# Project directories. Every path is derived from base.dir and keeps its
# trailing slash, so file names can be appended directly.
base.dir <- "/nfs/lab/projects/mega_heart/CAREHF/multiome/"
assets.dir <- paste0(base.dir, "Assets/")

# Directory of Cell Ranger symlinks
cell.ranger.dir <- paste0(base.dir, "cellranger.symlinks/")

# Per-step analysis directories (step 1 and step 2)
step1.dir <- paste0(base.dir, "Analysys/1_preprocessing/")
step2.dir <- paste0(base.dir, "Analysys/2_PeaksReformat/")

# Log directory
log.dir <- paste0(base.dir, "log/")

In [6]:
# Configure logr before the log is opened
options(
  "logr.on" = TRUE,
  "logr.notes" = TRUE,
  "logr.autolog" = TRUE,
  "logr.compact" = TRUE,
  "logr.traceback" = TRUE
)
# Log file name: <base.dir><date>_2_PeaksReformat.
# The underscore keeps the run date and the step label apart; without it
# they fuse into e.g. "2024-05-102_PeaksReformat".
# NOTE(review): log.dir is defined in this notebook but not used here.
# Presumably base.dir is intentional because logr::log_open() defaults to
# logdir = TRUE and writes into a "log" subfolder - confirm.
log.file <- paste0(base.dir, Sys.Date(), "_2_PeaksReformat")

In [7]:
# Open the logr log at the path stored in log.file
log_open(file_name = log.file)

# Load peak list

In [8]:
# Read the peak list and convert it to a GRanges object.
# The first column (V1) is passed to StringToGRanges() with ":" and "-" as
# separators, i.e. entries are expected to look like "chr:start-end".
# NOTE(review): this path is under the FNIH project while the other
# directories in this notebook are under CAREHF - confirm this is intended.
file <- "/nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/PeakCalling/2_PeaksMap_Peakcalls/unified_peaks/FNIH_MultiomePeaks_FiltVariable.bed"
peaks <- StringToGRanges(read.table(file)[["V1"]], sep = c(":", "-"))
peaks

GRanges object with 285873 ranges and 0 metadata columns:
           seqnames              ranges strand
              <Rle>           <IRanges>  <Rle>
       [1]     chr1       181260-181560      *
       [2]     chr1       191308-191608      *
       [3]     chr1       779626-779926      *
       [4]     chr1       807726-808026      *
       [5]     chr1       818649-818949      *
       ...      ...                 ...    ...
  [285869]     chr9   95784035-95784234      *
  [285870]     chr9 135804950-135805250      *
  [285871]     chrX       338438-338682      *
  [285872]     chrX   47573934-47574234      *
  [285873]     chrX   68652886-68653186      *
  -------
  seqinfo: 24 sequences from an unspecified genome; no seqlengths

# Create ATAC_counts from feature matrix

In [9]:
# Read the tab-separated sample sheet (with header row) from assets.dir
sample.info <- read.table(
  paste0(assets.dir, "sample.info"),
  sep = "\t",
  header = TRUE
)

In [10]:
# Preview the first two rows of the sample sheet
head(sample.info, n = 2)
# Number of rows in the sample sheet
nrow(sample.info)

Unnamed: 0_level_0,ID,Chamber,Donor,CellRanger
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>
1,KA_49_1_2_KA_45_1_2,LA,HF3,/nfs/lab/projects/CARE_HF/DATA/multiome/
2,QY_1880_1_2_QY_1879_1_2,LA,D9,/nfs/lab/projects/CARE_HF/DATA/multiome/


In [11]:
# Vector of sample IDs taken from the ID column of the sample sheet
sample.ls <- sample.info[["ID"]]

# Show the first ID as a quick check that the values look right
sample.ls[1]

In [12]:
# For every sample: load the object saved in step1.dir, re-count its ATAC
# fragments over the shared peak set, swap the ATAC assay for one built from
# those counts, and save the result to step2.dir.
for (i in seq_along(sample.ls)) {
    gc(reset = TRUE)
    sample <- sample.ls[i]
    log_print(paste("Processing sample: ", sample))
    adata <- readRDS(paste0(step1.dir, sample, "_pre.filt.rds"))

    # Peak-by-cell count matrix over `peaks`, using the fragments and cell
    # names of the existing ATAC assay
    log_print("Creating LFM")
    fragment.list <- adata@assays$ATAC@fragments
    atac_counts <- FeatureMatrix(
        fragments = fragment.list,
        features = peaks,
        cells = colnames(adata@assays$ATAC)
    )

    # Build a chromatin assay from the new counts; min.cells and
    # min.features are 0 and warnings from the constructor are suppressed
    log_print("Creating chrom_assay")
    suppressWarnings(
        chrom_assay <- CreateChromatinAssay(
            counts = atac_counts,
            sep = c(":", "-"),
            genome = seq.info,
            fragments = fragment.list,
            min.cells = 0,
            min.features = 0,
            annotation = annotations
        )
    )

    # Make RNA the default assay, then drop the old ATAC assay and put the
    # new one in its place
    log_print("Replace with old ATAC assay")
    DefaultAssay(adata) <- "RNA"
    adata[["ATAC"]] <- NULL
    adata[["ATAC"]] <- chrom_assay

    log_print("Saving")
    saveRDS(adata, paste0(step2.dir, sample, "_pre.filt.peaks.rds"))
}

[1] "Processing sample:  KA_49_1_2_KA_45_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_1880_1_2_QY_1879_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_1882_1_2_QY_1881_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_1920_1_2_QY_1919_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_1971_1_2_QY_1970_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_2015_1_2_QY_2014_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_2039_1_2_QY_2038_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_2047_1_2_QY_2046_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_2051_1_2_QY_2050_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  JB_631_1_2_JB_627_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  JB_632_1_2_JB_628_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  JB_633_1_2_JB_629_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  JB_634_1_2_3_JB_630_1_2_3"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  MM_507_1_2_MM_512_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_1856_1_2_QY_1855_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_1854_1_2_QY_1853_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_1858_1_2_QY_1857_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_1860_1_2_QY_1859_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  KA_47_1_2_KA_43_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  KA_50_1_2_KA_46_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_1886_1_2_QY_1885_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_1914_1_2_QY_1913_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_1916_1_2_QY_1915_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_1969_1_2_QY_1968_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_2019_1_2_QY_2018_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_2045_1_2_QY_2044_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_2049_1_2_QY_2048_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  KA_48_1_2_KA_44_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_1922_1_2_QY_1921_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_1924_1_2_QY_1923_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_1967_1_2_QY_1966_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_1973_1_2_QY_1972_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_2017_1_2_QY_2016_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_2041_1_2_QY_2040_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_2043_1_2_QY_2042_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
[1] "Processing sample:  QY_2053_1_2_QY_2052_1_2"
[1] "Creating LFM"


Extracting reads overlapping genomic regions



[1] "Creating chrom_assay"
[1] "Replace with old ATAC assay"
[1] "Saving"
