# Settings

In [1]:
# Configure reticulate to use the Python interpreter of the dedicated conda
# environment. RETICULATE_PYTHON is exported before the package is attached.
Sys.setenv(RETICULATE_PYTHON = "/home/luca/anaconda3/envs/reticulate/bin/python")
library(reticulate)
use_python("/home/luca/anaconda3/envs/reticulate/bin/python")
use_condaenv("/home/luca/anaconda3/envs/reticulate")

# Sanity checks for the Python modules needed from R:
# each py_module_available() call needs to be TRUE and each import() should
# complete without an error.
py_module_available(module = "anndata")
import("anndata")
py_module_available(module = "leidenalg")
import("leidenalg")

Module(anndata)

Module(leidenalg)

In [2]:
## Patch for annotations in R4.1
# Disabled by default. When uncommented, the two lines below force-install
# GenomeInfoDb from the "Bioconductor/GenomeInfoDb" repository into the user's
# R 4.1 library and then attach it from that same library location.
# BiocManager::install("Bioconductor/GenomeInfoDb",lib = "/home/luca/R/x86_64-pc-linux-gnu-library/4.1",force = TRUE)
# library(GenomeInfoDb,lib.loc="/home/luca/R/x86_64-pc-linux-gnu-library/4.1")

In [3]:
# Attach every package the notebook relies on through pacman::p_load().
# Each package is listed exactly once (data.table used to appear twice).
pacman::p_load(
  dplyr, stringr, data.table, tidyr, Matrix, future,
  hdf5r, Seurat, Signac, harmony, knitr, SoupX,
  EnsDb.Hsapiens.v86,
  logr, parallel,
  ggplot2, ggpubr, ggrepel, ggbreak, gridExtra, patchwork, grid, ggh4x
)

In [4]:
# Gene annotations (hg38)
# The commented-out lines show how the cached object can be rebuilt from
# EnsDb.Hsapiens.v86 and written to disk. R's counterpart to readRDS() is
# saveRDS(); there is no writeRDS() function.
# suppressMessages(annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86))
# genome(annotations) <- "hg38"
# seqlevelsStyle(annotations) <- "UCSC"
# saveRDS(annotations, "/nfs/lab/Luca/Assets/references/Cellranger/hg38.annotations.rds")

# Read the cached annotation object, then set UCSC-style sequence names and
# tag the genome as hg38.
annotations <- readRDS("/nfs/lab/Luca/Assets/references/Cellranger/hg38.annotations.rds")
seqlevelsStyle(annotations) <- "UCSC"
genome(annotations) <- "hg38"

# Sequence info object; the file was downloaded from:
# https://github.com/broadinstitute/ichorCNA/issues/84
seq.info <- readRDS("/nfs/lab/Luca/Assets/references/Cellranger/seqinfo_hg38_ucsc.rds")

In [5]:
# Session-wide options
options(stringsAsFactors = FALSE)
# Keep the current warning level in warnLevel, then silence warnings
warnLevel <- getOption("warn")
options(warn = -1)
opts_chunk$set(tidy = TRUE)

# Parallel plan for the future framework: multicore with 4 workers
plan("multicore", workers = 4)
# RAM threshold for globals handed to future workers, expressed in bytes:
# 10000 * 1024^2 bytes (about 10 GB; 1000 * 1024^2 is about 1 GB)
RAM.tresh <- 10000 * 1024^2
options(future.globals.maxSize = RAM.tresh)

# Strongly favour fixed over scientific notation when numbers are printed.
# THIS OPTION IS NEEDED FOR THE BEDOPS PART
options(scipen = 999)

In [6]:
# Project locations
base.dir <- "/nfs/lab/projects/mega_heart/"
assets.dir <- "/nfs/lab/projects/mega_heart/FNIH/multiome/Assets/"

# Path of the RDS file whose name covers all four chambers (LV, RV, LA, RA)
reference.dir <- "/nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/LV_RV_LA_RA.multiome.mrg.filt.MTless.silQC.curated.peaks.rds"

# Paths of the per-chamber RDS files, all under the single_chambers folder
reference.LV.dir <- file.path(
  "/nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/single_chambers",
  "LV_LibrarySex.multiome.mrg.filt.MTless.silQC.curated.peaks.rds"
)
reference.RV.dir <- file.path(
  "/nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/single_chambers",
  "RV_LibrarySex.multiome.mrg.filt.MTless.silQC.curated.peaks.rds"
)
reference.LA.dir <- file.path(
  "/nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/single_chambers",
  "LA_LibrarySex.multiome.mrg.filt.MTless.silQC.curated.peaks.rds"
)
reference.RA.dir <- file.path(
  "/nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/single_chambers",
  "RA_LibrarySex.multiome.mrg.filt.MTless.silQC.curated.peaks.rds"
)


In [7]:
# Read the tab-separated marker table (with a header row)
cell.markers <- read.table(
  paste0("/nfs/lab/projects/mega_heart/Assets/", "Cell.markers_4.txt"),
  sep = "\t",
  header = TRUE
)
# Go from wide to long: columns 3 onward are stacked into Key / marker pairs
cell.markers <- gather(cell.markers, Key, marker, 3:ncol(cell.markers))
# Column 3 is now the Key column created by gather(); it is not needed
cell.markers <- cell.markers[, -3]
# Discard rows whose marker is an empty string
cell.markers <- cell.markers[cell.markers$marker != "", ]

# Turn the two label columns into factors with an explicit level order
cell.markers$Compartment <- factor(
  cell.markers$Compartment,
  levels = c("Muscular", "Vascular", "Neuro", "Stromal", "Immune", "Erythroid")
)
cell.markers$CellType <- factor(
  cell.markers$CellType,
  levels = c(
    "CardioMyocyte", "a-CM", "v-CM", "SM",
    "Endothelial", "Arterial-Endo", "Venous-Endo", "Capillary-Endo",
    "Endocardial", "Epicardial", "Lymph-Endo", "Pericyte",
    "Neuronal", "Fibroblast", "a-Fibroblast", "Adipocyte",
    "Macrophage", "DC", "T", "T-CD4", "T-CD8", "NK", "NK-16", "NK-56",
    "B", "Plasma", "Mast", "Erythroid"
  )
)

# Copy of the long table without its third (marker) column
cell.compartment <- cell.markers[, -3]

In [8]:
# Open the logr log named "Metadata_label.log", with base.dir as path prefix
log_open(
  file_name = paste0(base.dir, "Metadata_label.log")
)

# Re-label reference

In [9]:
# Palette of 47 named R colors.
# Every entry has to be a name listed by grDevices::colors(); a misspelled
# name such as "lighblue1" is rejected by R as an invalid color, so the 46th
# entry is spelled "lightblue1".
# NOTE(review): "mediumpurple1" occurs twice (6th and 20th entries), so two
# groups get the same color once 20 or more are drawn - confirm whether that
# is intended before replacing either one.
colors.use <- c(
  "cadetblue4", "salmon", "lightgoldenrod",
  "paleturquoise3", "palegreen3", "mediumpurple1",
  "lightblue4", "navajowhite1", "magenta", "coral2",
  "mediumorchid1", "midnightblue", "lightgoldenrodyellow",
  "black", "lightgrey", "mistyrose4", "darkcyan", "steelblue2",
  "darkolivegreen3", "mediumpurple1", "lightskyblue", "firebrick2",
  "burlywood", "chartreuse1", "deeppink2", "khaki", "powderblue",
  "slategrey", "springgreen", "yellow3", "orange2", "lightsteelblue3",
  "tomato3", "palegreen4", "grey27", "darkseagreen", "blue", "darkorchid",
  "snow2", "peachpuff2", "magenta2", "yellowgreen", "cornflowerblue",
  "chocolate", "blueviolet", "lightblue1", "plum2"
)

In [10]:
# Input file for the next step: the path held in reference.dir
in.dir <- reference.dir

In [11]:
# Read the object stored at in.dir, logging before and after the read
log_print(" Loading data")
adata <- readRDS(in.dir)
log_print("Done")

[1] " Loading data"
[1] "Done"


# Call peaks by MajorCelltypes and chambers

In [12]:
# Output folder for the per-group peak calls of this section
peaks.dir <- "/nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/PeakCalling/3_PeaksMap_byChamber_PeakCalls/"

log_print("Calling peaks")
# Call narrow peaks with MACS2 on the ATAC assay, one call per value of the
# "cell.major_types_chamber" metadata column. Peak sets are not merged across
# groups (combine.peaks = FALSE) and output files are kept (cleanup = FALSE).
peaks <- CallPeaks(
  object = adata,
  assay = "ATAC",
  group.by = "cell.major_types_chamber",
  macs2.path = "/home/luca/.local/bin/macs2",
  outdir = peaks.dir,
  format = "BED",
  broad = FALSE,
  combine.peaks = FALSE,
  cleanup = FALSE,
  verbose = TRUE
)
log_print("Done")

[1] "Calling peaks"


Processing file /nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/merged.atac_fragments.tsv.gz






[1] "Done"


# Call peaks by cell-subtypes

In [13]:
# Output folder for the per-group peak calls of this section
peaks.dir <- "/nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/PeakCalling/4_PeakMap_Peakcalls_CellSubTypes/"

log_print("Calling peaks")
# Call narrow peaks with MACS2 on the ATAC assay, one call per value of the
# "cell.sub_types" metadata column. Peak sets are not merged across groups
# (combine.peaks = FALSE) and output files are kept (cleanup = FALSE).
peaks <- CallPeaks(
  object = adata,
  assay = "ATAC",
  group.by = "cell.sub_types",
  macs2.path = "/home/luca/.local/bin/macs2",
  outdir = peaks.dir,
  format = "BED",
  broad = FALSE,
  combine.peaks = FALSE,
  cleanup = FALSE,
  verbose = TRUE
)
log_print("Done")

[1] "Calling peaks"


Processing file /nfs/lab/projects/mega_heart/FNIH/multiome/Analysis/1_preprocessing/merged.atac_fragments.tsv.gz






[1] "Done"
