In [10]:
library(ArchR)
library(parallel)
library(Seurat)
library(BSgenome.Hsapiens.UCSC.hg38)
# library(BSgenome.Mmusculus.UCSC.mm10)
# Set the ArchR-wide default number of parallel threads used by later steps.
addArchRThreads(threads = 22)

Setting default number of Parallel threads to 22.



In [15]:
source("/data/work/02.helper/Greenleaf-nature genetic-Support-Function.R")
source("/data/work/02.helper/plotting.R")
source("/data/work/02.helper/misc_helper.R")

“package ‘ggrastr’ was built under R version 4.2.3”
“package ‘dplyr’ was built under R version 4.2.3”


# Add peak-to-gene (p2g) links for the EX and IN sub-projects

In [3]:
# Full human PFC ATAC project; every sub-project below is derived from it.
proj <- readRDS(file = "/data/work/Brain/project/ATAC/humanPFC/output/human.rds")

In [9]:
# Inhibitory-neuron sub-project: keep only cells whose FineClust label is one
# of the four IN types and copy the project into its own output directory.
in_types <- c("IN_PVALB","IN_SST","IN_LAMP5","IN_VIP")
in_cells <- getCellNames(proj)[proj$FineClust %in% in_types]

IN <- subsetArchRProject(
  ArchRProj       = proj,
  cells           = in_cells,
  outputDirectory = "/data/work/Brain/project/ATAC/humanPFC/sub_IN",
  dropCells       = FALSE,
  logFile         = NULL,
  threads         = getArchRThreads(),
  force           = TRUE
)

Copying ArchRProject to new outputDirectory : /data/work/04.Mouse/sub_IN

Copying Arrow Files...

Copying Arrow Files (1 of 6)

Copying Arrow Files (2 of 6)

Copying Arrow Files (3 of 6)

Copying Arrow Files (4 of 6)

Copying Arrow Files (5 of 6)

Copying Arrow Files (6 of 6)

Getting ImputeWeights

No imputeWeights found, returning NULL

Copying Other Files...

Copying Other Files (1 of 17): addGimat.Allen.ver.rds

Copying Other Files (2 of 17): ArchRLogs

Copying Other Files (3 of 17): Embeddings

Copying Other Files (4 of 17): FineClust_specific_peaks

Copying Other Files (5 of 17): gimat.rds

Copying Other Files (6 of 17): GroupCoverages

Copying Other Files (7 of 17): gsmat.rds

Copying Other Files (8 of 17): IterativeLSI

Copying Other Files (9 of 17): mouse_delcelltype.rds

Copying Other Files (10 of 17): mouse.C10.subset.rds

Copying Other Files (11 of 17): Peak2GeneLinks

Copying Other Files (12 of 17): PeakCalls

Copying Other Files (13 of 17): Plots

Copying Other Files (14 

In [None]:
# Excitatory-neuron sub-project. EX_type is reused later to subset the RNA
# object, so it stays a top-level variable.
EX_type <- c("EX_L23_IT","EX_L4_IT","EX_L5_IT","EX_L6_IT","EX_L5_NP","EX_L6_IT_Car3","EX_L6_CT")
ex_cells <- getCellNames(proj)[proj$FineClust %in% EX_type]

EX <- subsetArchRProject(
  ArchRProj       = proj,
  cells           = ex_cells,
  outputDirectory = "/data/work/Brain/project/ATAC/humanPFC/sub_EX",
  dropCells       = FALSE,
  logFile         = NULL,
  threads         = getArchRThreads(),
  force           = TRUE
)

In [11]:
# RNA reference object used as the integration partner for the ATAC sub-projects.
human_rna <- readRDS(file = "/data/work/Brain/project/RNA/human.rds")

In [12]:
# Split the RNA object into the inhibitory and excitatory subsets that pair
# with the IN / EX ATAC sub-projects.
# NOTE: EX_type must already be defined (it is created in the EX subsetting cell).
rna_celltypes <- unique(human_rna$celltype)
cell.select <- rna_celltypes[rna_celltypes %in% c('IN_SST','IN_VIP','IN_PVALB','IN_LAMP5')]

human_in <- subset(human_rna, celltype %in% cell.select)
human_ex <- subset(human_rna, celltype %in% EX_type)

In [None]:
# For each neuronal sub-project (EX, IN):
#   1. rebuild group coverages per FineClust,
#   2. restrict the full-project peak set to the clusters present in the sub-project,
#   3. recompute the peak matrix on that peak set,
#   4. integrate the matching RNA subset (GeneIntegrationMatrix),
#   5. add peak-to-gene links and save the sub-project to its output directory.
#
# `rds` and `rna` are parallel vectors holding the *names* of the ArchR
# sub-project variables and of their matching RNA subsets.
rds <- c("EX", "IN")
rna <- c("human_ex", "human_in")
stopifnot(length(rds) == length(rna))

# The full-project peak set does not depend on the sub-project, so fetch it once
# instead of once per iteration.
full_peak <- getPeakSet(proj)

for (i in seq_along(rds)) {
    sub_proj <- get(rds[i])
    sub_rna <- get(rna[i])

    sub_proj <- addGroupCoverages(
        ArchRProj = sub_proj,
        groupBy = "FineClust",
        minCells = 50, # The minimum number of cells required in a given cell group to permit insertion coverage file generation. (default = 40)
        force = TRUE
    )

    # getClusterPeaks is not defined in this notebook; presumably it comes from
    # the sourced helper scripts. It is called on the FULL project so that the
    # selected peaks are a subset of the full peak set.
    peaks <- getClusterPeaks(proj, clusterNames = unique(sub_proj$FineClust), peakGR = full_peak)
    sub_proj <- addPeakSet(sub_proj, peakSet = peaks, force = TRUE)
    sub_proj <- addPeakMatrix(sub_proj, force = TRUE)

    sub_proj <- addGeneIntegrationMatrix(
        ArchRProj = sub_proj,
        useMatrix = "GeneScoreMatrix",
        matrixName = "GeneIntegrationMatrix",
        reducedDims = "Harmony",
        seRNA = sub_rna,
        addToArrow = TRUE,
        force = TRUE,
        groupRNA = "celltype",
        nameCell = "predictedCell",
        nameGroup = "predictedGroup",
        nameScore = "predictedScore"
    )

    sub_proj <- addPeak2GeneLinks(
        ArchRProj = sub_proj,
        reducedDims = "Harmony"
    )

    # Written to the sub-project's own output directory; the combining step
    # reads Save-ArchR-Project.rds back from there.
    saveArchRProject(sub_proj)
}

# Combine all peak-to-gene (p2g) links

In [1]:
# atac_proj <- readRDS("/data/work/01.human_brain/03.额叶/Final_output/Save-ArchR-Project.rds")

In [28]:
# Peak-to-gene thresholds. Defaults quoted below are those of plotPeak2GeneHeatmap.
# Correlation cutoff (default 0.45)
corrCutoff    <- 0.4
# ATAC variance-quantile cutoff (default 0.25)
varCutoffATAC <- 0.2
# RNA variance-quantile cutoff (default 0.25)
varCutoffRNA  <- 0.25

In [29]:
# Root directory of the human PFC ATAC project; sub-projects live in
# <wd>/sub_<name> and all outputs of this cell are written directly under it.
wd <- '/data/work/Brain/project/ATAC/humanPFC'

##########################################################################################
# Prepare full-project peak to gene linkages, loops, and coaccessibility (full and subproject links)
##########################################################################################

subclustered_projects <- c("EX", "IN")

# Containers, keyed by source ("full", "EX", "IN"):
#   plot_loop_list  - loop tracks for plotting, already filtered at corrCutoff
#   peak2gene_list  - p2g links as GRanges, NOT filtered (no correlation/variance cutoff, NAs kept)
plot_loop_list <- list()
plot_loop_list[["full"]] <- getPeak2GeneLinks(proj, corCutOff = corrCutoff, resolution = 100)[[1]]

peak2gene_list <- list()
p2gGR <- getP2G_GR(proj, corrCutoff = NULL, varCutoffATAC = -Inf, varCutoffRNA = -Inf, filtNA = FALSE)
p2gGR$source <- "full"
peak2gene_list[["full"]] <- p2gGR

# Retrieve the same information from each saved sub-project
for (subgroup in subclustered_projects) {
  message(sprintf("Reading in subcluster %s", subgroup))

  # Derive the location from `wd` so reading and writing share a single root
  sub_dir <- file.path(wd, sprintf("sub_%s", subgroup))
  sub_rds <- file.path(sub_dir, "Save-ArchR-Project.rds")
  if (!file.exists(sub_rds)) {
    stop(sprintf("Saved sub-project not found: %s (run the per-sub-project p2g step first)", sub_rds))
  }
  sub_proj <- readRDS(sub_rds)

  # Sub-project p2g links (unfiltered), tagged with their origin
  subP2G <- getP2G_GR(sub_proj, corrCutoff = NULL, varCutoffATAC = -Inf, varCutoffRNA = -Inf, filtNA = FALSE)
  subP2G$source <- subgroup
  peak2gene_list[[subgroup]] <- subP2G

  # Sub-project loops for plotting
  plot_loop_list[[subgroup]] <- getPeak2GeneLinks(sub_proj, corCutOff = corrCutoff, resolution = 100)[[1]]
}

# Concatenate links from all sources into one GRanges
full_p2gGR <- unlist(as(peak2gene_list, "GRangesList"))

# Fix idxATAC to match the full peak set: sub-project indices refer to the
# sub-project peak set, so re-map every link by peak name to the index used in
# the full project. Peaks absent from the full-project links get NA.
idxATAC <- peak2gene_list[["full"]]$idxATAC
names(idxATAC) <- peak2gene_list[["full"]]$peakName
full_p2gGR$idxATAC <- idxATAC[full_p2gGR$peakName]

# Save lists of p2g objects, etc.
saveRDS(full_p2gGR, file = file.path(wd, "multilevel_p2gGR.rds")) # NOT merged or correlation filtered
saveRDS(plot_loop_list, file = file.path(wd, "multilevel_plot_loops.rds"))

Reading in subcluster EX

Reading in subcluster IN



In [30]:
# Sanity check: number of unfiltered p2g links contributed by each source.
# NOTE(review): in the recorded output, EX and full have identical counts
# (2565000 each) - confirm that this is expected.
table(full_p2gGR$source)


     EX    full      IN 
2565000 2565000 1423716 

In [32]:
##########################################################################################
# Filter redundant peak to gene links
##########################################################################################
# Collapse redundant p2gLinks: the same peak-gene pair can appear once per
# source (full / EX / IN). Sort by decreasing correlation so that duplicated()
# keeps the strongest instance of each pair, then restore genomic order.
full_p2gGR <- full_p2gGR[order(full_p2gGR$Correlation, decreasing = TRUE)]
link_key <- paste0(full_p2gGR$peakName, "_", full_p2gGR$symbol)
filt_p2gGR <- sort(full_p2gGR[!duplicated(link_key)])

# Correlation threshold applied to the merged link set.
# NOTE(review): this is 0.45, whereas corrCutoff (used for the plot loops) is
# 0.4. The value is preserved here - confirm whether the two should agree.
filtCorrCutoff <- 0.45

# Links were retrieved with filtNA=FALSE, so Correlation may contain NA.
# which() drops NA comparisons instead of passing an NA-containing logical
# subscript to the GRanges subset.
keep_idx <- which(
    filt_p2gGR$Correlation > filtCorrCutoff &
    filt_p2gGR$VarQATAC > varCutoffATAC &
    filt_p2gGR$VarQRNA > varCutoffRNA
)
filt_p2gGR <- filt_p2gGR[keep_idx]

In [33]:
# Persist the de-duplicated, threshold-filtered peak-to-gene links.
filt_p2g_path <- paste0(wd,"/filt_p2gGR.rds")
saveRDS(object = filt_p2gGR, file = filt_p2g_path)