In [None]:
## Running peak-to-gene link analysis on each brain region
# loading
suppressPackageStartupMessages({
library(Seurat)
library(Signac)
library(dplyr)

library(RColorBrewer)
library(ComplexHeatmap)
library(circlize)
library(stringr)
library(GenomicRanges)
library(GenomicFeatures)
library(EnsDb.Hsapiens.v86)
library(BSgenome.Hsapiens.UCSC.hg38)

# library for plotting
library(ggplot2)
library(dplyr)
library(viridis)
})
#### set working dir
setwd("/data2/aliu8/2023_AD_multiome/Analysis/")

In [None]:
# Annotating the linked peaks
## loading packages
library(ChIPseeker)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
# hg38 known-gene annotation; the corresponding UCSC files are listed at
# https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/genes/
# The packaged TxDb object is used directly here (no file is read in this cell).
# NOTE(review): `txdb` is not referenced again in the visible part of this notebook.
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene

In [None]:
## Peak-to-gene link pipeline for one cell type / DEG direction.
##
## Arguments:
##   object    Seurat object with a "PC" expression assay, a "CTpeaks" peak
##             assay and a `cluster_celltype` metadata column.
##   celltype  Cell-type label; selects DEGs and tags the output rows.
##   dir       DEG direction label, matched against `DEG_list$dir`.
##   DEG_list  data.frame with (at least) columns `gene`, `dir`, `celltype`.
##
## Returns a data.frame of links with score > 0.05 and BH-adjusted p < 0.05,
## plus the columns `p.adj`, `celltype`, `deg_dir` and `comb`
## (peak_gene_celltype_dir). An empty data.frame is returned when there is no
## DEG for the requested cell type / direction.
##
## NOTE(review): the per-cell-type subset is commented out, so links are
## computed on all cells of `object`; `celltype` only selects the DEG set.
## NOTE(review): LinkPeaks already filters on pvalue_cutoff = 0.05, so the BH
## correction below only sees the pre-filtered tests. Passing pvalue_cutoff = 1
## and score_cutoff = 0 to LinkPeaks would correct over all tests; left
## unchanged here because it changes the reported links.
get_link <- function(object, celltype, dir, DEG_list){

    print(date())

    # print cell type
    message(paste("Start working on ", celltype," in ",dir, " direction.",sep = ""))

    # subset object
    #obj <- subset(object, subset = cluster_celltype == celltype)
    obj <- object
    print(obj)
    print(table(obj$cluster_celltype))

    # subset degs first so that an empty set skips the expensive steps below;
    # which() drops rows where dir/celltype is NA instead of yielding NA genes
    deg_rows <- which(DEG_list$dir == dir & DEG_list$celltype == celltype)
    DEGs <- unique(DEG_list$gene[deg_rows])
    DEGs <- DEGs[!is.na(DEGs)]
    message(length(DEGs))
    if (length(DEGs) == 0) {
        message("No DEGs for this cell type / direction; skipping link calculation.")
        return(data.frame())
    }

    # gene expression normalization
    message("Noramlization in case not done before.")
    DefaultAssay(obj) <- "PC"
    obj <- SCTransform(obj,assay = "PC")

    # atac set normalization
    DefaultAssay(obj) <- "CTpeaks"
    obj <- RunTFIDF(obj) %>%
                FindTopFeatures(min.cutoff='q0') %>%
                RunSVD()

    # first compute the GC content for each peak
    obj <- RegionStats(obj, genome = BSgenome.Hsapiens.UCSC.hg38)

    # link peaks to genes
    message("Start calculating links.")

    obj <- LinkPeaks(
        object = obj,
        peak.assay = "CTpeaks",
        peak.slot = "data",
        expression.assay = "SCT",
        expression.slot = "data",
        genes.use = DEGs,
        method = "spearman",#c("pearson","spearman") # spearman may preferred.
        score_cutoff = 0.05, # set score to 0 to keep all
        pvalue_cutoff = 0.05 # set p val = 1 to include all independent tests
    )

    # clean results and calculate adjusted p-values
    ## perform multiple testing correction
    message("Final processing the results.")
    res <- as.data.frame(Links(obj))
    res$p.adj <- p.adjust(res$pvalue,method = "BH")
    res <- res[which(res$score > 0.05 & res$p.adj < 0.05), , drop = FALSE]

    ## add annotation information; rep() keeps the assignment valid when no
    ## link survives the filter (a scalar cannot be assigned to zero rows)
    n_links <- nrow(res)
    res$celltype <- rep(celltype, n_links)
    res$deg_dir <- rep(dir, n_links)
    res$comb <- paste(res$peak,res$gene,res$celltype,res$deg_dir, sep = "_")
    message(paste("Identified",length(res$comb),"links."))

    return(res)
}

In [None]:
# Keep only the links whose peak was called in the cell type the link is
# assigned to.
#
# Arguments:
#   links_df   data.frame of links with columns `peak` ("chr-start-end"),
#              `seqnames` and `celltype`.
#   ctpeaks    GRanges of peaks with a `peak_called_in` metadata column.
#   celltypes  Cell-type labels to process (defaults to the six labels used
#              throughout this notebook).
#
# Returns the retained rows of `links_df` with two extra columns,
# `peak.start` and `peak.end`, parsed from `peak`.
get_clean_link <- function(links_df, ctpeaks,
                           celltypes = c("Astrocyte","Excitatory","Inhibitory",
                                         "Microglia","Oligodendrocyte","OPC")){
    linked_peaks <- links_df

    # reformat the peak regions (split "chr-start-end" once)
    peak_parts <- str_split_fixed(linked_peaks$peak,"-",3)
    linked_peaks$peak.start <- peak_parts[,2]
    linked_peaks$peak.end <- peak_parts[,3]
    linked_peaks <- unique(linked_peaks)

    kept <- lapply(celltypes, function(celltype){
        # peaks annotated as called in this cell type (literal label match)
        anno_temp <- ctpeaks[grep(celltype, ctpeaks$peak_called_in, fixed = TRUE)]

        # links assigned to this cell type
        atac_peaks <- linked_peaks[which(linked_peaks$celltype == celltype), , drop = FALSE]
        if (nrow(atac_peaks) == 0) {
            return(atac_peaks)
        }

        link_temp <- GRanges(atac_peaks[,c("seqnames","peak.start","peak.end")])

        # exact range match against the cell-type peak set
        in_celltype <- link_temp %in% anno_temp
        print(table(in_celltype, dnn = NULL))

        atac_peaks[which(in_celltype), , drop = FALSE]
    })

    # bind once instead of growing a data.frame inside the loop
    final <- do.call(rbind, c(list(data.frame()), kept))

    return(final)
}

In [None]:
## Cell-type peak annotation table (MACS2), renamed and converted to GRanges
ctpeaks <- read.csv("./Results/CTpeaks_annotated.csv", row.names = 1) %>%
    setNames(c("chr","start","end","width","strand","peak_called_in")) %>%
    GRanges()
ctpeaks

In [None]:
### PFC ###
## read in related data
object <- readRDS("cellbender_PFC_object.rds")

## read in DEG list
DEG_list <- read.csv("./Results/DEG/Overlap_mast_mixed_PFC.csv")

## actual link analysis: one get_link() run per cell type x DEG direction
ct <- c("Astrocyte","Excitatory","Inhibitory","Microglia","Oligodendrocyte","OPC")

direction <- c("pos","neg")

## running
final <- data.frame()
for (i in ct){
    for(j in direction){
        celltype <- i
        print(celltype)
        dir <- j
        print(dir)

        res1 <- get_link(object=object, celltype=celltype, dir=dir, DEG_list=DEG_list)
        print(length(res1$gene))
        final <- rbind(final,res1)
    }
}

# keep only links whose peak was called in the assigned cell type
#table(final$celltype, final$deg_dir)
links_clean <- get_clean_link(links_df = final, ctpeaks = ctpeaks)

## check information
table(links_clean$celltype,links_clean$deg_dir)

## saving the peak-gene file
write.csv(links_clean, file = "./Results/LINK/PFC_linkpeaks_all.csv",row.names = F)

### Figure 3c: PFC
# histogram of the number of linked peaks per (gene, cell type)
df <- as.data.frame(table(paste(links_clean$gene,links_clean$celltype,sep = "_")))
#hist(df$Freq)

# the summary statistic is a median, so the message now says so
avg.peaks <- round(median(df$Freq),digits = 0)
message("Median number of peaks linked to DEGs: ", avg.peaks, ".")

# collapse the long tail: everything with >= 40 links goes into the last bin
df$Freq <- pmin(df$Freq, 40)

# No fill aesthetic is mapped, so the previous scale_fill_viridis() and
# labs(fill = ...) layers had no effect and were dropped.
p1 <- ggplot(df, aes(x=Freq)) +
    geom_histogram(fill="#c25757ff", alpha=0.7, position = 'dodge',binwidth = 2,colour='#c25757ff',size=1) +
    xlab("Linked peaks per gene")+
    ylab("Number of genes")+
    theme_classic()+ggtitle(paste("PFC: ",avg.peaks,"linked peaks per DEG."))

# print() explicitly so the figure is written even when this code is source()d
pdf(file = "./Figures/LINK/Hist_link.peaks_PFC.pdf",width = 5,height = 5)
print(p1)
dev.off()

In [None]:
### EC ###
## read in related data
object <- readRDS("cellbender_EC_object.rds")

## read in DEG list
DEG_list <- read.csv("./Results/DEG/Overlap_mast_mixed_EC.csv")

## actual link analysis: one get_link() run per cell type x DEG direction
ct <- c("Astrocyte","Excitatory","Inhibitory","Microglia","Oligodendrocyte","OPC")

direction <- c("pos","neg")

## running
final <- data.frame()
for (i in ct){
    for(j in direction){
        celltype <- i
        print(celltype)
        dir <- j
        print(dir)

        res1 <- get_link(object=object, celltype=celltype, dir=dir, DEG_list=DEG_list)
        print(length(res1$gene))
        final <- rbind(final,res1)
    }
}

# keep only links whose peak was called in the assigned cell type
#table(final$celltype, final$deg_dir)
links_clean <- get_clean_link(links_df = final, ctpeaks = ctpeaks)

## check information
table(links_clean$celltype,links_clean$deg_dir)

## saving the peak-gene file
write.csv(links_clean, file = "./Results/LINK/EC_linkpeaks_all.csv",row.names = F)

### Figure 3c: EC
# histogram of the number of linked peaks per (gene, cell type)
df <- as.data.frame(table(paste(links_clean$gene,links_clean$celltype,sep = "_")))
#hist(df$Freq)

# the summary statistic is a median, so the message now says so
avg.peaks <- round(median(df$Freq),digits = 0)
message("Median number of peaks linked to DEGs: ", avg.peaks, ".")

# collapse the long tail: everything with >= 40 links goes into the last bin
df$Freq <- pmin(df$Freq, 40)

# No fill aesthetic is mapped, so the previous scale_fill_viridis() and
# labs(fill = ...) layers had no effect and were dropped. The bar outline now
# uses the EC colour as well (it previously carried the PFC colour).
p1 <- ggplot(df, aes(x=Freq)) +
    geom_histogram(fill="#825ca6ff", alpha=0.7, position = 'dodge',binwidth = 2,colour='#825ca6ff',size=1) +
    xlab("Linked peaks per gene")+
    ylab("Number of genes")+
    theme_classic()+ggtitle(paste("EC: ",avg.peaks,"linked peaks per DEG."))

# print() explicitly so the figure is written even when this code is source()d
pdf(file = "./Figures/LINK/Hist_link.peaks_EC.pdf",width = 5,height = 5)
print(p1)
dev.off()

In [None]:
### HIP ###
## read in related data
object <- readRDS("cellbender_HIP_object.rds")

## read in DEG list
DEG_list <- read.csv("./Results/DEG/Overlap_mast_mixed_HIP.csv")

## actual link analysis: one get_link() run per cell type x DEG direction
ct <- c("Astrocyte","Excitatory","Inhibitory","Microglia","Oligodendrocyte","OPC")

direction <- c("pos","neg")

## running
final <- data.frame()
for (i in ct){
    for(j in direction){
        celltype <- i
        print(celltype)
        dir <- j
        print(dir)

        res1 <- get_link(object=object, celltype=celltype, dir=dir, DEG_list=DEG_list)
        print(length(res1$gene))
        final <- rbind(final,res1)
    }
}

# keep only links whose peak was called in the assigned cell type
#table(final$celltype, final$deg_dir)
links_clean <- get_clean_link(links_df = final, ctpeaks = ctpeaks)

## check information
table(links_clean$celltype,links_clean$deg_dir)

## saving the peak-gene file
write.csv(links_clean, file = "./Results/LINK/HIP_linkpeaks_all.csv",row.names = F)

### Figure 3c: HIP
# histogram of the number of linked peaks per (gene, cell type)
df <- as.data.frame(table(paste(links_clean$gene,links_clean$celltype,sep = "_")))
#hist(df$Freq)

# the summary statistic is a median, so the message now says so
avg.peaks <- round(median(df$Freq),digits = 0)
message("Median number of peaks linked to DEGs: ", avg.peaks, ".")

# collapse the long tail: everything with >= 40 links goes into the last bin
df$Freq <- pmin(df$Freq, 40)

# No fill aesthetic is mapped, so the previous scale_fill_viridis() and
# labs(fill = ...) layers had no effect and were dropped. The histogram now
# uses the HIP colour (#3f78c1ff, as in the region annotation of the motif
# heatmap) instead of the PFC colour carried over from the PFC cell.
p1 <- ggplot(df, aes(x=Freq)) +
    geom_histogram(fill="#3f78c1ff", alpha=0.7, position = 'dodge',binwidth = 2,colour='#3f78c1ff',size=1) +
    xlab("Linked peaks per gene")+
    ylab("Number of genes")+
    theme_classic()+ggtitle(paste("HIP: ",avg.peaks,"linked peaks per DEG."))

# print() explicitly so the figure is written even when this code is source()d
pdf(file = "./Figures/LINK/Hist_link.peaks_HIP.pdf",width = 5,height = 5)
print(p1)
dev.off()

In [None]:
### Reference annotations used to characterise the peaks linked to EOAD-DEGs ###
## Brain cCRE atlas (Li et al. 2023, "A comparative atlas of single-cell
## chromatin accessibility in the human brain"), read from a 4-column BED file
# NOTE(review): BED starts are usually 0-based while GRanges is 1-based; the
# coordinates are used as-is here - confirm this is intended.
brain.peaks <- read.delim("./Data/cCREs.bed", header = FALSE) %>%
    setNames(c("chr","start","end","annotation")) %>%
    GRanges()
head(brain.peaks)

## ENCODE cCRE track, downloaded from
## https://genome.ucsc.edu/cgi-bin/hgTrackUi?db=hg38&g=encodeCcreCombined
encode_peaks <- import.bb("./Data/encodeCcreCombined.bb")
head(encode_peaks)

## Brain single-cell eQTL table: drop rows without a chromosome and switch
## the chromosome names to the "chr" prefixed style
brain_eqlt <- read.csv("/data2/aliu8/2023_AD_multiome/Analysis/Data/brain_sc_eqtl_sig.csv")
brain_eqlt <- brain_eqlt[!is.na(brain_eqlt$chr), ]
brain_eqlt$chr <- paste0("chr", brain_eqlt$chr)
dim(brain_eqlt)
#head(brain_eqlt)

## One-base GRanges per SNP (start == end == pos), keeping the eQTL details
## as metadata columns
#snp_coords <- as.data.frame(sig[,c("chr","variant_pos","variant_pos","rs_id_dbSNP151_GRCh38p7","gene_name")])
snp_coords <- brain_eqlt[, c("chr","pos","pos","SNP","symbol","cell_type","dist_TSS","beta")] %>%
    setNames(c("chr","start","end","RSID","gene_name","cell_type","dist_TSS","beta")) %>%
    GRanges()
head(snp_coords)

In [None]:
## Annotate linked peaks with ENCODE cCRE, brain cCRE atlas and brain
## single-cell eQTL overlaps.
##
## Arguments:
##   linked_peaks  data.frame of links; must contain the columns `seqnames`,
##                 `peak.start`, `peak.end`, `gene`, `celltype` and `comb`.
##   encode_peaks  GRanges of ENCODE cCREs with metadata columns
##                 `encodeLabel`, `ucscLabel` and `description`.
##   brain.peaks   GRanges of the brain cCRE atlas.
##   eqtl_snps     GRanges of eQTL SNPs with metadata columns `RSID`,
##                 `gene_name`, `cell_type`, `dist_TSS` and `beta`. Defaults to
##                 the global `snp_coords`, which the function previously read
##                 implicitly.
##
## Returns `linked_peaks` with the added columns in_encode, in_brain_cCRE,
## in_brain_sc_eqlt, encodeLabel, ucscLabel, description, eqtl_RSID, dist_TSS
## and beta.
##
## The peak ranges are built from named columns. The former positional
## selection c(1,14,15,4:13) only matched tables whose p.adj column had been
## moved to the end, so other layouts picked up the wrong columns. The former
## self-referential defaults (`linked_peaks = linked_peaks`, ...) could never
## be evaluated and were dropped; every caller has to pass these arguments.
get_peak_annotation <- function(linked_peaks, encode_peaks, brain.peaks, eqtl_snps = snp_coords){
    required_cols <- c("seqnames","peak.start","peak.end","gene","celltype","comb")
    missing_cols <- setdiff(required_cols, colnames(linked_peaks))
    if (length(missing_cols) > 0) {
        stop("linked_peaks is missing column(s): ", paste(missing_cols, collapse = ", "))
    }

    ## peak coordinates as GRanges (one range per row of linked_peaks)
    peak_coords <- data.frame(
        seqnames = linked_peaks$seqnames,
        start = as.integer(linked_peaks$peak.start),
        end = as.integer(linked_peaks$peak.end)
    )
    df_granges <- makeGRangesFromDataFrame(peak_coords)

    ## find overlap with encode and brain cCRE
    overlap_encode <- findOverlaps(df_granges,encode_peaks)
    overlap_brain_ccre <- findOverlaps(df_granges,brain.peaks)
    overlap_eqtl <- findOverlaps(df_granges,eqtl_snps)

    ## show results
    print("EOAD-DEG linked peaks overlap with ENCODE cis-CRE")
    print(length(unique(queryHits(overlap_encode))))
    print(length(unique(queryHits(overlap_encode)))/length(df_granges))

    print("EOAD-DEG linked peaks overlap with brain cis-CRE atlas")
    print(length(unique(queryHits(overlap_brain_ccre))))
    print(length(unique(queryHits(overlap_brain_ccre)))/length(df_granges))

    print("EOAD-DEG linked peaks overlap with single cell brain eqtl")
    # one row per (linked peak, SNP inside the peak) pair
    snp_hits <- eqtl_snps[subjectHits(overlap_eqtl)]
    peaks_contain_snps <- linked_peaks[queryHits(overlap_eqtl), , drop = FALSE]
    peaks_contain_snps$RSID <- snp_hits$RSID
    peaks_contain_snps$eGene <- snp_hits$gene_name
    peaks_contain_snps$cell_type <- snp_hits$cell_type
    peaks_contain_snps$dist_TSS <- snp_hits$dist_TSS
    peaks_contain_snps$beta <- snp_hits$beta
    # keep pairs where the eQTL gene and cell type agree with the link;
    # which() drops NA comparisons instead of producing all-NA rows
    same_target <- which(peaks_contain_snps$gene == peaks_contain_snps$eGene &
                         peaks_contain_snps$celltype == peaks_contain_snps$cell_type)
    peaks_contain_snps <- peaks_contain_snps[same_target, , drop = FALSE]

    # count links, not rows: a peak holding several SNPs appears several times
    # (row names get ".1", ".2" suffixes, so they over-counted)
    n_eqtl_links <- length(unique(peaks_contain_snps$comb))
    print(n_eqtl_links)
    print(n_eqtl_links/length(df_granges))

    ## add annotation if in ENCODE or brain cCRE atlas
    # logical vectors avoid `df[idx,]$col <- TRUE`, which fails when idx is empty
    row_idx <- seq_len(nrow(linked_peaks))
    linked_peaks$in_encode <- row_idx %in% queryHits(overlap_encode)
    linked_peaks$in_brain_cCRE <- row_idx %in% queryHits(overlap_brain_ccre)
    linked_peaks$in_brain_sc_eqlt <- linked_peaks$comb %in% peaks_contain_snps$comb

    ## get annotation from ENCODE: one row per (link, overlapping cCRE) pair
    encode_levels <- c("PLS","pELS","DNase-H3K4me3","dELS","CTCF-only","Other")
    q_idx <- queryHits(overlap_encode)
    s_idx <- subjectHits(overlap_encode)
    enc_hits <- data.frame(
        comb = linked_peaks$comb[q_idx],
        encodeLabel = factor(as.character(encode_peaks$encodeLabel[s_idx]), levels = encode_levels),
        ucscLabel = as.character(encode_peaks$ucscLabel[s_idx]),
        description = as.character(encode_peaks$description[s_idx]),
        stringsAsFactors = FALSE
    )

    # based on the priority given by the factor levels, keep the first label
    # per link (order() is stable, so ties keep their overlap order)
    enc_hits <- enc_hits[order(enc_hits$encodeLabel), , drop = FALSE]
    f_df <- enc_hits[!duplicated(enc_hits$comb), , drop = FALSE]

    ### add annotation information to original data
    id <- match(linked_peaks$comb,f_df$comb)

    linked_peaks$encodeLabel <- f_df$encodeLabel[id]
    linked_peaks$ucscLabel <- f_df$ucscLabel[id]
    linked_peaks$description <- f_df$description[id]
    ## change na to other (vector-level assignment also works when nothing is NA)
    linked_peaks$encodeLabel[is.na(linked_peaks$encodeLabel)] <- "Other"
    linked_peaks$ucscLabel[is.na(linked_peaks$ucscLabel)] <- "Other"

    ## adding RSID information for brain eqtl
    # when several SNPs fall in one linked peak, the last one is kept
    linked_peaks$eqtl_RSID <- NA
    linked_peaks$dist_TSS <- NA
    linked_peaks$beta <- NA
    id <- match(peaks_contain_snps$comb,linked_peaks$comb)
    linked_peaks$eqtl_RSID[id] <- peaks_contain_snps$RSID
    linked_peaks$dist_TSS[id] <- peaks_contain_snps$dist_TSS
    linked_peaks$beta[id] <- peaks_contain_snps$beta

    return(linked_peaks)
}

In [None]:
### peak annotation on each region
## For every region the stored `p.adj` column is removed by name and a BH
## adjustment over the pooled links of the region is appended as the last
## column, so all three tables share the same column layout. Previously only
## the PFC table dropped the stored column (by position, `[,-11]`), which left
## EC and HIP with a different column order.
## NOTE(review): the input tables were already filtered on p.adj < 0.05 per
## cell type / direction, so this second BH adjustment runs on pre-filtered
## p-values.

## PFC
pfc_linked_peaks <- read.csv("./Results/LINK/PFC_linkpeaks_all.csv",header = TRUE)
pfc_linked_peaks$p.adj <- NULL
pfc_linked_peaks$p.adj <- p.adjust(pfc_linked_peaks$pvalue,method = "BH")
table(pfc_linked_peaks$p.adj < 0.05)
pfc_linked_peaks_annotated <- get_peak_annotation(linked_peaks = pfc_linked_peaks,encode_peaks = encode_peaks, brain.peaks = brain.peaks)
#pfc_linked_peaks_annotated[pfc_linked_peaks_annotated$in_brain_sc_eqlt,]
write.csv(pfc_linked_peaks_annotated, file = "./Results/LINK/PFC_linkpeaks_all_annotated.csv", row.names = F)

## EC
ec_linked_peaks <- read.csv("./Results/LINK/EC_linkpeaks_all.csv",header = TRUE)
ec_linked_peaks$p.adj <- NULL
ec_linked_peaks$p.adj <- p.adjust(ec_linked_peaks$pvalue,method = "BH")
table(ec_linked_peaks$p.adj < 0.05)
ec_linked_peaks_annotated <- get_peak_annotation(linked_peaks = ec_linked_peaks,encode_peaks = encode_peaks, brain.peaks = brain.peaks)
#ec_linked_peaks_annotated[ec_linked_peaks_annotated$in_brain_sc_eqlt,]
write.csv(ec_linked_peaks_annotated, file = "./Results/LINK/EC_linkpeaks_all_annotated.csv", row.names = F)

## HIP
hip_linked_peaks <- read.csv("./Results/LINK/HIP_linkpeaks_all.csv",header = TRUE)
hip_linked_peaks$p.adj <- NULL
hip_linked_peaks$p.adj <- p.adjust(hip_linked_peaks$pvalue,method = "BH")
table(hip_linked_peaks$p.adj < 0.05)
hip_linked_peaks_annotated <- get_peak_annotation(linked_peaks = hip_linked_peaks,encode_peaks = encode_peaks, brain.peaks = brain.peaks)
#hip_linked_peaks_annotated[hip_linked_peaks_annotated$in_brain_sc_eqlt,]
write.csv(hip_linked_peaks_annotated, file = "./Results/LINK/HIP_linkpeaks_all_annotated.csv", row.names = F)

In [None]:
### check annotated results ###
# Reload each region's annotated link table from disk and tabulate the number
# of links per cell type and DEG direction (printed in PFC, EC, HIP order).
pfc_linked_peaks_annotated <- read.csv("./Results/LINK/PFC_linkpeaks_all_annotated.csv")
ec_linked_peaks_annotated <- read.csv("./Results/LINK/EC_linkpeaks_all_annotated.csv")
hip_linked_peaks_annotated <- read.csv("./Results/LINK/HIP_linkpeaks_all_annotated.csv")

table(pfc_linked_peaks_annotated[["celltype"]], pfc_linked_peaks_annotated[["deg_dir"]])
table(ec_linked_peaks_annotated[["celltype"]], ec_linked_peaks_annotated[["deg_dir"]])
table(hip_linked_peaks_annotated[["celltype"]], hip_linked_peaks_annotated[["deg_dir"]])

In [None]:
### Figure 3d.
## draw proportional plot of ENCODE cCRE annotation
# After the CSV round-trip `encodeLabel` is plain text, so table() orders the
# categories alphabetically. The former unnamed `values`/`labels` vectors in
# scale_fill_manual() were applied by position and therefore attached the wrong
# legend text and colour to each category. The category order is now fixed
# explicitly and the colours are matched by name.
anno_levels <- c("PLS","pELS","DNase-H3K4me3","dELS","CTCF-only","Other")
anno_colors <- setNames(
    c("#FF0000","#FFA700","#FFAAAA","#FFCD00","#00B0F0","#706f6f"),
    anno_levels
)

pfc_pro <- prop.table(table(factor(pfc_linked_peaks_annotated$encodeLabel, levels = anno_levels)))
ec_pro <- prop.table(table(factor(ec_linked_peaks_annotated$encodeLabel, levels = anno_levels)))
hip_pro <- prop.table(table(factor(hip_linked_peaks_annotated$encodeLabel, levels = anno_levels)))

pfc_pro <- as.data.frame(pfc_pro)
pfc_pro$region <- 'PFC'
ec_pro <- as.data.frame(ec_pro)
ec_pro$region <- 'EC'
hip_pro <- as.data.frame(hip_pro)
hip_pro$region <- 'HIP'

temp <- rbind(pfc_pro,ec_pro,hip_pro)
temp$Region <- factor(temp$region,levels = c("HIP","EC","PFC"))
colnames(temp)[1] <- "Annotation"

# print() explicitly so the figure is written even when this code is source()d
pdf("/data2/aliu8/2023_AD_multiome/Analysis/Figures/LINK/region_linked_peaks_annotation_3.29.pdf",width = 12,height = 3)
print(
    ggplot(temp, aes(x=Freq, y=Region, fill=Annotation)) +
        geom_bar(stat="identity")+xlab("Proportion of Linked peaks") +
        ylab("Region") +
        theme_classic() +
        scale_fill_manual(values = anno_colors, breaks = anno_levels, drop = FALSE) +
        theme(legend.position="right", axis.text=element_text(size=12),axis.title=element_text(size=15), legend.text=element_text(size=12))
)
dev.off()

In [None]:
### Supplementary Figs ENCODE cCREs annotation on each regions
#linked_peaks_annotated <- ec_linked_peaks_annotated
linked_peaks_annotated <- pfc_linked_peaks_annotated
#linked_peaks_annotated <- hip_linked_peaks_annotated

# Fixed category order and name-matched colours. The former unnamed
# `values`/`labels` vectors in scale_fill_manual() were applied by position to
# alphabetically ordered categories, which mislabelled the legend.
anno_levels <- c("PLS","pELS","DNase-H3K4me3","dELS","CTCF-only","Other")
anno_colors <- setNames(
    c("#FF0000","#FFA700","#FFAAAA","#FFCD00","#00B0F0","#706f6f"),
    anno_levels
)

## create a dataframe of proportion (one block of rows per cell type)
ct <- c('Astrocyte','Excitatory','Inhibitory','Microglia','Oligodendrocyte','OPC')
df <- do.call(rbind, lapply(ct, function(celltype){
    ct_labels <- linked_peaks_annotated$encodeLabel[which(linked_peaks_annotated$celltype == celltype)]
    # the explicit levels keep all six categories even for a cell type
    # without links (its proportions are then NaN)
    prop <- as.data.frame(prop.table(table(factor(ct_labels, levels = anno_levels))))
    prop$celltype <- celltype
    prop
}))

colnames(df)[1] <- "Annotation"

# print() explicitly so the figure is written even when this code is source()d
pdf("./Figures/LINK/region_linked_peaks_annotation_PFC.pdf",width = 10,height = 4)
print(
    ggplot(df, aes(x=Freq, y=celltype, fill=Annotation)) +
        geom_bar(stat="identity")+xlab("Proportion of Linked peaks") +
        ylab("Cell type") +
        theme_classic() +
        scale_fill_manual(values = anno_colors, breaks = anno_levels, drop = FALSE) +
        theme(legend.position="right", axis.text=element_text(size=12),axis.title=element_text(size=15), legend.text=element_text(size=12))
)
dev.off()

In [None]:
### Motif enrichment analysis
## loading object
# One object per brain region; the paths are relative to the working directory.
pfc <- readRDS("cellbender_PFC_object.rds")
ec <- readRDS("cellbender_EC_object.rds")
hip <- readRDS("cellbender_HIP_object.rds")

## download human CORE TFBS from JASPAR
# Path to the JASPAR matrix file (printed below for a quick check).
fn <- file.path("/data2/aliu8/2023_AD_multiome/Analysis/Data/JASPAR2024_hs_core_755.txt")
fn

# NOTE(review): readJASPARMatrix() is not provided by any package attached in
# the visible library() calls (it looks like the TFBSTools reader) - confirm
# that package is loaded before this cell runs.
# NOTE(review): `pfm` is not used again in the visible part of this notebook;
# presumably the motifs were already added to the objects - verify.
pfm <- readJASPARMatrix(fn, matrixClass="PFM")
#names(pfm) <- str_split_fixed(names(pfm),"\\.",n = 3)[,3]
pfm

In [None]:
# Motif enrichment for the peaks linked to one cell type / DEG direction.
#
# Arguments:
#   object         Seurat object with a "CTpeaks" assay whose identities are
#                  cell-type labels.
#   celltype       Cell-type label; selects the links and the cells used to
#                  define accessible background peaks.
#   deg_direction  DEG direction label, matched against `links_df$deg_dir`.
#   links_df       data.frame of links with columns `celltype`, `deg_dir`,
#                  `score` and `peak`. Defaults to the global `links`, which
#                  the function previously read implicitly.
#
# Returns the FindMotifs() rows with p.adjust < 0.05, with `celltype` and
# `deg_dir` columns added; an empty data.frame when no peak is selected.
get_motif <- function(object,celltype,deg_direction,links_df = links){
    # get interest peaks; a peak linked to several DEGs is tested once,
    # otherwise it would be counted repeatedly in the enrichment test
    keep <- which(links_df$celltype == celltype &
                  links_df$deg_dir == deg_direction &
                  links_df$score > 0.05)
    peak_interest <- unique(links_df$peak[keep])
    message(paste("Testing on",length(peak_interest),"peaks links to",celltype,deg_direction,"DEGs."))

    if (length(peak_interest) == 0) {
        message("No linked peaks for this cell type / direction; skipping motif test.")
        return(data.frame())
    }

    # find peaks open in selected cell type
    DefaultAssay(object) <- "CTpeaks"
    open.peaks <- AccessiblePeaks(object, idents = celltype)

    # match the overall GC content in the peak set
    meta.feature <- GetAssayData(object, assay = "CTpeaks", slot = "meta.features")
    peaks.matched <- MatchRegionStats(
        meta.feature = meta.feature[open.peaks, ],
        query.feature = meta.feature[peak_interest, ],
        n = 50000)

    ## test enrichment against the matched background
    enriched.motifs <- FindMotifs(
        object = object,
        features = peak_interest,
        background=peaks.matched)

    enriched.motifs$celltype <- celltype
    enriched.motifs$deg_dir <- deg_direction

    # getting significant results
    sig.enriched.motifs <- enriched.motifs[which(enriched.motifs$p.adjust < 0.05), , drop = FALSE]

    return(sig.enriched.motifs)
}

In [None]:
## Get motif information for finding TF - peaks - differential correlation - DEGs
# NOTE(review): `object` is whichever object was assigned last in an earlier
# cell; this cell does not set it itself.
# Motif data stored in the "data" slot of the CTpeaks assay.
motif.all <- GetMotifData(
    object = object, assay = "CTpeaks", slot = "data"
  )
# motif.all[1,]
# Motif names stored in the "motif.names" slot of the same assay.
motif.names <- GetMotifData(
    object = object, assay = "CTpeaks", slot = "motif.names"
  )
length(motif.names)
#names(motif.names$V1) <- str_split_fixed(names(motif.names),"\\.",n = 3)[,3]
# generate a data frame of motif names:
#   V1 = third "."-separated field of the stored name
#   V2 = part of V1 before the first "::"
# NOTE(review): presumably V1 is the TF name and V2 the first partner of a
# heterodimer motif - confirm against the stored motif names.
motif.names <- as.data.frame(as.matrix(motif.names))
motif.names$V1 <- str_split_fixed(motif.names$V1,"\\.",n = 3)[,3]
motif.names$V2 <- str_split_fixed(motif.names$V1,"::",n = 2)[,1]

head(motif.names)
#unlist(motif.names)
#rownames(motif.names[motif.names$V1 == "ASCL1",])
# how many of the (unique) names are row names of the PC assay
table(unique(motif.names$V1) %in% rownames(object@assays$PC))
table(unique(motif.names$V2) %in% rownames(object@assays$PC))

length(unique(motif.names$V2))

In [None]:
## running analysis on PFC region object
# load data
object <- pfc
object

## DEG table for this region
degs <- read.csv("./Results/DEG/Overlap_mast_mixed_PFC.csv",row.names = 1)
message("How many DEGs in each cell type in PFC.")
dim(degs)
table(degs$celltype,degs$dir)
#degs

## read in the link data
links <- read.csv("./Results/LINK/PFC_linkpeaks_all_annotated.csv")
# key used to match links to `degs$comb`
# NOTE(review): assumes degs$comb is built as gene_dir_celltype - confirm
links$new_comb <- paste(links$gene,links$deg_dir,links$celltype,sep = "_")

message("How many linked peaks in each cell type in PFC.")
dim(links)
table(links$celltype,links$deg_dir)

message("How many linked DEGs in each cell type in PFC.")
degs <- degs[degs$comb %in% unique(links$new_comb),]
table(degs$celltype,degs$dir)
length(degs$comb)
#degs
links <- links[links$new_comb %in% degs$comb,]

### running analysis
ct <- c("Astrocyte","Excitatory","Inhibitory","Microglia","Oligodendrocyte","OPC")
direction <- c("pos","neg")
Idents(object) <- "cluster_celltype"
final <- data.frame()

## running: one enrichment test per cell type x DEG direction
for (i in ct){
    for(j in direction){
        celltype <- i
        print(celltype)
        deg_direction <- j
        # was print(dir), which printed an unrelated object instead of the direction
        print(deg_direction)

        res1 <- get_motif(object=object, celltype=celltype, deg_direction = deg_direction)
        print(length(res1$motif.name))
        final <- rbind(final,res1)
    }
}

#### motif enrichment results for PFC
enriched.motif <- final
table(enriched.motif$celltype,enriched.motif$deg_dir)

# keep the third "."-separated field of the motif name, then the part before
# the first "::". The second split used n = 1, which returns the whole string
# unchanged; n = 2 matches the handling of motif.names$V2 elsewhere in this
# notebook.
enriched.motif$motif.name <- str_split_fixed(enriched.motif$motif.name,"\\.",n = 3)[,3]
enriched.motif$motif.name <- str_split_fixed(enriched.motif$motif.name,"::",n = 2)[,1]

head(enriched.motif)

## only keeping those TFs expressed in the specific cell type.
expr.res <- data.frame()

for (i in ct){
    print(i)

    # cells of the cell type of interest
    obj <- subset(object, subset = cluster_celltype == i)

    # genes of the PC assay detected (count > 0) in more than 25% of these cells
    counts <- LayerData(obj,assay = "PC",layer = "counts")# code in seurat 5.0+
    genes.percent.expression <- rowMeans(counts>0 )*100
    genes_selected <- names(genes.percent.expression[genes.percent.expression>25])
    message(paste(length(genes_selected)," genes expressed in >25% cells."))

    # enriched motifs of this cell type whose name is one of those genes
    motif.res <- enriched.motif[enriched.motif$celltype == i,]
    motif.res <- motif.res[motif.res$motif.name %in% genes_selected,]
    message(length(motif.res$motif))

    expr.res <-rbind(expr.res,motif.res)

}

table(expr.res$celltype,expr.res$deg_dir)
## saving results
write.csv(expr.res, file = "./Results/LINK/PFC_enriched_JASPAR_motif_expr.csv")

In [None]:
## running analysis on EC region object
# load data
object <- ec
object

## DEG table for this region
degs <- read.csv("./Results/DEG/Overlap_mast_mixed_EC.csv",row.names = 1)
message("How many DEGs in each cell type in EC.")
dim(degs)
table(degs$celltype,degs$dir)
#degs

## read in the link data
links <- read.csv("./Results/LINK/EC_linkpeaks_all_annotated.csv")
# key used to match links to `degs$comb`
# NOTE(review): assumes degs$comb is built as gene_dir_celltype - confirm
links$new_comb <- paste(links$gene,links$deg_dir,links$celltype,sep = "_")

message("How many linked peaks in each cell type in EC.")
dim(links)
table(links$celltype,links$deg_dir)

message("How many linked DEGs in each cell type in EC.")
degs <- degs[degs$comb %in% unique(links$new_comb),]
table(degs$celltype,degs$dir)
length(degs$comb)
#degs
links <- links[links$new_comb %in% degs$comb,]

ct <- c("Astrocyte","Excitatory","Inhibitory","Microglia","Oligodendrocyte","OPC")
direction <- c("pos","neg")
Idents(object) <- "cluster_celltype"
final <- data.frame()

## running: one enrichment test per cell type x DEG direction
for (i in ct){
    for(j in direction){
        celltype <- i
        print(celltype)
        deg_direction <- j
        # was print(dir), which printed an unrelated object instead of the direction
        print(deg_direction)

        res1 <- get_motif(object=object, celltype=celltype, deg_direction = deg_direction)
        print(length(res1$motif.name))
        final <- rbind(final,res1)
    }
}

#### motif enrichment results for EC
enriched.motif <- final
table(enriched.motif$celltype,enriched.motif$deg_dir)

# keep the third "."-separated field of the motif name, then the part before
# the first "::". The second split used n = 1, which returns the whole string
# unchanged; n = 2 matches the handling of motif.names$V2 elsewhere in this
# notebook.
enriched.motif$motif.name <- str_split_fixed(enriched.motif$motif.name,"\\.",n = 3)[,3]
enriched.motif$motif.name <- str_split_fixed(enriched.motif$motif.name,"::",n = 2)[,1]

head(enriched.motif)

## only keeping those TFs expressed in the specific cell type.
expr.res <- data.frame()

for (i in ct){
    print(i)

    # cells of the cell type of interest
    obj <- subset(object, subset = cluster_celltype == i)

    # genes of the PC assay detected (count > 0) in more than 25% of these cells
    counts <- LayerData(obj,assay = "PC",layer = "counts")# code in seurat 5.0+
    genes.percent.expression <- rowMeans(counts>0 )*100
    genes_selected <- names(genes.percent.expression[genes.percent.expression>25])
    message(paste(length(genes_selected)," genes expressed in >25% cells."))

    # enriched motifs of this cell type whose name is one of those genes
    motif.res <- enriched.motif[enriched.motif$celltype == i,]
    motif.res <- motif.res[motif.res$motif.name %in% genes_selected,]
    message(length(motif.res$motif))

    expr.res <-rbind(expr.res,motif.res)

}

table(expr.res$celltype,expr.res$deg_dir)
## saving results
write.csv(expr.res, file = "./Results/LINK/EC_enriched_JASPAR_motif_expr.csv")

In [None]:
## running analysis on HIP region object
# load data
object <- hip
object

## DEG table for this region
degs <- read.csv("./Results/DEG/Overlap_mast_mixed_HIP.csv",row.names = 1)
message("How many DEGs in each cell type in HIP.")
dim(degs)
table(degs$celltype,degs$dir)
#degs

## read in the link data
links <- read.csv("./Results/LINK/HIP_linkpeaks_all_annotated.csv")
# key used to match links to `degs$comb`
# NOTE(review): assumes degs$comb is built as gene_dir_celltype - confirm
links$new_comb <- paste(links$gene,links$deg_dir,links$celltype,sep = "_")

message("How many linked peaks in each cell type in HIP.")
dim(links)
table(links$celltype,links$deg_dir)

message("How many linked DEGs in each cell type in HIP.")
degs <- degs[degs$comb %in% unique(links$new_comb),]
table(degs$celltype,degs$dir)
length(degs$comb)
#degs
links <- links[links$new_comb %in% degs$comb,]

## running
ct <- c("Astrocyte","Excitatory","Inhibitory","Microglia","Oligodendrocyte","OPC")
direction <- c("pos","neg")
Idents(object) <- "cluster_celltype"
final <- data.frame()

#i = "Excitatory"
#j = "neg"
## one enrichment test per cell type x DEG direction
for (i in ct){
    for(j in direction){
        celltype <- i
        print(celltype)
        deg_direction <- j
        # was print(dir), which printed an unrelated object instead of the direction
        print(deg_direction)

        res1 <- get_motif(object=object, celltype=celltype, deg_direction = deg_direction)
        print(length(res1$motif.name))
        final <- rbind(final,res1)
    }
}

#### motif enrichment results for HIP
enriched.motif <- final
table(enriched.motif$celltype,enriched.motif$deg_dir)

# keep the third "."-separated field of the motif name, then the part before
# the first "::". The second split used n = 1, which returns the whole string
# unchanged; n = 2 matches the handling of motif.names$V2 elsewhere in this
# notebook.
enriched.motif$motif.name <- str_split_fixed(enriched.motif$motif.name,"\\.",n = 3)[,3]
enriched.motif$motif.name <- str_split_fixed(enriched.motif$motif.name,"::",n = 2)[,1]

head(enriched.motif)

## only keeping those TFs expressed in the specific cell type.
expr.res <- data.frame()

for (i in ct){
    print(i)

    # cells of the cell type of interest
    obj <- subset(object, subset = cluster_celltype == i)

    # genes of the PC assay detected (count > 0) in more than 25% of these cells
    counts <- LayerData(obj,assay = "PC",layer = "counts")# code in seurat 5.0+
    genes.percent.expression <- rowMeans(counts>0 )*100
    genes_selected <- names(genes.percent.expression[genes.percent.expression>25])
    message(paste(length(genes_selected)," genes expressed in >25% cells."))

    # enriched motifs of this cell type whose name is one of those genes
    motif.res <- enriched.motif[enriched.motif$celltype == i,]
    motif.res <- motif.res[motif.res$motif.name %in% genes_selected,]
    message(length(motif.res$motif))

    expr.res <-rbind(expr.res,motif.res)

}

table(expr.res$celltype,expr.res$deg_dir)

## saving results
write.csv(expr.res, file = "./Results/LINK/HIP_enriched_JASPAR_motif_expr.csv")

In [None]:
## visualization Figure 3e
# Reload the per-region motif tables and add a `log2FC` column.
# NOTE(review): log2FC is the NEGATIVE log2 of fold.enrichment, so enriched
# motifs (fold > 1) get negative values - confirm the sign is intended.
pfc_motif <- read.csv("/data2/aliu8/2023_AD_multiome/Analysis/Results/LINK/PFC_enriched_JASPAR_motif_expr.csv", row.names = 1)
pfc_motif[["log2FC"]] <- -log2(pfc_motif[["fold.enrichment"]])
ec_motif <- read.csv("/data2/aliu8/2023_AD_multiome/Analysis/Results/LINK/EC_enriched_JASPAR_motif_expr.csv", row.names = 1)
ec_motif[["log2FC"]] <- -log2(ec_motif[["fold.enrichment"]])
hip_motif <- read.csv("/data2/aliu8/2023_AD_multiome/Analysis/Results/LINK/HIP_enriched_JASPAR_motif_expr.csv", row.names = 1)
hip_motif[["log2FC"]] <- -log2(hip_motif[["fold.enrichment"]])

# table sizes and number of distinct motif names across the three regions
dim(pfc_motif)
dim(ec_motif)
dim(hip_motif)
length(unique(c(pfc_motif[["motif.name"]], ec_motif[["motif.name"]], hip_motif[["motif.name"]])))

# motifs per cell type and DEG direction
table(pfc_motif[["celltype"]], pfc_motif[["deg_dir"]])
table(ec_motif[["celltype"]], ec_motif[["deg_dir"]])
table(hip_motif[["celltype"]], hip_motif[["deg_dir"]])

# tag every table with its region
pfc_motif[["region"]] <- "PFC"
ec_motif[["region"]] <- "EC"
hip_motif[["region"]] <- "HIP"

# split each region by DEG direction
pfc_motif_pos <- pfc_motif[pfc_motif[["deg_dir"]] == "pos", ]
pfc_motif_neg <- pfc_motif[pfc_motif[["deg_dir"]] == "neg", ]

ec_motif_pos <- ec_motif[ec_motif[["deg_dir"]] == "pos", ]
ec_motif_neg <- ec_motif[ec_motif[["deg_dir"]] == "neg", ]

hip_motif_pos <- hip_motif[hip_motif[["deg_dir"]] == "pos", ]
hip_motif_neg <- hip_motif[hip_motif[["deg_dir"]] == "neg", ]

In [None]:
# combine for plotting: upregulated part
motif_pos <- rbind(pfc_motif_pos,ec_motif_pos,hip_motif_pos)
# column key: first three letters of the cell type + region, e.g. "Ast_PFC"
motif_pos$ct_rg <- paste(substr(motif_pos$celltype,start=1,stop=3),motif_pos$region, sep = "_")
# p.adjust == 0 gives Inf here
motif_pos$log10.adjp <- -log10(motif_pos$p.adjust)

## order based on the following order
cg_order <- c("Ast_PFC","Ast_EC","Ast_HIP",
                "Exc_PFC","Exc_EC","Exc_HIP",
                "Inh_PFC","Inh_EC","Inh_HIP",
                "Mic_PFC","Mic_EC","Mic_HIP",
                "Oli_PFC","Oli_EC","Oli_HIP",
                "OPC_PFC","OPC_EC","OPC_HIP")
motif_pos <- motif_pos[order(match(motif_pos$ct_rg, cg_order)),]
head(motif_pos)
#motif_pos
table(motif_pos$celltype, motif_pos$ct_rg)

# top 10 motifs (by -log10 adjusted p, ties kept) per cell type x region
df <- motif_pos %>%
    group_by(ct_rg) %>%
    top_n(n = 10, wt = log10.adjp) %>%
    ungroup()
#df
length(unique(df$motif))
top <- df$motif
#top

# Matrix dimensions follow the data; the column count used to be hard-coded
# to 17, which breaks as soon as a different number of cell type x region
# groups has enriched motifs.
top_motifs <- unique(top)
ct_rg_cols <- unique(df$ct_rg)

# mat:     -log10(adjusted p), 0 where the motif is not enriched in the group
# sig_mat: adjusted p,         1 where the motif is not enriched in the group
mat <- matrix(0, nrow = length(top_motifs), ncol = length(ct_rg_cols),
              dimnames = list(NULL, ct_rg_cols))
sig_mat <- matrix(1, nrow = length(top_motifs), ncol = length(ct_rg_cols),
                  dimnames = list(NULL, ct_rg_cols))

# fill both matrices in one vectorised step; if a motif occurs more than once
# in a group the last row wins (the scalar assignment used to fail there)
hits <- motif_pos[motif_pos$motif %in% top_motifs & motif_pos$ct_rg %in% ct_rg_cols, ]
cell_idx <- cbind(match(hits$motif, top_motifs), match(hits$ct_rg, ct_rg_cols))
mat[cell_idx] <- hits$log10.adjp
sig_mat[cell_idx] <- hits$p.adjust

# row labels: third "."-separated field of the motif identifier
rownames(mat) <- str_split_fixed(top_motifs,"\\.",n = 3)[,3]
#mat

#sig_mat

## plotting
ha<-HeatmapAnnotation(Region=colnames(mat)
                       , col= list(Region=c("Ast_PFC"="#c25757ff","Ast_EC"="#825ca6ff","Ast_HIP"="#3f78c1ff",
                                            "Exc_PFC"="#c25757ff","Exc_EC"="#825ca6ff","Exc_HIP"="#3f78c1ff",
                                            "Inh_PFC"="#c25757ff","Inh_EC"="#825ca6ff","Inh_HIP"="#3f78c1ff",
                                            "Mic_PFC"="#c25757ff","Mic_EC"="#825ca6ff","Mic_HIP"="#3f78c1ff",
                                            "Oli_PFC"="#c25757ff","Oli_EC"="#825ca6ff","Oli_HIP"="#3f78c1ff",
                                            "OPC_PFC"="#c25757ff","OPC_EC"="#825ca6ff","OPC_HIP"="#3f78c1ff")), show_legend=F,annotation_label="Brain region")
ha2<-HeatmapAnnotation(Celltype=colnames(mat)
                       , col= list(Celltype=c("Ast_PFC"="#F06719","Ast_EC"="#F06719","Ast_HIP"="#F06719",
                                            "Exc_PFC"="#33A65C","Exc_EC"="#33A65C","Exc_HIP"="#33A65C",
                                            "Inh_PFC"="#23767C","Inh_EC"="#23767C","Inh_HIP"="#23767C",
                                            "Mic_PFC"="#E03426","Mic_EC"="#E03426","Mic_HIP"="#E03426",
                                            "Oli_PFC"="#1BA3C6","Oli_EC"="#1BA3C6","Oli_HIP"="#1BA3C6",
                                            "OPC_PFC"="#A26DC2","OPC_HIP"="#A26DC2","OPC_EC"="#A26DC2")), show_legend=F,annotation_label="Cell type")
ha <- c(ha2,ha)

ht1 <- Heatmap(mat,
    cluster_rows = F,
    cluster_columns = F,
    col = colorRamp2(c(0,50,100),c("grey99","red","red4")),
    row_names_side = "left",#row_names_gp = gpar(fontface="italic"),
    top_annotation=ha,show_column_names=F,show_row_dend = F,
    cell_fun = function(j, i, x, y, w, h, fill) {
  if(sig_mat[i, j] <0.05) {
    grid.text("*", x, y, gp=gpar(fontsize=15, col="black"), vjust="center")
  } })



## writing figures
pdf("/data2/aliu8/2023_AD_multiome/Analysis/Figures/LINK/motif_enrichment_pos.pdf",height = 8,width = 5.5)
ht1
dev.off()

In [None]:
# Combine the motif enrichment results for the downregulated ("neg") DEGs
# across the three regions and draw a heatmap of -log10(adjusted p) for the top
# motifs of every cell-type/region group.
motif_neg <- rbind(pfc_motif_neg, ec_motif_neg, hip_motif_neg)
# Group label: first three letters of the cell type + "_" + region
motif_neg$ct_rg <- paste(substr(motif_neg$celltype, start = 1, stop = 3), motif_neg$region, sep = "_")
motif_neg$log10.adjp <- -log10(motif_neg$p.adjust)

table(motif_neg$celltype, motif_neg$region)
## sort rows by the following cell-type/region order
cg_order <- c("Ast_PFC","Ast_EC","Ast_HIP",
                "Exc_PFC","Exc_EC","Exc_HIP",
                "Inh_PFC","Inh_EC","Inh_HIP",
                "Mic_PFC","Mic_EC","Mic_HIP",
                "Oli_PFC","Oli_EC","Oli_HIP",
                "OPC_PFC","OPC_EC","OPC_HIP")

motif_neg <- motif_neg[order(match(motif_neg$ct_rg, cg_order)), ]
head(motif_neg)
#motif_neg
table(motif_neg$celltype, motif_neg$region)

# Top 10 motifs per group by -log10(adjusted p); top_n() keeps ties
df <- motif_neg %>%
  group_by(ct_rg) %>%
  top_n(n = 10, wt = log10.adjp) %>%
  ungroup()
length(unique(df$motif))
top <- df$motif

#motif_neg

# Heatmap rows = unique top motifs, columns = groups actually present in `df`.
# The column count is derived from the data instead of being hard-coded, so the
# cell keeps working when a group has no enriched motif.
top_motifs <- unique(top)
ct_rg_cols <- unique(df$ct_rg)

# mat: -log10(adjusted p), 0 where the motif was not reported for a group
# sig_mat: adjusted p, 1 where the motif was not reported for a group
mat <- matrix(0, nrow = length(top_motifs), ncol = length(ct_rg_cols))
colnames(mat) <- ct_rg_cols
sig_mat <- matrix(1, nrow = length(top_motifs), ncol = length(ct_rg_cols))
colnames(sig_mat) <- ct_rg_cols

for (j in seq_along(top_motifs)) {
  for (i in seq_along(ct_rg_cols)) {
    hit <- which(motif_neg$motif == top_motifs[j] & motif_neg$ct_rg == ct_rg_cols[i])
    if (length(hit) > 0) {
      # If a motif is listed more than once for a group, keep the most
      # significant entry instead of failing on a multi-value assignment.
      best <- hit[order(motif_neg$p.adjust[hit])[1]]
      mat[j, i] <- motif_neg$log10.adjp[best]
      sig_mat[j, i] <- motif_neg$p.adjust[best]
    }
  }
}

# Row labels: third "."-separated field of the motif identifier
rownames(mat) <- str_split_fixed(top_motifs, "\\.", n = 3)[, 3]
#mat
#sig_mat

## plotting
# Column annotations: both are keyed by the group label; the first colours by
# brain region, the second by cell type.
ha<-HeatmapAnnotation(Region=colnames(mat)
                       , col= list(Region=c("Ast_PFC"="#c25757ff","Ast_EC"="#825ca6ff","Ast_HIP"="#3f78c1ff",
                                            "Exc_PFC"="#c25757ff","Exc_EC"="#825ca6ff","Exc_HIP"="#3f78c1ff",
                                            "Inh_PFC"="#c25757ff","Inh_EC"="#825ca6ff","Inh_HIP"="#3f78c1ff",
                                            "Mic_PFC"="#c25757ff","Mic_EC"="#825ca6ff","Mic_HIP"="#3f78c1ff",
                                            "Oli_PFC"="#c25757ff","Oli_EC"="#825ca6ff","Oli_HIP"="#3f78c1ff",
                                            "OPC_PFC"="#c25757ff","OPC_EC"="#825ca6ff","OPC_HIP"="#3f78c1ff")), show_legend=FALSE,annotation_label="Brain region")
ha2<-HeatmapAnnotation(Celltype=colnames(mat)
                       , col= list(Celltype=c("Ast_PFC"="#F06719","Ast_EC"="#F06719","Ast_HIP"="#F06719",
                                            "Exc_PFC"="#33A65C","Exc_EC"="#33A65C","Exc_HIP"="#33A65C",
                                            "Inh_PFC"="#23767C","Inh_EC"="#23767C","Inh_HIP"="#23767C",
                                            "Mic_PFC"="#E03426","Mic_EC"="#E03426","Mic_HIP"="#E03426",
                                            "Oli_PFC"="#1BA3C6","Oli_EC"="#1BA3C6","Oli_HIP"="#1BA3C6",
                                            "OPC_PFC"="#A26DC2","OPC_HIP"="#A26DC2","OPC_EC"="#A26DC2")), show_legend=FALSE,annotation_label="Cell type")
ha <- c(ha2,ha)

ht2 <- Heatmap(mat,
    cluster_rows = FALSE,
    cluster_columns = FALSE,
    col = colorRamp2(c(0,50,100),c("grey99","deepskyblue","dodgerblue4")),
    row_names_side = "left", #row_names_gp = gpar(fontface="italic"),
    top_annotation=ha,show_column_names=FALSE,show_row_dend = FALSE,
    cell_fun = function(j, i, x, y, w, h, fill) {
      # Star the cells with adjusted p < 0.05; isTRUE() skips NA p-values
      if (isTRUE(sig_mat[i, j] < 0.05)) {
        grid.text("*", x, y, gp=gpar(fontsize=15, col="black"), vjust="center")
      }
    })

## saving
pdf("./Figures/LINK/motif_enrichment_neg.pdf",height = 8,width = 6)
# draw() renders explicitly, so the figure is also written when the code is
# source()d and top-level auto-printing is not available
draw(ht2)
dev.off()

In [None]:
### Figure 3/4: footprinting analysis
# For every TF of interest, write one multi-page PDF of footprint plots:
# page 1 shows all cell type x diagnosis groups, the following pages show the
# EOAD vs NCI pair of each cell type.
TF_oi <- read.csv("./Results/LINK/TF_oi.csv")

## working on EC first
object <- ec

# Identity setup does not depend on the TF, so it is done once up front:
# identities become "<cluster_celltype>_<diagnosis>", with sorted levels.
object$cell_diag <- paste(object$cluster_celltype, object$diagnosis, sep = "_")
Idents(object) <- object$cell_diag
levels(object) <- sort(levels(object))

# Identity sets to plot, one PDF page each (same order as the pages)
ident_sets <- list(
  c("Astrocyte_EOAD","Astrocyte_NCI","Excitatory_EOAD","Excitatory_NCI","Inhibitory_NCI","Inhibitory_EOAD","Microglia_EOAD","Microglia_NCI","Oligodendrocyte_EOAD","Oligodendrocyte_NCI","OPC_EOAD","OPC_NCI"),
  c("Astrocyte_EOAD","Astrocyte_NCI"),
  c("Excitatory_EOAD","Excitatory_NCI"),
  c("Inhibitory_NCI","Inhibitory_EOAD"),
  c("Microglia_NCI","Microglia_EOAD"),
  c("Oligodendrocyte_EOAD","Oligodendrocyte_NCI"),
  c("OPC_EOAD","OPC_NCI")
)

## running loop
# seq_along() makes the loop a no-op when TF_oi has no rows
for (i in seq_along(TF_oi$motif)) {
  print(i)
  tf.name <- TF_oi$motif[i]

  outpath <- paste("./Figures/LINK/EC/EC_footprinting_",TF_oi$motif.name[i],".pdf",sep = "")

  ## footprinting analysis visualization
  pdf(outpath, width = 6, height = 5)
  # `finally` closes the PDF device even if a plot call fails, so a failure
  # does not leave a dangling graphics device behind; the error still propagates
  tryCatch({
    for (ids in ident_sets) {
      print(PlotFootprint(object, features = tf.name, label = FALSE, idents = ids))
    }
  }, finally = dev.off())
}

In [None]:
### Figure 3/4: footprinting analysis
## HIP
# For every TF of interest (rows of `TF_oi`, read from ./Results/LINK/TF_oi.csv
# in the EC footprinting cell), write one multi-page PDF of footprint plots:
# page 1 shows all cell type x diagnosis groups, the following pages show the
# EOAD vs NCI pair of each cell type.
object <- hip

# Identity setup does not depend on the TF, so it is done once up front:
# identities become "<cluster_celltype>_<diagnosis>", with sorted levels.
object$cell_diag <- paste(object$cluster_celltype, object$diagnosis, sep = "_")
Idents(object) <- object$cell_diag
levels(object) <- sort(levels(object))

# Identity sets to plot, one PDF page each (same order as the pages)
ident_sets <- list(
  c("Astrocyte_EOAD","Astrocyte_NCI","Excitatory_EOAD","Excitatory_NCI","Inhibitory_NCI","Inhibitory_EOAD","Microglia_EOAD","Microglia_NCI","Oligodendrocyte_EOAD","Oligodendrocyte_NCI","OPC_EOAD","OPC_NCI"),
  c("Astrocyte_EOAD","Astrocyte_NCI"),
  c("Excitatory_EOAD","Excitatory_NCI"),
  c("Inhibitory_NCI","Inhibitory_EOAD"),
  c("Microglia_NCI","Microglia_EOAD"),
  c("Oligodendrocyte_EOAD","Oligodendrocyte_NCI"),
  c("OPC_EOAD","OPC_NCI")
)

## running loop
# seq_along() makes the loop a no-op when TF_oi has no rows
for (i in seq_along(TF_oi$motif)) {
  print(i)
  tf.name <- TF_oi$motif[i]

  outpath <- paste("./Figures/LINK/HIP/HIP_footprinting_",TF_oi$motif.name[i],".pdf",sep = "")

  ## footprinting analysis visualization
  pdf(outpath, width = 6, height = 5)
  # `finally` closes the PDF device even if a plot call fails, so a failure
  # does not leave a dangling graphics device behind; the error still propagates
  tryCatch({
    for (ids in ident_sets) {
      print(PlotFootprint(object, features = tf.name, label = FALSE, idents = ids))
    }
  }, finally = dev.off())
}

In [None]:
### Figure 3/4: footprinting analysis
## PFC
# For every TF of interest (rows of `TF_oi`, read from ./Results/LINK/TF_oi.csv
# in the EC footprinting cell), write one multi-page PDF of footprint plots:
# page 1 shows all cell type x diagnosis groups, the following pages show the
# EOAD vs NCI pair of each cell type.
object <- pfc

# Identity setup does not depend on the TF, so it is done once up front:
# identities become "<cluster_celltype>_<diagnosis>", with sorted levels.
object$cell_diag <- paste(object$cluster_celltype, object$diagnosis, sep = "_")
Idents(object) <- object$cell_diag
levels(object) <- sort(levels(object))

# Identity sets to plot, one PDF page each (same order as the pages)
ident_sets <- list(
  c("Astrocyte_EOAD","Astrocyte_NCI","Excitatory_EOAD","Excitatory_NCI","Inhibitory_NCI","Inhibitory_EOAD","Microglia_EOAD","Microglia_NCI","Oligodendrocyte_EOAD","Oligodendrocyte_NCI","OPC_EOAD","OPC_NCI"),
  c("Astrocyte_EOAD","Astrocyte_NCI"),
  c("Excitatory_EOAD","Excitatory_NCI"),
  c("Inhibitory_NCI","Inhibitory_EOAD"),
  c("Microglia_NCI","Microglia_EOAD"),
  c("Oligodendrocyte_EOAD","Oligodendrocyte_NCI"),
  c("OPC_EOAD","OPC_NCI")
)

## running loop
# seq_along() makes the loop a no-op when TF_oi has no rows
for (i in seq_along(TF_oi$motif)) {
  print(i)
  tf.name <- TF_oi$motif[i]

  outpath <- paste("/data2/aliu8/2023_AD_multiome/Analysis/Figures/LINK/PFC/PFC_footprinting_",TF_oi$motif.name[i],".pdf",sep = "")

  ## footprinting analysis visualization
  pdf(outpath, width = 6, height = 5)
  # `finally` closes the PDF device even if a plot call fails, so a failure
  # does not leave a dangling graphics device behind; the error still propagates
  tryCatch({
    for (ids in ident_sets) {
      print(PlotFootprint(object, features = tf.name, label = FALSE, idents = ids))
    }
  }, finally = dev.off())
}

In [None]:
####################################
## Working on the SCENIC+ results ##
####################################

## First, filter the regulons by TF-cistrome correlation (Rho).
## c1 = PFC, c2 = EC, c3 = HIP; the first CSV column becomes the row names.
c1 <- read.csv(
  "/data2/ccitu/andi_multiome_filtered/eregulon_files_22_03_2024/tf_cistrome_corr_pfc.csv",
  row.names = 1
)
c2 <- read.csv(
  "/data2/ccitu/andi_multiome_filtered/eregulon_files_22_03_2024/tf_cistrome_corr_ec.csv",
  row.names = 1
)
c3 <- read.csv(
  "/data2/ccitu/andi_multiome_filtered/eregulon_files_22_03_2024/tf_cistrome_corr_hip.csv",
  row.names = 1
)

### Keep only strongly correlated pairs: Rho at or above the upper cutoff, or
### at or below the lower cutoff.
### NOTE(review): the cutoffs are hard-coded per region and are meant to be the
### top 5% positive / bottom 5% negative correlations -- confirm against the
### current input files.
keep_pfc <- c1$Rho >= 0.89 | c1$Rho <= -0.66
c1 <- c1[keep_pfc, ]
keep_ec <- c2$Rho >= 0.86 | c2$Rho <= -0.71
c2 <- c2[keep_ec, ]
keep_hip <- c3$Rho >= 0.93 | c3$Rho <= -0.78
c3 <- c3[keep_hip, ]

# Number of retained rows per region
dim(c1)
dim(c2)
dim(c3)

# TFs retained in all three regions
Reduce(intersect, list(c1$TF, c2$TF, c3$TF))

# Save the filtered tables (without row names)
write.csv(c1, file = "./Results/SCENIC/PFC_regulon.csv", row.names = F)
write.csv(c2, file = "./Results/SCENIC/EC_regulon.csv", row.names = F)
write.csv(c3, file = "./Results/SCENIC/HIP_regulon.csv", row.names = F)

In [None]:
### Supplementary Figure: overlap of TFs predicted by SCENIC+ across the three brain regions
## Venn diagram of the unique TFs kept in the filtered PFC / EC / HIP tables
library(VennDiagram)
library(RColorBrewer)

# One fill colour per region, in the order PFC, EC, HIP
myCol <- c("#C25757", "#825ca6", "#3F78C1")#brewer.pal(3, "Pastel2")

# Unique TFs per region (c1 = PFC, c2 = EC, c3 = HIP)
list1 <- unique(c1$TF)
list2 <- unique(c2$TF)
list3 <- unique(c3$TF)

venn.diagram(
  # Sets and their labels
  x = list(list1, list2, list3),
  category.names = c("PFC", "EC", "HIP"),

  # Output file and image settings
  filename = './Figures/GRN/Venn_SCENIC+_ALL.png',
  output = FALSE,
  imagetype = "png",
  height = 550,
  width = 550,
  resolution = 300,
  compression = "lzw",

  # Circles: filled, no outline drawn
  fill = myCol,
  lwd = 2,
  lty = 'blank',

  # Count labels
  cex = .6,
  fontface = "bold",
  fontfamily = "sans",

  # Set-name labels
  cat.cex = 0.6,
  cat.fontface = "bold",
  cat.fontfamily = "sans",
#  cat.pos = c(-27, 27, 135),
#  cat.dist = c(0.055, 0.055, 0.085),
  cat.default.pos = "outer"
)


In [None]:
#################################################
## creating jaccard score matrix on gene level ##
#################################################
# For every pair of retained cistromes, compute the Jaccard index of their
# target-gene sets (|intersection| / |union|) and plot the matrix as a
# clustered heatmap.
library(ComplexHeatmap)
library(circlize)
library(viridis)
## all edges from scenic+
## specific region
scenic <- read.csv("./Results/SCENIC_plus/eregulons_hip.csv")
## cistrome correlations, filtered with the region-specific Rho cutoffs
cistrome <- read.csv("./Results/SCENIC_plus/tf_cistrome_corr_hip.csv")
cistrome <- cistrome[cistrome$Rho >= 0.93 | cistrome$Rho <= -0.78,]# hip
#cistrome <- cistrome[cistrome$Rho >= 0.89 | cistrome$Rho <= -0.66,] # pfc
#cistrome <- cistrome[cistrome$Rho >= 0.86 | cistrome$Rho <= -0.71,] # ec

cistrome_names <- as.character(unique(cistrome$Cistrome))
n_cistrome <- length(cistrome_names)

# Build each cistrome's gene set once, instead of re-subsetting `scenic`
# inside the pairwise loop.
gene_sets <- lapply(cistrome_names, function(nm) {
  unique(scenic[scenic$Region_signature_name == nm, ]$Gene)
})
names(gene_sets) <- cistrome_names

# Cistromes without any gene in `scenic` would give 0/0 = NaN below, which
# breaks the hierarchical clustering; report them and score them as 0.
n_genes <- vapply(gene_sets, length, integer(1))
if (any(n_genes == 0L)) {
  warning("Cistromes with no genes in the eRegulon table: ",
          paste(cistrome_names[n_genes == 0L], collapse = ", "),
          call. = FALSE)
}

j_mat <- matrix(0, nrow = n_cistrome, ncol = n_cistrome)
rownames(j_mat) <- cistrome_names
colnames(j_mat) <- cistrome_names
#j_mat

# The Jaccard index is symmetric: compute one triangle and mirror it
for (a in seq_len(n_cistrome)) {
  for (b in seq_len(a)) {
    n_union <- length(union(gene_sets[[a]], gene_sets[[b]]))
    score <- if (n_union > 0) {
      length(intersect(gene_sets[[a]], gene_sets[[b]])) / n_union
    } else {
      0
    }
    j_mat[a, b] <- score
    j_mat[b, a] <- score
  }
}

# Build the heatmap before opening the device, so that a failure here does not
# leave an open PDF device behind
p1 <- Heatmap(
    j_mat,cluster_rows = TRUE,
    cluster_columns = TRUE,
    show_column_names=FALSE,
    show_row_dend = FALSE,
    show_column_dend = FALSE,
    col = colorRamp2(c(0,0.25,0.5),viridis(3)),row_names_side = "left")

pdf(file = "./Figures/GRN/HIP_SCENIC_Gene_Jaccard.pdf", height = 6,width = 8)
# draw() renders explicitly, so the figure is also written when the code is
# source()d and top-level auto-printing is not available
draw(p1)
dev.off()