In [None]:
# overlap linked peaks to GWAS
library(stringr)
library(GenomicRanges)
library(GenomicFeatures)
library(LDlinkR)

## Loading
library(Seurat)
library(Signac)
library(dplyr)
library(patchwork)
library(future)
library(stringr)
library(tidydr)
library(tidyverse)
library(viridis)
library(ggplot2)
library(EnsDb.Hsapiens.v86)
library(BSgenome.Hsapiens.UCSC.hg38)

# For motif analysis
library(JASPAR2020)
library(TFBSTools)

## Set the working directory; the relative "./Results/..." paths used below resolve against it
setwd("/data2/aliu8/2023_AD_multiome/Analysis/")

In [None]:
## Linked-peak results are the input for LDSC; list what is on disk first
list.files("./Results/ldsc/")
list.files("./Results/LINK/")
# NOTE(review): this repeats the LINK listing on the previous line
list.files("./Results/LINK/")

## In-house cell-type peak annotation, renamed to GRanges-style columns and
## converted to a GRanges object in one step
ctpeaks <- GRanges(
    setNames(
        read.csv("./Results/CTpeaks_annotated.csv", row.names = 1),
        c("chr", "start", "end", "width", "strand", "peak_called_in")
    )
)
ctpeaks

In [None]:
#############################################
### Generate bed file for S-LDSC analysis ###
#############################################
########### for DEG linked peaks ############
#############################################
# Annotated linked-peak tables, one per region (PFC, EC, HIP)
pfc_linked_peaks <- read.csv("./Results/LINK/PFC_linkpeaks_all_annotated.csv")
ec_linked_peaks <- read.csv("./Results/LINK/EC_linkpeaks_all_annotated.csv")
hip_linked_peaks <- read.csv("./Results/LINK/HIP_linkpeaks_all_annotated.csv")

# Linked-peak counts per cell type, printed in PFC, EC, HIP order
for (region_peaks in list(pfc_linked_peaks, ec_linked_peaks, hip_linked_peaks)) {
    print(table(region_peaks[["celltype"]]))
}

In [None]:
##### Restrict linked peaks to peaks called in exactly one cell type
## OPTIONAL ##
## Writes one BED file per major cell type (EC) for S-LDSC
ct <- c("Astrocyte", "Excitatory", "Inhibitory", "Microglia", "Oligodendrocyte", "OPC")
# Region whose linked peaks are exported, and the prefix of its output files
peaks <- ec_linked_peaks
region_tag <- "EC"

# For every cell type keep the linked peaks whose coordinates exactly match a
# peak annotated as called in that cell type (and only that cell type).
restricted <- vector("list", length(ct))
names(restricted) <- ct
for (celltype in ct) {
    # Annotated peaks whose peak_called_in label equals this cell type exactly
    anno_temp <- ctpeaks[mcols(ctpeaks)[["peak_called_in"]] == celltype]

    # Linked peaks of this cell type; %in% never yields NA rows
    atac_peaks <- peaks[peaks[["celltype"]] %in% celltype, ]
    link_temp <- GRanges(atac_peaks[, c("seqnames", "peak.start", "peak.end")])

    keep <- link_temp %in% anno_temp
    print(table(keep))
    restricted[[celltype]] <- atac_peaks[which(keep), ]
}
# Bind once instead of growing a data frame inside the loop
final <- do.call(rbind, unname(restricted))
table(final[["celltype"]], useNA = "always")

#############################################
### Generate bed file for S-LDSC analysis ###
#############################################
############## for deg results ##############
#############################################
final <- final[order(final[["seqnames"]]), ]
## BED coordinates are taken from the "chr-start-end" peak identifier
peak_parts <- str_split_fixed(final[["peak"]], "-", 3)
final[["seqnames"]] <- peak_parts[, 1]
final[["start"]] <- peak_parts[, 2]
final[["end"]] <- peak_parts[, 3]

out_dir <- "./Results/ldsc/DEG_linked_peaks_restricted/"
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

for (i in ct) {
    print(i)
    # Select the BED columns by name so the output does not depend on the
    # column order of the input CSV
    df <- final[final[["celltype"]] == i, c("seqnames", "start", "end")]
    print(head(df))
    write.table(
        df,
        file = paste0(out_dir, region_tag, "_", i, "_DEG_linked_peaks_restricted.bed"),
        quote = FALSE, row.names = FALSE, col.names = FALSE, sep = "\t"
    )
}

In [None]:
##### Restrict linked peaks to peaks called in exactly one cell type
## OPTIONAL ##
## Writes one BED file per major cell type (PFC) for S-LDSC
ct <- c("Astrocyte", "Excitatory", "Inhibitory", "Microglia", "Oligodendrocyte", "OPC")
# Region whose linked peaks are exported, and the prefix of its output files
peaks <- pfc_linked_peaks
region_tag <- "PFC"

# For every cell type keep the linked peaks whose coordinates exactly match a
# peak annotated as called in that cell type (and only that cell type).
restricted <- vector("list", length(ct))
names(restricted) <- ct
for (celltype in ct) {
    # Annotated peaks whose peak_called_in label equals this cell type exactly
    anno_temp <- ctpeaks[mcols(ctpeaks)[["peak_called_in"]] == celltype]

    # Linked peaks of this cell type; %in% never yields NA rows
    atac_peaks <- peaks[peaks[["celltype"]] %in% celltype, ]
    link_temp <- GRanges(atac_peaks[, c("seqnames", "peak.start", "peak.end")])

    keep <- link_temp %in% anno_temp
    print(table(keep))
    restricted[[celltype]] <- atac_peaks[which(keep), ]
}
# Bind once instead of growing a data frame inside the loop
final <- do.call(rbind, unname(restricted))
table(final[["celltype"]], useNA = "always")

#############################################
### Generate bed file for S-LDSC analysis ###
#############################################
############## for deg results ##############
#############################################
final <- final[order(final[["seqnames"]]), ]
## BED coordinates are taken from the "chr-start-end" peak identifier
peak_parts <- str_split_fixed(final[["peak"]], "-", 3)
final[["seqnames"]] <- peak_parts[, 1]
final[["start"]] <- peak_parts[, 2]
final[["end"]] <- peak_parts[, 3]

out_dir <- "./Results/ldsc/DEG_linked_peaks_restricted/"
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

for (i in ct) {
    print(i)
    # Select the BED columns by name so the output does not depend on the
    # column order of the input CSV
    df <- final[final[["celltype"]] == i, c("seqnames", "start", "end")]
    print(head(df))
    write.table(
        df,
        file = paste0(out_dir, region_tag, "_", i, "_DEG_linked_peaks_restricted.bed"),
        quote = FALSE, row.names = FALSE, col.names = FALSE, sep = "\t"
    )
}

In [None]:
##### Restrict linked peaks to peaks called in exactly one cell type
## OPTIONAL ##
## Writes one BED file per major cell type (EC) for S-LDSC
# NOTE(review): this cell processes ec_linked_peaks and writes the same EC_*
# files as an earlier cell in this notebook - confirm whether a different
# region was intended or the cell can be dropped.
ct <- c("Astrocyte", "Excitatory", "Inhibitory", "Microglia", "Oligodendrocyte", "OPC")
# Region whose linked peaks are exported, and the prefix of its output files
peaks <- ec_linked_peaks
region_tag <- "EC"

# For every cell type keep the linked peaks whose coordinates exactly match a
# peak annotated as called in that cell type (and only that cell type).
restricted <- vector("list", length(ct))
names(restricted) <- ct
for (celltype in ct) {
    # Annotated peaks whose peak_called_in label equals this cell type exactly
    anno_temp <- ctpeaks[mcols(ctpeaks)[["peak_called_in"]] == celltype]

    # Linked peaks of this cell type; %in% never yields NA rows
    atac_peaks <- peaks[peaks[["celltype"]] %in% celltype, ]
    link_temp <- GRanges(atac_peaks[, c("seqnames", "peak.start", "peak.end")])

    keep <- link_temp %in% anno_temp
    print(table(keep))
    restricted[[celltype]] <- atac_peaks[which(keep), ]
}
# Bind once instead of growing a data frame inside the loop
final <- do.call(rbind, unname(restricted))
table(final[["celltype"]], useNA = "always")

#############################################
### Generate bed file for S-LDSC analysis ###
#############################################
############## for deg results ##############
#############################################
final <- final[order(final[["seqnames"]]), ]
## BED coordinates are taken from the "chr-start-end" peak identifier
peak_parts <- str_split_fixed(final[["peak"]], "-", 3)
final[["seqnames"]] <- peak_parts[, 1]
final[["start"]] <- peak_parts[, 2]
final[["end"]] <- peak_parts[, 3]

out_dir <- "./Results/ldsc/DEG_linked_peaks_restricted/"
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

for (i in ct) {
    print(i)
    # Select the BED columns by name so the output does not depend on the
    # column order of the input CSV
    df <- final[final[["celltype"]] == i, c("seqnames", "start", "end")]
    print(head(df))
    write.table(
        df,
        file = paste0(out_dir, region_tag, "_", i, "_DEG_linked_peaks_restricted.bed"),
        quote = FALSE, row.names = FALSE, col.names = FALSE, sep = "\t"
    )
}

In [None]:
##### Restrict linked peaks to peaks called in exactly one cell type
## OPTIONAL ##
## Writes one BED file per major cell type (HIP) for S-LDSC
ct <- c("Astrocyte", "Excitatory", "Inhibitory", "Microglia", "Oligodendrocyte", "OPC")
# Region whose linked peaks are exported, and the prefix of its output files
peaks <- hip_linked_peaks
region_tag <- "HIP"

# For every cell type keep the linked peaks whose coordinates exactly match a
# peak annotated as called in that cell type (and only that cell type).
restricted <- vector("list", length(ct))
names(restricted) <- ct
for (celltype in ct) {
    # Annotated peaks whose peak_called_in label equals this cell type exactly
    anno_temp <- ctpeaks[mcols(ctpeaks)[["peak_called_in"]] == celltype]

    # Linked peaks of this cell type; %in% never yields NA rows
    atac_peaks <- peaks[peaks[["celltype"]] %in% celltype, ]
    link_temp <- GRanges(atac_peaks[, c("seqnames", "peak.start", "peak.end")])

    keep <- link_temp %in% anno_temp
    print(table(keep))
    restricted[[celltype]] <- atac_peaks[which(keep), ]
}
# Bind once instead of growing a data frame inside the loop
final <- do.call(rbind, unname(restricted))
table(final[["celltype"]], useNA = "always")

#############################################
### Generate bed file for S-LDSC analysis ###
#############################################
############## for deg results ##############
#############################################
final <- final[order(final[["seqnames"]]), ]
## BED coordinates are taken from the "chr-start-end" peak identifier
peak_parts <- str_split_fixed(final[["peak"]], "-", 3)
final[["seqnames"]] <- peak_parts[, 1]
final[["start"]] <- peak_parts[, 2]
final[["end"]] <- peak_parts[, 3]

out_dir <- "./Results/ldsc/DEG_linked_peaks_restricted/"
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

for (i in ct) {
    print(i)
    # Select the BED columns by name so the output does not depend on the
    # column order of the input CSV
    df <- final[final[["celltype"]] == i, c("seqnames", "start", "end")]
    print(head(df))
    write.table(
        df,
        file = paste0(out_dir, region_tag, "_", i, "_DEG_linked_peaks_restricted.bed"),
        quote = FALSE, row.names = FALSE, col.names = FALSE, sep = "\t"
    )
}

In [None]:
######################################################
########### Working on S-LDSC results ################
######################################################
#' Collect the S-LDSC results of the DEG-linked-peak annotations for one trait.
#'
#' File names are parsed by splitting on "_" after removing the
#' ".GWAS.results" suffix: the third field becomes `Region`, everything from
#' the fourth field on becomes `Category2`, and the fourth field alone becomes
#' `Category3` / `Celltype`.
#'
#' @param dir Character vector of result file names (not full paths) to search.
#'   Defaults to the global `check`.
#' @param GWAS_oi Trait label; matched literally against the file names and
#'   stored in the `GWAS` column.
#' @param res_dir Directory that contains the files named in `dir`.
#' @return A data.frame with one row per matching file: the first row and
#'   columns 1 and 5-10 of each results table (column 1 is replaced by the
#'   file-derived `Category`), plus `Region`, `Category2`, `Category3`,
#'   `Celltype`, `cell_region` and `GWAS`. An empty data.frame when no file
#'   name contains both `GWAS_oi` and "DEG".
get_ldsc_results_deg <- function(dir = check, GWAS_oi,
                                 res_dir = "/data2/aliu8/2023_AD_multiome/Analysis/Results/ldsc/ldsc_out_deg_hg19/") {
    # Search the file names that were passed in; literal matching keeps
    # regex metacharacters in a trait label from changing the selection
    res_oi <- dir[grepl(GWAS_oi, dir, fixed = TRUE) & grepl("DEG", dir, fixed = TRUE)]
    if (length(res_oi) == 0) {
        # Nothing to read for this trait; rbind() callers can absorb this
        return(data.frame())
    }

    read_one <- function(fname) {
        df <- read.table(file.path(res_dir, fname), header = TRUE)
        # NOTE(review): keeps only the first row of the table - presumably
        # the custom annotation; confirm against how ldsc was run
        df <- df[1, c(1, 5:10)]

        category <- gsub(".GWAS.results", "", fname, fixed = TRUE)
        parts <- strsplit(category, "_", fixed = TRUE)[[1]]
        # Missing fields become "" rather than NA
        nth <- function(k) if (length(parts) >= k) parts[[k]] else ""

        df[["Category"]] <- category
        df[["Region"]] <- nth(3)
        df[["Category2"]] <- paste(parts[-(1:3)], collapse = "_")
        df[["Category3"]] <- nth(4)
        df
    }

    # Read every file, then bind once
    res_deg <- do.call(rbind, lapply(res_oi, read_one))
    res_deg[["Celltype"]] <- res_deg[["Category3"]]
    res_deg[["cell_region"]] <- paste(res_deg[["Celltype"]], res_deg[["Region"]], sep = "_")
    res_deg[["GWAS"]] <- GWAS_oi
    res_deg
}

In [None]:
########### Working on S-LDSC results ################
######################################################
## All S-LDSC output file names, minus the log files. ".log" is matched
## literally, so the dot does not act as a regex wildcard.
check <- list.files("/data2/aliu8/2023_AD_multiome/Analysis/Results/ldsc/ldsc_out_deg_hg19/")
check <- check[!grepl(".log", check, fixed = TRUE)]
#check
trait_oi <- c("AD_Bellenguez","AD_Kunkle","AD_Wightman","AD_Jansen","AR_Watanabe","Asthma_Watanabe","Atopic_Watanabe","CD_DeLange","Celiac_Dubois","Hypo_Watanabe","MS_IMSGC2019","PSC_Ji",
"RA_Okada","SLE_Bentham","UC_DeLange","Vitligo_Jin","ADHD_Demontis","ASD_Grove","BIP_Mullins","DEPRESSION_Howard","MDD_Wray","INSOMNIA_Schoeler",
"NEUROTICISM_Nagel","PTSD_Nievergelt","SCZ_Trubetskoy")

## Collect the per-trait result tables and bind them once
res_deg <- do.call(rbind, lapply(trait_oi, function(trait) {
    get_ldsc_results_deg(check, GWAS_oi = trait)
}))
res_deg <- res_deg[order(res_deg[["Category3"]]), ]
#res_deg
table(res_deg[["GWAS"]])

## FDR-adjust the enrichment p-values separately within each GWAS trait
res_deg[["p.adj"]] <- ave(
    res_deg[["Enrichment_p"]], res_deg[["GWAS"]],
    FUN = function(p) p.adjust(p, method = "fdr")
)
table(res_deg[["Enrichment_p"]] < 0.05)
table(res_deg[["p.adj"]] < 0.05)
write.csv(res_deg, file = "./Results/ldsc/DEG_linked_peaks_hg19_ldsc.csv", row.names = FALSE)

In [None]:
#### Figure 6d
## Heatmap of S-LDSC coefficient z-scores: rows are cell type x region,
## columns are GWAS traits, stars mark FDR-significant positive enrichments.
cell_regions <- unique(res_deg[["cell_region"]])
gwas_traits <- unique(res_deg[["GWAS"]])

# (row, column) position of every result row in the two matrices
cell_idx <- cbind(
  match(res_deg[["cell_region"]], cell_regions),
  match(res_deg[["GWAS"]], gwas_traits)
)
z_score <- res_deg[["Coefficient_z.score"]]
p_adj <- res_deg[["p.adj"]]

# z-scores; cell_region/GWAS combinations without a result stay at 0
mat <- matrix(0, nrow = length(cell_regions), ncol = length(gwas_traits),
              dimnames = list(cell_regions, gwas_traits))
mat[cell_idx] <- z_score
#mat

# Adjusted p-values of positive enrichments with p.adj < 0.1; everything else
# (including combinations with no result or missing values) is set to 1
sig_mat <- matrix(1, nrow = length(cell_regions), ncol = length(gwas_traits),
                  dimnames = list(cell_regions, gwas_traits))
is_hit <- !is.na(z_score) & !is.na(p_adj) & z_score > 0 & p_adj < 0.1
sig_mat[cell_idx[is_hit, , drop = FALSE]] <- p_adj[is_hit]
#sig_mat

library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)

# Row annotation colours: one strip coloured by cell type, one by region
celltype_cols <- c(Astrocyte = "#f06719ff", Excitatory = "#33a65cff",
                   Inhibitory = "#23767cff", Microglia = "#e03426ff",
                   Oligodendrocyte = "#1ba3c6ff", OPC = "#a26dc2ff")
region_cols <- c(PFC = "#c25757ff", EC = "#825ca6ff", HIP = "#3f78c1ff")
combos <- expand.grid(region = names(region_cols), celltype = names(celltype_cols),
                      stringsAsFactors = FALSE)
combo_names <- paste(combos[["celltype"]], combos[["region"]], sep = "_")
celltype_map <- setNames(celltype_cols[combos[["celltype"]]], combo_names)
region_map <- setNames(region_cols[combos[["region"]]], combo_names)

ha <- rowAnnotation(Region = rownames(mat), col = list(Region = celltype_map),
                    show_legend = FALSE, annotation_label = "")
ha2 <- rowAnnotation(Region = rownames(mat), col = list(Region = region_map),
                     show_legend = FALSE, annotation_label = "")
ha <- c(ha, ha2)

ht <- Heatmap(mat, cluster_rows = FALSE, cluster_columns = FALSE,
              col = colorRamp2(seq(from = -4, to = 4, length = 11),
                               rev(RColorBrewer::brewer.pal(11, "RdBu"))),
              show_column_names = TRUE, show_row_names = FALSE, row_names_side = "left",
              column_names_side = "top",
              left_annotation = ha,
              row_names_max_width = max_text_width(rownames(mat),
                                                   gp = gpar(fontsize = 24)),
              cell_fun = function(j, i, x, y, w, h, fill) {
                # "**" for adjusted p < 0.01, "*" for adjusted p < 0.05
                if (sig_mat[i, j] < 0.01) {
                  grid.text("**", x, y, gp = gpar(fontsize = 35, col = "white"), vjust = "center")
                } else if (sig_mat[i, j] < 0.05) {
                  grid.text("*", x, y, gp = gpar(fontsize = 35, col = "white"), vjust = "center")
                }
              })
pdf("/data2/aliu8/2023_AD_multiome/Analysis/Figures/LDSC_DEG_Linked_peaks_hg19.pdf", height = 8, width = 16)
# Draw explicitly: relying on top-level auto-print leaves the PDF empty when
# this code is sourced or run inside a function
draw(ht)
dev.off()