In [None]:
### Differential expression analysis of PFC, EC and HIP
## Andi Liu
# 1/9/2024

## loading
suppressPackageStartupMessages({
library(Seurat)
library(Signac)
library(dplyr)
library(patchwork)
library(future)
library(stringr)
library(tidydr)
library(tidyverse)
library(viridis)
library(qs)
library(ggplot2)
library(readxl)

library(EnhancedVolcano)
library(RColorBrewer)
library(ComplexHeatmap)
library(circlize)

library(GenomicRanges)
library(GenomicFeatures)
library(EnsDb.Hsapiens.v86)
library(BSgenome.Hsapiens.UCSC.hg38)
})
# NOTE(review): hard-coded absolute working directory. Every relative path used
# below ("./Results/...", "./Figures/...", the .rds file) is resolved against it,
# so this line must be adapted when the notebook runs on another machine.
setwd("/data2/aliu8/2023_AD_multiome/Analysis/")

In [None]:
### Load the outlier-filtered object and extract its per-cell metadata
# presumably a Seurat object (it exposes a `meta.data` slot) - confirm
clean <- readRDS(file = "03.clean_object_rmOutlier.rds")
ls()  # list the workspace as a quick sanity check
# clean
meta <- slot(clean, "meta.data")
head(meta)

In [None]:
## Identify replicated DEGs in PFC: genes that are significant in BOTH the MAST
## result and the mixed-model result, with the same direction and cell type.
# MAST
mast_reseult <- read.csv("./Results/DEG/MAST_major.cell.type_PFC.csv")
# which() drops rows whose p-value or fold change is NA; plain logical
# subsetting would keep them as all-NA rows and inflate the counts below.
mast_sig <- mast_reseult[which(mast_reseult$p_val_adj <= 0.05 &
                                 abs(mast_reseult$avg_log2FC) >= 0.25), ]
# key = gene + direction + cell type, used to match the two methods
mast_sig$comb <- paste(mast_sig$gene, mast_sig$dir, mast_sig$cell.type, sep = "_")
table(mast_sig$cell.type, mast_sig$dir)

# Mixed model: one result file per major cell type
ast <- read.csv("./Results/DEG/DE_mixed_PFC_Ast.csv")
ex <- read.csv("./Results/DEG/DE_mixed_PFC_Ex.csv")
ind <- read.csv("./Results/DEG/DE_mixed_PFC_In.csv")
mic <- read.csv("./Results/DEG/DE_mixed_PFC_Mic.csv")
opc <- read.csv("./Results/DEG/DE_mixed_PFC_OPC.csv")
oli <- read.csv("./Results/DEG/DE_mixed_PFC_Oli.csv")
# Combine all cell types and save the full (unfiltered) table
mixed_model_reseult <- rbind(ast, ex, ind, mic, opc, oli)
mixed_model_reseult$comb <- paste(mixed_model_reseult$genes, mixed_model_reseult$dir,
                                  mixed_model_reseult$cell.type, sep = "_")
write.csv(mixed_model_reseult, file = "./Results/DEG/Mixed_PFC_major.cell.type.csv")

# Filter the mixed-model result with the same cut-offs as MAST
mixed_sig <- mixed_model_reseult[which(mixed_model_reseult$p_val_adj <= 0.05 &
                                         abs(mixed_model_reseult$avg_log2FC) >= 0.25), ]
table(mixed_sig$cell.type, mixed_sig$dir)
# Number of mixed-model hits under stricter cut-offs (NA rows are not counted)
sum(mixed_model_reseult$p_val_adj <= 0.01 &
      abs(mixed_model_reseult$avg_log2FC) >= 0.25, na.rm = TRUE)
sum(mixed_model_reseult$p_val_adj <= 0.01 &
      abs(mixed_model_reseult$avg_log2FC) >= 0.5, na.rm = TRUE)

# Overlap: MAST hits whose gene/direction/cell-type key is also a mixed-model hit
sig <- mast_sig[mast_sig$comb %in% mixed_sig$comb, ]
table(sig$cell.type, sig$dir)
length(sig$gene)
# Written to the PFC file (was the EC file, which the EC cell then overwrote;
# the cross-region cells read "Overlap_mast_mixed_PFC.csv").
write.csv(sig, "./Results/DEG/Overlap_mast_mixed_PFC.csv", row.names = FALSE)

In [None]:
## Identify replicated DEGs in EC: genes that are significant in BOTH the MAST
## result and the mixed-model result, with the same direction and cell type.
# MAST
mast_reseult <- read.csv("./Results/DEG/MAST_major.cell.type_EC.csv")
# which() drops rows whose p-value or fold change is NA; plain logical
# subsetting would keep them as all-NA rows and inflate the counts below.
mast_sig <- mast_reseult[which(mast_reseult$p_val_adj <= 0.05 &
                                 abs(mast_reseult$avg_log2FC) >= 0.25), ]
# key = gene + direction + cell type, used to match the two methods
mast_sig$comb <- paste(mast_sig$gene, mast_sig$dir, mast_sig$cell.type, sep = "_")
table(mast_sig$cell.type, mast_sig$dir)

# Mixed model: one result file per major cell type
ast <- read.csv("./Results/DEG/DE_mixed_EC_Ast.csv")
ex <- read.csv("./Results/DEG/DE_mixed_EC_Ex.csv")
ind <- read.csv("./Results/DEG/DE_mixed_EC_In.csv")
mic <- read.csv("./Results/DEG/DE_mixed_EC_Mic.csv")
opc <- read.csv("./Results/DEG/DE_mixed_EC_OPC.csv")
oli <- read.csv("./Results/DEG/DE_mixed_EC_Oli.csv")
# Combine all cell types and save the full (unfiltered) table
mixed_model_reseult <- rbind(ast, ex, ind, mic, opc, oli)
mixed_model_reseult$comb <- paste(mixed_model_reseult$genes, mixed_model_reseult$dir,
                                  mixed_model_reseult$cell.type, sep = "_")
write.csv(mixed_model_reseult, file = "./Results/DEG/Mixed_EC_major.cell.type.csv")

# Filter the mixed-model result with the same cut-offs as MAST
mixed_sig <- mixed_model_reseult[which(mixed_model_reseult$p_val_adj <= 0.05 &
                                         abs(mixed_model_reseult$avg_log2FC) >= 0.25), ]
table(mixed_sig$cell.type, mixed_sig$dir)
# Number of mixed-model hits under stricter cut-offs (NA rows are not counted)
sum(mixed_model_reseult$p_val_adj <= 0.01 &
      abs(mixed_model_reseult$avg_log2FC) >= 0.25, na.rm = TRUE)
sum(mixed_model_reseult$p_val_adj <= 0.01 &
      abs(mixed_model_reseult$avg_log2FC) >= 0.5, na.rm = TRUE)

# Overlap: MAST hits whose gene/direction/cell-type key is also a mixed-model hit
sig <- mast_sig[mast_sig$comb %in% mixed_sig$comb, ]
table(sig$cell.type, sig$dir)
length(sig$gene)
write.csv(sig, "./Results/DEG/Overlap_mast_mixed_EC.csv", row.names = FALSE)

In [None]:
## Identify replicated DEGs in HIP: genes that are significant in BOTH the MAST
## result and the mixed-model result, with the same direction and cell type.
# MAST
mast_reseult <- read.csv("./Results/DEG/MAST_major.cell.type_HIP.csv")
# which() drops rows whose p-value or fold change is NA; plain logical
# subsetting would keep them as all-NA rows and inflate the counts below.
mast_sig <- mast_reseult[which(mast_reseult$p_val_adj <= 0.05 &
                                 abs(mast_reseult$avg_log2FC) >= 0.25), ]
# key = gene + direction + cell type, used to match the two methods
mast_sig$comb <- paste(mast_sig$gene, mast_sig$dir, mast_sig$cell.type, sep = "_")
table(mast_sig$cell.type, mast_sig$dir)

# Mixed model: one result file per major cell type
ast <- read.csv("./Results/DEG/DE_mixed_HIP_Ast.csv")
ex <- read.csv("./Results/DEG/DE_mixed_HIP_Ex.csv")
ind <- read.csv("./Results/DEG/DE_mixed_HIP_In.csv")
mic <- read.csv("./Results/DEG/DE_mixed_HIP_Mic.csv")
opc <- read.csv("./Results/DEG/DE_mixed_HIP_OPC.csv")
oli <- read.csv("./Results/DEG/DE_mixed_HIP_Oli.csv")
# Combine all cell types and save the full (unfiltered) table
mixed_model_reseult <- rbind(ast, ex, ind, mic, opc, oli)
mixed_model_reseult$comb <- paste(mixed_model_reseult$genes, mixed_model_reseult$dir,
                                  mixed_model_reseult$cell.type, sep = "_")
write.csv(mixed_model_reseult, file = "./Results/DEG/Mixed_HIP_major.cell.type.csv")

# Filter the mixed-model result with the same cut-offs as MAST
mixed_sig <- mixed_model_reseult[which(mixed_model_reseult$p_val_adj <= 0.05 &
                                         abs(mixed_model_reseult$avg_log2FC) >= 0.25), ]
table(mixed_sig$cell.type, mixed_sig$dir)
# Number of mixed-model hits at two fold-change cut-offs (NA rows are not counted)
# NOTE(review): the PFC and EC cells use p_val_adj <= 0.01 for these two counts;
# 0.05 is kept here as written - confirm which threshold is intended.
sum(mixed_model_reseult$p_val_adj <= 0.05 &
      abs(mixed_model_reseult$avg_log2FC) >= 0.25, na.rm = TRUE)
sum(mixed_model_reseult$p_val_adj <= 0.05 &
      abs(mixed_model_reseult$avg_log2FC) >= 0.5, na.rm = TRUE)

# Overlap: MAST hits whose gene/direction/cell-type key is also a mixed-model hit
sig <- mast_sig[mast_sig$comb %in% mixed_sig$comb, ]
table(sig$cell.type, sig$dir)
length(sig$gene)
write.csv(sig, "./Results/DEG/Overlap_mast_mixed_HIP.csv", row.names = FALSE)

In [None]:
### Load the replicated-DEG table of each region and report how many DEGs each holds
deg_pfc <- read.csv(file = "./Results/DEG/Overlap_mast_mixed_PFC.csv")
deg_ec  <- read.csv(file = "./Results/DEG/Overlap_mast_mixed_EC.csv")
deg_hip <- read.csv(file = "./Results/DEG/Overlap_mast_mixed_HIP.csv")

# one entry per DEG row, so the vector length is the DEG count
length(deg_pfc$gene)
length(deg_ec$gene)
length(deg_hip$gene)

In [None]:
### Figure 2 panel a: Number of sEOAD DEGs
# Cross-tabulate cell type x direction for every region.
# NOTE(review): the overlap tables are written with a `cell.type` column in the
# cells above, while `celltype` is used here - confirm the column name.
df1 <- as.data.frame(table(deg_pfc$celltype, deg_pfc$dir))
df1$region <- "PFC"

df2 <- as.data.frame(table(deg_ec$celltype, deg_ec$dir))
df2$region <- "EC"

df3 <- as.data.frame(table(deg_hip$celltype, deg_hip$dir))
df3$region <- "HIP"
# table(deg_ec$cell.type, deg_ec$dir)
# table(deg_hip$cell.type, deg_hip$dir)

# Stack the three regions into one long table
df <- rbind(df1, df2, df3)
colnames(df) <- c("celltype", "dir", "freq", "region")

# Split by direction; the region factor fixes the bar order PFC -> EC -> HIP
df_pos <- df[df$dir == "pos", ]
df_pos$region <- factor(df_pos$region, levels = c("PFC", "EC", "HIP"))
df_neg <- df[df$dir == "neg", ]
df_neg$region <- factor(df_neg$region, levels = c("PFC", "EC", "HIP"))

# Colour palette keyed by region
region_colors <- c("PFC" = "#c25757ff", "EC" = "#825ca6ff", "HIP" = "#3f78c199")
region_colors

# Return `num_shades` copies of `base_color` whose opacity rises evenly from
# 0.2 to 1 (applied with scales::alpha).
generate_shades <- function(base_color, num_shades) {
  opacity_steps <- seq(from = 0.2, to = 1, length.out = num_shades)
  repeated_color <- rep(base_color, times = num_shades)
  scales::alpha(repeated_color, alpha = opacity_steps)
}

# Two opacity shades of the HIP colour for up-regulated genes
upregulated_colors <- generate_shades(base_color = region_colors["HIP"], num_shades = 2)
upregulated_colors

# Two opacity shades of the HIP colour for down-regulated genes
# NOTE(review): identical to the up-regulated vector, and neither is used by the
# plots below - confirm whether a different shade was intended.
downregulated_colors <- generate_shades(base_color = region_colors["HIP"], num_shades = 2)
downregulated_colors

# Theme elements shared by both bar charts: bare panel, y axis line only
deg_bar_theme <- theme(
  line = element_line(),
  axis.title.y = element_text(size = 15),
  axis.ticks.x = element_blank(),
  axis.title.x = element_blank(),
  axis.line.y = element_line(linewidth = 1),
  axis.text.y = element_text(size = 15),
  plot.background = element_blank(),
  panel.background = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank()
)

# Grouped bars, up-regulated genes (solid region colours)
p_pos <- ggplot(df_pos, aes(x = celltype, y = freq, fill = region)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("#C25757", "#825ca6", "#3F78C1")) +
  ylab("Number of upregulated genes in EOAD") +
  geom_text(aes(label = freq),
            position = position_dodge(width = 0.95),
            vjust = -.5, size = 6) +
  deg_bar_theme

# Grouped bars, down-regulated genes (semi-transparent colours, mirrored y axis,
# no x labels because this panel sits below the up-regulated one)
p_neg <- ggplot(df_neg, aes(x = celltype, y = freq, fill = region)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("#C2575788", "#825ca688", "#3F78C188")) +
  ylab("Number of downregulated genes in EOAD") +
  scale_y_reverse() +
  geom_text(aes(label = freq),
            position = position_dodge(width = 0.95),
            vjust = 1.3, size = 6) +
  deg_bar_theme +
  theme(axis.text.x.bottom = element_blank())

# Stack both panels into a single PDF page
pdf("./Figures/DEG/Number_of_DEGs.pdf", width = 16, height = 10)
patchwork::wrap_plots(p_pos, p_neg, ncol = 1)
dev.off()

In [None]:
### Figure 2 panel b
# jitter plot codes
library(ggplot2)
library(ggrepel)
library(dplyr)

# Jitter plot of MAST log2 fold changes per cell type, highlighting genes that
# are significant in both the MAST and the mixed-model result.
#
# Arguments:
#   deg_mast_path   CSV of MAST results (first column is read as row names);
#                   uses columns gene, cell.type, avg_log2FC, p_val_adj
#   deg_mixed_path  CSV of mixed-model results; uses columns genes, cell.type,
#                   avg_log2FC, p_val_adj
#   lfc_cutoff      minimum |avg_log2FC| for significance (inclusive)
#   padj_cutoff     maximum adjusted p-value for significance (inclusive)
#   highlight_lfc   |log2FC| at which replicated DEGs turn red, where the dashed
#                   guide lines are drawn, and which labelled genes must exceed
#   n_label         number of top up / top down genes labelled per cell type
# Returns: a ggplot object.
jitter_deg <- function(deg_mast_path, deg_mixed_path,
                       lfc_cutoff = 0.25, padj_cutoff = 0.05,
                       highlight_lfc = 1, n_label = 5) {
    set.seed(42)
    ## merging two results for plotting
    deg_mast <- read.csv(deg_mast_path, row.names = 1)
    deg_mixed <- read.csv(deg_mixed_path)

    # Inclusive cut-offs, matching the filters used to build the replicated
    # DEG tables elsewhere in this file (p_val_adj <= 0.05, |log2FC| >= 0.25).
    deg_mast$sig <- abs(deg_mast$avg_log2FC) >= lfc_cutoff &
        deg_mast$p_val_adj <= padj_cutoff
    deg_mixed$sig <- abs(deg_mixed$avg_log2FC) >= lfc_cutoff &
        deg_mixed$p_val_adj <= padj_cutoff

    # join key: gene + cell type
    deg_mast$comb <- paste(deg_mast$gene, deg_mast$cell.type, sep = "_")
    deg_mixed$comb <- paste(deg_mixed$genes, deg_mixed$cell.type, sep = "_")

    # Columns present in both tables get the suffix directly, instead of
    # rewriting ".x"/".y" afterwards with an unanchored regex.
    deg <- merge(deg_mast, deg_mixed, by = "comb",
                 suffixes = c("_mast", "_mixed"))

    # replicated = significant in both methods
    deg$sig <- deg$sig_mast & deg$sig_mixed

    ### top up & down genes per cell type for labelling
    replicated <- deg[which(deg$sig), ]
    top_up <- replicated %>%
        group_by(cell.type_mast) %>%
        top_n(n = n_label, wt = avg_log2FC_mast) %>%
        ungroup() %>%
        filter(avg_log2FC_mast > highlight_lfc)

    top_down <- replicated %>%
        group_by(cell.type_mast) %>%
        top_n(n = n_label, wt = -avg_log2FC_mast) %>%
        ungroup() %>%
        filter(avg_log2FC_mast < -highlight_lfc)

    deg$label <- deg$comb %in% c(top_up$comb, top_down$comb)
    deg_sig <- deg[deg$label, ]

    # A seeded jitter gives the same offsets for the same rows, so the labelled
    # point layer and the text layer (both drawn from deg_sig) line up.
    pos <- position_jitter(width = 0.4, seed = 1)

    p <- ggplot(deg, aes(x = cell.type_mast, y = avg_log2FC_mast)) +
        # not replicated: grey
        geom_jitter(data = subset(deg, !sig),
                    color = "grey", size = 0.5, position = pos) +
        # replicated, below the highlight threshold: orange
        geom_jitter(data = subset(deg, sig & abs(avg_log2FC_mast) < highlight_lfc),
                    color = "orange", size = 0.8, position = pos) +
        # replicated, at or above the threshold: red (>= so that points exactly
        # on the threshold are not silently dropped from the plot)
        geom_jitter(data = subset(deg, sig & abs(avg_log2FC_mast) >= highlight_lfc),
                    color = "red", size = 0.8, position = pos) +
        # labelled genes, redrawn so that their jitter matches the text anchors
        geom_jitter(data = deg_sig, color = "red", size = 0.8, position = pos) +
        geom_hline(yintercept = highlight_lfc, linetype = "dashed", color = "red") +
        geom_hline(yintercept = -highlight_lfc, linetype = "dashed", color = "red") +
        geom_text_repel(data = deg_sig, label = deg_sig$gene, position = pos,
                        size = 3, max.overlaps = 15, min.segment.length = 0,
                        fontface = "italic") +
        theme(panel.border = element_rect(fill = "#00000000", linewidth = 0.5),
              axis.title = element_blank(),
              panel.background = element_blank(),
              panel.grid.minor.y = element_line(color = "#6e6e6e29", linewidth = 0.2),
              panel.grid.major.x = element_line(color = "#6e6e6e29", linewidth = 0.2),
              axis.ticks.x = element_blank())
    p
}

## Build one jitter plot per region from its MAST / mixed-model result files
deg_mast_pfc <- "./Results/DEG/MAST_major.cell.type_PFC.csv"
deg_mixed_pfc <- "./Results/DEG/Mixed_PFC_major.cell.type.csv"
p1 <- jitter_deg(deg_mast_path = deg_mast_pfc, deg_mixed_path = deg_mixed_pfc)

deg_mast_ec <- "./Results/DEG/MAST_major.cell.type_EC.csv"
deg_mixed_ec <- "./Results/DEG/Mixed_EC_major.cell.type.csv"
p2 <- jitter_deg(deg_mast_path = deg_mast_ec, deg_mixed_path = deg_mixed_ec)

deg_mast_hip <- "./Results/DEG/MAST_major.cell.type_HIP.csv"
deg_mixed_hip <- "./Results/DEG/Mixed_HIP_major.cell.type.csv"
p3 <- jitter_deg(deg_mast_path = deg_mast_hip, deg_mixed_path = deg_mixed_hip)

## Save each region to its own PDF (1 = PFC, 2 = EC, 3 = HIP)
pdf(file = "./Figures/DEG/Jitterplot_DEG_1.pdf", width = 12, height = 8 / 3)
patchwork::wrap_plots(p1, ncol = 1)
dev.off()

pdf(file = "./Figures/DEG/Jitterplot_DEG_2.pdf", width = 12, height = 8 / 3)
patchwork::wrap_plots(p2, ncol = 1)
dev.off()

pdf(file = "./Figures/DEG/Jitterplot_DEG_3.pdf", width = 12, height = 8 / 3)
patchwork::wrap_plots(p3, ncol = 1)
dev.off()

In [None]:
### Figure 2 panel c: Venn diagram of replicated DEGs across the three regions
# Each set holds the `comb` keys of one region's overlap table
list1 <- deg_pfc$comb
list2 <- deg_ec$comb
list3 <- deg_hip$comb

library(VennDiagram)
library(RColorBrewer)
# One fill colour per region (PFC, EC, HIP); alternative: brewer.pal(3, "Pastel2")
myCol <- c("#C25757", "#825ca6", "#3F78C1")

venn.diagram(
  x = list(list1, list2, list3),
  category.names = c("sEOAD-DEGs in PFC" , "sEOAD-DEGs in EC" , "sEOAD-DEGs in HIP"),

  # Output file
  filename = './Figures/DEG/Venn_DEG_three_regions.png',
  output = FALSE,
  imagetype = "png",
  height = 800,
  width = 800,
  resolution = 300,
  compression = "lzw",

  # Circles: filled, no outline
  lwd = 2,
  lty = 'blank',
  fill = myCol,

  # Intersection counts
  cex = .6,
  fontface = "bold",
  fontfamily = "sans",

  # Set names (cat.cex = 0 gives the category labels zero size)
  cat.cex = 0,
  cat.fontface = "bold",
  cat.fontfamily = "sans",
  cat.default.pos = "outer",
  cat.pos = c(-27, 27, 135),
  cat.dist = c(0.055, 0.055, 0.085)
)

In [None]:
## Figure 2 panel e/f: box plots of selected DEGs across EC/HIP/PFC
# NOTE(review): `object` is not created in the visible cells (the object loaded
# at the top of the notebook is called `clean`) - confirm it exists before running.

# CACNA1A in microglia
DefaultAssay(object)
meta <- object@meta.data
table(meta$cluster_celltype)
# Keep microglia only. The label was misspelled "Cicroglia", which selected no cells.
meta <- meta[which(meta$cluster_celltype == "Microglia"), ]
meta$rg_diagnosis <- paste(meta$regions, meta$diagnosis, sep = "_")

# Expression of the gene in the selected cells, reshaped to one row per cell
temp <- as.data.frame(LayerData(object, layer = "data", assay = "PC",
                                features = "CACNA1A", cells = rownames(meta)))
temp <- as.data.frame(t(temp))

# Attach diagnosis and region_diagnosis; the factor levels fix the x-axis order
colnames(temp)[1] <- "expression"
temp$diagnosis <- meta[rownames(temp), ]$diagnosis
temp$rg_diagnosis <- meta[rownames(temp), ]$rg_diagnosis
temp$rg_diagnosis <- factor(temp$rg_diagnosis,
                            levels = c("PFC_EOAD", "PFC_NCI", "EC_EOAD",
                                       "EC_NCI", "HIP_EOAD", "HIP_NCI"))

# temp$individual_ID <- with(temp, reorder(individual_ID, diagnosis))
# `trim` and `scale` were dropped: they are geom_violin arguments that
# geom_boxplot ignores with a warning.
p1 <- ggplot(temp, aes(x = rg_diagnosis, y = expression)) +
  geom_boxplot(aes(fill = diagnosis)) +
  theme_classic() +
  scale_fill_manual(values = c("#d40000ff", "#005980ff")) +
  theme(legend.position = "none")

pdf("./Figures/DEG/Mic_CACNA1A_boxplot.pdf", width = 6, height = 5.3)
print(p1)  # explicit print so the plot is drawn even when the code is source()d
dev.off()


# GRM3 in astrocytes
DefaultAssay(object)
meta <- object@meta.data
table(meta$cluster_celltype)
meta <- meta[which(meta$cluster_celltype == "Astrocyte"), ]
meta$rg_diagnosis <- paste(meta$regions, meta$diagnosis, sep = "_")

# Expression of the gene in the selected cells, reshaped to one row per cell
temp <- as.data.frame(LayerData(object, layer = "data", assay = "PC",
                                features = "GRM3", cells = rownames(meta)))
temp <- as.data.frame(t(temp))

# Attach diagnosis and region_diagnosis; the factor levels fix the x-axis order
colnames(temp)[1] <- "expression"
temp$diagnosis <- meta[rownames(temp), ]$diagnosis
temp$rg_diagnosis <- meta[rownames(temp), ]$rg_diagnosis
temp$rg_diagnosis <- factor(temp$rg_diagnosis,
                            levels = c("PFC_EOAD", "PFC_NCI", "EC_EOAD",
                                       "EC_NCI", "HIP_EOAD", "HIP_NCI"))

# temp$individual_ID <- with(temp, reorder(individual_ID, diagnosis))
p2 <- ggplot(temp, aes(x = rg_diagnosis, y = expression)) +
  geom_boxplot(aes(fill = diagnosis)) +
  theme_classic() +
  scale_fill_manual(values = c("#d40000ff", "#005980ff")) +
  theme(legend.position = "none")

pdf("./Figures/DEG/Ast_GRM3_boxplot.pdf", width = 6, height = 5.3)
print(p2)
dev.off()

In [None]:
################################################################################
############# performing enrichment analysis on the results ####################
################################################################################
################################ Figure 2g #####################################
################################################################################
#enrichment analysis
library(clusterProfiler)
library('org.Hs.eg.db')
# Package that contains MSigDB gene sets in tidy format
library(msigdbr)
# We will need this so we can use the pipe: %>%
library(magrittr)
library(DOSE)
library(dplyr)
library(stringr)
library(ComplexHeatmap)
library(circlize)

In [None]:
### Signals that are consistent across regions
### Load the replicated-DEG table of each region
pfc_degs <- read.csv(file = "./Results/DEG/Overlap_mast_mixed_PFC.csv")
ec_degs  <- read.csv(file = "./Results/DEG/Overlap_mast_mixed_EC.csv")
hip_degs <- read.csv(file = "./Results/DEG/Overlap_mast_mixed_HIP.csv")

length(pfc_degs$gene)
length(ec_degs$gene)
length(hip_degs$gene)

# head(deg_hip)

## Keys shared by all three regions
l1 <- pfc_degs$comb
l2 <- ec_degs$comb
l3 <- hip_degs$comb

deg_oi <- Reduce(intersect, list(l1, l2, l3))
# keep only the keys that are also listed in the sEOAD DEG table
sEOAD_deg <- read.csv("./Results/DEG/sEOAD_deg.csv")
deg_oi <- deg_oi[deg_oi %in% sEOAD_deg$comb]

# Rows of each regional table that belong to the shared set
pfc_oi <- pfc_degs[pfc_degs$comb %in% deg_oi, ]
ec_oi  <- ec_degs[ec_degs$comb %in% deg_oi, ]
hip_oi <- hip_degs[hip_degs$comb %in% deg_oi, ]

table(pfc_oi$celltype, pfc_oi$dir)

In [None]:
#########################################################################
#### PFC: map DEG gene symbols to Entrez IDs for enrichment analysis ####
#########################################################################
# Symbols are looked up in org.Hs.eg.db; for 1:many mappings the first match
# is kept (multiVals = "first"), and unmapped symbols come back as NA.
pfc_degs$gene_id <- mapIds(org.Hs.eg.db,
                           keys = pfc_degs$gene,
                           keytype = "SYMBOL",
                           column = "ENTREZID",
                           multiVals = "first")


## Lookup table of the genes that received an Entrez ID
# (the `ensembl_id` column actually holds the gene symbols used as keys above)
gene_key_df <- data.frame(ensembl_id = pfc_degs$gene,
                          entrez_id = pfc_degs$gene_id,
                          stringsAsFactors = FALSE)
gene_key_df <- dplyr::filter(gene_key_df, !is.na(entrez_id))

# Working copy used for inspection
degs <- pfc_degs
head(degs)
dim(degs)

In [None]:
########################################################################
#### EC: map DEG gene symbols to Entrez IDs for enrichment analysis ####
########################################################################
# Symbols are looked up in org.Hs.eg.db; for 1:many mappings the first match
# is kept (multiVals = "first"), and unmapped symbols come back as NA.
ec_degs$gene_id <- mapIds(org.Hs.eg.db,
                          keys = ec_degs$gene,
                          keytype = "SYMBOL",
                          column = "ENTREZID",
                          multiVals = "first")


## Lookup table of the genes that received an Entrez ID
# (the `ensembl_id` column actually holds the gene symbols used as keys above)
gene_key_df <- data.frame(ensembl_id = ec_degs$gene,
                          entrez_id = ec_degs$gene_id,
                          stringsAsFactors = FALSE)
gene_key_df <- dplyr::filter(gene_key_df, !is.na(entrez_id))

# Working copy used for inspection
degs <- ec_degs
head(degs)
dim(degs)

In [None]:
#########################################################################
#### HIP: map DEG gene symbols to Entrez IDs for enrichment analysis ####
#########################################################################
# Symbols are looked up in org.Hs.eg.db; for 1:many mappings the first match
# is kept (multiVals = "first"), and unmapped symbols come back as NA.
hip_degs$gene_id <- mapIds(org.Hs.eg.db,
                           keys = hip_degs$gene,
                           keytype = "SYMBOL",
                           column = "ENTREZID",
                           multiVals = "first")


## Lookup table of the genes that received an Entrez ID
# (the `ensembl_id` column actually holds the gene symbols used as keys above)
gene_key_df <- data.frame(ensembl_id = hip_degs$gene,
                          entrez_id = hip_degs$gene_id,
                          stringsAsFactors = FALSE)
gene_key_df <- dplyr::filter(gene_key_df, !is.na(entrez_id))

# Working copy used for inspection
degs <- hip_degs
head(degs)
dim(degs)

In [None]:
## Run GO Biological Process over-representation analysis (ORA) on the DEGs of
## one cell type and one direction in each region, then stack the results.
##
## Arguments:
##   celltype   value matched against the `celltype` column of each DEG table
##   deg_dir    value matched against the `dir` column (callers pass "pos"/"neg")
##   pfc_degs, ec_degs, hip_degs
##              DEG data frames with columns `celltype`, `dir` and `gene_id`
##              (`gene_id` is what gets passed to enrichGO as the gene list)
## Returns:
##   A data frame of the simplified enrichGO result rows of all regions, with
##   the added columns `region` ("PFC"/"EC"/"HIP"), `percent` (GeneRatio
##   evaluated as a number) and `celltype`. Regions with no usable genes, or
##   for which enrichGO returns NULL, are skipped with a message; if every
##   region is skipped an empty data.frame() is returned.
get_ct_enrichment <- function(celltype, deg_dir, pfc_degs, ec_degs, hip_degs) {
    message(paste("Working on ", celltype, " ", deg_dir, ".", sep = ""))

    # ORA for a single region; returns the result table or NULL when skipped
    run_region <- function(degs, region) {
        keep <- which(degs$celltype == celltype & degs$dir == deg_dir)
        genes <- degs$gene_id[keep]
        genes <- genes[!is.na(genes)]  # unmapped symbols carry NA ids
        if (length(genes) == 0) {
            message("  ", region, ": no DEGs with a gene id, skipped.")
            return(NULL)
        }
        ego <- clusterProfiler::enrichGO(gene          = genes,
                                         OrgDb         = org.Hs.eg.db,
                                         ont           = "BP",
                                         pAdjustMethod = "BH",
                                         pvalueCutoff  = 0.05,
#                                        qvalueCutoff  = 0.2,
                                         readable      = TRUE)
        # guard: a NULL result would make simplify() fail
        if (is.null(ego)) {
            message("  ", region, ": enrichGO returned no result, skipped.")
            return(NULL)
        }
        ## remove redundant results
        # namespaced so that a same-named function from another attached
        # package cannot be picked up instead
        ego <- clusterProfiler::simplify(ego)
        res <- ego@result
        res$region <- rep(region, nrow(res))  # safe for 0-row results too
        res
    }

    per_region <- list(
        run_region(pfc_degs, "PFC"),
        run_region(ec_degs, "EC"),
        run_region(hip_degs, "HIP")
    )
    per_region <- Filter(Negate(is.null), per_region)
    if (length(per_region) == 0) {
        message("  no enrichment result for ", celltype, " ", deg_dir, ".")
        return(data.frame())
    }

    df_up <- do.call(rbind, per_region)

    # GeneRatio is a "k/n" string; convert it to the numeric fraction k / n
    ratio_parts <- str_split_fixed(df_up$GeneRatio, pattern = "/", n = 2)
    df_up$percent <- as.numeric(ratio_parts[, 1]) / as.numeric(ratio_parts[, 2])
    #terms <- intersect(intersect(pfc_ast_up$ID, ec_ast_up$ID),hip_ast_up$ID)

    # number of result rows per region
    print(table(df_up$region))

    # how many of them are significant
    print(table(df_up[which(df_up$qvalue < 0.05), ]$region))

    # provide cell type information
    df_up$celltype <- rep(celltype, nrow(df_up))
    df_up
}

In [None]:
## Run the enrichment analysis for up-regulated ("pos") DEGs of every cell type
ast_up <- get_ct_enrichment(celltype = "Astrocyte", deg_dir = "pos",
                            pfc_degs = pfc_degs, ec_degs = ec_degs, hip_degs = hip_degs)
exc_up <- get_ct_enrichment(celltype = "Excitatory", deg_dir = "pos",
                            pfc_degs = pfc_degs, ec_degs = ec_degs, hip_degs = hip_degs)
inh_up <- get_ct_enrichment(celltype = "Inhibitory", deg_dir = "pos",
                            pfc_degs = pfc_degs, ec_degs = ec_degs, hip_degs = hip_degs)
mic_up <- get_ct_enrichment(celltype = "Microglia", deg_dir = "pos",
                            pfc_degs = pfc_degs, ec_degs = ec_degs, hip_degs = hip_degs)
oli_up <- get_ct_enrichment(celltype = "Oligodendrocyte", deg_dir = "pos",
                            pfc_degs = pfc_degs, ec_degs = ec_degs, hip_degs = hip_degs)
opc_up <- get_ct_enrichment(celltype = "OPC", deg_dir = "pos",
                            pfc_degs = pfc_degs, ec_degs = ec_degs, hip_degs = hip_degs)
#table(ast_up$region)
#table(ast_up[ast_up$qvalue <0.05,]$region)

## combine all cell types
df <- rbind(ast_up, exc_up, inh_up, mic_up, oli_up, opc_up)
table(df$region, df$celltype)
#table(df[df$qvalue < 0.05,]$region,df[df$qvalue < 0.05,]$celltype)

# unique column label per cell type / region: first 3 letters of the cell type
# plus the region, e.g. "Ast_PFC"
df$colname <- paste(substr(df$celltype, start = 1, stop = 3), df$region, sep = "_")

# saving
# The file name had a doubled ".csv.csv" extension, so the read.csv() below
# could not find the table that was just written.
write.csv(df, file = "./Results/DEG/GO_pos_deg_all_ct_regions_no_redundant.csv")

# Reload both result sets. This cell does not write
# "GO_pos_deg_all_ct_regions.csv"; it is expected to exist already.
df_all <- read.csv("./Results/DEG/GO_pos_deg_all_ct_regions.csv", row.names = 1)
df <- read.csv("./Results/DEG/GO_pos_deg_all_ct_regions_no_redundant.csv", row.names = 1)

table(df_all$region, df_all$celltype)
table(df_all[df_all$qvalue < 0.05, ]$region, df_all[df_all$qvalue < 0.05, ]$celltype)
table(df[df$qvalue < 0.05, ]$region, df[df$qvalue < 0.05, ]$celltype)

In [None]:
## Plotting: heatmap of the top enriched GO terms for up-regulated DEGs
# Up to 5 terms per cell-type/region column (smallest q-value, then smallest
# p-value), taken from the redundancy-reduced table `df`.
top <- df %>%
  group_by(colname) %>%
  top_n(n = 5, wt = -qvalue) %>%
  top_n(n = 5, wt = -pvalue) %>%
  ungroup()
dim(top)
top <- top[which(top$qvalue < 0.05), ]  # which() also drops NA q-values
top <- top$Description

top_terms <- unique(top)
if (length(top_terms) == 0) {
  stop("No GO term passes qvalue < 0.05; nothing to plot.", call. = FALSE)
}
# Heatmap columns come from the data rather than a hard-coded 18, so a
# cell-type/region combination without any result no longer breaks the matrix.
heat_cols <- unique(df_all$colname)

# Rows of df_all that fall into the heatmap; the first row wins if a
# term/column pair occurs more than once.
hits <- df_all[df_all$Description %in% top_terms & df_all$colname %in% heat_cols, ]
hits <- hits[!duplicated(hits[, c("Description", "colname")]), ]
cell_idx <- cbind(match(hits$Description, top_terms),
                  match(hits$colname, heat_cols))

# Gene counts per term x column (0 where the term is absent)
mat <- matrix(0, nrow = length(top_terms), ncol = length(heat_cols),
              dimnames = list(top_terms, heat_cols))
mat[cell_idx] <- hits$Count

#mat

# Significance matrix: q-value per term x column (1 where the term is absent)
sig_mat <- matrix(1, nrow = length(top_terms), ncol = length(heat_cols),
                  dimnames = list(top_terms, heat_cols))
sig_mat[cell_idx] <- hits$qvalue


#### plotting the upregulated gene GSEA
# Annotation colours keyed by column label: one colour per region, one per cell type
annot_names <- paste(rep(c("Ast", "Exc", "Inh", "Mic", "Oli", "OPC"), each = 3),
                     c("PFC", "EC", "HIP"), sep = "_")
region_annot_col <- setNames(rep(c("#c25757ff", "#825ca6ff", "#3f78c1ff"), times = 6),
                             annot_names)
celltype_annot_col <- setNames(rep(c("#F06719", "#33A65C", "#23767C",
                                     "#E03426", "#1BA3C6", "#A26DC2"), each = 3),
                               annot_names)

ha <- HeatmapAnnotation(Region = colnames(mat),
                        col = list(Region = region_annot_col),
                        show_legend = FALSE, annotation_label = "")

ha2 <- HeatmapAnnotation(celltype = colnames(mat),
                         col = list(celltype = celltype_annot_col),
                         show_legend = FALSE, annotation_label = "")
ha <- c(ha2, ha)


ht <- Heatmap(mat, cluster_rows = FALSE, cluster_columns = FALSE,
              col = colorRamp2(c(0, 30, 60), c("grey95", "red", "red4")),
              top_annotation = ha, #name="Number of genes with significant expression changes",
              show_column_names = TRUE, show_row_names = TRUE, column_title = NULL,
              row_names_side = "right", row_title_gp = gpar(fontsize = 60),
              row_names_max_width = max_text_width(rownames(mat), gp = gpar(fontsize = 24)),
              # star on cells with q < 0.05; isTRUE() keeps an NA q-value from
              # aborting the drawing
              cell_fun = function(j, i, x, y, w, h, fill) {
                if (isTRUE(sig_mat[i, j] < 0.05)) {
                  grid.text("*", x, y, gp = gpar(fontsize = 35, col = "black"),
                            vjust = "center")
                }
              })

p1 <- ht

pdf("./Figures/DEG/up_GSEA.pdf", width = 12, height = 12)
draw(p1)  # explicit draw so the heatmap is rendered even when the code is source()d
dev.off()

In [None]:
## Run the enrichment analysis for down-regulated ("neg") DEGs of every cell type
# Small wrapper so that only the cell type varies between the calls
run_down <- function(ct) {
  get_ct_enrichment(celltype = ct, deg_dir = "neg",
                    pfc_degs = pfc_degs, ec_degs = ec_degs, hip_degs = hip_degs)
}
ast_down <- run_down("Astrocyte")
exc_down <- run_down("Excitatory")
inh_down <- run_down("Inhibitory")
mic_down <- run_down("Microglia")
oli_down <- run_down("Oligodendrocyte")
opc_down <- run_down("OPC")
# table(ast_up$region)
# table(ast_up[ast_up$qvalue < 0.05, ]$region)

## Stack the per-cell-type results
df <- rbind(ast_down, exc_down, inh_down, mic_down, oli_down, opc_down)
table(df$region, df$celltype)
table(df[df$qvalue < 0.05, ]$region, df[df$qvalue < 0.05, ]$celltype)

# Unique column label per cell type / region: first 3 letters of the cell type
# plus the region, e.g. "Ast_PFC"
df$colname <- paste(substr(df$celltype, start = 1, stop = 3), df$region, sep = "_")

# Save the redundancy-reduced table
write.csv(df, file = "./Results/DEG/GO_neg_deg_all_ct_regions_no_redundant.csv")

## Reload both result sets. This cell does not write
## "GO_neg_deg_all_ct_regions.csv"; it is expected to exist already.
df_all <- read.csv("./Results/DEG/GO_neg_deg_all_ct_regions.csv", row.names = 1)
df <- read.csv("./Results/DEG/GO_neg_deg_all_ct_regions_no_redundant.csv", row.names = 1)
# df <- read.csv("./Results/DEG/GO_pos_deg_all_ct_regions.csv", row.names = 1)

table(df_all[df_all$qvalue < 0.05, ]$region, df_all[df_all$qvalue < 0.05, ]$celltype)
table(df[df$qvalue < 0.05, ]$region, df[df$qvalue < 0.05, ]$celltype)

In [None]:
## Plotting: heatmap of the top enriched GO terms for down-regulated DEGs
# Up to 5 terms per cell-type/region column (smallest q-value, then smallest
# p-value), taken from the redundancy-reduced table `df`.
top <- df %>%
  group_by(colname) %>%
  top_n(n = 5, wt = -qvalue) %>%
  top_n(n = 5, wt = -pvalue) %>%
  ungroup()
dim(top)
top <- top[which(top$qvalue < 0.05), ]  # which() also drops NA q-values
top <- top$Description

top_terms <- unique(top)
if (length(top_terms) == 0) {
  stop("No GO term passes qvalue < 0.05; nothing to plot.", call. = FALSE)
}
# Heatmap columns come from the data rather than a hard-coded 18, so a
# cell-type/region combination without any result no longer breaks the matrix.
heat_cols <- unique(df_all$colname)

# Rows of df_all that fall into the heatmap; the first row wins if a
# term/column pair occurs more than once.
hits <- df_all[df_all$Description %in% top_terms & df_all$colname %in% heat_cols, ]
hits <- hits[!duplicated(hits[, c("Description", "colname")]), ]
cell_idx <- cbind(match(hits$Description, top_terms),
                  match(hits$colname, heat_cols))

# Gene counts per term x column (0 where the term is absent)
mat <- matrix(0, nrow = length(top_terms), ncol = length(heat_cols),
              dimnames = list(top_terms, heat_cols))
mat[cell_idx] <- hits$Count

#mat

# Significance matrix: q-value per term x column (1 where the term is absent)
sig_mat <- matrix(1, nrow = length(top_terms), ncol = length(heat_cols),
                  dimnames = list(top_terms, heat_cols))
sig_mat[cell_idx] <- hits$qvalue

#sig_mat

## plotting for the results of GSEA_downregulated
# Annotation colours keyed by column label: one colour per region, one per cell type
annot_names <- paste(rep(c("Ast", "Exc", "Inh", "Mic", "Oli", "OPC"), each = 3),
                     c("PFC", "EC", "HIP"), sep = "_")
region_annot_col <- setNames(rep(c("#c25757ff", "#825ca6ff", "#3f78c1ff"), times = 6),
                             annot_names)
celltype_annot_col <- setNames(rep(c("#F06719", "#33A65C", "#23767C",
                                     "#E03426", "#1BA3C6", "#A26DC2"), each = 3),
                               annot_names)

ha <- HeatmapAnnotation(Region = colnames(mat),
                        col = list(Region = region_annot_col),
                        show_legend = FALSE, annotation_label = "")

ha2 <- HeatmapAnnotation(celltype = colnames(mat),
                         col = list(celltype = celltype_annot_col),
                         show_legend = FALSE, annotation_label = "")
ha <- c(ha2, ha)

ht <- Heatmap(mat, cluster_rows = FALSE, cluster_columns = FALSE,
              col = colorRamp2(c(0, 30, 60), c("grey95", "deepskyblue", "dodgerblue4")),
              top_annotation = ha, #name="Number of genes with significant expression changes",
              show_column_names = TRUE, show_row_names = TRUE, column_title = NULL,
              row_names_side = "right", row_title_gp = gpar(fontsize = 60),
              row_names_max_width = max_text_width(rownames(mat), gp = gpar(fontsize = 24)),
              # star on cells with q < 0.05; isTRUE() keeps an NA q-value from
              # aborting the drawing
              cell_fun = function(j, i, x, y, w, h, fill) {
                if (isTRUE(sig_mat[i, j] < 0.05)) {
                  grid.text("*", x, y, gp = gpar(fontsize = 35, col = "black"),
                            vjust = "center")
                }
              })

p1 <- ht

pdf("./Figures/DEG/down_GSEA.pdf", width=12, height=12)
p1
dev.off()

In [None]:
## Supplementary figure: overlap of sEOAD DEGs in PFC with existing LOAD snRNA-seq studies
# Load the PFC overlap DEG results (first CSV column holds the row names)
sig <- read.csv(file = "./Results/DEG/Overlap_mast_mixed_PFC.csv", row.names = 1)
# Number of entries in the comb column
length(sig$comb)

In [None]:
#### Overlap between DEGs of PFC in the current study and existing publications
library(readxl)

# Replace the labels named in `map` (old label -> new label); every other
# value, including NA, is returned unchanged. Unlike row-subset assignment,
# this does not fail when a label is absent from the data.
recode_celltype <- function(x, map) {
  known <- x %in% names(map)
  x[known] <- unname(map[x[known]])
  x
}

## Morabito et al.
morabito <- read_excel("./Data/Morabito.xlsx", sheet = 5)
# which() drops rows with NA statistics instead of turning them into NA rows
morabito <- morabito[which(abs(morabito$avg_logFC) >= 0.25 & morabito$p_val_adj <= 0.05), ]
table(morabito$celltype)
# Harmonise the cell type labels with the names used in this study
morabito$celltype <- recode_celltype(
  morabito$celltype,
  c(ASC = "Astrocyte", EX = "Excitatory", INH = "Inhibitory",
    MG = "Microglia", ODC = "Oligodendrocyte", OPC = "OPC")
)
# Drop the PER.END rows
morabito <- morabito[!(morabito$celltype %in% "PER.END"), ]
morabito$dir <- ifelse(morabito$avg_logFC > 0, "pos", "neg")
# Key used for the overlap: gene_direction_celltype
morabito$comb <- paste(morabito$gene, morabito$dir, morabito$celltype, sep = "_")


## LOAD multiome Anderson et al.
LOAD_multiome <- read_excel("./Data/LOAD_multiome.xlsx", sheet = 2)
LOAD_multiome <- LOAD_multiome[which(abs(LOAD_multiome$avg_log2FC) >= 0.25 & LOAD_multiome$p_val_adj <= 0.05), ]
LOAD_multiome$dir <- ifelse(LOAD_multiome$cat == "up", "pos", "neg")
table(LOAD_multiome$celltype)
# Only these three labels are renamed; all other labels are kept as they are
LOAD_multiome$celltype <- recode_celltype(
  LOAD_multiome$celltype,
  c(Astrocytes = "Astrocyte", Oligodendrocytes = "Oligodendrocyte", OPCs = "OPC")
)
LOAD_multiome$comb <- paste(LOAD_multiome$gene, LOAD_multiome$dir, LOAD_multiome$celltype, sep = "_")

## Mathy et al.
mathy <- read.csv("./Data/Mathy.csv")
table(mathy$celltype)

In [None]:
## Supplementary figure: overlap of sEOAD DEGs in PFC with existing LOAD snRNA-seq studies
## Lists of DEG keys (comb columns), one per study
list1 <- sig$comb
list2 <- morabito$comb
list3 <- LOAD_multiome$comb
# NOTE(review): assumes Mathy.csv already contains a comb column; it is not built in this script - confirm
list4 <- mathy$comb

## Drawing the Venn diagram
library(VennDiagram)
library(RColorBrewer)
myCol <- brewer.pal(4, "Pastel2")

venn.diagram(
  # First set is the current study's PFC list (list1); the original passed an
  # undefined object `l5` here
  x = list(list1, list2, list3, list4),
  category.names = c("EOAD_PFC" , "Morabito et al." , "Anderson et al.","Mathy et al."),
  filename = './Figures/Venn_DEG_PFC.png',
  output = FALSE,
  # Output features
  imagetype = "png",
  height = 1200,
  width = 1500,
  resolution = 300,
  compression = "lzw",

  # Circles
  lwd = 2,
  lty = 'blank',
  fill = myCol,

  # Numbers
  cex = .6,
  fontface = "bold",
  fontfamily = "sans",

  # Set names
  cat.cex = 0.6,
  cat.fontface = "bold",
  cat.default.pos = "outer",
#  cat.pos = c(-27, 27, 135),
#  cat.dist = c(0.055, 0.055, 0.085),
  cat.fontfamily = "sans"
)

In [None]:
## Supplementary figures: UpSet plots
### Check overlap between the three regions
# One overlap DEG table per region (PFC, EC, HIP)
deg_pfc <- read.csv(file = "./Results/DEG/Overlap_mast_mixed_PFC.csv")
deg_ec <- read.csv(file = "./Results/DEG/Overlap_mast_mixed_EC.csv")
deg_hip <- read.csv(file = "./Results/DEG/Overlap_mast_mixed_HIP.csv")

# Number of entries in each region's gene column
length(deg_pfc$gene)
length(deg_ec$gene)
length(deg_hip$gene)

### Function for plotting
#' UpSet plot of the DEG overlap between PFC, EC and HIP for one cell type
#'
#' @param ct Cell type label, compared against the `celltype` column of each
#'   table.
#' @param pfc,ec,hip DEG tables with `celltype` and `comb` columns. They
#'   default to the global `deg_pfc`, `deg_ec` and `deg_hip`, so existing
#'   calls of the form `upset_region(ct)` behave as before.
#' @return The object returned by `UpSet()`, with `ct` as its row title.
upset_region <- function(ct, pfc = deg_pfc, ec = deg_ec, hip = deg_hip) {
  print(ct)

  # which() ignores rows whose celltype is NA; plain logical subsetting would
  # add an NA entry to every set, and that NA would be counted as a shared DEG
  keys_for <- function(tbl) tbl$comb[which(tbl$celltype == ct)]

  lt <- list(
    PFC = keys_for(pfc),
    EC = keys_for(ec),
    HIP = keys_for(hip)
  )
  print(length(lt$PFC))
  m1 <- make_comb_mat(lt)

  # Combinations are ordered by decreasing size; the top bars are coloured by
  # the degree of each combination
  UpSet(
    m1,
    set_order = c("PFC", "EC", "HIP"),
    comb_order = order(-comb_size(m1)),
    top_annotation = upset_top_annotation(m1, gp = gpar(col = comb_degree(m1))),
    row_title = ct
  )
}

In [None]:
## Plot the UpSet plot of each cell type.
ct_list <- c("Astrocyte","Excitatory","Inhibitory","Microglia","Oligodendrocyte","OPC")

p1 <- upset_region(ct_list[1])
p2 <- upset_region(ct_list[2])
p3 <- upset_region(ct_list[3])
p4 <- upset_region(ct_list[4])
p5 <- upset_region(ct_list[5])
p6 <- upset_region(ct_list[6])

## Plotting: one PDF per cell type, named ./Figures/DEG/Upset_DEGs_<tag>.pdf
upset_plots <- list(Ast = p1, Exc = p2, Inh = p3, Mic = p4, Oli = p5, Opc = p6)
for (tag in names(upset_plots)) {
  pdf(file = paste0("./Figures/DEG/Upset_DEGs_", tag, ".pdf"), width = 4, height = 3)
  # Nothing is auto-printed inside a loop, so draw explicitly; finally = makes
  # sure the device is closed even if drawing fails
  tryCatch(
    ComplexHeatmap::draw(upset_plots[[tag]]),
    finally = dev.off()
  )
}