In [None]:
### Extract the cell barcodes of interest and generate 40 (n is customizable) barcodes in a text file to speed up step 2
# packages
library(utils)
library(stats)
library(Seurat)
library(Signac)
library(tidyr)
library(dplyr)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
# library(parallel)
library(future.apply)

## parameters for multicores
# Raise the size limit for globals exported to future workers to 13 GiB
# (13 * 1024^3 bytes); presumably sized for the per-sample fragments table
# used by the parallel step further down -- TODO confirm.
options(future.globals.maxSize = 13 * 1024^3)
# Evaluate futures in 20 background R sessions.
plan(multisession, workers = 20) 

# NOTE(review): rm(list = ls()) deletes every object in the current workspace;
# anything created before this cell is lost. Attached packages, options() and
# the future plan set above are not removed by it.
rm(list =ls())
# Run garbage collection after clearing the workspace.
gc()

In [None]:
## load general files
# Read the serialized object that the rest of this file refers to as `clean`.
clean <- readRDS("03.clean_object_10.31.rds")
# List the workspace to confirm what is loaded.
ls()

In [None]:
# Print a summary of the object and plot its "harmony.atac.umap" reduction.
clean
DimPlot(clean,reduction = "harmony.atac.umap" )
# Keep a working copy of the per-cell metadata and list its columns.
meta <- clean@meta.data
colnames(meta)

In [None]:
## load in metadata for each dataset
# get_fragments() below reads the `sample_id` and `fragments` columns of this table.
sample.meta <- read.csv("./Merge_multi_b2345.csv")
sample.meta

In [None]:
## sample IDs grouped by brain region (one vector per region)
PFC_samp <- c(
    "UT04", "UT09", "UT2105",
    "NIH01", "NIH04", "NIH10", "NIH13", "NIH16", "NIH28"
)
EC_samp <- c(
    "NIH02", "NIH05", "NIH11", "NIH14", "NIH17", "NIH29"
)
HIP_samp <- c(
    "NIH03", "NIH06", "NIH12", "NIH15", "NIH18", "NIH30"
)

In [None]:
# Build the per-state reference ranges for one epigenome.
#
# tissue.code: epigenome identifier used to build the BED file name under
#              ./Data/peak.ref/ (e.g. "E073").
# Returns a named list with one GRanges per distinct value of BED column 4,
# with names in sorted order; rows on "chrM" are excluded.
get_ChromHMM_ref <- function(tissue.code){
    bed_path <- paste0("./Data/peak.ref/", tissue.code, "_18_core_K27ac_hg38lift_mnemonics.bed")
    segments <- read.table(bed_path, stringsAsFactors = FALSE)

    # drop mitochondrial segments
    segments <- segments[segments$V1 != "chrM", ]
    print(head(segments))

    # one GRanges per state label (column 4), in sorted label order
    state_labels <- unique(sort(segments[, 4]))
    gr_by_state <- lapply(state_labels, function(label) {
        rows <- segments[segments[, 4] == label, ]
        GRanges(rows[, 1], IRanges(rows[, 2], rows[, 3]))
    })
    names(gr_by_state) <- state_labels

    return(gr_by_state)
}
# Read the fragments table of one sample.
#
# object:      unused; kept so existing calls keep working.
# sample_id:   sample identifier looked up in sample.meta$sample_id.
# sample.meta: data frame with columns `sample_id` and `fragments` (file path).
# Returns a data frame with columns chr, start, end, barcode, duplicates.
# Stops with an explicit error unless exactly one fragments path is found.
get_fragments <- function(object, sample_id, sample.meta){
    message(paste0("Loading fragments file for ", sample_id, "."))

    # look up the path and insist on a single, non-missing hit so a typo in the
    # sample ID fails here instead of inside read.table()
    frag_dir <- sample.meta$fragments[sample.meta$sample_id == sample_id]
    frag_dir <- frag_dir[!is.na(frag_dir)]
    if (length(frag_dir) != 1L) {
        stop("Expected exactly one fragments path for sample '", sample_id,
             "' in sample.meta, found ", length(frag_dir), ".", call. = FALSE)
    }
    fragments <- read.table(as.character(frag_dir))

    # message() pastes its arguments without a separator, so format explicitly
    message("Fragments table dimensions: ", paste(dim(fragments), collapse = " x "))
    colnames(fragments) <- c("chr", "start", "end", "barcode", "duplicates")
    message(paste(utils::capture.output(print(head(fragments))), collapse = "\n"))

    return(fragments)
}
# Return the barcodes of the cells that belong to one sample.
#
# object:    object whose @meta.data row names are cell names of the form
#            "<sample_id>_<barcode>".
# sample_id: single sample identifier.
# Returns a character vector of barcodes with the "<sample_id>_" prefix
# removed; character(0) when the sample has no cells in the object.
get_barcode <- function(object,sample_id){
    stopifnot(is.character(sample_id), length(sample_id) == 1L)

    cells <- rownames(object@meta.data)
    message("Total cells in object: ", length(cells))

    # Match the literal "<sample_id>_" prefix. A plain grep(sample_id, ...) is
    # an unanchored regex and would also pick up cells of any sample whose ID
    # merely contains this one (e.g. "NIH01" inside "NIH010_...").
    prefix <- paste0(sample_id, "_")
    keep <- substring(cells[startsWith(cells, prefix)], nchar(prefix) + 1L)

    message("Cells retained for ", sample_id, ": ", length(keep))
    message(Sys.time())

    return(keep)
}

# Per-state cut-site counts for a set of barcodes.
#
# barcode: barcode(s) to keep from the fragments table.
# Reads two objects from the calling environment: `fragments` (data frame with
# columns chr, start, end, barcode) and `state18.gr.list` (named list of
# GRanges, one per state).
# Returns a one-column numeric matrix with one row per element of
# state18.gr.list, in list order.
ChromHMM_enrich <- function(barcode){
    df <- fragments[fragments$barcode %in% barcode, ]

    # every fragment contributes two width-1 cut sites: its start and its end
    cut_chr <- rep(df$chr, 2)
    cut_pos <- c(df$start, df$end)
    cutsite.gr <- GRanges(cut_chr, IRanges(cut_pos, cut_pos))

    # NOTE(review): length(intersect(...)) counts the ranges of the intersection,
    # which presumably merges duplicate/adjacent cut sites, so it may be smaller
    # than the raw number of cut sites in a state (see countOverlaps) -- confirm
    # this is the intended statistic.
    counts <- vapply(
        state18.gr.list,
        function(x) length(GenomicRanges::intersect(cutsite.gr, x)),
        integer(1)
    )
    message(Sys.time())

    # size follows the reference list instead of a hard-coded 18 rows
    return(matrix(as.numeric(counts), ncol = 1))
}


In [None]:
## running loop for PFC samples first
tissue.code <- "E073"
# the reference depends only on the tissue code, so it is built once
state18.gr.list <- get_ChromHMM_ref(tissue.code = tissue.code)
for (i in PFC_samp){
    t1 <- Sys.time()
    message(t1)
    message(paste0("Working on ", i, "."))

    # sample id
    sample_id <- i

    # prepare: ChromHMM_enrich() reads `fragments` and `state18.gr.list` as globals
    fragments <- get_fragments(object = clean, sample_id = sample_id, sample.meta = sample.meta)
    barcode <- get_barcode(object = clean, sample_id = sample_id)
    if (length(barcode) == 0L) {
        warning("No cells found for sample ", sample_id, "; skipping.", call. = FALSE)
        next
    }
    final_barcode <- paste0(sample_id, "_", barcode)

    # real analysis in parallel, one future element per barcode
    message("Running analysis...")
    # suppressWarnings() must wrap the call; wrapping the function object
    # (as in suppressWarnings(ChromHMM_enrich)) suppresses nothing.
    res <- future_lapply(barcode, function(b) suppressWarnings(ChromHMM_enrich(b)))
    final <- do.call(cbind, res)

    # adding col and row names: states in rows, cells in columns
    message("Cleaning the data...")
    print(dim(final))
    rownames(final) <- names(state18.gr.list)
    colnames(final) <- final_barcode

    # saving
    out_dir <- paste0("./Results/ChromHMM/", i, ".txt")
    write.table(final, out_dir, quote = FALSE, sep = "\t")

    # difftime picks its own units unless told; force minutes to match the label
    t2 <- Sys.time()
    elapsed_mins <- as.numeric(difftime(t2, t1, units = "mins"))
    print(paste0("Time used for ", i, ": ", round(elapsed_mins, digits = 2), " mins."))
}


In [None]:
## running loop for EC samples
tissue.code <- "E072"
# the reference depends only on the tissue code, so build it once instead of
# re-reading the BED file for every sample
state18.gr.list <- get_ChromHMM_ref(tissue.code = tissue.code)
for (i in EC_samp){
    t1 <- Sys.time()
    message(t1)
    message(paste0("Working on ", i, "."))

    # sample id
    sample_id <- i

    # prepare: ChromHMM_enrich() reads `fragments` and `state18.gr.list` as globals
    fragments <- get_fragments(object = clean, sample_id = sample_id, sample.meta = sample.meta)
    barcode <- get_barcode(object = clean, sample_id = sample_id)
    if (length(barcode) == 0L) {
        warning("No cells found for sample ", sample_id, "; skipping.", call. = FALSE)
        next
    }
    final_barcode <- paste0(sample_id, "_", barcode)

    # real analysis in parallel, one future element per barcode
    message("Running analysis...")
    # suppressWarnings() must wrap the call; wrapping the function object
    # (as in suppressWarnings(ChromHMM_enrich)) suppresses nothing.
    res <- future_lapply(barcode, function(b) suppressWarnings(ChromHMM_enrich(b)))
    final <- do.call(cbind, res)

    # adding col and row names: states in rows, cells in columns
    message("Cleaning the data...")
    rownames(final) <- names(state18.gr.list)
    colnames(final) <- final_barcode

    # saving
    # NOTE(review): this loop writes under ./Analysis/Results/ChromHMM/ while the
    # PFC and HIP loops in this file write under ./Results/ChromHMM/ -- confirm
    # which directory is intended.
    out_dir <- paste0("./Analysis/Results/ChromHMM/", i, ".txt")
    write.table(final, out_dir, quote = FALSE, sep = "\t")

    # difftime picks its own units unless told; force minutes to match the label
    t2 <- Sys.time()
    elapsed_mins <- as.numeric(difftime(t2, t1, units = "mins"))
    print(paste0("Time used for ", i, ": ", round(elapsed_mins, digits = 2), " mins."))
}

In [None]:
## running loop for HIP samples last
tissue.code <- "E071"
# the reference depends only on the tissue code, so build it once instead of
# re-reading the BED file for every sample
state18.gr.list <- get_ChromHMM_ref(tissue.code = tissue.code)
for (i in HIP_samp){
    t1 <- Sys.time()
    message(t1)
    message(paste0("Working on ", i, "."))

    # sample id
    sample_id <- i

    # prepare: ChromHMM_enrich() reads `fragments` and `state18.gr.list` as globals
    fragments <- get_fragments(object = clean, sample_id = sample_id, sample.meta = sample.meta)
    barcode <- get_barcode(object = clean, sample_id = sample_id)
    if (length(barcode) == 0L) {
        warning("No cells found for sample ", sample_id, "; skipping.", call. = FALSE)
        next
    }
    final_barcode <- paste0(sample_id, "_", barcode)

    # real analysis in parallel, one future element per barcode
    message("Running analysis...")
    # suppressWarnings() must wrap the call; wrapping the function object
    # (as in suppressWarnings(ChromHMM_enrich)) suppresses nothing.
    res <- future_lapply(barcode, function(b) suppressWarnings(ChromHMM_enrich(b)))
    final <- do.call(cbind, res)

    # adding col and row names: states in rows, cells in columns
    message("Cleaning the data...")
    rownames(final) <- names(state18.gr.list)
    colnames(final) <- final_barcode

    # saving
    # NOTE(review): the merge step later in this file lists
    # ./Analysis/Results/ChromHMM/ -- confirm this output directory is intended.
    out_dir <- paste0("./Results/ChromHMM/", i, ".txt")
    write.table(final, out_dir, quote = FALSE, sep = "\t")

    # difftime picks its own units unless told; force minutes to match the label
    t2 <- Sys.time()
    elapsed_mins <- as.numeric(difftime(t2, t1, units = "mins"))
    print(paste0("Time used for ", i, ": ", round(elapsed_mins, digits = 2), " mins."))
}

In [None]:
## read and combine all results
# `pattern` is a regular expression, not a glob: anchor on the ".txt" suffix.
# NOTE(review): the PFC and HIP loops in this file write to ./Results/ChromHMM/,
# not to the directory listed here -- confirm all per-sample files are in place.
filenames <- list.files("./Analysis/Results/ChromHMM/", pattern = "\\.txt$", full.names = TRUE)
if (length(filenames) == 0L) {
    stop("No per-sample .txt files found in ./Analysis/Results/ChromHMM/.", call. = FALSE)
}
ldf <- lapply(filenames, read.delim)

# cbind() pairs rows by position, so every table must list the states in the
# same order
same_rows <- vapply(
    ldf,
    function(tab) identical(rownames(tab), rownames(ldf[[1]])),
    logical(1)
)
if (!all(same_rows)) {
    stop("State row names differ between: ",
         paste(basename(filenames[!same_rows]), collapse = ", "),
         " and ", basename(filenames[1]), ".", call. = FALSE)
}

final <- do.call(cbind, ldf)
dim(final)


In [None]:
## restore the cell-name format of the merged table
library(stringr)

# tabulate the part of each column name before the first "_"
check <- str_split_fixed(colnames(final), "_", 2)[, 1]
table(check)

# turn every "." in the column names into "-"
colnames(final) <- gsub(".", "-", colnames(final), fixed = TRUE)

# how many cells of the object are present among the merged columns
meta <- clean@meta.data
table(is.element(rownames(meta), colnames(final)))

In [None]:
# flip the table so that the former columns become rows, then preview it
final <- t(as.matrix(final))
head(final)

In [None]:
## convert counts to proportions: every row is divided by its own row total
final2 <- sweep(final, 1, rowSums(final), "/")

# preview the per-row proportions
head(final2)

In [None]:
## performing the clr normalization
library(compositions)
final3 <- as.data.frame(clr(final2)) # clr transform; NOTE(review): compositions::clr appears to treat each row (cell) as one composition rather than each column -- confirm
dim(final3)
head(final3)

In [None]:
# calculating deviation of each value from its column (ChromHMM state) mean
colMeans(final3)
# `final3 - colMeans(final3)` recycles the mean vector down the rows of the
# data frame (column-major), so most entries get another state's mean
# subtracted. sweep() over margin 2 subtracts each column's own mean.
final4 <- sweep(final3, 2, colMeans(final3), "-")
head(final4)

In [None]:
## erosion score as defined by Xiong et al. 2023:
## sum over the repressive states minus sum over the active states
active <- c(
    "14_TssBiv", "11_EnhWk", "15_EnhBiv", "4_TssFlnkD", "7_EnhG1", "10_EnhA2",
    "2_TssFlnk", "3_TssFlnkU", "1_TssA", "9_EnhA1", "8_EnhG2"
)
repressive <- c(
    "13_Het", "18_Quies", "12_ZNF/Rpts", "17_ReprPCWk", "16_ReprPC"
)

# the column keeps its original spelling because later code reads it by that name
final4$errosion_score <- rowSums(final4[repressive]) - rowSums(final4[active])

# distribution of the score
hist(final4$errosion_score)
summary(final4$errosion_score)

In [None]:
## copy the erosion score into the object metadata, aligned by cell name
id <- match(rownames(meta), rownames(final4))
meta$erosion_score <- final4$errosion_score[id]

## cap the TSS enrichment at 10
meta$TSS_cap <- pmin(meta$TSS.enrichment, 10)
hist(meta$TSS_cap)

# write the updated table back into the object
clean@meta.data <- meta

In [None]:
# List the metadata columns now stored on the object.
colnames(clean@meta.data)

In [None]:
library(RColorBrewer)
library(ggplot2)
library(tidydr)

# shared theme for the UMAP panels: theme_dr() axes, no grid, no background
umap_theme <- theme_dr() +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.line = element_line(colour = "black", linewidth = 3))

#######################################################
## cell-type colours ##################################
#######################################################
col1 <- c('#F06719','#33A65C','#23767C','#E03426','#1BA3C6',"#A26DC2","#FCB905","#EB73B3")
names(col1) <- c('Astrocyte','Excitatory','Inhibitory','Microglia','Oligodendrocyte',"OPC","Endothelial","VLMC/Per")
# same palette plus a light grey entry named 'other'
cols <- c(unname(col1), 'grey90')
names(cols) <- c(names(col1), 'other')

# p1: UMAP coloured by the `cluster_celltype` metadata column
p1 <- DimPlot(clean, reduction = 'harmony.atac.umap', label = TRUE,
              group.by = 'cluster_celltype', cols = cols, pt.size = 0.2) +
    umap_theme + ggtitle("ALL")
# p2 <- DimPlot(clean, group.by = "diagnosis",reduction = "harmony.atac.umap",pt.size = 0.2)+ scale_colour_gradientn(colours = rev(brewer.pal(n = 20, name = "RdBu")))+umap_theme+ggtitle("TSS enrichment")
# p3: UMAP coloured by the erosion score.
# "RdBu" has at most 11 colours; asking for 20 only triggers a warning and
# returns the same 11, so request 11 directly.
p3 <- FeaturePlot(clean, features = "erosion_score", reduction = "harmony.atac.umap", pt.size = 0.2) +
    scale_colour_gradientn(colours = rev(brewer.pal(n = 11, name = "RdBu"))) +
    umap_theme + ggtitle("Erosion score")



In [None]:
# plotting: write both UMAP panels side by side into one PDF
# NOTE(review): the file is named TSS_enrichment.pdf but the second panel shows
# the erosion score -- confirm the intended file name.
pdf("./Figures/UMAP/TSS_enrichment.pdf",width = 11,height = 5)
# print explicitly so the page is drawn even when this code is not auto-printed,
# and always close the device so a plotting error does not leave it open
invisible(tryCatch(
    print(patchwork::wrap_plots(p1, p3, ncol = 2)),
    finally = dev.off()
))

In [None]:
# Refresh the working copy of the metadata from the object and list its columns.
meta <- clean@meta.data
colnames(meta)

In [None]:
################################################################################
############################# modify the metadata ##############################
################################################################################
# pull out metadata
biospecimen_meta <- read.csv("./EOAD_biospecimen_metadata.csv")
meta <- clean@meta.data

# add phenotype information: row of biospecimen_meta for every cell's individual
id <- match(clean$individual_ID, biospecimen_meta$Simple_ID)
# unmatched individuals would otherwise be filled with NA without any notice
if (anyNA(id)) {
    warning("No biospecimen metadata found for individual_ID: ",
            paste(unique(clean$individual_ID[is.na(id)]), collapse = ", "),
            call. = FALSE)
}

# demographics and sample quality
meta$sex <- as.factor(biospecimen_meta$Sex[id])
meta$age <- biospecimen_meta$Age[id]
meta$race <- as.factor(biospecimen_meta$Race[id])
meta$ethinicity <- as.factor(biospecimen_meta$Ethinicity[id])
meta$PMI <- biospecimen_meta$PMI.Hours.[id]
meta$RIN <- biospecimen_meta$New_RIN[id]
meta$batch <- as.factor(meta$batch)

# pathology columns, copied under the same names
meta$Abeta_plaque_score <- biospecimen_meta$Abeta_plaque_score[id]
meta$NFT_stage <- biospecimen_meta$NFT_stage[id]
meta$Neuritic_plaque_score_CERAD <- biospecimen_meta$Neuritic_plaque_score_CERAD[id]
meta$Braak <- biospecimen_meta$Braak[id]
#meta$batch <- biospecimen_meta[id,]$Batch

#meta
colnames(meta)
head(meta)
## assign back
clean@meta.data <- meta

In [None]:
# NOTE(review): this writes to the same file name the object was loaded from
# ("03.clean_object_10.31.rds"), so the original input is overwritten -- confirm
# that is intended.
saveRDS(clean, file = "./03.clean_object_10.31.rds")

In [None]:
### focus on PFC first
# Keep only cells whose `regions` value is "PFC" and work on the "PC" assay.
PFC_object <- subset(clean, subset = regions == "PFC")
DefaultAssay(PFC_object) <- "PC"
################################################################################
############################## log normalization ###############################
################################################################################
PFC_object <- NormalizeData(PFC_object, normalization.method = "LogNormalize", scale.factor = 10000)
# Scale every feature of the active assay, not only the variable ones.
all.genes <- rownames(PFC_object)
PFC_object <- ScaleData(PFC_object, features = all.genes)

## find variable features 
PFC_object <- FindVariableFeatures(PFC_object, selection.method = "vst", nfeatures = 2000)

PFC_object

saveRDS(PFC_object, file = "cellbender_PFC_object_10.31.rds")

In [None]:
### same processing for the HIP region
# Keep only cells whose `regions` value is "HIP" and work on the "PC" assay.
HIP_object <- subset(clean, subset = regions == "HIP")
DefaultAssay(HIP_object) <- "PC"
################################################################################
############################## log normalization ###############################
################################################################################
HIP_object <- NormalizeData(HIP_object, normalization.method = "LogNormalize", scale.factor = 10000)
# Scale every feature of the active assay, not only the variable ones.
all.genes <- rownames(HIP_object)
HIP_object <- ScaleData(HIP_object, features = all.genes)

## find variable features 
HIP_object <- FindVariableFeatures(HIP_object, selection.method = "vst", nfeatures = 2000)

HIP_object

saveRDS(HIP_object, file = "cellbender_HIP_object_1.9.rds")

In [None]:
### same processing for the EC region
# Keep only cells whose `regions` value is "EC" and work on the "PC" assay.
EC_object <- subset(clean, subset = regions == "EC")
DefaultAssay(EC_object) <- "PC"
################################################################################
############################## log normalization ###############################
################################################################################
EC_object <- NormalizeData(EC_object, normalization.method = "LogNormalize", scale.factor = 10000)
# Scale every feature of the active assay, not only the variable ones.
all.genes <- rownames(EC_object)
EC_object <- ScaleData(EC_object, features = all.genes)

## find variable features 
EC_object <- FindVariableFeatures(EC_object, selection.method = "vst", nfeatures = 2000)

EC_object

saveRDS(EC_object, file = "cellbender_EC_object_1.9.rds")

In [None]:
# Preview and export `final`.
# NOTE(review): if the cells above ran in order, `final` is the transposed
# count table (not the normalized `final3`/`final4`) -- confirm this is the
# table meant to be exported.
head(final)
dim(final)

write.csv(final, file = "./Results/ChromHMM/Merged_result.csv")