In [4]:
# Inside RStudio, resolve all relative paths against the folder of this document
if (identical(Sys.getenv("RSTUDIO"), "1")) {
  setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
}

# Append the extra package folder to the library search path
myPath <- c(.libPaths(), '/packages')
.libPaths(myPath)

# Load the project helper scripts (in this order) into the global environment
invisible(lapply(
  c(
    "../../code/utils.R",
    "../../code/getCounts.R",
    "../../code/getBias.R",
    "../../code/getFootprints.R",
    "../../code/visualization.R",
    "../../code/getTFBS.R",
    "../../code/getTargetPathway.R"
  ),
  source
))
library("ComplexHeatmap")

###################
# Load input data #
###################

# Genomic ranges of the regions (CREs) analysed below
regions <- readRDS("../../data/mHSCAging10xV3/regionRanges.rds")

# Mouse MSigDB gene sets ("C5", "BP") fetched through hypeR; spaces in the
# gene-set names are replaced by underscores
library(hypeR)
c5GO <- msigdb_gsets(species = "Mus musculus", "C5", "BP", clean = TRUE)$genesets
names(c5GO) <- gsub(" ", "_", names(c5GO), fixed = TRUE)

# CRE-to-gene mapping: a region is assigned to a gene when it overlaps the
# 1000 bp window centred on that gene's TSS
TSS <- FigR::mm10TSSRanges
TSSCREOv <- findOverlaps(resize(TSS, 1000, fix = "center"), regions)
CREGeneMapping <- data.frame(
  Gene = as.character(TSS$gene_name)[queryHits(TSSCREOv)],
  CREInd = subjectHits(TSSCREOv),
  CRE = as.character(regions)[subjectHits(TSSCREOv)],
  Region = as.character(regions[subjectHits(TSSCREOv)])
)

# Seurat object holding the scATAC data
scATAC <- readRDS("../../data/mHSCAging10xV3/scATACSeurat.rds")

In [5]:
# Build a single-column accessibility matrix: the ATAC counts are summed over
# all columns of the count matrix, giving one total per row. The row names of
# the Seurat object are parsed into genomic ranges and used as rowRanges.
countSE <- SummarizedExperiment(
  rowRanges = GRanges(rownames(scATAC)),
  assays = list(
    TFBS = as.matrix(rowSums(scATAC@assays$ATAC$counts))
  )
)

In [6]:
##########################
# Get TF motif positions #
##########################

# PWMs, one named entry per TF
cisBPMotifs <- readRDS("../../data/shared/cisBP_mouse_pwms_2021.rds")

# Motif match positions are cached on disk: reuse the cache when present,
# otherwise scan all regions for every motif and write the cache
motifPath <- "../../data/mHSCAging10xV3/TFMotifRanges.rds"
if (file.exists(motifPath)) {
  TFMotifRanges <- readRDS(motifPath)
} else {
  TFMotifRanges <- setNames(
    pbmcapply::pbmclapply(
      names(cisBPMotifs),
      function(motifName) {
        motifHits <- motifmatchr::matchMotifs(
          cisBPMotifs[motifName],
          regions,
          genome = "mm10",
          out = "positions",
          p.cutoff = 1e-4
        )
        motifHits[[1]]
      },
      mc.cores = 16
    ),
    names(cisBPMotifs)
  )
  saveRDS(TFMotifRanges, motifPath)
}

In [14]:
############################################
# Find target pathways of a particular TF  #
############################################

# Output folder for the per-TF bar plots. It is created once, before the loop,
# with dir.create(): this is portable, also creates missing parent folders and
# stays silent when the folder already exists (the previous shell `mkdir` was
# re-run for every TF and failed on every iteration after the first).
plotDir <- "../../data/mHSCAging10xV3/plots/TFPathwayMapping"
dir.create(plotDir, recursive = TRUE, showWarnings = FALSE)

for (TF in c("E2f1", "Egr1", "Ets1", "Fli1", "Jun", "Klf6", "Mycn", "Nfe2", "Nfe2l2",
             "Nrf1", "Pbx3", "Smad1", "Spi1", "Tfec", "Xbp1")) {
  print(TF)

  # Run mapTargets() for this single TF against the c5GO gene sets
  enrichment <- mapTargets(
    TFMotifSites = TFMotifRanges[TF],
    siteSE = countSE,
    regionGeneCorr = CREGeneMapping,
    geneSets = c5GO,
    threshold = 0,
    genome = "mm10")

  tfEnrichment <- enrichment[[TF]]
  if (is.null(tfEnrichment) || nrow(tfEnrichment) == 0) {
    message("No enrichment results for ", TF, "; skipping plot")
    next
  }

  # Keep the first (at most 10) rows of the result table, in reverse order so
  # that the first row ends up at the top of the flipped bar plot.
  # NOTE(review): presumably mapTargets() returns rows sorted by significance - confirm.
  nTop <- min(10L, nrow(tfEnrichment))
  plotData <- as.data.frame(tfEnrichment[nTop:1, ])
  plotData$pathway <- stringr::str_replace_all(plotData$pathway, "_", " ")
  # Insert a newline roughly every 30 characters so long names wrap on the axis
  plotData$pathway <- gsub('(.{1,30})(\\s|$)', '\\1\n', plotData$pathway)
  # Fixing the factor levels keeps the entries in table order when plotting
  plotData$pathway <- factor(plotData$pathway, levels = plotData$pathway)
  plotData$logP <- -log10(plotData$pval)

  pathwayPlot <- ggplot(plotData) +
    geom_bar(aes(x = pathway, y = logP), stat = "identity", width = 0.5, fill = "#CA9B80") +
    xlab("Enriched pathways") +
    ylab("-Log10(p-value)") + # logP is -log10(pval), so the label carries the sign
    coord_flip() +
    theme_classic() +
    ggtitle(TF) +
    theme(axis.text = element_text(size = 10))

  pdf(file.path(plotDir, paste0(TF, "_target_pathways_motif_only.pdf")),
      width = 8, height = 6)
  # Always close the device, even if drawing fails, so no PDF device leaks
  tryCatch(print(pathwayPlot), finally = dev.off())
}

[1] "E2f1"
[1] "Egr1"
[1] "Ets1"
[1] "Fli1"
[1] "Jun"
[1] "Klf6"
[1] "Mycn"
[1] "Nfe2"
[1] "Nfe2l2"
[1] "Nrf1"
[1] "Pbx3"
[1] "Smad1"
[1] "Spi1"
[1] "Tfec"
[1] "Xbp1"


In [None]:
#######################################
# Get TF regulators of the gene lists #
#######################################

TFs <- names(cisBPMotifs)

# Run mapTargets() for all TFs with motif matches against every c5GO gene set
enrichmentList <- mapTargets(
  TFMotifSites = TFMotifRanges,
  siteSE = countSE,
  regionGeneCorr = CREGeneMapping,
  geneSets = c5GO,
  genome = "mm10",
  threshold = 0
)

# Persist the result so later cells can reload it without recomputing
saveRDS(
  enrichmentList,
  file = "../../data/mHSCAging10xV3/regulatorEnrichment_motif_only.rds"
)
gc()

Loading required package: TFBSTools





In [None]:
###################################################
# Get TF regulators of HSC subpop signature genes #
###################################################

# Load HSC subpopulation gene signatures from previous literature.
# Each "<id>.txt" file in signatureDir is one signature; its first
# tab-separated column holds the gene names.
signatureDir <- "../../data/mHSCAging10xV3/markers/RNA/"
# Anchor and escape the extension: the unescaped pattern ".txt" would match any
# character followed by "txt" anywhere in the file name
markerFiles <- list.files(signatureDir, pattern = "\\.txt$")
if (length(markerFiles) == 0) {
  stop("No signature .txt files found in ", signatureDir, call. = FALSE)
}
# Signature ID = file name up to (not including) the first dot
markerIDs <- sub("\\..*$", "", markerFiles)
HSCSubpopSigs <- list()
for (i in seq_along(markerIDs)) {
  sig <- markerIDs[i]
  filePath <- paste(signatureDir, markerFiles[i], sep = "/")
  # stringsAsFactors = FALSE keeps gene names as character on every R version
  HSCSubpopSigs[[sig]] <- read.table(filePath, sep = "\t", stringsAsFactors = FALSE)$V1
}

# Run mapTargets() for all TFs against the subpopulation signatures
TFs <- names(cisBPMotifs)
enrichmentListSubpop <- mapTargets(
  TFMotifSites = TFMotifRanges,
  siteSE = countSE,
  regionGeneCorr = CREGeneMapping,
  geneSets = HSCSubpopSigs,
  threshold = 0,
  genome = "mm10",
  nCores = 16
)

saveRDS(enrichmentListSubpop, "../../data/mHSCAging10xV3/regulatorEnrichment_subpop_signature_motif_only.rds")

In [None]:
# Reload the gene-set enrichment results and, for every TF, stack the
# subpopulation-signature rows on top of them.
enrichmentList <- readRDS("../../data/mHSCAging10xV3/regulatorEnrichment_motif_only.rds")
# Iterate over the list that was just loaded; the previously referenced
# `enrichmentListGO` is not defined anywhere in this script.
for (TF in names(enrichmentList)) {
  # rbind() ignores a NULL first argument, so TFs without signature results
  # simply keep their existing rows
  enrichmentList[[TF]] <- rbind(enrichmentListSubpop[[TF]], enrichmentList[[TF]])
}

In [17]:

####################################
# Get pathway-to-TF mapping matrix #
####################################

# Generate pathway-by-TF regulation matrix.
# The row universe is every c5GO gene set plus any further pathway that occurs
# in the enrichment tables (e.g. signature rows merged in earlier). Declaring
# all rows up front guarantees that every TF yields a vector of the same length
# and row order; otherwise unknown names would be appended per TF in table
# order, which can misalign rows or prevent simplification to a matrix.
pathways <- union(
  names(c5GO),
  unlist(
    lapply(enrichmentList, function(tbl) as.character(tbl$pathway)),
    use.names = FALSE
  )
)
enrichmentMat <- pbmcapply::pbmcmapply(
  function(TF) {
    enrichment <- enrichmentList[[TF]]
    enrichVec <- rep(0, length(pathways))
    names(enrichVec) <- pathways
    # as.character() makes sure we index by name even if `pathway` is a factor
    enrichVec[as.character(enrichment$pathway)] <- -log10(enrichment$fdrs)
    enrichVec
  },
  names(enrichmentList),
  mc.cores = 16
)
# Keep pathways with at least one value > 5 and TFs with at least one value
# above -log10(0.25); drop = FALSE keeps a matrix even if one row/column is left
enrichmentMatFilt <- enrichmentMat[
  rowMaxs(enrichmentMat) > 5,
  colMaxs(enrichmentMat) > -log10(0.25),
  drop = FALSE
]

In [None]:
#############################
# Visualize mapping results #
#############################

# Read a text file with comma-separated TF names on each line and return all
# names as one sorted character vector
readTFGroups <- function(path) {
  sort(unlist(stringr::str_split(readLines(path), ",")))
}

# Find TFs up-regulated in aging
upTFs <- readTFGroups("../../data//mHSCAging10xV3/seqTF_foot_stat_aging_TF_grouping_up.txt")

# Find TFs down-regulated in aging
downTFs <- readTFGroups("../../data//mHSCAging10xV3/seqTF_foot_stat_aging_TF_grouping_down.txt")

# Filter based on age-associated differential RNA: keep TFs that are columns of
# the filtered enrichment matrix, have padj < 0.1, and whose log2 fold change
# has the matching sign. which() discards NA comparisons.
diffRNA <- read.table("../../data/mHSCAging10xV3/diffRNA.tsv", header = TRUE, sep = "\t")
diffRNATFs <- intersect(
  colnames(enrichmentMatFilt),
  rownames(diffRNA)[which(diffRNA$padj < 1e-1)]
)
tfLog2FC <- diffRNA[diffRNATFs, ]$log2FoldChange
upTFs <- intersect(upTFs, diffRNATFs[which(tfLog2FC > 0)])
downTFs <- intersect(downTFs, diffRNATFs[which(tfLog2FC < 0)])

# Build the matrix before opening the PDF device, so that a failure here does
# not leave a device open. drop = FALSE keeps a matrix when only one row or
# column remains.
plotMtx <- enrichmentMat[rowMaxs(enrichmentMat) > 2, c(upTFs, downTFs), drop = FALSE]
plotMtx <- t(t(plotMtx) / colMaxs(plotMtx)) # rescale every TF column by its maximum
plotMtx <- plotMtx[, !is.na(colMaxs(plotMtx)), drop = FALSE]
plotMtx <- plotMtx[rowSums(plotMtx > 0.2) > 1, , drop = FALSE]
diffSign <- c("Down", "Up")[(colnames(plotMtx) %in% upTFs) + 1]
# One label per row: row_split must have the same length as nrow(plotMtx)
pathwayType <- c("Biological pathway", "HSC subpop signature")[
  (rownames(plotMtx) %in% names(HSCSubpopSigs)) + 1
]
colors <- circlize::colorRamp2(seq(0, quantile(plotMtx, 0.99),length.out=9),
                               colors = BuenColors::jdb_palette("solar_rojos"))

# NOTE(review): the file name contains "onlly" (sic); kept unchanged in case
# downstream steps expect this exact path.
pdf("../../data/mHSCAging10xV3/plots/TFPathwayMapping/seq2PRINT_aging_TF_to_pathway_full_motif_onlly.pdf",
    width = 150, height = 200)
# draw() renders explicitly, so the heatmap is also produced when the script is
# source()d; the device is closed even if drawing fails
tryCatch(
  draw(Heatmap(
    plotMtx,
    col = colors,
    column_split = diffSign,
    row_split = pathwayType,
    row_names_max_width = unit(20, "cm"),
    column_names_max_height = unit(20, "cm")
  )),
  finally = dev.off()
)

In [None]:
# Plot a subset of pathways regulated by aging-associated TFs
selectedPathways <- c(
  "Macroautophagy", "Regulation_Of_Macroautophagy", "Regulation_Of_Autophagy", 
  "Endoplasmic_Reticulum_Unfolded_Protein_Response", "Cellular_Response_To_Topologically_Incorrect_Protein",
  "Response_To_Endoplasmic_Reticulum_Stress",
  "Proteasomal_Protein_Catabolic_Process", "Proteolysis", "Protein_Catabolic_Process",
  "Mitochondrion_Organization", "Mitochondrial_Translation", "Mitochondrial_Transport",
  "Respiratory_Electron_Transport_Chain", "Mitotic_Cell_Cycle",
  "Regulation_Of_Cell_Cycle", "Ribonucleoprotein_Complex_Biogenesis", "Ribosome_Biogenesis", "Rrna_Metabolic_Process",
  "Double_Strand_Break_Repair", "Recombinational_Repair", 
  "Response_To_Virus", "Response_To_Type_I_Interferon", "Innate_Immune_Response", "Response_To_Biotic_Stimulus",
  "Microtubule_Based_Process", "Microtubule_Cytoskeleton_Organization", "Cilium_Movement", "Cell_Projection_Assembly",
  "Regulation_Of_Lymphocyte_Mediated_Immunity", "Regulation_Of_Cd4_Positive_Alpha_Beta_T_Cell_Differentiation",
  "Alpha_Beta_T_Cell_Activation", "Antigen_Processing_And_Presentation_Of_Peptide_Antigen_Via_Mhc_Class_I",
  "Telomere_Organization", "Positive_Regulation_Of_Cytokine_Production",
  "Pei_et_al_differentiation_inactive", "Rodriguez_Fraticelli_et_al_lowOutput", "Rodriguez_Fraticelli_et_al_mkBiased",
  "Pei_et_al_multilineage", "Rodriguez_Fraticelli_et_al_highOutput", "Rodriguez_Fraticelli_et_al_multilineage"
)
# Add every subpopulation signature, then keep only rows present in the matrix
# (intersect() also removes duplicates)
selectedPathways <- c(selectedPathways, names(HSCSubpopSigs))
selectedPathways <- intersect(selectedPathways, rownames(enrichmentMat))

# Build the matrix before opening the PDF device; drop = FALSE keeps a matrix
# when only one row or column remains
plotMtx <- enrichmentMat[selectedPathways, c(upTFs, downTFs), drop = FALSE]
# Keep TFs whose strongest value exceeds -log10(0.2); which() discards NA
plotMtx <- plotMtx[, which(colMaxs(plotMtx) > -log10(0.2)), drop = FALSE]
plotMtx <- t(t(plotMtx) / colMaxs(plotMtx)) # rescale every TF column by its maximum
plotMtx <- plotMtx[, !is.na(colMaxs(plotMtx)), drop = FALSE]

# Classify the rows BEFORE prettifying the row names: names(HSCSubpopSigs)
# still contain underscores, so matching after the replacement below would
# never find a signature row.
pathwayType <- c("Biological pathway", "HSC subpop signature")[
  (rownames(plotMtx) %in% names(HSCSubpopSigs)) + 1
]
rownames(plotMtx) <- stringr::str_replace_all(rownames(plotMtx), "_", " ")

colors <- circlize::colorRamp2(seq(0, quantile(plotMtx, 0.99),length.out=9),
                         colors = BuenColors::jdb_palette("solar_rojos"))
diffSign <- c("Down", "Up")[(colnames(plotMtx) %in% upTFs) + 1]

pdf("../../data/mHSCAging10xV3/plots/TFPathwayMapping/seq2PRINT_aging_TF_to_pathway_motif_only.pdf",
    width = 24, height = 12)
# draw() renders explicitly, so the heatmap is also produced when the script is
# source()d; the device is closed even if drawing fails
tryCatch(
  draw(Heatmap(
    plotMtx,
    col = colors,
    column_split = diffSign,
    row_split = pathwayType,
    name = "Rescaled\n enrichment",
    cluster_rows = FALSE,
    rect_gp = gpar(col = "grey", lwd = 2),
    column_names_max_height = unit(10, "cm"),
    row_names_max_width = unit(10, "cm")
  )),
  finally = dev.off()
)