Skip to content

Commit

Permalink
Include pathways from MSigDB among supported feature collections
Browse files Browse the repository at this point in the history
  • Loading branch information
csoneson committed Apr 23, 2024
1 parent e8dd451 commit 67f2679
Show file tree
Hide file tree
Showing 9 changed files with 130 additions and 75 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

* Add details about DIA-NN command line to report
* Add column with links to ComplexPortal query to link table
* Include pathways from MSigDB among supported feature collections

# einprot 0.9.3

Expand Down
2 changes: 1 addition & 1 deletion R/checkArgumentsDIANN.R
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@

## Complexes
.assertVector(x = includeFeatureCollections, type = "character",
validValues = c("complexes", "GO"), allowNULL = TRUE)
validValues = c("complexes", "GO", "pathways"), allowNULL = TRUE)
.assertVector(x = customComplexes, type = "list")
if (length(customComplexes) > 0) {
.assertVector(x = names(customComplexes), type = "character")
Expand Down
2 changes: 1 addition & 1 deletion R/checkArgumentsFragPipe.R
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@

## Complexes
.assertVector(x = includeFeatureCollections, type = "character",
validValues = c("complexes", "GO"), allowNULL = TRUE)
validValues = c("complexes", "GO", "pathways"), allowNULL = TRUE)
.assertVector(x = customComplexes, type = "list")
if (length(customComplexes) > 0) {
.assertVector(x = names(customComplexes), type = "character")
Expand Down
2 changes: 1 addition & 1 deletion R/checkArgumentsMaxQuant.R
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@

## Complexes
.assertVector(x = includeFeatureCollections, type = "character",
validValues = c("complexes", "GO"), allowNULL = TRUE)
validValues = c("complexes", "GO", "pathways"), allowNULL = TRUE)
.assertVector(x = customComplexes, type = "list")
if (length(customComplexes) > 0) {
.assertVector(x = names(customComplexes), type = "character")
Expand Down
2 changes: 1 addition & 1 deletion R/checkArgumentsPDTMT.R
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@

## Complexes
.assertVector(x = includeFeatureCollections, type = "character",
validValues = c("complexes", "GO"), allowNULL = TRUE)
validValues = c("complexes", "GO", "pathways"), allowNULL = TRUE)
.assertVector(x = customComplexes, type = "list")
if (length(customComplexes) > 0) {
.assertVector(x = names(customComplexes), type = "character")
Expand Down
2 changes: 1 addition & 1 deletion R/checkArgumentsSpectronaut.R
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@

## Complexes
.assertVector(x = includeFeatureCollections, type = "character",
validValues = c("complexes", "GO"), allowNULL = TRUE)
validValues = c("complexes", "GO", "pathways"), allowNULL = TRUE)
.assertVector(x = customComplexes, type = "list")
if (length(customComplexes) > 0) {
.assertVector(x = names(customComplexes), type = "character")
Expand Down
113 changes: 58 additions & 55 deletions R/plotVolcano.R
Original file line number Diff line number Diff line change
Expand Up @@ -705,69 +705,72 @@ plotVolcano <- function(sce, res, testType, xv = NULL, yv = NULL, xvma = NULL,
}

## -------------------------------------------------------------------------
## Create a volcano plot for each significantly enriched complex
## Create a volcano plot for each significantly enriched complex/GO/pathway
## -------------------------------------------------------------------------
if ("complexes" %in% names(featureCollections)) {
## Find significant complexes
idx <- which(
mcols(featureCollections$complexes)[, paste0(comparisonString,
"_FDR")] <
complexFDRThr &
mcols(featureCollections$complexes)[, paste0(comparisonString,
"_NGenes")] > 1
)
tmpcomplx <- mcols(featureCollections$complexes)[idx, , drop = FALSE]
tmpcomplx <- tmpcomplx[order(tmpcomplx[paste0(comparisonString,
"_PValue")]), ,
drop = FALSE]
cplxs <- rownames(tmpcomplx)
cplxs <- cplxs[seq_len(min(length(cplxs), maxNbrComplexesToPlot))]
for (ftype in c("complexes", "GO", "pathways")) {
if (ftype %in% names(featureCollections)) {
## Find significant feature sets (complexes, GO terms or pathways)
idx <- which(
mcols(featureCollections[[ftype]])[, paste0(comparisonString,
"_FDR")] <
complexFDRThr &
mcols(featureCollections[[ftype]])[, paste0(comparisonString,
"_NGenes")] > 1
)
tmpcomplx <- mcols(featureCollections[[ftype]])[idx, , drop = FALSE]
tmpcomplx <- tmpcomplx[order(tmpcomplx[paste0(comparisonString,
"_PValue")]), ,
drop = FALSE]
cplxs <- rownames(tmpcomplx)
cplxs <- cplxs[seq_len(min(length(cplxs), maxNbrComplexesToPlot))]

if (length(cplxs) > 0 && !is.null(baseFileName)) {
grDevices::pdf(paste0(baseFileName, "_complexes.pdf"),
width = 10.5, height = 7.5)
for (cplx in cplxs) {
prs <- featureCollections$complexes[[cplx]]
cplxpval <- signif(mcols(
featureCollections$complexes)[cplx, paste0(comparisonString,
"_PValue")],
digits = 3)
cplxfdr <- signif(mcols(
featureCollections$complexes)[cplx, paste0(comparisonString,
"_FDR")],
digits = 3)
if (length(intersect(prs, res$pid)) > 1) {
gg <- ggbase +
ggplot2::geom_point(
fill = "lightgrey", color = "grey",
pch = 21, size = 1.5) +
ggplot2::geom_point(
data = res %>%
dplyr::filter(.data$pid %in% prs),
fill = "red", color = "grey", pch = 21,
size = 1.5) +
ggrepel::geom_text_repel(
data = res %>%
dplyr::filter(.data$pid %in% prs),
aes(label = .data$pid), max.overlaps = Inf,
size = 4,
min.segment.length = 0, force = 1) +
ggplot2::labs(caption = paste0(cplx, ", PValue = ",
cplxpval,
", FDR = ", cplxfdr))
print(gg)
if (length(cplxs) > 0 && !is.null(baseFileName)) {
grDevices::pdf(paste0(baseFileName, "_", ftype, ".pdf"),
width = 10.5, height = 7.5)
for (cplx in cplxs) {
prs <- featureCollections[[ftype]][[cplx]]
cplxpval <- signif(mcols(
featureCollections[[ftype]])[cplx, paste0(comparisonString,
"_PValue")],
digits = 3)
cplxfdr <- signif(mcols(
featureCollections[[ftype]])[cplx, paste0(comparisonString,
"_FDR")],
digits = 3)
if (length(intersect(prs, res$pid)) > 1) {
gg <- ggbase +
ggplot2::geom_point(
fill = "lightgrey", color = "grey",
pch = 21, size = 1.5) +
ggplot2::geom_point(
data = res %>%
dplyr::filter(.data$pid %in% prs),
fill = "red", color = "grey", pch = 21,
size = 1.5) +
ggrepel::geom_text_repel(
data = res %>%
dplyr::filter(.data$pid %in% prs),
aes(label = .data$pid), max.overlaps = Inf,
size = 4,
min.segment.length = 0, force = 1) +
ggplot2::labs(caption = paste0(cplx, ", PValue = ",
cplxpval,
", FDR = ", cplxfdr))
print(gg)

## Bar plot
for (acp in setdiff(abundanceColPat, "")) {
print(.complexBarPlot(
res = res, prs = prs, sce = sce, cplx = cplx,
colpat = acp, groupmap = groupmap))
## Bar plot
for (acp in setdiff(abundanceColPat, "")) {
print(.complexBarPlot(
res = res, prs = prs, sce = sce, cplx = cplx,
colpat = acp, groupmap = groupmap))
}
}
}
grDevices::dev.off()
}
grDevices::dev.off()
}
}

return(list(gg = ggtest, ggint = ggint,
ggma = ggma, ggwf = ggwf, ggbar = ggbar,
pidLabelVolcano = pidLabelVolcano))
Expand Down
63 changes: 55 additions & 8 deletions R/prepareFeatureCollections.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,26 @@
#' Prepare feature collections for testing with camera
#'
#' Prepare feature collections for testing with \code{limma::camera}. The
#' function maps the feature IDs in the collections (complexes or GO terms) to
#' the values in the specified \code{idCol} column of \code{rowData(sce)},
#' and subsequently replaces them with the corresponding row names of the
#' \code{SummarizedExperiment} object. Feature sets with too few features
#' (after the matching) are removed.
#' function maps the feature IDs in the collections (complexes, GO terms
#' or pathways) to the values in the specified \code{idCol} column of
#' \code{rowData(sce)}, and subsequently replaces them with the corresponding
#' row names of the \code{SummarizedExperiment} object. Feature sets with
#' too few features (after the matching) are removed.
#' Complexes are obtained from the database provided via
#' \code{complexDbPath}. GO terms and pathways (BIOCARTA, KEGG, PID, REACTOME
#' and WIKIPATHWAYS) are retrieved from MSigDB via the \code{msigdbr} package.
#'
#' @param sce A \code{SummarizedExperiment} object (or a derivative).
#' @param idCol Character scalar, indicating which column in
#' \code{rowData(sce)} contains IDs matching those in the
#' feature collections (gene symbols).
#' @param includeFeatureCollections Character vector indicating the types
#' of feature collections to prepare. Should be a subset of
#' \code{c("complexes", "GO")} or \code{NULL}.
#' \code{c("complexes", "GO", "pathways")} or \code{NULL}.
#' @param complexDbPath Character scalar providing the path to the database
#' of complexes, generated using \code{makeComplexDB()} and serialized
#' to a .rds file.
#' to a .rds file. If \code{NULL}, the complex database provided with
#' einprot will be used.
#' @param speciesInfo List with at least two entries (\code{species} and
#' \code{speciesCommon}), providing the species information. Typically
#' generated using \code{getSpeciesInfo()}.
Expand Down Expand Up @@ -95,7 +99,8 @@ prepareFeatureCollections <- function(sce, idCol, includeFeatureCollections,
.assertScalar(x = idCol, type = "character",
validValues = colnames(SummarizedExperiment::rowData(sce)))
.assertVector(x = includeFeatureCollections, type = "character",
validValues = c("complexes", "GO"), allowNULL = TRUE)
validValues = c("complexes", "GO", "pathways"),
allowNULL = TRUE)
.assertScalar(x = complexDbPath, type = "character", allowNULL = TRUE)
if (is.null(complexDbPath) && "complexes" %in% includeFeatureCollections) {
complexDbPath <- system.file(EINPROT_COMPLEXES_FILE,
Expand Down Expand Up @@ -208,5 +213,47 @@ prepareFeatureCollections <- function(sce, idCol, includeFeatureCollections,
featureCollections$GO <- goannots
}

## -------------------------------------------------------------------------
## Pathways
## -------------------------------------------------------------------------
if ("pathways" %in% includeFeatureCollections &&
speciesInfo$species %in% getSupportedSpecies()$species) {
pws <- msigdbr::msigdbr(species = speciesInfo$species,
category = "C2", subcategory = "CP:BIOCARTA") %>%
dplyr::select("gs_name", "gene_symbol") %>%
dplyr::bind_rows(
msigdbr::msigdbr(species = speciesInfo$species,
category = "C2", subcategory = "CP:KEGG") %>%
dplyr::select("gs_name", "gene_symbol")
) %>%
dplyr::bind_rows(
msigdbr::msigdbr(species = speciesInfo$species,
category = "C2", subcategory = "CP:PID") %>%
dplyr::select("gs_name", "gene_symbol")
) %>%
dplyr::bind_rows(
msigdbr::msigdbr(species = speciesInfo$species,
category = "C2", subcategory = "CP:REACTOME") %>%
dplyr::select("gs_name", "gene_symbol")
) %>%
dplyr::bind_rows(
msigdbr::msigdbr(species = speciesInfo$species,
category = "C2", subcategory = "CP:WIKIPATHWAYS") %>%
dplyr::select("gs_name", "gene_symbol")
)
pws <- methods::as(lapply(split(pws, f = pws$gs_name),
function(w) unique(w$gene_symbol)),
"CharacterList")
S4Vectors::mcols(pws)$genes <- vapply(pws, function(w)
gsub(pat, "\\1; ", paste(w, collapse = ";")), ""
)
S4Vectors::mcols(pws)$nGenes <- lengths(pws)
pws <- .replaceIdsInList(chl = pws, dfConv = dfGene,
currentIdCol = "genes",
newIdCol = "rowName", pat = pat)
pws <- pws[lengths(pws) >= minSizeToKeep]
featureCollections$pathways <- pws
}

featureCollections
}
18 changes: 11 additions & 7 deletions man/prepareFeatureCollections.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 67f2679

Please sign in to comment.