From 84d9639e6ebe14a3c3f656a170af50e9b3f70d6d Mon Sep 17 00:00:00 2001 From: Charlotte Soneson Date: Tue, 1 Aug 2023 10:34:07 +0200 Subject: [PATCH] Bug fix in filtering plot when only one criterion is available. --- NAMESPACE | 1 + NEWS.md | 4 +++ R/doFilter.R | 45 +++++++++++++++++++++++++--------- tests/testthat/test-doFilter.R | 38 ++++++++++++++++++++++++++++ 4 files changed, 76 insertions(+), 12 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 2a9ebc1..f1b48ee 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -240,6 +240,7 @@ importFrom(stringdist,amatch) importFrom(stringr,str_extract) importFrom(tibble,rownames_to_column) importFrom(tidyr,gather) +importFrom(tidyr,pivot_longer) importFrom(tidyr,pivot_wider) importFrom(tidyr,separate_rows) importFrom(tidyr,unnest) diff --git a/NEWS.md b/NEWS.md index d1d9049..2809515 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# einprot 0.7.6 + +* Bug fix in filtering plot when only one criterion is available + # einprot 0.7.5 * Move QC plot function diff --git a/R/doFilter.R b/R/doFilter.R index ddfb4e1..9922b2a 100644 --- a/R/doFilter.R +++ b/R/doFilter.R @@ -1,3 +1,33 @@ +#' @keywords internal +#' @noRd +#' +#' @importFrom ggplot2 ggplot aes geom_col labs +#' @importFrom cowplot theme_cowplot +#' @importFrom tidyr pivot_longer +#' @importFrom dplyr summarize everything +#' @importFrom ComplexUpset upset +#' +.makeFilterPlot <- function(filtdf, plotUpset) { + if (plotUpset && any(rowSums(filtdf) > 0)) { + if (ncol(filtdf) > 1) { + print(ComplexUpset::upset(filtdf[rowSums(filtdf) > 0, , drop = FALSE], + intersect = colnames(filtdf))) + } else { + print(ggplot2::ggplot( + data = filtdf %>% + dplyr::summarize(across(dplyr::everything(), + function(x) length(which(x > 0)))) %>% + tidyr::pivot_longer(cols = dplyr::everything(), + names_to = "criterion", values_to = "number"), + ggplot2::aes(x = criterion, y = number)) + + ggplot2::geom_col() + + cowplot::theme_cowplot() + + ggplot2::labs(x = "", y = "Number of excluded features") + ) + } + } +} + #' Filter out features in MaxQuant data #' #' Exclude features with 'Score' below \code{minScore}, 'Peptides' below @@ -108,10 +138,7 @@ filterMaxQuant <- function(sce, minScore, minPeptides, plotUpset = TRUE, "different sizes") #nocov end } - if (plotUpset && any(rowSums(filtdf) > 0)) { - print(ComplexUpset::upset(filtdf[rowSums(filtdf) > 0, , drop = FALSE], - intersect = colnames(filtdf))) - } + .makeFilterPlot(filtdf = filtdf, plotUpset = plotUpset) if (!is.null(exclFile)) { write.table(exclude, file = exclFile, quote = FALSE, sep = "\t", @@ -385,10 +412,7 @@ filterPDTMT <- function(sce, inputLevel, minScore = 0, minPeptides = 0, stop("Something went wrong in the filtering - filtdf and sce are of ", "different sizes") } - if (plotUpset && any(rowSums(filtdf) > 0)) { - print(ComplexUpset::upset(filtdf[rowSums(filtdf) > 0, , drop = FALSE], - intersect = colnames(filtdf))) - } + .makeFilterPlot(filtdf = filtdf, plotUpset = plotUpset) if (!is.null(exclFile)) { write.table(exclude, file = exclFile, quote = FALSE, sep = "\t", @@ -494,10 +518,7 @@ filterFragPipe <- function(sce, minPeptides, plotUpset = TRUE, "different sizes") #nocov end } - if (plotUpset && any(rowSums(filtdf) > 0)) { - print(ComplexUpset::upset(filtdf[rowSums(filtdf) > 0, , drop = FALSE], - intersect = colnames(filtdf))) - } + .makeFilterPlot(filtdf = filtdf, plotUpset = plotUpset) if (!is.null(exclFile)) { write.table(exclude, file = exclFile, quote = FALSE, sep = "\t", diff --git a/tests/testthat/test-doFilter.R b/tests/testthat/test-doFilter.R index 7dcb62f..dddb6f0 100644 --- a/tests/testthat/test-doFilter.R +++ b/tests/testthat/test-doFilter.R @@ -128,6 +128,20 @@ test_that("filtering works (MaxQuant)", { ))) expect_equal(nrow(out), 112L) + ## Only one column present + tmp <- sce_mq_final + rowData(tmp)$Score <- NULL + rowData(tmp)$Only.identified.by.site <- NULL + rowData(tmp)$Peptides <- NULL + rowData(tmp)$Reverse <- NULL + out <- filterMaxQuant(tmp, minScore = 7, minPeptides = 1, + plotUpset = TRUE, exclFile = NULL) + expect_equal(nrow(out), length(which( + (rowData(sce_mq_final)$Potential.contaminant == "" | + is.na(rowData(sce_mq_final)$Potential.contaminant)) + ))) + expect_equal(nrow(out), 112L) + ## Missing columns - Potential.contaminant tmp <- sce_mq_final rowData(tmp)$Potential.contaminant <- NULL @@ -318,6 +332,30 @@ test_that("filtering works (PD/TMT - proteins)", { ))) expect_equal(nrow(out), 30L) ## same test as above, just with precomputed answer + ## Only one column present + tmp <- sce_pd_final + rowData(tmp)$Number.of.Peptides <- NULL + rowData(tmp)$Contaminant <- NULL + out <- filterPDTMT(tmp, inputLevel = "Proteins", minScore = 10, + minPeptides = 3, minDeltaScore = 0, minPSMs = 1, + masterProteinsOnly = FALSE, plotUpset = TRUE, + exclFile = NULL) + expect_equal(nrow(out), length(which( + rowData(sce_pd_final)$Score.Sequest.HT.Sequest.HT >= 10 + ))) + expect_equal(nrow(out), 30L) ## same test as above, just with precomputed answer + + ## Only one column present, but no features excluded (should not plot) + tmp <- sce_pd_final + rowData(tmp)$Number.of.Peptides <- NULL + rowData(tmp)$Score.Sequest.HT.Sequest.HT <- NULL + out <- filterPDTMT(tmp, inputLevel = "Proteins", minScore = 10, + minPeptides = 3, minDeltaScore = 0, minPSMs = 1, + masterProteinsOnly = FALSE, plotUpset = TRUE, + exclFile = NULL) + expect_equal(nrow(out), nrow(tmp)) + expect_equal(nrow(out), 70L) ## same test as above, just with precomputed answer + ## Missing columns - Master tmp <- sce_pd_final rowData(tmp)$Master <- NULL