From 84d9639e6ebe14a3c3f656a170af50e9b3f70d6d Mon Sep 17 00:00:00 2001
From: Charlotte Soneson <charlottesoneson@gmail.com>
Date: Tue, 1 Aug 2023 10:34:07 +0200
Subject: [PATCH] Bug fix in filtering plot when only one criterion is
 available.

---
 NAMESPACE                      |  1 +
 NEWS.md                        |  4 +++
 R/doFilter.R                   | 45 +++++++++++++++++++++++++---------
 tests/testthat/test-doFilter.R | 38 ++++++++++++++++++++++++++++
 4 files changed, 76 insertions(+), 12 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 2a9ebc1..f1b48ee 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -240,6 +240,7 @@ importFrom(stringdist,amatch)
 importFrom(stringr,str_extract)
 importFrom(tibble,rownames_to_column)
 importFrom(tidyr,gather)
+importFrom(tidyr,pivot_longer)
 importFrom(tidyr,pivot_wider)
 importFrom(tidyr,separate_rows)
 importFrom(tidyr,unnest)
diff --git a/NEWS.md b/NEWS.md
index d1d9049..2809515 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,7 @@
+# einprot 0.7.6
+
+* Bug fix in filtering plot when only one criterion is available
+
 # einprot 0.7.5
 
 * Move QC plot function
diff --git a/R/doFilter.R b/R/doFilter.R
index ddfb4e1..9922b2a 100644
--- a/R/doFilter.R
+++ b/R/doFilter.R
@@ -1,3 +1,33 @@
+#' Plot excluded features: UpSet plot for several criteria, bar plot for one.
+#' Nothing is drawn unless plotUpset is TRUE and some row of filtdf sums > 0.
+#' @keywords internal
+#' @noRd
+#' @importFrom ggplot2 ggplot aes geom_col labs
+#' @importFrom cowplot theme_cowplot
+#' @importFrom tidyr pivot_longer
+#' @importFrom dplyr summarize everything
+#' @importFrom ComplexUpset upset
+.makeFilterPlot <- function(filtdf, plotUpset) {
+    if (plotUpset && any(rowSums(filtdf) > 0)) {
+        if (ncol(filtdf) > 1) {
+            print(ComplexUpset::upset(filtdf[rowSums(filtdf) > 0, , drop = FALSE],
+                                      intersect = colnames(filtdf)))
+        } else {
+            ## Single criterion: show the number of excluded features as a bar
+            print(ggplot2::ggplot(
+                data = filtdf %>%
+                    dplyr::summarize(dplyr::across(
+                        dplyr::everything(), function(x) length(which(x > 0)))) %>%
+                    tidyr::pivot_longer(cols = dplyr::everything(),
+                                        names_to = "criterion", values_to = "number"),
+                ggplot2::aes(x = criterion, y = number)) +
+                    ggplot2::geom_col() +
+                    cowplot::theme_cowplot() +
+                    ggplot2::labs(x = "", y = "Number of excluded features"))
+        }
+    }
+}
+
 #' Filter out features in MaxQuant data
 #'
 #' Exclude features with 'Score' below \code{minScore}, 'Peptides' below
@@ -108,10 +138,7 @@ filterMaxQuant <- function(sce, minScore, minPeptides, plotUpset = TRUE,
              "different sizes")
         #nocov end
     }
-    if (plotUpset && any(rowSums(filtdf) > 0)) {
-        print(ComplexUpset::upset(filtdf[rowSums(filtdf) > 0, , drop = FALSE],
-                                  intersect = colnames(filtdf)))
-    }
+    .makeFilterPlot(filtdf = filtdf, plotUpset = plotUpset)
 
     if (!is.null(exclFile)) {
         write.table(exclude, file = exclFile, quote = FALSE, sep = "\t",
@@ -385,10 +412,7 @@ filterPDTMT <- function(sce, inputLevel, minScore = 0, minPeptides = 0,
         stop("Something went wrong in the filtering - filtdf and sce are of ",
              "different sizes")
     }
-    if (plotUpset && any(rowSums(filtdf) > 0)) {
-        print(ComplexUpset::upset(filtdf[rowSums(filtdf) > 0, , drop = FALSE],
-                                  intersect = colnames(filtdf)))
-    }
+    .makeFilterPlot(filtdf = filtdf, plotUpset = plotUpset)
 
     if (!is.null(exclFile)) {
         write.table(exclude, file = exclFile, quote = FALSE, sep = "\t",
@@ -494,10 +518,7 @@ filterFragPipe <- function(sce, minPeptides, plotUpset = TRUE,
              "different sizes")
         #nocov end
     }
-    if (plotUpset && any(rowSums(filtdf) > 0)) {
-        print(ComplexUpset::upset(filtdf[rowSums(filtdf) > 0, , drop = FALSE],
-                                  intersect = colnames(filtdf)))
-    }
+    .makeFilterPlot(filtdf = filtdf, plotUpset = plotUpset)
 
     if (!is.null(exclFile)) {
         write.table(exclude, file = exclFile, quote = FALSE, sep = "\t",
diff --git a/tests/testthat/test-doFilter.R b/tests/testthat/test-doFilter.R
index 7dcb62f..dddb6f0 100644
--- a/tests/testthat/test-doFilter.R
+++ b/tests/testthat/test-doFilter.R
@@ -128,6 +128,20 @@ test_that("filtering works (MaxQuant)", {
     )))
     expect_equal(nrow(out), 112L)
 
+    ## Only one column present
+    tmp <- sce_mq_final
+    rowData(tmp)$Score <- NULL
+    rowData(tmp)$Only.identified.by.site <- NULL
+    rowData(tmp)$Peptides <- NULL
+    rowData(tmp)$Reverse <- NULL
+    out <- filterMaxQuant(tmp, minScore = 7, minPeptides = 1,
+                          plotUpset = TRUE, exclFile = NULL)
+    expect_equal(nrow(out), length(which(
+        (rowData(sce_mq_final)$Potential.contaminant == "" |
+                 is.na(rowData(sce_mq_final)$Potential.contaminant))
+    )))
+    expect_equal(nrow(out), 112L)
+
     ## Missing columns - Potential.contaminant
     tmp <- sce_mq_final
     rowData(tmp)$Potential.contaminant <- NULL
@@ -318,6 +332,30 @@ test_that("filtering works (PD/TMT - proteins)", {
     )))
     expect_equal(nrow(out), 30L)  ## same test as above, just with precomputed answer
 
+    ## Only one column present
+    tmp <- sce_pd_final
+    rowData(tmp)$Number.of.Peptides <- NULL
+    rowData(tmp)$Contaminant <- NULL
+    out <- filterPDTMT(tmp, inputLevel = "Proteins", minScore = 10,
+                       minPeptides = 3, minDeltaScore = 0, minPSMs = 1,
+                       masterProteinsOnly = FALSE, plotUpset = TRUE,
+                       exclFile = NULL)
+    expect_equal(nrow(out), length(which(
+        rowData(sce_pd_final)$Score.Sequest.HT.Sequest.HT >= 10
+    )))
+    expect_equal(nrow(out), 30L)  ## same test as above, just with precomputed answer
+
+    ## Only one column present, but no features excluded (should not plot)
+    tmp <- sce_pd_final
+    rowData(tmp)$Number.of.Peptides <- NULL
+    rowData(tmp)$Score.Sequest.HT.Sequest.HT <- NULL
+    out <- filterPDTMT(tmp, inputLevel = "Proteins", minScore = 10,
+                       minPeptides = 3, minDeltaScore = 0, minPSMs = 1,
+                       masterProteinsOnly = FALSE, plotUpset = TRUE,
+                       exclFile = NULL)
+    expect_equal(nrow(out), nrow(tmp))
+    expect_equal(nrow(out), 70L)  ## same test as above, just with precomputed answer
+
     ## Missing columns - Master
     tmp <- sce_pd_final
     rowData(tmp)$Master <- NULL