Add more unit tests

fmicompbio · Apr 15, 2023 · 4230512 · 4230512
1 parent 90aaf40
commit 4230512
Show file tree

Hide file tree

Showing 8 changed files with 241 additions and 1 deletion.
diff --git a/R/checkArgumentsFragPipe.R b/R/checkArgumentsFragPipe.R
@@ -54,6 +54,11 @@
     if (length(fpConfigFile) > 1) {
         stop("There are more than one config file in the FragPipe directory")
     }
+    fpWorkflowFile <- list.files(fragpipeDir, pattern = "^fragpipe.*.workflow$",
+                               full.names = TRUE)
+    if (length(fpWorkflowFile) > 1) {
+        stop("There are more than one workflow file in the FragPipe directory")
+    }
     fpLogFile <- list.files(fragpipeDir, pattern = "^log_.+.txt$",
                             full.names = TRUE)
     if (length(fpLogFile) > 1) {

diff --git a/tests/testthat/test-checkArgumentsFragPipe.R b/tests/testthat/test-checkArgumentsFragPipe.R
@@ -16,7 +16,7 @@ test_that("argument checking for FP works", {
         idCol = function(df) combineIds(df, combineCols = c("Gene", "Protein.ID")),
         labelCol = function(df) combineIds(df, combineCols = c("Gene", "Protein.ID")),
         geneIdCol = function(df) getFirstId(df, colName = "Gene"),
-        proteinIdCol = "Protein.ID",
+        proteinIdCol = function(df) getFirstId(df, colName = "Protein.ID"),
         stringIdCol = function(df) combineIds(df, combineCols = c("Gene", "Protein.ID"),
                                               combineWhen = "missing", makeUnique = FALSE),
         iColPattern = "\\.MaxLFQ\\.Intensity$",
@@ -167,6 +167,26 @@ test_that("argument checking for FP works", {
     expect_error(do.call(.checkArgumentsFragPipe, args),
                  "The file missing/combined_protein.tsv doesn't exist")
 
+    ## Multiple log files
+    dir.create(file.path(tempdir(), "fragpipe_dir_temp"))
+    file.copy(system.file("extdata", "fp_example", package = "einprot"),
+              file.path(tempdir(), "fragpipe_dir_temp"), recursive = TRUE)
+    file.copy(file.path(tempdir(), "fragpipe_dir_temp", "fp_example",
+                        "log_2023-04-12_20-12-46.txt"),
+              file.path(tempdir(), "fragpipe_dir_temp", "fp_example",
+                        "log_2024-04-12_20-12-46.txt"))
+    args <- args0
+    args$fragpipeDir <- file.path(tempdir(), "fragpipe_dir_temp", "fp_example")
+    expect_error(do.call(.checkArgumentsFragPipe, args),
+                 "There are more than one log file")
+
+    file.copy(file.path(tempdir(), "fragpipe_dir_temp", "fp_example",
+                        "fragpipe.workflow"),
+              file.path(tempdir(), "fragpipe_dir_temp", "fp_example",
+                        "fragpipe2.workflow"))
+    expect_error(do.call(.checkArgumentsFragPipe, args),
+                 "There are more than one workflow file")
+
     ## idCol
     args <- args0
     args$idCol <- 1
@@ -190,6 +210,9 @@ test_that("argument checking for FP works", {
     args$proteinIdCol <- 1
     expect_error(do.call(.checkArgumentsFragPipe, args),
                  "'proteinIdCol' must be of class 'character'")
+    args <- args0
+    args$proteinIdCol <- "Protein.ID"
+    expect_null(do.call(.checkArgumentsFragPipe, args))
 
     ## stringIdCol
     args <- args0

diff --git a/tests/testthat/test-plotMissingValues.R b/tests/testthat/test-plotMissingValues.R
@@ -43,6 +43,12 @@ test_that("missing value plots work", {
     expect_s3_class(out, "ggplot")
     expect_named(out$data, c("sample", "nNA", "pNA", "assay"))
 
+    out <- plotFractionDetectedPerSample(
+        dfNA = DataFrame(as.data.frame(nbr_na_mq$nNAcols) %>%
+            dplyr::rename(sample = name)))
+    expect_s3_class(out, "ggplot")
+    expect_named(out$data, c("sample", "nNA", "pNA", "assay"))
+
     ## --------------------------------------------------------------------- ##
     ## plotDetectedInSamples
     ## --------------------------------------------------------------------- ##
@@ -66,6 +72,15 @@ test_that("missing value plots work", {
     }
     expect_equal(levels(out$data$nObs), as.character(c(0, seq_len(9))))
 
+    out <- plotDetectedInSamples(dfNA = DataFrame(nbr_na_mq$nNArows))
+    expect_s3_class(out, "ggplot")
+    expect_named(out$data, c("nNA", "n", "nObs"))
+    for (i in c(0, seq_len(9))) {
+        expect_equal(sum(nbr_na_mq$nNArows$nNA == 9 - i),
+                     out$data$n[out$data$nObs == i])
+    }
+    expect_equal(levels(out$data$nObs), as.character(c(0, seq_len(9))))
+
     ## PD data
     out <- plotDetectedInSamples(dfNA = as.data.frame(nbr_na_pd$nNArows))
     expect_s3_class(out, "ggplot")

diff --git a/tests/testthat/test-prepareFinalSCE.R b/tests/testthat/test-prepareFinalSCE.R
@@ -27,6 +27,13 @@ test_that("assembling the SCE works", {
         expType = "ProteomeDiscoverer"
     )
 
+    args0_fp <- list(
+        sce = sce_fp_final,
+        baseFileName = tempfile(),
+        featureCollections = fcoll_fp_final,
+        expType = "FragPipe"
+    )
+
     ## Fail with wrong arguments
     ## --------------------------------------------------------------------- ##
     ## sce
@@ -185,4 +192,39 @@ test_that("assembling the SCE works", {
     expect_equal(SummarizedExperiment::rowData(sce)$Gene.Symbol,
                  SummarizedExperiment::rowData(args0_pd$sce)$Gene.Symbol)
 
+    ## Works with correct arguments - FragPipe
+    ## --------------------------------------------------------------------- ##
+    sce <- do.call(prepareFinalSCE, args0_fp)
+    expect_s4_class(sce, "SingleCellExperiment")
+    expect_equal(nrow(sce), 150)
+    expect_equal(ncol(sce), 9)
+    expect_true(all(c("MaxLFQ.intensity", "log2_MaxLFQ.intensity",
+                      "log2_MaxLFQ.intensity_withNA", "imputed_MaxLFQ.intensity",
+                      "Unique.spectral.count", "Total.spectral.count",
+                      "Spectral.count", "Intensity") %in%
+                        SummarizedExperiment::assayNames(sce)))
+    expect_true(all(c("sample", "group") %in%
+                        colnames(SummarizedExperiment::colData(sce))))
+    expect_true(all(c("Gene", "Protein.ID") %in%
+                        colnames(SummarizedExperiment::rowData(sce))))
+    expect_false(file.exists(paste0(args0_fp$baseFileName,
+                                   "_sce_extra_annots.tsv")))
+    # tmp <- read.delim(paste0(args0_pd$baseFileName,
+    #                          "_sce_extra_annots.tsv"),  nrow = 2)
+    # expect_named(tmp, c("ID", "Proteins.Unique.Sequence.ID",
+    #                     "GO.Accessions"))
+    md <- S4Vectors::metadata(sce)
+    expect_type(md, "list")
+    expect_type(md$iSEE$options, "list")
+    expect_equal(length(md$iSEE$options), 4)
+    expect_s4_class(md$iSEE$options$iSEEu_FeatureSetTable_collections$complexes,
+                    "CharacterList")
+    expect_equal(md$iSEE$options$iSEEu_LogFC_Fields, character(0))
+    expect_equal(md$iSEE$options$iSEEu_AveAb_Fields, character(0))
+    expect_equal(md$iSEE$options$iSEEu_PValue_Fields, character(0))
+    expect_equal(SummarizedExperiment::assay(sce, "MaxLFQ.intensity")[, 1],
+                 SummarizedExperiment::assay(args0_fp$sce, "MaxLFQ.intensity")[, 1])
+    expect_equal(SummarizedExperiment::rowData(sce)$Gene,
+                 SummarizedExperiment::rowData(args0_fp$sce)$Gene)
+
 })
diff --git a/tests/testthat/test-readFragPipeInfo.R b/tests/testthat/test-readFragPipeInfo.R
@@ -79,4 +79,48 @@ test_that("readFragPipeInfo works", {
     expect_equal(fp$`Variable modifications`, "M(15.9949), N-term(42.0106)")
     expect_equal(fp$`Fixed modifications`, "C(57.0215)")
     expect_equal(fp$`Database decoy tag`, "rev_")
+
+    ## -------------------------------------------------------------------------
+    ## Create file where msfragger.search_enzyme_name_2 is not null
+    ## Also copy the workflow file to the folder above
+    file.copy(from = system.file("extdata", "fp_example", "fragpipe.workflow",
+                                 package = "einprot"),
+              to = file.path(tempdir(), "tempfp"))
+    tmp <- readLines(file.path(tempdir(), "tempfp", "fragpipe.workflow"))
+    i <- grep("msfragger.search_enzyme_name_2", tmp)
+    expect_equal(i, 142L)
+    tmp[i] <- "msfragger.search_enzyme_name_2=trypsin"
+    writeLines(tmp, file.path(tempdir(), "tempfp", "fragpipe.workflow"))
+    fp <- readFragPipeInfo(file.path(tempdir(), "tempfp"))
+    expect_type(fp, "list")
+    expect_equal(length(fp), 16)
+    expect_named(fp, c("FragPipe version", "FragPipe parameter file",
+                       "FragPipe log file", "Search engine",
+                       "Raw file location", "Raw files", "Sample names",
+                       "Databases", "Contaminants", "Peptides (ranges)",
+                       "Mass error tolerances", "Quantification settings (LFQ)",
+                       "Enzymes", "Variable modifications",
+                       "Fixed modifications", "Database decoy tag"))
+
+    ## All entries should be scalar values
+    expect_true(all(vapply(fp, length, 0) == 1))
+
+    ## Check individual values
+    expect_equal(fp$`FragPipe version`, "19.1")
+    expect_equal(basename(fp$`FragPipe parameter file`), "fragpipe.workflow")
+    expect_equal(basename(fp$`FragPipe log file`), "log_2023-04-12_20-12-46.txt")
+    expect_equal(fp$`Search engine`, "MSFragger-3.7")
+    expect_equal(fp$`Raw file location`, "D:/Data/FUSION")
+    expect_equal(fp$`Raw files`, "F_160817_AdnpFB_IP06.raw, F_160817_AdnpFB_IP05.raw, F_160817_RBC_ctrl_IP02.raw, F_160817_AdnpFB_IP04.raw, F_160817_RBC_ctrl_IP01.raw, F_160817_Chd4BF_IP09.raw, F_160817_Chd4BF_IP07.raw, F_160817_Chd4BF_IP08.raw, F_160817_RBC_ctrl_IP03.raw")
+    expect_equal(fp$`Sample names`, "Adnp_IP04, Adnp_IP05, Adnp_IP06, Chd4BF_IP07, Chd4BF_IP08, Chd4BF_IP09, RBC_ctrl_IP01, RBC_ctrl_IP02, RBC_ctrl_IP03")
+    expect_equal(fp$Databases, "D/://Data//FASTA//2023-04-12-decoys-contam_MOUSE__190410.fasta.fas")
+    expect_equal(fp$Contaminants, "cRAP")
+    expect_equal(fp$`Peptides (ranges)`, "length: 7-50 AA; mass: 500-5000 Da")
+    expect_equal(fp$`Mass error tolerances`, "precursor:-20-20 [ppm]; fragment:0.7 [Da] (after optimization:200 PPM)")
+    expect_equal(fp$`Quantification settings (LFQ)`, "IonQuant: TRUE, Calculate MaxLFQ intensity: TRUE, Normalization: TRUE, match-between runs (MBR): FALSE, min. ions: 2")
+    expect_equal(fp$Enzymes, "stricttrypsin[KR, C-terminal, 2 missed cleavages]; trypsin; [, C-terminal, 2 missed cleavages]")
+    expect_equal(fp$`Variable modifications`, "M(15.9949), N-term(42.0106)")
+    expect_equal(fp$`Fixed modifications`, "C(57.0215)")
+    expect_equal(fp$`Database decoy tag`, "rev_")
+
 })
diff --git a/tests/testthat/test-runPDTMTAnalysis.R b/tests/testthat/test-runPDTMTAnalysis.R
@@ -808,6 +808,22 @@ test_that("runPDTMTAnalysis works", {
                    "already exists but forceOverwrite = TRUE")
     expect_true(file.exists(file.path(outDir, paste0(outBaseName, "_PDTMTqc.pdf"))))
 
+    ## Not all files present -> no QC plot
+    dir.create(file.path(outDir, "pdtmt_missing_files"), showWarnings = FALSE,
+               recursive = TRUE)
+    file.copy(system.file("extdata", "pdtmt_example",
+                          "Fig2_m23139_RTS_QC_varMods_Proteins.txt",
+                          package = "einprot"),
+              file.path(outDir, "pdtmt_missing_files"))
+    args <- args0
+    args$pdOutputFolder <- file.path(outDir, "pdtmt_missing_files")
+    args$outputDir <- file.path(outDir, "pdtmt_missing_files")
+    args$generateQCPlot <- TRUE
+    expect_warning(res <- do.call(runPDTMTAnalysis, args),
+                   "The following files were not found, will not generate")
+    expect_false(file.exists(file.path(outDir, "pdtmt_missing_files",
+                                       paste0(outBaseName, "_PDTMTqc.pdf"))))
+
     ## iColPattern without escaped period
     args <- args0
     args$forceOverwrite <- TRUE

diff --git a/tests/testthat/test-runPDTMTptmAnalysis.R b/tests/testthat/test-runPDTMTptmAnalysis.R
@@ -399,6 +399,15 @@ test_that("runPDTMTptmAnalysis works", {
     expect_equal(basename(res), paste0(outBaseName, ".Rmd"))
     expect_true(file.exists(file.path(outDir, paste0(outBaseName, ".Rmd"))))
 
+    ## Non-existing output directory
+    args <- args0
+    args$outputDir <- file.path(outDir, "new_directory_pdtmtptm")
+    res <- do.call(runPDTMTptmAnalysis, args)
+    expect_type(res, "character")
+    expect_equal(basename(res), paste0(outBaseName, ".Rmd"))
+    expect_true(file.exists(file.path(outDir, "new_directory_pdtmtptm",
+                                      paste0(outBaseName, ".Rmd"))))
+
     ## Stop if forceOverwrite = FALSE
     args <- args0
     args$forceOverwrite <- FALSE

diff --git a/tests/testthat/test-runTests.R b/tests/testthat/test-runTests.R
@@ -1286,6 +1286,92 @@ test_that("testing works", {
     expect_equal(out$tests[[1]]$logFC / out$tests[[1]]$se.logFC,
                  out$tests[[1]]$t, ignore_attr = TRUE)
 
+    ## -------------------------------------------------------------------------
+    ## Merged groups, with batch effect, with sample weights
+    args <- args0
+    args$groupComposition <- list(rbc_adnp = c("RBC_ctrl", "Adnp"))
+    args$comparisons <- list(c("Adnp", "RBC_ctrl"), c("rbc_adnp", "Chd4BF"))
+    args$sce$batch <- c("B1", "B2", "B3", "B1", "B2", "B3", "B1", "B2", "B3")
+    args$sce$sampleweight <-
+        c(Adnp_IP04 = 1, Adnp_IP05 = 6, Adnp_IP06 = 2,
+          Chd4BF_IP07 = 6, Chd4BF_IP08 = 1, Chd4BF_IP09 = 5,
+          RBC_ctrl_IP01 = 7, RBC_ctrl_IP02 = 1, RBC_ctrl_IP03 = 2)[colnames(args$sce)]
+    out <- do.call(runTest, args)
+    expect_type(out, "list")
+    expect_length(out, 9)
+    expect_named(out, c("plottitles", "plotsubtitles", "plotnotes",
+                        "tests", "curveparams", "topsets", "messages",
+                        "design", "featureCollections"))
+    expect_s3_class(out$tests[[1]], "data.frame")
+    expect_type(out$plotnotes[[1]], "character")
+    expect_type(out$plottitles[[1]], "character")
+    expect_s3_class(out$tests[[2]], "data.frame")
+    expect_type(out$plotnotes[[2]], "character")
+    expect_type(out$plottitles[[2]], "character")
+    expect_type(out$featureCollections, "list")
+    expect_type(out$design, "list")
+    expect_named(out$design, c("RBC_ctrl_vs_Adnp", "Chd4BF_vs_rbc_adnp"))
+    expect_type(out$design$RBC_ctrl_vs_Adnp, "list")
+    expect_named(out$design$RBC_ctrl_vs_Adnp, c("design", "sampleData", "contrast",
+                                                "sampleWeights"))
+    expect_named(out$design$RBC_ctrl_vs_Adnp$sampleData, c("fc", "bc"))
+    expect_equal(out$design$RBC_ctrl_vs_Adnp$contrast, c(0, 0, 0, 1))
+    expect_equal(out$design$RBC_ctrl_vs_Adnp$sampleWeights,
+                 args$sce$sampleweight[rownames(out$design$RBC_ctrl_vs_Adnp$sampleData)])
+    expect_type(out$design$Chd4BF_vs_rbc_adnp, "list")
+    expect_named(out$design$Chd4BF_vs_rbc_adnp, c("design", "sampleData", "contrast",
+                                                  "sampleWeights"))
+    expect_named(out$design$Chd4BF_vs_rbc_adnp$sampleData, c("fc", "bc"))
+    expect_equal(out$design$Chd4BF_vs_rbc_adnp$contrast, c(0, 0, 0, 1))
+    expect_equal(out$design$Chd4BF_vs_rbc_adnp$sampleWeights,
+                 args$sce$sampleweight[rownames(out$design$Chd4BF_vs_rbc_adnp$sampleData)])
+    expect_type(out$curveparams[[1]], "list")
+    expect_equal(nrow(out$tests[[1]]), 150)
+    expect_type(out$curveparams[[2]], "list")
+    expect_equal(nrow(out$tests[[2]]), 150)
+    expect_true(all(c("adj.P.Val", "iBAQ.Adnp_IP04",
+                      "showInVolcano", "IDsForSTRING") %in% colnames(out$tests[[1]])))
+    expect_true(all(c("adj.P.Val", "iBAQ.Adnp_IP04",
+                      "showInVolcano", "IDsForSTRING") %in% colnames(out$tests[[2]])))
+    expect_equal(out$tests[[1]]$pid, rownames(sce_mq_final))
+    expect_equal(out$tests[[2]]$pid, rownames(sce_mq_final))
+    expect_equal(substr(out$plotnotes[[1]], 1, 8), "df.prior")
+    expect_equal(substr(out$plotnotes[[2]], 1, 8), "df.prior")
+    expect_equal(out$plottitles[[1]], "RBC_ctrl vs Adnp, limma")
+    expect_equal(out$plottitles[[2]], "Chd4BF vs rbc_adnp, limma")
+    expect_s4_class(out$featureCollections$complexes, "CharacterList")
+    expect_s4_class(S4Vectors::mcols(out$featureCollections$complexes), "DFrame")
+    expect_true("RBC_ctrl_vs_Adnp_FDR" %in%
+                    colnames(S4Vectors::mcols(out$featureCollections$complexes)))
+    expect_equal(out$tests[[1]]$iBAQ.Adnp_IP04,
+                 SummarizedExperiment::assay(args$sce, "iBAQ")[, "Adnp_IP04"],
+                 ignore_attr = TRUE)
+    expect_equal(out$tests[[2]]$iBAQ.Adnp_IP04,
+                 SummarizedExperiment::assay(args$sce, "iBAQ")[, "Adnp_IP04"],
+                 ignore_attr = TRUE)
+    ## Compare to values calculated manually
+    expect_equal(out$tests[[2]][c("Mbd3", "Mta1.F8WHY8", "Pogz", "Zfp462.B1AWL2"), "logFC"],
+                 c(13.237967, 15.502178, 9.519576, 10.742178),
+                 tolerance = 0.001)
+    expect_equal(out$tests[[2]][c("Mbd3", "Mta1.F8WHY8", "Pogz", "Zfp462.B1AWL2"), "t"],
+                 c(10.793285, 10.408011, 8.845311, 8.634606),
+                 tolerance = 0.001)
+    ## Check consistency of values
+    ## logFC +/- t * se = CI.R/CI.L
+    expect_equal(out$tests[[1]]$logFC + qt(p = 0.975, df = out$tests[[1]]$df.total) *
+                     out$tests[[1]]$se.logFC,
+                 out$tests[[1]]$CI.R, ignore_attr = TRUE)
+    expect_equal(out$tests[[1]]$logFC - qt(p = 0.975, df = out$tests[[1]]$df.total) *
+                     out$tests[[1]]$se.logFC,
+                 out$tests[[1]]$CI.L, ignore_attr = TRUE)
+    ## p-values
+    expect_equal(2 * stats::pt(abs(out$tests[[1]]$t),
+                               out$tests[[1]]$df.total, lower.tail = FALSE),
+                 out$tests[[1]]$P.Value, ignore_attr = TRUE)
+    ## t-statistics
+    expect_equal(out$tests[[1]]$logFC / out$tests[[1]]$se.logFC,
+                 out$tests[[1]]$t, ignore_attr = TRUE)
+
     ## -------------------------------------------------------------------------
     ## Merged groups, with batch effect, single fit
     args <- args0