diff --git a/R/checkArgumentsFragPipe.R b/R/checkArgumentsFragPipe.R index 8446a55..0b0a678 100755 --- a/R/checkArgumentsFragPipe.R +++ b/R/checkArgumentsFragPipe.R @@ -54,6 +54,11 @@ if (length(fpConfigFile) > 1) { stop("There are more than one config file in the FragPipe directory") } + fpWorkflowFile <- list.files(fragpipeDir, pattern = "^fragpipe.*.workflow$", + full.names = TRUE) + if (length(fpWorkflowFile) > 1) { + stop("There are more than one workflow file in the FragPipe directory") + } fpLogFile <- list.files(fragpipeDir, pattern = "^log_.+.txt$", full.names = TRUE) if (length(fpLogFile) > 1) { diff --git a/tests/testthat/test-checkArgumentsFragPipe.R b/tests/testthat/test-checkArgumentsFragPipe.R index 5c1d504..12fe742 100644 --- a/tests/testthat/test-checkArgumentsFragPipe.R +++ b/tests/testthat/test-checkArgumentsFragPipe.R @@ -16,7 +16,7 @@ test_that("argument checking for FP works", { idCol = function(df) combineIds(df, combineCols = c("Gene", "Protein.ID")), labelCol = function(df) combineIds(df, combineCols = c("Gene", "Protein.ID")), geneIdCol = function(df) getFirstId(df, colName = "Gene"), - proteinIdCol = "Protein.ID", + proteinIdCol = function(df) getFirstId(df, colName = "Protein.ID"), stringIdCol = function(df) combineIds(df, combineCols = c("Gene", "Protein.ID"), combineWhen = "missing", makeUnique = FALSE), iColPattern = "\\.MaxLFQ\\.Intensity$", @@ -167,6 +167,26 @@ test_that("argument checking for FP works", { expect_error(do.call(.checkArgumentsFragPipe, args), "The file missing/combined_protein.tsv doesn't exist") + ## Multiple log files + dir.create(file.path(tempdir(), "fragpipe_dir_temp")) + file.copy(system.file("extdata", "fp_example", package = "einprot"), + file.path(tempdir(), "fragpipe_dir_temp"), recursive = TRUE) + file.copy(file.path(tempdir(), "fragpipe_dir_temp", "fp_example", + "log_2023-04-12_20-12-46.txt"), + file.path(tempdir(), "fragpipe_dir_temp", "fp_example", + "log_2024-04-12_20-12-46.txt")) + args <- args0 + args$fragpipeDir <- file.path(tempdir(), "fragpipe_dir_temp", "fp_example") + expect_error(do.call(.checkArgumentsFragPipe, args), + "There are more than one log file") + + file.copy(file.path(tempdir(), "fragpipe_dir_temp", "fp_example", + "fragpipe.workflow"), + file.path(tempdir(), "fragpipe_dir_temp", "fp_example", + "fragpipe2.workflow")) + expect_error(do.call(.checkArgumentsFragPipe, args), + "There are more than one workflow file") + ## idCol args <- args0 args$idCol <- 1 @@ -190,6 +210,9 @@ test_that("argument checking for FP works", { args$proteinIdCol <- 1 expect_error(do.call(.checkArgumentsFragPipe, args), "'proteinIdCol' must be of class 'character'") + args <- args0 + args$proteinIdCol <- "Protein.ID" + expect_null(do.call(.checkArgumentsFragPipe, args)) ## stringIdCol args <- args0 diff --git a/tests/testthat/test-plotMissingValues.R b/tests/testthat/test-plotMissingValues.R index a01d7e3..52d7558 100644 --- a/tests/testthat/test-plotMissingValues.R +++ b/tests/testthat/test-plotMissingValues.R @@ -43,6 +43,12 @@ test_that("missing value plots work", { expect_s3_class(out, "ggplot") expect_named(out$data, c("sample", "nNA", "pNA", "assay")) + out <- plotFractionDetectedPerSample( + dfNA = DataFrame(as.data.frame(nbr_na_mq$nNAcols) %>% + dplyr::rename(sample = name))) + expect_s3_class(out, "ggplot") + expect_named(out$data, c("sample", "nNA", "pNA", "assay")) + ## --------------------------------------------------------------------- ## ## plotDetectedInSamples ## --------------------------------------------------------------------- ## @@ -66,6 +72,15 @@ test_that("missing value plots work", { } expect_equal(levels(out$data$nObs), as.character(c(0, seq_len(9)))) + out <- plotDetectedInSamples(dfNA = DataFrame(nbr_na_mq$nNArows)) + expect_s3_class(out, "ggplot") + expect_named(out$data, c("nNA", "n", "nObs")) + for (i in c(0, seq_len(9))) { + expect_equal(sum(nbr_na_mq$nNArows$nNA == 9 - i), + out$data$n[out$data$nObs == i]) + } + expect_equal(levels(out$data$nObs), as.character(c(0, seq_len(9)))) + ## PD data out <- plotDetectedInSamples(dfNA = as.data.frame(nbr_na_pd$nNArows)) expect_s3_class(out, "ggplot") diff --git a/tests/testthat/test-prepareFinalSCE.R b/tests/testthat/test-prepareFinalSCE.R index a11b46d..0c90a35 100644 --- a/tests/testthat/test-prepareFinalSCE.R +++ b/tests/testthat/test-prepareFinalSCE.R @@ -27,6 +27,13 @@ test_that("assembling the SCE works", { expType = "ProteomeDiscoverer" ) + args0_fp <- list( + sce = sce_fp_final, + baseFileName = tempfile(), + featureCollections = fcoll_fp_final, + expType = "FragPipe" + ) + ## Fail with wrong arguments ## --------------------------------------------------------------------- ## ## sce @@ -185,4 +192,39 @@ test_that("assembling the SCE works", { expect_equal(SummarizedExperiment::rowData(sce)$Gene.Symbol, SummarizedExperiment::rowData(args0_pd$sce)$Gene.Symbol) + ## Works with correct arguments - FragPipe + ## --------------------------------------------------------------------- ## + sce <- do.call(prepareFinalSCE, args0_fp) + expect_s4_class(sce, "SingleCellExperiment") + expect_equal(nrow(sce), 150) + expect_equal(ncol(sce), 9) + expect_true(all(c("MaxLFQ.intensity", "log2_MaxLFQ.intensity", + "log2_MaxLFQ.intensity_withNA", "imputed_MaxLFQ.intensity", + "Unique.spectral.count", "Total.spectral.count", + "Spectral.count", "Intensity") %in% + SummarizedExperiment::assayNames(sce))) + expect_true(all(c("sample", "group") %in% + colnames(SummarizedExperiment::colData(sce)))) + expect_true(all(c("Gene", "Protein.ID") %in% + colnames(SummarizedExperiment::rowData(sce)))) + expect_false(file.exists(paste0(args0_fp$baseFileName, + "_sce_extra_annots.tsv"))) + # tmp <- read.delim(paste0(args0_pd$baseFileName, + # "_sce_extra_annots.tsv"), nrow = 2) + # expect_named(tmp, c("ID", "Proteins.Unique.Sequence.ID", + # "GO.Accessions")) + md <- S4Vectors::metadata(sce) + expect_type(md, "list") + expect_type(md$iSEE$options, "list") + expect_equal(length(md$iSEE$options), 4) + expect_s4_class(md$iSEE$options$iSEEu_FeatureSetTable_collections$complexes, + "CharacterList") + expect_equal(md$iSEE$options$iSEEu_LogFC_Fields, character(0)) + expect_equal(md$iSEE$options$iSEEu_AveAb_Fields, character(0)) + expect_equal(md$iSEE$options$iSEEu_PValue_Fields, character(0)) + expect_equal(SummarizedExperiment::assay(sce, "MaxLFQ.intensity")[, 1], + SummarizedExperiment::assay(args0_fp$sce, "MaxLFQ.intensity")[, 1]) + expect_equal(SummarizedExperiment::rowData(sce)$Gene, + SummarizedExperiment::rowData(args0_fp$sce)$Gene) + }) diff --git a/tests/testthat/test-readFragPipeInfo.R b/tests/testthat/test-readFragPipeInfo.R index b58cba0..11b1881 100644 --- a/tests/testthat/test-readFragPipeInfo.R +++ b/tests/testthat/test-readFragPipeInfo.R @@ -79,4 +79,48 @@ test_that("readFragPipeInfo works", { expect_equal(fp$`Variable modifications`, "M(15.9949), N-term(42.0106)") expect_equal(fp$`Fixed modifications`, "C(57.0215)") expect_equal(fp$`Database decoy tag`, "rev_") + + ## ------------------------------------------------------------------------- + ## Create file where msfragger.search_enzyme_name_2 is not null + ## Copy also workflow file to folder above + file.copy(from = system.file("extdata", "fp_example", "fragpipe.workflow", + package = "einprot"), + to = file.path(tempdir(), "tempfp")) + tmp <- readLines(file.path(tempdir(), "tempfp", "fragpipe.workflow")) + i <- grep("msfragger.search_enzyme_name_2", tmp) + expect_equal(i, 142L) + tmp[i] <- "msfragger.search_enzyme_name_2=trypsin" + writeLines(tmp, file.path(tempdir(), "tempfp", "fragpipe.workflow")) + fp <- readFragPipeInfo(file.path(tempdir(), "tempfp")) + expect_type(fp, "list") + expect_equal(length(fp), 16) + expect_named(fp, c("FragPipe version", "FragPipe parameter file", + "FragPipe log file", "Search engine", + "Raw file location", "Raw files", "Sample names", + "Databases", "Contaminants", "Peptides (ranges)", + "Mass error tolerances", "Quantification settings (LFQ)", + "Enzymes", "Variable modifications", + "Fixed modifications", "Database decoy tag")) + + ## All entries should be scalar values + expect_true(all(vapply(fp, length, 0) == 1)) + + ## Check individual values + expect_equal(fp$`FragPipe version`, "19.1") + expect_equal(basename(fp$`FragPipe parameter file`), "fragpipe.workflow") + expect_equal(basename(fp$`FragPipe log file`), "log_2023-04-12_20-12-46.txt") + expect_equal(fp$`Search engine`, "MSFragger-3.7") + expect_equal(fp$`Raw file location`, "D:/Data/FUSION") + expect_equal(fp$`Raw files`, "F_160817_AdnpFB_IP06.raw, F_160817_AdnpFB_IP05.raw, F_160817_RBC_ctrl_IP02.raw, F_160817_AdnpFB_IP04.raw, F_160817_RBC_ctrl_IP01.raw, F_160817_Chd4BF_IP09.raw, F_160817_Chd4BF_IP07.raw, F_160817_Chd4BF_IP08.raw, F_160817_RBC_ctrl_IP03.raw") + expect_equal(fp$`Sample names`, "Adnp_IP04, Adnp_IP05, Adnp_IP06, Chd4BF_IP07, Chd4BF_IP08, Chd4BF_IP09, RBC_ctrl_IP01, RBC_ctrl_IP02, RBC_ctrl_IP03") + expect_equal(fp$Databases, "D/://Data//FASTA//2023-04-12-decoys-contam_MOUSE__190410.fasta.fas") + expect_equal(fp$Contaminants, "cRAP") + expect_equal(fp$`Peptides (ranges)`, "length: 7-50 AA; mass: 500-5000 Da") + expect_equal(fp$`Mass error tolerances`, "precursor:-20-20 [ppm]; fragment:0.7 [Da] (after optimization:200 PPM)") + expect_equal(fp$`Quantification settings (LFQ)`, "IonQuant: TRUE, Calculate MaxLFQ intensity: TRUE, Normalization: TRUE, match-between runs (MBR): FALSE, min. ions: 2") + expect_equal(fp$Enzymes, "stricttrypsin[KR, C-terminal, 2 missed cleavages]; trypsin; [, C-terminal, 2 missed cleavages]") + expect_equal(fp$`Variable modifications`, "M(15.9949), N-term(42.0106)") + expect_equal(fp$`Fixed modifications`, "C(57.0215)") + expect_equal(fp$`Database decoy tag`, "rev_") + }) diff --git a/tests/testthat/test-runPDTMTAnalysis.R b/tests/testthat/test-runPDTMTAnalysis.R index 385b387..2b9cda9 100644 --- a/tests/testthat/test-runPDTMTAnalysis.R +++ b/tests/testthat/test-runPDTMTAnalysis.R @@ -808,6 +808,22 @@ test_that("runPDTMTAnalysis works", { "already exists but forceOverwrite = TRUE") expect_true(file.exists(file.path(outDir, paste0(outBaseName, "_PDTMTqc.pdf")))) + ## Not all files present -> no QC plot + dir.create(file.path(outDir, "pdtmt_missing_files"), showWarnings = FALSE, + recursive = TRUE) + file.copy(system.file("extdata", "pdtmt_example", + "Fig2_m23139_RTS_QC_varMods_Proteins.txt", + package = "einprot"), + file.path(outDir, "pdtmt_missing_files")) + args <- args0 + args$pdOutputFolder <- file.path(outDir, "pdtmt_missing_files") + args$outputDir <- file.path(outDir, "pdtmt_missing_files") + args$generateQCPlot <- TRUE + expect_warning(res <- do.call(runPDTMTAnalysis, args), + "The following files were not found, will not generate") + expect_false(file.exists(file.path(outDir, "pdtmt_missing_files", + paste0(outBaseName, "_PDTMTqc.pdf")))) + ## iColPattern without escaped period args <- args0 args$forceOverwrite <- TRUE diff --git a/tests/testthat/test-runPDTMTptmAnalysis.R b/tests/testthat/test-runPDTMTptmAnalysis.R index cc3040c..42b9bf3 100644 --- a/tests/testthat/test-runPDTMTptmAnalysis.R +++ b/tests/testthat/test-runPDTMTptmAnalysis.R @@ -399,6 +399,15 @@ test_that("runPDTMTptmAnalysis works", { expect_equal(basename(res), paste0(outBaseName, ".Rmd")) expect_true(file.exists(file.path(outDir, paste0(outBaseName, ".Rmd")))) + ## Non-existing output directory + args <- args0 + args$outputDir <- file.path(outDir, "new_directory_pdtmtptm") + res <- do.call(runPDTMTptmAnalysis, args) + expect_type(res, "character") + expect_equal(basename(res), paste0(outBaseName, ".Rmd")) + expect_true(file.exists(file.path(outDir, "new_directory_pdtmtptm", + paste0(outBaseName, ".Rmd")))) + ## Stop if forceOverwrite = FALSE args <- args0 args$forceOverwrite <- FALSE diff --git a/tests/testthat/test-runTests.R b/tests/testthat/test-runTests.R index 5b88fe3..00de64a 100644 --- a/tests/testthat/test-runTests.R +++ b/tests/testthat/test-runTests.R @@ -1286,6 +1286,92 @@ test_that("testing works", { expect_equal(out$tests[[1]]$logFC / out$tests[[1]]$se.logFC, out$tests[[1]]$t, ignore_attr = TRUE) + ## ------------------------------------------------------------------------- + ## Merged groups, with batch effect, with sample weights + args <- args0 + args$groupComposition <- list(rbc_adnp = c("RBC_ctrl", "Adnp")) + args$comparisons <- list(c("Adnp", "RBC_ctrl"), c("rbc_adnp", "Chd4BF")) + args$sce$batch <- c("B1", "B2", "B3", "B1", "B2", "B3", "B1", "B2", "B3") + args$sce$sampleweight <- + c(Adnp_IP04 = 1, Adnp_IP05 = 6, Adnp_IP06 = 2, + Chd4BF_IP07 = 6, Chd4BF_IP08 = 1, Chd4BF_IP09 = 5, + RBC_ctrl_IP01 = 7, RBC_ctrl_IP02 = 1, RBC_ctrl_IP03 = 2)[colnames(args$sce)] + out <- do.call(runTest, args) + expect_type(out, "list") + expect_length(out, 9) + expect_named(out, c("plottitles", "plotsubtitles", "plotnotes", + "tests", "curveparams", "topsets", "messages", + "design", "featureCollections")) + expect_s3_class(out$tests[[1]], "data.frame") + expect_type(out$plotnotes[[1]], "character") + expect_type(out$plottitles[[1]], "character") + expect_s3_class(out$tests[[2]], "data.frame") + expect_type(out$plotnotes[[2]], "character") + expect_type(out$plottitles[[2]], "character") + expect_type(out$featureCollections, "list") + expect_type(out$design, "list") + expect_named(out$design, c("RBC_ctrl_vs_Adnp", "Chd4BF_vs_rbc_adnp")) + expect_type(out$design$RBC_ctrl_vs_Adnp, "list") + expect_named(out$design$RBC_ctrl_vs_Adnp, c("design", "sampleData", "contrast", + "sampleWeights")) + expect_named(out$design$RBC_ctrl_vs_Adnp$sampleData, c("fc", "bc")) + expect_equal(out$design$RBC_ctrl_vs_Adnp$contrast, c(0, 0, 0, 1)) + expect_equal(out$design$RBC_ctrl_vs_Adnp$sampleWeights, + args$sce$sampleweight[rownames(out$design$RBC_ctrl_vs_Adnp$sampleData)]) + expect_type(out$design$Chd4BF_vs_rbc_adnp, "list") + expect_named(out$design$Chd4BF_vs_rbc_adnp, c("design", "sampleData", "contrast", + "sampleWeights")) + expect_named(out$design$Chd4BF_vs_rbc_adnp$sampleData, c("fc", "bc")) + expect_equal(out$design$Chd4BF_vs_rbc_adnp$contrast, c(0, 0, 0, 1)) + expect_equal(out$design$Chd4BF_vs_rbc_adnp$sampleWeights, + args$sce$sampleweight[rownames(out$design$Chd4BF_vs_rbc_adnp$sampleData)]) + expect_type(out$curveparams[[1]], "list") + expect_equal(nrow(out$tests[[1]]), 150) + expect_type(out$curveparams[[2]], "list") + expect_equal(nrow(out$tests[[2]]), 150) + expect_true(all(c("adj.P.Val", "iBAQ.Adnp_IP04", + "showInVolcano", "IDsForSTRING") %in% colnames(out$tests[[1]]))) + expect_true(all(c("adj.P.Val", "iBAQ.Adnp_IP04", + "showInVolcano", "IDsForSTRING") %in% colnames(out$tests[[2]]))) + expect_equal(out$tests[[1]]$pid, rownames(sce_mq_final)) + expect_equal(out$tests[[2]]$pid, rownames(sce_mq_final)) + expect_equal(substr(out$plotnotes[[1]], 1, 8), "df.prior") + expect_equal(substr(out$plotnotes[[2]], 1, 8), "df.prior") + expect_equal(out$plottitles[[1]], "RBC_ctrl vs Adnp, limma") + expect_equal(out$plottitles[[2]], "Chd4BF vs rbc_adnp, limma") + expect_s4_class(out$featureCollections$complexes, "CharacterList") + expect_s4_class(S4Vectors::mcols(out$featureCollections$complexes), "DFrame") + expect_true("RBC_ctrl_vs_Adnp_FDR" %in% + colnames(S4Vectors::mcols(out$featureCollections$complexes))) + expect_equal(out$tests[[1]]$iBAQ.Adnp_IP04, + SummarizedExperiment::assay(args$sce, "iBAQ")[, "Adnp_IP04"], + ignore_attr = TRUE) + expect_equal(out$tests[[2]]$iBAQ.Adnp_IP04, + SummarizedExperiment::assay(args$sce, "iBAQ")[, "Adnp_IP04"], + ignore_attr = TRUE) + ## Compare to values calculated manually + expect_equal(out$tests[[2]][c("Mbd3", "Mta1.F8WHY8", "Pogz", "Zfp462.B1AWL2"), "logFC"], + c(13.237967, 15.502178, 9.519576, 10.742178), + tolerance = 0.001) + expect_equal(out$tests[[2]][c("Mbd3", "Mta1.F8WHY8", "Pogz", "Zfp462.B1AWL2"), "t"], + c(10.793285, 10.408011, 8.845311, 8.634606), + tolerance = 0.001) + ## Check consistency of values + ## logFC +/- t * se = CI.R/CI.L + expect_equal(out$tests[[1]]$logFC + qt(p = 0.975, df = out$tests[[1]]$df.total) * + out$tests[[1]]$se.logFC, + out$tests[[1]]$CI.R, ignore_attr = TRUE) + expect_equal(out$tests[[1]]$logFC - qt(p = 0.975, df = out$tests[[1]]$df.total) * + out$tests[[1]]$se.logFC, + out$tests[[1]]$CI.L, ignore_attr = TRUE) + ## p-values + expect_equal(2 * stats::pt(abs(out$tests[[1]]$t), + out$tests[[1]]$df.total, lower.tail = FALSE), + out$tests[[1]]$P.Value, ignore_attr = TRUE) + ## t-statistics + expect_equal(out$tests[[1]]$logFC / out$tests[[1]]$se.logFC, + out$tests[[1]]$t, ignore_attr = TRUE) + ## ------------------------------------------------------------------------- ## Merged groups, with batch effect, single fit args <- args0