Skip to content

Commit

Permalink
Allow '^Abundance.' iColPattern
Browse files Browse the repository at this point in the history
  • Loading branch information
csoneson committed Apr 15, 2023
1 parent 3c57118 commit d9abdf0
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 8 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* Change approach for injecting values into the Rmd file, to avoid the need to duplicate escapes
* Allow the user to set the column to colour by in the PCA
* Include 'extra' columns in output from runPTMTest
* Allow '^Abundance.' as a value for iColPattern

# einprot 0.6.10

Expand Down
41 changes: 34 additions & 7 deletions R/importExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,20 @@
"Intensity"
} else if (patmatch1 == "iBAQ.") {
"iBAQ"
} else if (patmatch1 == "Abundance.") {
"Abundance"
} else if (patmatch1 == "Abundance.F[0-9]+.") {
"Abundance"
} else if (patmatch1 == "Abundance.F.+.Sample.") {
"Abundance"
} else if (patmatch1 == "Abundances.Count.") {
"Abundances.count"
} else if (patmatch1 == "Abundances.Count.F[0-9]+.") {
"Abundances.count"
} else if (patmatch1 == "Abundances.Count.F.+.Sample.") {
"Abundances.count"
} else if (patmatch1 == "Abundances.Normalized.") {
"Abundances.normalized"
} else if (patmatch1 == "Abundances.Normalized.F[0-9]+.") {
"Abundances.normalized"
} else if (patmatch1 == "Abundances.Normalized.F.+.Sample.") {
Expand Down Expand Up @@ -70,7 +76,7 @@
#' @param iColPattern Character scalar defining a regular expression to
#' identify sample columns. For MaxQuant output, this is typically
#' one of "^iBAQ\\.", "^LFQ\\.intensity\\." or "^Intensity\\.". For PD,
#' it is typically "^Abundance\\.F[0-9]+\\." or
#' it is typically "^Abundance\\.", "^Abundance\\.F[0-9]+\\." or
#' "^Abundance\\.F.+\\.Sample\\.". For FragPipe,
#' it is typically "\\.MaxLFQ\\.Intensity$". Columns matching the
#' given pattern will form the first assay in the output object.
Expand Down Expand Up @@ -108,10 +114,13 @@ importExperiment <- function(inFile, iColPattern, includeOnlySamples = "",
"^Unique\\.peptides\\.", "^Razor\\.+unique\\.peptides\\.",
"^Peptides\\.", "^iBAQ\\.", "^Identification\\.type\\.",
## ProteomeDiscoverer
"^Abundance\\.",
"^Abundance\\.F[0-9]+\\.",
"^Abundance\\.F.+\\.Sample\\.",
"^Abundances\\.Count\\.",
"^Abundances\\.Count\\.F[0-9]+\\.",
"^Abundances\\.Count\\.F.+\\.Sample\\.",
"^Abundances\\.Normalized\\.",
"^Abundances\\.Normalized\\.F[0-9]+\\.",
"^Abundances\\.Normalized\\.F.+\\.Sample\\.",
"^Abundances\\.Grouped\\.Count\\.",
Expand Down Expand Up @@ -148,8 +157,8 @@ importExperiment <- function(inFile, iColPattern, includeOnlySamples = "",
stop("Specifying ", iColPattern, " as the main assay is currently ",
"not supported.")
}
## The exception is Abundances.Grouped - in this case, allow it but give
## a warning
## The exception is Abundances.Grouped - in this case,
## allow it but give a warning
if (iColPattern %in% c("^Abundances\\.Grouped\\.",
"^Abundances.Grouped.")) {
warning("Note that the specified iColPattern may match different ",
Expand Down Expand Up @@ -177,12 +186,28 @@ importExperiment <- function(inFile, iColPattern, includeOnlySamples = "",
## multiple patterns corresponding to the same assay)
if (iColPattern %in% c("^Abundance\\.F[0-9]+\\.", "^Abundances\\.Count\\.F[0-9]+\\.",
"^Abundances\\.Normalized\\.F[0-9]+\\.")) {
pats <- pats[!(pats %in% c("^Abundance\\.F.+\\.Sample\\.",
pats <- pats[!(pats %in% c("^Abundance\\.Sample\\.",
"^Abundance\\.F.+\\.Sample\\.",
"^Abundances\\.Count\\.F.+\\.Sample\\.",
"^Abundances\\.Normalized\\.F.+\\.Sample\\."))]
"^Abundances\\.Normalized\\.F.+\\.Sample\\.",
"^Abundance\\.",
"^Abundances\\.Count\\.",
"^Abundances\\.Normalized\\."))]
} else if (iColPattern %in% c("^Abundance\\.", "^Abundances\\.Count\\.",
"^Abundances\\.Normalized\\.")) {
pats <- pats[!(pats %in% c("^Abundance\\.Sample\\.",
"^Abundance\\.F.+\\.Sample\\.",
"^Abundances\\.Count\\.F.+\\.Sample\\.",
"^Abundances\\.Normalized\\.F.+\\.Sample\\.",
"^Abundance\\.F[0-9]+\\.",
"^Abundances\\.Count\\.F[0-9]+\\.",
"^Abundances\\.Normalized\\.F[0-9]+\\."))]
} else {
pats <- pats[!(pats %in% c("^Abundance\\.F[0-9]+\\.",
pats <- pats[!(pats %in% c("^Abundance\\.",
"^Abundance\\.F[0-9]+\\.",
"^Abundances\\.Count\\.",
"^Abundances\\.Count\\.F[0-9]+\\.",
"^Abundances\\.Normalized\\.",
"^Abundances\\.Normalized\\.F[0-9]+\\."))]
}

Expand Down Expand Up @@ -248,7 +273,9 @@ importExperiment <- function(inFile, iColPattern, includeOnlySamples = "",
if (any(duplicated(names(assayList)))) {
## Should never end up in here
#nocov start
warning("Multiple column patterns corresponding to the same assay name")
warning("Multiple column patterns corresponding to the same assay name: ",
paste(names(assayList)[duplicated(names(assayList))],
collapse = "; "))
#nocov end
}

Expand Down
103 changes: 102 additions & 1 deletion tests/testthat/test-importExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@ test_that(".getAssayNames works", {
expect_equal(.getAssayName("^Peptides\\."), "Peptides")
expect_equal(.getAssayName("^iBAQ\\."), "iBAQ")
expect_equal(.getAssayName("^Identification\\.type\\."), "Identification.type")
expect_equal(.getAssayName("^Abundance\\."), "Abundance")
expect_equal(.getAssayName("^Abundance\\.F[0-9]+\\."), "Abundance")
expect_equal(.getAssayName("^Abundance\\.F.+\\.Sample\\."), "Abundance")
expect_equal(.getAssayName("^Abundances\\.Count\\."), "Abundances.count")
expect_equal(.getAssayName("^Abundances\\.Count\\.F[0-9]+\\."), "Abundances.count")
expect_equal(.getAssayName("^Abundances\\.Count\\.F.+\\.Sample\\."), "Abundances.count")
expect_equal(.getAssayName("^Abundances\\.Normalized\\."), "Abundances.normalized")
expect_equal(.getAssayName("^Abundances\\.Normalized\\.F[0-9]+\\."), "Abundances.normalized")
expect_equal(.getAssayName("^Abundances\\.Normalized\\.F.+\\.Sample\\."), "Abundances.normalized")
expect_equal(.getAssayName("^Abundances\\.Grouped\\.Count\\."), "Abundances.grouped.count")
Expand Down Expand Up @@ -1034,9 +1037,107 @@ test_that("importExperiment works", {
"Abundances.Normalized.F12.132C.Sample.WT_S13", "Abundances.Normalized.F12.133N.Sample.WT_S14",
"Abundances.Normalized.F12.133C.Sample.WT_S15", "Abundances.Normalized.F12.134N.Sample.WT_S16"))

## -------------------------------------------------------------------------
## Another different iColPattern
## -------------------------------------------------------------------------
## Without escaping periods
out <- importExperiment(
inFile = pdFile, iColPattern = "^Abundance.",
nrows = 20)
expect_type(out, "list")
expect_named(out, c("sce", "aName"))
expect_equal(out$aName, "Abundance")
expect_equal(nrow(out$sce), 20)
expect_equal(ncol(out$sce), 16)
expect_s4_class(out$sce, "SingleCellExperiment")
expect_equal(rownames(out$sce), as.character(seq_len(20)))
expect_equal(SummarizedExperiment::assayNames(out$sce),
c("Abundance", "Abundances.count", "Abundances.normalized"))
idx_not_na <- c(1, 3, 5, 8, 9, 10, 12, 13, 14, 15, 16, 18, 20)
idx_na <- setdiff(seq_len(20), idx_not_na)
expect_equal(SummarizedExperiment::assay(
out$sce, "Abundance")[idx_not_na, "F12.129N.Sample.HIS4KO_S06"],
tmp$Abundance.F12.129N.Sample.HIS4KO_S06[idx_not_na], ignore_attr = TRUE)
expect_true(all(is.na(SummarizedExperiment::assay(
out$sce, "Abundance")[idx_na, "F12.129N.Sample.HIS4KO_S06"])))

expect_equal(SummarizedExperiment::assay(
out$sce, "Abundances.count")[idx_not_na, "F12.134N.Sample.WT_S16"],
tmp$Abundances.Count.F12.134N.Sample.WT_S16[idx_not_na], ignore_attr = TRUE)
expect_true(all(is.na(SummarizedExperiment::assay(
out$sce, "Abundances.count")[idx_na, "F12.134N.Sample.WT_S16"])))

expect_equal(SummarizedExperiment::assay(
out$sce, "Abundances.normalized")[idx_not_na, "F12.126.Sample.MET6KO_S01"],
tmp$Abundances.Normalized.F12.126.Sample.MET6KO_S01[idx_not_na], ignore_attr = TRUE)
expect_true(all(is.na(SummarizedExperiment::assay(
out$sce, "Abundances.normalized")[idx_na, "F12.126.Sample.MET6KO_S01"])))

expect_equal(SummarizedExperiment::assay(
out$sce, "Abundance")[idx_not_na, "F12.129C.Sample.HIS4KO_S07"],
tmp$Abundance.F12.129C.Sample.HIS4KO_S07[idx_not_na], ignore_attr = TRUE)
expect_true(all(is.na(SummarizedExperiment::assay(
out$sce, "Abundance")[idx_na, "F12.129C.Sample.HIS4KO_S07"])))

expect_equal(SummarizedExperiment::rowData(out$sce)$Accession,
tmp$Accession, ignore_attr = TRUE)
expect_equal(SummarizedExperiment::rowData(out$sce)$Number.of.Peptides,
tmp$Number.of.Peptides, ignore_attr = TRUE)
expect_equal(SummarizedExperiment::rowData(out$sce)$Gene.Symbol,
tmp$Gene.Symbol, ignore_attr = TRUE)
expect_equal(SummarizedExperiment::rowData(out$sce)$Modifications,
tmp$Modifications, ignore_attr = TRUE)
expect_true(all(c("Accession", "Number.of.Peptides",
"Score.Sequest.HT.Sequest.HT", "Gene.Symbol") %in%
colnames(SummarizedExperiment::rowData(out$sce))))

## Check that no sample-specific columns remain in the rowData
## (but they should be there in the temp data loaded above)
for (nms in c("Abundance", "Abundances.count", "Abundances.normalized",
"Abundances.grouped.count", "Abundances.grouped.CV",
"Abundances.grouped")) {
nmstmp <- dplyr::case_when(
nms == "Abundance" ~ "Abundance.F.+.Sample",
nms == "Abundances.count" ~ "Abundances.Count.F.+.Sample",
nms == "Abundances.normalized" ~ "Abundances.Normalized.F.+.Sample",
nms == "Abundances.grouped.count" ~ "Abundances.Grouped.Count",
nms == "Abundances.grouped.CV" ~ "Abundances.Grouped.CV.in.Percent",
nms == "Abundances.grouped" ~ "Abundances.Grouped"
)
expect_false(any(grepl(paste0(nms, ".MET6KO_S01"),
colnames(SummarizedExperiment::rowData(out$sce)))))
expect_false(any(grepl(paste0(nms, ".URA2KO_S10"),
colnames(SummarizedExperiment::rowData(out$sce)))))
expect_false(any(grepl(paste0(nmstmp, ".MET6KO_S01"),
colnames(SummarizedExperiment::rowData(out$sce)))))
expect_false(any(grepl(paste0(nmstmp, ".URA2KO_S10"),
colnames(SummarizedExperiment::rowData(out$sce)))))
expect_true(any(grepl(paste0(nmstmp, ".MET6KO_S01"), colnames(tmp))))
}
## Columns for each assay
expect_named(S4Vectors::metadata(out$sce)$colList,
c("Abundance", "Abundances.count", "Abundances.normalized"))
expect_equal(S4Vectors::metadata(out$sce)$colList$Abundance,
c("Abundance.F12.128C.Sample.HIS4KO_S05", "Abundance.F12.129N.Sample.HIS4KO_S06",
"Abundance.F12.129C.Sample.HIS4KO_S07", "Abundance.F12.130N.Sample.HIS4KO_S08",
"Abundance.F12.126.Sample.MET6KO_S01", "Abundance.F12.127N.Sample.MET6KO_S02",
"Abundance.F12.127C.Sample.MET6KO_S03", "Abundance.F12.128N.Sample.MET6KO_S04",
"Abundance.F12.130C.Sample.URA2KO_S09", "Abundance.F12.131N.Sample.URA2KO_S10",
"Abundance.F12.131C.Sample.URA2KO_S11", "Abundance.F12.132N.Sample.URA2KO_S12",
"Abundance.F12.132C.Sample.WT_S13", "Abundance.F12.133N.Sample.WT_S14",
"Abundance.F12.133C.Sample.WT_S15", "Abundance.F12.134N.Sample.WT_S16"))
expect_equal(S4Vectors::metadata(out$sce)$colList$Abundances.normalized,
c("Abundances.Normalized.F12.128C.Sample.HIS4KO_S05", "Abundances.Normalized.F12.129N.Sample.HIS4KO_S06",
"Abundances.Normalized.F12.129C.Sample.HIS4KO_S07", "Abundances.Normalized.F12.130N.Sample.HIS4KO_S08",
"Abundances.Normalized.F12.126.Sample.MET6KO_S01", "Abundances.Normalized.F12.127N.Sample.MET6KO_S02",
"Abundances.Normalized.F12.127C.Sample.MET6KO_S03", "Abundances.Normalized.F12.128N.Sample.MET6KO_S04",
"Abundances.Normalized.F12.130C.Sample.URA2KO_S09", "Abundances.Normalized.F12.131N.Sample.URA2KO_S10",
"Abundances.Normalized.F12.131C.Sample.URA2KO_S11", "Abundances.Normalized.F12.132N.Sample.URA2KO_S12",
"Abundances.Normalized.F12.132C.Sample.WT_S13", "Abundances.Normalized.F12.133N.Sample.WT_S14",
"Abundances.Normalized.F12.133C.Sample.WT_S15", "Abundances.Normalized.F12.134N.Sample.WT_S16"))

## -------------------------------------------------------------------------
## Another different iColPattern (currently not supported)
## Another different iColPattern
## -------------------------------------------------------------------------
expect_warning(out <- importExperiment(
inFile = pdFile, iColPattern = "^Abundances\\.Grouped\\.", nrows = 20),
Expand Down

0 comments on commit d9abdf0

Please sign in to comment.