Skip to content

Commit

Permalink
Expand the set of allowed species
Browse files Browse the repository at this point in the history
  • Loading branch information
csoneson committed Jul 23, 2023
1 parent 7e18b6d commit 59d6544
Show file tree
Hide file tree
Showing 20 changed files with 81 additions and 53 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* Add argument to define the assay(s) to use for exported values and barplots
* Harmonize treatment of merged groups in runTest and plotVolcano
* Sort exported test results by p-value instead of logFC
* Expand the set of allowed species

# einprot 0.7.3

Expand Down
30 changes: 25 additions & 5 deletions R/getSupportedSpecies.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,26 @@
#'
getSupportedSpecies <- function() {
data.frame(
taxId = c(10090, 9606, 6239, 7955, 7227, 4932, 284812),
taxId = c(10090, 9606, 6239, 7955, 7227, 4932, 284812, 28377,
9913, 9615, 9796, 9685, 9031, 9544, 13616, 9258,
9598, 10116, 9823, 8364),
species = c("Mus musculus", "Homo sapiens", "Caenorhabditis elegans",
"Danio rerio", "Drosophila melanogaster",
"Saccharomyces cerevisiae",
"Schizosaccharomyces pombe 972h-"),
"Schizosaccharomyces pombe 972h-",
"Anolis carolinensis", "Bos taurus",
"Canis lupus familiaris", "Equus caballus",
"Felis catus", "Gallus gallus",
"Macaca mulatta", "Monodelphis domestica",
"Ornithorhynchus anatinus", "Pan troglodytes",
"Rattus norvegicus", "Sus scrofa",
"Xenopus tropicalis"),
speciesCommon = c("mouse", "human", "roundworm", "zebrafish",
"fruitfly", "baker's yeast", "fission yeast")
"fruitfly", "baker's yeast", "fission yeast",
"green anole", "bovine", "dog", "horse", "cat",
"chicken", "rhesus macaque", "opossum",
"platypus", "chimpanzee", "Norway rat",
"pig", "tropical clawed frog")
)
}

Expand All @@ -44,6 +57,8 @@ getSupportedSpecies <- function() {
#' getSpeciesInfo("mouse")
#' getSpeciesInfo(6239)
#' getSpeciesInfo("Homo sapiens")
#' ## unsupported species
#' getSpeciesInfo("E.coli")
#'
getSpeciesInfo <- function(species) {
stopifnot(length(species) == 1)
Expand All @@ -56,21 +71,26 @@ getSpeciesInfo <- function(species) {
species_common <-
taxTable$speciesCommon[match(tolower(species),
tolower(taxTable$speciesCommon))]
tax_id <- taxTable$taxId[match(species_id, taxTable$species)]
} else if (tolower(species) %in% tolower(taxTable$species)) {
species_id <- taxTable$species[match(tolower(species),
tolower(taxTable$species))]
species_common <-
taxTable$speciesCommon[match(tolower(species),
tolower(taxTable$species))]
tax_id <- taxTable$taxId[match(species_id, taxTable$species)]
} else if (species %in% taxTable$taxId) {
species_id <- taxTable$species[match(species,
taxTable$taxId)]
species_common <- taxTable$speciesCommon[match(species,
taxTable$taxId)]
tax_id <- taxTable$taxId[match(species_id, taxTable$species)]
} else {
stop("Unknown species ", species)
warning("Unknown species ", species)
species_id <- species
species_common <- ""
tax_id <- NA_real_
}
tax_id <- taxTable$taxId[match(species_id, taxTable$species)]

list(species = species_id,
speciesCommon = species_common,
Expand Down
4 changes: 4 additions & 0 deletions R/getUniProtToGeneSymbolMapping.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
f <- "YEAST_559292_idmapping.dat.gz"
} else if (spi$species == "Schizosaccharomyces pombe 972h-") {
f <- "SCHPO_284812_idmapping.dat.gz"
} else if (spi$species == "Gallus gallus") {
f <- "CHICK_9031_idmapping.dat.gz"
} else if (spi$species == "Rattus norvegicus") {
f <- "RAT_10116_idmapping.dat.gz"
} else {
stop("Unsupported species")
}
Expand Down
2 changes: 1 addition & 1 deletion R/makeDbLinkTable.R
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ makeDbLinkTable <- function(df, idCol, speciesCommon,
.assertVector(x = df, type = "data.frame")
.assertScalar(x = idCol, type = "character", validValues = colnames(df))
.assertScalar(x = speciesCommon, type = "character",
validValues = getSupportedSpecies()$speciesCommon)
validValues = c("", getSupportedSpecies()$speciesCommon))
.assertScalar(x = addSpeciesSpecificColumns, type = "logical")
.assertVector(x = convTablePomBase, type = "data.frame", allowNULL = TRUE)
.assertVector(x = convTableWormBase, type = "data.frame",
Expand Down
33 changes: 19 additions & 14 deletions R/prepareFeatureCollections.R
Original file line number Diff line number Diff line change
Expand Up @@ -135,21 +135,25 @@ prepareFeatureCollections <- function(sce, idCol, includeFeatureCollections,
## -------------------------------------------------------------------------
if ("complexes" %in% includeFeatureCollections) {
complexes <- readRDS(complexDbPath)
if (speciesInfo$speciesCommon %in% names(complexes)) {
crl <- complexes[[speciesInfo$speciesCommon]]
} else if (speciesInfo$species %in% names(complexes)) {
crl <- complexes[[speciesInfo$species]]
if (any(c(speciesInfo$speciesCommon, speciesInfo$species) %in%
names(complexes))) {
if (speciesInfo$speciesCommon %in% names(complexes)) {
crl <- complexes[[speciesInfo$speciesCommon]]
} else if (speciesInfo$species %in% names(complexes)) {
crl <- complexes[[speciesInfo$species]]
}
if (complexSpecies == "current") {
## Only test complexes defined for the current species
crl <- crl[S4Vectors::mcols(crl)$Species.common %in%
c(speciesInfo$species, speciesInfo$speciesCommon)]
}
S4Vectors::mcols(crl)$genes <- vapply(
crl, function(w) gsub(pat, "\\1; ", paste(w, collapse = ";")), "")
S4Vectors::mcols(crl)$nGenes <- lengths(crl)
} else {
stop("No complex database available for the current species")
warning("No complex database available for the current species")
crl <- IRanges::CharacterList()
}
if (complexSpecies == "current") {
## Only test complexes defined for the current species
crl <- crl[S4Vectors::mcols(crl)$Species.common %in%
c(speciesInfo$species, speciesInfo$speciesCommon)]
}
S4Vectors::mcols(crl)$genes <- vapply(
crl, function(w) gsub(pat, "\\1; ", paste(w, collapse = ";")), "")
S4Vectors::mcols(crl)$nGenes <- lengths(crl)
} else {
crl <- IRanges::CharacterList()
}
Expand Down Expand Up @@ -185,7 +189,8 @@ prepareFeatureCollections <- function(sce, idCol, includeFeatureCollections,
## -------------------------------------------------------------------------
## GO terms
## -------------------------------------------------------------------------
if ("GO" %in% includeFeatureCollections) {
if ("GO" %in% includeFeatureCollections &&
speciesInfo$species %in% getSupportedSpecies()$species) {
goannots <- msigdbr::msigdbr(species = speciesInfo$species,
category = "C5") %>%
dplyr::select("gs_name", "gene_symbol")
Expand Down
5 changes: 3 additions & 2 deletions inst/extdata/process_FragPipe_template.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,9 @@ of the data are provided via [principal component analysis](#run-pca) and
## Get species info and define STRINGdb object
speciesInfo <- getSpeciesInfo(species)
if (is.null(stringDir)) stringDir <- ""
if (is.null(stringIdCol)) {
## If no STRING IDs are extracted, don't do STRING analysis
if (is.null(stringIdCol) || is.na(speciesInfo$taxId)) {
## If no STRING IDs are extracted or an unknown species is provided,
## don't do STRING analysis
string_db <- NULL
} else {
string_db <- tryCatch({
Expand Down
5 changes: 3 additions & 2 deletions inst/extdata/process_MaxQuant_template.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,9 @@ of the data are provided via [principal component analysis](#run-pca) and
## Get species info and define STRINGdb object
speciesInfo <- getSpeciesInfo(species)
if (is.null(stringDir)) stringDir <- ""
if (is.null(stringIdCol)) {
## If no STRING IDs are extracted, don't do STRING analysis
if (is.null(stringIdCol) || is.na(speciesInfo$taxId)) {
## If no STRING IDs are extracted or an unknown species is provided,
## don't do STRING analysis
string_db <- NULL
} else {
string_db <- tryCatch({
Expand Down
5 changes: 3 additions & 2 deletions inst/extdata/process_PD_TMT_template.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,9 @@ speciesInfo <- getSpeciesInfo(species)
if (inputLevel == "Proteins") {
if (is.null(stringDir)) stringDir <- ""
if (is.null(stringIdCol)) {
## If no STRING IDs are extracted, don't do STRING analysis
if (is.null(stringIdCol) || is.na(speciesInfo$taxId)) {
## If no STRING IDs are extracted or an unknown species is provided,
## don't do STRING analysis
string_db <- NULL
} else {
string_db <- tryCatch({
Expand Down
2 changes: 2 additions & 0 deletions man/getSpeciesInfo.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions tests/testthat/test-checkArgumentsFragPipe.R
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,8 @@ test_that("argument checking for FP works", {
## species
args <- args0
args$species <- 1
expect_error(do.call(.checkArgumentsFragPipe, args),
"Unknown species 1")
expect_warning(do.call(.checkArgumentsFragPipe, args),
"Unknown species 1")
args$species <- c("Mouse", "Human")
expect_error(do.call(.checkArgumentsFragPipe, args),
"length(species) == 1 is not TRUE", fixed = TRUE)
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-checkArgumentsMaxQuant.R
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ test_that("argument checking for MQ works", {
## species
args <- args0
args$species <- 1
expect_error(do.call(.checkArgumentsMaxQuant, args),
"Unknown species 1")
expect_warning(do.call(.checkArgumentsMaxQuant, args),
"Unknown species 1")
args$species <- c("Mouse", "Human")
expect_error(do.call(.checkArgumentsMaxQuant, args),
"length(species) == 1 is not TRUE", fixed = TRUE)
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-checkArgumentsPDTMT.R
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,8 @@ test_that("argument checking for PD-TMT works", {
## species
args <- args0
args$species <- 1
expect_error(do.call(.checkArgumentsPDTMT, args),
"Unknown species 1")
expect_warning(do.call(.checkArgumentsPDTMT, args),
"Unknown species 1")
args$species <- c("Mouse", "Human")
expect_error(do.call(.checkArgumentsPDTMT, args),
"length(species) == 1 is not TRUE", fixed = TRUE)
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-checkArgumentsPDTMTptm.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ test_that("argument checking for PD-TMT PTM works", {
## species
args <- args0
args$species <- 1
expect_error(do.call(.checkArgumentsPDTMTptm, args),
"Unknown species 1")
expect_warning(do.call(.checkArgumentsPDTMTptm, args),
"Unknown species 1")
args$species <- c("Mouse", "Human")
expect_error(do.call(.checkArgumentsPDTMTptm, args),
"length(species) == 1 is not TRUE", fixed = TRUE)
Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/test-getSupportedSpecies.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ test_that("geting species information works", {
expect_s3_class(df, "data.frame")
expect_equal(ncol(df), 3)
expect_named(df, c("taxId", "species", "speciesCommon"))
expect_equal(nrow(df), 7)
expect_equal(nrow(df), 20)

expect_error(getSpeciesInfo("missing"),
"Unknown species missing")
expect_warning(getSpeciesInfo("missing"),
"Unknown species missing")
expect_error(getSpeciesInfo(list(x = 1)),
"is.character(species) || is.numeric(species) is not TRUE",
fixed = TRUE)
Expand Down
9 changes: 7 additions & 2 deletions tests/testthat/test-getUniProtToGeneSymbolMapping.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
test_that("getUniProtToIDMapping works", {
expect_error(getUniProtToIDMapping(1), "Unknown species")
expect_error(getUniProtToIDMapping("wrongSpecies"), "Unknown species")
expect_warning(expect_error(getUniProtToIDMapping(1), "Unsupported species"))
expect_warning(expect_error(getUniProtToIDMapping("wrongSpecies"),
"Unsupported species"))
expect_error(getUniProtToIDMapping("fruitfly", targetId = 1),
"'targetId' must be of class 'character'")

Expand All @@ -22,6 +23,10 @@ test_that("getUniProtToIDMapping works", {
"YEAST_559292_idmapping.dat.gz")
expect_equal(.getUniProtToIDMappingFile(getSpeciesInfo("fission yeast")),
"SCHPO_284812_idmapping.dat.gz")
expect_equal(.getUniProtToIDMappingFile(getSpeciesInfo("Norway rat")),
"RAT_10116_idmapping.dat.gz")
expect_equal(.getUniProtToIDMappingFile(getSpeciesInfo("chicken")),
"CHICK_9031_idmapping.dat.gz")
expect_error(.getUniProtToIDMappingFile(list(species = "missing")),
"Unsupported species")

Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-prepareFeatureCollections.R
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,8 @@ test_that("preparing feature collections works", {
expect_error(do.call(prepareFeatureCollections, args),
"All values in 'namesspeciesInfo' must be one of")
args$speciesInfo <- list(speciesCommon = "missing", species = "missing")
expect_error(do.call(prepareFeatureCollections, args),
"No complex database available for the current species")
expect_warning(do.call(prepareFeatureCollections, args),
"No complex database available for the current species")

args <- args0
args$complexSpecies <- 1
Expand Down
3 changes: 0 additions & 3 deletions tests/testthat/test-runFragPipeAnalysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,6 @@ test_that("runFragPipeAnalysis works", {

## species
args <- args0
args$species <- 1
expect_error(do.call(runFragPipeAnalysis, args),
"Unknown species 1")
args$species <- c("Mouse", "Human")
expect_error(do.call(runFragPipeAnalysis, args),
"length(species) == 1 is not TRUE", fixed = TRUE)
Expand Down
3 changes: 0 additions & 3 deletions tests/testthat/test-runMaxQuantAnalysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,6 @@ test_that("runMaxQuantAnalysis works", {

## species
args <- args0
args$species <- 1
expect_error(do.call(runMaxQuantAnalysis, args),
"Unknown species 1")
args$species <- c("Mouse", "Human")
expect_error(do.call(runMaxQuantAnalysis, args),
"length(species) == 1 is not TRUE", fixed = TRUE)
Expand Down
3 changes: 0 additions & 3 deletions tests/testthat/test-runPDTMTAnalysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -181,9 +181,6 @@ test_that("runPDTMTAnalysis works", {

## species
args <- args0
args$species <- 1
expect_error(do.call(runPDTMTAnalysis, args),
"Unknown species 1")
args$species <- c("Mouse", "Human")
expect_error(do.call(runPDTMTAnalysis, args),
"length(species) == 1 is not TRUE", fixed = TRUE)
Expand Down
3 changes: 0 additions & 3 deletions tests/testthat/test-runPDTMTptmAnalysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,6 @@ test_that("runPDTMTptmAnalysis works", {

## species
args <- args0
args$species <- 1
expect_error(do.call(runPDTMTptmAnalysis, args),
"Unknown species 1")
args$species <- c("Mouse", "Human")
expect_error(do.call(runPDTMTptmAnalysis, args),
"length(species) == 1 is not TRUE", fixed = TRUE)
Expand Down

0 comments on commit 59d6544

Please sign in to comment.