From 26cd706a196761ded76f77f28b32963e8e9ea2ac Mon Sep 17 00:00:00 2001 From: Charlotte Soneson Date: Thu, 2 May 2024 13:20:10 +0200 Subject: [PATCH] Add BioGRID column in link table --- NEWS.md | 1 + R/makeDbLinkTable.R | 19 +++++-- man/makeDbLinkTable.Rd | 4 +- tests/testthat/test-makeDbLinkTable.R | 72 +++++++++++++++++++++------ 4 files changed, 73 insertions(+), 23 deletions(-) diff --git a/NEWS.md b/NEWS.md index 7c358f2..4cc5219 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,7 @@ * Expand input data paths in run*Analysis() functions * Add support for importing Spectronaut PG pivot files * Add initial support for reading Spectronaut setup.txt files +* Add BioGRID column in link table # einprot 0.9.3 diff --git a/R/makeDbLinkTable.R b/R/makeDbLinkTable.R index a873d37..c7bc332 100644 --- a/R/makeDbLinkTable.R +++ b/R/makeDbLinkTable.R @@ -69,7 +69,7 @@ formatTableColumns <- function(tbl, columns, signifDigits, maxLevels = 10) { .assertScalar(x = id, type = "character") .assertScalar(x = linktype, type = "character", validValues = c("UniProt", "AlphaFold", "PomBase", - "WormBase", "ComplexPortal")) + "WormBase", "ComplexPortal", "BioGRID")) if (!is.na(id) && id != "") { if (removeSuffix) { @@ -91,6 +91,9 @@ formatTableColumns <- function(tbl, columns, signifDigits, maxLevels = 10) { } else if (linktype == "ComplexPortal") { sprintf(' %s', paste0("https://www.ebi.ac.uk/complexportal/complex/search?query=", id), id) + } else if (linktype == "BioGRID") { + sprintf(' %s', + paste0("https://thebiogrid.org/search.php?search=", id, "&organism=all"), id) } else { "" } @@ -179,8 +182,8 @@ getConvTable <- function(type) { #' generated using \code{getConvTable(type = "WormBase")}. #' @param removeSuffix Logical scalar indicating whether suffixes of the #' form \code{-[0-9]+} should be removed from the protein ID before -#' generating the URL. Currently only influencing the AlphaFold and -#' ComplexPortal URLs. +#' generating the URL. Currently only influencing the AlphaFold, +#' ComplexPortal and BioGRID URLs. #' @param signifDigits Numeric scalar giving the number of significant digits #' to round numeric columns to. If \code{NULL}, no rounding will be #' performed. @@ -226,7 +229,7 @@ makeDbLinkTable <- function(df, idCol, speciesCommon, .assertScalar(x = signifDigits, type = "numeric", allowNULL = TRUE) ## ------------------------------------------------------------------------- - ## Create UniProt and AlphaFold columns + ## Create UniProt, AlphaFold, ComplexPortal and BioGRID columns ## ------------------------------------------------------------------------- linkTable <- df %>% dplyr::mutate(UniProt = vapply(.data[[idCol]], function(mpds) { @@ -246,6 +249,12 @@ makeDbLinkTable <- function(df, idCol, speciesCommon, .makeLinkFromId(mpd, linktype = "ComplexPortal", removeSuffix = removeSuffix) }, ""), collapse = ";") + }, "NA")) %>% + dplyr::mutate(BioGRID = vapply(.data[[idCol]], function(mpds) { + paste(vapply(strsplit(mpds, ";")[[1]], function(mpd) { + .makeLinkFromId(mpd, linktype = "BioGRID", + removeSuffix = removeSuffix) + }, ""), collapse = ";") }, "NA")) if (addSpeciesSpecificColumns && @@ -309,7 +318,7 @@ makeDbLinkTable <- function(df, idCol, speciesCommon, linkTable <- formatTableColumns( tbl = linkTable, columns = setdiff(colnames(linkTable), c("UniProt", "AlphaFold", - "ComplexPortal", + "ComplexPortal", "BioGRID", "WormBase", "PomBase")), signifDigits = signifDigits, maxLevels = 10) diff --git a/man/makeDbLinkTable.Rd b/man/makeDbLinkTable.Rd index 6fdcf98..d641563 100644 --- a/man/makeDbLinkTable.Rd +++ b/man/makeDbLinkTable.Rd @@ -42,8 +42,8 @@ generated using \code{getConvTable(type = "WormBase")}.} \item{removeSuffix}{Logical scalar indicating whether suffixes of the form \code{-[0-9]+} should be removed from the protein ID before -generating the URL. Currently only influencing the AlphaFold and -ComplexPortal URLs.} +generating the URL. Currently only influencing the AlphaFold, +ComplexPortal and BioGRID URLs.} \item{signifDigits}{Numeric scalar giving the number of significant digits to round numeric columns to. If \code{NULL}, no rounding will be diff --git a/tests/testthat/test-makeDbLinkTable.R b/tests/testthat/test-makeDbLinkTable.R index f041d68..de23af2 100644 --- a/tests/testthat/test-makeDbLinkTable.R +++ b/tests/testthat/test-makeDbLinkTable.R @@ -31,6 +31,10 @@ test_that("making the link table works", { ' Q7YTG1') expect_equal(.makeLinkFromId("Q7YTG1-1", "ComplexPortal", removeSuffix = FALSE), ' Q7YTG1-1') + expect_equal(.makeLinkFromId("Q7YTG1-1", "BioGRID", removeSuffix = TRUE), + ' Q7YTG1') + expect_equal(.makeLinkFromId("Q7YTG1-1", "BioGRID", removeSuffix = FALSE), + ' Q7YTG1-1') ## getConvTable ## ------------------------------------------------------------------------- @@ -186,10 +190,10 @@ test_that("making the link table works", { idCol = "id", speciesCommon = "fission yeast", signifDigits = 3) expect_s3_class(dblt, "data.frame") - expect_equal(ncol(dblt), 6) + expect_equal(ncol(dblt), 7) expect_equal(nrow(dblt), 3) expect_named(dblt, c("id", "numcol", "intcol", "UniProt", "AlphaFold", - "ComplexPortal")) + "ComplexPortal", "BioGRID")) expect_equal(dblt$id, factor(c("B5BP45", "O13282", "B5BP45"))) expect_equal(dblt$numcol, c(1.23, 0.000346, 7630)) expect_type(dblt$intcol, "integer") @@ -209,6 +213,11 @@ test_that("making the link table works", { ' O13282', ' B5BP45'), ignore_attr = TRUE) + expect_equal(dblt$BioGRID, c( + ' B5BP45', + ' O13282', + ' B5BP45'), + ignore_attr = TRUE) ## As above, but different number of significant digits dblt <- makeDbLinkTable(data.frame(id = c("B5BP45", "O13282", "O13282"), @@ -216,9 +225,10 @@ test_that("making the link table works", { idCol = "id", speciesCommon = "fission yeast", signifDigits = 1) expect_s3_class(dblt, "data.frame") - expect_equal(ncol(dblt), 5) + expect_equal(ncol(dblt), 6) expect_equal(nrow(dblt), 3) - expect_named(dblt, c("id", "numcol", "UniProt", "AlphaFold", "ComplexPortal")) + expect_named(dblt, c("id", "numcol", "UniProt", "AlphaFold", "ComplexPortal", + "BioGRID")) expect_equal(dblt$id, factor(c("B5BP45", "O13282", "O13282"))) expect_equal(dblt$numcol, c(1, 0.0003, 8000)) expect_equal(dblt$UniProt, c( @@ -236,6 +246,11 @@ test_that("making the link table works", { ' O13282', ' O13282'), ignore_attr = TRUE) + expect_equal(dblt$BioGRID, c( + ' B5BP45', + ' O13282', + ' O13282'), + ignore_attr = TRUE) ## With Pombase column dblt2 <- makeDbLinkTable( @@ -245,9 +260,10 @@ test_that("making the link table works", { PomBaseID = c("SPCC5E4.03c", "SPBC460.01c"), UniProtID = c("O13282", "B5BP45"))) expect_s3_class(dblt2, "data.frame") - expect_equal(ncol(dblt2), 5) + expect_equal(ncol(dblt2), 6) expect_equal(nrow(dblt2), 2) - expect_named(dblt2, c("id", "UniProt", "AlphaFold", "ComplexPortal", "PomBase")) + expect_named(dblt2, c("id", "UniProt", "AlphaFold", "ComplexPortal", + "BioGRID", "PomBase")) expect_equal(dblt2$id, factor(c("B5BP45", "O13282"))) expect_equal(dblt2$UniProt, c( ' B5BP45', @@ -261,6 +277,10 @@ test_that("making the link table works", { ' B5BP45', ' O13282'), ignore_attr = TRUE) + expect_equal(dblt2$BioGRID, c( + ' B5BP45', + ' O13282'), + ignore_attr = TRUE) expect_equal(dblt2$PomBase, c( ' SPBC460.01c', ' SPCC5E4.03c'), @@ -274,9 +294,10 @@ test_that("making the link table works", { PomBaseID = c("SPBC460.01c", "SPCC5E4.03c"), UniProtID = c("B5BP45", "O13282"))) expect_s3_class(dblt3, "data.frame") - expect_equal(ncol(dblt3), 4) + expect_equal(ncol(dblt3), 5) expect_equal(nrow(dblt3), 2) - expect_named(dblt3, c("id", "UniProt", "AlphaFold", "ComplexPortal")) + expect_named(dblt3, c("id", "UniProt", "AlphaFold", "ComplexPortal", + "BioGRID")) expect_equal(dblt3$id, factor(c("B5BP45", "O13282"))) expect_equal(dblt3$UniProt, c( ' B5BP45', @@ -290,6 +311,10 @@ test_that("making the link table works", { ' B5BP45', ' O13282'), ignore_attr = TRUE) + expect_equal(dblt3$BioGRID, c( + ' B5BP45', + ' O13282'), + ignore_attr = TRUE) ## With Wormbase column dblt4 <- makeDbLinkTable( @@ -303,12 +328,14 @@ test_that("making the link table works", { "WBGene00001330", "WBGene00001328"), check.names = FALSE)) expect_s3_class(dblt4, "data.frame") - expect_equal(ncol(dblt4), 6) + expect_equal(ncol(dblt4), 7) expect_equal(nrow(dblt4), 2) - expect_named(dblt4, c("gid", "pid", "UniProt", "AlphaFold", "ComplexPortal", "WormBase")) + expect_named(dblt4, c("gid", "pid", "UniProt", "AlphaFold", "ComplexPortal", + "BioGRID", "WormBase")) expect_equal(grep(";", dblt4$UniProt), c(1, 2)) expect_equal(grep(";", dblt4$AlphaFold), c(1, 2)) expect_equal(grep(";", dblt4$ComplexPortal), c(1, 2)) + expect_equal(grep(";", dblt4$BioGRID), c(1, 2)) expect_equal(grep(";", dblt4$WormBase), integer(0)) expect_equal(dblt4$gid, factor(c("eps-8", "epi-1"))) expect_equal(dblt4$pid, factor(c("Q7YTG1;O18250", "C1P641;C1P640"))) @@ -324,6 +351,10 @@ test_that("making the link table works", { ' Q7YTG1; O18250', ' C1P641; C1P640'), ignore_attr = TRUE) + expect_equal(dblt4$BioGRID, c( + ' Q7YTG1; O18250', + ' C1P641; C1P640'), + ignore_attr = TRUE) expect_equal(dblt4$WormBase, c( ' WBGene00001330', ' WBGene00001328'), @@ -341,15 +372,18 @@ test_that("making the link table works", { "WBGene00001330", "WBGene00001328"), check.names = FALSE)) expect_s3_class(dblt5, "data.frame") - expect_equal(ncol(dblt5), 5) + expect_equal(ncol(dblt5), 6) expect_equal(nrow(dblt5), 2) - expect_named(dblt5, c("gid", "pid", "UniProt", "AlphaFold", "ComplexPortal")) + expect_named(dblt5, c("gid", "pid", "UniProt", "AlphaFold", "ComplexPortal", + "BioGRID")) expect_equal(grep(";", dblt5$UniProt), c(1, 2)) expect_equal(grep(";", dblt5$AlphaFold), c(1, 2)) expect_equal(grep(";", dblt5$ComplexPortal), c(1, 2)) + expect_equal(grep(";", dblt5$BioGRID), c(1, 2)) expect_equal(dblt5$AlphaFold, dblt4$AlphaFold) expect_equal(dblt5$UniProt, dblt4$UniProt) expect_equal(dblt5$ComplexPortal, dblt4$ComplexPortal) + expect_equal(dblt5$BioGRID, dblt4$BioGRID) ## With Wormbase column, but missing conversion dblt6 <- makeDbLinkTable( @@ -362,18 +396,21 @@ test_that("making the link table works", { WormBaseID = c("WBGene00001330", "WBGene00001328"), check.names = FALSE)) expect_s3_class(dblt6, "data.frame") - expect_equal(ncol(dblt6), 6) + expect_equal(ncol(dblt6), 7) expect_equal(nrow(dblt6), 2) - expect_named(dblt6, c("gid", "pid", "UniProt", "AlphaFold", "ComplexPortal", "WormBase")) + expect_named(dblt6, c("gid", "pid", "UniProt", "AlphaFold", "ComplexPortal", + "BioGRID", "WormBase")) expect_equal(grep(";", dblt6$UniProt), c(1, 2)) expect_equal(grep(";", dblt6$AlphaFold), c(1, 2)) expect_equal(grep(";", dblt6$ComplexPortal), c(1, 2)) + expect_equal(grep(";", dblt6$BioGRID), c(1, 2)) expect_equal(grep(";", dblt6$WormBase), integer(0)) expect_equal(dblt6$gid, factor(c("eps-8", "epi-1"))) expect_equal(dblt6$pid, factor(c("Q7YTG1;O18250", "C1P641;C1P640"))) expect_equal(dblt6$UniProt, dblt4$UniProt) expect_equal(dblt6$AlphaFold, dblt4$AlphaFold) expect_equal(dblt6$ComplexPortal, dblt4$ComplexPortal) + expect_equal(dblt6$BioGRID, dblt4$BioGRID) expect_equal(dblt6$WormBase, dblt4$WormBase) ## With Wormbase column, but missing conversion (2) @@ -386,18 +423,21 @@ test_that("making the link table works", { WormBaseID = c("WBGene00001330"), check.names = FALSE)) expect_s3_class(dblt7, "data.frame") - expect_equal(ncol(dblt7), 6) + expect_equal(ncol(dblt7), 7) expect_equal(nrow(dblt7), 2) - expect_named(dblt7, c("gid", "pid", "UniProt", "AlphaFold", "ComplexPortal", "WormBase")) + expect_named(dblt7, c("gid", "pid", "UniProt", "AlphaFold", "ComplexPortal", + "BioGRID", "WormBase")) expect_equal(grep(";", dblt7$UniProt), c(1, 2)) expect_equal(grep(";", dblt7$AlphaFold), c(1, 2)) expect_equal(grep(";", dblt7$ComplexPortal), c(1, 2)) + expect_equal(grep(";", dblt7$BioGRID), c(1, 2)) expect_equal(grep(";", dblt7$WormBase), integer(0)) expect_equal(dblt7$gid, factor(c("eps-8", "epi-1"))) expect_equal(dblt7$pid, factor(c("Q7YTG1;O18250", "C1P641;C1P640"))) expect_equal(dblt7$UniProt, dblt4$UniProt) expect_equal(dblt7$AlphaFold, dblt4$AlphaFold) expect_equal(dblt7$ComplexPortal, dblt4$ComplexPortal) + expect_equal(dblt7$BioGRID, dblt4$BioGRID) expect_equal(dblt7$WormBase, c(dblt4$WormBase[1], ""), ignore_attr = TRUE)