Skip to content

Commit

Permalink
Add getCompTox to Mbworkflow MassBank#215
Browse files Browse the repository at this point in the history
getCompTox retrieves DTXSID from EPA webservices using InChiKey. Modify generation of infolists and of final Mbrecord to include DTXSID.
Resolves MassBank#215
  • Loading branch information
Adelene Lai committed May 28, 2019
1 parent cdbd99b commit 40083b7
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 6 deletions.
23 changes: 17 additions & 6 deletions R/createMassBank.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,17 +111,16 @@ resetInfolists <- function(mb)
CH.IUPAC = character(0), CH.LINK.CAS = character(0), CH.LINK.CHEBI = integer(0),
CH.LINK.HMDB = character(0), CH.LINK.KEGG = character(0), CH.LINK.LIPIDMAPS = character(0),
CH.LINK.PUBCHEM = character(0), CH.LINK.INCHIKEY = character(0),
CH.LINK.CHEMSPIDER = integer(0)), .Names = c("X", "id", "dbcas",
CH.LINK.CHEMSPIDER = integer(0), CH.LINK.COMPTOX = character(0)), .Names = c("X", "id", "dbcas",
"dbname", "dataused", "COMMENT.CONFIDENCE", "COMMENT.ID",
"CH.NAME1", "CH.NAME2", "CH.NAME3", "CH.COMPOUND_CLASS", "CH.FORMULA",
"CH.EXACT_MASS", "CH.SMILES", "CH.IUPAC", "CH.LINK.CAS", "CH.LINK.CHEBI",
"CH.LINK.HMDB", "CH.LINK.KEGG", "CH.LINK.LIPIDMAPS", "CH.LINK.PUBCHEM",
"CH.LINK.INCHIKEY", "CH.LINK.CHEMSPIDER"), row.names = integer(0), class = "data.frame")
"CH.LINK.HMDB", "CH.LINK.KEGG", "CH.LINK.LIPIDMAPS", "CH.LINK.PUBCHEM",
"CH.LINK.INCHIKEY", "CH.LINK.CHEMSPIDER", "CH.LINK.COMPTOX"), row.names = integer(0), class = "data.frame")
return(mb)

}

# The workflow function, i.e. (almost) the only thing you actually need to call.
# See below for explanation of steps.
#' MassBank record creation workflow
#'
Expand Down Expand Up @@ -527,6 +526,13 @@ gatherData <- function(id)
csid <- getCactus(inchikey_split, 'chemspider_id')
}

##Get CompTox
comptox <- getCompTox(inchikey_split)

if(is.null(comptox)){
comptox <- NA
}

##Use CTS to retrieve information
CTSinfo <- getCtsRecord(inchikey_split)

Expand Down Expand Up @@ -711,6 +717,7 @@ gatherData <- function(id)
}

link[["INCHIKEY"]] <- inchikey_split
link[["COMPTOX"]] <- comptox
if(length(csid)>0) if(any(!is.na(csid))) link[["CHEMSPIDER"]] <- min(as.numeric(as.character(csid)))
mbdata[['CH$LINK']] <- link

Expand Down Expand Up @@ -1071,7 +1078,9 @@ flatten <- function(mbdata)
"CH$LINK.LIPIDMAPS",
"CH$LINK.PUBCHEM",
"CH$LINK.INCHIKEY",
"CH$LINK.CHEMSPIDER")
"CH$LINK.CHEMSPIDER",
"CH$LINK.COMPTOX"
)
# make an empty data frame with the right length
rows <- length(mbdata)
cols <- length(colList)
Expand Down Expand Up @@ -1136,7 +1145,8 @@ readMbdata <- function(row)
"CH$LINK.LIPIDMAPS",
"CH$LINK.PUBCHEM",
"CH$LINK.INCHIKEY",
"CH$LINK.CHEMSPIDER")
"CH$LINK.CHEMSPIDER",
"CH$LINK.COMPTOX")
mbdata[["COMMENT"]] = list()
mbdata[["COMMENT"]][["CONFIDENCE"]] <- row[["COMMENT.CONFIDENCE"]]
# Again, our ID field.
Expand All @@ -1163,6 +1173,7 @@ readMbdata <- function(row)
link[["PUBCHEM"]] = row[["CH.LINK.PUBCHEM"]]
link[["INCHIKEY"]] = row[["CH.LINK.INCHIKEY"]]
link[["CHEMSPIDER"]] = row[["CH.LINK.CHEMSPIDER"]]
link[["COMPTOX"]] = row[["CH.LINK.COMPTOX"]]
link[which(is.na(link))] <- NULL
mbdata[["CH$LINK"]] <- link
# again, these constants are read from the options:
Expand Down
40 changes: 40 additions & 0 deletions R/webAccess.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
NULL
## library(XML)
## library(RCurl)
## library(jsonlite)



Expand Down Expand Up @@ -352,6 +353,45 @@ getPcCHEBI <- function(query, from = "inchikey")
}
}

#' Retrieves DTXSID (if it exists) from EPA Comptox Dashboard
#'
#' @usage getCompTox(query)
#' @param query The InChIKey of the compound.
#' @return Returns the DTXSID.
#'
#'
#' @examples
#'
#' \dontrun{
#' # getCompTox("MKXZASYAUGDDCJ-NJAFHUGGSA-N")
#' }
#'
#' @author Adelene Lai <adelene.lai@uni.lu>
#' @export

getCompTox <- function(query)
{
baseURL <- "https://actorws.epa.gov/actorws/chemIdentifier/v01/resolve.json?identifier="
url <- paste0(baseURL,query)
errorvar <- 0
currEnvir <- environment()
tryCatch(
data <- getURL(URLencode(url), timeout=5),
error=function(e){
currEnvir$errorvar <- 1 #TRUE?
}
)

if(errorvar){ #if TRUE?
warning("EPA web service is currently offline")
return(NA)
}

r <- fromJSON(data) #returns list
return(r$DataRow$dtxsid)

}

#' Retrieve the Chemspider ID for a given compound
#'
#' Given an InChIKey, this function queries the chemspider web API to retrieve
Expand Down

0 comments on commit 40083b7

Please sign in to comment.