From 7e659f76b9322511ee1f63939685b98d43d754a0 Mon Sep 17 00:00:00 2001 From: edward-burn <9583964+edward-burn@users.noreply.github.com> Date: Wed, 3 May 2023 17:13:46 +0100 Subject: [PATCH] v1.2 --- DESCRIPTION | 5 +- NAMESPACE | 2 + NEWS.md | 3 + R/compareCodelists.R | 2 +- R/drugCodes.R | 267 ++++++++++++++++++++++++ R/getCandidateCodes.R | 21 +- R/getMappings.R | 16 +- R/{utils.R => inputValidation.R} | 15 +- R/mockVocabRef.R | 69 +++++- R/runSearch.R | 2 +- R/vocabUtilities.R | 157 ++++++++++++-- man/getATCCodes.Rd | 34 +++ man/getCandidateCodes.Rd | 2 +- man/getDescendants.Rd | 9 +- man/getDrugIngredientCodes.Rd | 31 +++ tests/testthat/test-drugCodes.R | 78 +++++++ tests/testthat/test-getCandidateCodes.R | 2 +- tests/testthat/test-vocabUtilities.R | 45 +++- 18 files changed, 703 insertions(+), 57 deletions(-) create mode 100644 R/drugCodes.R rename R/{utils.R => inputValidation.R} (70%) create mode 100644 man/getATCCodes.Rd create mode 100644 man/getDrugIngredientCodes.Rd create mode 100644 tests/testthat/test-drugCodes.R diff --git a/DESCRIPTION b/DESCRIPTION index 9625f70..b244fc6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: CodelistGenerator Title: Generate Code Lists for the OMOP Common Data Model -Version: 1.1.0 +Version: 1.2.0 Authors@R: person("Edward", "Burn", , "edward.burn@ndorms.ox.ac.uk", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9286-1128")) @@ -19,7 +19,8 @@ Imports: glue (>= 1.5.0), stringr (>= 1.4.0), tidyselect (>= 1.2.0), - tidyr (>= 1.2.0) + tidyr (>= 1.2.0), + cli (>= 3.1.0) Suggests: arrow (>= 9.0.0), covr, diff --git a/NAMESPACE b/NAMESPACE index 5d7586a..a191138 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,11 +2,13 @@ export("%>%") export(compareCodelists) +export(getATCCodes) export(getCandidateCodes) export(getConceptClassId) export(getDescendants) export(getDomains) export(getDoseForm) +export(getDrugIngredientCodes) export(getMappings) export(getVocabVersion) export(getVocabularies) diff --git a/NEWS.md b/NEWS.md index d80d199..d047090 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +# CodelistGenerator 1.2.0 +* Added functions getATCCodes and getDrugIngredientCodes. + # CodelistGenerator 1.1.0 * Added exactMatch and includeSequela options to getCandidateCodes function. diff --git a/R/compareCodelists.R b/R/compareCodelists.R index 79aacdb..46c3d6a 100644 --- a/R/compareCodelists.R +++ b/R/compareCodelists.R @@ -1,4 +1,4 @@ -# Copyright 2022 DARWIN EU® +# Copyright 2023 DARWIN EU® # # This file is part of IncidencePrevalence # diff --git a/R/drugCodes.R b/R/drugCodes.R new file mode 100644 index 0000000..6b179a3 --- /dev/null +++ b/R/drugCodes.R @@ -0,0 +1,267 @@ +# Copyright 2023 DARWIN EU® +# +# This file is part of IncidencePrevalence +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#' Get descendant codes for ATC levels +#' +#' @param cdm cdm_reference via CDMConnector +#' @param level ATC level. Can be one or more of "ATC 1st", "ATC 2nd", +#' "ATC 3rd", "ATC 4th", and "ATC 5th" +#' @param name ATC name of interest. For example, c("Dermatologicals", +#' "Nervous System"), would result in a list of length two with the descendant +#' concepts for these two particular ATC groups. +#' @param doseForm Only descendants codes with the specified dose form +#' will be returned. If NULL, descendant codes will be returned regardless +#' of dose form. +#' +#' @return A named list, with each element containing the descendant +#' concepts for a particular ATC group +#' @export +#' +#' @examples +#' cdm <- mockVocabRef() +#' getATCCodes(cdm = cdm, level = "ATC 1st") +#' DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) +getATCCodes <- function(cdm, + level = c("ATC 1st"), + name = NULL, + doseForm = NULL) { + errorMessage <- checkmate::makeAssertCollection() + checkDbType(cdm = cdm, type = "cdm_reference", messageStore = errorMessage) + levelCheck <- all(level %in% + c( + "ATC 1st", + "ATC 2nd", + "ATC 3rd", + "ATC 4th", + "ATC 5th" + )) + if (!isTRUE(levelCheck)) { + errorMessage$push( + "- level can only be from: ATC 1st, ATC 2nd, ATC 3rd, ATC 4th, ATC 5th" + ) + } + checkmate::assertTRUE(levelCheck, add = errorMessage) + checkmate::assertVector(name, + add = errorMessage, + null.ok = TRUE + ) + checkmate::reportAssertions(collection = errorMessage) + + atc_groups <- cdm$concept %>% + dplyr::filter(.data$vocabulary_id == "ATC") %>% + dplyr::filter(.data$concept_class_id %in% .env$level) %>% + dplyr::collect() + + if (!is.null(name)) { + atc_groups <- atc_groups %>% + dplyr::filter(tolower(.data$concept_name) %in% tolower(.env$name)) + } + + errorMessage <- checkmate::makeAssertCollection() + atcCheck <- nrow(atc_groups) > 0 + if (!isTRUE(atcCheck)) { + errorMessage$push( + "- No matching ATC codes found" + ) + } + checkmate::assertTRUE(atcCheck, add = errorMessage) + checkmate::reportAssertions(collection = errorMessage) + + + atc_descendants <- getDescendants( + cdm = cdm, + conceptId = atc_groups$concept_id, + withAncestor = TRUE, + doseForm = doseForm + ) + if (nrow(atc_descendants) > 0) { + atc_descendants <- atc_descendants %>% + dplyr::select(c("concept_id", "ancestor_concept_id")) %>% + # split different ancestors into multiple cols + tidyr::separate_wider_delim( + cols = "ancestor_concept_id", + delim = ";", + names_sep = "", + too_few = "align_start" + ) + + atc_descendants <- atc_descendants %>% + # one row per concept + ancestor + tidyr::pivot_longer(!"concept_id", + names_to = NULL, + values_to = "ancestor_concept_id", + values_drop_na = TRUE + ) + + # to list + # one item per concept + atc_descendants <- split( + x = atc_descendants, + f = as.factor(atc_descendants$ancestor_concept_id), + drop = TRUE + ) + + # for each item in the list - pull out concepts and name + for (i in seq_along(atc_descendants)) { + workingLevel <- atc_groups %>% + dplyr::filter(.data$concept_id == names(atc_descendants)[i]) %>% + dplyr::pull("concept_class_id") + workingName <- atc_groups %>% + dplyr::filter(.data$concept_id == names(atc_descendants)[i]) %>% + dplyr::pull("concept_name") + + atc_descendants[[i]] <- atc_descendants[[i]] %>% + dplyr::select("concept_id") %>% + dplyr::distinct() %>% + dplyr::pull() + + names(atc_descendants)[i] <- paste0( + workingLevel, ": ", workingName, + " (", names(atc_descendants)[i], ")" + ) + } + } + return(atc_descendants) +} + +#' Get descendant codes for drug ingredients +#' +#' @param cdm cdm_reference via CDMConnector +#' @param name Names of ingredients of interest. For example, c("acetaminophen", +#' "codeine"), would result in a list of length two with the descendant +#' concepts for these two particular drug ingredients. +#' @param doseForm Only descendants codes with the specified dose form +#' will be returned. If NULL, descendant codes will be returned regardless +#' of dose form. +#' +#' @return A named list, with each item containing descendant concepts of +#' an ingredient +#' @export +#' +#' @examples +#'cdm <- mockVocabRef() +#'getDrugIngredientCodes(cdm = cdm, name = "Adalimumab") +#'DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) +getDrugIngredientCodes <- function(cdm, name = NULL, doseForm = NULL) { + errorMessage <- checkmate::makeAssertCollection() + checkDbType(cdm = cdm, type = "cdm_reference", messageStore = errorMessage) + checkmate::assertVector(name, + add = errorMessage, + null.ok = TRUE + ) + checkmate::reportAssertions(collection = errorMessage) + + ingredientConcepts <- cdm$concept %>% + dplyr::filter(.data$standard_concept == "S") %>% + dplyr::filter(.data$concept_class_id == "Ingredient") %>% + dplyr::select("concept_id", "concept_name") %>% + dplyr::collect() + + if (!is.null(name)) { + ingredientConcepts <- ingredientConcepts %>% + dplyr::filter(tolower(.data$concept_name) %in% tolower(.env$name)) + } + + errorMessage <- checkmate::makeAssertCollection() + ingredientCheck <- nrow(ingredientConcepts) > 0 + if (!isTRUE(ingredientCheck)) { + errorMessage$push( + "- No matching Ingredient codes found" + ) + } + checkmate::assertTRUE(ingredientCheck, add = errorMessage) + checkmate::reportAssertions(collection = errorMessage) + + # to avoid potential memory problems will batch + if (nrow(ingredientConcepts) > 0) { + ingredientCodes <- fetchBatchedDrugIngredientCodes(cdm, + codes = ingredientConcepts$concept_id, + batchSize = 500, + doseForm = doseForm + ) + } + + ingredientCodes <- ingredientCodes %>% + dplyr::select(c("concept_id", "ancestor_concept_id")) %>% + # split different ancestors into multiple cols + tidyr::separate_wider_delim( + cols = "ancestor_concept_id", + delim = ";", + names_sep = "", + too_few = "align_start" + ) + + ingredientCodes <- ingredientCodes %>% + # one row per concept + ancestor + tidyr::pivot_longer(!"concept_id", + names_to = NULL, + values_to = "ancestor_concept_id", + values_drop_na = TRUE + ) + + # to list + # one item per concept + ingredientCodes <- split( + x = ingredientCodes, + f = as.factor(ingredientCodes$ancestor_concept_id), + drop = TRUE + ) + + # for each item in the list - pull out concepts and name + for (i in seq_along(ingredientCodes)) { + workingName <- ingredientConcepts %>% + dplyr::filter(.data$concept_id == names(ingredientCodes)[[i]]) %>% + dplyr::pull("concept_name") + + ingredientCodes[[i]] <- ingredientCodes[[i]] %>% + dplyr::select("concept_id") %>% + dplyr::distinct() %>% + dplyr::pull() + + names(ingredientCodes)[[i]] <- paste0( + "Ingredient", ": ", workingName, + " (", names(ingredientCodes)[[i]], ")" + ) + } + return(ingredientCodes) +} + +fetchBatchedDrugIngredientCodes <- function(cdm, codes, batchSize, doseForm) { + codeBatches <- split( + codes, + ceiling(seq_along(codes) / batchSize) + ) + + descendants <- vector("list", length(codeBatches)) + + cli::cli_progress_bar( + total = length(descendants), + format = " -- getting descendants {cli::pb_bar} {cli::pb_current} of {cli::pb_total} ingredient groups" + ) + for (i in seq_along(descendants)) { + cli::cli_progress_update() + descendants[[i]] <- getDescendants( + cdm = cdm, + conceptId = codeBatches[[i]], + withAncestor = TRUE, + doseForm = doseForm + ) + } + cli::cli_progress_done() + descendants <- dplyr::bind_rows(descendants) + + return(descendants) +} diff --git a/R/getCandidateCodes.R b/R/getCandidateCodes.R index 0252ef1..313b02b 100644 --- a/R/getCandidateCodes.R +++ b/R/getCandidateCodes.R @@ -1,4 +1,4 @@ -# Copyright 2022 DARWIN EU® +# Copyright 2023 DARWIN EU® # # This file is part of IncidencePrevalence # @@ -77,7 +77,7 @@ #' keywords = "osteoarthritis" #' ) #' DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) -getCandidateCodes <- function(cdm = NULL, +getCandidateCodes <- function(cdm, keywords, exclude = NULL, domains = "Condition", @@ -162,13 +162,18 @@ getCandidateCodes <- function(cdm = NULL, checkmate::assert_numeric(maxDistanceCost, add = errorMessage) checkmate::assert_logical(verbose, add = errorMessage) checkmate::reportAssertions(collection = errorMessage) - # in addition, now will check we have the required tables + errorMessage <- checkmate::makeAssertCollection() - checkTableExists(cdm, "concept", errorMessage) - checkTableExists(cdm, "concept_relationship", errorMessage) - checkTableExists(cdm, "concept_ancestor", errorMessage) - checkTableExists(cdm, "concept_synonym", errorMessage) - checkTableExists(cdm, "vocabulary", errorMessage) + assertTablesExist(cdm, tableName = c("concept", + "concept_relationship", + "concept_ancestor", + "concept_synonym", + "vocabulary"), + messageStore = errorMessage) + if ("drug" %in% tolower(domains)) { + assertTablesExist(cdm, tableName = c("drug_strength"), + messageStore = errorMessage) + } checkmate::reportAssertions(collection = errorMessage) errorMessage <- checkmate::makeAssertCollection() diff --git a/R/getMappings.R b/R/getMappings.R index 6bd5c3b..4c11506 100644 --- a/R/getMappings.R +++ b/R/getMappings.R @@ -1,4 +1,4 @@ -# Copyright 2022 DARWIN EU® +# Copyright 2023 DARWIN EU® # # This file is part of IncidencePrevalence # @@ -44,18 +44,20 @@ getMappings <- function(candidateCodelist, "RxNorm", "RxNorm Extension", "SNOMED" )) { + errorMessage <- checkmate::makeAssertCollection() checkDbType(cdm = cdm, type = "cdm_reference", messageStore = errorMessage) checkmate::assertVector(nonStandardVocabularies, add = errorMessage) checkmate::assertDataFrame(candidateCodelist, add = errorMessage) checkmate::reportAssertions(collection = errorMessage) - # in addition, now will check we have the required tables + errorMessage <- checkmate::makeAssertCollection() - checkTableExists(cdm, "concept", errorMessage) - checkTableExists(cdm, "concept_relationship", errorMessage) - checkTableExists(cdm, "concept_ancestor", errorMessage) - checkTableExists(cdm, "concept_synonym", errorMessage) - checkTableExists(cdm, "vocabulary", errorMessage) + assertTablesExist(cdm, tableName = c("concept", + "concept_relationship", + "concept_ancestor", + "concept_synonym", + "vocabulary"), + messageStore = errorMessage) checkmate::reportAssertions(collection = errorMessage) conceptDb <- cdm$concept diff --git a/R/utils.R b/R/inputValidation.R similarity index 70% rename from R/utils.R rename to R/inputValidation.R index e2e0814..3b6cf50 100644 --- a/R/utils.R +++ b/R/inputValidation.R @@ -1,4 +1,4 @@ -# Copyright 2022 DARWIN EU® +# Copyright 2023 DARWIN EU® # # This file is part of IncidencePrevalence # @@ -23,12 +23,15 @@ checkDbType <- function(cdm, type = "cdm_reference", messageStore) { } } -checkTableExists <- function(cdm, tableName, messageStore) { - tableExists <- inherits(cdm[[tableName]], c("tbl_dbi", - "ArrowObject", "ArrowTabular", - "tbl", "data.frame")) +assertTablesExist <- function(cdm, tableName, messageStore) { + + for(i in seq_along(tableName)){ + tableExists <- inherits(cdm[[tableName[[i]]]], c("tbl_dbi", + "ArrowObject", "ArrowTabular", + "tbl", "data.frame")) checkmate::assertTRUE(tableExists, add = messageStore) if (!isTRUE(tableExists)) { - messageStore$push(glue::glue("- {tableName} is not found in the cdm reference")) + messageStore$push(glue::glue("- {tableName[[i]]} is not found in the cdm reference")) + } } } diff --git a/R/mockVocabRef.R b/R/mockVocabRef.R index 7324280..730089a 100644 --- a/R/mockVocabRef.R +++ b/R/mockVocabRef.R @@ -1,4 +1,4 @@ -# Copyright 2022 DARWIN EU® +# Copyright 2023 DARWIN EU® # # This file is part of IncidencePrevalence # @@ -33,7 +33,7 @@ mockVocabRef <- function(backend = "database") { # tables concept <- data.frame( - concept_id = 1:11, + concept_id = 1:14, concept_name = c( "Musculoskeletal disorder", "Osteoarthrosis", @@ -45,27 +45,43 @@ mockVocabRef <- function(backend = "database") { "Knee osteoarthritis", "H/O osteoarthritis", "Adalimumab", - "Injection" + "Injection", + "ALIMENTARY TRACT AND METABOLISM", + "Descendant drug", + "Injectable" ), - domain_id = c(rep("Condition", 8), "Observation", "Drug", "Drug"), + domain_id = c(rep("Condition", 8), "Observation",rep("Drug", 5)), vocabulary_id = c( rep("SNOMED", 6), rep("Read", 2), - "LOINC", "RxNorm", "OMOP" + "LOINC", "RxNorm", "OMOP", + "ATC", + "RxNorm", "OMOP" ), standard_concept = c( rep("S", 6), rep(NA, 2), - "S", "S", NA + "S", "S", NA, + NA, "S", NA ), concept_class_id = c( rep("Clinical Finding", 6), rep("Diagnosis", 2), - "Observation", "Ingredient", "Dose Form" + "Observation", "Ingredient", "Dose Form", + "ATC 1st", "Drug", "Dose Form" ), - concept_code = "1234" + concept_code = "1234", + valid_start_date = NA, + valid_end_date = NA, + invalid_reason = NA ) conceptAncestor <- dplyr::bind_rows( + data.frame( + ancestor_concept_id = 1L, + descendant_concept_id = 1L, + min_levels_of_separation = 1, + max_levels_of_separation = 1 + ), data.frame( ancestor_concept_id = 1L, descendant_concept_id = 2L, @@ -101,6 +117,30 @@ mockVocabRef <- function(backend = "database") { descendant_concept_id = 5L, min_levels_of_separation = 1, max_levels_of_separation = 1 + ), + data.frame( + ancestor_concept_id = 10L, + descendant_concept_id = 10L, + min_levels_of_separation = 1, + max_levels_of_separation = 1 + ), + data.frame( + ancestor_concept_id = 10L, + descendant_concept_id = 13L, + min_levels_of_separation = 1, + max_levels_of_separation = 1 + ), + data.frame( + ancestor_concept_id = 12L, + descendant_concept_id = 12L, + min_levels_of_separation = 1, + max_levels_of_separation = 1 + ), + data.frame( + ancestor_concept_id = 12L, + descendant_concept_id = 13L, + min_levels_of_separation = 1, + max_levels_of_separation = 1 ) ) conceptSynonym <- dplyr::bind_rows( @@ -112,7 +152,8 @@ mockVocabRef <- function(backend = "database") { concept_id = 3L, concept_synonym_name = "Osteoarthrosis" ) - ) + )%>% + dplyr::mutate(language_concept_id = NA) conceptRelationship <- dplyr::bind_rows( data.frame( concept_id_1 = 2L, @@ -133,8 +174,16 @@ mockVocabRef <- function(backend = "database") { concept_id_1 = 3L, concept_id_2 = 6L, relationship_id = "Due to of" + ), + data.frame( + concept_id_1 = 13L, + concept_id_2 = 14L, + relationship_id = "RxNorm has dose form" ) - ) + ) %>% + dplyr::mutate(valid_start_date = NA, + valid_end_date = NA, + invalid_reason = NA) vocabulary <- dplyr::bind_rows( data.frame( vocabulary_id = "SNOMED", diff --git a/R/runSearch.R b/R/runSearch.R index 833a783..031f374 100644 --- a/R/runSearch.R +++ b/R/runSearch.R @@ -1,4 +1,4 @@ -# Copyright 2022 DARWIN EU® +# Copyright 2023 DARWIN EU® # # This file is part of IncidencePrevalence # diff --git a/R/vocabUtilities.R b/R/vocabUtilities.R index 10c33aa..7149f81 100644 --- a/R/vocabUtilities.R +++ b/R/vocabUtilities.R @@ -1,4 +1,4 @@ -# Copyright 2022 DARWIN EU® +# Copyright 2023 DARWIN EU® # # This file is part of IncidencePrevalence # @@ -287,6 +287,11 @@ getDoseForm <- function(cdm) { #' #' @param cdm cdm_reference via CDMConnector #' @param conceptId concpet_id to search +#' @param withAncestor If TRUE, return column with ancestor. In case of multiple +#' ancestors, concepts will be separated by ";" +#' @param doseForm Only descendants codes with the specified drug dose form +#' will be returned. If NULL, descendant codes will be returned regardless +#' of dose form. #' #' @return The descendants of a given concept id #' @export @@ -295,11 +300,16 @@ getDoseForm <- function(cdm) { #' cdm <- mockVocabRef() #' getDescendants(cdm = cdm, conceptId = 1) #' DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) -getDescendants <- function(cdm, conceptId) { + +getDescendants <- function(cdm, + conceptId, + withAncestor = FALSE, + doseForm = NULL) { + errorMessage <- checkmate::makeAssertCollection() cdmInheritsCheck <- inherits(cdm, "cdm_reference") checkmate::assertTRUE(cdmInheritsCheck, - add = errorMessage + add = errorMessage ) if (!isTRUE(cdmInheritsCheck)) { errorMessage$push( @@ -307,28 +317,145 @@ getDescendants <- function(cdm, conceptId) { ) } checkmate::assert_numeric(conceptId, - add = errorMessage + add = errorMessage ) checkmate::reportAssertions(collection = errorMessage) +if(isFALSE(withAncestor)){ + descendants <- getDescendantsOnly(cdm, conceptId, doseForm)} + + if(isTRUE(withAncestor)){ + descendants <- getDescendantsAndAncestor(cdm, conceptId, doseForm)} + + return(descendants) +} + +getDescendantsOnly <- function(cdm, conceptId, doseForm) { descendants <- cdm$concept_ancestor %>% dplyr::filter(.data$ancestor_concept_id %in% .env$conceptId) %>% dplyr::select("descendant_concept_id") %>% dplyr::distinct() %>% dplyr::rename("concept_id" = "descendant_concept_id") %>% dplyr::left_join(cdm$concept, - by = "concept_id" + by = "concept_id") + + if(!is.null(doseForm)){ + descendantDoseForms <- getPresentDoseForms(cdm, concepts = descendants) + } + + descendants <- descendants %>% + dplyr::collect() + + if(!is.null(doseForm)){ + descendants <- filterOnDoseForm(concepts = descendants, + conceptDoseForms = descendantDoseForms, + doseForm = doseForm) + } + + # nb conceptId will also be a descendant of itself (if we don't specify dose) + return(descendants) +} + +getDescendantsAndAncestor <- function(cdm, conceptId, doseForm) { + + descendants <- cdm$concept_ancestor %>% + dplyr::inner_join(dplyr::tibble(ancestor_concept_id = as.integer(conceptId)), + by = "ancestor_concept_id", + copy = TRUE) %>% + dplyr::rename("concept_id" = "descendant_concept_id") %>% + dplyr::left_join(cdm$concept, + by = "concept_id") %>% + dplyr::mutate(name = paste0("concept_", .data$ancestor_concept_id)) + + if(!is.null(doseForm)){ + descendantDoseForms <- getPresentDoseForms(cdm, concepts = descendants) + } + + descendants <- descendants %>% + dplyr::collect() + + if(nrow(descendants)>0){ +descendants <- descendants %>% + tidyr::pivot_wider(names_from = "name", + values_from = "ancestor_concept_id") + + # one row per concept, with ancestor (of which there may be multiple) + working_cols <- stringr::str_subset(string = colnames(descendants), + pattern = paste(c(colnames(cdm$concept), + colnames(cdm$concept_ancestor)), + collapse = "|"), + negate = TRUE) + +descendants <- descendants %>% + tidyr::unite(col="ancestor_concept_id", + working_cols, sep=";") +# quicker to replace NAs afterwards rather than inside unite +# (especially when there are many columns) +descendants$ancestor_concept_id <- stringr::str_replace_all( + string = descendants$ancestor_concept_id, + pattern = ";NA|NA;", + replacement = "" +) + } + + if(!is.null(doseForm)){ + descendants <- filterOnDoseForm(concepts = descendants, + conceptDoseForms = descendantDoseForms, + doseForm = doseForm) + } + + # nb conceptId will also be a descendant of itself + return(descendants) + +} + +getPresentDoseForms <- function(cdm, concepts){ + + presentDoseForms <- concepts %>% + dplyr::left_join( + cdm$concept_relationship %>% + dplyr::filter(.data$relationship_id == "RxNorm has dose form") %>% + dplyr::select("concept_id_1", "concept_id_2") %>% + dplyr::rename("concept_id" = "concept_id_2") %>% + dplyr::distinct() %>% + dplyr::left_join(cdm$concept, by = "concept_id") %>% + dplyr::select("concept_id_1", "concept_name") %>% + dplyr::rename("concept_id"="concept_id_1", + "dose_form"="concept_name") , + by ="concept_id" ) %>% + dplyr::select("concept_id", "dose_form") %>% dplyr::collect() - # return concept_id used along with descendants - all <- dplyr::bind_rows( - cdm$concept %>% - dplyr::filter(.data$concept_id %in% .env$conceptId) %>% - dplyr::collect(), - descendants - ) %>% - dplyr::distinct() %>% - dplyr::arrange("concept_id") - return(all) + presentDoseForms <- presentDoseForms %>% + dplyr::group_by(.data$concept_id) %>% + dplyr::mutate(seq = dplyr::row_number()) %>% + tidyr::pivot_wider( + names_from = "seq", + values_from = "dose_form" + ) + presentDoseForms <- presentDoseForms %>% + tidyr::unite( + col = "dose_form", 2:ncol(presentDoseForms), sep = "; ", + na.rm = TRUE + ) + return(presentDoseForms) + +} + +filterOnDoseForm <- function(concepts, conceptDoseForms, doseForm){ + concepts <- concepts %>% + dplyr::inner_join( + conceptDoseForms %>% + dplyr::filter(stringr::str_detect( + string = tolower(.data$dose_form), + pattern = paste(tolower(.env$doseForm), + collapse = "|" + ) + )) %>% + dplyr::select("concept_id"), + by = "concept_id") + + return(concepts) + } diff --git a/man/getATCCodes.Rd b/man/getATCCodes.Rd new file mode 100644 index 0000000..5ca9981 --- /dev/null +++ b/man/getATCCodes.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/drugCodes.R +\name{getATCCodes} +\alias{getATCCodes} +\title{Get descendant codes for ATC levels} +\usage{ +getATCCodes(cdm, level = c("ATC 1st"), name = NULL, doseForm = NULL) +} +\arguments{ +\item{cdm}{cdm_reference via CDMConnector} + +\item{level}{ATC level. Can be one or more of "ATC 1st", "ATC 2nd", +"ATC 3rd", "ATC 4th", and "ATC 5th"} + +\item{name}{ATC name of interest. For example, c("Dermatologicals", +"Nervous System"), would result in a list of length two with the descendant +concepts for these two particular ATC groups.} + +\item{doseForm}{Only descendants codes with the specified dose form +will be returned. If NULL, descendant codes will be returned regardless +of dose form.} +} +\value{ +A named list, with each element containing the descendant +concepts for a particular ATC group +} +\description{ +Get descendant codes for ATC levels +} +\examples{ +cdm <- mockVocabRef() +getATCCodes(cdm = cdm, level = "ATC 1st") +DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) +} diff --git a/man/getCandidateCodes.Rd b/man/getCandidateCodes.Rd index 30e6124..e74344b 100644 --- a/man/getCandidateCodes.Rd +++ b/man/getCandidateCodes.Rd @@ -5,7 +5,7 @@ \title{Generate candidate codelist for the OMOP CDM} \usage{ getCandidateCodes( - cdm = NULL, + cdm, keywords, exclude = NULL, domains = "Condition", diff --git a/man/getDescendants.Rd b/man/getDescendants.Rd index ab5a470..516bc3c 100644 --- a/man/getDescendants.Rd +++ b/man/getDescendants.Rd @@ -4,12 +4,19 @@ \alias{getDescendants} \title{getDescendants} \usage{ -getDescendants(cdm, conceptId) +getDescendants(cdm, conceptId, withAncestor = FALSE, doseForm = NULL) } \arguments{ \item{cdm}{cdm_reference via CDMConnector} \item{conceptId}{concpet_id to search} + +\item{withAncestor}{If TRUE, return column with ancestor. In case of multiple +ancestors, concepts will be separated by ";"} + +\item{doseForm}{Only descendants codes with the specified drug dose form +will be returned. If NULL, descendant codes will be returned regardless +of dose form.} } \value{ The descendants of a given concept id diff --git a/man/getDrugIngredientCodes.Rd b/man/getDrugIngredientCodes.Rd new file mode 100644 index 0000000..ef1f2dc --- /dev/null +++ b/man/getDrugIngredientCodes.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/drugCodes.R +\name{getDrugIngredientCodes} +\alias{getDrugIngredientCodes} +\title{Get descendant codes for drug ingredients} +\usage{ +getDrugIngredientCodes(cdm, name = NULL, doseForm = NULL) +} +\arguments{ +\item{cdm}{cdm_reference via CDMConnector} + +\item{name}{Names of ingredients of interest. For example, c("acetaminophen", +"codeine"), would result in a list of length two with the descendant +concepts for these two particular drug ingredients.} + +\item{doseForm}{Only descendants codes with the specified dose form +will be returned. If NULL, descendant codes will be returned regardless +of dose form.} +} +\value{ +A named list, with each item containing descendant concepts of +an ingredient +} +\description{ +Get descendant codes for drug ingredients +} +\examples{ +cdm <- mockVocabRef() +getDrugIngredientCodes(cdm = cdm, name = "Adalimumab") +DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) +} diff --git a/tests/testthat/test-drugCodes.R b/tests/testthat/test-drugCodes.R new file mode 100644 index 0000000..8c9eca6 --- /dev/null +++ b/tests/testthat/test-drugCodes.R @@ -0,0 +1,78 @@ +test_that("getATCCodes working", { + + backends <- c("database", "arrow", "data_frame") + for (i in seq_along(backends)) { + cdm <- mockVocabRef(backend = backends[i]) + atcCodes <- getATCCodes(cdm, level = "ATC 1st") + expect_true(all(atcCodes[[1]] == c(12,13))) + + atcCodes2 <- getATCCodes(cdm, level = "ATC 1st", + name = "ALIMENTARY TRACT AND METABOLISM") + expect_true(all(atcCodes2[[1]] == c(12,13))) + + if (backends[[i]] == "database") { + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) + } + + } + +}) + +test_that("getATCCodes expected errors", { + + backends <- c("database", "arrow", "data_frame") + for (i in seq_along(backends)) { + cdm <- mockVocabRef(backend = backends[i]) + expect_error(getATCCodes(cdm, level = "Not an ATC level")) + expect_error(getATCCodes(cdm, level = "ATC 1st", + name = "Not an ATC name")) + + if (backends[[i]] == "database") { + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) + } + } + +}) + +test_that("getDrugIngredientCodes working", { + + backends <- c("database", "arrow", "data_frame") + for (i in seq_along(backends)) { + cdm <- mockVocabRef(backend = backends[i]) + ing_codes <- getDrugIngredientCodes(cdm) + expect_true(all(ing_codes[[1]] == c(10,13))) + + ing_codes2 <- getDrugIngredientCodes(cdm, name = "Adalimumab") + expect_true(all(ing_codes2[[1]] == c(10,13))) + + ing_codes3 <- getDrugIngredientCodes(cdm, + name = "Adalimumab", + doseForm = "injectable") + expect_true(all(ing_codes3[[1]] == c(13))) + + ing_codes4 <- getDrugIngredientCodes(cdm, + name = "Adalimumab", + doseForm = "injection") + expect_true(all(ing_codes4[[1]] == c(10))) + + if (backends[[i]] == "database") { + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) + } + + } +}) + +test_that("getDrugIngredientCodes expected errors", { + + backends <- c("database", "arrow", "data_frame") + for (i in seq_along(backends)) { + cdm <- mockVocabRef(backend = backends[i]) + expect_error(getDrugIngredientCodes(cdm, name = "Not an Ingredient")) + + if (backends[[i]] == "database") { + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) + } + } + +}) + diff --git a/tests/testthat/test-getCandidateCodes.R b/tests/testthat/test-getCandidateCodes.R index 94b2216..c43b329 100644 --- a/tests/testthat/test-getCandidateCodes.R +++ b/tests/testthat/test-getCandidateCodes.R @@ -224,7 +224,7 @@ test_that("tests with mock db", { domains = "Drug", searchInSynonyms = TRUE, searchViaSynonyms = TRUE, - includeDescendants = TRUE, + includeDescendants = FALSE, includeAncestor = TRUE ) expect_true(all(c( diff --git a/tests/testthat/test-vocabUtilities.R b/tests/testthat/test-vocabUtilities.R index 10bdf51..101b847 100644 --- a/tests/testthat/test-vocabUtilities.R +++ b/tests/testthat/test-vocabUtilities.R @@ -26,15 +26,52 @@ test_that("tests with mock db", { ) expect_true(is.character(conceptClasses)) - descendants <- getDescendants( + descendants1 <- getDescendants( cdm = cdm, - conceptId = 1 + conceptId = 1, + withAncestor = FALSE ) - expect_true(all(descendants$concept_id == c(1, 2, 3, 4, 5))) + expect_true(all(descendants1$concept_id == c(1, 2, 3, 4, 5))) + expect_true("concept_name" %in% colnames(descendants1)) + + descendants2 <- getDescendants( + cdm = cdm, + conceptId = 1, + withAncestor = TRUE + ) + expect_true(all(descendants2$concept_id == c(1, 2, 3, 4, 5))) + expect_true("ancestor_concept_id" %in% colnames(descendants2)) + expect_true(all(descendants2$ancestor_concept_id == 1)) + + descendants3 <- getDescendants( + cdm = cdm, + conceptId = 10, + withAncestor = FALSE, + doseForm = c("Injection", "Injectable") + ) + expect_true(all(descendants3$concept_id == c(10, 13))) + + descendants4 <- getDescendants( + cdm = cdm, + conceptId = 10, + withAncestor = TRUE, + doseForm = c("Injection", "Injectable") + ) + expect_true(all(descendants4$concept_id == c(10, 13))) + + + descendants5 <- getDescendants( + cdm = cdm, + conceptId = 10, + withAncestor = TRUE, + doseForm = c("Injectable") + ) + expect_true(all(descendants5$concept_id == c(13))) + doseForms <- getDoseForm(cdm = cdm) - expect_true(doseForms == "Injection") + expect_true(all(doseForms == c("Injection", "Injectable"))) # expected errors expect_error(getVocabVersion(cdm = "a"))