Skip to content

Commit

Permalink
Merge pull request #71 from darwin-eu/develop
Browse files Browse the repository at this point in the history
v1.2
  • Loading branch information
edward-burn committed May 4, 2023
2 parents 0691a0f + 7e659f7 commit 990a669
Show file tree
Hide file tree
Showing 18 changed files with 703 additions and 57 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: CodelistGenerator
Title: Generate Code Lists for the OMOP Common Data Model
Version: 1.1.0
Version: 1.2.0
Authors@R:
person("Edward", "Burn", , "edward.burn@ndorms.ox.ac.uk", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-9286-1128"))
Expand All @@ -19,7 +19,8 @@ Imports:
glue (>= 1.5.0),
stringr (>= 1.4.0),
tidyselect (>= 1.2.0),
tidyr (>= 1.2.0)
tidyr (>= 1.2.0),
cli (>= 3.1.0)
Suggests:
arrow (>= 9.0.0),
covr,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@

export("%>%")
export(compareCodelists)
export(getATCCodes)
export(getCandidateCodes)
export(getConceptClassId)
export(getDescendants)
export(getDomains)
export(getDoseForm)
export(getDrugIngredientCodes)
export(getMappings)
export(getVocabVersion)
export(getVocabularies)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# CodelistGenerator 1.2.0
* Added functions getATCCodes and getDrugIngredientCodes.

# CodelistGenerator 1.1.0
* Added exactMatch and includeSequela options to getCandidateCodes function.

Expand Down
2 changes: 1 addition & 1 deletion R/compareCodelists.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2022 DARWIN EU®
# Copyright 2023 DARWIN EU®
#
# This file is part of IncidencePrevalence
#
Expand Down
267 changes: 267 additions & 0 deletions R/drugCodes.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
# Copyright 2023 DARWIN EU®
#
# This file is part of IncidencePrevalence
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#' Get descendant codes for ATC levels
#'
#' @param cdm cdm_reference via CDMConnector
#' @param level ATC level. Can be one or more of "ATC 1st", "ATC 2nd",
#' "ATC 3rd", "ATC 4th", and "ATC 5th"
#' @param name ATC name of interest. For example, c("Dermatologicals",
#' "Nervous System"), would result in a list of length two with the descendant
#' concepts for these two particular ATC groups.
#' @param doseForm Only descendants codes with the specified dose form
#' will be returned. If NULL, descendant codes will be returned regardless
#' of dose form.
#'
#' @return A named list, with each element containing the descendant
#' concepts for a particular ATC group
#' @export
#'
#' @examples
#' cdm <- mockVocabRef()
#' getATCCodes(cdm = cdm, level = "ATC 1st")
#' DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
getATCCodes <- function(cdm,

Check warning on line 37 in R/drugCodes.R

View workflow job for this annotation

GitHub Actions / lint

file=R/drugCodes.R,line=37,col=1,[object_name_linter] Variable and function name style should be snake_case or symbols.
level = c("ATC 1st"),
name = NULL,
doseForm = NULL) {

Check warning on line 40 in R/drugCodes.R

View workflow job for this annotation

GitHub Actions / lint

file=R/drugCodes.R,line=40,col=25,[object_name_linter] Variable and function name style should be snake_case or symbols.
errorMessage <- checkmate::makeAssertCollection()

Check warning on line 41 in R/drugCodes.R

View workflow job for this annotation

GitHub Actions / lint

file=R/drugCodes.R,line=41,col=3,[object_name_linter] Variable and function name style should be snake_case or symbols.
checkDbType(cdm = cdm, type = "cdm_reference", messageStore = errorMessage)
levelCheck <- all(level %in%

Check warning on line 43 in R/drugCodes.R

View workflow job for this annotation

GitHub Actions / lint

file=R/drugCodes.R,line=43,col=3,[object_name_linter] Variable and function name style should be snake_case or symbols.
c(
"ATC 1st",
"ATC 2nd",
"ATC 3rd",
"ATC 4th",
"ATC 5th"
))
if (!isTRUE(levelCheck)) {
errorMessage$push(
"- level can only be from: ATC 1st, ATC 2nd, ATC 3rd, ATC 4th, ATC 5th"
)
}
checkmate::assertTRUE(levelCheck, add = errorMessage)
checkmate::assertVector(name,
add = errorMessage,
null.ok = TRUE
)
checkmate::reportAssertions(collection = errorMessage)

atc_groups <- cdm$concept %>%
dplyr::filter(.data$vocabulary_id == "ATC") %>%
dplyr::filter(.data$concept_class_id %in% .env$level) %>%
dplyr::collect()

if (!is.null(name)) {
atc_groups <- atc_groups %>%
dplyr::filter(tolower(.data$concept_name) %in% tolower(.env$name))
}

errorMessage <- checkmate::makeAssertCollection()

Check warning on line 73 in R/drugCodes.R

View workflow job for this annotation

GitHub Actions / lint

file=R/drugCodes.R,line=73,col=3,[object_name_linter] Variable and function name style should be snake_case or symbols.
atcCheck <- nrow(atc_groups) > 0

Check warning on line 74 in R/drugCodes.R

View workflow job for this annotation

GitHub Actions / lint

file=R/drugCodes.R,line=74,col=3,[object_name_linter] Variable and function name style should be snake_case or symbols.
if (!isTRUE(atcCheck)) {
errorMessage$push(
"- No matching ATC codes found"
)
}
checkmate::assertTRUE(atcCheck, add = errorMessage)
checkmate::reportAssertions(collection = errorMessage)


atc_descendants <- getDescendants(
cdm = cdm,
conceptId = atc_groups$concept_id,
withAncestor = TRUE,
doseForm = doseForm
)
if (nrow(atc_descendants) > 0) {
atc_descendants <- atc_descendants %>%
dplyr::select(c("concept_id", "ancestor_concept_id")) %>%
# split different ancestors into multiple cols
tidyr::separate_wider_delim(
cols = "ancestor_concept_id",
delim = ";",
names_sep = "",
too_few = "align_start"
)

atc_descendants <- atc_descendants %>%
# one row per concept + ancestor
tidyr::pivot_longer(!"concept_id",
names_to = NULL,
values_to = "ancestor_concept_id",
values_drop_na = TRUE
)

# to list
# one item per concept
atc_descendants <- split(
x = atc_descendants,
f = as.factor(atc_descendants$ancestor_concept_id),
drop = TRUE
)

# for each item in the list - pull out concepts and name
for (i in seq_along(atc_descendants)) {
workingLevel <- atc_groups %>%

Check warning on line 119 in R/drugCodes.R

View workflow job for this annotation

GitHub Actions / lint

file=R/drugCodes.R,line=119,col=7,[object_name_linter] Variable and function name style should be snake_case or symbols.
dplyr::filter(.data$concept_id == names(atc_descendants)[i]) %>%
dplyr::pull("concept_class_id")
workingName <- atc_groups %>%

Check warning on line 122 in R/drugCodes.R

View workflow job for this annotation

GitHub Actions / lint

file=R/drugCodes.R,line=122,col=7,[object_name_linter] Variable and function name style should be snake_case or symbols.
dplyr::filter(.data$concept_id == names(atc_descendants)[i]) %>%
dplyr::pull("concept_name")

atc_descendants[[i]] <- atc_descendants[[i]] %>%
dplyr::select("concept_id") %>%
dplyr::distinct() %>%
dplyr::pull()

names(atc_descendants)[i] <- paste0(
workingLevel, ": ", workingName,
" (", names(atc_descendants)[i], ")"
)
}
}
return(atc_descendants)
}

#' Get descendant codes for drug ingredients
#'
#' @param cdm cdm_reference via CDMConnector
#' @param name Names of ingredients of interest. For example, c("acetaminophen",
#' "codeine"), would result in a list of length two with the descendant
#' concepts for these two particular drug ingredients.
#' @param doseForm Only descendants codes with the specified dose form
#' will be returned. If NULL, descendant codes will be returned regardless
#' of dose form.
#'
#' @return A named list, with each item containing descendant concepts of
#' an ingredient
#' @export
#'
#' @examples
#'cdm <- mockVocabRef()
#'getDrugIngredientCodes(cdm = cdm, name = "Adalimumab")
#'DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
getDrugIngredientCodes <- function(cdm, name = NULL, doseForm = NULL) {
errorMessage <- checkmate::makeAssertCollection()
checkDbType(cdm = cdm, type = "cdm_reference", messageStore = errorMessage)
checkmate::assertVector(name,
add = errorMessage,
null.ok = TRUE
)
checkmate::reportAssertions(collection = errorMessage)

ingredientConcepts <- cdm$concept %>%
dplyr::filter(.data$standard_concept == "S") %>%
dplyr::filter(.data$concept_class_id == "Ingredient") %>%
dplyr::select("concept_id", "concept_name") %>%
dplyr::collect()

if (!is.null(name)) {
ingredientConcepts <- ingredientConcepts %>%
dplyr::filter(tolower(.data$concept_name) %in% tolower(.env$name))
}

errorMessage <- checkmate::makeAssertCollection()
ingredientCheck <- nrow(ingredientConcepts) > 0
if (!isTRUE(ingredientCheck)) {
errorMessage$push(
"- No matching Ingredient codes found"
)
}
checkmate::assertTRUE(ingredientCheck, add = errorMessage)
checkmate::reportAssertions(collection = errorMessage)

# to avoid potential memory problems will batch
if (nrow(ingredientConcepts) > 0) {
ingredientCodes <- fetchBatchedDrugIngredientCodes(cdm,
codes = ingredientConcepts$concept_id,
batchSize = 500,
doseForm = doseForm
)
}

ingredientCodes <- ingredientCodes %>%
dplyr::select(c("concept_id", "ancestor_concept_id")) %>%
# split different ancestors into multiple cols
tidyr::separate_wider_delim(
cols = "ancestor_concept_id",
delim = ";",
names_sep = "",
too_few = "align_start"
)

ingredientCodes <- ingredientCodes %>%
# one row per concept + ancestor
tidyr::pivot_longer(!"concept_id",
names_to = NULL,
values_to = "ancestor_concept_id",
values_drop_na = TRUE
)

# to list
# one item per concept
ingredientCodes <- split(
x = ingredientCodes,
f = as.factor(ingredientCodes$ancestor_concept_id),
drop = TRUE
)

# for each item in the list - pull out concepts and name
for (i in seq_along(ingredientCodes)) {
workingName <- ingredientConcepts %>%
dplyr::filter(.data$concept_id == names(ingredientCodes)[[i]]) %>%
dplyr::pull("concept_name")

ingredientCodes[[i]] <- ingredientCodes[[i]] %>%
dplyr::select("concept_id") %>%
dplyr::distinct() %>%
dplyr::pull()

names(ingredientCodes)[[i]] <- paste0(
"Ingredient", ": ", workingName,
" (", names(ingredientCodes)[[i]], ")"
)
}
return(ingredientCodes)
}

fetchBatchedDrugIngredientCodes <- function(cdm, codes, batchSize, doseForm) {
codeBatches <- split(
codes,
ceiling(seq_along(codes) / batchSize)
)

descendants <- vector("list", length(codeBatches))

cli::cli_progress_bar(
total = length(descendants),
format = " -- getting descendants {cli::pb_bar} {cli::pb_current} of {cli::pb_total} ingredient groups"
)
for (i in seq_along(descendants)) {
cli::cli_progress_update()
descendants[[i]] <- getDescendants(
cdm = cdm,
conceptId = codeBatches[[i]],
withAncestor = TRUE,
doseForm = doseForm
)
}
cli::cli_progress_done()
descendants <- dplyr::bind_rows(descendants)

return(descendants)
}
21 changes: 13 additions & 8 deletions R/getCandidateCodes.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2022 DARWIN EU®
# Copyright 2023 DARWIN EU®
#
# This file is part of IncidencePrevalence
#
Expand Down Expand Up @@ -77,7 +77,7 @@
#' keywords = "osteoarthritis"
#' )
#' DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
getCandidateCodes <- function(cdm = NULL,
getCandidateCodes <- function(cdm,
keywords,
exclude = NULL,
domains = "Condition",
Expand Down Expand Up @@ -162,13 +162,18 @@ getCandidateCodes <- function(cdm = NULL,
checkmate::assert_numeric(maxDistanceCost, add = errorMessage)
checkmate::assert_logical(verbose, add = errorMessage)
checkmate::reportAssertions(collection = errorMessage)
# in addition, now will check we have the required tables

errorMessage <- checkmate::makeAssertCollection()
checkTableExists(cdm, "concept", errorMessage)
checkTableExists(cdm, "concept_relationship", errorMessage)
checkTableExists(cdm, "concept_ancestor", errorMessage)
checkTableExists(cdm, "concept_synonym", errorMessage)
checkTableExists(cdm, "vocabulary", errorMessage)
assertTablesExist(cdm, tableName = c("concept",
"concept_relationship",
"concept_ancestor",
"concept_synonym",
"vocabulary"),
messageStore = errorMessage)
if ("drug" %in% tolower(domains)) {
assertTablesExist(cdm, tableName = c("drug_strength"),
messageStore = errorMessage)
}
checkmate::reportAssertions(collection = errorMessage)

errorMessage <- checkmate::makeAssertCollection()
Expand Down
16 changes: 9 additions & 7 deletions R/getMappings.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2022 DARWIN EU®
# Copyright 2023 DARWIN EU®
#
# This file is part of IncidencePrevalence
#
Expand Down Expand Up @@ -44,18 +44,20 @@ getMappings <- function(candidateCodelist,
"RxNorm", "RxNorm Extension",
"SNOMED"
)) {

errorMessage <- checkmate::makeAssertCollection()
checkDbType(cdm = cdm, type = "cdm_reference", messageStore = errorMessage)
checkmate::assertVector(nonStandardVocabularies, add = errorMessage)
checkmate::assertDataFrame(candidateCodelist, add = errorMessage)
checkmate::reportAssertions(collection = errorMessage)
# in addition, now will check we have the required tables

errorMessage <- checkmate::makeAssertCollection()
checkTableExists(cdm, "concept", errorMessage)
checkTableExists(cdm, "concept_relationship", errorMessage)
checkTableExists(cdm, "concept_ancestor", errorMessage)
checkTableExists(cdm, "concept_synonym", errorMessage)
checkTableExists(cdm, "vocabulary", errorMessage)
assertTablesExist(cdm, tableName = c("concept",
"concept_relationship",
"concept_ancestor",
"concept_synonym",
"vocabulary"),
messageStore = errorMessage)
checkmate::reportAssertions(collection = errorMessage)

conceptDb <- cdm$concept
Expand Down
Loading

0 comments on commit 990a669

Please sign in to comment.