Navigation Menu

Skip to content

Commit

Permalink
fix sort and order naming and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Jack O. Wasey committed Feb 16, 2019
1 parent 56ff170 commit 6cb60c4
Show file tree
Hide file tree
Showing 36 changed files with 256 additions and 243 deletions.
7 changes: 4 additions & 3 deletions NAMESPACE
Expand Up @@ -117,9 +117,9 @@ S3method(short_to_parts,character)
S3method(short_to_parts,icd10)
S3method(short_to_parts,icd10cm)
S3method(short_to_parts,icd9)
S3method(sort_icd,default)
S3method(sort_icd,icd10)
S3method(sort_icd,icd9)
S3method(sort,icd10)
S3method(sort,icd10cm)
S3method(sort,icd9)
S3method(van_walraven,data.frame)
export("%i9d%")
export("%i9da%")
Expand Down Expand Up @@ -339,6 +339,7 @@ export(is_defined)
export(is_leaf)
export(is_valid)
export(long_to_wide)
export(order.icd10be)
export(order.icd10cm)
export(order.icd9)
export(plot_comorbid)
Expand Down
3 changes: 2 additions & 1 deletion NEWS.md
Expand Up @@ -15,7 +15,8 @@
* Increased test coverage with resulting fixes in corner cases
* Accept integer identifiers in patient data
* Default to not validating input factors: all factors generated by R are valid. This gives a significant performance increase for big data.
* PCCC benchmarking improved (`icd` is about 10x faster than the `pccc` package)
* 'sort_icd' renamed to be a simple S3 method for base R sort. Sorting and ordering ICD-10-CM and ICD-10-BE codes is now supported. 'sort_icd' is still available so that ICD-9 versus ICD-10 can be guessed, since base sort cannot do this.
* PCCC benchmarking improved (`icd` is about 10x faster than the `pccc` package, which also uses 'Rcpp')
* Better documentation, including more links between documentation, removed internal documentation from public view. Added "Table One" example demonstration to README.
* code cleaning, simplification and linting, e.g. replaced sapply with vapply throughout, and dropped custom wide-long-wide conversion C++ code in favour of base R.

Expand Down
30 changes: 15 additions & 15 deletions R/RcppExports.R
Expand Up @@ -51,7 +51,7 @@ categorize_rcpp <- function() {
.Call(`_icd_categorize_rcpp`)
}

icd10_children_defined_cpp <- function(x, lookup, nc) {
icd10_children_defined_rcpp <- function(x, lookup, nc) {
.Call(`_icd_icd10ChildrenDefined`, x, lookup, nc)
}

Expand Down Expand Up @@ -128,11 +128,11 @@ icd9DecimalToParts <- function(icd9Decimal, mnrEmpty) {
.Call(`_icd_icd9DecimalToParts`, icd9Decimal, mnrEmpty)
}

icd9_short_to_decimal_cpp <- function(x) {
icd9_short_to_decimal_rcpp <- function(x) {
.Call(`_icd_icd9ShortToDecimal`, x)
}

icd9_decimal_to_short_cpp <- function(x) {
icd9_decimal_to_short_rcpp <- function(x) {
.Call(`_icd_icd9DecimalToShort`, x)
}

Expand Down Expand Up @@ -200,20 +200,20 @@ guess_short <- function(x_, short_code = NULL, n = 1000L, icd_name = NULL) {
#' @param x \code{const char*} of choices of first character to match
#' @param invert single logical, if TRUE, negates the condition
#' @keywords internal
icd9_is_n_cpp <- function(sv) {
.Call(`_icd_icd9_is_n_cpp`, sv)
icd9_is_n_rcpp <- function(sv) {
.Call(`_icd_icd9_is_n_rcpp`, sv)
}

#' @rdname icd9_is_n_cpp
#' @rdname icd9_is_n_rcpp
#' @keywords internal
icd9_is_v_cpp <- function(sv) {
.Call(`_icd_icd9_is_v_cpp`, sv)
icd9_is_v_rcpp <- function(sv) {
.Call(`_icd_icd9_is_v_rcpp`, sv)
}

#' @rdname icd9_is_n_cpp
#' @rdname icd9_is_n_rcpp
#' @keywords internal
icd9_is_e_cpp <- function(sv) {
.Call(`_icd_icd9_is_e_cpp`, sv)
icd9_is_e_rcpp <- function(sv) {
.Call(`_icd_icd9_is_e_rcpp`, sv)
}

#' Simpler add leading zeroes without converting to parts and back
Expand All @@ -238,7 +238,7 @@ icd9_add_leading_zeroes_major <- function(mjr) {
#' @template short_code
#' @return character vector of ICD-9 codes with leading zeroes
#' @keywords internal manip
icd9_add_leading_zeroes_cpp <- function(x, short_code) {
icd9_add_leading_zeroes_rcpp <- function(x, short_code) {
.Call(`_icd_icd9AddLeadingZeroes`, x, short_code)
}

Expand Down Expand Up @@ -308,15 +308,15 @@ icd9_compare_rcpp <- function(a, b) {
.Call(`_icd_icd9Compare`, a, b)
}

icd9_order_cpp <- function(x) {
icd9_order_rcpp <- function(x) {
.Call(`_icd_icd9Order`, x)
}

icd10cm_compare_cpp <- function(x, y) {
icd10cm_compare_rcpp <- function(x, y) {
.Call(`_icd_icd10cmCompare`, x, y)
}

icd10cm_sort_cpp <- function(x) {
icd10cm_sort_rcpp <- function(x) {
.Call(`_icd_icd10cmSort`, x)
}

Expand Down
6 changes: 3 additions & 3 deletions R/children.R
Expand Up @@ -67,7 +67,7 @@ children.icd9cm <- function(x, short_code = guess_short(x),
icd9Decimal = toupper(x),
icd9cmReal = icd.data::icd9cm_hierarchy$code,
onlyReal = defined)
res <- sort_icd.icd9(res)
res <- sort.icd9(res)
res <- if (billable)
get_billable.icd9cm(icd9cm(res), short_code)
else
Expand Down Expand Up @@ -163,7 +163,7 @@ children_defined.icd10cm <- function(x,
"2016"
}
nc <- .chars_in_icd10cm[[ver]]
kids <- icd10_children_defined_cpp(
kids <- icd10_children_defined_rcpp(
x,
dat,
nc)
Expand Down Expand Up @@ -198,6 +198,6 @@ children_defined.icd10who <- function(
if (!short_code)
x <- decimal_to_short.icd10cm(x)
d <- get_from_icd_data("icd10who2016")
kids <- icd10_children_defined_cpp(x, d, nchar(d$code))
kids <- icd10_children_defined_rcpp(x, d, nchar(d$code))
as.icd10who(kids, short_code = short_code)
}
2 changes: 1 addition & 1 deletion R/condense.R
Expand Up @@ -153,7 +153,7 @@ icd9_condense_short <- function(x, defined = NULL, warn = TRUE,
fout <- fout[-which(fout %in% test_kids)]
}
}
out <- unique(sort_icd.icd9(c(out, fout, i9w), short_code = TRUE))
out <- unique(sort.icd9(c(out, fout, i9w), short_code = TRUE))
if (!is.null(icd9Levels)) {
if (keep_factor_levels)
out <- factor(out, icd9Levels)
Expand Down
6 changes: 3 additions & 3 deletions R/convert.R
Expand Up @@ -272,7 +272,7 @@ short_to_decimal.default <- function(x) {
#' @export
#' @keywords internal manip
short_to_decimal.icd9 <- function(x) {
icd9(as.decimal_diag(icd9_short_to_decimal_cpp(x)))
icd9(as.decimal_diag(icd9_short_to_decimal_rcpp(x)))
}

#' @describeIn short_to_decimal convert ICD-10 codes from short to decimal
Expand Down Expand Up @@ -314,10 +314,10 @@ decimal_to_short <- function(x) {
#' @keywords internal manip
decimal_to_short.icd9 <- function(x) {
if (is.factor(x)) {
levels(x) <- icd9(as.short_diag(icd9_decimal_to_short_cpp(levels(x))))
levels(x) <- icd9(as.short_diag(icd9_decimal_to_short_rcpp(levels(x))))
return(x)
}
icd9(as.short_diag(icd9_decimal_to_short_cpp(x)))
icd9(as.short_diag(icd9_decimal_to_short_rcpp(x)))
}

#' @describeIn decimal_to_short convert ICD-10 codes from decimal to short
Expand Down
4 changes: 2 additions & 2 deletions R/generate_mappings.R
Expand Up @@ -329,7 +329,7 @@ icd10_generate_map_quan_elix <- function(save_data = TRUE) {
f <- function(x, verbose = TRUE) {
if (verbose) message("f working on: ", paste(x, collapse = " "))
kids <- children_defined.icd10cm(x, short_code = TRUE)
sort_icd.icd10(unique(c(kids, x)))
sort.icd10(unique(c(kids, x)))
}
icd10_map_quan_elix <- lapply(quan_elix_raw, f)
# It does appear that there are numerous codes in the Quan Elixhauser scheme
Expand Down Expand Up @@ -407,7 +407,7 @@ icd10_generate_map_quan_deyo <- function(save_data = TRUE) {
# children will likely be needed. Maybe generating a huge structure is still
# worth it, even for ICD-10-CM, because I do end up cutting it back down to
# size based on the input data before comorbidity matching.
f <- function(x) sort_icd.icd10(
f <- function(x) sort.icd10(
unique(c(children_defined.icd10cm(x, short_code = TRUE), x)))
icd10_map_quan_deyo <- lapply(quan_charl_raw, f)
icd10_map_quan_deyo <- lapply(icd10_map_quan_deyo, as.short_diag)
Expand Down
5 changes: 3 additions & 2 deletions R/icd-package.R
Expand Up @@ -69,8 +69,9 @@
#' You can find children of a higher-level ICD-9 code with
#' \code{\link{children}} and find a common parent to a set of children (or
#' arbitrary list of ICD-9 codes) with \code{\link{condense}}.
#' \code{\link{sort_icd}} sorts in hierarchical, then numerical order, so
#' '100.0' comes before '100.00', for example.
#' \code{\link{sort}} (\code{\link{sort.icd9}}, \code{\link{sort.icd10cm}},
#' etc.) sorts in hierarchical, then numerical order, so '100.0' comes before
#' '100.00', for example.
#'
#' \code{\link{wide_to_long}} and \code{\link{long_to_wide}} convert the two
#' most common data structures containing patient disease data. This is more
Expand Down
4 changes: 2 additions & 2 deletions R/manip.R
Expand Up @@ -49,9 +49,9 @@ icd9_add_leading_zeroes <- function(x, short_code = guess_short(x)) {
assert_fac_or_char(x)
assert_flag(short_code)
if (is.factor(x)) {
levels(x) <- icd9_add_leading_zeroes_cpp(levels(x), short_code)
levels(x) <- icd9_add_leading_zeroes_rcpp(levels(x), short_code)
x
} else icd9_add_leading_zeroes_cpp(x, short_code)
} else icd9_add_leading_zeroes_rcpp(x, short_code)
}

#' @rdname icd9_drop_leading_zeroes
Expand Down
2 changes: 1 addition & 1 deletion R/parse-comorbid.R
Expand Up @@ -99,7 +99,7 @@ icd9_parse_ahrq_sas <- function(save_data = FALSE, offline = TRUE) {
icd9_map_ahrq[[cmb]] <-
as.short_diag(
as.icd9(
sort_icd.icd9(
sort.icd9(
unique(
c(icd9_map_ahrq[[cmb]], p)), short_code = TRUE)))
}
Expand Down
2 changes: 1 addition & 1 deletion R/ranges.R
Expand Up @@ -264,7 +264,7 @@ icd9_expand_range_worker <- function(start, end, lookup, defined,
if (!ex_ambig_end && ex_ambig_start)
lapply(lookup$vec[start_index:(start_index + 5L)], exclude_ambiguous_parent,
defined)
sort_icd.icd9(ls(out_env), short_code = TRUE)
sort.icd9(ls(out_env), short_code = TRUE)
}

#' @rdname expand_range
Expand Down
2 changes: 1 addition & 1 deletion R/sas.R
Expand Up @@ -176,7 +176,7 @@ sas_expand_range <- function(start, end) {
short_code = TRUE,
ex_ambig_start = TRUE,
ex_ambig_end = TRUE)
sort_icd.icd9(unique(c(halfway, nonrealrange)), short_code = TRUE)
sort.icd9(unique(c(halfway, nonrealrange)), short_code = TRUE)
}

#nocov end
114 changes: 70 additions & 44 deletions R/sort.R
@@ -1,83 +1,106 @@
#' Sort short-form ICD-9 codes
#'
#' Sorts lists of numeric only, V or E codes. Note that a simple numeric sort
#' does not work for ICD-9 codes, since "162" > "1620", and also V codes precede
#' E codes.
#' @details Implementation used fast built-in sort, then shuffles the E codes to
#' the end.
#' @param x vector of ICD codes to sort
#' @title Sort or get order of ICD-9 or ICD-10 codes according to published
#' sequence
#' @description The default method will guess whether ICD-9 or ICD-10 then sort
#' based on that type. For ICD-10 codes, note that setting \code{short} is
#' unnecessary and ignored. All codes should consistently use the decimal
#' divider.
#' @section ICD-9: Sorts lists of numeric, V or E codes. Note that a simple
#' numeric sort does not work for ICD-9 codes, since "162" > "1620", and also
#' V codes precede E codes. Numeric codes are first, then 'V', then 'E'. Will
#' return a factor if a factor is given.
#' @section ICD-10-CM and ICD-10-BE: There are some codes which are sequenced
#' out of lexicographic order, e.g., \code{C7A} and \code{C7B} are between
#' \code{C75} and \code{C76}; \code{D3A} is between \code{D48} and \code{D49}.
#' @param x vector of ICD codes to sort or order
#' @template short_code
#' @template dotdotdot
#' @return sorted vector of ICD-9 codes. Numeric, then E codes, then V codes.
#' @return For sort, a sorted vector of ICD-9 codes. Numeric, then E codes, then
#' V codes. For order, an integer vector is returned with the order of each
#' code.
#' @keywords manip
#' @export
sort_icd <- function(x, ...)
UseMethod("sort_icd")

#' @describeIn sort_icd Guess whether ICD-9 or ICD-10 (or possibly sub-type in
#' the future) then sort based on that type. ICD-10 codes, note that setting
#' \code{short} is unnecessary and ignored.
#' @export
#' @keywords internal
sort_icd.default <- function(x, short_code = guess_short(x), ...) {
sort_icd <- function(x, decreasing = FALSE, short_code = guess_short(x), ...) {
switch(
guess_version(x, short_code = short_code),
"icd9" = sort_icd.icd9(x, short_code),
"icd10" = sort_icd.icd10(x, short_code),
"icd9" = sort.icd9(x, decreasing = decreasing, short_code),
"icd10" = sort.icd10(x, decreasing = decreasing, short_code),
stop("ICD version not known")
)
}

#' @describeIn sort_icd Sort ICD-10 codes, note that setting \code{short} is
#' unnecessary and ignored.
#' @rdname sort_icd
#' @keywords internal
#' @export
sort_icd.icd10 <- function(x, short_code = NULL, ...) {
sort.icd10 <- function(
x,
decreasing = FALSE,
...
) {
r <- sort(x, index.return = TRUE)
res <- r[["x"]]
attributes(res) <- attributes(x)
names(res) <- names(x)[r[["ix"]]]
class(res) <- class(x)
res
}

#' @rdname sort_icd
#' @keywords internal
#' @export
sort.icd10cm <- function(
x,
...
) {
# ignore short, it doesn't matter
sort(x)
o <- icd10cm_order_rcpp(x)
o <- match(seq_along(x), o)
res <- x[o]
attributes(res) <- attributes(x)
names(res) <- names(x)[o]
class(res) <- class(x)
res
}

#' @describeIn sort_icd sort ICD-9 codes respecting numeric, then 'V', then 'E'
#' codes, and accounting for leading zeroes. Will return a factor if a factor is given.
#' @rdname sort_icd
#' @keywords internal
#' @export
sort_icd.icd9 <- function(x, short_code = guess_short(x), ...) {
sort.icd9 <- function(
x,
decreasing = FALSE,
short_code = guess_short(x),
...
) {
# no assertions here: they are slower than the actual sorting...
y <- if (short_code)
x
else
decimal_to_short.icd9(x)
res <- if (is.factor(x))
x[icd9_order_cpp(as_char_no_warn(y))]
x[o <- icd9_order_rcpp(as_char_no_warn(y))]
else
x[icd9_order_cpp(y)]
x[o <- icd9_order_rcpp(y)]
o <- match(seq_along(x), o)
class(res) <- class(x)
keep_names <- names(res)
attributes(res) <- attributes(x)
names(res) <- keep_names
res
names(res) <- names(x)[o]
if (decreasing)
rev(res)
else
res
}

#' Get order of a vector of ICD codes
#'
#' @section ICD-9: Puts E codes after V codes. \code{NA} values can't be ordered
#' and are dropped with a warning if found.
#' @section ICD-10-CM: there are some codes which are sequenced out of
#' lexicographic order, e.g., \code{C7A} and \code{C7B} are between \code{C75}
#' and \code{C76}; \code{D3A} is between \code{D48} and \code{D49}.
#' @param x vector or factor of ICD-9 codes
#' @return vector of integers with length of the non-NA values in \code{x}
#' @rdname sort_icd
#' @export
order.icd9 <- function(x) {
if (anyNA(x)) {
warning("Dropping NA values")
x <- x[!is.na(x)]
if (length(x) == 0) return(integer())
}
icd9_order_cpp(x)
icd9_order_rcpp(x)
}

#' @rdname order.icd9
#' @rdname sort_icd
#' @examples
#' # order ICD-10-CM is not lexicographic:
#' codes <- c("C7A", "C74", "C75", "C76", "C7B")
Expand All @@ -88,6 +111,9 @@ order.icd10cm <- function(x) {
icd10cm_order_rcpp(x);
}


#' @rdname sort_icd
#' @export
order.icd10be <- function(x) {
order.icd10cm(x)
order.icd10cm(x)
}

0 comments on commit 6cb60c4

Please sign in to comment.