/
condense.R
199 lines (192 loc) · 6.84 KB
/
condense.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
#' Condense ICD-9 code by replacing complete families with parent codes
#'
#' These functions take a set of ICD codes, and look for parent ICD codes in the
#' hierarchy. If all the children of a parent are found, then the returned
#' vector will just contain the parents of those codes. Any additional codes
#' which did not have all the siblings present are retained. This can be thought
#' of as the inverse operation to \code{children}.
#' @template icd9-any
#' @template icd9-short
#' @template icd9-decimal
#' @template short_code
#' @template dotdotdot
#' @examples
#' kids <- children("V40")
#' kids
#' condense(kids, defined = TRUE)
#' kids_with_undefined <- children("V40", defined = FALSE)
#' head(kids_with_undefined)
#' length(kids_with_undefined)
#' condense(kids, defined = FALSE)
#'
#' # what happens if we have additional codes?
#' # (condense will warn if we don't explicitly ask to condense
#' # based on defined or possible codes.)
#' condense(c(kids, "41100"), warn = FALSE)
#'
#' # a good use is to summarise the contents of a comorbidity:
#' icd9_map_elix$CHF
#' condense(icd9_map_elix$CHF, warn = FALSE)
#' # explaining big groups of codes takes advantage of this:
#' explain_code(icd9_map_elix$CHF, condense = TRUE, warn = FALSE)
#' @family ICD-9 ranges
#' @keywords manip
#' @export
condense <- function(x, short_code = guess_short(x),
defined = NULL, warn = TRUE, ...) {
UseMethod("condense")
}
#' @export
#' @keywords internal
condense.icd9 <- function(x, short_code = guess_short(x),
defined = NULL, warn = TRUE, ...) {
assert_fac_or_char(x)
assert_flag(short_code)
stopifnot(is.null(defined) ||
(is.logical(defined) && length(defined) == 1L))
assert_flag(warn)
if (short_code) {
icd9_condense_short(x, defined = defined, warn = warn, ...)
} else {
icd9_condense_decimal(x, defined = defined, warn = warn, ...)
}
}
# TODO: condense.icd10, especially thinking of the long lists of ICD-10 codes in
# PCCC maps.
#' @describeIn condense Condense a set of ICD codes, guessing ICD version
#' from input data
#' @export
#' @keywords internal manip
condense.character <- function(x, short_code = guess_short(x),
defined = NULL, ...) {
ver <- guess_version.character(x, short_code = short_code)
if (ver %in% icd9_classes) {
if (is.null(short_code)) short_code <- guess_short(x)
condense.icd9(x, short_code = short_code, defined = defined, ...)
} else if (ver %in% icd10_classes) {
if (is.null(short_code)) short_code <- guess_short(x)
stop("condense.icd10 not implemented yet")
} else {
stop("Unknown ICD type")
}
}
#' @describeIn condense Condense a factor of ICD codes
#' @export
#' @keywords internal manip
condense.factor <- function(x,
short_code = guess_short(x),
defined = NULL,
...) {
condense.character(levels(x),
short_code = short_code,
defined = defined,
...
)
}
#' @rdname condense
#' @keywords internal manip
icd9_condense_decimal <- function(x, defined = NULL, warn = TRUE,
keep_factor_levels = FALSE) {
short_to_decimal.icd9(
icd9_condense_short(
decimal_to_short.icd9(x),
defined = defined, warn = warn, keep_factor_levels = keep_factor_levels
)
)
}
#' Condense short ICD-9 codes
#' @template warn
#' @param keep_factor_levels single logical value, default \code{FALSE}. If
#' \code{TRUE}, will reuse the factor levels from the input data for the
#' output data. This only applies if a factor is given for the input codes.
#' @keywords internal manip
#' @noRd
icd9_condense_short <- function(x,
defined = NULL,
warn = TRUE,
keep_factor_levels = FALSE) {
stopifnot(is.null(defined) ||
(is.logical(defined) && length(defined) == 1L))
assert_flag(warn)
assert_flag(keep_factor_levels)
icd9Levels <- levels(x) # NULL if not a factor
# we can convert back to factor later. Lots of scope for errors by handling
# factors and character vectors in this function, so keep simple with
# character only.
x <- as_char_no_warn(x)
i9w <- unique(get_valid.icd9(x, short_code = TRUE))
if (is.null(defined)) {
if (all(is_defined.icd9(i9w, short_code = TRUE))) {
defined <- TRUE
if (warn) {
message("'defined' not given, but all codes are indeed defined")
}
} else {
defined <- FALSE
if (warn) {
warning("'defined' not given, but not all codes are defined")
}
}
}
if (warn && defined && !all(is_defined.icd9(x, short_code = TRUE))) {
x <- get_defined.icd9(x, short_code = TRUE)
warning(
"only defined values requested, but some undefined ICD-9 code(s)",
" were given, so dropping them"
)
}
# any major codes are automatically in output (not condensing higher than
# three digit code) and all their children can be removed from the work list
out <- majors <- i9w[are_major <- is_major.icd9(i9w)]
i9w <- i9w[!are_major]
i9w <- i9w[i9w %nin% children.icd9(majors,
short_code = TRUE,
defined = defined
)]
fout <- c()
four_digit_parents <- unique(substr(i9w, 0, 4))
for (fp in four_digit_parents) {
# onlyBillable at 5th level is same as onlyReal
test_kids <- children.icd9(fp,
defined = defined,
short_code = TRUE, billable = FALSE
)
if (length(test_kids) > 0 && all(test_kids %in% c(fp, i9w))) {
# if ((length(test_kids) > 1) || (fp %in% i9w)) {
fout <- c(fout, fp)
i9w <- i9w[-which(i9w %in% test_kids)]
# }
}
}
# 'out' now has original major codes, 'fout' has parent four digit codes. Now
# see whether any groups of four digit codes comprise all the four digit codes
# in any parent (or original) major. We test the original majors again to
# catch where non-billable four digit codes were omitted, but otherwise all
# the children are complete. This replaces the old 'onlyBillable' flag which
# was annoying.
# set new variable so we don't change the thing we are looping over...
major_parents <- unique(get_major.icd9(c(out, fout, i9w), short_code = TRUE))
for (mp in major_parents) {
test_kids <- children.icd9(mp, short_code = TRUE, defined = defined)
# we've done these already:
test_kids <- test_kids[nchar(test_kids) < (5L + icd9_is_e(mp))]
test_kids <- test_kids[-which(test_kids == mp)]
if (length(test_kids) > 0 && all(test_kids %in% c(out, fout, i9w))) {
out <- c(out, mp)
fout <- fout[-which(fout %in% test_kids)]
}
}
out <- unique(sort.icd9(c(out, fout, i9w), short_code = TRUE))
if (!is.null(icd9Levels)) {
if (keep_factor_levels) {
out <- factor(out, icd9Levels)
} else {
out <- factor(out)
}
}
if (defined) {
get_defined.icd9(out, short_code = TRUE)
} else {
out
}
}