-
Notifications
You must be signed in to change notification settings - Fork 2
/
checkUniqueness2.R
67 lines (59 loc) · 2.68 KB
/
checkUniqueness2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#############################################################################
#' Check uniqueness of a variable.
#'
#' Function to check if a variable is unique for all cases of an identifier variable. This is a fast and more efficient version of
#' \code{\link{checkUniqueness}} which always returns a logical, non missing value of length one.
#'
#' For example if missing values are multiple imputed and data is stored in a long format, checking the uniqueness of a variable
#' within an identifier can be tricky. This function automates this task via reshaping the data into wide format and testing equality
#' among the reshaped variables. Similar functionality (via matrices) is covered by \code{lme4::isNested},
#' which is more general and performs similarly.
#'
#'@param GADSdat \code{GADSdat} object imported via \code{eatGADS}.
#'@param varName Single string containing the variable name for which the check should be performed.
#'@param idVar Single string containing the name of the identifier variable.
#'@param impVar Single string containing the name of the imputation number.
#'
#'@return Returns a logical of length one.
#'
#'@examples
#'## create an example GADSdat
#'l <- 1000
#'long_df <- data.table::data.table(id = sort(rep(1:l, 15)),
#' v1 = sort(rep(1:l, 15)),
#' imp = rep(1:15, l))
#'gads <- import_DF(long_df)
#'## check uniqueness
#'checkUniqueness2(gads, varName = "v1", idVar = "id", impVar = "imp")
#'
#'@export
checkUniqueness2 <- function(GADSdat, varName, idVar, impVar) {
UseMethod("checkUniqueness2")
}
#'@export
checkUniqueness2.GADSdat <- function(GADSdat, varName, idVar, impVar) {
check_GADSdat(GADSdat)
check_vars_in_GADSdat(GADSdat, vars = c(varName, idVar, impVar))
checkUniqueness2(GADSdat$dat, varName = varName, idVar = idVar, impVar = impVar)
}
#'@export
checkUniqueness2.data.frame <- function(GADSdat, varName, idVar, impVar) {
dat <- data.table::as.data.table(GADSdat)
if(nrow(dat) == length(unique(dat[[idVar]]))) {
message("'idVar' is unique per row in 'GADSdat' and checking for uniqueness is obsolete.")
return(TRUE)
}
form <- stats::as.formula(paste0(idVar, " ~ ", impVar))
subdat <- dat[, c(idVar, varName, impVar), with = FALSE]
wide <- data.table::dcast(subdat, formula = form, value.var = varName)
## compare all of them
imp_num <- length(unique(dat[[impVar]]))
if(imp_num < 2) stop("'impVar' must be an imputation variable with at least two different values.")
log_list <- sapply(3:(imp_num+1), function(x) {
#browser()
wide_sub <- wide[, c(2, x), with = FALSE]
wide_sub <- stats::na.omit(wide_sub)
all(wide_sub[[1]] == wide_sub[[2]])
})
all(log_list)
}