Skip to content

Commit

Permalink
version 1.0.9
Browse files Browse the repository at this point in the history
  • Loading branch information
struckma authored and cran-robot committed Sep 3, 2021
1 parent f480dca commit 6c9e39b
Show file tree
Hide file tree
Showing 51 changed files with 364 additions and 145 deletions.
8 changes: 4 additions & 4 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: dataquieR
Title: Data Quality in Epidemiological Research
Version: 1.0.8
Version: 1.0.9
Authors@R:
c(person(given = "University Medicine Greifswald",
role = "cph"),
Expand Down Expand Up @@ -29,18 +29,18 @@ Imports: patchwork, dplyr (>= 1.0.2), emmeans, ggplot2 (>= 2.1.0),
R.devices, reshape, rlang, robustbase, utils
Suggests: cowplot (>= 0.9.4), anytime, digest, DT (>= 0.15),
flexdashboard, htmltools, knitr, rmarkdown, rstudioapi,
testthat (>= 2.3.2), tibble, vdiffr
testthat (>= 2.3.2), tibble, markdown, vdiffr, parallel
VignetteBuilder: knitr
Encoding: UTF-8
KeepSource: TRUE
Language: en-US
RoxygenNote: 7.1.1
NeedsCompilation: no
Packaged: 2021-08-12 18:51:55 UTC; struckmanns
Packaged: 2021-09-01 07:56:26 UTC; struckmanns
Author: University Medicine Greifswald [cph],
Adrian Richter [aut],
Carsten Oliver Schmidt [aut],
Stephan Struckmann [aut, cre]
Maintainer: Stephan Struckmann <stephan.struckmann@uni-greifswald.de>
Repository: CRAN
Date/Publication: 2021-08-12 19:20:02 UTC
Date/Publication: 2021-09-03 12:10:09 UTC
96 changes: 50 additions & 46 deletions MD5

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions NEWS.md
@@ -1,9 +1,18 @@
# dataquieR 1.0.9
* Fixed bug in `sigmagap` and made missing guessing more robust.
* Fixed checks on missing code detection failing for `logical`.
* Fixed a damaged check for numeric threshold values in `acc_margins`.
* Fixed wrongly named `GRADING` columns.
* Improved parallel execution by automatic detection of cores.
* Tidy html dependency

# dataquieR (1.0.8)
* Removed formal arguments from `rbind.ReportSummaryTable` since these are
not needed anyways and the inherited documentation for those arguments
`rbind` from `base` contains an invalid URL triggering a `NOTE`.

# dataquieR (1.0.7)
* ***Fixed bugs in example metadata.***
* Figures now have size hints as attributes.
* Added simple type conversion check indicator function of dimension
integrity, `int_datatype_matrix`.
Expand Down
12 changes: 6 additions & 6 deletions R/acc_margins.R
Expand Up @@ -221,7 +221,7 @@ acc_margins <- function(resp_vars = NULL, group_vars = NULL, co_vars = NULL,
} else {
.threshold_value <- NA
}
if (length(threshold_value != 1) || is.na(.threshold_value)) {
if (length(threshold_value) != 1 || is.na(.threshold_value)) {
util_warning(
"threshold_value is not numeric(1): %s, setting it to default value 1.",
dQuote(head(try(as.character(threshold_value)), 1)),
Expand Down Expand Up @@ -423,7 +423,7 @@ acc_margins <- function(resp_vars = NULL, group_vars = NULL, co_vars = NULL,
res_df$threshold <- threshold_value

# select abnormalties
res_df$grading <- ifelse(res_df$margins < pars[1] |
res_df$GRADING <- ifelse(res_df$margins < pars[1] |
res_df$margins > pars[3], 1, 0)
} else if (threshold_type == "user") {
th <- threshold_value
Expand All @@ -438,7 +438,7 @@ acc_margins <- function(resp_vars = NULL, group_vars = NULL, co_vars = NULL,
}

# select abnormalties
res_df$grading <- mapply(
res_df$GRADING <- mapply(
function(th, l, u) {
ifelse(th >= l & th <= u, 0, 1)
},
Expand Down Expand Up @@ -478,7 +478,7 @@ acc_margins <- function(resp_vars = NULL, group_vars = NULL, co_vars = NULL,
y = ~margins,
ymin = ~LCL,
ymax = ~UCL,
color = ~ as.factor(grading)
color = ~ as.factor(GRADING)
),
shape = 18, size = 1,
inherit.aes = FALSE,
Expand Down Expand Up @@ -515,7 +515,7 @@ acc_margins <- function(resp_vars = NULL, group_vars = NULL, co_vars = NULL,
y = ~margins,
ymin = ~LCL,
ymax = ~UCL,
color = ~ as.factor(grading)
color = ~ as.factor(GRADING)
),
shape = 18, size = 1,
inherit.aes = FALSE,
Expand Down Expand Up @@ -581,7 +581,7 @@ acc_margins <- function(resp_vars = NULL, group_vars = NULL, co_vars = NULL,
subtitle = subtitle)

SummaryTable <- data.frame(Variables = rvs, GRADING =
as.numeric(any(res_df$grading > 0)))
as.numeric(any(res_df$GRADING > 0)))

# length(unique(fit_df$GROUP)))
# output
Expand Down
2 changes: 1 addition & 1 deletion R/acc_multivariate_outlier.R
Expand Up @@ -186,7 +186,7 @@ acc_multivariate_outlier <- function(resp_vars, id_vars = NULL, label_col,
SixSigma = sum(ds1plot$sixsigma),
Hubert = sum(ds1plot$hubert),
SigmaGap = sum(ds1plot$sigmagap),
Grading = grading
GRADING = grading
)


Expand Down
5 changes: 4 additions & 1 deletion R/acc_univariate_outlier.R
Expand Up @@ -37,6 +37,9 @@
#' [ggplot2::geom_jitter].
#'
#' @details
#'
#' **Hint*: The function is designed for unimodal data only.*
#'
#' # ALGORITHM OF THIS IMPLEMENTATION:
#'
#' - Select all variables of type float in the study data
Expand Down Expand Up @@ -339,7 +342,7 @@ acc_univariate_outlier <- function(resp_vars = NULL, label_col, study_data,
function(x) {
sum(x == 1)
})$x
st1$Grading <- ifelse(st1$"Most likely (N)" > 0, 1, 0)
st1$GRADING <- ifelse(st1$"Most likely (N)" > 0, 1, 0)

# format output
st1$Mean <- round(st1$Mean, digits = 2)
Expand Down
10 changes: 5 additions & 5 deletions R/com_segment_missingness.R
Expand Up @@ -28,14 +28,14 @@
#' This implementation uses one threshold to discriminate critical from
#' non-critical values. If direction is high than all values below the
#' threshold_value are normal (displayed in dark blue in the plot and flagged
#' with grading = 0 in the dataframe). All values above the threshold_value are
#' with GRADING = 0 in the dataframe). All values above the threshold_value are
#' considered critical. The more they deviate from the threshold the displayed
#' color shifts to dark red. All critical values are highlighted with grading =
#' color shifts to dark red. All critical values are highlighted with GRADING =
#' 1 in the summary data frame. By default, highest values are always shown in
#' dark red irrespective of the absolute deviation.
#'
#' If direction is low than all values above the threshold_value are normal
#' (displayed in dark blue, grading = 0).
#' (displayed in dark blue, GRADING = 0).
#'
#' ### Hint
#' This function does not support a `resp_vars` argument but `exclude_roles` to
Expand Down Expand Up @@ -352,10 +352,10 @@ com_segment_missingness <- function(study_data, meta_data, group_vars = NULL,
res_df$direction <- direction

if (direction == "high") {
res_df$grading <- ifelse(res_df$"(%) of missing segments" >
res_df$GRADING <- ifelse(res_df$"(%) of missing segments" >
threshold_value, 1, 0)
} else {
res_df$grading <- ifelse(res_df$"(%) of missing segments" <
res_df$GRADING <- ifelse(res_df$"(%) of missing segments" <
threshold_value, 1, 0)
}

Expand Down
8 changes: 4 additions & 4 deletions R/con_contradictions.R
Expand Up @@ -469,16 +469,16 @@ con_contradictions <- function(resp_vars = NULL, study_data, meta_data,
names(summary_df2) <- c(
"Check ID", "Check type", "Variables A and B", "A Levels",
"B Levels", "Contradictions (N)", "Contradictions (%)",
"Grading", "Label"
"GRADING", "Label"
)

summary_df2$Grading <- ordered(summary_df2$Grading)
summary_df2$GRADING <- ordered(summary_df2$GRADING)

x <- util_as_numeric(reorder(summary_df2[, 1], -summary_df2[, 1]))
lbs <- as.character(reorder(summary_df2[, 9], -summary_df2[, 1]))
# plot summary_df2
p <- ggplot(summary_df2, aes_(x = ~x, y = ~ summary_df2[, 7], fill =
~ as.ordered(Grading))) +
~ as.ordered(GRADING))) +
geom_bar(stat = "identity") +
geom_text(
y = round(summary_df2[, 7], 1) + 0.5,
Expand Down Expand Up @@ -510,7 +510,7 @@ con_contradictions <- function(resp_vars = NULL, study_data, meta_data,
TRUE))[2]))
st1$`Variables A and B` <- NULL
st1 <- st1[, c(9, 10, 1:8)]
st1 <- dplyr::rename(st1, c("GRADING" = "Grading"))
#st1 <- dplyr::rename(st1, c("GRADING" = "Grading"))

suppressWarnings({
# suppress wrong warnings: https://github.com/tidyverse/ggplot2/pull/4439/commits
Expand Down
1 change: 1 addition & 0 deletions R/dq_report.R
Expand Up @@ -102,6 +102,7 @@ dq_report <- function(study_data,
strata_vars,
cores = list(mode = "socket",
logging = FALSE,
cpus = util_detect_cores(),
load.balancing = TRUE),
specific_args = list()) {
util_prepare_dataframes(.replace_missings = FALSE)
Expand Down
12 changes: 12 additions & 0 deletions R/html_dependency_vert_dt.R
@@ -0,0 +1,12 @@
#' HTML Dependency for vertical headers in `DT::datatable`
#'
#' @return the dependency
html_dependency_vert_dt <- function() {
htmltools::htmlDependency(
name = "vertical-dt-style"
,version = "0.0.1"
,src = c(file = system.file("vertical-dt-style", package = "dataquieR"))
,stylesheet = "vertical-dt-style.css"
,script = "sort_heatmap_dt.js"
)
}
44 changes: 30 additions & 14 deletions R/print.ReportSummaryTable.R
Expand Up @@ -9,13 +9,16 @@
#' @param dt [logical] use `DT::datatables`, if installed
#' @param fillContainer [logical] if `dt` is `TRUE`, control table size,
#' see `DT::datatables`.
#' @param displayValues [logical] if `dt` is `TRUE`, also display the actual
#' values
#'
#' @inherit base::print
#' @importFrom grDevices colorRamp rgb col2rgb
#' @export
#' @return the printed object
print.ReportSummaryTable <- function(x, relative, dt = FALSE,
fillContainer = FALSE, ...) {
fillContainer = FALSE,
displayValues = FALSE, ...) {

higher_means <- attr(x, "higher_means")
if (is.null(higher_means)) higher_means <- "worse"
Expand Down Expand Up @@ -101,8 +104,13 @@ print.ReportSummaryTable <- function(x, relative, dt = FALSE,
}
a <- apply(v, 1, function(cl) {
paste0(
"<span style=\"width:100%;display:block;",
"overflow:hidden;background: ",
"<span style=\"width:100%;display:block;text-align:center;",
"color:",
rgb(255 - cl[[1]],
255 - cl[[2]],
255 - cl[[3]], maxColorValue = 255.0),
";",
"overflow:hidden;background:",
rgb(cl[[1]],
cl[[2]],
cl[[3]], maxColorValue = 255.0),
Expand All @@ -116,13 +124,27 @@ print.ReportSummaryTable <- function(x, relative, dt = FALSE,
})
cc <- apply(v, 1, function(cl) {
paste0(
"\">&nbsp;</span>"
"\">"
)
})
d <- apply(v, 1, function(cl) {
paste0(
"</span>"
)
})
if (displayValues) {
if (relative) {
dv <- paste0(round(100 * values, 0), "%")
} else {
dv <- values
}
} else {
dv <- "&nbsp;"
}
if (relative) {
paste0(a, round(100 * values, 1), "%", b, values, cc)
paste0(a, round(100 * values, 1), "%", b, values, cc, dv, d)
} else {
paste0(a, round(values, 1), b, values, cc)
paste0(a, round(values, 1), b, values, cc, dv, d)
}
})
x[, names(colors_of_hm)] <- colors_of_hm
Expand Down Expand Up @@ -162,14 +184,8 @@ print.ReportSummaryTable <- function(x, relative, dt = FALSE,
# https://stackoverflow.com/a/35775262
w$dependencies <- c(
w$dependencies,
list(htmltools::htmlDependency(
name = "vertical-dt-style"
,version = "0.0.1"
,src = c(file = system.file("", package = "dataquieR"))
,stylesheet = "vertical-dt-style.css"
,script = "sort_heatmap_dt.js"
)
))
list(html_dependency_vert_dt())
)

print(w)

Expand Down
2 changes: 1 addition & 1 deletion R/rbind.R
Expand Up @@ -90,5 +90,5 @@ rbind.ReportSummaryTable <- function(...) {
# factor.exclude = TRUE
))
)
} # TODO: xx copy attributes!!
}
}
15 changes: 15 additions & 0 deletions R/util_detect_cores.R
@@ -0,0 +1,15 @@
#' Detect cores
#'
#' See `parallel::detectCores` for further details.
#'
#' @return number of available CPU cores.
util_detect_cores <- function() {
if (requireNamespace("parallel", quietly = TRUE)) {
return(parallel::detectCores())
} else{
util_warning(c("Suggested package parallel not found,",
"autodetection of CPU cores disabled --",
"using default of 1 core only."))
return(1)
}
}
25 changes: 19 additions & 6 deletions R/util_looks_like_missing.R
Expand Up @@ -13,7 +13,11 @@
#'
#' @seealso [`acc_univariate_outlier`]
#'
util_looks_like_missing <- function(x, n_rules = 3) {
util_looks_like_missing <- function(x, n_rules = 1) {
if (any(DATA_TYPES_OF_R_TYPE[[class(x)]] %in%
c(DATA_TYPES$INTEGER, DATA_TYPES$FLOAT))) {
x <- as.numeric(x)
}
if (!is.numeric(x)) {
util_error("%s works only on numeric vectors",
dQuote("util_looks_like_missing"))
Expand All @@ -22,7 +26,7 @@ util_looks_like_missing <- function(x, n_rules = 3) {
if (all(sysmiss)) {
return(!logical(length = length(x)))
}
x[sysmiss] <- mean(x[!sysmiss], na.rm = TRUE)
# x[sysmiss] <- mean(x[!sysmiss], na.rm = TRUE)
TYPICAL_MISSINGCODES <- c(
99, 999, 9999, 99999, 999999, 9999999, 999999999
)
Expand All @@ -34,12 +38,21 @@ util_looks_like_missing <- function(x, n_rules = 3) {
.x <- gsub("0+$", "", .x)
r <- .x %in% TYPICAL_MISSINGCODES

tuk <- util_tukey(x)
tuk[sysmiss] <- 0
ssig <- util_sixsigma(x)
ssig[sysmiss] <- 0
hub <- util_hubert(x)
hub[sysmiss] <- 0
sigg <- util_sigmagap(x)
sigg[sysmiss] <- 0

return(
sysmiss | (r &
(util_tukey(x) +
util_sixsigma(x) +
util_hubert(x) +
util_sigmagap(x) >= n_rules))
(tuk +
ssig +
hub +
sigg >= n_rules))
)

}
1 change: 1 addition & 0 deletions R/util_par_pmap.R
Expand Up @@ -23,6 +23,7 @@
#' @return [list] of results of the function calls
util_par_pmap <- function(.l, .f, ...,
cores = list(mode = "socket",
cpus = util_detect_cores(),
logging = FALSE,
load.balancing = TRUE),
use_cache = FALSE) {
Expand Down
12 changes: 7 additions & 5 deletions R/util_sigmagap.R
Expand Up @@ -20,12 +20,14 @@ util_sigmagap <- function(x) {
ints$int <- c(0, diff(ints$VALUE))
ints$sigmagap <- ifelse(ints$int > xsd, 1, 0)

if (max(ints$sigmagap, na.rm = TRUE) == 1) {
if (any(!is.na(ints$sigmagap)) && max(ints$sigmagap, na.rm = TRUE) == 1) {
# if break is low
if (min(ints$VALUE[which(ints$sigmagap == 1)]) < xmu) {
ints$sigmagap[1:min(which(ints$sigmagap == 1))] <- 1
} else {
ints$sigmagap[min(which(ints$sigmagap == 1)):length(x)] <- 1
if (max(ints$VALUE[which(ints$sigmagap == 1)], na.rm = TRUE) < xmu) {
ints$sigmagap[1:min(which(ints$sigmagap == 1), na.rm = TRUE)] <- 1
}
if (min(ints$VALUE[which(ints$sigmagap == 1)], na.rm = TRUE) > xmu) {
ints$sigmagap[min(which(ints$sigmagap == 1), na.rm = TRUE):length(x)] <-
1
}
}

Expand Down

0 comments on commit 6c9e39b

Please sign in to comment.