Skip to content

Commit

Permalink
version 1.0.8
Browse files Browse the repository at this point in the history
  • Loading branch information
struckma authored and cran-robot committed Aug 12, 2021
1 parent ed4ba84 commit f480dca
Show file tree
Hide file tree
Showing 194 changed files with 81,088 additions and 56,192 deletions.
25 changes: 12 additions & 13 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: dataquieR
Title: Data Quality in Epidemiological Research
Version: 1.0.5
Version: 1.0.8
Authors@R:
c(person(given = "University Medicine Greifswald",
role = "cph"),
Expand All @@ -19,29 +19,28 @@ Authors@R:
Description: A set of functions to assess data quality issues in
studies. See 'TMF' <https://www.tmf-ev.de/EnglishSite/Home.aspx>
guideline and 'DFG' <https://www.dfg.de/en/index.jsp> project at
<https://dfg-qa.ship-med.uni-greifswald.de>.
<https://dataquality.ship-med.uni-greifswald.de>.
License: BSD_2_clause + file LICENSE
URL: https://dfg-qa.ship-med.uni-greifswald.de/
URL: https://dataquality.ship-med.uni-greifswald.de/
BugReports: https://gitlab.com/libreumg/dataquier/-/issues
Depends: R (>= 3.3.0)
Imports: cowplot (>= 0.9.4), dplyr (>= 1.0.2), emmeans, ggplot2 (>=
2.1.0), ggpubr, lme4, lubridate, MASS, MultinomialCI,
parallelMap, R.devices, reshape, rlang, robustbase, utils
Suggests: anytime, digest, DT (>= 0.15), flexdashboard, htmltools,
knitr, patchwork, rmarkdown, rstudioapi, testthat (>= 2.3.2),
tibble, vdiffr
Depends: R (>= 3.6.0)
Imports: patchwork, dplyr (>= 1.0.2), emmeans, ggplot2 (>= 2.1.0),
ggpubr, lme4, lubridate, MASS, MultinomialCI, parallelMap,
R.devices, reshape, rlang, robustbase, utils
Suggests: cowplot (>= 0.9.4), anytime, digest, DT (>= 0.15),
flexdashboard, htmltools, knitr, rmarkdown, rstudioapi,
testthat (>= 2.3.2), tibble, vdiffr
VignetteBuilder: knitr
Encoding: UTF-8
KeepSource: TRUE
Language: en-US
LazyData: true
RoxygenNote: 7.1.1
NeedsCompilation: no
Packaged: 2021-02-26 15:53:58 UTC; struckmanns
Packaged: 2021-08-12 18:51:55 UTC; struckmanns
Author: University Medicine Greifswald [cph],
Adrian Richter [aut],
Carsten Oliver Schmidt [aut],
Stephan Struckmann [aut, cre]
Maintainer: Stephan Struckmann <stephan.struckmann@uni-greifswald.de>
Repository: CRAN
Date/Publication: 2021-02-26 16:20:02 UTC
Date/Publication: 2021-08-12 19:20:02 UTC
302 changes: 168 additions & 134 deletions MD5

Large diffs are not rendered by default.

12 changes: 8 additions & 4 deletions NAMESPACE
Expand Up @@ -5,8 +5,10 @@ S3method("[",dataquieR_result)
S3method("[[",dataquieR_result)
S3method(as.data.frame,dataquieR_resultset)
S3method(as.list,dataquieR_resultset)
S3method(print,ReportSummaryTable)
S3method(print,dataquieR_result)
S3method(print,dataquieR_resultset)
S3method(rbind,ReportSummaryTable)
S3method(summary,dataquieR_resultset)
export(DATA_TYPES)
export(DATA_TYPES_OF_R_TYPE)
Expand Down Expand Up @@ -35,6 +37,7 @@ export(contradiction_functions)
export(contradiction_functions_descriptions)
export(dq_report)
export(dq_report_by)
export(int_datatype_matrix)
export(pipeline_recursive_result)
export(pipeline_vectorized)
export(prep_add_to_meta)
Expand All @@ -47,13 +50,11 @@ export(prep_min_obs_level)
export(prep_pmap)
export(prep_prepare_dataframes)
export(prep_study2meta)
export(prep_valuelabels_from_data)
export(pro_applicability_matrix)
import(patchwork)
importFrom(MASS,fitdistr)
importFrom(MultinomialCI,multinomialCI)
importFrom(cowplot,draw_label)
importFrom(cowplot,ggdraw)
importFrom(cowplot,plot_grid)
importFrom(cowplot,theme_cowplot)
importFrom(dplyr,"%>%")
importFrom(dplyr,recode)
importFrom(ggplot2,aes)
Expand Down Expand Up @@ -110,6 +111,9 @@ importFrom(ggplot2,xlab)
importFrom(ggplot2,ylab)
importFrom(ggpubr,ggarrange)
importFrom(ggpubr,ggballoonplot)
importFrom(grDevices,col2rgb)
importFrom(grDevices,colorRamp)
importFrom(grDevices,rgb)
importFrom(reshape,melt)
importFrom(rlang,":=")
importFrom(rlang,.data)
Expand Down
52 changes: 50 additions & 2 deletions NEWS.md
@@ -1,6 +1,54 @@
# dataquieR (1.0.8)
* Removed formal arguments from `rbind.ReportSummaryTable`, since they are
not needed anyway and the documentation inherited for those arguments
from `rbind` in `base` contains an invalid URL that triggers a `NOTE`.

# dataquieR (1.0.7)
* Figures now have size hints as attributes.
* Added simple type conversion check indicator function of dimension
integrity, `int_datatype_matrix`.
* Corrected some error classifications
* `prep_study2meta` can now also convert factors to `dataquieR` compatible
`meta_data`/`study_data`
* Slightly improved documentation.
* Bug fix in `com_item_missingness` for textual response variables.
* Added new output slot with heat-map like tables. Implemented some generics
for those.

# dataquieR (1.0.6)
* Robustness: Ensure `DT JS` is always loaded when a dq_report report is
rendered
* Bug fix: More robust handling of DECIMALS variable attribute, if
this is delivered as a character.
* Bug Fix: `com_segment_missingness` with
`strata_vars` / `group_vars` did not work
* Bug Fix: If `label_col` was set to something other than `LABEL`,
`strata_vars` did not work for `com_unit_missingness`.
* More precise documentation.
* Fixed a bug in a utility function for the univariate outliers indicator
function, which caused many data points to be flagged as outliers by the
sigma-gap criterion.
* Made the outlier function aware of too many non-outlier points, which can
cause overly complex graphics (e.g., PDF rendering crashes the PDF reader).
* Fixes and small improvements in `dq_report`.
* Switched from `cowplot` to `patchwork` in `acc_margins`, yielding figures
that can be manipulated more easily. Please note that this change could
break existing output manipulations, since the internal structure of the
margins plots has changed. However, output manipulations were hardly
possible for margins plots before, so it is unlikely that any
pipelines are affected.
* More control over the output of the `acc_loess` function.
* More robust `prep_create_meta`: length-0 arguments are now handled by
ignoring the corresponding variable attributes altogether.
* Added a classification system for warnings and error messages to
distinguish errors based on mismatching variables for a function from
other error messages.
* https://github.com/openjournals/joss-reviews/issues/3093#issuecomment-840695360
* Some tidy up and more tests.

# dataquieR 1.0.5
* Fixed two bugs in con_inadmissible_categorical (one resp_var only and
value-limits all the same for all resp_vars)
* Fixed two bugs in `con_inadmissible_categorical` (one `resp_var` only and
value-limits all the same for all `resp_vars`)
* Changed LICENSE to BSD-2
* Slightly updated documentation
* Updated `README`-File
Expand Down
8 changes: 6 additions & 2 deletions R/000_globs.R
Expand Up @@ -20,7 +20,7 @@ DISTRIBUTIONS <- list(
#' dimensions are Completeness, Consistency and Accuracy.
#'
#' @seealso [Data Quality Concept](
#' https://dfg-qa.ship-med.uni-greifswald.de/DQconceptNew.html)
#' https://dataquality.ship-med.uni-greifswald.de/DQconceptNew.html)
#'
#' @return Only a definition, not a function, so no return value
#'
Expand Down Expand Up @@ -240,7 +240,8 @@ WELL_KNOWN_META_VARIABLE_NAMES <- list(
}
for (name in names(WELL_KNOWN_META_VARIABLE_NAMES)) {
if (exists(name, asNamespace("dataquieR"))) {
util_warning(sprintf("Variable %s is in dataquieR too!", name))
util_warning("Variable %s is in dataquieR too!", name,
applicability_problem = FALSE)
}
assign(name, WELL_KNOWN_META_VARIABLE_NAMES[[name]],
asNamespace("dataquieR"))
Expand All @@ -250,3 +251,6 @@ WELL_KNOWN_META_VARIABLE_NAMES <- list(
util_fix_rstudio_bugs()
}
# nocov end

# name of the additional system missingness column in com_item_missingness
.SM_LAB <- "ADDED: SysMiss"
41 changes: 19 additions & 22 deletions R/acc_distributions.R
Expand Up @@ -37,7 +37,7 @@
#' @importFrom stats na.omit
#' @seealso
#' [Online Documentation](
#' https://dfg-qa.ship-med.uni-greifswald.de/VIN_acc_impl_distributions.html
#' https://dataquality.ship-med.uni-greifswald.de/VIN_acc_impl_distributions.html
#' )
acc_distributions <- function(resp_vars = NULL, label_col, group_vars = NULL,
study_data, meta_data) {
Expand All @@ -56,7 +56,7 @@ acc_distributions <- function(resp_vars = NULL, label_col, group_vars = NULL,

util_correct_variable_use("group_vars",
allow_null = TRUE,
allow_more_than_one = TRUE,
allow_more_than_one = FALSE,
allow_any_obs_na = TRUE,
need_type = "!float"
)
Expand All @@ -68,28 +68,21 @@ acc_distributions <- function(resp_vars = NULL, label_col, group_vars = NULL,
util_warning(
paste0(
"All variables defined to be integer or float in the metadata are used"
)
), applicability_problem = FALSE
)
rvs <- meta_data[[label_col]][meta_data$DATA_TYPE %in%
c("integer", "float")]
rvs <- intersect(rvs, colnames(ds1))
if (length(rvs) == 0) {
util_error("No suitable variables were defined.")
util_error("No suitable variables were defined.",
applicability_problem = TRUE)
}
}

# Label assignment -----------------------------------------------------------
# temporary study data

if (length(group_vars) > 0) {
# only two grouping variables allowed
if (length(group_vars) > 1) {
group_vars <- group_vars[1]
util_warning(paste0(
"Only 1 group variables allowed. Variable: ",
paste0(group_vars[1], collapse = ", "), " was selected."
))
}

# all labelled variables
levlabs <- meta_data$VALUE_LABELS[meta_data[[label_col]] %in% group_vars]
Expand All @@ -100,7 +93,7 @@ acc_distributions <- function(resp_vars = NULL, label_col, group_vars = NULL,
util_warning(paste0(
"Variables: ", paste0(group_vars[is.na(levlabs)], collapse = ", "),
" have no assigned labels and levels."
))
), applicability_problem = TRUE)
}

# only variables with labels
Expand Down Expand Up @@ -128,10 +121,10 @@ acc_distributions <- function(resp_vars = NULL, label_col, group_vars = NULL,
util_warning(paste0(
"Variables ", paste0(rvs[!whicharenum], collapse = ", "),
" are not of type float or integer and will be removed from analyses."
))
), applicability_problem = TRUE)
rvs <- rvs[whicharenum]
if (length(rvs) == 0) {
util_warning("No variables left to analyse")
util_warning("No variables left to analyse", applicability_problem = TRUE)
return(list(SummaryPlots = list()))
}
}
Expand All @@ -145,10 +138,11 @@ acc_distributions <- function(resp_vars = NULL, label_col, group_vars = NULL,
util_warning(paste0(
"Variables ", paste0(rvs[which_all_na], collapse = ", "),
" contain NAs only and will be removed from analyses."
))
), applicability_problem = TRUE)
rvs <- rvs[!which_all_na]
if (length(rvs) == 0) {
util_warning("No variables left to analyse")
util_warning("No variables left to analyse",
applicability_problem = TRUE)
return(list(SummaryPlots = list()))
}
}
Expand All @@ -161,10 +155,11 @@ acc_distributions <- function(resp_vars = NULL, label_col, group_vars = NULL,
util_warning(paste0(
"Variables ", paste0(rvs[whichunique], collapse = ", "),
" contain only one value and will be removed from analyses."
))
), applicability_problem = TRUE)
rvs <- rvs[!whichunique]
if (length(rvs) == 0) {
util_warning("No variables left to analyse")
util_warning("No variables left to analyse",
applicability_problem = TRUE)
return(list(SummaryPlots = list()))
}
}
Expand Down Expand Up @@ -251,7 +246,8 @@ acc_distributions <- function(resp_vars = NULL, label_col, group_vars = NULL,
"codes (%s)? Will arbitrarily reduce the number of breaks below",
"10000 to avoid rendering problems."),
dQuote(rv), length(unique(breaks_x)), paste0(dQuote(likely
), collapse = " or ")
), collapse = " or "),
applicability_problem = FALSE
)
while (length(unique(breaks_x)) > 10000) {
breaks_x <- breaks_x[!is.na(breaks_x)]
Expand All @@ -260,7 +256,8 @@ acc_distributions <- function(resp_vars = NULL, label_col, group_vars = NULL,
util_warning(
paste0("For %s. Will arbitrarily reduced the number of breaks to ",
"%d <= 10000 to avoid rendering problems.", collapse = ""),
dQuote(rv), length(unique(breaks_x)))
dQuote(rv), length(unique(breaks_x)),
applicability_problem = FALSE)
}

# building the plot -------------------------------------------------------
Expand Down Expand Up @@ -349,7 +346,7 @@ acc_distributions <- function(resp_vars = NULL, label_col, group_vars = NULL,
}
}

P
util_set_size(P)
})

return(list(SummaryPlotList = plot_list))
Expand Down
36 changes: 29 additions & 7 deletions R/acc_end_digits.R
Expand Up @@ -36,7 +36,7 @@
#'
#' @seealso
#' [Online Documentation](
#' https://dfg-qa.ship-med.uni-greifswald.de/VIN_acc_impl_end_digits.html
#' https://dataquality.ship-med.uni-greifswald.de/VIN_acc_impl_end_digits.html
#' )
acc_end_digits <- function(resp_vars = NULL, study_data, meta_data,
label_col = VAR_NAMES) {
Expand All @@ -58,11 +58,12 @@ acc_end_digits <- function(resp_vars = NULL, study_data, meta_data,
mode(ds1[[resp_vars]]) != "numeric") {
# likely dead code
util_error("%s must be a non-empty vector of names of numeric variables",
dQuote("resp_vars")) # nocov
dQuote("resp_vars"), applicability_problem = TRUE) # nocov
}

if (any(is.infinite(ds1[[resp_vars]]))) {
util_error("Values in 'resp_vars' must not contain infinite data")
util_error("Values in 'resp_vars' must not contain infinite data",
applicability_problem = TRUE)
}

rvs <- resp_vars
Expand All @@ -75,19 +76,24 @@ acc_end_digits <- function(resp_vars = NULL, study_data, meta_data,
decs <- NA
}

decs <- as.integer(decs)

if (vtype == DATA_TYPES$FLOAT && (is.na(decs))) {
util_error(
"The number of digits following the decimal point must be prespecified."
"The number of digits following the decimal point must be prespecified.",
applicability_problem = FALSE
)
}

if (vtype == DATA_TYPES$FLOAT && all(util_is_integer(ds1[[rvs]]))) {
util_warning("The 'resp_vars' is of type integer.")
util_warning("The 'resp_vars' is of type integer.",
applicability_problem = TRUE)
vtype <- "integer"
}

if (vtype == DATA_TYPES$INTEGER && !(all(util_is_integer(ds1[[rvs]])))) {
util_error("The 'resp_vars' is not of type integer.")
util_error("The 'resp_vars' is not of type integer.",
applicability_problem = TRUE)
}

# create new row of metadata attributes for transformed variable of last
Expand Down Expand Up @@ -144,5 +150,21 @@ acc_end_digits <- function(resp_vars = NULL, study_data, meta_data,
)

# do not return simply res to make parsing out existing results easier
return(list(SummaryData = res$SummaryTable, SummaryPlot = res$SummaryPlot))
return(list(SummaryData = res$SummaryTable, SummaryPlot = util_set_size(
res$SummaryPlot,
width_em = 15
)))
}

#' @examples
#' \dontrun{
#' x <- acc_end_digits(
#'   resp_vars = "v00041",
#'   study_data = study_data,
#'   meta_data = meta_data
#' )
#' x <- acc_loess(
#'   resp_vars = "v00041",
#'   time_vars = "v00013",
#'   group_vars = "v00002",
#'   study_data = study_data,
#'   meta_data = meta_data
#' )
#' w <- 30
#' h <- 15
#' ggplot2::ggsave(
#'   x$SummaryPlotList$v00041,
#'   width = ggplot2::unit(w * 0.15, units = "in"),
#'   height = ggplot2::unit(h * 0.15, units = "in"),
#'   filename = "/tmp/xxx.png"
#' )
#' system("open /tmp/xxx.png")
#' # 0.03 in is roughly the default size of ggplot fonts (the original note
#' # read "eta" here -- TODO confirm the intended wording); see
#' # http://sape.inf.usi.ch/quick-reference/ggplot2/size
#' }
xxxx <- function() {
  # Placeholder: takes no arguments, does no work, and evaluates to NULL.
  # Note carried over from the original body:
  # "and run check, changed some error messages."
  NULL
}

0 comments on commit f480dca

Please sign in to comment.