version 0.1.1

cran · Dec 2, 2023 · 5ead21a · 5ead21a
commit 5ead21a
Show file tree

Hide file tree

Showing 54 changed files with 1,359 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,30 @@
+Package: acro
+Title: A Tool for Automating the Statistical Disclosure Control of
+        Research Outputs
+Version: 0.1.1
+Authors@R: c(
+  person("Jim", "Smith", role = c("cre","ctb"),
+         email = "James.Smith@uwe.ac.uk", comment = c(ORCID = "0000-0001-7908-1859")),
+  person("Maha", "Albashir", role = c("aut","ctb"),
+         email = "Maha.Albashir@uwe.ac.uk"),
+  person("Richard John", "Preen", role = c("ctb"),
+         email = "Richard2.Preen@uwe.ac.uk", comment = c(ORCID = "0000-0003-3351-8132")))
+Maintainer: Jim Smith <James.Smith@uwe.ac.uk>
+Description: Assists researchers and output checkers by distinguishing between research output that is safe to publish, output that requires further analysis, and output that cannot be published because of substantial disclosure risk. A paper about the tool was presented at The United Nations Economic Commission for Europe Expert Meeting on Statistical Data Confidentiality <https://unece.org/statistics/events/SDC2023> <https://uwe-repository.worktribe.com/output/11060964>.
+License: MIT + file LICENSE
+Encoding: UTF-8
+RoxygenNote: 7.2.3
+SystemRequirements: Python (>= 3.8)
+Imports: reticulate, admiraldev, png
+Depends: R (>= 2.10)
+LazyData: true
+Suggests: spelling, testthat (>= 3.0.0)
+Config/testthat/edition: 3
+Language: en-US
+NeedsCompilation: no
+Packaged: 2023-11-30 15:55:05 UTC; m-albashir
+Author: Jim Smith [cre, ctb] (<https://orcid.org/0000-0001-7908-1859>),
+  Maha Albashir [aut, ctb],
+  Richard John Preen [ctb] (<https://orcid.org/0000-0003-3351-8132>)
+Repository: CRAN
+Date/Publication: 2023-12-01 15:00:06 UTC
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,2 @@
+YEAR: 2023
+COPYRIGHT HOLDER: acro authors
diff --git a/MD5 b/MD5
@@ -0,0 +1,53 @@
+66df58030ef74305f045eb0c5fe6a2b2 *DESCRIPTION
+c8b414483c789f2998f8d2a0a63b7fb4 *LICENSE
+08104cd46952f04dd4e91e8be0d7a847 *NAMESPACE
+45b4772199f225edd050dd9fa09882d0 *R/acro-package.R
+ec9d6ff71c2abcac4af4ccf6ac275d9a *R/acro_init.R
+8aac40c560f6b44b4a01c637a06b7925 *R/acro_regression.R
+54de98018e9bc4a0b617785e156cbf35 *R/acro_tables.R
+1ec50fc139a4ae3054557a458184ffc6 *R/create_virtualenv.R
+e113a0e13d809f63b725485c6c173307 *R/lung.R
+da76e64267bbb8a845ca4736d1434a7e *R/nursery_data.R
+074297cb30bdbe3fb1e582ebe2bd715c *R/output_commands.R
+b5b328c98dec3a80ec1569ed12a69805 *README.md
+876d8542c52da15add22233c47425d41 *data/lung.rda
+9deb42da9dc40b2b8989ea172bc8a348 *data/nursery_data.rda
+9e970cd953f2ca9c6b1ad39b2df3a2d7 *inst/WORDLIST
+aac145bd5345e4d67e3dec5a39f7a825 *man/acro_add_comments.Rd
+d2d95d77c48a774fa9124fdff61b9382 *man/acro_add_exception.Rd
+11b48103734b5554ffa36d2e87c25300 *man/acro_crosstab.Rd
+78d93d456d29b923e0770131915e7d3b *man/acro_custom_output.Rd
+41d77e49e454f7403ed6b7ba9d7a2af3 *man/acro_finalise.Rd
+11440e62d9cd674ded03699c928a842c *man/acro_glm.Rd
+f72924fc12c8c5929d1df80df6106565 *man/acro_hist.Rd
+06b8b0e580449793fc0556276a67f4dd *man/acro_init.Rd
+32f6a168685ecae86ed531a458f304cb *man/acro_lm.Rd
+6e8d532c9811390fd5258415e719f887 *man/acro_pivot_table.Rd
+5c28b63d04c8189223cb6f3fda36296f *man/acro_print_outputs.Rd
+efc88eb25145f7a15e4ee1143fa6678e *man/acro_remove_output.Rd
+f7f99f9a4cb222edce05b4e1a646f0ed *man/acro_rename_output.Rd
+c2698c082bb62ae65df0586ebf9f5eb8 *man/acro_surv_func.Rd
+cb8684711ef6b53aafc3a503a6ff9b94 *man/acro_table.Rd
+04a3a9b3e79a10edddcb7d2889fe959b *man/create_virtualenv.Rd
+bd6ae7030c5b3f378b439dc5afbfe7e3 *man/install_acro.Rd
+75f70b490326f57da278cabb051df996 *man/lung.Rd
+2b5012cf424f8b0c9a718922127e0eb7 *man/nursery_data.Rd
+332173f20d20942c02019350ea4dcadc *tests/spelling.R
+638adc75fb0bf38e0d0946dae064f21a *tests/testthat.R
+c6dc25bc8d3e702e0e70d448cc260630 *tests/testthat/XandY.jpeg
+ae99a3106d0d58f24863ed30a4f83948 *tests/testthat/test-acro_add_comments.R
+8e98b372b86bf912a75ec2e72e103199 *tests/testthat/test-acro_add_exception.R
+48f2437e3c2621dddae70296bf74092f *tests/testthat/test-acro_crosstab.R
+872b470f2489bef2da81b183b28a3085 *tests/testthat/test-acro_custom_output.R
+38108936335aa6dc540b88022d00d694 *tests/testthat/test-acro_finalise.R
+956f489906c25ac937d5c7953a707287 *tests/testthat/test-acro_glm.R
+688f2c4982fd658e7881fbe663c0c348 *tests/testthat/test-acro_hist.R
+5a5cbfcaba555ac5c3d0109b8bd04591 *tests/testthat/test-acro_init.R
+eff985aa90f75f923be27d4f2cae0c17 *tests/testthat/test-acro_lm.R
+14513cde278ea040000752b85a90ae74 *tests/testthat/test-acro_pivot_table.R
+a2a082aa05beda594c9dd4b128881ac3 *tests/testthat/test-acro_print_outputs.R
+b4a339619ab1f475562834e8a2817a97 *tests/testthat/test-acro_remove_output.R
+df2e2a8fabd981f2fa68284c77205542 *tests/testthat/test-acro_rename_output.R
+1dfd1a556767faeb9bc7384b2723d6fd *tests/testthat/test-acro_surv_func.R
+73a574f9cf268034d8dbb60615a0c961 *tests/testthat/test-acro_table.R
+aabfe8407976fdd165494bc6c1d6fb5c *tests/testthat/test-install_acro.R
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,17 @@
+# Generated by roxygen2: do not edit by hand
+
+export(acro_add_comments)
+export(acro_add_exception)
+export(acro_crosstab)
+export(acro_custom_output)
+export(acro_finalise)
+export(acro_glm)
+export(acro_hist)
+export(acro_init)
+export(acro_lm)
+export(acro_pivot_table)
+export(acro_print_outputs)
+export(acro_remove_output)
+export(acro_rename_output)
+export(acro_surv_func)
+export(acro_table)
diff --git a/R/acro-package.R b/R/acro-package.R
@@ -0,0 +1,5 @@
+# Package-specific environment holding state shared by the acro_* functions.
+# It is created unconditionally: the previous guard looked the name up in
+# emptyenv(), which never contains any binding, so it was always TRUE.
+# `ac` holds the ACRO object and stays NULL until acro_init() assigns it.
+acroEnv <- new.env(parent = emptyenv())
+acroEnv$ac <- NULL
diff --git a/R/acro_init.R b/R/acro_init.R
@@ -0,0 +1,12 @@
+#' Initialise an ACRO object
+#'
+#' Makes sure the Python virtual environment is ready, imports the Python
+#' module 'acro' and stores a new `ACRO` instance in the package environment,
+#' where the other `acro_*` functions look it up.
+#'
+#' @param suppress Whether to automatically apply suppression.
+#'
+#' @return No return value, called for side effects
+#' @export
+
+acro_init <- function(suppress = FALSE) {
+  # The virtual environment must be active before the module is imported.
+  create_virtualenv()
+  acro_module <- reticulate::import("acro", delay_load = TRUE)
+  acroEnv$ac <- acro_module$ACRO(suppress = suppress)
+}
diff --git a/R/acro_regression.R b/R/acro_regression.R
@@ -0,0 +1,40 @@
+#' Fits Ordinary Least Squares Regression
+#'
+#' Calls the `olsr` method of the ACRO object created by `acro_init()` and
+#' returns the summary of the fitted model.
+#'
+#' @param formula The formula specifying the model.
+#' @param data The data for the model.
+#'
+#' @return Regression Results Wrapper.
+#' @export
+
+acro_lm <- function(formula, data) {
+  acro_session <- acroEnv$ac
+  if (is.null(acro_session)) {
+    stop("ACRO has not been initialised. Please first call acro_init().")
+  }
+  fitted_model <- acro_session$olsr(formula, data)
+  fitted_model$summary()
+}
+
+#' Fits Logit or Probit model.
+#'
+#' Calls the `logitr` or `probitr` method of the ACRO object created by
+#' `acro_init()`, depending on `family`, and returns the summary of the fitted
+#' model.
+#'
+#' @param formula The formula specifying the model.
+#' @param data The data for the model.
+#' @param family Decide whether to fit a logit or probit model. Must be a
+#'   single value, either "logit" or "probit"; anything else raises an
+#'   "Invalid family" error.
+#'
+#' @return Regression Results Wrapper
+#' @export
+
+acro_glm <- function(formula, data, family)
+{
+  if (is.null(acroEnv$ac)) {
+    stop("ACRO has not been initialised. Please first call acro_init()")
+  }
+  invalid_family <- "Invalid family. Options for family are: logit or probit"
+  # Check the shape of `family` up front: comparing a vector or NA with `==`
+  # inside `if` fails with an unrelated condition error instead of this one.
+  is_single_name <- (is.character(family) || is.factor(family)) &&
+    length(family) == 1L && !is.na(family)
+  if (!is_single_name) {
+    stop(invalid_family)
+  }
+  model <- switch(as.character(family),
+    logit = acroEnv$ac$logitr(formula, data),
+    probit = acroEnv$ac$probitr(formula, data),
+    stop(invalid_family)
+  )
+  model$summary()
+}
diff --git a/R/acro_tables.R b/R/acro_tables.R
@@ -0,0 +1,141 @@
+#' Compute a simple cross tabulation of two (or more) factors.
+#'
+#' Forwards its arguments to the `crosstab` method of the ACRO object created
+#' by `acro_init()` and returns whatever that method returns.
+#'
+#' @param index Values to group by in the rows.
+#' @param columns Values to group by in the columns.
+#' @param values Array of values to aggregate according to the factors. Requires `aggfunc` be specified.
+#' @param aggfunc If specified, requires `values` be specified as well.
+#'
+#' @return Cross tabulation of the data
+#' @export
+
+acro_crosstab <- function(index, columns, values = NULL, aggfunc = NULL) {
+  acro_session <- acroEnv$ac
+  if (is.null(acro_session)) {
+    stop("ACRO has not been initialised. Please first call acro_init()")
+  }
+  acro_session$crosstab(index, columns, values = values, aggfunc = aggfunc)
+}
+
+#' Compute a simple cross tabulation of two (or more) factors.
+#'
+#' Calls the `crosstab` method of the ACRO object created by `acro_init()`
+#' without an aggregation function. The row and column names are taken from
+#' `dnn` or, when `dnn` is `NULL`, derived from the `index` and `columns`
+#' arguments according to `deparse.level`.
+#'
+#' @param index Values to group by in the rows.
+#' @param columns Values to group by in the columns.
+#' @param dnn The names to be given to the dimensions in the result. The first
+#'   element names the rows and the second names the columns.
+#' @param deparse.level Controls how the default `dnn` is constructed: `0`
+#'   gives empty names, `1` uses the argument only when it is a plain symbol
+#'   (empty name otherwise) and `2` uses the deparsed argument expression.
+#'   Any other value gives empty names.
+#' @param ... Any other parameters. They are not used; supplying any of them
+#'   triggers a warning.
+#'
+#' @return Cross tabulation of the data
+#' @export
+
+acro_table <- function(index, columns, dnn=NULL, deparse.level=0, ...)
+{
+  if (is.null(acroEnv$ac)) {
+    stop("ACRO has not been initialised. Please first call acro_init().")
+  }
+  # ACRO crosstab without aggregation function.
+
+  # deparse() splits long expressions over several strings; join them so that
+  # every dimension gets exactly one name.
+  expr_label <- function(expr) paste(deparse(expr), collapse = " ")
+  # Label of `expr` when it is a bare symbol, otherwise an empty name.
+  symbol_label <- function(expr) {
+    tryCatch(
+      expr_label(admiraldev::assert_symbol(expr)),
+      error = function(e) ""
+    )
+  }
+  # Capture the unevaluated arguments in this frame, before they are forced.
+  index_expr <- substitute(index)
+  columns_expr <- substitute(columns)
+
+  # Start from empty names so an unsupported deparse.level can never pick up
+  # the names left behind by an earlier call.
+  row_names <- list("")
+  col_names <- list("")
+  if (!is.null(dnn)) {
+    row_names <- list(dnn[1])
+    col_names <- list(dnn[2])
+  } else if (isTRUE(deparse.level == 1)) {
+    row_names <- list(symbol_label(index_expr))
+    col_names <- list(symbol_label(columns_expr))
+  } else if (isTRUE(deparse.level == 2)) {
+    row_names <- list(expr_label(index_expr))
+    col_names <- list(expr_label(columns_expr))
+  }
+  # The names are still recorded in the package environment, as before.
+  acroEnv$row_names <- row_names
+  acroEnv$col_names <- col_names
+
+  table <- acroEnv$ac$crosstab(index, columns, rownames=row_names, colnames=col_names)
+  # Check for any unused arguments
+  if (length(list(...)) > 0) {
+    warning("Unused arguments were provided: ", paste0(names(list(...)), collapse = ", "), "\n", "Please use the help command to learn more about the function.")
+  }
+  return(table)
+}
+
+#' Pivot table
+#'
+#' Forwards its arguments to the `pivot_table` method of the ACRO object
+#' created by `acro_init()` and returns whatever that method returns.
+#'
+#' @param data The data to operate on.
+#' @param values Column to aggregate, optional.
+#' @param index If an array is passed, it must be the same length as the data. The list can contain any of the other types (except list). Keys to group by on the pivot table index. If an array is passed, it is being used as the same manner as column values.
+#' @param columns If an array is passed, it must be the same length as the data. The list can contain any of the other types (except list). Keys to group by on the pivot table column. If an array is passed, it is being used as the same manner as column values.
+#' @param aggfunc If list of strings passed, the resulting pivot table will have hierarchical columns whose top level are the function names
+#'
+#' @return Cross tabulation of the data.
+#' @export
+
+acro_pivot_table <- function(data, values = NULL, index = NULL, columns = NULL, aggfunc = "mean") {
+  acro_session <- acroEnv$ac
+  if (is.null(acro_session)) {
+    stop("ACRO has not been initialised. Please first call acro_init()")
+  }
+  acro_session$pivot_table(
+    data,
+    values = values,
+    index = index,
+    columns = columns,
+    aggfunc = aggfunc
+  )
+}
+
+#' Histogram
+#'
+#' Calls the `hist` method of the ACRO object created by `acro_init()`, then
+#' reads the PNG file it reports and draws it on the current graphics device.
+#'
+#' @param data The object holding the data.
+#' @param column The column that will be used to plot the histogram.
+#' @param breaks Number of histogram bins to be used. Converted to an integer.
+#' @param freq If False, the result will contain the number of samples in each bin. If True, the result is the value of the probability density function at the bin.
+#' @param col The color of the plot.
+#' @param filename The name of the file where the plot will be saved.
+#'
+#' @return The histogram.
+#' @export
+
+acro_hist <- function(data, column, breaks = 10, freq = TRUE, col = NULL, filename = "histogram.png") {
+  acro_session <- acroEnv$ac
+  if (is.null(acro_session)) {
+    stop("ACRO has not been initialised. Please first call acro_init()")
+  }
+  # R-style argument names are mapped onto the names the Python method takes.
+  plot_path <- acro_session$hist(
+    data = data,
+    column = column,
+    bins = as.integer(breaks),
+    density = freq,
+    color = col,
+    filename = filename
+  )
+  # Load the saved histogram and display it.
+  grid::grid.raster(png::readPNG(plot_path))
+  plot_path
+}
+
+#' Survival analysis
+#'
+#' Calls the `surv_func` method of the ACRO object created by `acro_init()`.
+#' When `output` is "plot", the image file given by the second element of the
+#' result is also drawn on the current graphics device.
+#'
+#' @param time An array of times (censoring times or event times).
+#' @param status Status at the event time.
+#' @param output A string determine the type of output. Available options are table or plot.
+#' @param filename The name of the file where the plot will be saved.
+#'
+#' @return The survival table or plot.
+#' @export
+
+acro_surv_func <- function(time, status, output, filename = "kaplan-meier.png") {
+  acro_session <- acroEnv$ac
+  if (is.null(acro_session)) {
+    stop("ACRO has not been initialised. Please first call acro_init()")
+  }
+  results <- acro_session$surv_func(
+    time = time,
+    status = status,
+    output = output,
+    filename = filename
+  )
+  if (output == "plot") {
+    # Load the saved survival plot and display it.
+    grid::grid.raster(png::readPNG(results[[2]]))
+  }
+  results
+}
+
+
+
diff --git a/R/create_virtualenv.R b/R/create_virtualenv.R
@@ -0,0 +1,28 @@
+#' Install the python library 'acro'
+#'
+#' Installs the Python package 'acro' with `reticulate::py_install()`.
+#'
+#' @param ... Any other parameters. They are passed on to
+#'   `reticulate::py_install()`.
+#' @param envname the name of the python virtual environment
+#'
+#' @return No return value, called for side effects
+
+install_acro <- function(..., envname = "r-acro") {
+  python_package <- "acro"
+  reticulate::py_install(python_package, envname = envname, ...)
+}
+
+#' Create a python virtual environment
+#'
+#' Creates the "r-acro" virtual environment when it is missing, activates it,
+#' and installs the Python module 'acro' when it cannot be found.
+#'
+#' @param ... Any other parameters. Currently not used.
+#'
+#' @return No return value, called for side effects
+
+create_virtualenv <- function(...) {
+  env_name <- "r-acro"
+
+  # Create the virtual environment if it does not exist yet.
+  env_missing <- !reticulate::virtualenv_exists(env_name)
+  if (env_missing) {
+    reticulate::virtualenv_create(env_name, version = ">= 3.8")
+  }
+  reticulate::use_virtualenv(env_name, required = TRUE)
+
+  # Install the Python module only when it is not already available.
+  acro_missing <- !reticulate::py_module_available("acro")
+  if (acro_missing) {
+    install_acro()
+  }
+}
diff --git a/R/lung.R b/R/lung.R
@@ -0,0 +1,22 @@
+#' Lung Cancer Survival Data
+#'
+#' The lung dataset contains information about lung cancer survival.
+#'
+#' @format A data frame with columns:
+#' \describe{
+#'     \item{inst}{Institutional identification.}
+#'     \item{time}{Survival time in months.}
+#'     \item{status}{Survival status (1 = death, 0 = censored).}
+#'     \item{age}{Age of the patient at the start of the study.}
+#'     \item{sex}{Gender of the patient.}
+#'     \item{ph.ecog}{Performance status (Eastern Cooperative Oncology Group).}
+#'     \item{ph.karno}{'Karnofsky' performance status.}
+#'     \item{pat.karno}{'Karnofsky' performance status as assessed by the patient.}
+#'     \item{meal.cal}{Daily caloric intake at the start of the study.}
+#'     \item{wt.loss}{Weight loss in the last six months.}
+#'   }
+#'
+#' @examples
+#' data(lung)
+"lung"
diff --git a/R/nursery_data.R b/R/nursery_data.R
@@ -0,0 +1,24 @@
+#' Nursery Database
+#'
+#' This dataset originates from a hierarchical decision model created to evaluate applications for nursery schools.
+#'
+#' @format A data frame with 12960 rows and 9 columns:
+#' \describe{
+#'    \item{parents}{Parents' occupation}
+#'    \item{has_nurs}{Child's nursery}
+#'    \item{form}{Form of the family}
+#'    \item{children}{Number of children}
+#'    \item{housing}{Housing conditions}
+#'    \item{finance}{Financial standing of the family}
+#'    \item{social}{Social conditions}
+#'    \item{health}{Health conditions}
+#'    \item{recommend}{The ranking of applications for nursery schools}
+#' }
+#'
+#' @source \url{https://www.openml.org/search?type=data&status=active&id=26&sort=runs}
+#'
+#' @examples
+#' data(nursery_data)
+"nursery_data"