Skip to content

Commit

Permalink
version 0.4.0
Browse files Browse the repository at this point in the history
  • Loading branch information
tsrobinson authored and cran-robot committed Feb 10, 2022
1 parent 4eb2398 commit 3752909
Show file tree
Hide file tree
Showing 27 changed files with 362 additions and 311 deletions.
8 changes: 4 additions & 4 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: rMIDAS
Title: Multiple Imputation with Denoising Autoencoders
Version: 0.3.0
Version: 0.4.0
Authors@R: c(
person(given = "Thomas",
family = "Robinson",
Expand All @@ -18,21 +18,21 @@ Authors@R: c(
)
Description: A tool for multiply imputing missing data using 'MIDAS', a deep learning method based on denoising autoencoder neural networks. This algorithm offers significant accuracy and efficiency advantages over other multiple imputation strategies, particularly when applied to large datasets with complex features. Alongside interfacing with 'Python' to run the core algorithm, this package contains functions for processing data before and after model training, running imputation model diagnostics, generating multiple completed datasets, and estimating regression models on these datasets.
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.1
Depends: R (>= 3.6.0), data.table, mltools, reticulate
Imports:
Suggests: testthat, knitr, rmarkdown
SystemRequirements: Python (>= 2.7.0)
VignetteBuilder: knitr
License: Apache License (>= 2.0)
URL: https://github.com/MIDASverse/rMIDAS
BugReports: https://github.com/MIDASverse/rMIDAS/issues
NeedsCompilation: no
Packaged: 2021-01-30 00:25:27 UTC; tomrobinson
Packaged: 2022-02-09 21:10:24 UTC; tomrobinson
Author: Thomas Robinson [aut, cre, cph]
(<https://orcid.org/0000-0001-7097-1599>),
Ranjit Lall [aut, cph] (<https://orcid.org/0000-0003-1455-3506>),
Alex Stenlake [ctb, cph]
Maintainer: Thomas Robinson <ts.robinson1994@gmail.com>
Repository: CRAN
Date/Publication: 2021-01-30 05:50:02 UTC
Date/Publication: 2022-02-10 16:10:02 UTC
51 changes: 26 additions & 25 deletions MD5
@@ -1,49 +1,50 @@
450e99720eaab2d1ebe8b385a8afe763 *DESCRIPTION
e1516f10256ae0ac5eb9d0378a5c121e *DESCRIPTION
4b0e8158ffc6dfd349c09ae2cf78bf79 *NAMESPACE
1ae7dbcfd58ed0252e8747c09a518e87 *NEWS.md
310b8c5ebd9c9142693bb13d801fb34c *R/load_utils.R
54bcecf6a71938796a02be05fb02cd16 *R/midas_functions.R
35da741b0f6ece834d88d4e2c48ae048 *R/pre_processing.R
72e952bb87fb2174353f8bc00390b6e0 *NEWS.md
6e0cd17aed32c0311fe76f2996c70530 *R/load_utils.R
26d1b0137cfb9139f0c5f16175386299 *R/midas_functions.R
58f51f0d43076486ac2a81350f2d5733 *R/pre_processing.R
0ef1815140875b09e36db14da6177157 *R/rubin_analysis.R
5f0f7a477bc22f93c04aa9388a5b1059 *R/setup.R
dfae92d29cde2eb804d727b91e0b02c2 *R/zzz.R
675c3fb7b84afdfefcdee30fc47eefb7 *README.md
a530231b4224e7197f03e30437bf016a *build/vignette.rds
dad62e621f64cde04f7fd3146db0a8a0 *R/setup.R
becdf5098e1c9a18c9bfc31688df0dd0 *R/zzz.R
9fc2a00fd510bc69fe81fa51da72c1a8 *README.md
19710b5c998a0619ba94e65348b60d93 *build/vignette.rds
d49090aff7e7d8f2614da7cfcc8dda64 *inst/CITATION
5387350a6623a416df9777722d1b85e4 *inst/doc/custom_python_versions.R
f484f6445b5229eddb00fbd8881b021b *inst/doc/custom_python_versions.Rmd
c1938cdce11c36f9c9ead57cee3e6223 *inst/doc/custom_python_versions.html
69815d9858673c275fd3251958343e93 *inst/doc/custom_python_versions.R
0c307d22142c8b940bb2eabf7a171f66 *inst/doc/custom_python_versions.Rmd
fb9c7af541576a0e4cedb75bd0b7b349 *inst/doc/custom_python_versions.html
072b1f5c9f87b73195cf00bb70bbd106 *inst/doc/imputation_demo.R
78db257a69cec810b48a1dc8bac655d5 *inst/doc/imputation_demo.Rmd
76e9243e2e6d460bb67fd07555456126 *inst/doc/imputation_demo.html
64fba0a91b3a21bbbf50fa4f8a600741 *inst/examples/basic_workflow.R
a67958e3009099ce054dfeb6836ecf22 *inst/examples/overimputation.R
8fb23fc2e93eee5bb937a21099ca4563 *inst/doc/imputation_demo.Rmd
2101464b80cbee5d820fd4ef07f36622 *inst/doc/imputation_demo.html
08369e4174aab34c69bbb86bfe800694 *inst/examples/basic_workflow.R
72a6c2985ab497dd0c35df39827011f3 *inst/examples/overimputation.R
6a02980078146cc9bdd061054e3b4bfc *inst/python/__pycache__/midas_base.cpython-36.pyc
393fee7a0486f0c55914ee243ecffa35 *inst/python/__pycache__/midas_base.cpython-37.pyc
eb00af692b487faec78c83b934afa54f *inst/python/__pycache__/midas_base.cpython-38.pyc
368cad3d8868ace931f5800300c687f9 *inst/python/__pycache__/midas_base.cpython-37.pyc
3fdc1ad51197fe665796f6cfe5e32942 *inst/python/__pycache__/midas_base.cpython-38.pyc
eb3f66e25ec913f65e7241c00dcfd437 *inst/python/__pycache__/midas_base.cpython-39.pyc
73b4dc9395165c4e27482f00d11e8592 *inst/python/midas_base.py
488fb2cb3ec9334b78245fb4e16f1937 *man/add_bin_labels.Rd
c108ce6825a14d383201734677a83d54 *man/add_missingness.Rd
deabaa625498cb22c65d443d4a60fabe *man/add_missingness.Rd
a5a30b2cc7b62e1bd61a17d593e17927 *man/coalesce_one_hot.Rd
c13c5d8b933d5dd21459c139ebf43538 *man/col_minmax.Rd
2bb86f17a81bf892ae7e2b191f6f95fe *man/combine.Rd
723a044dfc96ac49267f435af3893427 *man/complete.Rd
a221670d70dff3414cac42094b0a80f5 *man/complete.Rd
48413a5a794caa38137a9bc36ca02c4c *man/convert.Rd
ee448b97e60bee367e6e5f98ba823b16 *man/figures/logo.png
551e6d455d913ad82f2885625e08708b *man/figures/logos.drawio
0029cb10bab1c771f84aa1892f749efd *man/import_midas.Rd
a796458731eabf8e9e6bc35b2d828cf0 *man/mid_py_setup.Rd
94665912e58b1aa50605a3fd9e98b08c *man/midas_setup.Rd
094823bf4af2a3d7fed849c42a087ee7 *man/na_to_nan.Rd
d4c2d7ae85b4b52fb50e33373fac6211 *man/overimpute.Rd
d4c23b45ca9cbffa53fc72c99ea0d56e *man/overimpute.Rd
fd482c198c4096b5eb1c5a0c6941c3e1 *man/python_configured.Rd
67d01f8dd1cf19b84a79632f047922f4 *man/python_init.Rd
bf582d11c2bbd5bdad111aaad3be8b36 *man/set_python_env.Rd
02377b2e452e4f904da3e723ec4fbbac *man/set_python_env.Rd
2aaaff54cab764411588113f92dbc22a *man/skip_if_no_numpy.Rd
62467aedb62fbcd1df4133a204f2a7e9 *man/train.Rd
5cd0176f8448ae970c45b1dce8caee50 *man/train.Rd
499c92fc7cdd4e54e555f49b27dd88c9 *man/undo_minmax.Rd
316d76f801fd61c6d2121a1687c0c4cd *tests/testthat.R
d7ce0a0c02e69876725d5e39cc609feb *tests/testthat/testAnalysis.R
aaee589ee89b5f9b3a27daa0972584a8 *tests/testthat/testPreProc.R
f484f6445b5229eddb00fbd8881b021b *vignettes/custom_python_versions.Rmd
78db257a69cec810b48a1dc8bac655d5 *vignettes/imputation_demo.Rmd
0c307d22142c8b940bb2eabf7a171f66 *vignettes/custom_python_versions.Rmd
8fb23fc2e93eee5bb937a21099ca4563 *vignettes/imputation_demo.Rmd
9 changes: 6 additions & 3 deletions NEWS.md
@@ -1,11 +1,14 @@
# rMIDAS 0.4

* `python` argument in `set_python_env` renamed to `x` for clarity
* Minor fixes, including a fix for a bug in the `complete()` function
* Improved documentation

# rMIDAS 0.3

* Minor updates to underlying Python code to mirror MIDASpy v1.2.1

* Added NULL defaults to cat_cols and bin_cols parameters within `rMIDAS::convert()`

* Overimputation legend now plotted in bottom-right corner of figure

* Minor changes to README

# rMIDAS 0.2
Expand Down
26 changes: 26 additions & 0 deletions R/load_utils.R
Expand Up @@ -9,3 +9,29 @@ skip_if_no_numpy <- function() {
if (!have_numpy)
testthat::skip("numpy not available for testing")
}

#' Check whether Python is capable of executing example code
#'
#' Returns `FALSE` straight away when `reticulate::py_available()` is `FALSE`.
#' Otherwise checks, via `reticulate::py_module_available()`, that every Python
#' module in the dependency list below can be found.
#' This function is called within some examples to ensure code executes properly.
#' @keywords setup
#' @return `TRUE` if Python and every listed module are available, otherwise `FALSE`
python_configured <- function() {

  # No usable Python: skip the per-module probing entirely
  if (!reticulate::py_available()) {
    return(FALSE)
  }

  py_dep <- c("matplotlib", "numpy", "pandas", "tensorflow", "sklearn", "os",
              "random", "tensorflow_addons")

  # vapply() pins the result to one logical per module, unlike sapply()
  dep_avail <- vapply(py_dep, reticulate::py_module_available, logical(1))

  # A single missing module is enough to report "not configured"
  isTRUE(all(dep_avail))
}


13 changes: 7 additions & 6 deletions R/midas_functions.R
Expand Up @@ -94,7 +94,7 @@ train <- function(data,
savepath= gsub("//","/",tempdir()))

transf_model = FALSE
if (class(data) == "midas_pre") {
if (inherits(data, "midas_pre")) {
binary_columns <- data$bin_list
softmax_columns <- data$cat_lists
data_in <- data$data
Expand Down Expand Up @@ -142,7 +142,7 @@ complete <- function(mid_obj,
file = NULL,
file_root = NULL) {

if (!("midas_base.Midas" %in% class(mid_obj))) {
if (!inherits(mid_obj,"midas_base.Midas")) {
stop("Trained midas object not supplied to 'mid_obj' argument")
}

Expand Down Expand Up @@ -198,9 +198,10 @@ complete <- function(mid_obj,

}

if (cat_coalesce) {
cat_params <- mid_obj$preproc$cat_lists
if (cat_coalesce & !is.null(cat_params)) {


cat_params <- mid_obj$preproc$cat_lists
cat_cols <- mid_obj$preproc$cat_names

for (i in 1:length(cat_cols)) {
Expand Down Expand Up @@ -341,7 +342,7 @@ overimpute <- function(# Input data
savepath= tempdir())

transf_model = FALSE
if (class(data) == "midas_pre") {
if (inherits(data, "midas_pre")) {
binary_columns = data$bin_list
softmax_columns = data$cat_lists
transf_model = TRUE
Expand All @@ -354,7 +355,7 @@ overimpute <- function(# Input data
matplotlib <- import("matplotlib", convert = TRUE)
matplot_render <- try(matplotlib$use("TkAgg"), silent = TRUE)

if ("try-error" %in% class(matplot_render)) {
if (inherits(matplot_render, "try-error")) {
stop("Cannot load TkAgg, which is needed to render the overimputation plot.\n You can try installing TkAgg by running the following at the command line: `sudo apt-get install python3-tk' ")
}

Expand Down
8 changes: 4 additions & 4 deletions R/pre_processing.R
Expand Up @@ -35,9 +35,9 @@ convert <- function(data, bin_cols = NULL, cat_cols = NULL, minmax_scale = FALSE

# Check data input

if ("character" %in% class(data)) {
if (inherits(data,"character")) {
data.table::fread(data)
} else if (!("data.table" %in% class(data))) {
} else if (!inherits(data,"data.table")) {
data.table::setDT(data)
}

Expand Down Expand Up @@ -269,10 +269,10 @@ coalesce_one_hot <- function(X, var_name, fast = TRUE) {

#' Apply MAR missingness to data
#'
#' Helper function to re-apply binary variable labels post-imputation.
#' Helper function to add missing values to data.
#' @keywords preprocessing
#' @param X A data.frame or similar
#' @param prop Numeric between 0 and 1; the proportion of observations set to missing
#' @param prop Numeric value between 0 and 1; the proportion of observations set to missing
#' @param cols A vector of column names to be corrupted; if NULL, all columns are used
#' @export
#' @return Data with missing values
Expand Down
24 changes: 12 additions & 12 deletions R/setup.R
Expand Up @@ -4,13 +4,13 @@
#' Users comfortable with reticulate can configure Python manually using `reticulate::use_`.
#' Note: If users wish to set a custom binary/environment, this must be completed prior to the first call to either `train()` or `complete()`. The same is true if users use the reticulate package directly.
#' If users wish to switch to a different Python binary, R must be restarted prior to calling this function.
#' @param python Character string, path to python binary, or directory of virtualenv, or name of conda environment
#' @param x Character string, path to python binary, or directory of virtualenv, or name of conda environment
#' @param type Character string, specifies whether to set a python binary ("auto"), "virtualenv", or "conda"
#' @param ... Further arguments passed to `reticulate::use_condaenv()`
#' @keywords setup
#' @export
#' @return Boolean indicating whether the custom python environment was activated.
set_python_env <- function(python, type = "auto", ...) {
set_python_env <- function(x, type = "auto", ...) {

set_complete <- FALSE

Expand All @@ -19,26 +19,26 @@ set_python_env <- function(python, type = "auto", ...) {
return(set_complete)
} else if (type == "auto") {

set_py_attempt <- try(reticulate::use_python(python = python, required = TRUE),
set_py_attempt <- try(reticulate::use_python(python = x, required = TRUE),
silent = TRUE)

} else if (type == "virtualenv") {

set_py_attempt <- try(reticulate::use_virtualenv(virtualenv = python, required = TRUE),
set_py_attempt <- try(reticulate::use_virtualenv(virtualenv = x, required = TRUE),
silent = TRUE)

} else if (type == "conda") {

set_py_attempt <- try(reticulate::use_condaenv(condaenv = python, required = TRUE, ...),
set_py_attempt <- try(reticulate::use_condaenv(condaenv = x, required = TRUE, ...),
silent = TRUE)

} else {
set_py_attempt <- "error"
class(set_py_attempt) <- "try-error"
}

if ("try-error" %in% class(set_py_attempt)) {
stop("Setting user-specified python environment '",python, "' failed.
if (inherits(set_py_attempt,"try-error")) {
stop("Setting user-specified python environment '",x, "' failed.
Please check the specified path/environment and try again.")
}

Expand Down Expand Up @@ -70,7 +70,7 @@ python_init <- function() {
load_stat <- substr(py_config()$version[1],1,1)
}

if ("try-error" %in% class(load_stat)) {
if (inherits(load_stat, "try-error")) {

stop("Unable to initialise Python and required packages.\n
Please use set_python_env() to set the Python environment manually, then try again.")
Expand Down Expand Up @@ -129,7 +129,7 @@ mid_py_setup <- function() {
py_pkg_load <- sapply(py_pkgs, function (py_pkg) try(reticulate::import(py_pkg, delay_load = FALSE),
silent = TRUE))

missing_pkg <- sapply(py_pkg_load, function (x) ("try-error" %in% class(x)))
missing_pkg <- sapply(py_pkg_load, function (x) inherits(x, "try-error"))
missing_pkg <- py_dep[missing_pkg]

if ("sklearn" %in% missing_pkg) {
Expand All @@ -155,7 +155,7 @@ mid_py_setup <- function() {
python_version = "<3.9"),
silent = TRUE)

if ("try-error" %in% class(pkg_install)) {
if (inherits(pkg_install, "try-error")) {
stop("Unable to install package ", py_pkg, "\n")
}
}
Expand All @@ -176,15 +176,15 @@ mid_py_setup <- function() {
if (py_v == "3.9") {

warning("Packages installed but the R session needs to be restarted before proceeding.
Please restart R then call set_py_env('your_conda_name', type = 'conda')
Please restart R then call set_py_env('your_conda_name', type = 'conda').
rMIDAS will then be ready to train and impute missing data.")

} else {

py_pkg_load <- sapply(py_pkgs, function (py_pkg) try(reticulate::import(py_pkg, delay_load = FALSE),
silent = TRUE))

inst_check <- sum(sapply(py_pkg_load, function (x) ("try-error" %in% class(x))))
inst_check <- sum(sapply(py_pkg_load, function (x) inherits(x,"try-error")))

if (inst_check != 0) {
stop("\nUnable to load required packages after install")
Expand Down
11 changes: 11 additions & 0 deletions R/zzz.R
Expand Up @@ -4,3 +4,14 @@
options("python_initialised" = NULL)

}

# Attach hook: prints the rMIDAS banner as a package startup message.
# `libname` and `pkgname` belong to the hook signature and are not used here.
.onAttach <- function(libname, pkgname) {

  # Assemble the full banner first so it is emitted as one piece
  banner <- paste0(
    "\n## \n",
    "## rMIDAS: Multiple Imputation using Denoising Autoencoders \n",
    "## Authors: Thomas Robinson and Ranjit Lall \n",
    "## Please visit https://github.com/MIDASverse/rMIDAS for more information \n",
    "## \n"
  )

  packageStartupMessage(banner)

}

0 comments on commit 3752909

Please sign in to comment.