diff --git a/src/interface_r/DESCRIPTION b/src/interface_r/DESCRIPTION index 301139472..1d9401469 100644 --- a/src/interface_r/DESCRIPTION +++ b/src/interface_r/DESCRIPTION @@ -1,18 +1,20 @@ Package: h2o4gpu Type: Package -Title: R Interface to 'H2O4GPU' +Title: Interface to 'H2O4GPU' Version: 0.2.0 Authors@R: c( - person("Yuan", "Tang", role = c("aut", "cre"), email = "terry@h2o.ai"), + person("Yuan", "Tang", role = c("aut", "cre"), + email = "terrytangyuan@gmail.com", + comment = c(ORCID = "0000-0001-5243-233X")), person("Navdeep", "Gill", role = c("aut"), email = "navdeep@h2o.ai"), person("Erin", "LeDell", role = c("aut"), email = "erin@h2o.ai"), person("H2O.ai", role = c("cph", "fnd"))) -Description: R Interface to 'H2O4GPU' - A collection of 'GPU' solvers for machine learning algorithms. +Description: Interface to 'H2O4GPU', a collection of 'GPU' solvers for machine learning algorithms. License: Apache License 2.0 URL: https://github.com/h2oai/h2o4gpu BugReports: https://github.com/h2oai/h2o4gpu/issues SystemRequirements: Python (>= 3.6) with header files and shared library; - h2o4gpu (https://github.com/h2oai/h2o4gpu) + H2O4GPU (https://github.com/h2oai/h2o4gpu) Encoding: UTF-8 LazyData: true Depends: diff --git a/src/interface_r/NEWS.md b/src/interface_r/NEWS.md index 1c0724bc4..4ee0fde81 100644 --- a/src/interface_r/NEWS.md +++ b/src/interface_r/NEWS.md @@ -1,3 +1,3 @@ -## h2o4gpu 0.0.1 (CRAN) +## h2o4gpu 0.2.0 (CRAN) * Initial release. diff --git a/src/interface_r/R/model.R b/src/interface_r/R/model.R index 2116502ca..7173f4f40 100644 --- a/src/interface_r/R/model.R +++ b/src/interface_r/R/model.R @@ -73,6 +73,18 @@ print.h2o4gpu_model <- function(x, ...) { #' @param ... Additional arguments (unused for now). 
#' #' @export +#' @examples +#' \dontrun{ +#' +#' library(h2o4gpu) +#' +#' # Setup dataset +#' x <- iris[1:4] +#' y <- as.integer(iris$Species) - 1 +#' +#' # Train the classifier +#' h2o4gpu.random_forest_classifier() %>% fit(x, y) +#' } fit.h2o4gpu_model <- function(object, x, y = NULL, ...) { if (inherits(object$model, "h2o4gpu.solvers.elastic_net.ElasticNet") && object$params$family == "logistic"){ if (length(unique(y)) > 2){ @@ -94,6 +106,23 @@ fit.h2o4gpu_model <- function(object, x, y = NULL, ...) { #' @param type One of "raw" or "prob", indicating the type of output: predicted values or probabilities #' @param ... Additional arguments (unused for now). #' @export +#' @examples +#' \dontrun{ +#' +#' library(h2o4gpu) +#' +#' # Setup dataset +#' x <- iris[1:4] +#' y <- as.integer(iris$Species) - 1 +#' +#' # Initialize and train the classifier +#' model <- h2o4gpu.random_forest_classifier() %>% fit(x, y) +#' +#' # Make predictions +#' predictions <- model %>% predict(x) +#' +#' } +#' predict.h2o4gpu_model <- function(object, x, type="raw", ...) { if (type == "raw") { preds <- object$model$predict(X = resolve_model_input(x), ...) @@ -122,6 +151,27 @@ predict.h2o4gpu_model <- function(object, x, type="raw", ...) { #' be used in generating predictions. #' @param ... Additional arguments (unused for now). #' @export +#' @examples +#' \dontrun{ +#' +#' library(h2o4gpu) +#' +#' # Prepare data +#' iris$Species <- as.integer(iris$Species) # convert to numeric data +#' +#' # Randomly sample 80% of the rows for the training set +#' set.seed(1) +#' train_idx <- sample(1:nrow(iris), 0.8*nrow(iris)) +#' train <- iris[train_idx, ] +#' test <- iris[-train_idx, ] +#' +#' # Train a K-Means model +#' model_km <- h2o4gpu.kmeans(n_clusters = 3L) %>% fit(train) +#' +#' # Transform test data +#' test_dist <- model_km %>% transform(test) +#' +#' } transform.h2o4gpu_model <- function(object, x, ...) { object$model$transform(X = resolve_model_input(x), ...) 
} diff --git a/src/interface_r/R/package.R b/src/interface_r/R/package.R index 2e053cd27..cca2c0796 100644 --- a/src/interface_r/R/package.R +++ b/src/interface_r/R/package.R @@ -2,6 +2,23 @@ #' #' @docType package #' @name h2o4gpu +#' +#' @examples +#' \dontrun{ +#' +#' library(h2o4gpu) +#' +#' # Setup dataset +#' x <- iris[1:4] +#' y <- as.integer(iris$Species) - 1 +#' +#' # Initialize and train the classifier +#' model <- h2o4gpu.random_forest_classifier() %>% fit(x, y) +#' +#' # Make predictions +#' predictions <- model %>% predict(x) +#' +#' } NULL h2o4gpu <- NULL diff --git a/src/interface_r/cran-comments.md b/src/interface_r/cran-comments.md index a42509e81..c5ede2de3 100644 --- a/src/interface_r/cran-comments.md +++ b/src/interface_r/cran-comments.md @@ -7,17 +7,10 @@ 0 errors | 0 warnings | 1 note +New submission + * This is a new release. ## Reverse dependencies This is a new release, so there are no reverse dependencies. - ---- - -* I have run R CMD check on the NUMBER downstream dependencies. - (Summary at ...). - -* FAILURE SUMMARY - -* All revdep maintainers were notified of the release on RELEASE DATE. diff --git a/src/interface_r/man/fit.h2o4gpu_model.Rd b/src/interface_r/man/fit.h2o4gpu_model.Rd index 9d79ea098..ae7988441 100644 --- a/src/interface_r/man/fit.h2o4gpu_model.Rd +++ b/src/interface_r/man/fit.h2o4gpu_model.Rd @@ -22,3 +22,16 @@ specified as \code{NULL}.} \description{ This function builds the model using the training data specified. 
} +\examples{ +\dontrun{ + +library(h2o4gpu) + +# Setup dataset +x <- iris[1:4] +y <- as.integer(iris$Species) - 1 + +# Train the classifier +h2o4gpu.random_forest_classifier() \%>\% fit(x, y) +} +} diff --git a/src/interface_r/man/h2o4gpu.Rd b/src/interface_r/man/h2o4gpu.Rd index 0ecd8370a..47150640a 100644 --- a/src/interface_r/man/h2o4gpu.Rd +++ b/src/interface_r/man/h2o4gpu.Rd @@ -8,3 +8,20 @@ \description{ h2o4gpu in R } +\examples{ +\dontrun{ + +library(h2o4gpu) + +# Setup dataset +x <- iris[1:4] +y <- as.integer(iris$Species) - 1 + +# Initialize and train the classifier +model <- h2o4gpu.random_forest_classifier() \%>\% fit(x, y) + +# Make predictions +predictions <- model \%>\% predict(x) + +} +} diff --git a/src/interface_r/man/predict.h2o4gpu_model.Rd b/src/interface_r/man/predict.h2o4gpu_model.Rd index 5b76245bb..d5b3fa2ba 100644 --- a/src/interface_r/man/predict.h2o4gpu_model.Rd +++ b/src/interface_r/man/predict.h2o4gpu_model.Rd @@ -20,3 +20,21 @@ be used in generating predictions.} This function makes predictions from new data using a trained H2O4GPU model and returns class predictions for classification and predicted values for regression. } +\examples{ +\dontrun{ + +library(h2o4gpu) + +# Setup dataset +x <- iris[1:4] +y <- as.integer(iris$Species) - 1 + +# Initialize and train the classifier +model <- h2o4gpu.random_forest_classifier() \%>\% fit(x, y) + +# Make predictions +predictions <- model \%>\% predict(x) + +} + +} diff --git a/src/interface_r/man/transform.h2o4gpu_model.Rd b/src/interface_r/man/transform.h2o4gpu_model.Rd index 720df7809..8c60c1522 100644 --- a/src/interface_r/man/transform.h2o4gpu_model.Rd +++ b/src/interface_r/man/transform.h2o4gpu_model.Rd @@ -17,3 +17,25 @@ be used in generating predictions.} \description{ This function transforms the given new data using a trained H2O4GPU model. 
} +\examples{ +\dontrun{ + +library(h2o4gpu) + +# Prepare data +iris$Species <- as.integer(iris$Species) # convert to numeric data + +# Randomly sample 80\% of the rows for the training set +set.seed(1) +train_idx <- sample(1:nrow(iris), 0.8*nrow(iris)) +train <- iris[train_idx, ] +test <- iris[-train_idx, ] + +# Train a K-Means model +model_km <- h2o4gpu.kmeans(n_clusters = 3L) \%>\% fit(train) + +# Transform test data +test_dist <- model_km \%>\% transform(test) + +} +} diff --git a/src/interface_r/vignettes/getting_started.Rmd b/src/interface_r/vignettes/getting_started.Rmd index 5bff545a0..f731c05ab 100644 --- a/src/interface_r/vignettes/getting_started.Rmd +++ b/src/interface_r/vignettes/getting_started.Rmd @@ -4,7 +4,7 @@ author: "Navdeep Gill, Erin LeDell, Yuan Tang" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{Vignette Title} + %\VignetteIndexEntry{H2O4GPU: Machine Learning with GPUs in R} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- @@ -74,7 +74,7 @@ ce(actual = y, predicted = pred) The tree based models (Random Forest and GBM) are built on top of the very powerful [XGBoost](https://xgboost.readthedocs.io/en/latest/) library, and the Elastic Net GLM has been built upon the POGS solver. [Proximal Graph Solver (POGS)](http://stanford.edu/%7Eboyd/papers/pogs.html) is a solver for convex optimization problems in graph form using Alternating Direction Method of Multipliers (ADMM). We have found that this method is not as fast as we'd like it to be, so we are working on implementing an entirely new GLM from scratch (follow progress [here](https://github.com/h2oai/h2o4gpu/issues/356)). -The **h2o4gpu** R package does not include a suite of internal model metrics functions, therefore we encourage users to use a third-party model metrics package of their choice. For all the examples below, we will use the [Metrics](https://cran.r-project.org/web/packages/Metrics/index.html) R package. 
This package has a large number of model metrics functions, all with a very simple, unified API. +The **h2o4gpu** R package does not include a suite of internal model metrics functions, therefore we encourage users to use a third-party model metrics package of their choice. For all the examples below, we will use the [Metrics](https://CRAN.R-project.org/package=Metrics) R package. This package has a large number of model metrics functions, all with a very simple, unified API. ### Binary Classification