From b45805e1fb43ee650b1039ba996f5c4a93863341 Mon Sep 17 00:00:00 2001 From: davidrsch Date: Tue, 29 Jul 2025 19:58:22 +0200 Subject: [PATCH 01/10] Modularizing code for enhanced readability --- NAMESPACE | 4 +- R/build_spec_function.R | 52 ++ R/create_keras_spec_helpers.R | 512 ------------------ R/generate_roxygen_docs.R | 174 ++++++ R/{generic_fit.R => generic_sequential_fit.R} | 2 +- R/register_core_model.R | 27 + R/register_fit_predict.R | 131 +++++ R/register_model_args.R | 64 +++ R/register_update_method.R | 58 ++ ..._fit_impl.Rd => generic_sequential_fit.Rd} | 8 +- register_fit_predict.R | 86 +++ 11 files changed, 600 insertions(+), 518 deletions(-) create mode 100644 R/build_spec_function.R create mode 100644 R/generate_roxygen_docs.R rename R/{generic_fit.R => generic_sequential_fit.R} (99%) create mode 100644 R/register_core_model.R create mode 100644 R/register_fit_predict.R create mode 100644 R/register_model_args.R create mode 100644 R/register_update_method.R rename man/{generic_keras_fit_impl.Rd => generic_sequential_fit.Rd} (95%) create mode 100644 register_fit_predict.R diff --git a/NAMESPACE b/NAMESPACE index 7f7ad17..2d70fc5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,9 @@ # Generated by roxygen2: do not edit by hand +export(create_keras_functional_spec) export(create_keras_spec) -export(generic_keras_fit_impl) +export(generic_functional_fit) +export(generic_sequential_fit) export(keras_losses) export(keras_metrics) export(keras_optimizers) diff --git a/R/build_spec_function.R b/R/build_spec_function.R new file mode 100644 index 0000000..da50b3c --- /dev/null +++ b/R/build_spec_function.R @@ -0,0 +1,52 @@ +#' Build the Model Specification Function +#' +#' Uses metaprogramming to construct the new model specification function +#' (e.g., `dynamic_mlp()`). This function will capture user-provided arguments +#' and package them into a `parsnip::new_model_spec()` call. 
+#' +#' @param model_name The name of the model specification function to create. +#' @param mode The model mode ("regression" or "classification"). +#' @param all_args A named list of arguments for the function signature, as +#' generated by `collect_spec_args()`. +#' @param parsnip_names A character vector of all argument names. +#' @return A new function that serves as the `parsnip` model specification. +#' @noRd +build_spec_function <- function( + model_name, + mode, + all_args, + parsnip_names, + layer_blocks +) { + quos_exprs <- purrr::map( + parsnip_names, + ~ rlang::expr(rlang::enquo(!!rlang::sym(.x))) + ) + names(quos_exprs) <- parsnip_names + + body <- rlang::expr({ + # Capture both explicit args and ... to pass to the fit impl + # Named arguments are captured into a list of quosures. + main_args <- rlang::list2(!!!quos_exprs) + # ... arguments are captured into a separate list of quosures. + dot_args <- rlang::enquos(...) + args <- c(main_args, dot_args) + parsnip::new_model_spec( + !!model_name, + args = args, + eng_args = NULL, + mode = !!mode, + method = NULL, + engine = NULL + ) + }) + + # Add ... to the function signature to capture any other compile arguments + fn_args <- c(all_args, list(... = rlang::missing_arg())) + + fn <- rlang::new_function(args = fn_args, body = body) + + docs <- generate_roxygen_docs(model_name, layer_blocks, all_args) + comment(fn) <- docs + fn +} \ No newline at end of file diff --git a/R/create_keras_spec_helpers.R b/R/create_keras_spec_helpers.R index a35788a..dc0bb41 100644 --- a/R/create_keras_spec_helpers.R +++ b/R/create_keras_spec_helpers.R @@ -1,56 +1,3 @@ -#' Post-process Keras Numeric Predictions -#' -#' Formats raw numeric predictions from a Keras model into a tibble with a -#' standardized `.pred` column. -#' -#' @param results A matrix of numeric predictions from `predict()`. -#' @param object The `parsnip` model fit object. -#' @return A tibble with a `.pred` column. 
-#' @noRd -keras_postprocess_numeric <- function(results, object) { - tibble::tibble(.pred = as.vector(results)) -} - -#' Post-process Keras Probability Predictions -#' -#' Formats raw probability predictions from a Keras model into a tibble -#' with class-specific column names. -#' -#' @param results A matrix of probability predictions from `predict()`. -#' @param object The `parsnip` model fit object. -#' @return A tibble with named columns for each class probability. -#' @noRd -keras_postprocess_probs <- function(results, object) { - # The levels are now nested inside the fit object - colnames(results) <- object$fit$lvl - tibble::as_tibble(results) -} - -#' Post-process Keras Class Predictions -#' -#' Converts raw probability predictions from a Keras model into factor-based -#' class predictions. -#' -#' @param results A matrix of probability predictions from `predict()`. -#' @param object The `parsnip` model fit object. -#' @return A tibble with a `.pred_class` column containing factor predictions. 
-#' @noRd -keras_postprocess_classes <- function(results, object) { - # The levels are now nested inside the fit object - lvls <- object$fit$lvl - if (ncol(results) == 1) { - # Binary classification - pred_class <- ifelse(results[, 1] > 0.5, lvls[2], lvls[1]) - pred_class <- factor(pred_class, levels = lvls) - } else { - # Multiclass classification - pred_class_int <- apply(results, 1, which.max) - pred_class <- lvls[pred_class_int] - pred_class <- factor(pred_class, levels = lvls) - } - tibble::tibble(.pred_class = pred_class) -} - #' Discover and Collect Model Specification Arguments #' #' Introspects the provided layer block functions to generate a list of @@ -119,462 +66,3 @@ collect_spec_args <- function( list(all_args = all_args, parsnip_names = parsnip_names) } - -#' Generate Roxygen Documentation for a Dynamic Spec Function -#' -#' Constructs a detailed Roxygen comment block as a string, which can be -#' attached to the dynamically created model specification function. -#' -#' @param model_name The name of the model. -#' @param layer_blocks The list of layer block functions. -#' @param all_args A named list of all arguments for the function signature. -#' @return A single string containing the full Roxygen documentation. -#' @noRd -generate_roxygen_docs <- function(model_name, layer_blocks, all_args) { - # Title and Description - title <- paste( - gsub("_", " ", tools::toTitleCase(model_name)), - "Model Specification" - ) - desc <- paste0( - "Defines a `parsnip` model specification for a Keras model built with ", - "custom layer blocks. This function was generated by `kerasnip::create_keras_spec()`." 
- ) - - # Parameters - param_docs <- c() - arg_names <- names(all_args) - - # Group args for structured documentation - num_params <- arg_names[startsWith(arg_names, "num_")] - compile_params <- arg_names[startsWith(arg_names, "compile_")] - global_params <- c( - "epochs", - "batch_size", - "learn_rate", - "validation_split", - "verbose" - ) - block_params <- setdiff( - arg_names, - c(num_params, compile_params, global_params) - ) - - # Document block-specific params - if (length(block_params) > 0) { - param_docs <- c( - param_docs, - purrr::map_chr(block_params, function(p) { - parts <- strsplit(p, "_", fixed = TRUE)[[1]] - block_name <- parts[1] - param_name <- paste(parts[-1], collapse = "_") - block_fn <- layer_blocks[[block_name]] - default_val <- rlang::fn_fmls(block_fn)[[param_name]] - default_str <- if ( - !is.null(default_val) && !rlang::is_missing(default_val) - ) { - paste0( - " Defaults to `", - deparse(default_val, width.cutoff = 500L), - "`." - ) - } else { - "" - } - paste0( - "@param ", - p, - " The `", - param_name, - "` for the '", - block_name, - "' block.", - default_str - ) - }) - ) - } - - # Document architecture params - if (length(num_params) > 0) { - param_docs <- c( - param_docs, - purrr::map_chr(num_params, function(p) { - block_name <- sub("num_", "", p) - paste0( - "@param ", - p, - " The number of times to repeat the '", - block_name, - "' block. Defaults to 1." - ) - }) - ) - } - - # Document global params - global_param_desc <- list( - epochs = "The total number of iterations to train the model.", - batch_size = "The number of samples per gradient update.", - learn_rate = "The learning rate for the default Adam optimizer. This is ignored if `compile_optimizer` is provided as a pre-built object.", - validation_split = "The proportion of the training data to be used as a validation set.", - verbose = "The level of verbosity for model fitting (0, 1, or 2)." 
- ) - param_docs <- c( - param_docs, - purrr::map_chr(global_params, function(p) { - paste0("@param ", p, " ", global_param_desc[[p]]) - }) - ) - - # Document compile params - compile_param_desc <- list( - compile_loss = "The loss function for compiling the model. Can be a string (e.g., 'mse') or a Keras loss object. Overrides the default.", - compile_optimizer = "The optimizer for compiling the model. Can be a string (e.g., 'sgd') or a Keras optimizer object. Overrides the default.", - compile_metrics = "A character vector of metrics to monitor during training (e.g., `c('mae', 'mse')`). Overrides the default." - ) - param_docs <- c( - param_docs, - purrr::map_chr(compile_params, function(p) { - paste0("@param ", p, " ", compile_param_desc[[p]]) - }) - ) - - # Add ... param - param_docs <- c( - param_docs, - "@param ... Additional arguments passed to `parsnip::new_model_spec()`." - ) - - # Sections - architecture_section <- c( - "#' @section Model Architecture:", - "#' The Keras model is constructed by sequentially applying the layer blocks in the order they were provided to `create_keras_spec()`.", - "#' You can control the number of times each block is repeated by setting the `num_{block_name}` argument (e.g., `num_dense = 2`).", - "#' This allows for dynamically creating deeper or more complex architectures during tuning." - ) - - compilation_section <- c( - "#' @section Model Compilation:", - "#' The model is compiled with a default optimizer, loss function, and metric based on the model's mode. You can override these defaults by providing arguments prefixed with `compile_`.", - "#' \\itemize{", - "#' \\item \\strong{Optimizer}: Defaults to `keras3::optimizer_adam()` using the `learn_rate` argument. Override with `compile_optimizer` (e.g., `\"sgd\"` or `keras3::optimizer_sgd(...)`).", - "#' \\item \\strong{Loss}: Defaults to `\"mean_squared_error\"` for regression and `\"categorical_crossentropy\"` or `\"binary_crossentropy\"` for classification. 
Override with `compile_loss`.", - "#' \\item \\strong{Metrics}: Defaults to `\"mean_absolute_error\"` for regression and `\"accuracy\"` for classification. Override with `compile_metrics` (e.g., `c(\"mae\", \"mape\")`).", - "#' }", - "#' For more details, see the documentation for `kerasnip::generic_keras_fit_impl`." - ) - - fitting_section <- c( - "#' @section Model Fitting:", - "#' The model is fit using `keras3::fit()`. You can pass any argument to this function by prefixing it with `fit_`.", - "#' For example, to add Keras callbacks, you can pass `fit_callbacks = list(callback_early_stopping())`.", - "#' The `epochs` and `batch_size` arguments are also passed to `fit()`." - ) - - # Other tags - other_tags <- c( - "#' @seealso [create_keras_spec()], [generic_keras_fit_impl()]", - "#' @export" - ) - - # Combine all parts - paste( - c( - paste0("#' ", title), - "#'", - paste0("#' ", desc), - "#'", - paste0("@", param_docs), - architecture_section, - fitting_section, - compilation_section, - other_tags - ), - collapse = "\n" - ) -} - -#' Build the Model Specification Function -#' -#' Uses metaprogramming to construct the new model specification function -#' (e.g., `dynamic_mlp()`). This function will capture user-provided arguments -#' and package them into a `parsnip::new_model_spec()` call. -#' -#' @param model_name The name of the model specification function to create. -#' @param mode The model mode ("regression" or "classification"). -#' @param all_args A named list of arguments for the function signature, as -#' generated by `collect_spec_args()`. -#' @param parsnip_names A character vector of all argument names. -#' @return A new function that serves as the `parsnip` model specification. 
-#' @noRd -build_spec_function <- function( - model_name, - mode, - all_args, - parsnip_names, - layer_blocks -) { - quos_exprs <- purrr::map( - parsnip_names, - ~ rlang::expr(rlang::enquo(!!rlang::sym(.x))) - ) - names(quos_exprs) <- parsnip_names - - body <- rlang::expr({ - # Capture both explicit args and ... to pass to the fit impl - # Named arguments are captured into a list of quosures. - main_args <- rlang::list2(!!!quos_exprs) - # ... arguments are captured into a separate list of quosures. - dot_args <- rlang::enquos(...) - args <- c(main_args, dot_args) - parsnip::new_model_spec( - !!model_name, - args = args, - eng_args = NULL, - mode = !!mode, - method = NULL, - engine = NULL - ) - }) - - # Add ... to the function signature to capture any other compile arguments - fn_args <- c(all_args, list(... = rlang::missing_arg())) - - fn <- rlang::new_function(args = fn_args, body = body) - - docs <- generate_roxygen_docs(model_name, layer_blocks, all_args) - comment(fn) <- docs - fn -} - -#' Register Core Model Information with Parsnip -#' -#' Sets up the basic model definition with `parsnip`, including its mode, -#' engine, dependencies, and data encoding requirements. -#' -#' @param model_name The name of the new model. -#' @param mode The model mode ("regression" or "classification"). -#' @return Invisibly returns `NULL`. Called for its side effects. 
-#' @noRd -register_core_model <- function(model_name, mode) { - parsnip::set_new_model(model_name) - parsnip::set_model_mode(model_name, mode) - parsnip::set_model_engine(model_name, mode, "keras") - parsnip::set_dependency(model_name, "keras", "keras3") - - parsnip::set_encoding( - model = model_name, - eng = "keras", - mode = mode, - options = list( - predictor_indicators = "traditional", - compute_intercept = TRUE, - remove_intercept = TRUE, - allow_sparse_x = FALSE - ) - ) -} - -#' Register Model Arguments with Parsnip -#' -#' Registers each model argument with `parsnip` and maps it to a corresponding -#' `dials` parameter function for tuning. This allows `tidymodels` to know -#' about the tunable parameters of the custom model. -#' -#' @param model_name The name of the new model. -#' @param parsnip_names A character vector of all argument names. -#' @return Invisibly returns `NULL`. Called for its side effects. -#' @noRd -register_model_args <- function(model_name, parsnip_names) { - keras_dials_map <- tibble::tribble( - ~keras_arg, - ~dials_fun, - "units", - "hidden_units", - "filters", - "hidden_units", - "kernel_size", - "kernel_size", - "pool_size", - "pool_size", - "dropout", - "dropout", - "rate", - "dropout", - "learn_rate", - "learn_rate", - "epochs", - "epochs", - "batch_size", - "batch_size", - "compile_loss", # parsnip arg - "loss_function_keras", # dials function from kerasnip - "compile_optimizer", # parsnip arg - "optimizer_function" # dials function from kerasnip - ) - - # We now allow optimizer to be tuned. Metrics are for tracking, not training. 
- non_tunable <- c("verbose") - - for (arg in parsnip_names) { - if (arg %in% non_tunable) { - next - } - - if (startsWith(arg, "num_")) { - dials_fun <- "num_terms" - } else { - base_arg <- sub(".*_", "", arg) - idx <- match(base_arg, keras_dials_map$keras_arg) - dials_fun <- if (!is.na(idx)) keras_dials_map$dials_fun[idx] else arg - } - - parsnip::set_model_arg( - model = model_name, - eng = "keras", - parsnip = arg, - original = arg, - func = list(pkg = "dials", fun = dials_fun), - has_submodel = FALSE - ) - } -} - -#' Register Fit and Prediction Methods with Parsnip -#' -#' Defines how to fit the custom Keras model and how to generate predictions -#' for both regression and classification modes. It links the model to the -#' generic fitting implementation (`generic_keras_fit_impl`) and sets up -#' the appropriate prediction post-processing. -#' -#' @param model_name The name of the new model. -#' @param mode The model mode ("regression" or "classification"). -#' @param layer_blocks The named list of layer block functions, which is passed -#' as a default argument to the fit function. -#' @return Invisibly returns `NULL`. Called for its side effects. 
-#' @noRd -register_fit_predict <- function(model_name, mode, layer_blocks) { - # Fit method - parsnip::set_fit( - model = model_name, - eng = "keras", - mode = mode, - value = list( - interface = "data.frame", - protect = c("x", "y"), - func = c(pkg = "kerasnip", fun = "generic_keras_fit_impl"), - defaults = list(layer_blocks = layer_blocks) - ) - ) - - # Regression prediction - if (mode == "regression") { - parsnip::set_pred( - model = model_name, - eng = "keras", - mode = "regression", - type = "numeric", - value = list( - pre = NULL, - post = keras_postprocess_numeric, - func = c(fun = "predict"), - args = list( - object = rlang::expr(object$fit$fit), - x = rlang::expr(as.matrix(new_data)) - ) - ) - ) - } else { - # Classification predictions - parsnip::set_pred( - model = model_name, - eng = "keras", - mode = "classification", - type = "class", - value = list( - pre = NULL, - post = keras_postprocess_classes, - func = c(fun = "predict"), - args = list( - object = rlang::expr(object$fit$fit), - x = rlang::expr(as.matrix(new_data)) - ) - ) - ) - parsnip::set_pred( - model = model_name, - eng = "keras", - mode = "classification", - type = "prob", - value = list( - pre = NULL, - post = keras_postprocess_probs, - func = c(fun = "predict"), - args = list( - object = rlang::expr(object$fit$fit), - x = rlang::expr(as.matrix(new_data)) - ) - ) - ) - } -} - -#' Register the `update()` S3 Method -#' -#' Creates and registers an `update()` S3 method for the new model specification. -#' This method allows users to modify the model's parameters after it has been -#' created, which is essential for tuning with `dials` and `tune`. -#' -#' @param model_name The name of the new model. -#' @param parsnip_names A character vector of all argument names. -#' @return Invisibly returns `NULL`. Called for its side effects. -#' @param env The environment in which to create the update method. 
-#' @noRd -register_update_method <- function(model_name, parsnip_names, env) { - # Build function signature - update_args_list <- c( - list(object = rlang::missing_arg(), parameters = rlang::expr(NULL)), - purrr::map(parsnip_names, ~ rlang::expr(NULL)), - list(... = rlang::missing_arg(), fresh = rlang::expr(FALSE)) - ) - names(update_args_list)[3:(2 + length(parsnip_names))] <- parsnip_names - - # Create a list of expressions like `arg_name = rlang::enquo(arg_name)` - args_enquo_exprs <- purrr::map( - parsnip_names, - ~ rlang::expr(rlang::enquo(!!rlang::sym(.x))) - ) - names(args_enquo_exprs) <- parsnip_names - - # Create the expression that builds this list inside the function body - args_enquo_list_expr <- rlang::expr( - args <- rlang::list2(!!!args_enquo_exprs) - ) - - # Create the call to `parsnip::update_spec` - update_spec_call <- rlang::expr( - parsnip::update_spec( - object = object, - parameters = parameters, - args_enquo_list = args, - fresh = fresh, - cls = !!model_name, - ... - ) - ) - - # Combine them into the final body - update_body <- rlang::call2("{", args_enquo_list_expr, update_spec_call) - - # Create and register the S3 method - update_func <- rlang::new_function( - args = update_args_list, - body = update_body - ) - method_name <- paste0("update.", model_name) - # Poke the function into the target environment (e.g., .GlobalEnv) so that - # S3 dispatch can find it. - rlang::env_poke(env, method_name, update_func) - registerS3method("update", model_name, update_func, envir = env) -} diff --git a/R/generate_roxygen_docs.R b/R/generate_roxygen_docs.R new file mode 100644 index 0000000..5a68032 --- /dev/null +++ b/R/generate_roxygen_docs.R @@ -0,0 +1,174 @@ +#' Generate Roxygen Documentation for a Dynamic Spec Function +#' +#' Constructs a detailed Roxygen comment block as a string, which can be +#' attached to the dynamically created model specification function. +#' +#' @param model_name The name of the model. 
+#' @param layer_blocks The list of layer block functions. +#' @param all_args A named list of all arguments for the function signature. +#' @return A single string containing the full Roxygen documentation. +#' @noRd +generate_roxygen_docs <- function(model_name, layer_blocks, all_args) { + # Title and Description + title <- paste( + gsub("_", " ", tools::toTitleCase(model_name)), + "Model Specification" + ) + desc <- paste0( + "Defines a `parsnip` model specification for a Keras model built with ", + "custom layer blocks. This function was generated by `kerasnip::create_keras_spec()`." + ) + + # Parameters + param_docs <- c() + arg_names <- names(all_args) + + # Group args for structured documentation + num_params <- arg_names[startsWith(arg_names, "num_")] + compile_params <- arg_names[startsWith(arg_names, "compile_")] + global_params <- c( + "epochs", + "batch_size", + "learn_rate", + "validation_split", + "verbose" + ) + block_params <- setdiff( + arg_names, + c(num_params, compile_params, global_params) + ) + + # Document block-specific params + if (length(block_params) > 0) { + param_docs <- c( + param_docs, + purrr::map_chr(block_params, function(p) { + parts <- strsplit(p, "_", fixed = TRUE)[[1]] + block_name <- parts[1] + param_name <- paste(parts[-1], collapse = "_") + block_fn <- layer_blocks[[block_name]] + default_val <- rlang::fn_fmls(block_fn)[[param_name]] + default_str <- if ( + !is.null(default_val) && !rlang::is_missing(default_val) + ) { + paste0( + " Defaults to `", + deparse(default_val, width.cutoff = 500L), + "`." + ) + } else { + "" + } + paste0( + "@param ", + p, + " The `", + param_name, + "` for the '", + block_name, + "' block.", + default_str + ) + }) + ) + } + + # Document architecture params + if (length(num_params) > 0) { + param_docs <- c( + param_docs, + purrr::map_chr(num_params, function(p) { + block_name <- sub("num_", "", p) + paste0( + "@param ", + p, + " The number of times to repeat the '", + block_name, + "' block. 
Defaults to 1." + ) + }) + ) + } + + # Document global params + global_param_desc <- list( + epochs = "The total number of iterations to train the model.", + batch_size = "The number of samples per gradient update.", + learn_rate = "The learning rate for the default Adam optimizer. This is ignored if `compile_optimizer` is provided as a pre-built object.", + validation_split = "The proportion of the training data to be used as a validation set.", + verbose = "The level of verbosity for model fitting (0, 1, or 2)." + ) + param_docs <- c( + param_docs, + purrr::map_chr(global_params, function(p) { + paste0("@param ", p, " ", global_param_desc[[p]]) + }) + ) + + # Document compile params + compile_param_desc <- list( + compile_loss = "The loss function for compiling the model. Can be a string (e.g., 'mse') or a Keras loss object. Overrides the default.", + compile_optimizer = "The optimizer for compiling the model. Can be a string (e.g., 'sgd') or a Keras optimizer object. Overrides the default.", + compile_metrics = "A character vector of metrics to monitor during training (e.g., `c('mae', 'mse')`). Overrides the default." + ) + param_docs <- c( + param_docs, + purrr::map_chr(compile_params, function(p) { + paste0("@param ", p, " ", compile_param_desc[[p]]) + }) + ) + + # Add ... param + param_docs <- c( + param_docs, + "@param ... Additional arguments passed to `parsnip::new_model_spec()`." + ) + + # Sections + architecture_section <- c( + "#' @section Model Architecture:", + "#' The Keras model is constructed by sequentially applying the layer blocks in the order they were provided to `create_keras_spec()`.", + "#' You can control the number of times each block is repeated by setting the `num_{block_name}` argument (e.g., `num_dense = 2`).", + "#' This allows for dynamically creating deeper or more complex architectures during tuning." 
+ ) + + compilation_section <- c( + "#' @section Model Compilation:", + "#' The model is compiled with a default optimizer, loss function, and metric based on the model's mode. You can override these defaults by providing arguments prefixed with `compile_`.", + "#' \\itemize{", + "#' \\item \\strong{Optimizer}: Defaults to `keras3::optimizer_adam()` using the `learn_rate` argument. Override with `compile_optimizer` (e.g., `\"sgd\"` or `keras3::optimizer_sgd(...)`).", + "#' \\item \\strong{Loss}: Defaults to `\"mean_squared_error\"` for regression and `\"categorical_crossentropy\"` or `\"binary_crossentropy\"` for classification. Override with `compile_loss`.", + "#' \\item \\strong{Metrics}: Defaults to `\"mean_absolute_error\"` for regression and `\"accuracy\"` for classification. Override with `compile_metrics` (e.g., `c(\"mae\", \"mape\")`).", + "#' }", + "#' For more details, see the documentation for `kerasnip::generic_sequential_fit`." + ) + + fitting_section <- c( + "#' @section Model Fitting:", + "#' The model is fit using `keras3::fit()`. You can pass any argument to this function by prefixing it with `fit_`.", + "#' For example, to add Keras callbacks, you can pass `fit_callbacks = list(callback_early_stopping())`.", + "#' The `epochs` and `batch_size` arguments are also passed to `fit()`." 
+ ) + + # Other tags + other_tags <- c( + "#' @seealso [create_keras_spec()], [generic_sequential_fit()]", + "#' @export" + ) + + # Combine all parts + paste( + c( + paste0("#' ", title), + "#'", + paste0("#' ", desc), + "#'", + paste0("@", param_docs), + architecture_section, + fitting_section, + compilation_section, + other_tags + ), + collapse = "\n" + ) +} \ No newline at end of file diff --git a/R/generic_fit.R b/R/generic_sequential_fit.R similarity index 99% rename from R/generic_fit.R rename to R/generic_sequential_fit.R index 35b308d..c772ce4 100644 --- a/R/generic_fit.R +++ b/R/generic_sequential_fit.R @@ -62,7 +62,7 @@ #' @return A `parsnip` model fit object. #' @keywords internal #' @export -generic_keras_fit_impl <- function( +generic_sequential_fit <- function( x, y, layer_blocks, diff --git a/R/register_core_model.R b/R/register_core_model.R new file mode 100644 index 0000000..3928fbf --- /dev/null +++ b/R/register_core_model.R @@ -0,0 +1,27 @@ +#' Register Core Model Information with Parsnip +#' +#' Sets up the basic model definition with `parsnip`, including its mode, +#' engine, dependencies, and data encoding requirements. +#' +#' @param model_name The name of the new model. +#' @param mode The model mode ("regression" or "classification"). +#' @return Invisibly returns `NULL`. Called for its side effects. 
+#' @noRd +register_core_model <- function(model_name, mode) { + parsnip::set_new_model(model_name) + parsnip::set_model_mode(model_name, mode) + parsnip::set_model_engine(model_name, mode, "keras") + parsnip::set_dependency(model_name, "keras", "keras3") + + parsnip::set_encoding( + model = model_name, + eng = "keras", + mode = mode, + options = list( + predictor_indicators = "traditional", + compute_intercept = TRUE, + remove_intercept = TRUE, + allow_sparse_x = FALSE + ) + ) +} \ No newline at end of file diff --git a/R/register_fit_predict.R b/R/register_fit_predict.R new file mode 100644 index 0000000..afe8ac2 --- /dev/null +++ b/R/register_fit_predict.R @@ -0,0 +1,131 @@ +#' Register Fit and Prediction Methods with Parsnip +#' +#' Defines how to fit the custom Keras model and how to generate predictions +#' for both regression and classification modes. It links the model to the +#' generic fitting implementation (`generic_sequential_fit`) and sets up +#' the appropriate prediction post-processing. +#' +#' @param model_name The name of the new model. +#' @param mode The model mode ("regression" or "classification"). +#' @param layer_blocks The named list of layer block functions, which is passed +#' as a default argument to the fit function. +#' @return Invisibly returns `NULL`. Called for its side effects. 
+#' @noRd +register_fit_predict <- function(model_name, mode, layer_blocks) { + # Fit method + parsnip::set_fit( + model = model_name, + eng = "keras", + mode = mode, + value = list( + interface = "data.frame", + protect = c("x", "y"), + func = c(pkg = "kerasnip", fun = "generic_sequential_fit"), + defaults = list(layer_blocks = layer_blocks) + ) + ) + + # Regression prediction + if (mode == "regression") { + parsnip::set_pred( + model = model_name, + eng = "keras", + mode = "regression", + type = "numeric", + value = list( + pre = NULL, + post = keras_postprocess_numeric, + func = c(fun = "predict"), + args = list( + object = rlang::expr(object$fit$fit), + x = rlang::expr(as.matrix(new_data)) + ) + ) + ) + } else { + # Classification predictions + parsnip::set_pred( + model = model_name, + eng = "keras", + mode = "classification", + type = "class", + value = list( + pre = NULL, + post = keras_postprocess_classes, + func = c(fun = "predict"), + args = list( + object = rlang::expr(object$fit$fit), + x = rlang::expr(as.matrix(new_data)) + ) + ) + ) + parsnip::set_pred( + model = model_name, + eng = "keras", + mode = "classification", + type = "prob", + value = list( + pre = NULL, + post = keras_postprocess_probs, + func = c(fun = "predict"), + args = list( + object = rlang::expr(object$fit$fit), + x = rlang::expr(as.matrix(new_data)) + ) + ) + ) + } +} + +#' Post-process Keras Numeric Predictions +#' +#' Formats raw numeric predictions from a Keras model into a tibble with a +#' standardized `.pred` column. +#' +#' @param results A matrix of numeric predictions from `predict()`. +#' @param object The `parsnip` model fit object. +#' @return A tibble with a `.pred` column. +#' @noRd +keras_postprocess_numeric <- function(results, object) { + tibble::tibble(.pred = as.vector(results)) +} + +#' Post-process Keras Probability Predictions +#' +#' Formats raw probability predictions from a Keras model into a tibble +#' with class-specific column names. 
+#' +#' @param results A matrix of probability predictions from `predict()`. +#' @param object The `parsnip` model fit object. +#' @return A tibble with named columns for each class probability. +#' @noRd +keras_postprocess_probs <- function(results, object) { + # The levels are now nested inside the fit object + colnames(results) <- object$fit$lvl + tibble::as_tibble(results) +} + +#' Post-process Keras Class Predictions +#' +#' Converts raw probability predictions from a Keras model into factor-based +#' class predictions. +#' +#' @param results A matrix of probability predictions from `predict()`. +#' @param object The `parsnip` model fit object. +#' @return A tibble with a `.pred_class` column containing factor predictions. +#' @noRd +keras_postprocess_classes <- function(results, object) { + # The levels are now nested inside the fit object + lvls <- object$fit$lvl + if (ncol(results) == 1) { + # Binary classification + pred_class <- ifelse(results[, 1] > 0.5, lvls[2], lvls[1]) + pred_class <- factor(pred_class, levels = lvls) + } else { + # Multiclass classification + pred_class_int <- apply(results, 1, which.max) + pred_class <- lvls[pred_class_int] + pred_class <- factor(pred_class, levels = lvls) + } + tibble::tibble(.pred_class = pred_class) +} \ No newline at end of file diff --git a/R/register_model_args.R b/R/register_model_args.R new file mode 100644 index 0000000..3139123 --- /dev/null +++ b/R/register_model_args.R @@ -0,0 +1,64 @@ +#' Register Model Arguments with Parsnip +#' +#' Registers each model argument with `parsnip` and maps it to a corresponding +#' `dials` parameter function for tuning. This allows `tidymodels` to know +#' about the tunable parameters of the custom model. +#' +#' @param model_name The name of the new model. +#' @param parsnip_names A character vector of all argument names. +#' @return Invisibly returns `NULL`. Called for its side effects. 
+#' @noRd +register_model_args <- function(model_name, parsnip_names) { + keras_dials_map <- tibble::tribble( + ~keras_arg, + ~dials_fun, + "units", + "hidden_units", + "filters", + "hidden_units", + "kernel_size", + "kernel_size", + "pool_size", + "pool_size", + "dropout", + "dropout", + "rate", + "dropout", + "learn_rate", + "learn_rate", + "epochs", + "epochs", + "batch_size", + "batch_size", + "compile_loss", # parsnip arg + "loss_function_keras", # dials function from kerasnip + "compile_optimizer", # parsnip arg + "optimizer_function" # dials function from kerasnip + ) + + # We now allow optimizer to be tuned. Metrics are for tracking, not training. + non_tunable <- c("verbose") + + for (arg in parsnip_names) { + if (arg %in% non_tunable) { + next + } + + if (startsWith(arg, "num_")) { + dials_fun <- "num_terms" + } else { + base_arg <- sub(".*_", "", arg) + idx <- match(base_arg, keras_dials_map$keras_arg) + dials_fun <- if (!is.na(idx)) keras_dials_map$dials_fun[idx] else arg + } + + parsnip::set_model_arg( + model = model_name, + eng = "keras", + parsnip = arg, + original = arg, + func = list(pkg = "dials", fun = dials_fun), + has_submodel = FALSE + ) + } +} \ No newline at end of file diff --git a/R/register_update_method.R b/R/register_update_method.R new file mode 100644 index 0000000..b694ea3 --- /dev/null +++ b/R/register_update_method.R @@ -0,0 +1,58 @@ +#' Register the `update()` S3 Method +#' +#' Creates and registers an `update()` S3 method for the new model specification. +#' This method allows users to modify the model's parameters after it has been +#' created, which is essential for tuning with `dials` and `tune`. +#' +#' @param model_name The name of the new model. +#' @param parsnip_names A character vector of all argument names. +#' @return Invisibly returns `NULL`. Called for its side effects. +#' @param env The environment in which to create the update method. 
+#' @noRd +register_update_method <- function(model_name, parsnip_names, env) { + # Build function signature + update_args_list <- c( + list(object = rlang::missing_arg(), parameters = rlang::expr(NULL)), + purrr::map(parsnip_names, ~ rlang::expr(NULL)), + list(... = rlang::missing_arg(), fresh = rlang::expr(FALSE)) + ) + names(update_args_list)[3:(2 + length(parsnip_names))] <- parsnip_names + + # Create a list of expressions like `arg_name = rlang::enquo(arg_name)` + args_enquo_exprs <- purrr::map( + parsnip_names, + ~ rlang::expr(rlang::enquo(!!rlang::sym(.x))) + ) + names(args_enquo_exprs) <- parsnip_names + + # Create the expression that builds this list inside the function body + args_enquo_list_expr <- rlang::expr( + args <- rlang::list2(!!!args_enquo_exprs) + ) + + # Create the call to `parsnip::update_spec` + update_spec_call <- rlang::expr( + parsnip::update_spec( + object = object, + parameters = parameters, + args_enquo_list = args, + fresh = fresh, + cls = !!model_name, + ... + ) + ) + + # Combine them into the final body + update_body <- rlang::call2("{", args_enquo_list_expr, update_spec_call) + + # Create and register the S3 method + update_func <- rlang::new_function( + args = update_args_list, + body = update_body + ) + method_name <- paste0("update.", model_name) + # Poke the function into the target environment (e.g., .GlobalEnv) so that + # S3 dispatch can find it. 
+ rlang::env_poke(env, method_name, update_func) + registerS3method("update", model_name, update_func, envir = env) +} \ No newline at end of file diff --git a/man/generic_keras_fit_impl.Rd b/man/generic_sequential_fit.Rd similarity index 95% rename from man/generic_keras_fit_impl.Rd rename to man/generic_sequential_fit.Rd index 9d7d15e..8d872f4 100644 --- a/man/generic_keras_fit_impl.Rd +++ b/man/generic_sequential_fit.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/generic_fit.R -\name{generic_keras_fit_impl} -\alias{generic_keras_fit_impl} +% Please edit documentation in R/generic_sequential_fit.R +\name{generic_sequential_fit} +\alias{generic_sequential_fit} \title{Generic Keras Model Fitting Implementation} \usage{ -generic_keras_fit_impl( +generic_sequential_fit( x, y, layer_blocks, diff --git a/register_fit_predict.R b/register_fit_predict.R new file mode 100644 index 0000000..4dd8140 --- /dev/null +++ b/register_fit_predict.R @@ -0,0 +1,86 @@ +#' Register Fit and Prediction Methods with Parsnip +#' +#' Defines how to fit the custom Keras model and how to generate predictions +#' for both regression and classification modes. It links the model to the +#' generic fitting implementation (`generic_keras_fit_impl`) and sets up +#' the appropriate prediction post-processing. +#' +#' @param model_name The name of the new model. +#' @param mode The model mode ("regression" or "classification"). +#' @param layer_blocks The named list of layer block functions, which is passed +#' as a default argument to the fit function. +#' @return Invisibly returns `NULL`. Called for its side effects. +#' @param functional A logical, if TRUE uses `generic_keras_functional_fit_impl` to fit, otherwise `generic_keras_fit_impl`. Defaults to FALSE. 
+#' @noRd +register_fit_predict <- function(model_name, mode, layer_blocks) { + # Fit method + parsnip::set_fit( + model = model_name, + eng = "keras", + mode = mode, + value = list( + interface = "data.frame", + protect = c("x", "y"), + func = c( + pkg = "kerasnip", + fun = if (functional) { + "generic_keras_functional_fit_impl" + } else { + "generic_keras_fit_impl" + } + ), + defaults = list(layer_blocks = layer_blocks) + ) + ) + + # Regression prediction + if (mode == "regression") { + parsnip::set_pred( + model = model_name, + eng = "keras", + mode = "regression", + type = "numeric", + value = list( + pre = NULL, + post = keras_postprocess_numeric, + func = c(fun = "predict"), + args = list( + object = rlang::expr(object$fit$fit), + x = rlang::expr(as.matrix(new_data)) + ) + ) + ) + } else { + # Classification predictions + parsnip::set_pred( + model = model_name, + eng = "keras", + mode = "classification", + type = "class", + value = list( + pre = NULL, + post = keras_postprocess_classes, + func = c(fun = "predict"), + args = list( + object = rlang::expr(object$fit$fit), + x = rlang::expr(as.matrix(new_data)) + ) + ) + ) + parsnip::set_pred( + model = model_name, + eng = "keras", + mode = "classification", + type = "prob", + value = list( + pre = NULL, + post = keras_postprocess_probs, + func = c(fun = "predict"), + args = list( + object = rlang::expr(object$fit$fit), + x = rlang::expr(as.matrix(new_data)) + ) + ) + ) + } +} \ No newline at end of file From 58e511b5341746de5a20b16db58a92624c37490f Mon Sep 17 00:00:00 2001 From: davidrsch Date: Wed, 30 Jul 2025 10:57:24 +0200 Subject: [PATCH 02/10] Changing create_keras_spec to create_keras_sequential_spec and preparing for functional support --- R/build_spec_function.R | 13 ++- ..._spec.R => create_keras_sequential_spec.R} | 23 ++--- R/create_keras_spec_helpers.R | 37 ++++++++ R/generate_roxygen_docs.R | 52 ++++++++--- R/generic_sequential_fit.R | 2 +- R/register_fit_predict.R | 14 ++- R/remove_spec.R | 6 +- 
...pec.Rd => create_keras_sequential_spec.Rd} | 12 +-- man/generic_sequential_fit.Rd | 2 +- man/remove_keras_spec.Rd | 6 +- register_fit_predict.R | 86 ------------------- tests/testthat/test-e2e-classification.R | 2 +- tests/testthat/test-e2e-features.R | 8 +- tests/testthat/test-e2e-multiblock-tuning.R | 2 +- tests/testthat/test-e2e-regression.R | 2 +- tests/testthat/test-e2e-spec-removal.R | 2 +- tests/testthat/test-e2e-tuning.R | 2 +- 17 files changed, 129 insertions(+), 142 deletions(-) rename R/{create_keras_spec.R => create_keras_sequential_spec.R} (88%) rename man/{create_keras_spec.Rd => create_keras_sequential_spec.Rd} (93%) delete mode 100644 register_fit_predict.R diff --git a/R/build_spec_function.R b/R/build_spec_function.R index da50b3c..18328a8 100644 --- a/R/build_spec_function.R +++ b/R/build_spec_function.R @@ -10,13 +10,15 @@ #' generated by `collect_spec_args()`. #' @param parsnip_names A character vector of all argument names. #' @return A new function that serves as the `parsnip` model specification. +#' @param functional A logical indicating if the model is functional or sequential. #' @noRd build_spec_function <- function( model_name, mode, all_args, parsnip_names, - layer_blocks + layer_blocks, + functional = FALSE ) { quos_exprs <- purrr::map( parsnip_names, @@ -46,7 +48,12 @@ build_spec_function <- function( fn <- rlang::new_function(args = fn_args, body = body) - docs <- generate_roxygen_docs(model_name, layer_blocks, all_args) + docs <- generate_roxygen_docs( + model_name, + layer_blocks, + all_args, + functional = functional + ) comment(fn) <- docs fn -} \ No newline at end of file +} diff --git a/R/create_keras_spec.R b/R/create_keras_sequential_spec.R similarity index 88% rename from R/create_keras_spec.R rename to R/create_keras_sequential_spec.R index 50355fe..fed38af 100644 --- a/R/create_keras_spec.R +++ b/R/create_keras_sequential_spec.R @@ -30,7 +30,7 @@ #' 3. The final block should add the output layer. 
For classification, it can #' accept a \code{num_classes} argument, which is provided automatically. #' -#' The \code{create_keras_spec()} function will inspect the arguments of your +#' The \code{create_keras_sequential_spec()} function will inspect the arguments of your #' \code{layer_blocks} functions (ignoring \code{input_shape} and \code{num_classes}) #' and make them available as arguments in the generated model specification, #' prefixed with the block's name (e.g., @@ -75,7 +75,7 @@ #' } #' #' # 2. Create the spec, providing blocks in the correct order. -#' create_keras_spec( +#' create_keras_sequential_spec( #' model_name = "my_mlp", #' layer_blocks = list( #' input = input_block, @@ -97,7 +97,7 @@ #' print(model_spec) #' } #' } -create_keras_spec <- function( +create_keras_sequential_spec <- function( model_name, layer_blocks, mode = c("regression", "classification"), @@ -105,20 +105,11 @@ create_keras_spec <- function( env = parent.frame() ) { mode <- arg_match(mode) - args_info <- collect_spec_args(layer_blocks) - spec_fun <- build_spec_function( + create_keras_spec_impl( model_name, + layer_blocks, mode, - args_info$all_args, - args_info$parsnip_names, - layer_blocks + functional = FALSE, + env ) - - register_core_model(model_name, mode) - register_model_args(model_name, args_info$parsnip_names) - register_fit_predict(model_name, mode, layer_blocks) - register_update_method(model_name, args_info$parsnip_names, env = env) - - env_poke(env, model_name, spec_fun) - invisible(NULL) } diff --git a/R/create_keras_spec_helpers.R b/R/create_keras_spec_helpers.R index dc0bb41..dc0b401 100644 --- a/R/create_keras_spec_helpers.R +++ b/R/create_keras_spec_helpers.R @@ -66,3 +66,40 @@ collect_spec_args <- function( list(all_args = all_args, parsnip_names = parsnip_names) } + +#' Internal Implementation for Creating Keras Specifications +#' +#' This is the core logic for both `create_keras_sequential_spec` and +#' `create_keras_functional_spec`. 
It is not intended for direct use. +#' +#' @inheritParams create_keras_sequential_spec +#' @param functional A logical, if `TRUE`, registers the model to be fit with +#' the Functional API (`generic_functional_fit`). Otherwise, uses the +#' Sequential API (`generic_sequential_fit`). +#' +#' @noRd +create_keras_spec_impl <- function( + model_name, + layer_blocks, + mode, + functional, + env +) { + args_info <- collect_spec_args(layer_blocks) + spec_fun <- build_spec_function( + model_name, + mode, + args_info$all_args, + args_info$parsnip_names, + layer_blocks, + functional = functional + ) + + register_core_model(model_name, mode) + register_model_args(model_name, args_info$parsnip_names) + register_fit_predict(model_name, mode, layer_blocks, functional = functional) + register_update_method(model_name, args_info$parsnip_names, env = env) + + rlang::env_poke(env, model_name, spec_fun) + invisible(NULL) +} diff --git a/R/generate_roxygen_docs.R b/R/generate_roxygen_docs.R index 5a68032..b270cd4 100644 --- a/R/generate_roxygen_docs.R +++ b/R/generate_roxygen_docs.R @@ -6,9 +6,15 @@ #' @param model_name The name of the model. #' @param layer_blocks The list of layer block functions. #' @param all_args A named list of all arguments for the function signature. +#' @param functional A logical indicating if the model is functional or sequential. #' @return A single string containing the full Roxygen documentation. #' @noRd -generate_roxygen_docs <- function(model_name, layer_blocks, all_args) { +generate_roxygen_docs <- function( + model_name, + layer_blocks, + all_args, + functional = FALSE +) { # Title and Description title <- paste( gsub("_", " ", tools::toTitleCase(model_name)), @@ -16,7 +22,13 @@ generate_roxygen_docs <- function(model_name, layer_blocks, all_args) { ) desc <- paste0( "Defines a `parsnip` model specification for a Keras model built with ", - "custom layer blocks. This function was generated by `kerasnip::create_keras_spec()`." + "custom layer blocks. 
This function was generated by `kerasnip::", + if (isTRUE(functional)) { + "create_keras_functional_spec" + } else { + "create_keras_sequential_spec" + }, + "()`." ) # Parameters @@ -125,12 +137,26 @@ generate_roxygen_docs <- function(model_name, layer_blocks, all_args) { ) # Sections - architecture_section <- c( - "#' @section Model Architecture:", - "#' The Keras model is constructed by sequentially applying the layer blocks in the order they were provided to `create_keras_spec()`.", - "#' You can control the number of times each block is repeated by setting the `num_{block_name}` argument (e.g., `num_dense = 2`).", - "#' This allows for dynamically creating deeper or more complex architectures during tuning." - ) + if (isTRUE(functional)) { + architecture_section <- c( + "#' @section Model Architecture (Functional API):", + "#' The Keras model is constructed using the Functional API. Each layer block function's arguments", + "#' determine its inputs. For example, a block `function(input_a, input_b, ...)` will be connected", + "#' to the outputs of the `input_a` and `input_b` blocks.", + "#' The first block in `layer_blocks` is assumed to be the input layer and should not have inputs from other layers." + ) + see_also_fit <- "generic_functional_fit()" + see_also_create <- "create_keras_functional_spec()" + } else { + architecture_section <- c( + "#' @section Model Architecture (Sequential API):", + "#' The Keras model is constructed by sequentially applying the layer blocks in the order they were provided to `create_keras_sequential_spec()`.", + "#' You can control the number of times each block is repeated by setting the `num_{block_name}` argument (e.g., `num_dense = 2`).", + "#' This allows for dynamically creating deeper or more complex architectures during tuning." 
+ ) + see_also_fit <- "generic_sequential_fit()" + see_also_create <- "create_keras_sequential_spec()" + } compilation_section <- c( "#' @section Model Compilation:", @@ -140,7 +166,11 @@ generate_roxygen_docs <- function(model_name, layer_blocks, all_args) { "#' \\item \\strong{Loss}: Defaults to `\"mean_squared_error\"` for regression and `\"categorical_crossentropy\"` or `\"binary_crossentropy\"` for classification. Override with `compile_loss`.", "#' \\item \\strong{Metrics}: Defaults to `\"mean_absolute_error\"` for regression and `\"accuracy\"` for classification. Override with `compile_metrics` (e.g., `c(\"mae\", \"mape\")`).", "#' }", - "#' For more details, see the documentation for `kerasnip::generic_sequential_fit`." + paste0( + "#' For more details, see the documentation for `kerasnip::", + see_also_fit, + "`." + ) ) fitting_section <- c( @@ -152,7 +182,7 @@ generate_roxygen_docs <- function(model_name, layer_blocks, all_args) { # Other tags other_tags <- c( - "#' @seealso [create_keras_spec()], [generic_sequential_fit()]", + paste0("#' @seealso [", see_also_create, "], [", see_also_fit, "]"), "#' @export" ) @@ -171,4 +201,4 @@ generate_roxygen_docs <- function(model_name, layer_blocks, all_args) { ), collapse = "\n" ) -} \ No newline at end of file +} diff --git a/R/generic_sequential_fit.R b/R/generic_sequential_fit.R index c772ce4..e809e88 100644 --- a/R/generic_sequential_fit.R +++ b/R/generic_sequential_fit.R @@ -1,7 +1,7 @@ #' Generic Keras Model Fitting Implementation #' #' @description -#' This function is the internal engine for fitting models generated by `create_keras_spec()`. +#' This function is the internal engine for fitting models generated by `create_keras_sequential_spec()`. #' It is not intended to be called directly by the user. 
#' #' @details diff --git a/R/register_fit_predict.R b/R/register_fit_predict.R index afe8ac2..4c7862b 100644 --- a/R/register_fit_predict.R +++ b/R/register_fit_predict.R @@ -10,8 +10,9 @@ #' @param layer_blocks The named list of layer block functions, which is passed #' as a default argument to the fit function. #' @return Invisibly returns `NULL`. Called for its side effects. +#' @param functional A logical, if TRUE uses `generic_functional_fit` to fit, otherwise `generic_keras_fit_impl`. Defaults to FALSE. #' @noRd -register_fit_predict <- function(model_name, mode, layer_blocks) { +register_fit_predict <- function(model_name, mode, layer_blocks, functional) { # Fit method parsnip::set_fit( model = model_name, @@ -20,7 +21,14 @@ register_fit_predict <- function(model_name, mode, layer_blocks) { value = list( interface = "data.frame", protect = c("x", "y"), - func = c(pkg = "kerasnip", fun = "generic_sequential_fit"), + func = c( + pkg = "kerasnip", + fun = if (functional) { + "generic_functional_fit" + } else { + "generic_sequential_fit" + } + ), defaults = list(layer_blocks = layer_blocks) ) ) @@ -128,4 +136,4 @@ keras_postprocess_classes <- function(results, object) { pred_class <- factor(pred_class, levels = lvls) } tibble::tibble(.pred_class = pred_class) -} \ No newline at end of file +} diff --git a/R/remove_spec.R b/R/remove_spec.R index c9b0c42..a5bb02d 100644 --- a/R/remove_spec.R +++ b/R/remove_spec.R @@ -1,13 +1,13 @@ #' Remove a Keras Model Specification #' #' This function removes a model specification function that was previously -#' created by `create_keras_spec()` from an environment. +#' created by `create_keras_sequential_spec()` from an environment. #' #' @param model_name A character string giving the name of the model #' specification function to remove. #' @param env The environment from which to remove the function. 
Defaults to #' the calling environment (`parent.frame()`), which is typically where -#' `create_keras_spec()` would have created the function. +#' `create_keras_sequential_spec()` would have created the function. #' @return Invisibly returns `TRUE` if the function was found and removed, #' and `FALSE` otherwise. #' @export @@ -17,7 +17,7 @@ #' dense_block <- function(model, units = 16) { #' model |> keras3::layer_dense(units = units) #' } -#' create_keras_spec("my_temp_model", list(dense = dense_block), "regression") +#' create_keras_sequential_spec("my_temp_model", list(dense = dense_block), "regression") #' #' # Check it exists #' exists("my_temp_model") diff --git a/man/create_keras_spec.Rd b/man/create_keras_sequential_spec.Rd similarity index 93% rename from man/create_keras_spec.Rd rename to man/create_keras_sequential_spec.Rd index 8862c9c..df567c9 100644 --- a/man/create_keras_spec.Rd +++ b/man/create_keras_sequential_spec.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_keras_spec.R -\name{create_keras_spec} -\alias{create_keras_spec} +% Please edit documentation in R/create_keras_sequential_spec.R +\name{create_keras_sequential_spec} +\alias{create_keras_sequential_spec} \title{Create a Custom Keras Model Specification for Tidymodels} \usage{ -create_keras_spec( +create_keras_sequential_spec( model_name, layer_blocks, mode = c("regression", "classification"), @@ -54,7 +54,7 @@ which will be provided automatically by the fitting engine. accept a \code{num_classes} argument, which is provided automatically. 
} -The \code{create_keras_spec()} function will inspect the arguments of your +The \code{create_keras_sequential_spec()} function will inspect the arguments of your \code{layer_blocks} functions (ignoring \code{input_shape} and \code{num_classes}) and make them available as arguments in the generated model specification, prefixed with the block's name (e.g., @@ -91,7 +91,7 @@ output_block <- function(model, num_classes) { } # 2. Create the spec, providing blocks in the correct order. -create_keras_spec( +create_keras_sequential_spec( model_name = "my_mlp", layer_blocks = list( input = input_block, diff --git a/man/generic_sequential_fit.Rd b/man/generic_sequential_fit.Rd index 8d872f4..69eb03d 100644 --- a/man/generic_sequential_fit.Rd +++ b/man/generic_sequential_fit.Rd @@ -57,7 +57,7 @@ prefixed with \code{fit_} (e.g., \code{fit_callbacks = list(...)}, A \code{parsnip} model fit object. } \description{ -This function is the internal engine for fitting models generated by \code{create_keras_spec()}. +This function is the internal engine for fitting models generated by \code{create_keras_sequential_spec()}. It is not intended to be called directly by the user. } \details{ diff --git a/man/remove_keras_spec.Rd b/man/remove_keras_spec.Rd index b3789c8..2ff26e7 100644 --- a/man/remove_keras_spec.Rd +++ b/man/remove_keras_spec.Rd @@ -12,7 +12,7 @@ specification function to remove.} \item{env}{The environment from which to remove the function. Defaults to the calling environment (\code{parent.frame()}), which is typically where -\code{create_keras_spec()} would have created the function.} +\code{create_keras_sequential_spec()} would have created the function.} } \value{ Invisibly returns \code{TRUE} if the function was found and removed, @@ -20,7 +20,7 @@ and \code{FALSE} otherwise. } \description{ This function removes a model specification function that was previously -created by \code{create_keras_spec()} from an environment. 
+created by \code{create_keras_sequential_spec()} from an environment. } \examples{ \dontrun{ @@ -28,7 +28,7 @@ created by \code{create_keras_spec()} from an environment. dense_block <- function(model, units = 16) { model |> keras3::layer_dense(units = units) } -create_keras_spec("my_temp_model", list(dense = dense_block), "regression") +create_keras_sequential_spec("my_temp_model", list(dense = dense_block), "regression") # Check it exists exists("my_temp_model") diff --git a/register_fit_predict.R b/register_fit_predict.R deleted file mode 100644 index 4dd8140..0000000 --- a/register_fit_predict.R +++ /dev/null @@ -1,86 +0,0 @@ -#' Register Fit and Prediction Methods with Parsnip -#' -#' Defines how to fit the custom Keras model and how to generate predictions -#' for both regression and classification modes. It links the model to the -#' generic fitting implementation (`generic_keras_fit_impl`) and sets up -#' the appropriate prediction post-processing. -#' -#' @param model_name The name of the new model. -#' @param mode The model mode ("regression" or "classification"). -#' @param layer_blocks The named list of layer block functions, which is passed -#' as a default argument to the fit function. -#' @return Invisibly returns `NULL`. Called for its side effects. -#' @param functional A logical, if TRUE uses `generic_keras_functional_fit_impl` to fit, otherwise `generic_keras_fit_impl`. Defaults to FALSE. 
-#' @noRd -register_fit_predict <- function(model_name, mode, layer_blocks) { - # Fit method - parsnip::set_fit( - model = model_name, - eng = "keras", - mode = mode, - value = list( - interface = "data.frame", - protect = c("x", "y"), - func = c( - pkg = "kerasnip", - fun = if (functional) { - "generic_keras_functional_fit_impl" - } else { - "generic_keras_fit_impl" - } - ), - defaults = list(layer_blocks = layer_blocks) - ) - ) - - # Regression prediction - if (mode == "regression") { - parsnip::set_pred( - model = model_name, - eng = "keras", - mode = "regression", - type = "numeric", - value = list( - pre = NULL, - post = keras_postprocess_numeric, - func = c(fun = "predict"), - args = list( - object = rlang::expr(object$fit$fit), - x = rlang::expr(as.matrix(new_data)) - ) - ) - ) - } else { - # Classification predictions - parsnip::set_pred( - model = model_name, - eng = "keras", - mode = "classification", - type = "class", - value = list( - pre = NULL, - post = keras_postprocess_classes, - func = c(fun = "predict"), - args = list( - object = rlang::expr(object$fit$fit), - x = rlang::expr(as.matrix(new_data)) - ) - ) - ) - parsnip::set_pred( - model = model_name, - eng = "keras", - mode = "classification", - type = "prob", - value = list( - pre = NULL, - post = keras_postprocess_probs, - func = c(fun = "predict"), - args = list( - object = rlang::expr(object$fit$fit), - x = rlang::expr(as.matrix(new_data)) - ) - ) - ) - } -} \ No newline at end of file diff --git a/tests/testthat/test-e2e-classification.R b/tests/testthat/test-e2e-classification.R index 04cacbe..76205b2 100644 --- a/tests/testthat/test-e2e-classification.R +++ b/tests/testthat/test-e2e-classification.R @@ -12,7 +12,7 @@ test_that("E2E: Classification spec generation, fitting, and prediction works", model |> keras3::layer_dense(units = num_classes, activation = "softmax") } - create_keras_spec( + create_keras_sequential_spec( model_name = "e2e_mlp_class", layer_blocks = list( input = 
input_block_class, diff --git a/tests/testthat/test-e2e-features.R b/tests/testthat/test-e2e-features.R index a8edbfa..a147b2a 100644 --- a/tests/testthat/test-e2e-features.R +++ b/tests/testthat/test-e2e-features.R @@ -11,7 +11,7 @@ test_that("E2E: Customizing main arguments works", { model |> keras3::layer_dense(units = 1) } - create_keras_spec( + create_keras_sequential_spec( model_name = "e2e_mlp_feat", layer_blocks = list( input = input_block_feat, @@ -68,7 +68,7 @@ test_that("E2E: Customizing fit arguments works", { model |> keras3::layer_dense(units = 1) } - create_keras_spec( + create_keras_sequential_spec( model_name = "e2e_mlp_fit", layer_blocks = list( input = input_block_fit, @@ -109,7 +109,7 @@ test_that("E2E: Setting num_blocks = 0 works", { model |> keras3::layer_dense(units = 1) } - create_keras_spec( + create_keras_sequential_spec( model_name = "e2e_mlp_zero", layer_blocks = list( input = input_block_zero, @@ -132,7 +132,7 @@ test_that("E2E: Error handling for reserved names works", { ) expect_error( - create_keras_spec("bad_spec", bad_blocks), + create_keras_sequential_spec("bad_spec", bad_blocks), regexp = "`compile` and `optimizer` are protected names" ) }) diff --git a/tests/testthat/test-e2e-multiblock-tuning.R b/tests/testthat/test-e2e-multiblock-tuning.R index de0f1cb..48ea381 100644 --- a/tests/testthat/test-e2e-multiblock-tuning.R +++ b/tests/testthat/test-e2e-multiblock-tuning.R @@ -21,7 +21,7 @@ test_that("E2E: Multi-block model tuning works", { model |> keras3::layer_dense(units = num_classes, activation = "softmax") } - create_keras_spec( + create_keras_sequential_spec( model_name = "mb_mt", layer_blocks = list( input = input_block_mb, diff --git a/tests/testthat/test-e2e-regression.R b/tests/testthat/test-e2e-regression.R index 3ac195e..a3a5475 100644 --- a/tests/testthat/test-e2e-regression.R +++ b/tests/testthat/test-e2e-regression.R @@ -13,7 +13,7 @@ test_that("E2E: Regression spec generation, fitting, and prediction works", { 
model |> keras3::layer_dense(units = 1) } - create_keras_spec( + create_keras_sequential_spec( model_name = "e2e_mlp_reg", layer_blocks = list( input = input_block_reg, diff --git a/tests/testthat/test-e2e-spec-removal.R b/tests/testthat/test-e2e-spec-removal.R index ac04136..d10aa46 100644 --- a/tests/testthat/test-e2e-spec-removal.R +++ b/tests/testthat/test-e2e-spec-removal.R @@ -10,7 +10,7 @@ test_that("E2E: Model spec removal works", { model |> keras3::layer_dense(units = 1) } - create_keras_spec( + create_keras_sequential_spec( model_name = model_name, layer_blocks = list(input = input_block, output = output_block), mode = "regression" diff --git a/tests/testthat/test-e2e-tuning.R b/tests/testthat/test-e2e-tuning.R index 74b6b31..e0341c9 100644 --- a/tests/testthat/test-e2e-tuning.R +++ b/tests/testthat/test-e2e-tuning.R @@ -12,7 +12,7 @@ test_that("E2E: Tuning works with a generated spec", { model |> keras3::layer_dense(units = num_classes, activation = "softmax") } - create_keras_spec( + create_keras_sequential_spec( model_name = "e2e_mlp_class_tune", layer_blocks = list( input = input_block_tune, From 7d72ed4e090c2896fa845575ee8d141798df585d Mon Sep 17 00:00:00 2001 From: davidrsch Date: Wed, 30 Jul 2025 14:14:49 +0200 Subject: [PATCH 03/10] Improving functions documentation --- R/build_spec_function.R | 40 ++++++++++++---- R/create_keras_sequential_spec.R | 72 ++++++++++++++++------------- R/generate_roxygen_docs.R | 52 +++++++++++++++++---- R/generic_sequential_fit.R | 45 ++++++++++-------- R/register_core_model.R | 15 ++++-- R/register_fit_predict.R | 46 +++++++++++++----- R/register_model_args.R | 29 +++++++++--- R/register_update_method.R | 26 ++++++++--- R/remove_spec.R | 59 +++++++++++++++-------- R/utils.R | 69 ++++++++++++++++++++++++++- R/zzz.R | 35 ++++++++++++-- man/create_keras_sequential_spec.Rd | 68 +++++++++++++++------------ man/generic_sequential_fit.Rd | 45 ++++++++++-------- man/keras_objects.Rd | 15 ++++-- man/register_keras_loss.Rd | 
11 ++++- man/register_keras_metric.Rd | 11 ++++- man/register_keras_optimizer.Rd | 28 ++++++++++- man/remove_keras_spec.Rd | 60 ++++++++++++++++-------- 18 files changed, 529 insertions(+), 197 deletions(-) diff --git a/R/build_spec_function.R b/R/build_spec_function.R index 18328a8..dc50a90 100644 --- a/R/build_spec_function.R +++ b/R/build_spec_function.R @@ -1,16 +1,38 @@ #' Build the Model Specification Function #' -#' Uses metaprogramming to construct the new model specification function -#' (e.g., `dynamic_mlp()`). This function will capture user-provided arguments -#' and package them into a `parsnip::new_model_spec()` call. +#' @description +#' This internal helper uses metaprogramming to construct a complete R function +#' that acts as a `parsnip` model specification (e.g., `my_mlp()`). #' -#' @param model_name The name of the model specification function to create. +#' @details +#' The process involves three main steps: +#' 1. **Function Body Construction**: An expression for the function body is +#' created. This body uses `rlang::enquo()` and `rlang::enquos()` to +#' capture all user-provided arguments (both named and via `...`) into a +#' list of quosures. This list is then passed to `parsnip::new_model_spec()`. +#' 2. **Function Signature Construction**: A formal argument list is created +#' from `all_args`, and `...` is added to allow passthrough arguments. +#' `rlang::new_function()` combines the signature and body into a new +#' function object. +#' 3. **Documentation Attachment**: `generate_roxygen_docs()` creates a +#' comprehensive Roxygen comment block as a string, which is then attached +#' to the new function using `comment()`. +#' +#' @param model_name The name of the model specification function to create (e.g., "my_mlp"). #' @param mode The model mode ("regression" or "classification"). -#' @param all_args A named list of arguments for the function signature, as -#' generated by `collect_spec_args()`. 
-#' @param parsnip_names A character vector of all argument names. -#' @return A new function that serves as the `parsnip` model specification. -#' @param functional A logical indicating if the model is functional or sequential. +#' @param all_args A named list of formal arguments for the new function's +#' signature, as generated by `collect_spec_args()`. The values are typically +#' `rlang::missing_arg()` or `rlang::zap()`. +#' @param parsnip_names A character vector of all argument names that should be +#' captured as quosures and passed to `parsnip::new_model_spec()`. +#' @param layer_blocks The user-provided list of layer block functions. This is +#' passed directly to `generate_roxygen_docs()` to create documentation for +#' block-specific parameters. +#' @param functional A logical indicating if the model is functional +#' (for `create_keras_functional_spec()`) or sequential. This is passed to +#' `generate_roxygen_docs()` to tailor the documentation. +#' @return A new function object with attached Roxygen comments, ready to be +#' placed in the user's environment. #' @noRd build_spec_function <- function( model_name, diff --git a/R/create_keras_sequential_spec.R b/R/create_keras_sequential_spec.R index fed38af..6f8e724 100644 --- a/R/create_keras_sequential_spec.R +++ b/R/create_keras_sequential_spec.R @@ -1,55 +1,63 @@ -#' Create a Custom Keras Model Specification for Tidymodels +#' Create a Custom Keras Sequential Model Specification for Tidymodels #' +#' @description #' This function acts as a factory to generate a new `parsnip` model -#' specification based on user-defined blocks of Keras layers. This allows for -#' creating complex, tunable architectures that integrate seamlessly with the -#' `tidymodels` ecosystem. +#' specification based on user-defined blocks of Keras layers using the +#' Sequential API. This is the ideal choice for creating models that are a +#' simple, linear stack of layers. 
For models with complex, non-linear +#' topologies, see [create_keras_functional_spec()]. #' #' @param model_name A character string for the name of the new model #' specification function (e.g., "custom_cnn"). This should be a valid R #' function name. -#' @param layer_blocks A named list of functions. Each function defines a "block" -#' of Keras layers. The function must take a Keras model object as its first -#' argument and return the modified model. Other arguments to the function -#' will become tunable parameters in the final model specification. +#' @param layer_blocks A named, ordered list of functions. Each function defines +#' a "block" of Keras layers. The function must take a Keras model object as +#' its first argument and return the modified model. Other arguments to the +#' function will become tunable parameters in the final model specification. #' @param mode A character string, either "regression" or "classification". #' @param ... Reserved for future use. Currently not used. #' @param env The environment in which to create the new model specification #' function and its associated `update()` method. Defaults to the calling #' environment (`parent.frame()`). -#' @importFrom rlang enquos dots_list arg_match env_poke -#' @importFrom parsnip update_dot_check #' #' @details -#' The user is responsible for defining the entire model architecture by providing -#' an ordered list of layer block functions. -#' 1. The first block function must initialize the model (e.g., with -#' \code{keras_model_sequential()}). It can accept an \code{input_shape} argument, -#' which will be provided automatically by the fitting engine. -#' 2. Subsequent blocks add hidden layers. -#' 3. The final block should add the output layer. For classification, it can -#' accept a \code{num_classes} argument, which is provided automatically. 
-#' -#' The \code{create_keras_sequential_spec()} function will inspect the arguments of your -#' \code{layer_blocks} functions (ignoring \code{input_shape} and \code{num_classes}) -#' and make them available as arguments in the generated model specification, -#' prefixed with the block's name (e.g., -#' `dense_units`). -#' -#' It also automatically creates arguments like `num_dense` to control how many -#' times each block is repeated. In addition, common training parameters such as -#' `epochs`, `learn_rate`, `validation_split`, and `verbose` are added to the -#' specification. +#' This function generates all the boilerplate needed to create a custom, +#' tunable `parsnip` model specification that uses the Keras Sequential API. +#' +#' The function inspects the arguments of your `layer_blocks` functions +#' (ignoring special arguments like `input_shape` and `num_classes`) +#' and makes them available as arguments in the generated model specification, +#' prefixed with the block's name (e.g., `dense_units`). #' #' The new model specification function and its `update()` method are created in #' the environment specified by the `env` argument. #' +#' @section Model Architecture (Sequential API): +#' `kerasnip` builds the model by applying the functions in `layer_blocks` in +#' the order they are provided. Each function receives the Keras model built by +#' the previous function and returns a modified version. +#' +#' 1. The **first block** must initialize the model (e.g., with +#' `keras_model_sequential()`). It can accept an `input_shape` argument, +#' which `kerasnip` will provide automatically during fitting. +#' 2. **Subsequent blocks** add layers to the model. +#' 3. The **final block** should add the output layer. For classification, it +#' can accept a `num_classes` argument, which is provided automatically. +#' +#' A key feature of this function is the automatic creation of `num_{block_name}` +#' arguments (e.g., `num_hidden`). 
This allows you to control how many times +#' each block is repeated, making it easy to tune the depth of your network. +#' +#' @importFrom rlang enquos dots_list arg_match env_poke +#' @importFrom parsnip update_dot_check +#' #' @return Invisibly returns `NULL`. Its primary side effect is to create a new -#' model specification function (e.g., `dynamic_mlp()`) in the specified +#' model specification function (e.g., `my_mlp()`) in the specified #' environment and register the model with `parsnip` so it can be used within #' the `tidymodels` framework. #' -#' @seealso [remove_keras_spec()], [parsnip::new_model_spec()] +#' @seealso [remove_keras_spec()], [parsnip::new_model_spec()], +#' [create_keras_functional_spec()] #' #' @export #' @examples @@ -86,7 +94,7 @@ #' ) #' #' # 3. Use the newly created specification function! -# Note the new arguments `num_hidden` and `hidden_units`. +#' # Note the new arguments `num_hidden` and `hidden_units`. #' model_spec <- my_mlp( #' num_hidden = 2, #' hidden_units = 64, diff --git a/R/generate_roxygen_docs.R b/R/generate_roxygen_docs.R index b270cd4..3353024 100644 --- a/R/generate_roxygen_docs.R +++ b/R/generate_roxygen_docs.R @@ -1,13 +1,46 @@ #' Generate Roxygen Documentation for a Dynamic Spec Function #' -#' Constructs a detailed Roxygen comment block as a string, which can be -#' attached to the dynamically created model specification function. +#' @description +#' This internal helper constructs a complete Roxygen comment block as a single +#' string. This string is then attached to the dynamically created model +#' specification function, making it self-documenting. #' -#' @param model_name The name of the model. -#' @param layer_blocks The list of layer block functions. -#' @param all_args A named list of all arguments for the function signature. -#' @param functional A logical indicating if the model is functional or sequential. -#' @return A single string containing the full Roxygen documentation. 
+#' @details +#' The function assembles the documentation in a structured way: +#' \itemize{ +#' \item \strong{Title & Description:} A title is generated from the `model_name`, +#' and the description indicates which `kerasnip` function created it. +#' \item \strong{Parameters (`@param`):} It documents several groups of parameters: +#' \itemize{ +#' \item Block-specific hyperparameters (e.g., `dense_units`), introspecting +#' `layer_blocks` to find default values. +#' \item Architecture parameters (e.g., `num_dense`). +#' \item Global training parameters (e.g., `epochs`, `learn_rate`). +#' \item Compilation override parameters (e.g., `compile_loss`). +#' } +#' \item \strong{Sections (`@section`):} It creates dedicated sections for: +#' \itemize{ +#' \item \strong{Model Architecture:} Explains how the model is built, with +#' different content for the Sequential vs. Functional API (controlled +#' by the `functional` flag). +#' \item \strong{Model Fitting:} Explains how to pass arguments to +#' `keras3::fit()` using the `fit_` prefix. +#' \item \strong{Model Compilation:} Explains the default compilation +#' behavior and how to override it using the `compile_` prefix. +#' } +#' \item \strong{Other Tags:} Adds `@seealso` to link to relevant `kerasnip` +#' functions and `@export` to make the generated function available to users. +#' } +#' +#' @param model_name A character string for the model's name, used to generate the documentation title. +#' @param layer_blocks The named list of user-provided layer block functions. This is +#' introspected to find default values for block-specific parameters. +#' @param all_args A named list of all arguments for the new function's signature, +#' used to determine which `@param` tags to generate. +#' @param functional A logical. If `TRUE`, generates documentation specific to +#' the Functional API. If `FALSE`, generates documentation for the Sequential API. 
+#' @return A single string containing the full Roxygen documentation, ready to be +#' attached to a function using `comment()`. #' @noRd generate_roxygen_docs <- function( model_name, @@ -133,7 +166,10 @@ generate_roxygen_docs <- function( # Add ... param param_docs <- c( param_docs, - "@param ... Additional arguments passed to `parsnip::new_model_spec()`." + paste0( + "@param ... Additional arguments passed to the Keras engine. This is commonly used for arguments to `keras3::fit()` (prefixed with `fit_`). ", + "See the 'Model Fitting' and 'Model Compilation' sections for details." + ) ) # Sections diff --git a/R/generic_sequential_fit.R b/R/generic_sequential_fit.R index e809e88..8c663a0 100644 --- a/R/generic_sequential_fit.R +++ b/R/generic_sequential_fit.R @@ -1,32 +1,29 @@ -#' Generic Keras Model Fitting Implementation +#' Generic Keras Sequential API Model Fitting Implementation #' #' @description -#' This function is the internal engine for fitting models generated by `create_keras_sequential_spec()`. -#' It is not intended to be called directly by the user. +#' This function is the internal engine for fitting models generated by +#' `create_keras_sequential_spec()`. It is not intended to be called directly +#' by the user. #' #' @details -#' This function performs several key steps: +#' This function performs the following key steps: #' \enumerate{ -#' \item \strong{Argument & Data Preparation:} It resolves arguments from `parsnip` +#' \item \strong{Argument & Data Preparation:} It resolves arguments passed +#' from `parsnip` (handling `rlang_zap` objects for unspecified arguments) #' and prepares the `x` and `y` data for Keras. It automatically determines -#' the `input_shape` from `x` and, for classification, the `num_classes` from `y`. -#' \item \strong{Dynamic Model Construction:} The user is responsible for defining the -#' entire model architecture via `layer_blocks`. 
The function iterates through -#' the blocks in the order they are provided: +#' the `input_shape` from `x` and, for classification, the `num_classes` +#' from `y`. +#' \item \strong{Dynamic Model Construction:} It builds the Keras model by +#' sequentially processing the `layer_blocks` list. #' \itemize{ #' \item The first block function \strong{must initialize the model}, typically -#' by calling `keras3::keras_model_sequential()`. It can accept an -#' `input_shape` argument, which will be provided automatically. -#' \item Subsequent blocks receive the model and add layers to it. -#' \item An output layer block can accept a `num_classes` argument, which is -#' provided automatically for classification models. +#' by calling `keras3::keras_model_sequential()`. +#' \item It checks for `num_{block_name}` arguments to repeat a block +#' multiple times, creating a deeper stack of layers. #' } #' \item \strong{Model Compilation:} It compiles the final Keras model. The -#' compilation arguments (optimizer, loss, metrics) can be customized: -#' \itemize{ -#' \item Override defaults by passing arguments prefixed with `compile_` -#' (e.g., `compile_loss = "mae"`, `compile_optimizer = "sgd"`). -#' } +#' compilation arguments (optimizer, loss, metrics) can be customized by +#' passing arguments prefixed with `compile_` (e.g., `compile_loss = "mae"`). #' \item \strong{Model Fitting:} It calls `keras3::fit()` to train the model on #' the prepared data. #' } @@ -59,7 +56,15 @@ #' `fit_class_weight = list(...)`). #' } #' -#' @return A `parsnip` model fit object. +#' @return A list containing the fitted model and other metadata. This list is +#' stored in the `fit` slot of the `parsnip` model fit object. The list +#' contains the following elements: +#' \itemize{ +#' \item `fit`: The raw, fitted Keras model object. +#' \item `history`: The Keras training history object. 
+#' \item `lvl`: A character vector of the outcome factor levels (for +#' classification) or `NULL` (for regression). +#' } #' @keywords internal #' @export generic_sequential_fit <- function( diff --git a/R/register_core_model.R b/R/register_core_model.R index 3928fbf..c8d9fb6 100644 --- a/R/register_core_model.R +++ b/R/register_core_model.R @@ -1,7 +1,16 @@ #' Register Core Model Information with Parsnip #' -#' Sets up the basic model definition with `parsnip`, including its mode, -#' engine, dependencies, and data encoding requirements. +#' @description +#' Sets up the basic model definition with `parsnip`. This function is called +#' once when a new specification is created. +#' +#' @details +#' This function makes a series of calls to `parsnip`'s registration API: +#' - `parsnip::set_new_model()`: Declares the new model. +#' - `parsnip::set_model_mode()`: Sets the mode (e.g., "regression"). +#' - `parsnip::set_model_engine()`: Sets the engine to "keras". +#' - `parsnip::set_dependency()`: Declares the dependency on the `keras3` package. +#' - `parsnip::set_encoding()`: Specifies data preprocessing requirements. #' #' @param model_name The name of the new model. #' @param mode The model mode ("regression" or "classification"). @@ -24,4 +33,4 @@ register_core_model <- function(model_name, mode) { allow_sparse_x = FALSE ) ) -} \ No newline at end of file +} diff --git a/R/register_fit_predict.R b/R/register_fit_predict.R index 4c7862b..6b3b748 100644 --- a/R/register_fit_predict.R +++ b/R/register_fit_predict.R @@ -1,16 +1,27 @@ #' Register Fit and Prediction Methods with Parsnip #' -#' Defines how to fit the custom Keras model and how to generate predictions -#' for both regression and classification modes. It links the model to the -#' generic fitting implementation (`generic_sequential_fit`) and sets up -#' the appropriate prediction post-processing. 
+#' @description +#' This function registers the methods that `parsnip` will use to fit the model +#' and generate predictions. +#' +#' @details +#' This function makes calls to `parsnip::set_fit()` and `parsnip::set_pred()`: +#' - `set_fit()`: Links the model specification to the appropriate generic +#' fitting engine (`generic_sequential_fit()` or `generic_functional_fit()`). +#' It also passes the user's `layer_blocks` list as a default argument to +#' the fitting function. +#' - `set_pred()`: Defines how to generate predictions for different types +#' ("numeric", "class", "prob"). It specifies the underlying `predict()` +#' method and the post-processing functions (`keras_postprocess_*`) needed +#' to format the output into a standardized `tidymodels` tibble. #' #' @param model_name The name of the new model. #' @param mode The model mode ("regression" or "classification"). #' @param layer_blocks The named list of layer block functions, which is passed #' as a default argument to the fit function. +#' @param functional A logical. If `TRUE`, registers `generic_functional_fit` as +#' the fitting engine. Otherwise, registers `generic_sequential_fit`. #' @return Invisibly returns `NULL`. Called for its side effects. -#' @param functional A logical, if TRUE uses `generic_functional_fit` to fit, otherwise `generic_keras_fit_impl`. Defaults to FALSE. #' @noRd register_fit_predict <- function(model_name, mode, layer_blocks, functional) { # Fit method @@ -87,9 +98,13 @@ register_fit_predict <- function(model_name, mode, layer_blocks, functional) { #' Post-process Keras Numeric Predictions #' -#' Formats raw numeric predictions from a Keras model into a tibble with a -#' standardized `.pred` column. +#' @description +#' Formats raw numeric predictions from a Keras model into a tibble with the +#' standardized `.pred` column, as required by `tidymodels`. 
#' +#' @details +#' This function simply takes the matrix output from `keras3::predict()` and +#' converts it to a single-column tibble. #' @param results A matrix of numeric predictions from `predict()`. #' @param object The `parsnip` model fit object. #' @return A tibble with a `.pred` column. @@ -100,9 +115,13 @@ keras_postprocess_numeric <- function(results, object) { #' Post-process Keras Probability Predictions #' -#' Formats raw probability predictions from a Keras model into a tibble -#' with class-specific column names. +#' @description +#' Formats raw probability predictions from a Keras model into a tibble with +#' class-specific column names (e.g., `.pred_class1`, `.pred_class2`). #' +#' @details +#' This function retrieves the original factor levels from `object$fit$lvl` +#' (which was stored by the fitting engine) and uses them to name the columns. #' @param results A matrix of probability predictions from `predict()`. #' @param object The `parsnip` model fit object. #' @return A tibble with named columns for each class probability. @@ -115,9 +134,14 @@ keras_postprocess_probs <- function(results, object) { #' Post-process Keras Class Predictions #' -#' Converts raw probability predictions from a Keras model into factor-based -#' class predictions. +#' @description +#' Converts raw probability predictions from a Keras model into a single +#' `.pred_class` column of factor predictions. #' +#' @details +#' For multiclass models, it finds the class with the highest probability +#' (`which.max`). For binary models, it applies a 0.5 threshold. It uses the +#' levels stored in `object$fit$lvl` to ensure the output factor is correct. #' @param results A matrix of probability predictions from `predict()`. #' @param object The `parsnip` model fit object. #' @return A tibble with a `.pred_class` column containing factor predictions. 
diff --git a/R/register_model_args.R b/R/register_model_args.R index 3139123..a6d808d 100644 --- a/R/register_model_args.R +++ b/R/register_model_args.R @@ -1,11 +1,28 @@ -#' Register Model Arguments with Parsnip +#' Register Model Arguments with Parsnip and Dials #' +#' @description #' Registers each model argument with `parsnip` and maps it to a corresponding -#' `dials` parameter function for tuning. This allows `tidymodels` to know -#' about the tunable parameters of the custom model. +#' `dials` parameter function. This is a crucial step that makes the model's +#' parameters visible to the `tidymodels` ecosystem for tuning. #' -#' @param model_name The name of the new model. -#' @param parsnip_names A character vector of all argument names. +#' @details +#' This function iterates through each argument name discovered by +#' `collect_spec_args()` and calls `parsnip::set_model_arg()`. +#' +#' The mapping from a `kerasnip` argument to a `dials` function is determined +#' by the following logic: +#' \itemize{ +#' \item Arguments starting with `num_` (e.g., `num_dense`) are mapped to +#' `dials::num_terms()`. +#' \item Other arguments are mapped based on their suffix (e.g., `dense_units` +#' is mapped based on `units`). The internal `keras_dials_map` object +#' contains common mappings like `units` -> `dials::hidden_units()`. +#' \item Arguments for `compile_loss` and `compile_optimizer` are mapped to +#' custom `dials` parameter functions within `kerasnip`. +#' } +#' +#' @param model_name The name of the new model specification. +#' @param parsnip_names A character vector of all argument names to be registered. #' @return Invisibly returns `NULL`. Called for its side effects. 
#' @noRd register_model_args <- function(model_name, parsnip_names) { @@ -61,4 +78,4 @@ register_model_args <- function(model_name, parsnip_names) { has_submodel = FALSE ) } -} \ No newline at end of file +} diff --git a/R/register_update_method.R b/R/register_update_method.R index b694ea3..c1b3860 100644 --- a/R/register_update_method.R +++ b/R/register_update_method.R @@ -1,13 +1,27 @@ #' Register the `update()` S3 Method #' +#' @description #' Creates and registers an `update()` S3 method for the new model specification. -#' This method allows users to modify the model's parameters after it has been -#' created, which is essential for tuning with `dials` and `tune`. +#' This method is essential for tuning with `dials` and `tune`, as it allows +#' the tuning machinery to modify model parameters after the spec has been created. #' -#' @param model_name The name of the new model. -#' @param parsnip_names A character vector of all argument names. +#' @details +#' This function uses `rlang` metaprogramming to dynamically construct a complete +#' `update.{{model_name}}` function. The process involves: +#' \enumerate{ +#' \item Building a function signature that includes `object`, `parameters`, +#' `...`, `fresh`, and all the tunable parameters from `parsnip_names`. +#' \item Creating a function body that captures all the arguments into quosures +#' and passes them to `parsnip::update_spec()`. +#' \item Registering this new function as an S3 method for the generic +#' `update()` in the specified environment, so S3 dispatch can find it. +#' } +#' +#' @param model_name The name of the new model specification (e.g., "my_mlp"). +#' @param parsnip_names A character vector of all argument names that the +#' `update()` method should be able to modify. +#' @param env The environment in which to create the `update()` S3 method. #' @return Invisibly returns `NULL`. Called for its side effects. -#' @param env The environment in which to create the update method. 
#' @noRd register_update_method <- function(model_name, parsnip_names, env) { # Build function signature @@ -55,4 +69,4 @@ register_update_method <- function(model_name, parsnip_names, env) { # S3 dispatch can find it. rlang::env_poke(env, method_name, update_func) registerS3method("update", model_name, update_func, envir = env) -} \ No newline at end of file +} diff --git a/R/remove_spec.R b/R/remove_spec.R index a5bb02d..262d282 100644 --- a/R/remove_spec.R +++ b/R/remove_spec.R @@ -1,32 +1,51 @@ -#' Remove a Keras Model Specification +#' Remove a Keras Model Specification and its Registrations #' -#' This function removes a model specification function that was previously -#' created by `create_keras_sequential_spec()` from an environment. +#' @description +#' This function completely removes a model specification that was previously +#' created by [create_keras_sequential_spec()] or [create_keras_functional_spec()]. +#' It cleans up both the function in the user's environment and all associated +#' registrations within the `parsnip` package. +#' +#' @details +#' This function is essential for cleanly unloading a dynamically created model. +#' It performs three main actions: +#' \enumerate{ +#' \item It removes the model specification function (e.g., `my_mlp()`) and its +#' corresponding `update()` method from the specified environment. +#' \item It searches `parsnip`'s internal model environment for all objects +#' whose names start with the `model_name` and removes them. This purges +#' the fit methods, argument definitions, and other registrations. +#' \item It removes the model's name from `parsnip`'s master list of models. +#' } +#' This function uses the un-exported `parsnip:::get_model_env()` to perform +#' the cleanup, which may be subject to change in future `parsnip` versions. #' #' @param model_name A character string giving the name of the model -#' specification function to remove. -#' @param env The environment from which to remove the function. 
Defaults to -#' the calling environment (`parent.frame()`), which is typically where -#' `create_keras_sequential_spec()` would have created the function. -#' @return Invisibly returns `TRUE` if the function was found and removed, -#' and `FALSE` otherwise. +#' specification function to remove (e.g., "my_mlp"). +#' @param env The environment from which to remove the function and its `update()` +#' method. Defaults to the calling environment (`parent.frame()`). +#' @return Invisibly returns `TRUE` after attempting to remove the objects. +#' @seealso [create_keras_sequential_spec()], [create_keras_functional_spec()] #' @export #' @examples #' \dontrun{ -#' # First, create a dummy spec -#' dense_block <- function(model, units = 16) { -#' model |> keras3::layer_dense(units = units) -#' } -#' create_keras_sequential_spec("my_temp_model", list(dense = dense_block), "regression") +#' if (requireNamespace("keras3", quietly = TRUE)) { +#' # First, create a dummy spec +#' input_block <- function(model, input_shape) keras3::keras_model_sequential(input_shape = input_shape) +#' dense_block <- function(model, units = 16) model |> keras3::layer_dense(units = units) +#' create_keras_sequential_spec("my_temp_model", list(input = input_block, dense = dense_block), "regression") #' -#' # Check it exists -#' exists("my_temp_model") +#' # Check it exists in the environment and in parsnip +#' exists("my_temp_model") +#' "my_temp_model" %in% parsnip::show_engines("my_temp_model")$model #' -#' # Now remove it -#' remove_keras_spec("my_temp_model") +#' # Now remove it +#' remove_keras_spec("my_temp_model") #' -#' # Check it's gone -#' !exists("my_temp_model") +#' # Check it's gone +#' !exists("my_temp_model") +#' !"my_temp_model" %in% parsnip::show_engines(NULL)$model +#' } #' } remove_keras_spec <- function(model_name, env = parent.frame()) { # 1. 
Remove the spec + update fn from the user env diff --git a/R/utils.R b/R/utils.R index 84b5d69..6e97028 100644 --- a/R/utils.R +++ b/R/utils.R @@ -10,17 +10,53 @@ .kerasnip_custom_objects$metrics <- list() #' Register a Custom Keras Optimizer +#' +#' @description +#' Allows users to register a custom optimizer function so it can be used by +#' name within `kerasnip` model specifications and tuned with `dials`. +#' +#' @details +#' Registered optimizers are stored in an internal environment. When a model is +#' compiled, `kerasnip` will first check this internal registry for an optimizer +#' matching the provided name before checking the `keras3` package. +#' +#' The `optimizer_fn` can be a simple function or a partially applied function +#' using `purrr::partial()`. This is useful for creating versions of Keras +#' optimizers with specific settings. +#' #' @param name The name to register the optimizer under (character). -#' @param optimizer_fn The optimizer function (e.g., a custom function or a partially applied keras optimizer). +#' @param optimizer_fn The optimizer function. It should return a Keras +#' optimizer object. +#' @seealso [register_keras_loss()], [register_keras_metric()] #' @export +#' @examples +#' if (requireNamespace("keras3", quietly = TRUE)) { +#' # Register a custom version of Adam with a different default beta_1 +#' my_adam <- purrr::partial(keras3::optimizer_adam, beta_1 = 0.8) +#' register_keras_optimizer("my_adam", my_adam) +#' +#' # Now "my_adam" can be used as a string in a model spec, e.g., +#' # my_model_spec(compile_optimizer = "my_adam") +#' } register_keras_optimizer <- function(name, optimizer_fn) { .kerasnip_custom_objects$optimizers[[name]] <- optimizer_fn invisible() } #' Register a Custom Keras Loss +#' +#' @description +#' Allows users to register a custom loss function so it can be used by name +#' within `kerasnip` model specifications and tuned with `dials`. 
+#' +#' @details +#' Registered losses are stored in an internal environment. When a model is +#' compiled, `kerasnip` will first check this internal registry for a loss +#' matching the provided name before checking the `keras3` package. +#' #' @param name The name to register the loss under (character). #' @param loss_fn The loss function. +#' @seealso [register_keras_optimizer()], [register_keras_metric()] #' @export register_keras_loss <- function(name, loss_fn) { .kerasnip_custom_objects$losses[[name]] <- loss_fn @@ -28,15 +64,44 @@ register_keras_loss <- function(name, loss_fn) { } #' Register a Custom Keras Metric +#' +#' @description +#' Allows users to register a custom metric function so it can be used by name +#' within `kerasnip` model specifications. +#' +#' @details +#' Registered metrics are stored in an internal environment. When a model is +#' compiled, `kerasnip` will first check this internal registry for a metric +#' matching the provided name before checking the `keras3` package. +#' #' @param name The name to register the metric under (character). #' @param metric_fn The metric function. +#' @seealso [register_keras_optimizer()], [register_keras_loss()] #' @export register_keras_metric <- function(name, metric_fn) { .kerasnip_custom_objects$metrics[[name]] <- metric_fn invisible() } -#' Internal helper to retrieve a Keras object by name +#' Internal helper to retrieve a Keras object by name from the registry +#' +#' @description +#' Resolves a string name into a Keras object (optimizer, loss, or metric) +#' by searching in a specific order. +#' +#' @details +#' The lookup order is: +#' 1. User-registered custom objects via `register_keras_*()`. +#' 2. Standard Keras constructors in the `keras3` package (e.g., `optimizer_adam`). +#' 3. If not found, the original string is returned, assuming Keras can handle it. +#' +#' For optimizers, it also passes along any `...` arguments (like `learning_rate`) +#' to the constructor function. 
+#' +#' @param name The string name of the object. +#' @param type The type of object ("optimizer", "loss", or "metric"). +#' @param ... Additional arguments passed to the optimizer constructor. +#' @return A Keras object or a string name. #' @noRd get_keras_object <- function( name, diff --git a/R/zzz.R b/R/zzz.R index 9ed11cf..019576f 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,9 +1,17 @@ #' @name keras_objects #' @title Dynamically Discovered Keras Objects #' @description -#' These vectors contain the names of optimizers, losses, and metrics -#' discovered from the installed `keras3` package at load time. This ensures -#' that `kerasnip` is always up-to-date with your Keras version. +#' These exported vectors contain the names of optimizers, losses, and metrics +#' discovered from the installed `keras3` package when `kerasnip` is loaded. +#' This ensures that `kerasnip` is always up-to-date with your Keras version. +#' @details +#' These objects are primarily used to provide the default `values` for the +#' `dials` parameter functions, [optimizer_function()] and [loss_function_keras()]. +#' This allows for tab-completion and validation of optimizer and loss names +#' when tuning models. +#' +#' The discovery process in `.onLoad()` scrapes the `keras3` namespace for +#' functions matching `optimizer_*`, `loss_*`, and `metric_*` patterns. #' @keywords internal NULL @@ -19,6 +27,27 @@ keras_losses <- NULL #' @export keras_metrics <- NULL +#' Populate Keras Object Lists on Package Load +#' +#' @description +#' This `.onLoad` hook is executed when the `kerasnip` package is loaded. Its +#' main purpose is to inspect the installed `keras3` package and populate the +#' `keras_optimizers`, `keras_losses`, and `keras_metrics` vectors. +#' +#' @details +#' The function works by: +#' \enumerate{ +#' \item Checking if `keras3` is installed. +#' \item Listing all functions in the `keras3` namespace that match the patterns +#' `optimizer_*`, `loss_*`, and `metric_*`. 
+#' \item For each function, it attempts to extract the default value of the `name` +#' argument (e.g., for `keras3::optimizer_adam()`, it extracts `"adam"`). +#' \item It populates the exported vectors with these discovered names. For metrics, +#' it also adds a list of common string aliases that Keras accepts. +#' } +#' This dynamic discovery ensures that `kerasnip` automatically supports all +#' objects available in the user's installed version of Keras. +#' @noRd .onLoad <- function(libname, pkgname) { # Helper to get the default string name from a Keras function's `name` argument get_keras_default_name <- function(fn_name, keras_ns) { diff --git a/man/create_keras_sequential_spec.Rd b/man/create_keras_sequential_spec.Rd index df567c9..213f26e 100644 --- a/man/create_keras_sequential_spec.Rd +++ b/man/create_keras_sequential_spec.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/create_keras_sequential_spec.R \name{create_keras_sequential_spec} \alias{create_keras_sequential_spec} -\title{Create a Custom Keras Model Specification for Tidymodels} +\title{Create a Custom Keras Sequential Model Specification for Tidymodels} \usage{ create_keras_sequential_spec( model_name, @@ -17,10 +17,10 @@ create_keras_sequential_spec( specification function (e.g., "custom_cnn"). This should be a valid R function name.} -\item{layer_blocks}{A named list of functions. Each function defines a "block" -of Keras layers. The function must take a Keras model object as its first -argument and return the modified model. Other arguments to the function -will become tunable parameters in the final model specification.} +\item{layer_blocks}{A named, ordered list of functions. Each function defines +a "block" of Keras layers. The function must take a Keras model object as +its first argument and return the modified model. 
Other arguments to the +function will become tunable parameters in the final model specification.} \item{mode}{A character string, either "regression" or "classification".} @@ -32,42 +32,48 @@ environment (\code{parent.frame()}).} } \value{ Invisibly returns \code{NULL}. Its primary side effect is to create a new -model specification function (e.g., \code{dynamic_mlp()}) in the specified +model specification function (e.g., \code{my_mlp()}) in the specified environment and register the model with \code{parsnip} so it can be used within the \code{tidymodels} framework. } \description{ This function acts as a factory to generate a new \code{parsnip} model -specification based on user-defined blocks of Keras layers. This allows for -creating complex, tunable architectures that integrate seamlessly with the -\code{tidymodels} ecosystem. +specification based on user-defined blocks of Keras layers using the +Sequential API. This is the ideal choice for creating models that are a +simple, linear stack of layers. For models with complex, non-linear +topologies, see \code{\link[=create_keras_functional_spec]{create_keras_functional_spec()}}. } \details{ -The user is responsible for defining the entire model architecture by providing -an ordered list of layer block functions. -\enumerate{ -\item The first block function must initialize the model (e.g., with -\code{keras_model_sequential()}). It can accept an \code{input_shape} argument, -which will be provided automatically by the fitting engine. -\item Subsequent blocks add hidden layers. -\item The final block should add the output layer. For classification, it can -accept a \code{num_classes} argument, which is provided automatically. -} +This function generates all the boilerplate needed to create a custom, +tunable \code{parsnip} model specification that uses the Keras Sequential API. 
-The \code{create_keras_sequential_spec()} function will inspect the arguments of your -\code{layer_blocks} functions (ignoring \code{input_shape} and \code{num_classes}) -and make them available as arguments in the generated model specification, -prefixed with the block's name (e.g., -\code{dense_units}). - -It also automatically creates arguments like \code{num_dense} to control how many -times each block is repeated. In addition, common training parameters such as -\code{epochs}, \code{learn_rate}, \code{validation_split}, and \code{verbose} are added to the -specification. +The function inspects the arguments of your \code{layer_blocks} functions +(ignoring special arguments like \code{input_shape} and \code{num_classes}) +and makes them available as arguments in the generated model specification, +prefixed with the block's name (e.g., \code{dense_units}). The new model specification function and its \code{update()} method are created in the environment specified by the \code{env} argument. } +\section{Model Architecture (Sequential API)}{ + +\code{kerasnip} builds the model by applying the functions in \code{layer_blocks} in +the order they are provided. Each function receives the Keras model built by +the previous function and returns a modified version. +\enumerate{ +\item The \strong{first block} must initialize the model (e.g., with +\code{keras_model_sequential()}). It can accept an \code{input_shape} argument, +which \code{kerasnip} will provide automatically during fitting. +\item \strong{Subsequent blocks} add layers to the model. +\item The \strong{final block} should add the output layer. For classification, it +can accept a \code{num_classes} argument, which is provided automatically. +} + +A key feature of this function is the automatic creation of \verb{num_\{block_name\}} +arguments (e.g., \code{num_hidden}). This allows you to control how many times +each block is repeated, making it easy to tune the depth of your network. 
+} + \examples{ \dontrun{ if (requireNamespace("keras3", quietly = TRUE)) { @@ -102,6 +108,7 @@ model_name = "my_mlp", ) # 3. Use the newly created specification function! +# Note the new arguments `num_hidden` and `hidden_units`. model_spec <- my_mlp( num_hidden = 2, hidden_units = 64, @@ -114,5 +121,6 @@ print(model_spec) } } \seealso{ -\code{\link[=remove_keras_spec]{remove_keras_spec()}}, \code{\link[parsnip:add_on_exports]{parsnip::new_model_spec()}} +\code{\link[=remove_keras_spec]{remove_keras_spec()}}, \code{\link[parsnip:add_on_exports]{parsnip::new_model_spec()}}, +\code{\link[=create_keras_functional_spec]{create_keras_functional_spec()}} } diff --git a/man/generic_sequential_fit.Rd b/man/generic_sequential_fit.Rd index 69eb03d..2bbbe0d 100644 --- a/man/generic_sequential_fit.Rd +++ b/man/generic_sequential_fit.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/generic_sequential_fit.R \name{generic_sequential_fit} \alias{generic_sequential_fit} -\title{Generic Keras Model Fitting Implementation} +\title{Generic Keras Sequential API Model Fitting Implementation} \usage{ generic_sequential_fit( x, @@ -54,35 +54,40 @@ prefixed with \code{fit_} (e.g., \code{fit_callbacks = list(...)}, }} } \value{ -A \code{parsnip} model fit object. +A list containing the fitted model and other metadata. This list is +stored in the \code{fit} slot of the \code{parsnip} model fit object. The list +contains the following elements: +\itemize{ +\item \code{fit}: The raw, fitted Keras model object. +\item \code{history}: The Keras training history object. +\item \code{lvl}: A character vector of the outcome factor levels (for +classification) or \code{NULL} (for regression). +} } \description{ -This function is the internal engine for fitting models generated by \code{create_keras_sequential_spec()}. -It is not intended to be called directly by the user. +This function is the internal engine for fitting models generated by +\code{create_keras_sequential_spec()}. 
It is not intended to be called directly +by the user. } \details{ -This function performs several key steps: +This function performs the following key steps: \enumerate{ -\item \strong{Argument & Data Preparation:} It resolves arguments from \code{parsnip} +\item \strong{Argument & Data Preparation:} It resolves arguments passed +from \code{parsnip} (handling \code{rlang_zap} objects for unspecified arguments) and prepares the \code{x} and \code{y} data for Keras. It automatically determines -the \code{input_shape} from \code{x} and, for classification, the \code{num_classes} from \code{y}. -\item \strong{Dynamic Model Construction:} The user is responsible for defining the -entire model architecture via \code{layer_blocks}. The function iterates through -the blocks in the order they are provided: +the \code{input_shape} from \code{x} and, for classification, the \code{num_classes} +from \code{y}. +\item \strong{Dynamic Model Construction:} It builds the Keras model by +sequentially processing the \code{layer_blocks} list. \itemize{ \item The first block function \strong{must initialize the model}, typically -by calling \code{keras3::keras_model_sequential()}. It can accept an -\code{input_shape} argument, which will be provided automatically. -\item Subsequent blocks receive the model and add layers to it. -\item An output layer block can accept a \code{num_classes} argument, which is -provided automatically for classification models. +by calling \code{keras3::keras_model_sequential()}. +\item It checks for \verb{num_\{block_name\}} arguments to repeat a block +multiple times, creating a deeper stack of layers. } \item \strong{Model Compilation:} It compiles the final Keras model. The -compilation arguments (optimizer, loss, metrics) can be customized: -\itemize{ -\item Override defaults by passing arguments prefixed with \code{compile_} -(e.g., \code{compile_loss = "mae"}, \code{compile_optimizer = "sgd"}). 
-} +compilation arguments (optimizer, loss, metrics) can be customized by +passing arguments prefixed with \code{compile_} (e.g., \code{compile_loss = "mae"}). \item \strong{Model Fitting:} It calls \code{keras3::fit()} to train the model on the prepared data. } diff --git a/man/keras_objects.Rd b/man/keras_objects.Rd index 305a451..886cbec 100644 --- a/man/keras_objects.Rd +++ b/man/keras_objects.Rd @@ -22,9 +22,18 @@ keras_losses keras_metrics } \description{ -These vectors contain the names of optimizers, losses, and metrics -discovered from the installed \code{keras3} package at load time. This ensures -that \code{kerasnip} is always up-to-date with your Keras version. +These exported vectors contain the names of optimizers, losses, and metrics +discovered from the installed \code{keras3} package when \code{kerasnip} is loaded. +This ensures that \code{kerasnip} is always up-to-date with your Keras version. +} +\details{ +These objects are primarily used to provide the default \code{values} for the +\code{dials} parameter functions, \code{\link[=optimizer_function]{optimizer_function()}} and \code{\link[=loss_function_keras]{loss_function_keras()}}. +This allows for tab-completion and validation of optimizer and loss names +when tuning models. + +The discovery process in \code{.onLoad()} scrapes the \code{keras3} namespace for +functions matching \verb{optimizer_*}, \verb{loss_*}, and \verb{metric_*} patterns. } \keyword{datasets} \keyword{internal} diff --git a/man/register_keras_loss.Rd b/man/register_keras_loss.Rd index 49a5d8a..6138258 100644 --- a/man/register_keras_loss.Rd +++ b/man/register_keras_loss.Rd @@ -12,5 +12,14 @@ register_keras_loss(name, loss_fn) \item{loss_fn}{The loss function.} } \description{ -Register a Custom Keras Loss +Allows users to register a custom loss function so it can be used by name +within \code{kerasnip} model specifications and tuned with \code{dials}. +} +\details{ +Registered losses are stored in an internal environment. 
When a model is +compiled, \code{kerasnip} will first check this internal registry for a loss +matching the provided name before checking the \code{keras3} package. +} +\seealso{ +\code{\link[=register_keras_optimizer]{register_keras_optimizer()}}, \code{\link[=register_keras_metric]{register_keras_metric()}} } diff --git a/man/register_keras_metric.Rd b/man/register_keras_metric.Rd index 4faef6f..2195eda 100644 --- a/man/register_keras_metric.Rd +++ b/man/register_keras_metric.Rd @@ -12,5 +12,14 @@ register_keras_metric(name, metric_fn) \item{metric_fn}{The metric function.} } \description{ -Register a Custom Keras Metric +Allows users to register a custom metric function so it can be used by name +within \code{kerasnip} model specifications. +} +\details{ +Registered metrics are stored in an internal environment. When a model is +compiled, \code{kerasnip} will first check this internal registry for a metric +matching the provided name before checking the \code{keras3} package. +} +\seealso{ +\code{\link[=register_keras_optimizer]{register_keras_optimizer()}}, \code{\link[=register_keras_loss]{register_keras_loss()}} } diff --git a/man/register_keras_optimizer.Rd b/man/register_keras_optimizer.Rd index f5122ca..aa14b67 100644 --- a/man/register_keras_optimizer.Rd +++ b/man/register_keras_optimizer.Rd @@ -9,8 +9,32 @@ register_keras_optimizer(name, optimizer_fn) \arguments{ \item{name}{The name to register the optimizer under (character).} -\item{optimizer_fn}{The optimizer function (e.g., a custom function or a partially applied keras optimizer).} +\item{optimizer_fn}{The optimizer function. It should return a Keras +optimizer object.} } \description{ -Register a Custom Keras Optimizer +Allows users to register a custom optimizer function so it can be used by +name within \code{kerasnip} model specifications and tuned with \code{dials}. +} +\details{ +Registered optimizers are stored in an internal environment. 
When a model is +compiled, \code{kerasnip} will first check this internal registry for an optimizer +matching the provided name before checking the \code{keras3} package. + +The \code{optimizer_fn} can be a simple function or a partially applied function +using \code{purrr::partial()}. This is useful for creating versions of Keras +optimizers with specific settings. +} +\examples{ +if (requireNamespace("keras3", quietly = TRUE)) { + # Register a custom version of Adam with a different default beta_1 + my_adam <- purrr::partial(keras3::optimizer_adam, beta_1 = 0.8) + register_keras_optimizer("my_adam", my_adam) + + # Now "my_adam" can be used as a string in a model spec, e.g., + # my_model_spec(compile_optimizer = "my_adam") +} +} +\seealso{ +\code{\link[=register_keras_loss]{register_keras_loss()}}, \code{\link[=register_keras_metric]{register_keras_metric()}} } diff --git a/man/remove_keras_spec.Rd b/man/remove_keras_spec.Rd index 2ff26e7..57b0b86 100644 --- a/man/remove_keras_spec.Rd +++ b/man/remove_keras_spec.Rd @@ -2,41 +2,61 @@ % Please edit documentation in R/remove_spec.R \name{remove_keras_spec} \alias{remove_keras_spec} -\title{Remove a Keras Model Specification} +\title{Remove a Keras Model Specification and its Registrations} \usage{ remove_keras_spec(model_name, env = parent.frame()) } \arguments{ \item{model_name}{A character string giving the name of the model -specification function to remove.} +specification function to remove (e.g., "my_mlp").} -\item{env}{The environment from which to remove the function. Defaults to -the calling environment (\code{parent.frame()}), which is typically where -\code{create_keras_sequential_spec()} would have created the function.} +\item{env}{The environment from which to remove the function and its \code{update()} +method. Defaults to the calling environment (\code{parent.frame()}).} } \value{ -Invisibly returns \code{TRUE} if the function was found and removed, -and \code{FALSE} otherwise. 
+Invisibly returns \code{TRUE} after attempting to remove the objects. } \description{ -This function removes a model specification function that was previously -created by \code{create_keras_sequential_spec()} from an environment. +This function completely removes a model specification that was previously +created by \code{\link[=create_keras_sequential_spec]{create_keras_sequential_spec()}} or \code{\link[=create_keras_functional_spec]{create_keras_functional_spec()}}. +It cleans up both the function in the user's environment and all associated +registrations within the \code{parsnip} package. +} +\details{ +This function is essential for cleanly unloading a dynamically created model. +It performs three main actions: +\enumerate{ +\item It removes the model specification function (e.g., \code{my_mlp()}) and its +corresponding \code{update()} method from the specified environment. +\item It searches \code{parsnip}'s internal model environment for all objects +whose names start with the \code{model_name} and removes them. This purges +the fit methods, argument definitions, and other registrations. +\item It removes the model's name from \code{parsnip}'s master list of models. +} +This function uses the un-exported \code{parsnip:::get_model_env()} to perform +the cleanup, which may be subject to change in future \code{parsnip} versions. 
} \examples{ \dontrun{ -# First, create a dummy spec -dense_block <- function(model, units = 16) { - model |> keras3::layer_dense(units = units) -} -create_keras_sequential_spec("my_temp_model", list(dense = dense_block), "regression") +if (requireNamespace("keras3", quietly = TRUE)) { + # First, create a dummy spec + input_block <- function(model, input_shape) keras3::keras_model_sequential(input_shape = input_shape) + dense_block <- function(model, units = 16) model |> keras3::layer_dense(units = units) + create_keras_sequential_spec("my_temp_model", list(input = input_block, dense = dense_block), "regression") -# Check it exists -exists("my_temp_model") + # Check it exists in the environment and in parsnip + exists("my_temp_model") + "my_temp_model" \%in\% parsnip::show_engines("my_temp_model")$model -# Now remove it -remove_keras_spec("my_temp_model") + # Now remove it + remove_keras_spec("my_temp_model") -# Check it's gone -!exists("my_temp_model") + # Check it's gone + !exists("my_temp_model") + !"my_temp_model" \%in\% parsnip::show_engines(NULL)$model +} +} } +\seealso{ +\code{\link[=create_keras_sequential_spec]{create_keras_sequential_spec()}}, \code{\link[=create_keras_functional_spec]{create_keras_functional_spec()}} } From 3270c84efb515cbc8cf5ec6ac6d27eb4b253bdfb Mon Sep 17 00:00:00 2001 From: davidrsch Date: Wed, 30 Jul 2025 14:15:27 +0200 Subject: [PATCH 04/10] Making code more modular --- R/generic_fit_helpers.R | 115 +++++++++++++++++++++++++++++++++++++ R/generic_sequential_fit.R | 86 ++++++--------------------- 2 files changed, 132 insertions(+), 69 deletions(-) create mode 100644 R/generic_fit_helpers.R diff --git a/R/generic_fit_helpers.R b/R/generic_fit_helpers.R new file mode 100644 index 0000000..3ae2236 --- /dev/null +++ b/R/generic_fit_helpers.R @@ -0,0 +1,115 @@ +#' Collect and Finalize Compilation Arguments +#' +#' @description +#' This internal helper extracts all arguments prefixed with `compile_` from a +#' list of arguments, resolves 
them, and combines them with defaults. +#' +#' @details +#' It handles the special logic for the `optimizer`, where a string name is +#' resolved to a Keras optimizer object, applying the `learn_rate` if necessary. +#' It also resolves string names for `loss` and `metrics` using `get_keras_object()`. +#' +#' @param all_args The list of all arguments passed to the fitting function's `...`. +#' @param learn_rate The main `learn_rate` parameter. +#' @param default_loss The default loss function to use if not provided. +#' @param default_metrics The default metric(s) to use if not provided. +#' @return A named list of arguments ready to be passed to `keras3::compile()`. +#' @noRd +collect_compile_args <- function( + all_args, + learn_rate, + default_loss, + default_metrics +) { + compile_arg_names <- names(all_args)[startsWith(names(all_args), "compile_")] + user_compile_args <- all_args[compile_arg_names] + names(user_compile_args) <- sub("^compile_", "", names(user_compile_args)) + + # --- 3a. Resolve and Finalize Compile Arguments --- + final_compile_args <- list() + + # Determine the final optimizer object, ensuring `learn_rate` is applied. + optimizer_arg <- user_compile_args$optimizer %||% NULL + if (!is.null(optimizer_arg)) { + if (is.character(optimizer_arg)) { + # Resolve string to object, passing the learn_rate + final_compile_args$optimizer <- get_keras_object( + optimizer_arg, + "optimizer", + learning_rate = learn_rate + ) + } else { + # User passed a pre-constructed optimizer object, use it as is. + # We assume they have configured the learning rate within it. + final_compile_args$optimizer <- optimizer_arg + } + } else { + # No optimizer provided, use the default (Adam) with the learn_rate. + final_compile_args$optimizer <- keras3::optimizer_adam( + learning_rate = learn_rate + ) + } + + # Resolve loss: use user-provided, otherwise default. Resolve string if needed. 
+  loss_arg <- user_compile_args$loss %||% default_loss
+  if (is.character(loss_arg)) {
+    final_compile_args$loss <- get_keras_object(loss_arg, "loss")
+  } else {
+    final_compile_args$loss <- loss_arg
+  }
+
+  # Resolve metrics: user-supplied or default
+  metrics_arg <- user_compile_args$metrics %||% default_metrics
+  # Keras' `compile()` can handle a single string or a list/vector of strings.
+  # This correctly passes along either the default string or a user-provided vector.
+  final_compile_args$metrics <- metrics_arg
+
+  # Add any other user-provided compile arguments (e.g., `weighted_metrics`)
+  other_args <- user_compile_args[
+    !names(user_compile_args) %in% c("optimizer", "loss", "metrics")
+  ]
+  final_compile_args <- c(final_compile_args, other_args)
+  final_compile_args
+}
+
+#' Collect and Finalize Fitting Arguments
+#'
+#' @description
+#' This internal helper extracts all arguments prefixed with `fit_` from a list
+#' of arguments and combines them with the core arguments for `keras3::fit()`.
+#'
+#' @param x_proc The processed predictor data.
+#' @param y_mat The processed outcome data.
+#' @param epochs The number of epochs.
+#' @param batch_size The batch size.
+#' @param validation_split The validation split proportion.
+#' @param verbose The verbosity level.
+#' @param all_args The list of all arguments passed to the fitting function's `...`.
+#' @return A named list of arguments ready to be passed to `keras3::fit()`.
+#' @noRd +collect_fit_args <- function( + x_proc, + y_mat, + epochs, + batch_size, + validation_split, + verbose, + all_args +) { + # Collect all arguments starting with "fit_" from `...` + fit_arg_names <- names(all_args)[startsWith(names(all_args), "fit_")] + user_fit_args <- all_args[fit_arg_names] + names(user_fit_args) <- sub("^fit_", "", names(user_fit_args)) + + final_fit_args <- c( + list( + x = x_proc, + y = y_mat, + epochs = epochs, + batch_size = batch_size, + validation_split = validation_split, + verbose = verbose + ), + user_fit_args + ) +} diff --git a/R/generic_sequential_fit.R b/R/generic_sequential_fit.R index 8c663a0..ed5a3e7 100644 --- a/R/generic_sequential_fit.R +++ b/R/generic_sequential_fit.R @@ -176,80 +176,27 @@ generic_sequential_fit <- function( } # --- 3. Model Compilation --- - # Collect all arguments starting with "compile_" from `...` - compile_arg_names <- names(all_args)[startsWith(names(all_args), "compile_")] - user_compile_args <- all_args[compile_arg_names] - names(user_compile_args) <- sub("^compile_", "", names(user_compile_args)) - - # --- 3a. Resolve and Finalize Compile Arguments --- - final_compile_args <- list() - - # Determine the final optimizer object, ensuring `learn_rate` is applied. - optimizer_arg <- resolve_default(user_compile_args$optimizer, NULL) - if (!is.null(optimizer_arg)) { - if (is.character(optimizer_arg)) { - # Resolve string to object, passing the learn_rate - final_compile_args$optimizer <- get_keras_object( - optimizer_arg, - "optimizer", - learning_rate = learn_rate - ) - } else { - # User passed a pre-constructed optimizer object, use it as is. - # We assume they have configured the learning rate within it. - final_compile_args$optimizer <- optimizer_arg - } - } else { - # No optimizer provided, use the default (Adam) with the learn_rate. - final_compile_args$optimizer <- keras3::optimizer_adam( - learning_rate = learn_rate - ) - } - - # Resolve loss: use user-provided, otherwise default. 
Resolve string if needed. - loss_arg <- resolve_default(user_compile_args$loss, default_loss) - if (is.character(loss_arg)) { - final_compile_args$loss <- get_keras_object(loss_arg, "loss") - } else { - final_compile_args$loss <- loss_arg - } - - # Resolve metrics: user‐supplied or default - metrics_arg <- resolve_default(user_compile_args$metrics, default_metrics) - # Keras' `compile()` can handle a single string or a list/vector of strings. - # This correctly passes along either the default string or a user-provided vector. - final_compile_args$metrics <- metrics_arg - - # Add any other user-provided compile arguments (e.g., `weighted_metrics`) - other_args <- user_compile_args[ - !names(user_compile_args) %in% c("optimizer", "loss", "metrics") - ] - final_compile_args <- c(final_compile_args, other_args) - - # --- 3b. Compile the Model --- - rlang::exec(keras3::compile, model, !!!final_compile_args) + compile_args <- collect_compile_args( + all_args, + learn_rate, + default_loss, + default_metrics + ) + rlang::exec(keras3::compile, model, !!!compile_args) # --- 4. Model Fitting --- - # Collect all arguments starting with "fit_" from `...` - fit_arg_names <- names(all_args)[startsWith(names(all_args), "fit_")] - user_fit_args <- all_args[fit_arg_names] - names(user_fit_args) <- sub("^fit_", "", names(user_fit_args)) - - # Combine with core fitting arguments - final_fit_args <- c( - list( - x = x_proc, - y = y_mat, - epochs = epochs, - batch_size = batch_size, - validation_split = validation_split, - verbose = verbose - ), - user_fit_args + fit_args <- collect_fit_args( + x_proc, + y_mat, + epochs, + batch_size, + validation_split, + verbose, + all_args ) # Fit the model using the constructed arguments - history <- rlang::exec(keras3::fit, model, !!!final_fit_args) + history <- rlang::exec(keras3::fit, model, !!!fit_args) # --- 5. 
Return value --- # Per parsnip extension guidelines, the fit function should return a list @@ -257,6 +204,7 @@ generic_sequential_fit <- function( # classification, it should also include an element `lvl` with the factor levels. list( fit = model, # The raw Keras model object + history = history, # The training history lvl = class_levels # Factor levels for classification, NULL for regression ) } From 6b491a5c22010cb5f1273c3b79cd8c67eaaf551c Mon Sep 17 00:00:00 2001 From: davidrsch Date: Wed, 30 Jul 2025 14:16:07 +0200 Subject: [PATCH 05/10] Preparing spec_helpers for functional support --- R/create_keras_spec_helpers.R | 74 +++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 20 deletions(-) diff --git a/R/create_keras_spec_helpers.R b/R/create_keras_spec_helpers.R index dc0b401..6e11844 100644 --- a/R/create_keras_spec_helpers.R +++ b/R/create_keras_spec_helpers.R @@ -1,20 +1,36 @@ #' Discover and Collect Model Specification Arguments #' -#' Introspects the provided layer block functions to generate a list of -#' arguments for the new model specification. This includes arguments for -#' block repetition (`num_*`), block-specific hyperparameters (`block_*`), -#' and global training parameters. +#' @description +#' This internal helper introspects the user-provided `layer_blocks` functions +#' to generate a complete list of arguments for the new model specification. +#' The logic for discovering arguments differs for sequential and functional models. +#' +#' @details +#' For **sequential models** (`functional = FALSE`): +#' - It creates `num_{block_name}` arguments to control block repetition. +#' - It inspects the arguments of each block function, skipping the first +#' (assumed to be the `model` object), to find tunable hyperparameters. +#' +#' For **functional models** (`functional = TRUE`): +#' - It does **not** create `num_{block_name}` arguments. +#' - It inspects the arguments of each block function. 
Arguments whose names +#' match other block names are considered graph connections (inputs) and are +#' ignored. The remaining arguments are treated as tunable hyperparameters. +#' +#' In both cases, it also adds global training parameters (like `epochs`) and +#' filters out special engine-supplied arguments (`input_shape`, `num_classes`). #' #' @param layer_blocks A named list of functions defining Keras layer blocks. +#' @param functional A logical. If `TRUE`, uses discovery logic for the +#' Functional API. If `FALSE`, uses logic for the Sequential API. #' @param global_args A character vector of global arguments to add to the #' specification (e.g., "epochs"). #' @return A list containing two elements: -#' - `all_args`: A named list of arguments for the new function signature, -#' initialized with `rlang::zap()`. -#' - `parsnip_names`: A character vector of all argument names for `parsnip`. +#' #' @noRd collect_spec_args <- function( layer_blocks, + functional, global_args = c( "epochs", "batch_size", @@ -36,23 +52,39 @@ collect_spec_args <- function( all_args <- list() parsnip_names <- character() + block_names <- names(layer_blocks) + # block repetition counts (e.g., num_dense) - for (block in names(layer_blocks)) { - num_name <- paste0("num_", block) + for (block_name in block_names) { + num_name <- paste0("num_", block_name) all_args[[num_name]] <- rlang::zap() parsnip_names <- c(parsnip_names, num_name) } # These args are passed by the fit engine, not set by the user in the spec engine_args <- c("input_shape", "num_classes") - # block-specific parameters (skip first 'model' formal) - for (block in names(layer_blocks)) { - fmls_to_process <- rlang::fn_fmls(layer_blocks[[block]])[-1] - # Filter out arguments that are provided by the fitting engine - for (arg in names(fmls_to_process[ - !names(fmls_to_process) %in% engine_args - ])) { - full <- paste0(block, "_", arg) + # Discover block-specific hyperparameters + for (block_name in block_names) { + block_fmls 
<- rlang::fn_fmls(layer_blocks[[block_name]]) + + if (isTRUE(functional)) { + # For functional models, hyperparameters are arguments that are NOT + # names of other blocks (which are graph connections). + hyperparam_names <- setdiff( + names(block_fmls), + c(block_names, engine_args) + ) + } else { + # For sequential models, hyperparameters are all args except the first + # ('model') and special engine args. + fmls_to_process <- if (length(block_fmls) > 0) block_fmls[-1] else list() + hyperparam_names <- names(fmls_to_process)[ + !names(fmls_to_process) %in% engine_args + ] + } + + for (arg in hyperparam_names) { + full <- paste0(block_name, "_", arg) all_args[[full]] <- rlang::zap() parsnip_names <- c(parsnip_names, full) } @@ -69,8 +101,10 @@ collect_spec_args <- function( #' Internal Implementation for Creating Keras Specifications #' -#' This is the core logic for both `create_keras_sequential_spec` and -#' `create_keras_functional_spec`. It is not intended for direct use. +#' @description +#' This is the core implementation for both `create_keras_sequential_spec()` and +#' `create_keras_functional_spec()`. It orchestrates the argument collection, +#' function building, and `parsnip` registration steps. 
#' #' @inheritParams create_keras_sequential_spec #' @param functional A logical, if `TRUE`, registers the model to be fit with @@ -85,7 +119,7 @@ create_keras_spec_impl <- function( functional, env ) { - args_info <- collect_spec_args(layer_blocks) + args_info <- collect_spec_args(layer_blocks, functional = functional) spec_fun <- build_spec_function( model_name, mode, From 22065f494be90507eadebebbd714024fead36a44 Mon Sep 17 00:00:00 2001 From: davidrsch Date: Wed, 30 Jul 2025 19:41:27 +0200 Subject: [PATCH 06/10] Fixing founded issues with compile_ and fit_ args, and other general improvements --- R/create_keras_spec_helpers.R | 39 ++++++----- R/generate_roxygen_docs.R | 77 ++++++++++++--------- R/generic_fit_helpers.R | 61 ++++++++++------ R/generic_sequential_fit.R | 26 ++----- R/globals.R | 10 ++- R/register_model_args.R | 12 ++-- R/{remove_spec.R => remove_keras_spec.R} | 0 R/zzz.R | 43 ++++++++++-- man/generic_sequential_fit.Rd | 38 ++++------ man/keras_objects.Rd | 6 +- tests/testthat/test-e2e-classification.R | 7 +- tests/testthat/test-e2e-features.R | 26 +++++-- tests/testthat/test-e2e-multiblock-tuning.R | 7 +- tests/testthat/test-e2e-regression.R | 7 +- tests/testthat/test-e2e-tuning.R | 7 +- 15 files changed, 221 insertions(+), 145 deletions(-) rename R/{remove_spec.R => remove_keras_spec.R} (100%) diff --git a/R/create_keras_spec_helpers.R b/R/create_keras_spec_helpers.R index 6e11844..13164aa 100644 --- a/R/create_keras_spec_helpers.R +++ b/R/create_keras_spec_helpers.R @@ -12,7 +12,7 @@ #' (assumed to be the `model` object), to find tunable hyperparameters. #' #' For **functional models** (`functional = TRUE`): -#' - It does **not** create `num_{block_name}` arguments. +#' - It creates `num_{block_name}` arguments to control block repetition. #' - It inspects the arguments of each block function. Arguments whose names #' match other block names are considered graph connections (inputs) and are #' ignored. 
The remaining arguments are treated as tunable hyperparameters. @@ -30,21 +30,11 @@ #' @noRd collect_spec_args <- function( layer_blocks, - functional, - global_args = c( - "epochs", - "batch_size", - "learn_rate", - "validation_split", - "verbose", - "compile_loss", - "compile_optimizer", - "compile_metrics" - ) + functional ) { - if (any(c("compile", "optimizer") %in% names(layer_blocks))) { + if (any(c("compile", "fit", "optimizer") %in% names(layer_blocks))) { stop( - "`compile` and `optimizer` are protected names and cannot be used as layer block names.", + "`compile`, `fit` and `optimizer` are protected names and cannot be used as layer block names.", call. = FALSE ) } @@ -90,8 +80,25 @@ collect_spec_args <- function( } } - # global training parameters - for (g in global_args) { + # Add global training and compile parameters dynamically + # These are discovered from keras3::fit and keras3::compile in zzz.R + fit_params <- if (length(keras_fit_arg_names) > 0) { + paste0("fit_", keras_fit_arg_names) + } else { + character() + } + compile_params <- if (length(keras_compile_arg_names) > 0) { + paste0("compile_", keras_compile_arg_names) + } else { + character() + } + + # learn_rate is a special convenience argument for the default optimizer + special_params <- "learn_rate" + + dynamic_global_args <- c(special_params, fit_params, compile_params) + + for (g in dynamic_global_args) { all_args[[g]] <- rlang::zap() parsnip_names <- c(parsnip_names, g) } diff --git a/R/generate_roxygen_docs.R b/R/generate_roxygen_docs.R index 3353024..a2e1324 100644 --- a/R/generate_roxygen_docs.R +++ b/R/generate_roxygen_docs.R @@ -70,20 +70,22 @@ generate_roxygen_docs <- function( # Group args for structured documentation num_params <- arg_names[startsWith(arg_names, "num_")] + fit_params <- arg_names[startsWith(arg_names, "fit_")] compile_params <- arg_names[startsWith(arg_names, "compile_")] - global_params <- c( - "epochs", - "batch_size", - "learn_rate", - "validation_split", - 
"verbose" - ) + # `learn_rate` is a special top-level convenience argument + special_params <- "learn_rate" + block_params <- setdiff( arg_names, - c(num_params, compile_params, global_params) + c(num_params, fit_params, compile_params, special_params) ) # Document block-specific params + if ("learn_rate" %in% block_params) { + # This can happen if a user names a block `learn` and it has a `rate` param. + # It's an edge case, but we should not document it twice. + block_params <- setdiff(block_params, "learn_rate") + } if (length(block_params) > 0) { param_docs <- c( param_docs, @@ -135,40 +137,46 @@ generate_roxygen_docs <- function( ) } - # Document global params - global_param_desc <- list( - epochs = "The total number of iterations to train the model.", - batch_size = "The number of samples per gradient update.", - learn_rate = "The learning rate for the default Adam optimizer. This is ignored if `compile_optimizer` is provided as a pre-built object.", - validation_split = "The proportion of the training data to be used as a validation set.", - verbose = "The level of verbosity for model fitting (0, 1, or 2)." - ) + # Document special `learn_rate` param param_docs <- c( param_docs, - purrr::map_chr(global_params, function(p) { - paste0("@param ", p, " ", global_param_desc[[p]]) - }) + "@param learn_rate The learning rate for the default Adam optimizer. This is ignored if `compile_optimizer` is provided as a pre-built Keras optimizer object." ) # Document compile params - compile_param_desc <- list( - compile_loss = "The loss function for compiling the model. Can be a string (e.g., 'mse') or a Keras loss object. Overrides the default.", - compile_optimizer = "The optimizer for compiling the model. Can be a string (e.g., 'sgd') or a Keras optimizer object. Overrides the default.", - compile_metrics = "A character vector of metrics to monitor during training (e.g., `c('mae', 'mse')`). Overrides the default." 
- ) - param_docs <- c( - param_docs, - purrr::map_chr(compile_params, function(p) { - paste0("@param ", p, " ", compile_param_desc[[p]]) - }) - ) + if (length(compile_params) > 0) { + param_docs <- c( + param_docs, + purrr::map_chr(compile_params, function(p) { + paste0( + "@param ", + p, + " Argument to `keras3::compile()`. See the 'Model Compilation' section." + ) + }) + ) + } + + # Document fit params + if (length(fit_params) > 0) { + param_docs <- c( + param_docs, + purrr::map_chr(fit_params, function(p) { + paste0( + "@param ", + p, + " Argument to `keras3::fit()`. See the 'Model Fitting' section." + ) + }) + ) + } # Add ... param param_docs <- c( param_docs, paste0( - "@param ... Additional arguments passed to the Keras engine. This is commonly used for arguments to `keras3::fit()` (prefixed with `fit_`). ", - "See the 'Model Fitting' and 'Model Compilation' sections for details." + "@param ... Additional arguments passed to the Keras engine. Use this for arguments to `keras3::fit()` or `keras3::compile()` ", + "that are not exposed as top-level arguments." ) ) @@ -178,7 +186,8 @@ generate_roxygen_docs <- function( "#' @section Model Architecture (Functional API):", "#' The Keras model is constructed using the Functional API. Each layer block function's arguments", "#' determine its inputs. For example, a block `function(input_a, input_b, ...)` will be connected", - "#' to the outputs of the `input_a` and `input_b` blocks.", + "#' to the outputs of the `input_a` and `input_b` blocks. You can also repeat a block by setting", + "#' the `num_{block_name}` argument, provided the block has a single input tensor.", "#' The first block in `layer_blocks` is assumed to be the input layer and should not have inputs from other layers." ) see_also_fit <- "generic_functional_fit()" @@ -213,7 +222,7 @@ generate_roxygen_docs <- function( "#' @section Model Fitting:", "#' The model is fit using `keras3::fit()`. 
You can pass any argument to this function by prefixing it with `fit_`.", "#' For example, to add Keras callbacks, you can pass `fit_callbacks = list(callback_early_stopping())`.", - "#' The `epochs` and `batch_size` arguments are also passed to `fit()`." + "#' Common arguments include `fit_epochs`, `fit_batch_size`, and `fit_validation_split`." ) # Other tags diff --git a/R/generic_fit_helpers.R b/R/generic_fit_helpers.R index 3ae2236..85070d1 100644 --- a/R/generic_fit_helpers.R +++ b/R/generic_fit_helpers.R @@ -5,12 +5,15 @@ #' list of arguments, resolves them, and combines them with defaults. #' #' @details -#' It handles the special logic for the `optimizer`, where a string name is -#' resolved to a Keras optimizer object, applying the `learn_rate` if necessary. -#' It also resolves string names for `loss` and `metrics` using `get_keras_object()`. +#' This function orchestrates the compilation setup. It gives precedence to +#' user-provided arguments (e.g., `compile_optimizer`) over the mode-based +#' defaults. It handles the special logic for the `optimizer`, where a string +#' name (e.g., `"sgd"`) is resolved to a Keras optimizer object, applying the +#' top-level `learn_rate` if necessary. It also resolves string names for `loss` +#' and `metrics` using `get_keras_object()`. #' #' @param all_args The list of all arguments passed to the fitting function's `...`. -#' @param learn_rate The main `learn_rate` parameter. +#' @param learn_rate The top-level `learn_rate` parameter. #' @param default_loss The default loss function to use if not provided. #' @param default_metrics The default metric(s) to use if not provided. #' @return A named list of arguments ready to be passed to `keras3::compile()`. 
@@ -69,6 +72,14 @@ collect_compile_args <- function( !names(user_compile_args) %in% c("optimizer", "loss", "metrics") ] final_compile_args <- c(final_compile_args, other_args) + # Filter out arguments that are NULL or rlang_zap before passing to keras3::compile + final_compile_args <- final_compile_args[ + !vapply( + final_compile_args, + function(x) inherits(x, "rlang_zap"), + logical(1) + ) + ] final_compile_args } @@ -78,11 +89,15 @@ collect_compile_args <- function( #' This internal helper extracts all arguments prefixed with `fit_` from a list #' of arguments and combines them with the core arguments for `keras3::fit()`. #' +#' @details +#' It constructs the final list of arguments for `keras3::fit()`. It starts with +#' the required data (`x`, `y`) and the `verbose` setting. It then merges any +#' user-provided arguments from the model specification (e.g., `fit_epochs`, +#' `fit_callbacks`), with the user-provided arguments taking precedence over +#' any defaults. +#' #' @param x_proc The processed predictor data. #' @param y_mat The processed outcome data. -#' @param epochs The number of epochs. -#' @param batch_size The batch size. -#' @param validation_split The validation split proportion. #' @param verbose The verbosity level. #' @param all_args The list of all arguments passed to the fitting function's `...`. #' @return A named list of arguments ready to be passed to `keras3::fit()`. @@ -90,9 +105,6 @@ collect_compile_args <- function( collect_fit_args <- function( x_proc, y_mat, - epochs, - batch_size, - validation_split, verbose, all_args ) { @@ -101,15 +113,24 @@ collect_fit_args <- function( user_fit_args <- all_args[fit_arg_names] names(user_fit_args) <- sub("^fit_", "", names(user_fit_args)) - final_fit_args <- c( - list( - x = x_proc, - y = y_mat, - epochs = epochs, - batch_size = batch_size, - validation_split = validation_split, - verbose = verbose - ), - user_fit_args + # Build the core argument set. 
`verbose` can be overridden by `fit_verbose`. + base_args <- list( + x = x_proc, + y = y_mat, + verbose = verbose ) + + merged_args <- utils::modifyList(base_args, user_fit_args) + + # Filter out arguments that are NULL or rlang_zap before passing to keras3::fit + merged_args <- merged_args[ + !vapply( + merged_args, + function(x) { + inherits(x, "rlang_zap") + }, + logical(1) + ) + ] + merged_args } diff --git a/R/generic_sequential_fit.R b/R/generic_sequential_fit.R index ed5a3e7..9f97836 100644 --- a/R/generic_sequential_fit.R +++ b/R/generic_sequential_fit.R @@ -71,28 +71,13 @@ generic_sequential_fit <- function( x, y, layer_blocks, - epochs = 10, - batch_size = 32, - learn_rate = 0.01, - validation_split = 0.2, - verbose = 0, ... ) { - # --- 0. Resolve arguments --- - # Parsnip passes "zapped" arguments for user-unspecified args. - # This helper replaces them with the function's defaults. - resolve_default <- function(x, default) { - if (inherits(x, "rlang_zap")) default else x - } - fmls <- rlang::fn_fmls(sys.function()) - epochs <- resolve_default(epochs, fmls$epochs) - batch_size <- resolve_default(batch_size, fmls$batch_size) - learn_rate <- resolve_default(learn_rate, fmls$learn_rate) - validation_split <- resolve_default(validation_split, fmls$validation_split) - verbose <- resolve_default(verbose, fmls$verbose) - - # --- 1. Data & Input Shape Preparation --- + # --- 0. Argument & Data Preparation --- all_args <- list(...) + learn_rate <- all_args$learn_rate %||% 0.01 + verbose <- all_args$verbose %||% 0 + # Handle both standard tabular data (matrix) and list-columns of arrays # (for images/sequences) that come from recipes. 
if (is.data.frame(x) && ncol(x) == 1 && is.list(x[[1]])) { @@ -188,9 +173,6 @@ generic_sequential_fit <- function( fit_args <- collect_fit_args( x_proc, y_mat, - epochs, - batch_size, - validation_split, verbose, all_args ) diff --git a/R/globals.R b/R/globals.R index 1a171bb..21d3027 100644 --- a/R/globals.R +++ b/R/globals.R @@ -1,3 +1,11 @@ utils::globalVariables( - c("object", "new_data", "engine", "fresh", "parameters") + c( + "object", + "new_data", + "engine", + "fresh", + "parameters", + "keras_fit_arg_names", + "keras_compile_arg_names" + ) ) diff --git a/R/register_model_args.R b/R/register_model_args.R index a6d808d..3a59a2a 100644 --- a/R/register_model_args.R +++ b/R/register_model_args.R @@ -17,8 +17,10 @@ #' \item Other arguments are mapped based on their suffix (e.g., `dense_units` #' is mapped based on `units`). The internal `keras_dials_map` object #' contains common mappings like `units` -> `dials::hidden_units()`. -#' \item Arguments for `compile_loss` and `compile_optimizer` are mapped to -#' custom `dials` parameter functions within `kerasnip`. +#' \item Arguments for `compile_loss` and `compile_optimizer` are mapped to custom +#' `dials` parameter functions (`loss_function_keras()` and `optimizer_function()`) +#' that are part of the `kerasnip` package itself. The function correctly +#' sets the `pkg` for these to `kerasnip`. #' } #' #' @param model_name The name of the new model specification. @@ -43,9 +45,9 @@ register_model_args <- function(model_name, parsnip_names) { "dropout", "learn_rate", "learn_rate", + "fit_epochs", "epochs", - "epochs", - "batch_size", + "fit_batch_size", "batch_size", "compile_loss", # parsnip arg "loss_function_keras", # dials function from kerasnip @@ -54,7 +56,7 @@ register_model_args <- function(model_name, parsnip_names) { ) # We now allow optimizer to be tuned. Metrics are for tracking, not training. 
- non_tunable <- c("verbose") + non_tunable <- c("fit_verbose") for (arg in parsnip_names) { if (arg %in% non_tunable) { diff --git a/R/remove_spec.R b/R/remove_keras_spec.R similarity index 100% rename from R/remove_spec.R rename to R/remove_keras_spec.R diff --git a/R/zzz.R b/R/zzz.R index 019576f..13762b4 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -5,9 +5,9 @@ #' discovered from the installed `keras3` package when `kerasnip` is loaded. #' This ensures that `kerasnip` is always up-to-date with your Keras version. #' @details -#' These objects are primarily used to provide the default `values` for the -#' `dials` parameter functions, [optimizer_function()] and [loss_function_keras()]. -#' This allows for tab-completion and validation of optimizer and loss names +#' These objects are primarily used to provide the default `values` for the `dials` +#' parameter functions, [optimizer_function()] and [loss_function_keras()]. This +#' allows for tab-completion in IDEs and validation of optimizer and loss names #' when tuning models. #' #' The discovery process in `.onLoad()` scrapes the `keras3` namespace for @@ -27,17 +27,29 @@ keras_losses <- NULL #' @export keras_metrics <- NULL +# These will be populated by .onLoad to hold the names of arguments from +# the keras3::fit and keras3::compile functions. +keras_fit_arg_names <- NULL +keras_compile_arg_names <- NULL + + #' Populate Keras Object Lists on Package Load #' #' @description #' This `.onLoad` hook is executed when the `kerasnip` package is loaded. Its #' main purpose is to inspect the installed `keras3` package and populate the -#' `keras_optimizers`, `keras_losses`, and `keras_metrics` vectors. +#' `keras_optimizers`, `keras_losses`, `keras_metrics`, `keras_fit_arg_names`, +#' and `keras_compile_arg_names` vectors. #' #' @details #' The function works by: #' \enumerate{ #' \item Checking if `keras3` is installed. +#' \item Discovering the names of arguments for `keras3::fit()` and +#' `keras3::compile()`. 
These are used by `create_keras_*_spec()` to +#' dynamically generate the `fit_*` and `compile_*` arguments for the +#' model specification, allowing users to control fitting and compilation +#' directly from the spec. #' \item Listing all functions in the `keras3` namespace that match the patterns #' `optimizer_*`, `loss_*`, and `metric_*`. #' \item For each function, it attempts to extract the default value of the `name` @@ -77,7 +89,9 @@ keras_metrics <- NULL ) assign( "keras_losses", - stats::na.omit(purrr::map_chr(loss_fns, get_keras_default_name, keras_ns)), + stats::na.omit( + purrr::map_chr(loss_fns, get_keras_default_name, keras_ns) + ), envir = parent.env(environment()) ) @@ -100,4 +114,23 @@ keras_metrics <- NULL all_metrics <- unique(sort(c(discovered_metrics, common_metric_aliases))) assign("keras_metrics", all_metrics, envir = parent.env(environment())) + + # Discover and store fit() and compile() arguments + fit_args <- names(formals(keras3:::fit.keras.src.models.model.Model)) + compile_args <- names(formals(keras3:::compile.keras.src.models.model.Model)) + + # Exclude args that are handled specially or don't make sense in the spec + fit_args_to_exclude <- c("object", "x", "y", "...") + compile_args_to_exclude <- c("object", "...") + + assign( + "keras_fit_arg_names", + setdiff(fit_args, fit_args_to_exclude), + envir = parent.env(environment()) + ) + assign( + "keras_compile_arg_names", + setdiff(compile_args, compile_args_to_exclude), + envir = parent.env(environment()) + ) } diff --git a/man/generic_sequential_fit.Rd b/man/generic_sequential_fit.Rd index 2bbbe0d..feeb605 100644 --- a/man/generic_sequential_fit.Rd +++ b/man/generic_sequential_fit.Rd @@ -4,17 +4,7 @@ \alias{generic_sequential_fit} \title{Generic Keras Sequential API Model Fitting Implementation} \usage{ -generic_sequential_fit( - x, - y, - layer_blocks, - epochs = 10, - batch_size = 32, - learn_rate = 0.01, - validation_split = 0.2, - verbose = 0, - ... 
-) +generic_sequential_fit(x, y, layer_blocks, ...) } \arguments{ \item{x}{A data frame or matrix of predictors.} @@ -24,19 +14,6 @@ generic_sequential_fit( \item{layer_blocks}{A named list of layer block functions. This is passed internally from the \code{parsnip} model specification.} -\item{epochs}{An integer for the number of training iterations.} - -\item{batch_size}{An integer for the number of samples per gradient update. -This is a tunable parameter and is passed to \code{keras3::fit()}.} - -\item{learn_rate}{A double for the learning rate, used to configure the -default Adam optimizer.} - -\item{validation_split}{The proportion of the training data to use for -the validation set.} - -\item{verbose}{An integer for the verbosity of the fitting process (0, 1, or 2).} - \item{...}{Additional arguments passed down from the model specification. These can include: \itemize{ @@ -52,6 +29,19 @@ prefixed with \code{compile_} (e.g., \code{compile_loss = "mae"}, prefixed with \code{fit_} (e.g., \code{fit_callbacks = list(...)}, \code{fit_class_weight = list(...)}). }} + +\item{epochs}{An integer for the number of training iterations.} + +\item{learn_rate}{A double for the learning rate, used to configure the +default Adam optimizer.} + +\item{batch_size}{An integer for the number of samples per gradient update. +This is a tunable parameter and is passed to \code{keras3::fit()}.} + +\item{validation_split}{The proportion of the training data to use for +the validation set.} + +\item{verbose}{An integer for the verbosity of the fitting process (0, 1, or 2).} } \value{ A list containing the fitted model and other metadata. This list is diff --git a/man/keras_objects.Rd b/man/keras_objects.Rd index 886cbec..94a02b3 100644 --- a/man/keras_objects.Rd +++ b/man/keras_objects.Rd @@ -27,9 +27,9 @@ discovered from the installed \code{keras3} package when \code{kerasnip} is load This ensures that \code{kerasnip} is always up-to-date with your Keras version. 
} \details{ -These objects are primarily used to provide the default \code{values} for the -\code{dials} parameter functions, \code{\link[=optimizer_function]{optimizer_function()}} and \code{\link[=loss_function_keras]{loss_function_keras()}}. -This allows for tab-completion and validation of optimizer and loss names +These objects are primarily used to provide the default \code{values} for the \code{dials} +parameter functions, \code{\link[=optimizer_function]{optimizer_function()}} and \code{\link[=loss_function_keras]{loss_function_keras()}}. This +allows for tab-completion in IDEs and validation of optimizer and loss names when tuning models. The discovery process in \code{.onLoad()} scrapes the \code{keras3} namespace for diff --git a/tests/testthat/test-e2e-classification.R b/tests/testthat/test-e2e-classification.R index 76205b2..dbd3fe2 100644 --- a/tests/testthat/test-e2e-classification.R +++ b/tests/testthat/test-e2e-classification.R @@ -12,8 +12,11 @@ test_that("E2E: Classification spec generation, fitting, and prediction works", model |> keras3::layer_dense(units = num_classes, activation = "softmax") } + model_name <- "e2e_mlp_class" + on.exit(suppressMessages(remove_keras_spec(model_name)), add = TRUE) + create_keras_sequential_spec( - model_name = "e2e_mlp_class", + model_name = model_name, layer_blocks = list( input = input_block_class, dense = dense_block_class, @@ -25,7 +28,7 @@ test_that("E2E: Classification spec generation, fitting, and prediction works", spec <- e2e_mlp_class( num_dense = 2, dense_units = 8, - epochs = 2 + fit_epochs = 2 ) |> set_engine("keras") diff --git a/tests/testthat/test-e2e-features.R b/tests/testthat/test-e2e-features.R index a147b2a..6c47e6a 100644 --- a/tests/testthat/test-e2e-features.R +++ b/tests/testthat/test-e2e-features.R @@ -11,8 +11,11 @@ test_that("E2E: Customizing main arguments works", { model |> keras3::layer_dense(units = 1) } + model_name <- "e2e_mlp_feat" + 
on.exit(suppressMessages(remove_keras_spec(model_name)), add = TRUE) + create_keras_sequential_spec( - model_name = "e2e_mlp_feat", + model_name = model_name, layer_blocks = list( input = input_block_feat, dense = dense_block_feat, @@ -24,7 +27,7 @@ test_that("E2E: Customizing main arguments works", { # Main arguments (like compile_*) should be set in the spec function, # not in set_engine(). spec <- e2e_mlp_feat( - epochs = 2, + fit_epochs = 2, compile_optimizer = "sgd", compile_loss = "mae", compile_metrics = c("mean_squared_error", "root_mean_squared_error") @@ -68,8 +71,11 @@ test_that("E2E: Customizing fit arguments works", { model |> keras3::layer_dense(units = 1) } + model_name <- "e2e_mlp_fit" + on.exit(suppressMessages(remove_keras_spec(model_name)), add = TRUE) + create_keras_sequential_spec( - model_name = "e2e_mlp_fit", + model_name = model_name, layer_blocks = list( input = input_block_fit, dense = dense_block_fit, @@ -109,8 +115,11 @@ test_that("E2E: Setting num_blocks = 0 works", { model |> keras3::layer_dense(units = 1) } + model_name <- "e2e_mlp_zero" + on.exit(suppressMessages(remove_keras_spec(model_name)), add = TRUE) + create_keras_sequential_spec( - model_name = "e2e_mlp_zero", + model_name = model_name, layer_blocks = list( input = input_block_zero, dense = dense_block_zero, @@ -119,20 +128,23 @@ test_that("E2E: Setting num_blocks = 0 works", { mode = "regression" ) - spec <- e2e_mlp_zero(num_dense = 0, epochs = 2) |> + spec <- e2e_mlp_zero(num_dense = 0, fit_epochs = 2) |> parsnip::set_engine("keras") # This should fit a model with only an input and output layer expect_no_error(parsnip::fit(spec, mpg ~ ., data = mtcars)) }) test_that("E2E: Error handling for reserved names works", { + model_name <- "bad_spec" + on.exit(suppressMessages(remove_keras_spec(model_name)), add = TRUE) + bad_blocks <- list( compile = function(model) model, # "compile" is a reserved name dense = function(model, u = 1) model |> keras3::layer_dense(units = u) ) 
expect_error( - create_keras_sequential_spec("bad_spec", bad_blocks), - regexp = "`compile` and `optimizer` are protected names" + create_keras_sequential_spec(model_name, bad_blocks), + regexp = "`compile`, `fit` and `optimizer` are protected names" ) }) diff --git a/tests/testthat/test-e2e-multiblock-tuning.R b/tests/testthat/test-e2e-multiblock-tuning.R index 48ea381..ac134ed 100644 --- a/tests/testthat/test-e2e-multiblock-tuning.R +++ b/tests/testthat/test-e2e-multiblock-tuning.R @@ -21,8 +21,11 @@ test_that("E2E: Multi-block model tuning works", { model |> keras3::layer_dense(units = num_classes, activation = "softmax") } + model_name <- "mb_mt" + on.exit(suppressMessages(remove_keras_spec(model_name)), add = TRUE) + create_keras_sequential_spec( - model_name = "mb_mt", + model_name = model_name, layer_blocks = list( input = input_block_mb, start = starting_layers, @@ -37,7 +40,7 @@ test_that("E2E: Multi-block model tuning works", { start_layer1_units = tune(), start_layer2_units = tune(), end_units = tune(), - epochs = 1 + fit_epochs = 1 ) |> set_engine("keras") diff --git a/tests/testthat/test-e2e-regression.R b/tests/testthat/test-e2e-regression.R index a3a5475..9aa9b1b 100644 --- a/tests/testthat/test-e2e-regression.R +++ b/tests/testthat/test-e2e-regression.R @@ -13,8 +13,11 @@ test_that("E2E: Regression spec generation, fitting, and prediction works", { model |> keras3::layer_dense(units = 1) } + model_name <- "e2e_mlp_reg" + on.exit(suppressMessages(remove_keras_spec(model_name)), add = TRUE) + create_keras_sequential_spec( - model_name = "e2e_mlp_reg", + model_name = model_name, layer_blocks = list( input = input_block_reg, dense = dense_block_reg, @@ -26,7 +29,7 @@ test_that("E2E: Regression spec generation, fitting, and prediction works", { spec <- e2e_mlp_reg( num_dense = 2, dense_units = 8, - epochs = 2, + fit_epochs = 2, learn_rate = 0.01 ) |> set_engine("keras") diff --git a/tests/testthat/test-e2e-tuning.R b/tests/testthat/test-e2e-tuning.R 
index e0341c9..9b9b608 100644 --- a/tests/testthat/test-e2e-tuning.R +++ b/tests/testthat/test-e2e-tuning.R @@ -12,8 +12,11 @@ test_that("E2E: Tuning works with a generated spec", { model |> keras3::layer_dense(units = num_classes, activation = "softmax") } + model_name <- "e2e_mlp_class_tune" + on.exit(suppressMessages(remove_keras_spec(model_name)), add = TRUE) + create_keras_sequential_spec( - model_name = "e2e_mlp_class_tune", + model_name = model_name, layer_blocks = list( input = input_block_tune, dense = dense_block_tune, @@ -25,7 +28,7 @@ test_that("E2E: Tuning works with a generated spec", { tune_spec <- e2e_mlp_class_tune( num_dense = tune(), dense_units = tune(), - epochs = 1 + fit_epochs = 1 ) |> set_engine("keras") From 9a3a388ea0bddc99ea39a494b827973e2c01aebc Mon Sep 17 00:00:00 2001 From: davidrsch Date: Wed, 30 Jul 2025 20:52:28 +0200 Subject: [PATCH 07/10] Adding support for functional API --- NAMESPACE | 2 +- R/create_keras_functional_spec.R | 141 +++++++++++++++++ R/generate_roxygen_docs.R | 24 ++- R/generic_functional_fit.R | 233 ++++++++++++++++++++++++++++ man/create_keras_functional_spec.Rd | 146 +++++++++++++++++ man/generic_functional_fit.Rd | 90 +++++++++++ man/remove_keras_spec.Rd | 2 +- 7 files changed, 633 insertions(+), 5 deletions(-) create mode 100644 R/create_keras_functional_spec.R create mode 100644 R/generic_functional_fit.R create mode 100644 man/create_keras_functional_spec.Rd create mode 100644 man/generic_functional_fit.Rd diff --git a/NAMESPACE b/NAMESPACE index 2d70fc5..32eca4c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,7 @@ # Generated by roxygen2: do not edit by hand export(create_keras_functional_spec) -export(create_keras_spec) +export(create_keras_sequential_spec) export(generic_functional_fit) export(generic_sequential_fit) export(keras_losses) diff --git a/R/create_keras_functional_spec.R b/R/create_keras_functional_spec.R new file mode 100644 index 0000000..544137b --- /dev/null +++ 
b/R/create_keras_functional_spec.R
@@ -0,0 +1,141 @@
+#' Create a Custom Keras Functional API Model Specification for Tidymodels
+#'
+#' This function acts as a factory to generate a new `parsnip` model
+#' specification based on user-defined blocks of Keras layers using the
+#' Functional API. This allows for creating complex, tunable architectures
+#' with non-linear topologies that integrate seamlessly with the `tidymodels`
+#' ecosystem.
+#'
+#' @param model_name A character string for the name of the new model
+#' specification function (e.g., "custom_resnet"). This should be a valid R
+#' function name.
+#' @param layer_blocks A named list of functions where each function defines a
+#' "block" (a node) in the model graph. The list names are crucial as they
+#' define the names of the nodes. The arguments of each function define how
+#' the nodes are connected. See the "Model Graph Connectivity" section for
+#' details.
+#' @param mode A character string, either "regression" or "classification".
+#' @param ... Reserved for future use. Currently not used.
+#' @param env The environment in which to create the new model specification
+#' function and its associated `update()` method. Defaults to the calling
+#' environment (`parent.frame()`).
+#'
+#' @details
+#' This function generates all the boilerplate needed to create a custom,
+#' tunable `parsnip` model specification that uses the Keras Functional API.
+#' This is ideal for models with complex, non-linear topologies, such as
+#' networks with multiple inputs/outputs or residual connections.
+#'
+#' The function inspects the arguments of your `layer_blocks` functions and
+#' makes them available as tunable parameters in the generated model
+#' specification, prefixed with the block's name (e.g., `dense_units`).
+#' Common training parameters such as `fit_epochs` and `learn_rate` are also added. 
+#' +#' @section Model Graph Connectivity: +#' `kerasnip` builds the model's directed acyclic graph by inspecting the +#' arguments of each function in the `layer_blocks` list. The connection logic +#' is as follows: +#' +#' 1. The **names of the elements** in the `layer_blocks` list define the names +#' of the nodes in your graph (e.g., `main_input`, `dense_path`, `output`). +#' 2. The **names of the arguments** in each block function specify its inputs. +#' A block function like `my_block <- function(input_a, input_b, ...)` +#' declares that it needs input from the nodes named `input_a` and `input_b`. +#' `kerasnip` will automatically supply the output tensors from those nodes +#' when calling `my_block`. +#' +#' There are two special requirements: +#' * **Input Block**: The first block in the list is treated as the input +#' node. Its function should not take other blocks as input, but it can have +#' an `input_shape` argument, which is supplied automatically during fitting. +#' * **Output Block**: Exactly one block must be named `"output"`. The tensor +#' returned by this block is used as the final output of the Keras model. +#' +#' A key feature is the automatic creation of `num_{block_name}` arguments +#' (e.g., `num_dense_path`). This allows you to control how many times a block +#' is repeated, making it easy to tune the depth of your network. A block can +#' only be repeated if it has exactly one input from another block in the graph. +#' +#' The new model specification function and its `update()` method are created +#' in the environment specified by the `env` argument. +#' +#' @importFrom rlang enquos dots_list arg_match env_poke +#' @importFrom parsnip update_dot_check +#' +#' @return Invisibly returns `NULL`. Its primary side effect is to create a +#' new model specification function (e.g., `custom_resnet()`) in the +#' specified environment and register the model with `parsnip` so it can be +#' used within the `tidymodels` framework. 
+#' +#' @seealso [remove_keras_spec()], [parsnip::new_model_spec()], +#' [create_keras_sequential_spec()] +#' +#' @export +#' @examples +#' \dontrun{ +#' if (requireNamespace("keras3", quietly = TRUE)) { +#' library(keras3) +#' library(parsnip) +#' +#' # 1. Define block functions. These are the building blocks of our model. +#' # An input block that receives the data's shape automatically. +#' input_block <- function(input_shape) layer_input(shape = input_shape) +#' +#' # A dense block with a tunable `units` parameter. +#' dense_block <- function(tensor, units) { +#' tensor |> layer_dense(units = units, activation = "relu") +#' } +#' +#' # A block that adds two tensors together (for the residual connection). +#' add_block <- function(input_a, input_b) layer_add(list(input_a, input_b)) +#' +#' # An output block for regression. +#' output_block_reg <- function(tensor) layer_dense(tensor, units = 1) +#' +#' # 2. Create the spec. The `layer_blocks` list defines the graph. +#' create_keras_functional_spec( +#' model_name = "my_resnet_spec", +#' layer_blocks = list( +#' # The names of list elements are the node names. +#' main_input = input_block, +#' +#' # The argument `main_input` connects this block to the input node. +#' dense_path = function(main_input, units = 32) dense_block(main_input, units), +#' +#' # This block's arguments connect it to the original input AND the dense layer. +#' add_residual = function(main_input, dense_path) add_block(main_input, dense_path), +#' +#' # This block must be named 'output'. It connects to the residual add layer. +#' output = function(add_residual) output_block_reg(add_residual) +#' ), +#' mode = "regression" +#' ) +#' +#' # 3. Use the newly created specification function! +#' # The `dense_path_units` argument was created automatically. 
+#' model_spec <- my_resnet_spec(dense_path_units = 64, fit_epochs = 10)
+#'
+#' # You could also tune the number of dense layers since it has a single input:
+#' # model_spec <- my_resnet_spec(num_dense_path = 2, dense_path_units = 32)
+#'
+#' print(model_spec)
+#' # tune::tunable(model_spec)
+#' }
+#' }
+create_keras_functional_spec <- function(
+  model_name,
+  layer_blocks,
+  mode = c("regression", "classification"),
+  ...,
+  env = parent.frame()
+) {
+  mode <- rlang::arg_match(mode)
+  # 1. Argument Validation
+  create_keras_spec_impl(
+    model_name,
+    layer_blocks,
+    mode,
+    functional = TRUE,
+    env
+  )
+}
diff --git a/R/generate_roxygen_docs.R b/R/generate_roxygen_docs.R
index a2e1324..00c8329 100644
--- a/R/generate_roxygen_docs.R
+++ b/R/generate_roxygen_docs.R
@@ -87,12 +87,30 @@ generate_roxygen_docs <- function(
     block_params <- setdiff(block_params, "learn_rate")
   }
   if (length(block_params) > 0) {
+    # Sort block names by length descending to handle overlapping names
+    # (e.g., "dense" and "dense_layer")
+    sorted_block_names <- names(layer_blocks)[
+      order(nchar(names(layer_blocks)), decreasing = TRUE)
+    ]
+
     param_docs <- c(
       param_docs,
       purrr::map_chr(block_params, function(p) {
-        parts <- strsplit(p, "_", fixed = TRUE)[[1]]
-        block_name <- parts[1]
-        param_name <- paste(parts[-1], collapse = "_")
+        # Find the block name that is a prefix for this parameter.
+        # The `Find` function returns the first match, and since we sorted
+        # block names by length, it will find the longest possible match.
+        block_name <- Find(
+          function(bn) startsWith(p, paste0(bn, "_")),
+          sorted_block_names
+        )
+
+        if (is.null(block_name)) {
+          # This should not happen if collect_spec_args is correct, but as a
+          # fallback, we avoid an error. 
+ return(paste0("@param ", p, " A model parameter.")) + } + + param_name <- sub(paste0(block_name, "_"), "", p, fixed = TRUE) block_fn <- layer_blocks[[block_name]] default_val <- rlang::fn_fmls(block_fn)[[param_name]] default_str <- if ( diff --git a/R/generic_functional_fit.R b/R/generic_functional_fit.R new file mode 100644 index 0000000..d7d5d4c --- /dev/null +++ b/R/generic_functional_fit.R @@ -0,0 +1,233 @@ +#' Generic Keras Functional API Model Fitting Implementation +#' +#' @description +#' This function is the internal engine for fitting models generated by +#' `create_keras_functional_spec()`. It is not intended to be called directly +#' by the user. +#' +#' @details +#' This function performs the following key steps: +#' \enumerate{ +#' \item \strong{Argument & Data Preparation:} It resolves arguments passed +#' from `parsnip` (handling `rlang_zap` objects for unspecified arguments) +#' and prepares the `x` and `y` data for Keras. It automatically determines +#' the `input_shape` from `x` and, for classification, the `num_classes` +#' from `y`. +#' \item \strong{Dynamic Model Construction:} It builds the Keras model graph +#' by processing the `layer_blocks` list. +#' \itemize{ +#' \item \strong{Connectivity:} The graph is connected by matching the +#' argument names of each block function to the names of previously +#' defined blocks. For example, a block `function(input_a, ...)` will +#' receive the output tensor from the block named `input_a`. +#' \item \strong{Repetition:} It checks for `num_{block_name}` arguments +#' to repeat a block multiple times, creating a chain of identical +#' layers. A block can only be repeated if it has exactly one input +#' tensor from another block. +#' } +#' \item \strong{Model Compilation:} It compiles the final Keras model. The +#' compilation arguments (optimizer, loss, metrics) can be customized by +#' passing arguments prefixed with `compile_` (e.g., `compile_loss = "mae"`). 
+#' \item \strong{Model Fitting:} It calls `keras3::fit()` to train the model +#' on the prepared data. +#' } +#' +#' @param x A data frame or matrix of predictors. +#' @param y A vector of outcomes. +#' @param layer_blocks A named list of layer block functions. This is passed +#' internally from the `parsnip` model specification. +#' @param epochs An integer for the number of training iterations. +#' @param learn_rate A double for the learning rate, used to configure the +#' default Adam optimizer. +#' @param batch_size An integer for the number of samples per gradient update. +#' This is a tunable parameter and is passed to `keras3::fit()`. +#' @param validation_split The proportion of the training data to use for the +#' validation set. +#' @param verbose An integer for the verbosity of the fitting process (0, 1, or +#' 2). +#' @param ... Additional arguments passed down from the model specification. +#' These can include: +#' \itemize{ +#' \item \strong{Layer Parameters:} Arguments for the layer blocks, prefixed +#' with the block name (e.g., `dense_units = 64`). +#' \item \strong{Architecture Parameters:} Arguments to control the number +#' of times a block is repeated, in the format `num_{block_name}` (e.g., +#' `num_dense = 2`). +#' \item \strong{Compile Parameters:} Arguments to customize model +#' compilation, prefixed with `compile_` (e.g., `compile_loss = "mae"`, +#' `compile_optimizer = "sgd"`). +#' \item \strong{Fit Parameters:} Arguments to customize model fitting, +#' prefixed with `fit_` (e.g., `fit_callbacks = list(...)`, +#' `fit_class_weight = list(...)`). +#' } +#' +#' @return A list containing the fitted model and other metadata. This list is +#' stored in the `fit` slot of the `parsnip` model fit object. The list +#' contains the following elements: +#' \itemize{ +#' \item `fit`: The raw, fitted Keras model object. +#' \item `history`: The Keras training history object. 
+#'   \item `lvl`: A character vector of the outcome factor levels (for
+#'     classification) or `NULL` (for regression).
+#' }
+#' @keywords internal
+#' @export
+generic_functional_fit <- function(
+  x,
+  y,
+  layer_blocks,
+  ...
+) {
+  # --- 1. Argument & Data Preparation ---
+  all_args <- list(...)
+  learn_rate <- all_args$learn_rate %||% 0.01
+  verbose <- all_args$verbose %||% 0
+
+  if (is.data.frame(x) && ncol(x) == 1 && is.list(x[[1]])) {
+    x_proc <- do.call(abind::abind, c(x[[1]], list(along = 0)))
+  } else {
+    x_proc <- as.matrix(x)
+  }
+  input_shape <- if (length(dim(x_proc)) > 2) dim(x_proc)[-1] else ncol(x_proc)
+  is_classification <- is.factor(y)
+  if (is_classification) {
+    class_levels <- levels(y)
+    num_classes <- length(class_levels)
+    y_mat <- keras3::to_categorical(
+      as.numeric(y) - 1,
+      num_classes = num_classes
+    )
+    default_loss <- if (num_classes > 2) {
+      "categorical_crossentropy"
+    } else {
+      "binary_crossentropy"
+    }
+    default_metrics <- "accuracy"
+  } else {
+    class_levels <- NULL
+    y_mat <- as.matrix(y)
+    default_loss <- "mean_squared_error"
+    default_metrics <- "mean_absolute_error"
+  }
+
+  # --- 2. Dynamic Model Architecture Construction (DIFFERENT from sequential) ---
+  # Create a list to store the output tensors of each block. The names of the
+  # list elements correspond to the block names.
+  block_outputs <- list()
+  # The first block MUST be the input node and MUST NOT take other blocks as input.
+  first_block_name <- names(layer_blocks)[1]
+  first_block_fn <- layer_blocks[[first_block_name]]
+  block_outputs[[first_block_name]] <- first_block_fn(input_shape = input_shape)
+
+  # Iterate through the remaining blocks, connecting and repeating them as needed.
+ for (block_name in names(layer_blocks)[-1]) { + block_fn <- layer_blocks[[block_name]] + block_fmls <- rlang::fn_fmls(block_fn) + block_fml_names <- names(block_fmls) + + # --- Get Repetition Count --- + num_repeats_arg <- paste0("num_", block_name) + num_repeats <- all_args[[num_repeats_arg]] %||% 1 + + # --- Get Hyperparameters for this block --- + # Hyperparameters are formals that are NOT other block names (graph connections) + hyperparam_names <- setdiff(block_fml_names, names(layer_blocks)) + user_hyperparams <- list() + for (hp_name in hyperparam_names) { + full_arg_name <- paste(block_name, hp_name, sep = "_") + arg_val <- all_args[[full_arg_name]] + if (!is.null(arg_val) && !inherits(arg_val, "rlang_zap")) { + user_hyperparams[[hp_name]] <- arg_val + } + } + # Combine user args with the block's defaults for those hyperparameters + block_hyperparams <- utils::modifyList( + as.list(block_fmls[hyperparam_names]), + user_hyperparams + ) + + # Add special engine-supplied arguments if the block can accept them + if (is_classification && "num_classes" %in% block_fml_names) { + block_hyperparams$num_classes <- num_classes + } + + # --- Get Input Tensors for this block --- + input_tensor_names <- intersect(block_fml_names, names(block_outputs)) + if (length(input_tensor_names) == 0 && block_name != "output") { + warning("Block '", block_name, "' has no inputs from other blocks.") + } + + # --- Repetition Loop --- + if (num_repeats > 1 && length(input_tensor_names) != 1) { + stop( + "Block '", + block_name, + "' cannot be repeated because it has ", + length(input_tensor_names), + " inputs (", + paste(input_tensor_names, collapse = ", "), + "). Only blocks with exactly one input tensor can be repeated." 
+      )
+    }
+
+    # The initial input(s) for the first iteration
+    input_args <- purrr::map(input_tensor_names, ~ block_outputs[[.x]])
+    names(input_args) <- input_tensor_names
+
+    # The tensor that will be updated and passed through the loop
+    current_tensor <- input_args[[1]]
+
+    for (i in seq_len(num_repeats)) {
+      # For repetitions after the first, update the input tensor
+      if (i > 1) {
+        input_args[[input_tensor_names[1]]] <- current_tensor
+      }
+      call_args <- c(input_args, block_hyperparams)
+      current_tensor <- rlang::exec(block_fn, !!!call_args)
+    }
+
+    # Store the final output of the (possibly repeated) block
+    block_outputs[[block_name]] <- current_tensor
+  }
+
+  # The final block must be named 'output'
+  output_tensor <- block_outputs[["output"]]
+  if (is.null(output_tensor)) {
+    stop("An 'output' block must be defined in layer_blocks.")
+  }
+  model <- keras3::keras_model(
+    inputs = block_outputs[[first_block_name]],
+    outputs = output_tensor
+  )
+
+  # --- 3. Model Compilation ---
+  # Collect all arguments starting with "compile_" from `...`
+  compile_args <- collect_compile_args(
+    all_args,
+    learn_rate,
+    default_loss,
+    default_metrics
+  )
+  rlang::exec(keras3::compile, model, !!!compile_args)
+
+  # --- 4. Model Fitting ---
+  fit_args <- collect_fit_args(
+    x_proc,
+    y_mat,
+    verbose,
+    all_args
+  )
+
+  # Fit the model using the constructed arguments
+  history <- rlang::exec(keras3::fit, model, !!!fit_args)
+
+  # --- 5. Return value ---
+  # Per parsnip extension guidelines, the fit function should return a list
+  # containing the raw model object in an element named `fit`. For
+  # classification, it should also include an element `lvl` with the factor levels.
+ list( + fit = model, # The raw Keras model object + history = history, # The training history + lvl = class_levels # Factor levels for classification, NULL for regression + ) +} diff --git a/man/create_keras_functional_spec.Rd b/man/create_keras_functional_spec.Rd new file mode 100644 index 0000000..d618fa3 --- /dev/null +++ b/man/create_keras_functional_spec.Rd @@ -0,0 +1,146 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_keras_functional_spec.R +\name{create_keras_functional_spec} +\alias{create_keras_functional_spec} +\title{Create a Custom Keras Functional API Model Specification for Tidymodels} +\usage{ +create_keras_functional_spec( + model_name, + layer_blocks, + mode = c("regression", "classification"), + ..., + env = parent.frame() +) +} +\arguments{ +\item{model_name}{A character string for the name of the new model +specification function (e.g., "custom_resnet"). This should be a valid R +function name.} + +\item{layer_blocks}{A named list of functions where each function defines a +"block" (a node) in the model graph. The list names are crucial as they +define the names of the nodes. The arguments of each function define how +the nodes are connected. See the "Model Graph Connectivity" section for +details.} + +\item{mode}{A character string, either "regression" or "classification".} + +\item{...}{Reserved for future use. Currently not used.} + +\item{env}{The environment in which to create the new model specification +function and its associated \code{update()} method. Defaults to the calling +environment (\code{parent.frame()}).} +} +\value{ +Invisibly returns \code{NULL}. Its primary side effect is to create a +new model specification function (e.g., \code{custom_resnet()}) in the +specified environment and register the model with \code{parsnip} so it can be +used within the \code{tidymodels} framework. 
+} +\description{ +This function acts as a factory to generate a new \code{parsnip} model +specification based on user-defined blocks of Keras layers using the +Functional API. This allows for creating complex, tunable architectures +with non-linear topologies that integrate seamlessly with the \code{tidymodels} +ecosystem. +} +\details{ +This function generates all the boilerplate needed to create a custom, +tunable \code{parsnip} model specification that uses the Keras Functional API. +This is ideal for models with complex, non-linear topologies, such as +networks with multiple inputs/outputs or residual connections. + +The function inspects the arguments of your \code{layer_blocks} functions and +makes them available as tunable parameters in the generated model +specification, prefixed with the block's name (e.g., \code{dense_units}). +Common training parameters such as \code{epochs} and \code{learn_rate} are also added. +} +\section{Model Graph Connectivity}{ + +\code{kerasnip} builds the model's directed acyclic graph by inspecting the +arguments of each function in the \code{layer_blocks} list. The connection logic +is as follows: +\enumerate{ +\item The \strong{names of the elements} in the \code{layer_blocks} list define the names +of the nodes in your graph (e.g., \code{main_input}, \code{dense_path}, \code{output}). +\item The \strong{names of the arguments} in each block function specify its inputs. +A block function like \verb{my_block <- function(input_a, input_b, ...)} +declares that it needs input from the nodes named \code{input_a} and \code{input_b}. +\code{kerasnip} will automatically supply the output tensors from those nodes +when calling \code{my_block}. +} + +There are two special requirements: +\itemize{ +\item \strong{Input Block}: The first block in the list is treated as the input +node. Its function should not take other blocks as input, but it can have +an \code{input_shape} argument, which is supplied automatically during fitting. 
+\item \strong{Output Block}: Exactly one block must be named \code{"output"}. The tensor +returned by this block is used as the final output of the Keras model. +} + +A key feature is the automatic creation of \verb{num_\{block_name\}} arguments +(e.g., \code{num_dense_path}). This allows you to control how many times a block +is repeated, making it easy to tune the depth of your network. A block can +only be repeated if it has exactly one input from another block in the graph. + +The new model specification function and its \code{update()} method are created +in the environment specified by the \code{env} argument. +} + +\examples{ +\dontrun{ +if (requireNamespace("keras3", quietly = TRUE)) { + library(keras3) + library(parsnip) + + # 1. Define block functions. These are the building blocks of our model. + # An input block that receives the data's shape automatically. + input_block <- function(input_shape) layer_input(shape = input_shape) + + # A dense block with a tunable `units` parameter. + dense_block <- function(tensor, units) { + tensor |> layer_dense(units = units, activation = "relu") + } + + # A block that adds two tensors together (for the residual connection). + add_block <- function(input_a, input_b) layer_add(list(input_a, input_b)) + + # An output block for regression. + output_block_reg <- function(tensor) layer_dense(tensor, units = 1) + + # 2. Create the spec. The `layer_blocks` list defines the graph. + create_keras_functional_spec( + model_name = "my_resnet_spec", + layer_blocks = list( + # The names of list elements are the node names. + main_input = input_block, + + # The argument `main_input` connects this block to the input node. + dense_path = function(main_input, units = 32) dense_block(main_input, units), + + # This block's arguments connect it to the original input AND the dense layer. + add_residual = function(main_input, dense_path) add_block(main_input, dense_path), + + # This block must be named 'output'. 
It connects to the residual add layer. + output = function(add_residual) output_block_reg(add_residual) + ), + mode = "regression" + ) + + # 3. Use the newly created specification function! + # The `dense_path_units` argument was created automatically. + model_spec <- my_resnet_spec(dense_path_units = 64, epochs = 10) + + # You could also tune the number of dense layers since it has a single input: + # model_spec <- my_resnet_spec(num_dense_path = 2, dense_path_units = 32) + + print(model_spec) + # tune::tunable(model_spec) +} +} +} +\seealso{ +\code{\link[=remove_keras_spec]{remove_keras_spec()}}, \code{\link[parsnip:add_on_exports]{parsnip::new_model_spec()}}, +\code{\link[=create_keras_sequential_spec]{create_keras_sequential_spec()}} +} diff --git a/man/generic_functional_fit.Rd b/man/generic_functional_fit.Rd new file mode 100644 index 0000000..b105361 --- /dev/null +++ b/man/generic_functional_fit.Rd @@ -0,0 +1,90 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generic_functional_fit.R +\name{generic_functional_fit} +\alias{generic_functional_fit} +\title{Generic Keras Functional API Model Fitting Implementation} +\usage{ +generic_functional_fit(x, y, layer_blocks, ...) +} +\arguments{ +\item{x}{A data frame or matrix of predictors.} + +\item{y}{A vector of outcomes.} + +\item{layer_blocks}{A named list of layer block functions. This is passed +internally from the \code{parsnip} model specification.} + +\item{...}{Additional arguments passed down from the model specification. +These can include: +\itemize{ +\item \strong{Layer Parameters:} Arguments for the layer blocks, prefixed +with the block name (e.g., \code{dense_units = 64}). +\item \strong{Architecture Parameters:} Arguments to control the number +of times a block is repeated, in the format \verb{num_\{block_name\}} (e.g., +\code{num_dense = 2}). 
+\item \strong{Compile Parameters:} Arguments to customize model +compilation, prefixed with \code{compile_} (e.g., \code{compile_loss = "mae"}, +\code{compile_optimizer = "sgd"}). +\item \strong{Fit Parameters:} Arguments to customize model fitting, +prefixed with \code{fit_} (e.g., \code{fit_callbacks = list(...)}, +\code{fit_class_weight = list(...)}). +}} + +\item{epochs}{An integer for the number of training iterations.} + +\item{learn_rate}{A double for the learning rate, used to configure the +default Adam optimizer.} + +\item{batch_size}{An integer for the number of samples per gradient update. +This is a tunable parameter and is passed to \code{keras3::fit()}.} + +\item{validation_split}{The proportion of the training data to use for the +validation set.} + +\item{verbose}{An integer for the verbosity of the fitting process (0, 1, or +2).} +} +\value{ +A list containing the fitted model and other metadata. This list is +stored in the \code{fit} slot of the \code{parsnip} model fit object. The list +contains the following elements: +\itemize{ +\item \code{fit}: The raw, fitted Keras model object. +\item \code{history}: The Keras training history object. +\item \code{lvl}: A character vector of the outcome factor levels (for +classification) or \code{NULL} (for regression). +} +} +\description{ +This function is the internal engine for fitting models generated by +\code{create_keras_functional_spec()}. It is not intended to be called directly +by the user. +} +\details{ +This function performs the following key steps: +\enumerate{ +\item \strong{Argument & Data Preparation:} It resolves arguments passed +from \code{parsnip} (handling \code{rlang_zap} objects for unspecified arguments) +and prepares the \code{x} and \code{y} data for Keras. It automatically determines +the \code{input_shape} from \code{x} and, for classification, the \code{num_classes} +from \code{y}. 
+\item \strong{Dynamic Model Construction:} It builds the Keras model graph +by processing the \code{layer_blocks} list. +\itemize{ +\item \strong{Connectivity:} The graph is connected by matching the +argument names of each block function to the names of previously +defined blocks. For example, a block \verb{function(input_a, ...)} will +receive the output tensor from the block named \code{input_a}. +\item \strong{Repetition:} It checks for \verb{num_\{block_name\}} arguments +to repeat a block multiple times, creating a chain of identical +layers. A block can only be repeated if it has exactly one input +tensor from another block. +} +\item \strong{Model Compilation:} It compiles the final Keras model. The +compilation arguments (optimizer, loss, metrics) can be customized by +passing arguments prefixed with \code{compile_} (e.g., \code{compile_loss = "mae"}). +\item \strong{Model Fitting:} It calls \code{keras3::fit()} to train the model +on the prepared data. +} +} +\keyword{internal} diff --git a/man/remove_keras_spec.Rd b/man/remove_keras_spec.Rd index 57b0b86..ea4ed80 100644 --- a/man/remove_keras_spec.Rd +++ b/man/remove_keras_spec.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/remove_spec.R +% Please edit documentation in R/remove_keras_spec.R \name{remove_keras_spec} \alias{remove_keras_spec} \title{Remove a Keras Model Specification and its Registrations} From 2cc6ad4e5f4063b8e73ad4a88cd4321ba65dfb6f Mon Sep 17 00:00:00 2001 From: davidrsch Date: Thu, 31 Jul 2025 16:34:10 +0200 Subject: [PATCH 08/10] Added a helper function for input specification --- R/create_keras_spec_helpers.R | 111 ++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/R/create_keras_spec_helpers.R b/R/create_keras_spec_helpers.R index 13164aa..eb781bf 100644 --- a/R/create_keras_spec_helpers.R +++ b/R/create_keras_spec_helpers.R @@ -106,6 +106,117 @@ collect_spec_args <- function( list(all_args = 
all_args, parsnip_names = parsnip_names) } +#' Remap Layer Block Arguments for Model Specification +#' +#' @description +#' Creates a wrapper function around a Keras layer block to rename its +#' arguments. This is a powerful helper for defining the `layer_blocks` in +#' [create_keras_functional_spec()] and [create_keras_sequential_spec()], +#' allowing you to connect reusable blocks into a model graph without writing +#' verbose anonymous functions. +#' +#' @details +#' `inp_spec()` makes your model definitions cleaner and more readable. It +#' handles the metaprogramming required to create a new function with the +#' correct argument names, while preserving the original block's hyperparameters +#' and their default values. +#' +#' The function supports two modes of operation based on `input_map`: +#' 1. **Single Input Renaming**: If `input_map` is a single character string, +#' the wrapper function renames the *first* argument of the `block` function +#' to the provided string. This is the common case for blocks that take a +#' single tensor input. +#' 2. **Multiple Input Mapping**: If `input_map` is a named character vector, +#' it provides an explicit mapping from new argument names (the names of the +#' vector) to the original argument names in the `block` function (the values +#' of the vector). This is used for blocks with multiple inputs, like a +#' concatenation layer. +#' +#' @param block A function that defines a Keras layer or a set of layers. The +#' first arguments should be the input tensor(s). +#' @param input_map A single character string or a named character vector that +#' specifies how to rename/remap the arguments of `block`. +#' +#' @return A new function (a closure) that wraps the `block` function with +#' renamed arguments, ready to be used in a `layer_blocks` list. +#' +#' @export +#' @examples +#' \dontrun{ +#' # --- Example Blocks --- +#' # A standard dense block with one input tensor and one hyperparameter. 
+#' dense_block <- function(tensor, units = 16) { +#' tensor |> keras3::layer_dense(units = units, activation = "relu") +#' } +#' +#' # A block that takes two tensors as input. +#' concat_block <- function(input_a, input_b) { +#' keras3::layer_concatenate(list(input_a, input_b)) +#' } +#' +#' # An output block with one input. +#' output_block <- function(tensor) { +#' tensor |> keras3::layer_dense(units = 1) +#' } +#' +#' # --- Usage --- +#' layer_blocks <- list( +#' main_input = keras3::layer_input, +#' path_a = inp_spec(dense_block, "main_input"), +#' path_b = inp_spec(dense_block, "main_input"), +#' concatenated = inp_spec( +#' concat_block, +#' c(path_a = "input_a", path_b = "input_b") +#' ), +#' output = inp_spec(output_block, "concatenated") +#' ) +#' } +inp_spec <- function(block, input_map) { + new_fun <- function() {} + original_formals <- formals(block) + original_names <- names(original_formals) + + if (length(original_formals) == 0) { + stop("The 'block' function must have at least one argument.") + } + + new_formals <- original_formals + + if ( + is.character(input_map) && + is.null(names(input_map)) && + length(input_map) == 1 + ) { + # Case 1: Single string, rename first argument + names(new_formals)[1] <- input_map + } else if (is.character(input_map) && !is.null(names(input_map))) { + # Case 2: Named vector for mapping + if (!all(input_map %in% original_names)) { + missing_args <- input_map[!input_map %in% original_names] + stop(paste( + "Argument(s)", + paste(shQuote(missing_args), collapse = ", "), + "not found in the block function." 
+ )) + } + for (new_name in names(input_map)) { + old_name <- input_map[[new_name]] + names(new_formals)[original_names == old_name] <- new_name + } + } else { + stop("`input_map` must be a single string or a named character vector.") + } + + formals(new_fun) <- new_formals + + call_args <- lapply(names(new_formals), as.symbol) + names(call_args) <- original_names + + body(new_fun) <- as.call(c(list(as.symbol("block")), call_args)) + environment(new_fun) <- environment() + new_fun +} + #' Internal Implementation for Creating Keras Specifications #' #' @description From 3bf92d1bd7b7cc38a06e74c6004eee85c1760b91 Mon Sep 17 00:00:00 2001 From: davidrsch Date: Thu, 31 Jul 2025 16:36:12 +0200 Subject: [PATCH 09/10] Adding tests --- tests/testthat/test-e2e-functional.R | 184 +++++++++++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 tests/testthat/test-e2e-functional.R diff --git a/tests/testthat/test-e2e-functional.R b/tests/testthat/test-e2e-functional.R new file mode 100644 index 0000000..5ef3fea --- /dev/null +++ b/tests/testthat/test-e2e-functional.R @@ -0,0 +1,184 @@ +test_that("E2E: Functional spec (regression) works", { + skip_if_no_keras() + + # Define blocks for a simple forked functional model + input_block <- function(input_shape) keras3::layer_input(shape = input_shape) + path_block <- function(tensor, units = 8) { + tensor |> keras3::layer_dense(units = units, activation = "relu") + } + concat_block <- function(input_a, input_b) { + keras3::layer_concatenate(list(input_a, input_b)) + } + output_block_reg <- function(tensor) keras3::layer_dense(tensor, units = 1) + + model_name <- "e2e_func_reg" + on.exit(suppressMessages(remove_keras_spec(model_name)), add = TRUE) + + # Create a spec with two parallel paths that are then concatenated + create_keras_functional_spec( + model_name = model_name, + layer_blocks = list( + main_input = input_block, + path_a = inp_spec(path_block, "main_input"), + path_b = inp_spec(path_block, "main_input"), + 
concatenated = inp_spec( + concat_block, + c(path_a = "input_a", path_b = "input_b") + ), + output = inp_spec(output_block_reg, "concatenated") + ), + mode = "regression" + ) + + spec <- e2e_func_reg( + path_a_units = 32, + path_b_units = 16, + fit_epochs = 2 + ) |> + set_engine("keras") + + data <- mtcars + rec <- recipe(mpg ~ ., data = data) + wf <- workflows::workflow(rec, spec) + + expect_no_error(fit_obj <- parsnip::fit(wf, data = data)) + expect_s3_class(fit_obj, "workflow") + + preds <- predict(fit_obj, new_data = data[1:5, ]) + expect_s3_class(preds, "tbl_df") + expect_equal(names(preds), ".pred") + expect_equal(nrow(preds), 5) + expect_true(is.numeric(preds$.pred)) +}) + + +test_that("E2E: Functional spec (classification) works", { + skip_if_no_keras() + + # Define blocks for a simple forked functional model + input_block <- function(input_shape) keras3::layer_input(shape = input_shape) + # Add a default to `units` to work around a bug in the doc generator + # when handling args with no default. This doesn't affect runtime as the + # value is always overridden. 
+ path_block <- function(tensor, units = 16) { + tensor |> keras3::layer_dense(units = units, activation = "relu") + } + concat_block <- function(input_a, input_b) { + keras3::layer_concatenate(list(input_a, input_b)) + } + output_block_class <- function(tensor, num_classes) { + tensor |> keras3::layer_dense(units = num_classes, activation = "softmax") + } + + model_name <- "e2e_func_class" + on.exit(suppressMessages(remove_keras_spec(model_name)), add = TRUE) + + # Create a spec with two parallel paths that are then concatenated + create_keras_functional_spec( + model_name = model_name, + layer_blocks = list( + main_input = input_block, + path_a = inp_spec(path_block, "main_input"), + path_b = inp_spec(path_block, "main_input"), + concatenated = inp_spec( + concat_block, + c(path_a = "input_a", path_b = "input_b") + ), + output = inp_spec(output_block_class, "concatenated") + ), + mode = "classification" + ) + + spec <- e2e_func_class( + path_a_units = 8, + path_b_units = 4, + fit_epochs = 2 + ) |> + set_engine("keras") + + data <- iris + rec <- recipe(Species ~ ., data = data) + wf <- workflows::workflow(rec, spec) + + expect_no_error(fit_obj <- parsnip::fit(wf, data = data)) + expect_s3_class(fit_obj, "workflow") + + preds_class <- predict(fit_obj, new_data = data[1:5, ], type = "class") + expect_s3_class(preds_class, "tbl_df") + expect_equal(names(preds_class), ".pred_class") + expect_equal(levels(preds_class$.pred_class), levels(data$Species)) + + preds_prob <- predict(fit_obj, new_data = data[1:5, ], type = "prob") + expect_s3_class(preds_prob, "tbl_df") + expect_equal(names(preds_prob), paste0(".pred_", levels(data$Species))) + expect_true(all(abs(rowSums(preds_prob) - 1) < 1e-5)) +}) + + +test_that("E2E: Functional spec tuning (including repetition) works", { + skip_if_no_keras() + + input_block <- function(input_shape) keras3::layer_input(shape = input_shape) + # Add a default to `units` to work around a bug in the doc generator + # when handling args with 
no default. This doesn't affect runtime as the + # value is always overridden by the tuning grid. + dense_block <- function(tensor, units = 16) { + tensor |> keras3::layer_dense(units = units, activation = "relu") + } + output_block_class <- function(tensor, num_classes) { + tensor |> keras3::layer_dense(units = num_classes, activation = "softmax") + } + + model_name <- "e2e_func_tune" + on.exit(suppressMessages(remove_keras_spec(model_name)), add = TRUE) + + create_keras_functional_spec( + model_name = model_name, + layer_blocks = list( + main_input = input_block, + # This block has a single input, so it can be repeated + dense_path = inp_spec(dense_block, "main_input"), + output = inp_spec(output_block_class, "dense_path") + ), + mode = "classification" + ) + + tune_spec <- e2e_func_tune( + num_dense_path = tune(), + dense_path_units = tune(), + fit_epochs = 1 + ) |> + set_engine("keras") + + rec <- recipe(Species ~ ., data = iris) + tune_wf <- workflows::workflow(rec, tune_spec) + + folds <- rsample::vfold_cv(iris, v = 2) + params <- extract_parameter_set_dials(tune_wf) |> + update( + num_dense_path = num_terms(c(1, 2)), + dense_path_units = hidden_units(c(4, 8)) + ) + grid <- grid_regular(params, levels = 2) + control <- control_grid(save_pred = FALSE, verbose = FALSE) + + tune_res <- try( + tune_grid( + tune_wf, + resamples = folds, + grid = grid, + control = control + ), + silent = TRUE + ) + + if (inherits(tune_res, "try-error")) { + testthat::skip(paste("Tuning failed with error:", as.character(tune_res))) + } + + expect_s3_class(tune_res, "tune_results") + + metrics <- collect_metrics(tune_res) + expect_s3_class(metrics, "tbl_df") + expect_true(all(c("num_dense_path", "dense_path_units") %in% names(metrics))) +}) From ade722e55bb2c240191bebae2b75a64caea2d593 Mon Sep 17 00:00:00 2001 From: davidrsch Date: Thu, 31 Jul 2025 16:36:32 +0200 Subject: [PATCH 10/10] Updating doc --- NAMESPACE | 1 + README.md | 137 +++++++++++++-------- man/inp_spec.Rd | 76 
++++++++++++ vignettes/functional-api.Rmd | 218 ++++++++++++++++++++++++++++++++++ vignettes/getting-started.Rmd | 14 +-- 5 files changed, 387 insertions(+), 59 deletions(-) create mode 100644 man/inp_spec.Rd create mode 100644 vignettes/functional-api.Rmd diff --git a/NAMESPACE b/NAMESPACE index 32eca4c..7d6336f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export(create_keras_functional_spec) export(create_keras_sequential_spec) export(generic_functional_fit) export(generic_sequential_fit) +export(inp_spec) export(keras_losses) export(keras_metrics) export(keras_optimizers) diff --git a/README.md b/README.md index 77d4d45..ed20b3a 100644 --- a/README.md +++ b/README.md @@ -19,12 +19,9 @@ pak::pak("davidrsch/kerasnip") ## Example -### Example: Building a Sequential MLP from Layer Blocks +### Example 1: Building a Sequential MLP -This example shows the core `kerasnip` workflow for building a model from modular "layer blocks". We will: -1. Define reusable blocks of Keras layers. -2. Create a model specification from these blocks. -3. Fit the model with a fixed architecture. +This example shows the core workflow for building a simple, linear stack of layers using `create_keras_sequential_spec()`. ```r library(kerasnip) @@ -32,41 +29,38 @@ library(tidymodels) library(keras3) # 1. Define Keras layer blocks -# Each block is a function that takes a Keras model object and adds layers. -# The first block in the sequence is responsible for initializing the model. -mlp_input_block <- function(model, input_shape) { +# The first block initializes the model. +input_block <- function(model, input_shape) { keras_model_sequential(input_shape = input_shape) } - -mlp_dense_block <- function(model, units = 32) { - model |> - layer_dense(units = units, activation = "relu") +# Subsequent blocks add layers. 
+dense_block <- function(model, units = 32) { + model |> layer_dense(units = units, activation = "relu") } - -mlp_output_block <- function(model) { +# The final block creates the output layer. +output_block <- function(model) { model |> layer_dense(units = 1) } # 2. Create a spec from the layer blocks # This creates a new model function, `basic_mlp()`, in your environment. -create_keras_spec( +create_keras_sequential_spec( model_name = "basic_mlp", layer_blocks = list( - input = mlp_input_block, - dense = mlp_dense_block, - output = mlp_output_block + input = input_block, + dense = dense_block, + output = output_block ), mode = "regression" ) -# 3. Use the generated spec to define and fit a model -# We can set the number of dense layers (`num_dense`) and their parameters -# (`dense_units`). +# 3. Use the generated spec to define a model. +# We can set the number of dense layers (`num_dense`) and their parameters (`dense_units`). spec <- basic_mlp( num_dense = 2, dense_units = 64, - epochs = 50, + fit_epochs = 10, learn_rate = 0.01 ) |> set_engine("keras") @@ -75,27 +69,70 @@ spec <- basic_mlp( rec <- recipe(mpg ~ ., data = mtcars) |> step_normalize(all_numeric_predictors()) -wf <- workflow() |> - add_recipe(rec) |> - add_model(spec) +wf <- workflow(rec, spec) set.seed(123) fit_obj <- fit(wf, data = mtcars) # 5. Make predictions -predictions <- predict(fit_obj, new_data = mtcars[1:5, ]) -print(predictions) +predict(fit_obj, new_data = mtcars[1:5, ]) #> # A tibble: 5 × 1 #> .pred #> -#> 1 22.6 -#> 2 20.9 -#> 3 26.1 -#> 4 19.7 -#> 5 17.8 +#> 1 21.3 +#> 2 21.3 +#> 3 22.8 +#> 4 21.4 +#> 5 18.7 ``` -### Example: Tuning a Sequential MLP Architecture +### Example 2: Building a Functional "Fork-Join" Model + +For complex, non-linear architectures, use `create_keras_functional_spec()`. This example builds a model where the input is forked into two paths, which are then concatenated. + +```r +library(kerasnip) +library(tidymodels) +library(keras3) + +# 1. Define blocks. 
For the functional API, blocks are nodes in a graph. +input_block <- function(input_shape) layer_input(shape = input_shape) +path_block <- function(tensor, units = 16) tensor |> layer_dense(units = units) +concat_block <- function(input_a, input_b) layer_concatenate(list(input_a, input_b)) +output_block <- function(tensor) layer_dense(tensor, units = 1) + +# 2. Create the spec. The graph is defined by block names and their arguments. +create_keras_functional_spec( + model_name = "forked_mlp", + layer_blocks = list( + main_input = input_block, + path_a = inp_spec(path_block, "main_input"), + path_b = inp_spec(path_block, "main_input"), + concatenated = inp_spec(concat_block, c(path_a = "input_a", path_b = "input_b")), + # The output block must be named 'output'. + output = inp_spec(output_block, "concatenated") + ), + mode = "regression" +) + +# 3. Use the new spec. Arguments are prefixed with their block name. +spec <- forked_mlp(path_a_units = 16, path_b_units = 8, fit_epochs = 10) |> + set_engine("keras") + +# Fit and predict as usual +set.seed(123) +fit(spec, mpg ~ ., data = mtcars) |> + predict(new_data = mtcars[1:5, ]) +#> # A tibble: 5 × 1 +#> .pred +#> +#> 1 19.4 +#> 2 19.5 +#> 3 21.9 +#> 4 18.6 +#> 5 17.9 +``` +### Example 3: Tuning a Sequential MLP Architecture This example demonstrates how to tune the number of dense layers and the rate of a final dropout layer, showcasing how to tune both architecture and block hyperparameters simultaneously. @@ -105,30 +142,27 @@ library(tidymodels) library(keras3) # 1. 
Define Keras layer blocks for a tunable MLP -mlp_input_block <- function(model, input_shape) { +input_block <- function(model, input_shape) { keras_model_sequential(input_shape = input_shape) } - -tunable_dense_block <- function(model, units = 32) { +dense_block <- function(model, units = 32) { model |> layer_dense(units = units, activation = "relu") } - -tunable_dropout_block <- function(model, rate = 0.2) { +dropout_block <- function(model, rate = 0.2) { model |> layer_dropout(rate = rate) } - -mlp_output_block <- function(model) { +output_block <- function(model) { model |> layer_dense(units = 1) } # 2. Create a spec from the layer blocks -create_keras_spec( +create_keras_sequential_spec( model_name = "tunable_mlp", layer_blocks = list( - input = mlp_input_block, - dense = tunable_dense_block, - dropout = tunable_dropout_block, - output = mlp_output_block + input = input_block, + dense = dense_block, + dropout = dropout_block, + output = output_block ), mode = "regression" ) @@ -139,17 +173,15 @@ tune_spec <- tunable_mlp( dense_units = tune(), num_dropout = 1, dropout_rate = tune(), - epochs = 20 + fit_epochs = 10 ) |> set_engine("keras") -# 4. Set up a tuning workflow +# 4. Set up and run a tuning workflow rec <- recipe(mpg ~ ., data = mtcars) |> step_normalize(all_numeric_predictors()) -wf_tune <- workflow() |> - add_recipe(rec) |> - add_model(tune_spec) +wf_tune <- workflow(rec, tune_spec) # Define the tuning grid. params <- extract_parameter_set_dials(wf_tune) |> @@ -167,7 +199,8 @@ folds <- vfold_cv(mtcars, v = 3) tune_res <- tune_grid( wf_tune, resamples = folds, - grid = grid + grid = grid, + control = control_grid(verbose = FALSE) ) # 6. 
Show the best architecture @@ -180,4 +213,4 @@ show_best(tune_res, metric = "rmse") #> 3 3 64 0.1 rmse standard 3.15 Preprocessor1_Model04 #> 4 1 8 0.1 rmse standard 3.20 Preprocessor1_Model01 #> 5 3 8 0.1 rmse standard 3.22 Preprocessor1_Model03 -``` +``` \ No newline at end of file diff --git a/man/inp_spec.Rd b/man/inp_spec.Rd new file mode 100644 index 0000000..ddabda3 --- /dev/null +++ b/man/inp_spec.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_keras_spec_helpers.R +\name{inp_spec} +\alias{inp_spec} +\title{Remap Layer Block Arguments for Model Specification} +\usage{ +inp_spec(block, input_map) +} +\arguments{ +\item{block}{A function that defines a Keras layer or a set of layers. The +first arguments should be the input tensor(s).} + +\item{input_map}{A single character string or a named character vector that +specifies how to rename/remap the arguments of \code{block}.} +} +\value{ +A new function (a closure) that wraps the \code{block} function with +renamed arguments, ready to be used in a \code{layer_blocks} list. +} +\description{ +Creates a wrapper function around a Keras layer block to rename its +arguments. This is a powerful helper for defining the \code{layer_blocks} in +\code{\link[=create_keras_functional_spec]{create_keras_functional_spec()}} and \code{\link[=create_keras_sequential_spec]{create_keras_sequential_spec()}}, +allowing you to connect reusable blocks into a model graph without writing +verbose anonymous functions. +} +\details{ +\code{inp_spec()} makes your model definitions cleaner and more readable. It +handles the metaprogramming required to create a new function with the +correct argument names, while preserving the original block's hyperparameters +and their default values. 
+ +The function supports two modes of operation based on \code{input_map}: +\enumerate{ +\item \strong{Single Input Renaming}: If \code{input_map} is a single character string, +the wrapper function renames the \emph{first} argument of the \code{block} function +to the provided string. This is the common case for blocks that take a +single tensor input. +\item \strong{Multiple Input Mapping}: If \code{input_map} is a named character vector, +it provides an explicit mapping from new argument names (the names of the +vector) to the original argument names in the \code{block} function (the values +of the vector). This is used for blocks with multiple inputs, like a +concatenation layer. +} +} +\examples{ +\dontrun{ +# --- Example Blocks --- +# A standard dense block with one input tensor and one hyperparameter. +dense_block <- function(tensor, units = 16) { + tensor |> keras3::layer_dense(units = units, activation = "relu") +} + +# A block that takes two tensors as input. +concat_block <- function(input_a, input_b) { + keras3::layer_concatenate(list(input_a, input_b)) +} + +# An output block with one input. 
+output_block <- function(tensor) { + tensor |> keras3::layer_dense(units = 1) +} + +# --- Usage --- +layer_blocks <- list( + main_input = keras3::layer_input, + path_a = inp_spec(dense_block, "main_input"), + path_b = inp_spec(dense_block, "main_input"), + concatenated = inp_spec( + concat_block, + c(path_a = "input_a", path_b = "input_b") + ), + output = inp_spec(output_block, "concatenated") +) +} +} diff --git a/vignettes/functional-api.Rmd b/vignettes/functional-api.Rmd new file mode 100644 index 0000000..add9bbf --- /dev/null +++ b/vignettes/functional-api.Rmd @@ -0,0 +1,218 @@ +--- +title: "Building Functional Models with kerasnip" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Building Functional Models with kerasnip} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +This vignette demonstrates how to use the `create_keras_functional_spec()` function to build complex, non-linear Keras models that integrate seamlessly with the `tidymodels` ecosystem. + +## When to Use the Functional API + +While `create_keras_sequential_spec()` is perfect for models that are a simple, linear stack of layers, many advanced architectures are not linear. The Keras Functional API is designed for these cases. You should use `create_keras_functional_spec()` when your model has: + +* Multiple input or multiple output layers. +* Shared layers between different branches. +* Residual connections (e.g., ResNets), where a layer's input is added to its output. +* Any other non-linear topology. + +`kerasnip` makes it easy to define these architectures by automatically connecting a graph of layer blocks. + +## The Core Concept: Building a Graph + +`kerasnip` builds the model's graph by inspecting the `layer_blocks` you provide. The connection logic is simple but powerful: + +1. The **names of the list elements** in `layer_blocks` define the names of the nodes in your graph (e.g., `main_input`, `dense_path`, `output`). +2. 
The **names of the arguments** in each block function specify its inputs. A block function like `my_block <- function(input_a, input_b, ...)` declares that it needs input from the nodes named `input_a` and `input_b`. + +There are two special requirements: + +* **Input Block**: The first block in the list is treated as the main input node. Its function should not take other blocks as input. +* **Output Block**: Exactly one block must be named `"output"`. The tensor returned by this block is used as the final output of the Keras model. + +Let's see this in action. + +## Example 1: A Fork-Join Regression Model + +We will build a model that forks the input, passes it through two separate dense layer paths, and then joins the results with a concatenation layer before producing a final prediction. + +### Step 1: Load Libraries + +First, we load the necessary packages. + +```{r setup} +library(kerasnip) +library(tidymodels) +library(keras3) + +# Silence the startup messages from remove_keras_spec +options(kerasnip.show_removal_messages = FALSE) +``` + +### Step 2: Define Layer Blocks + +These are the building blocks of our model. Each function represents a node in the graph. + +```{r define-blocks-functional} +# The input node. `input_shape` is supplied automatically by the engine. +input_block <- function(input_shape) { + layer_input(shape = input_shape) +} + +# A generic block for a dense path. `units` will be a tunable parameter. +path_block <- function(tensor, units = 16) { + tensor |> layer_dense(units = units, activation = "relu") +} + +# A block to join two tensors. +concat_block <- function(input_a, input_b) { + layer_concatenate(list(input_a, input_b)) +} + +# The final output block for regression. +output_block_reg <- function(tensor) { + layer_dense(tensor, units = 1) +} +``` + +### Step 3: Create the Model Specification + +Now we assemble the blocks into a graph. We use the `inp_spec()` helper to connect the blocks. 
This avoids writing verbose anonymous functions like `function(main_input, units) path_block(main_input, units)`. `inp_spec()` automatically creates a wrapper that renames the arguments of our blocks to match the node names from the `layer_blocks` list. + +```{r create-spec-functional} +model_name <- "forked_reg_spec" +# Clean up the spec when the vignette is done knitting +on.exit(remove_keras_spec(model_name), add = TRUE) + +create_keras_functional_spec( + model_name = model_name, + layer_blocks = list( + # Node names are defined by the list names + main_input = input_block, + + # `inp_spec()` renames the first argument of `path_block` ('tensor') + # to 'main_input' to match the node name. + path_a = inp_spec(path_block, "main_input"), + path_b = inp_spec(path_block, "main_input"), + + # For multiple inputs, `inp_spec()` takes a named vector to map + # new argument names to the original block's argument names. + concatenated = inp_spec(concat_block, c(path_a = "input_a", path_b = "input_b")), + + # The output block takes the concatenated tensor as its input. + output = inp_spec(output_block_reg, "concatenated") + ), + mode = "regression" +) +``` + +### Step 4: Use and Fit the Model + +The new function `forked_reg_spec()` is now available. Its arguments (`path_a_units`, `path_b_units`) were discovered automatically from our block definitions. + +```{r fit-functional} +# We can override the default `units` from `path_block` for each path. 
+spec <- forked_reg_spec( + path_a_units = 16, + path_b_units = 8, + fit_epochs = 10, + fit_verbose = 0 # Suppress fitting output in vignette +) |> + set_engine("keras") + +print(spec) + +# Fit the model on the mtcars dataset +rec <- recipe(mpg ~ ., data = mtcars) +wf <- workflow() |> + add_recipe(rec) |> + add_model(spec) + + +fit_obj <- fit(wf, data = mtcars) + +predict(fit_obj, new_data = mtcars[1:5, ]) +``` + +## Example 2: Tuning a Functional Model's Depth + +A key feature of `kerasnip` is the ability to tune the *depth* of the network by repeating a block multiple times. A block can be repeated if it has **exactly one input tensor** from another block in the graph. + +Let's create a simple functional model and tune both its width (`units`) and its depth (`num_...`). + +### Step 1: Define Blocks and Create Spec + +This model is architecturally sequential, but we build it with the functional API to demonstrate the repetition feature. + +```{r create-tunable-functional-spec} +dense_block <- function(tensor, units = 16) { + tensor |> layer_dense(units = units, activation = "relu") +} +output_block_class <- function(tensor, num_classes) { + tensor |> layer_dense(units = num_classes, activation = "softmax") +} + +model_name_tune <- "tunable_func_mlp" +on.exit(remove_keras_spec(model_name_tune), add = TRUE) + +create_keras_functional_spec( + model_name = model_name_tune, + layer_blocks = list( + main_input = input_block, + # This block has a single input ('main_input'), so it can be repeated. + dense_path = inp_spec(dense_block, "main_input"), + output = inp_spec(output_block_class, "dense_path") + ), + mode = "classification" +) +``` + +### Step 2: Set up and Run Tuning + +We will tune `dense_path_units` (the width) and `num_dense_path` (the depth). The `num_dense_path` argument was created automatically because `dense_path` is a repeatable block. 
+ +```{r tune-functional, cache=TRUE} +tune_spec <- tunable_func_mlp( + dense_path_units = tune(), + num_dense_path = tune(), + fit_epochs = 5, + fit_verbose = 0 +) |> + set_engine("keras") + +rec <- recipe(Species ~ ., data = iris) +tune_wf <- workflow() |> + add_recipe(rec) |> + add_model(tune_spec) + +folds <- vfold_cv(iris, v = 2) + +# Define the tuning grid +params <- extract_parameter_set_dials(tune_wf) |> + update( + dense_path_units = hidden_units(c(8, 32)), + num_dense_path = num_terms(c(1, 3)) # Test models with 1, 2, or 3 hidden layers + ) + +grid <- grid_regular(params, levels = 2) +grid + +control <- control_grid(save_pred = FALSE, verbose = FALSE) + +tune_res <- tune_grid( + tune_wf, + resamples = folds, + grid = grid, + control = control +) + +show_best(tune_res, metric = "accuracy") +``` + +The results show that `tidymodels` successfully trained and evaluated models with different numbers of hidden layers, demonstrating that we can tune the very architecture of the network. + +## Conclusion + +The `create_keras_functional_spec()` function provides a powerful and intuitive way to define, fit, and tune complex Keras models within the `tidymodels` framework. By defining the model as a graph of connected blocks, you can represent nearly any architecture while `kerasnip` handles the boilerplate of integrating it with `parsnip`, `dials`, and `tune`. \ No newline at end of file diff --git a/vignettes/getting-started.Rmd b/vignettes/getting-started.Rmd index aea627e..7ecc58d 100644 --- a/vignettes/getting-started.Rmd +++ b/vignettes/getting-started.Rmd @@ -9,7 +9,7 @@ vignette: > ## The Core Idea: From Keras Layers to Tidymodels Specs -The `keras` package allows for building deep learning models layer-by-layer, which is a powerful and flexible approach. 
However, the `tidymodels` ecosystem is designed around declarative model specifications, where you define what model you want and which of its parameters you want to tune, rather than building it imperatively. +The `keras3` package allows for building deep learning models layer-by-layer, which is a powerful and flexible approach. However, the `tidymodels` ecosystem is designed around declarative model specifications, where you define what model you want and which of its parameters you want to tune, rather than building it imperatively. `kerasnip` bridges this gap with a simple but powerful concept: layer blocks. You define the components of your neural network (e.g., an input block, a dense block, a dropout block) as simple R functions. `kerasnip` then uses these blocks as building materials to create a brand new parsnip model specification function for you. @@ -59,10 +59,10 @@ We need three blocks: ### Step 2: Create the Model Specification -Now, we use `create_keras_spec()` to generate a new model function, which we'll call `basic_mlp()`. We provide our layer blocks in the order they should be assembled. +Now, we use `create_keras_sequential_spec()` to generate a new model function, which we'll call `basic_mlp()`. We provide our layer blocks in the order they should be assembled. ```{r create-spec} -create_keras_spec( +create_keras_sequential_spec( model_name = "basic_mlp", layer_blocks = list( input = mlp_input_block, @@ -83,7 +83,7 @@ We can now use `basic_mlp()` like any other parsnip model. 
Let's define a model spec <- basic_mlp( num_dense = 2, dense_units = 64, - epochs = 50, + fit_epochs = 50, learn_rate = 0.01 ) |> set_engine("keras") @@ -133,7 +133,7 @@ tunable_dropout_block <- function(model, rate = 0.2) { layer_dropout(rate = rate) } -create_keras_spec( +create_keras_sequential_spec( model_name = "tunable_mlp", layer_blocks = list( input = mlp_input_block, @@ -155,7 +155,7 @@ tune_spec <- tunable_mlp( dense_units = tune(), num_dropout = 1, dropout_rate = tune(), - epochs = 20 # Use fewer epochs for faster tuning + fit_epochs = 20 ) |> set_engine("keras") @@ -226,7 +226,7 @@ Here is an example of using these arguments to specify a different loss function adv_spec <- basic_mlp( num_dense = 2, dense_units = 32, - epochs = 100, + fit_epochs = 100, # Arguments for keras3::compile() compile_loss = "mae", # Arguments for keras3::fit()