diff --git a/NAMESPACE b/NAMESPACE index 21f5c93..6f66623 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,8 +16,10 @@ export(keras_metrics) export(keras_optimizers) export(loss_function_keras) export(optimizer_function) -export(process_x) -export(process_y) +export(process_x_functional) +export(process_x_sequential) +export(process_y_functional) +export(process_y_sequential) export(register_keras_loss) export(register_keras_metric) export(register_keras_optimizer) diff --git a/R/build_and_compile_model.R b/R/build_and_compile_model.R index 0b18165..4089ddb 100644 --- a/R/build_and_compile_model.R +++ b/R/build_and_compile_model.R @@ -1,3 +1,18 @@ +#' Build and Compile a Keras Sequential Model +#' +#' @description +#' This internal helper function constructs and compiles a Keras sequential model +#' based on a list of layer blocks and other parameters. It handles data +#' processing, dynamic architecture construction, and model compilation. +#' +#' @param x A data frame or matrix of predictors. +#' @param y A vector or data frame of outcomes. +#' @param layer_blocks A named list of functions that define the layers of the +#' model. The order of the list determines the order of the layers. +#' @param ... Additional arguments passed to the function, including layer +#' hyperparameters, repetition counts for blocks, and compile/fit arguments. +#' +#' @return A compiled Keras model object. #' @noRd build_and_compile_sequential_model <- function( x, @@ -11,16 +26,18 @@ build_and_compile_sequential_model <- function( verbose <- all_args$verbose %||% 0 # Process x input - x_processed <- process_x(x) + x_processed <- process_x_sequential(x) x_proc <- x_processed$x_proc input_shape <- x_processed$input_shape # Process y input - y_processed <- process_y(y) - y_mat <- y_processed$y_proc + y_processed <- process_y_sequential(y) + + # Determine is_classification, class_levels, and num_classes is_classification <- y_processed$is_classification class_levels <- y_processed$class_levels num_classes <- y_processed$num_classes + y_mat <- y_processed$y_proc # Determine default compile arguments based on mode default_loss <- if (is_classification) { @@ -93,6 +110,7 @@ build_and_compile_sequential_model <- function( } # --- 3. Model Compilation --- + # Collect all arguments starting with "compile_" from `...` compile_args <- collect_compile_args( all_args, learn_rate, @@ -104,6 +122,24 @@ build_and_compile_sequential_model <- function( return(model) } +#' Build and Compile a Keras Functional Model +#' +#' @description +#' This internal helper function constructs and compiles a Keras functional model +#' based on a list of layer blocks and other parameters. It handles data +#' processing, dynamic architecture construction (including multiple inputs and +#' branches), and model compilation. +#' +#' @param x A data frame or matrix of predictors. For multiple inputs, this is +#' often a data frame with list-columns. +#' @param y A vector or data frame of outcomes. Can handle multiple outputs if +#' provided as a data frame with multiple columns. +#' @param layer_blocks A named list of functions that define the building blocks +#' of the model graph. Connections are defined by referencing other block names. +#' @param ... Additional arguments passed to the function, including layer +#' hyperparameters, repetition counts for blocks, and compile/fit arguments. +#' +#' @return A compiled Keras model object. 
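+#'
+#' @details
+#' As an illustrative sketch (block and argument names here are examples, not
+#' a fixed API), a minimal classification graph might be declared as:
+#'
+#' ```r
+#' layer_blocks <- list(
+#'   input = function(input_shape) keras3::layer_input(shape = input_shape),
+#'   dense = function(input, units = 32) {
+#'     input |> keras3::layer_dense(units = units, activation = "relu")
+#'   },
+#'   output = function(dense, num_classes) {
+#'     dense |> keras3::layer_dense(units = num_classes, activation = "softmax")
+#'   }
+#' )
+#' ```
+#'
+#' Tensor arguments are matched by name to earlier blocks, so `dense` consumes
+#' the `input` block's tensor and `output` consumes `dense`.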
#' @noRd build_and_compile_functional_model <- function( x, @@ -117,44 +153,117 @@ build_and_compile_functional_model <- function( verbose <- all_args$verbose %||% 0 # Process x input - x_processed <- process_x(x) + x_processed <- process_x_functional(x) x_proc <- x_processed$x_proc input_shape <- x_processed$input_shape # Process y input - y_processed <- process_y(y) - y_mat <- y_processed$y_proc - is_classification <- y_processed$is_classification - class_levels <- y_processed$class_levels - num_classes <- y_processed$num_classes + y_processed <- process_y_functional(y) # Determine default compile arguments based on mode - default_loss <- if (is_classification) { - if (num_classes > 2) { - "categorical_crossentropy" - } else { - "binary_crossentropy" + default_losses <- list() + default_metrics_list <- list() + + # Check if y_processed$y_proc is a list (indicating multiple outputs) + if (is.list(y_processed$y_proc) && !is.null(names(y_processed$y_proc))) { + # Multiple outputs + for (output_name in names(y_processed$y_proc)) { + # We need to determine is_classification and num_classes for each output + # based on the class_levels for that output. + current_class_levels <- y_processed$class_levels[[output_name]] + current_is_classification <- !is.null(current_class_levels) && + length(current_class_levels) > 0 + current_num_classes <- if (current_is_classification) { + length(current_class_levels) + } else { + NULL + } + + default_losses[[output_name]] <- if (current_is_classification) { + if (current_num_classes > 2) { + "categorical_crossentropy" + } else { + "binary_crossentropy" + } + } else { + "mean_squared_error" + } + default_metrics_list[[output_name]] <- if (current_is_classification) { + "accuracy" + } else { + "mean_absolute_error" + } } } else { - "mean_squared_error" - } - default_metrics <- if (is_classification) { - "accuracy" - } else { - "mean_absolute_error" + # Single output case + # Determine is_classification and num_classes from the top-level class_levels + is_classification <- !is.null(y_processed$class_levels) && + length(y_processed$class_levels) > 0 + num_classes <- if (is_classification) { + length(y_processed$class_levels) + } else { + NULL + } + + default_losses <- if (is_classification) { + if (num_classes > 2) { + "categorical_crossentropy" + } else { + "binary_crossentropy" + } + } else { + "mean_squared_error" + } + default_metrics_list <- if (is_classification) { + "accuracy" + } else { + "mean_absolute_error" + } } # --- 2. Dynamic Model Architecture Construction (DIFFERENT from sequential) --- # Create a list to store the output tensors of each block. The names of the # list elements correspond to the block names. block_outputs <- list() - # The first block MUST be the input layer and MUST NOT have `input_from`. - first_block_name <- names(layer_blocks)[1] - first_block_fn <- layer_blocks[[first_block_name]] - block_outputs[[first_block_name]] <- first_block_fn(input_shape = input_shape) + model_input_tensors <- list() # To collect all input tensors for keras_model + + # Identify and process input layers based on names matching input_shape + # This assumes that if input_shape is a named list, the corresponding + # input blocks in layer_blocks will have matching names. 
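+  # For example (names purely illustrative): a data frame with list-columns
+  # `input_a` and `input_b` can yield input_shape = list(input_a = 5L, input_b = 3L),
+  # which pairs with layer_blocks = list(input_a = <input block>,
+  # input_b = <input block>, ...).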
+ if (is.list(input_shape) && !is.null(names(input_shape))) { + input_block_names_in_spec <- intersect( + names(layer_blocks), + names(input_shape) + ) + + if (length(input_block_names_in_spec) != length(input_shape)) { + stop( + "Mismatch between named inputs from process_x_functional() and named input blocks in layer_blocks. ", + "Ensure all processed inputs have a corresponding named input block in your model specification." + ) + } + + for (block_name in input_block_names_in_spec) { + block_fn <- layer_blocks[[block_name]] + current_input_tensor <- block_fn(input_shape = input_shape[[block_name]]) + block_outputs[[block_name]] <- current_input_tensor + model_input_tensors[[block_name]] <- current_input_tensor + } + remaining_layer_blocks_names <- names(layer_blocks)[ + !(names(layer_blocks) %in% input_block_names_in_spec) + ] + } else { + # Single input case (original logic, but now also collecting for model_input_tensors) + first_block_name <- names(layer_blocks)[1] + first_block_fn <- layer_blocks[[first_block_name]] + current_input_tensor <- first_block_fn(input_shape = input_shape) + block_outputs[[first_block_name]] <- current_input_tensor + model_input_tensors[[first_block_name]] <- current_input_tensor + remaining_layer_blocks_names <- names(layer_blocks)[-1] + } # Iterate through the remaining blocks, connecting and repeating them as needed. - for (block_name in names(layer_blocks)[-1]) { + for (block_name in remaining_layer_blocks_names) { block_fn <- layer_blocks[[block_name]] block_fmls <- rlang::fn_fmls(block_fn) block_fml_names <- names(block_fmls) @@ -189,8 +298,50 @@ build_and_compile_functional_model <- function( ) # Add special engine-supplied arguments if the block can accept them - if (is_classification && "num_classes" %in% block_fml_names) { - block_hyperparams$num_classes <- num_classes + # This is primarily for output layers that might need num_classes + if ("num_classes" %in% block_fml_names) { + # Check if this block is an output block and if it's a classification task + if (is.list(y_processed$y_proc) && !is.null(names(y_processed$y_proc))) { + # Multi-output case + # Find the corresponding output in y_processed based on block_name + y_names <- names(y_processed$y_proc) + # If there is only one output, and this block is named 'output', + # connect them automatically. 
+ if (length(y_names) == 1 && block_name == "output") { + y_name <- y_names[1] + is_cls <- !is.null(y_processed$class_levels[[y_name]]) && + length(y_processed$class_levels[[y_name]]) > 0 + if (is_cls) { + block_hyperparams$num_classes <- length(y_processed$class_levels[[ + y_name + ]]) + } + } else if (block_name %in% y_names) { + # Standard case: block name matches an output name + current_y_info <- list( + is_classification = !is.null(y_processed$class_levels[[ + block_name + ]]) && + length(y_processed$class_levels[[block_name]]) > 0, + num_classes = if ( + !is.null(y_processed$class_levels[[block_name]]) + ) { + length(y_processed$class_levels[[block_name]]) + } else { + NULL + } + ) + if (current_y_info$is_classification) { + block_hyperparams$num_classes <- current_y_info$num_classes + } + } + } else { + # Single output case + if (is_classification) { + block_hyperparams$num_classes <- num_classes + } + } } # --- Get Input Tensors for this block --- @@ -232,14 +383,42 @@ build_and_compile_functional_model <- function( block_outputs[[block_name]] <- current_tensor } - # The last layer must be named 'output' - output_tensor <- block_outputs[["output"]] - if (is.null(output_tensor)) { - stop("An 'output' block must be defined in layer_blocks.") + # The last layer must be named 'output' or match the names of y_processed outputs + final_output_tensors <- list() + + # Check if y_processed$y_proc is a named list, indicating multiple outputs + if (is.list(y_processed$y_proc) && !is.null(names(y_processed$y_proc))) { + # Multiple outputs + for (output_name in names(y_processed$y_proc)) { + # Iterate over the names of the actual outputs + if (is.null(block_outputs[[output_name]])) { + stop(paste0( + "An output block named '", + output_name, + "' must be defined in layer_blocks for multi-output models." + )) + } + final_output_tensors[[output_name]] <- block_outputs[[output_name]] + } + } else { + # Single output case + output_tensor <- block_outputs[["output"]] + if (is.null(output_tensor)) { + stop("An 'output' block must be defined in layer_blocks.") + } + final_output_tensors <- output_tensor } + + # If there's only one input, it shouldn't be a list for keras_model + final_model_inputs <- if (length(model_input_tensors) == 1) { + model_input_tensors[[1]] + } else { + model_input_tensors + } + model <- keras3::keras_model( - inputs = block_outputs[[first_block_name]], - outputs = output_tensor + inputs = final_model_inputs, + outputs = final_output_tensors # This will now be a list if multiple outputs ) # --- 3. Model Compilation --- @@ -247,8 +426,8 @@ compile_args <- collect_compile_args( all_args, learn_rate, - default_loss, - default_metrics + default_losses, + default_metrics_list ) rlang::exec(keras3::compile, model, !!!compile_args) diff --git a/R/generic_fit_helpers.R b/R/generic_fit_helpers.R index 85070d1..dfa0e1b 100644 --- a/R/generic_fit_helpers.R +++ b/R/generic_fit_helpers.R @@ -14,8 +14,8 @@ #' #' @param all_args The list of all arguments passed to the fitting function's `...`. #' @param learn_rate The top-level `learn_rate` parameter. -#' @param default_loss The default loss function to use if not provided. -#' @param default_metrics The default metric(s) to use if not provided. +#' @param default_loss The default loss function to use if not provided. Can be a single value or a named list. +#' @param default_metrics The default metric(s) to use if not provided. Can be a single value, or a named list with one element (a metric name or vector of metric names) per output. 
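+#'   As an illustrative sketch for a hypothetical two-output model (output
+#'   names are examples only), the named-list form looks like
+#'   `list(out_a = "mean_squared_error", out_b = "binary_crossentropy")` for
+#'   `default_loss`, with `default_metrics` shaped the same way.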
#' @return A named list of arguments ready to be passed to `keras3::compile()`. #' @noRd collect_compile_args <- function( @@ -53,19 +53,65 @@ collect_compile_args <- function( ) } - # Resolve loss: use user-provided, otherwise default. Resolve string if needed. - loss_arg <- user_compile_args$loss %||% default_loss - if (is.character(loss_arg)) { - final_compile_args$loss <- get_keras_object(loss_arg, "loss") + # Handle loss: can be single or multiple outputs + if (is.list(default_loss) && !is.null(names(default_loss))) { + # Multiple outputs + # User can provide a single loss for all outputs, or a named list + loss_arg <- user_compile_args$loss %||% default_loss + if (is.character(loss_arg) && length(loss_arg) == 1) { + # Single loss string for all outputs + final_compile_args$loss <- get_keras_object(loss_arg, "loss") + } else if (is.list(loss_arg) && !is.null(names(loss_arg))) { + # Named list of losses + final_compile_args$loss <- lapply(loss_arg, function(l) { + if (is.character(l)) get_keras_object(l, "loss") else l + }) + } else { + stop( + "For multiple outputs, 'compile_loss' must be a single string or a named list of losses." + ) + } } else { - final_compile_args$loss <- loss_arg + # Single output + loss_arg <- user_compile_args$loss %||% default_loss + if (is.character(loss_arg)) { + final_compile_args$loss <- get_keras_object(loss_arg, "loss") + } else { + final_compile_args$loss <- loss_arg + } } - # Resolve metrics: user‐supplied or default - metrics_arg <- user_compile_args$metrics %||% default_metrics - # Keras' `compile()` can handle a single string or a list/vector of strings. - # This correctly passes along either the default string or a user-provided vector. - final_compile_args$metrics <- metrics_arg + # Handle metrics: can be single or multiple outputs + if (is.list(default_metrics) && !is.null(names(default_metrics))) { + # Multiple outputs + # User can provide a single metric for all outputs, or a named list + metrics_arg <- user_compile_args$metrics %||% default_metrics + if (is.character(metrics_arg) && length(metrics_arg) == 1) { + # Single metric string for all outputs + final_compile_args$metrics <- get_keras_object(metrics_arg, "metric") + } else if (is.list(metrics_arg) && !is.null(names(metrics_arg))) { + # Named list of metrics + final_compile_args$metrics <- lapply(metrics_arg, function(m) { + if (is.character(m)) get_keras_object(m, "metric") else m + }) + } else { + stop( + "For multiple outputs, 'compile_metrics' must be a single string or a named list of metrics." + ) + } + } else { + # Single output + metrics_arg <- user_compile_args$metrics %||% default_metrics + if (is.character(metrics_arg)) { + final_compile_args$metrics <- lapply( + metrics_arg, + get_keras_object, + "metric" + ) + } else { + final_compile_args$metrics <- metrics_arg + } + } # Add any other user-provided compile arguments (e.g., `weighted_metrics`) other_args <- user_compile_args[ @@ -133,4 +179,4 @@ collect_fit_args <- function( ) ] merged_args -} +} \ No newline at end of file diff --git a/R/generic_functional_fit.R b/R/generic_functional_fit.R index c1e7649..2be84d9 100644 --- a/R/generic_functional_fit.R +++ b/R/generic_functional_fit.R @@ -90,9 +90,9 @@ generic_functional_fit <- function( # --- 2. Model Fitting --- all_args <- list(...) 
verbose <- all_args$verbose %||% 0 - x_processed <- process_x(x) + x_processed <- process_x_functional(x) x_proc <- x_processed$x_proc - y_processed <- process_y(y) + y_processed <- process_y_functional(y) y_mat <- y_processed$y_proc fit_args <- collect_fit_args( @@ -109,6 +109,8 @@ generic_functional_fit <- function( list( fit = model, # The raw Keras model object history = history, # The training history - lvl = y_processed$class_levels # Factor levels for classification, NULL for regression + lvl = y_processed$class_levels, # Factor levels for classification, NULL for regression + process_x = process_x_functional, + process_y = process_y_functional ) } diff --git a/R/generic_sequential_fit.R b/R/generic_sequential_fit.R index 42d7107..46d0f20 100644 --- a/R/generic_sequential_fit.R +++ b/R/generic_sequential_fit.R @@ -90,9 +90,9 @@ generic_sequential_fit <- function( # --- 2. Model Fitting --- all_args <- list(...) verbose <- all_args$verbose %||% 0 - x_processed <- process_x(x) + x_processed <- process_x_sequential(x) x_proc <- x_processed$x_proc - y_processed <- process_y(y) + y_processed <- process_y_sequential(y) y_mat <- y_processed$y_proc fit_args <- collect_fit_args( @@ -109,6 +109,8 @@ generic_sequential_fit <- function( list( fit = model, # The raw Keras model object history = history, # The training history - lvl = y_processed$class_levels # Factor levels for classification, NULL for regression + lvl = y_processed$class_levels, # Factor levels for classification, NULL for regression + process_x = process_x_sequential, + process_y = process_y_sequential ) } diff --git a/R/keras_tools.R b/R/keras_tools.R index f7bc77e..0877d38 100644 --- a/R/keras_tools.R +++ b/R/keras_tools.R @@ -60,14 +60,28 @@ #' } #' @export keras_evaluate <- function(object, x, y = NULL, ...) { - # 1. Preprocess predictor data (x) - x_processed <- process_x(x) + # 1. Get the correct processing functions from the fit object + process_x_fun <- object$fit$process_x + process_y_fun <- object$fit$process_y + + if (is.null(process_x_fun) || is.null(process_y_fun)) { + stop( + "Could not find processing functions in the model fit object. ", + "Please ensure the model was fitted with a recent version of kerasnip.", + call. = FALSE + ) + } + + # 2. Preprocess predictor data (x) + x_processed <- process_x_fun(x) x_proc <- x_processed$x_proc - # 2. Preprocess outcome data (y) + # 3. Preprocess outcome data (y) y_proc <- NULL if (!is.null(y)) { - y_processed <- process_y( + # Note: For evaluation, we pass the class levels from the trained model + # to ensure consistent encoding of the new data. + y_processed <- process_y_fun( y, is_classification = !is.null(object$fit$lvl), class_levels = object$fit$lvl @@ -75,7 +89,7 @@ keras_evaluate <- function(object, x, y = NULL, ...) { y_proc <- y_processed$y_proc } - # 3. Call the underlying Keras evaluate method + # 4. Call the underlying Keras evaluate method keras_model <- object$fit$fit keras3::evaluate(keras_model, x = x_proc, y = y_proc, ...) 
} diff --git a/R/register_fit_predict.R b/R/register_fit_predict.R index 2fa6c48..96deedb 100644 --- a/R/register_fit_predict.R +++ b/R/register_fit_predict.R @@ -57,7 +57,11 @@ register_fit_predict <- function(model_name, mode, layer_blocks, functional) { func = c(fun = "predict"), args = list( object = rlang::expr(object$fit$fit), - x = rlang::expr(process_x(new_data)$x_proc) + x = if (functional) { + rlang::expr(process_x_functional(new_data)$x_proc) + } else { + rlang::expr(process_x_sequential(new_data)$x_proc) + } ) ) ) @@ -74,7 +78,11 @@ register_fit_predict <- function(model_name, mode, layer_blocks, functional) { func = c(fun = "predict"), args = list( object = rlang::expr(object$fit$fit), - x = rlang::expr(process_x(new_data)$x_proc) + x = if (functional) { + rlang::expr(process_x_functional(new_data)$x_proc) + } else { + rlang::expr(process_x_sequential(new_data)$x_proc) + } ) ) ) @@ -89,14 +97,18 @@ register_fit_predict <- function(model_name, mode, layer_blocks, functional) { func = c(fun = "predict"), args = list( object = rlang::expr(object$fit$fit), - x = rlang::expr(process_x(new_data)$x_proc) + x = if (functional) { + rlang::expr(process_x_functional(new_data)$x_proc) + } else { + rlang::expr(process_x_sequential(new_data)$x_proc) + } ) ) ) } } #' Post-process Keras Numeric Predictions #' #' @description #' Formats raw numeric predictions from a Keras model into a tibble with the @@ -110,7 +122,22 @@ register_fit_predict <- function(model_name, mode, layer_blocks, functional) { #' @return A tibble with a `.pred` column. #' @noRd keras_postprocess_numeric <- function(results, object) { - tibble::tibble(.pred = as.vector(results)) + if (is.list(results) && !is.null(names(results))) { + # Multi-output case: results is a named list of arrays/matrices + # Combine them into a single tibble with appropriate column names + combined_preds <- tibble::as_tibble(results) + # Rename columns to .pred_output_name if there are multiple outputs + if (length(results) > 1) { + colnames(combined_preds) <- paste0(".pred_", names(results)) + } else { + # If only one output, but still a list, name it .pred + colnames(combined_preds) <- ".pred" + } + return(combined_preds) + } else { + # Single output case: results is a matrix/array + tibble::tibble(.pred = as.vector(results)) + } } #' Post-process Keras Probability Predictions @@ -127,9 +154,32 @@ keras_postprocess_numeric <- function(results, object) { #' @return A tibble with named columns for each class probability. 
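+#'
+#' For multi-output models, `results` is a named list and columns follow the
+#' pattern `.pred_<output>_<class>`; e.g., a hypothetical output named `target`
+#' with levels `a`/`b` yields `.pred_target_a` and `.pred_target_b`.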
#' @noRd keras_postprocess_probs <- function(results, object) { - # The levels are now nested inside the fit object - colnames(results) <- object$fit$lvl - tibble::as_tibble(results) + if (is.list(results) && !is.null(names(results))) { + # Multi-output case: results is a named list of arrays/matrices + combined_preds <- purrr::map2_dfc( + results, + names(results), + function(res, name) { + lvls <- object$fit$lvl[[name]] # Assuming object$fit$lvl is a named list of levels + if (is.null(lvls)) { + # Fallback if levels are not specifically named for this output + lvls <- paste0("class", 1:ncol(res)) + } + colnames(res) <- lvls + tibble::as_tibble(res, .name_repair = "unique") %>% + dplyr::rename_with(~ paste0(".pred_", name, "_", .x)) + } + ) + return(combined_preds) + } else { + # Single output case: results is a matrix/array + # The levels are now nested inside the fit object + colnames(results) <- object$fit$lvl + tibble::as_tibble(results) + } } #' Post-process Keras Class Predictions @@ -147,17 +197,43 @@ keras_postprocess_probs <- function(results, object) { #' @return A tibble with a `.pred_class` column containing factor predictions. #' @noRd keras_postprocess_classes <- function(results, object) { - # The levels are now nested inside the fit object - lvls <- object$fit$lvl - if (ncol(results) == 1) { - # Binary classification - pred_class <- ifelse(results[, 1] > 0.5, lvls[2], lvls[1]) - pred_class <- factor(pred_class, levels = lvls) + if (is.list(results) && !is.null(names(results))) { + # Multi-output case: results is a named list of arrays/matrices + combined_preds <- purrr::map2_dfc(results, names(results), function(res, name) { + lvls <- object$fit$lvl[[name]] # Assuming object$fit$lvl is a named list of levels + if (is.null(lvls)) { + # Fallback placeholder names when no levels are stored for this output + lvls <- paste0("class", 1:ncol(res)) + } + + if (ncol(res) == 1) { + # Binary classification + pred_class <- ifelse(res[, 1] > 0.5, lvls[2], lvls[1]) + pred_class <- factor(pred_class, levels = lvls) + } else { + # Multiclass classification + pred_class_int <- apply(res, 1, which.max) + pred_class <- lvls[pred_class_int] + pred_class <- factor(pred_class, levels = lvls) + } + tibble::tibble(.pred_class = pred_class) %>% + dplyr::rename_with(~ paste0(".pred_class_", name)) + }) + return(combined_preds) } else { - # Multiclass classification - pred_class_int <- apply(results, 1, which.max) - pred_class <- lvls[pred_class_int] - pred_class <- factor(pred_class, levels = lvls) + # Single output case: results is a matrix/array + # The levels are now nested inside the fit object + lvls <- object$fit$lvl + if (ncol(results) == 1) { + # Binary classification + pred_class <- ifelse(results[, 1] > 0.5, lvls[2], lvls[1]) + pred_class <- factor(pred_class, levels = lvls) + } else { + # Multiclass classification + pred_class_int <- apply(results, 1, which.max) + pred_class <- lvls[pred_class_int] + pred_class <- factor(pred_class, levels = lvls) + } + tibble::tibble(.pred_class = pred_class) } - tibble::tibble(.pred_class = pred_class) } diff --git a/R/utils.R b/R/utils.R index ae0e69a..a130fde 100644 --- a/R/utils.R +++ b/R/utils.R @@ -176,6 +176,141 @@ loss_function_keras <- function(values = NULL) { ) } +#' Process Predictor Input for Keras (Functional API) +#' +#' @description +#' Preprocesses 
predictor data (`x`) into a format suitable for Keras models +#' built with the Functional API. Handles both tabular data and list-columns +#' of arrays (e.g., for images), supporting multiple inputs. +#' +#' @param x A data frame or matrix of predictors. +#' @return A list containing: +#' - `x_proc`: The processed predictor data (matrix or array, or list of arrays). +#' - `input_shape`: The determined input shape(s) for the Keras model. +#' @keywords internal +#' @export +process_x_functional <- function(x) { + if (is.data.frame(x)) { + # Check if it's a multi-input scenario (multiple list-columns) + if (all(sapply(x, is.list)) && ncol(x) > 1) { + x_proc_list <- lapply(x, function(col) { + do.call(abind::abind, c(col, list(along = 0))) + }) + # For multi-input, input_shape should be a list of shapes + input_shape_list <- lapply(x_proc_list, function(arr) { + if (length(dim(arr)) > 2) dim(arr)[-1] else ncol(arr) + }) + # Add names to the lists + names(x_proc_list) <- names(x) + names(input_shape_list) <- names(x) + return(list(x_proc = x_proc_list, input_shape = input_shape_list)) + } else if (ncol(x) == 1 && is.list(x[[1]])) { + # Original case: single predictor column containing a list of arrays. + x_proc <- do.call(abind::abind, c(x[[1]], list(along = 0))) + } else { + x_proc <- as.matrix(x) + } + } else { + # Non-data-frame input: use a matrix/array as-is, coerce anything else + x_proc <- if (is.matrix(x) || is.array(x)) x else as.matrix(x) + } + input_shape <- if (length(dim(x_proc)) > 2) dim(x_proc)[-1] else ncol(x_proc) + list(x_proc = x_proc, input_shape = input_shape) +} + +#' Process Outcome Input for Keras (Functional API) +#' +#' @description +#' Preprocesses outcome data (`y`) into a format suitable for Keras models +#' built with the Functional API. Handles both regression (numeric) and +#' classification (factor) outcomes, including one-hot encoding for classification, +#' and supports multiple outputs. +#' +#' @param y A vector or data frame of outcomes. +#' @param is_classification Logical, optional. If `TRUE`, treats `y` as +#' classification. If `FALSE`, treats as regression. If `NULL` (default), +#' it's determined from `is.factor(y)`. +#' @param class_levels Character vector, optional. The factor levels for +#' classification outcomes. If `NULL` (default), determined from `levels(y)`. +#' @return A list containing: +#' - `y_proc`: The processed outcome data (matrix or one-hot encoded array, +#' or list of these for multiple outputs). +#' - `is_classification`: Logical, indicating if `y` was treated as classification. +#' - `num_classes`: Integer, the number of classes for classification, or `NULL`. +#' - `class_levels`: Character vector, the factor levels for classification, or `NULL`. +#' @importFrom keras3 to_categorical +#' @keywords internal +#' @export +process_y_functional <- function( + y, + is_classification = NULL, + class_levels = NULL +) { + # If y is a data frame/tibble with one column, extract it to ensure it's + # processed by the single-output logic path. 
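+  # (e.g., a one-column tibble such as tibble::tibble(mpg = mtcars$mpg) is
+  # treated the same as the bare numeric vector mtcars$mpg)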
+ if (is.data.frame(y) && ncol(y) == 1) { + y <- y[[1]] + } + + if (is.data.frame(y)) { + # Handle multiple output columns + y_proc_list <- list() # This will store the processed y for each output + class_levels_list <- list() # To store class levels for each output + + for (col_name in names(y)) { + current_y <- y[[col_name]] + current_is_classification <- is_classification %||% is.factor(current_y) + current_class_levels <- class_levels %||% levels(current_y) + + y_proc_single <- NULL + num_classes_single <- NULL + + if (current_is_classification) { + if (is.null(current_class_levels)) { + current_class_levels <- levels(current_y) + } + num_classes_single <- length(current_class_levels) + y_factored <- factor(current_y, levels = current_class_levels) + y_proc_single <- keras3::to_categorical( + as.numeric(y_factored) - 1, + num_classes = num_classes_single + ) + } else { + y_proc_single <- as.matrix(current_y) + } + y_proc_list[[col_name]] <- y_proc_single + class_levels_list[[col_name]] <- current_class_levels # Store class levels for each output + } + # Return a list containing y_proc_list and class_levels_list + return(list(y_proc = y_proc_list, class_levels = class_levels_list)) + } else { + # Original single output case + if (is.null(is_classification)) { + is_classification <- is.factor(y) + } + + y_proc <- NULL + num_classes <- NULL + if (is_classification) { + if (is.null(class_levels)) { + class_levels <- levels(y) + } + num_classes <- length(class_levels) + y_factored <- factor(y, levels = class_levels) + y_proc <- keras3::to_categorical( + as.numeric(y_factored) - 1, + num_classes = num_classes + ) + } else { + y_proc <- as.matrix(y) + } + return(list( + y_proc = y_proc, + class_levels = class_levels, + is_classification = is_classification, + num_classes = num_classes + )) + } +} + + #' Process Predictor Input for Keras #' #' @description @@ -188,7 +323,7 @@ loss_function_keras <- function(values = NULL) { #' - `input_shape`: The determined input shape for the Keras model. #' @keywords internal #' @export -process_x <- function(x) { +process_x_sequential <- function(x) { if (is.data.frame(x) && ncol(x) == 1 && is.list(x[[1]])) { # Assumes a single predictor column containing a list of arrays. # We stack them into a single higher-dimensional array. @@ -221,7 +356,11 @@ process_x <- function(x) { #' @importFrom keras3 to_categorical #' @keywords internal #' @export -process_y <- function(y, is_classification = NULL, class_levels = NULL) { +process_y_sequential <- function( + y, + is_classification = NULL, + class_levels = NULL +) { # If y is a data frame/tibble, extract the first column if (is.data.frame(y)) { y <- y[[1]] diff --git a/man/process_x_functional.Rd b/man/process_x_functional.Rd new file mode 100644 index 0000000..6b05569 --- /dev/null +++ b/man/process_x_functional.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{process_x_functional} +\alias{process_x_functional} +\title{Process Predictor Input for Keras (Functional API)} +\usage{ +process_x_functional(x) +} +\arguments{ +\item{x}{A data frame or matrix of predictors.} +} +\value{ +A list containing: +\itemize{ +\item \code{x_proc}: The processed predictor data (matrix or array, or list of arrays). +\item \code{input_shape}: The determined input shape(s) for the Keras model. +} +} +\description{ +Preprocesses predictor data (\code{x}) into a format suitable for Keras models +built with the Functional API. 
Handles both tabular data and list-columns +of arrays (e.g., for images), supporting multiple inputs. +} +\keyword{internal} diff --git a/man/process_x.Rd b/man/process_x_sequential.Rd similarity index 87% rename from man/process_x.Rd rename to man/process_x_sequential.Rd index f464bbc..4a8059b 100644 --- a/man/process_x.Rd +++ b/man/process_x_sequential.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R -\name{process_x} -\alias{process_x} +\name{process_x_sequential} +\alias{process_x_sequential} \title{Process Predictor Input for Keras} \usage{ -process_x(x) +process_x_sequential(x) } \arguments{ \item{x}{A data frame or matrix of predictors.} diff --git a/man/process_y_functional.Rd b/man/process_y_functional.Rd new file mode 100644 index 0000000..8294f38 --- /dev/null +++ b/man/process_y_functional.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{process_y_functional} +\alias{process_y_functional} +\title{Process Outcome Input for Keras (Functional API)} +\usage{ +process_y_functional(y, is_classification = NULL, class_levels = NULL) +} +\arguments{ +\item{y}{A vector or data frame of outcomes.} + +\item{is_classification}{Logical, optional. If \code{TRUE}, treats \code{y} as +classification. If \code{FALSE}, treats as regression. If \code{NULL} (default), +it's determined from \code{is.factor(y)}.} + +\item{class_levels}{Character vector, optional. The factor levels for +classification outcomes. If \code{NULL} (default), determined from \code{levels(y)}.} +} +\value{ +A list containing: +\itemize{ +\item \code{y_proc}: The processed outcome data (matrix or one-hot encoded array, +or list of these for multiple outputs). +\item \code{is_classification}: Logical, indicating if \code{y} was treated as classification. +\item \code{num_classes}: Integer, the number of classes for classification, or \code{NULL}. +\item \code{class_levels}: Character vector, the factor levels for classification, or \code{NULL}. +} +} +\description{ +Preprocesses outcome data (\code{y}) into a format suitable for Keras models +built with the Functional API. Handles both regression (numeric) and +classification (factor) outcomes, including one-hot encoding for classification, +and supports multiple outputs. 
+} +\keyword{internal} diff --git a/man/process_y.Rd b/man/process_y_sequential.Rd similarity index 90% rename from man/process_y.Rd rename to man/process_y_sequential.Rd index 4d1187d..05ee206 100644 --- a/man/process_y.Rd +++ b/man/process_y_sequential.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R -\name{process_y} -\alias{process_y} +\name{process_y_sequential} +\alias{process_y_sequential} \title{Process Outcome Input for Keras} \usage{ -process_y(y, is_classification = NULL, class_levels = NULL) +process_y_sequential(y, is_classification = NULL, class_levels = NULL) } \arguments{ \item{y}{A vector of outcomes.} } diff --git a/tests/testthat/helper_keras.R b/tests/testthat/helper_keras.R index fc1aded..b67d286 100644 --- a/tests/testthat/helper_keras.R +++ b/tests/testthat/helper_keras.R @@ -7,6 +7,7 @@ library(rsample) library(dials) library(tune) library(purrr) +library(dplyr) skip_if_no_keras <- function() { testthat::skip_if_not_installed("keras3") diff --git a/tests/testthat/test_compile_keras_grid.R b/tests/testthat/test_compile_keras_grid.R index 5045cf3..b4ac6ed 100644 --- a/tests/testthat/test_compile_keras_grid.R +++ b/tests/testthat/test_compile_keras_grid.R @@ -1,6 +1,7 @@ # --- Test Data --- x_train <- as.matrix(iris[, 1:4]) y_train <- iris$Species +train_df <- tibble(x = I(x_train), y = y_train) # --- Tests --- test_that("compile_keras_grid works for sequential models", { @@ -36,7 +37,12 @@ test_that("compile_keras_grid works for sequential models", { learn_rate = c(0.01, 0.001) ) - results <- compile_keras_grid(spec, grid, x_train, y_train) + results <- compile_keras_grid( + spec, + grid, + select(train_df, x), + select(train_df, y) + ) expect_s3_class(results, "tbl_df") expect_equal(nrow(results), 2) @@ -63,32 +69,54 @@ test_that("compile_keras_grid works for functional models", { model_name <- "test_func_spec_compile" on.exit(suppressMessages(remove_keras_spec(model_name)), add = TRUE) + input_block <- function(input_shape) { + keras3::layer_input(shape = input_shape, name = "x") + } + + dense_block <- function(tensor, units = 32) { + tensor |> keras3::layer_dense(units = units, activation = "relu") + } + + output_block <- function(tensor, num_classes) { + tensor |> + keras3::layer_dense( + units = num_classes, + activation = "softmax", + name = "y" + ) + } + create_keras_functional_spec( model_name = model_name, mode = "classification", layer_blocks = list( - input = function(input_shape) { - keras3::layer_input(shape = input_shape) - }, - dense = function(input, units = 32) { - input |> keras3::layer_dense(units = units, activation = "relu") - }, - output = function(dense, num_classes) { - dense |> - keras3::layer_dense(units = num_classes, activation = "softmax") - } + input = input_block, + dense = inp_spec(dense_block, "input"), + output = inp_spec(output_block, "dense") ) ) spec <- test_func_spec_compile() |> set_engine("keras") + rec <- recipe(y ~ x, data = train_df) # Recipe for the single outcome and matrix predictor + wf <- workflow() |> + add_recipe(rec) |> + add_model(spec) + + fit_obj <- fit(wf, data = train_df) + grid <- tibble::tibble( dense_units = c(16, 32), learn_rate = c(0.01, 0.001) ) - results <- compile_keras_grid(spec, grid, x_train, y_train) + results <- compile_keras_grid( + spec, + grid, + select(train_df, x), + select(train_df, y) + ) expect_s3_class(results, "tbl_df") expect_equal(nrow(results), 2) @@ -120,7 +148,7 @@ test_that("compile_keras_grid handles errors gracefully", { mode = "classification", 
layer_blocks = list( input = function(input_shape) { - keras3::layer_input(shape = input_shape) + keras3::layer_input(shape = input_shape, name = "x") }, dense1 = function(input, units = 32) { input |> keras3::layer_dense(units = units, activation = "relu") @@ -142,7 +170,12 @@ test_that("compile_keras_grid handles errors gracefully", { grid <- tibble::tibble(dense1_units = 16) expect_warning( - results <- compile_keras_grid(spec, grid, x_train, y_train), + results <- compile_keras_grid( + spec, + grid, + select(train_df, x), + select(train_df, y) + ), "Block 'dense2' has no inputs from other blocks." ) diff --git a/tests/testthat/test_e2e_functional.R b/tests/testthat/test_e2e_func_classification.R similarity index 59% rename from tests/testthat/test_e2e_functional.R rename to tests/testthat/test_e2e_func_classification.R index 2b90eb6..6a245e9 100644 --- a/tests/testthat/test_e2e_functional.R +++ b/tests/testthat/test_e2e_func_classification.R @@ -1,57 +1,3 @@ -test_that("E2E: Functional spec (regression) works", { - skip_if_no_keras() - - # Define blocks for a simple forked functional model - input_block <- function(input_shape) keras3::layer_input(shape = input_shape) - path_block <- function(tensor, units = 8) { - tensor |> keras3::layer_dense(units = units, activation = "relu") - } - concat_block <- function(input_a, input_b) { - keras3::layer_concatenate(list(input_a, input_b)) - } - output_block_reg <- function(tensor) keras3::layer_dense(tensor, units = 1) - - model_name <- "e2e_func_reg" - on.exit(suppressMessages(remove_keras_spec(model_name)), add = TRUE) - - # Create a spec with two parallel paths that are then concatenated - create_keras_functional_spec( - model_name = model_name, - layer_blocks = list( - main_input = input_block, - path_a = inp_spec(path_block, "main_input"), - path_b = inp_spec(path_block, "main_input"), - concatenated = inp_spec( - concat_block, - c(path_a = "input_a", path_b = "input_b") - ), - output = inp_spec(output_block_reg, "concatenated") - ), - mode = "regression" - ) - - spec <- e2e_func_reg( - path_a_units = 32, - path_b_units = 16, - fit_epochs = 2 - ) |> - set_engine("keras") - - data <- mtcars - rec <- recipe(mpg ~ ., data = data) - wf <- workflows::workflow(rec, spec) - - expect_no_error(fit_obj <- parsnip::fit(wf, data = data)) - expect_s3_class(fit_obj, "workflow") - - preds <- predict(fit_obj, new_data = data[1:5, ]) - expect_s3_class(preds, "tbl_df") - expect_equal(names(preds), ".pred") - expect_equal(nrow(preds), 5) - expect_true(is.numeric(preds$.pred)) -}) - - test_that("E2E: Functional spec (classification) works", { skip_if_no_keras() @@ -114,7 +60,6 @@ test_that("E2E: Functional spec (classification) works", { expect_true(all(abs(rowSums(preds_prob) - 1) < 1e-5)) }) - test_that("E2E: Functional spec tuning (including repetition) works", { skip_if_no_keras() @@ -154,7 +99,7 @@ test_that("E2E: Functional spec tuning (including repetition) works", { tune_wf <- workflows::workflow(rec, tune_spec) folds <- rsample::vfold_cv(iris, v = 2) - params <- extract_parameter_set_dials(tune_wf) |> + params <- extract_parameter_set_dials(tune_wf) |> update( num_dense_path = num_terms(c(1, 2)), dense_path_units = hidden_units(c(4, 8)) @@ -183,56 +128,68 @@ test_that("E2E: Functional spec tuning (including repetition) works", { expect_true(all(c("num_dense_path", "dense_path_units") %in% names(metrics))) }) -test_that("E2E: Block repetition works for functional models", { +test_that("E2E: Multi-input, single-output functional classification works", 
{ skip_if_no_keras() - - input_block <- function(input_shape) keras3::layer_input(shape = input_shape) - dense_block <- function(tensor, units = 8) { - tensor |> keras3::layer_dense(units = units, activation = "relu") + options(kerasnip.show_removal_messages = FALSE) + on.exit(options(kerasnip.show_removal_messages = TRUE), add = TRUE) + + # Define layer blocks + input_block_1 <- function(input_shape) layer_input(shape = input_shape, name = "input_1") + input_block_2 <- function(input_shape) layer_input(shape = input_shape, name = "input_2") + flatten_block <- function(tensor) layer_flatten(tensor) + dense_path <- function(tensor, units = 16) { + tensor |> layer_dense(units = units, activation = "relu") + } + concat_block <- function(in_1, in_2) layer_concatenate(list(in_1, in_2)) + output_block_class <- function(tensor, num_classes) { + layer_dense(tensor, units = num_classes, activation = "softmax") } - output_block <- function(tensor) keras3::layer_dense(tensor, units = 1) - model_name <- "e2e_func_repeat" + model_name <- "multi_in_class" on.exit(suppressMessages(remove_keras_spec(model_name)), add = TRUE) create_keras_functional_spec( model_name = model_name, layer_blocks = list( - main_input = input_block, - dense_path = inp_spec(dense_block, "main_input"), - output = inp_spec(output_block, "dense_path") + input_a = input_block_1, + input_b = input_block_2, + flatten_a = inp_spec(flatten_block, "input_a"), + flatten_b = inp_spec(flatten_block, "input_b"), + path_a = inp_spec(dense_path, "flatten_a"), + path_b = inp_spec(dense_path, "flatten_b"), + concatenated = inp_spec(concat_block, c(path_a = "in_1", path_b = "in_2")), + output = inp_spec(output_block_class, "concatenated") ), - mode = "regression" + mode = "classification" ) - # --- Test with 1 repetition --- - spec_1 <- e2e_func_repeat(num_dense_path = 1, fit_epochs = 1) |> - set_engine("keras") - fit_1 <- fit(spec_1, mpg ~ ., data = mtcars) - model_1_layers <- fit_1 |> - extract_keras_model() |> - pluck("layers") + spec <- multi_in_class(fit_epochs = 2) |> set_engine("keras") - # Expect 3 layers: Input, Dense, Output - expect_equal(length(model_1_layers), 3) + # Prepare dummy data + set.seed(123) + x1 <- matrix(rnorm(100 * 5), ncol = 5) + x2 <- matrix(rnorm(100 * 3), ncol = 3) + y <- factor(sample(c("a", "b"), 100, replace = TRUE)) - # --- Test with 2 repetitions --- - spec_2 <- e2e_func_repeat(num_dense_path = 2, fit_epochs = 1) |> - set_engine("keras") - fit_2 <- fit(spec_2, mpg ~ ., data = mtcars) - model_2_layers <- fit_2 |> - extract_keras_model() |> - pluck("layers") - # Expect 4 layers: Input, Dense, Dense, Output - expect_equal(length(model_2_layers), 4) - - # --- Test with 0 repetitions --- - spec_3 <- e2e_func_repeat(num_dense_path = 0, fit_epochs = 1) |> - set_engine("keras") - fit_3 <- fit(spec_3, mpg ~ ., data = mtcars) - model_3_layers <- fit_3 |> - extract_keras_model() |> - pluck("layers") - # Expect 2 layers: Input, Output - expect_equal(length(model_3_layers), 2) -}) + train_df <- tibble::tibble( + input_a = lapply(seq_len(nrow(x1)), function(i) x1[i, , drop = FALSE]), + input_b = lapply(seq_len(nrow(x2)), function(i) x2[i, , drop = FALSE]), + outcome = y + ) + + rec <- recipe(outcome ~ input_a + input_b, data = train_df) + wf <- workflows::workflow(rec, spec) + + expect_no_error(fit_obj <- parsnip::fit(wf, data = train_df)) + + new_data_df <- tibble::tibble( + input_a = lapply(seq_len(5), function(i) matrix(rnorm(5), ncol = 5)), + input_b = lapply(seq_len(5), function(i) matrix(rnorm(3), ncol = 3)) + ) + preds 
<- predict(fit_obj, new_data = new_data_df) + + expect_s3_class(preds, "tbl_df") + expect_equal(names(preds), c(".pred_class")) + expect_equal(nrow(preds), 5) + expect_true(is.factor(preds$.pred_class)) +}) \ No newline at end of file diff --git a/tests/testthat/test_e2e_func_regression.R b/tests/testthat/test_e2e_func_regression.R new file mode 100644 index 0000000..99d54c1 --- /dev/null +++ b/tests/testthat/test_e2e_func_regression.R @@ -0,0 +1,183 @@ +test_that("E2E: Functional spec (regression) works", { + skip_if_no_keras() + + # Define blocks for a simple forked functional model + input_block <- function(input_shape) keras3::layer_input(shape = input_shape) + path_block <- function(tensor, units = 8) { + tensor |> keras3::layer_dense(units = units, activation = "relu") + } + concat_block <- function(input_a, input_b) { + keras3::layer_concatenate(list(input_a, input_b)) + } + output_block_reg <- function(tensor) keras3::layer_dense(tensor, units = 1) + + model_name <- "e2e_func_reg" + on.exit(suppressMessages(remove_keras_spec(model_name)), add = TRUE) + + # Create a spec with two parallel paths that are then concatenated + create_keras_functional_spec( + model_name = model_name, + layer_blocks = list( + main_input = input_block, + path_a = inp_spec(path_block, "main_input"), + path_b = inp_spec(path_block, "main_input"), + concatenated = inp_spec( + concat_block, + c(path_a = "input_a", path_b = "input_b") + ), + output = inp_spec(output_block_reg, "concatenated") + ), + mode = "regression" + ) + + spec <- e2e_func_reg( + path_a_units = 32, + path_b_units = 16, + fit_epochs = 2 + ) |> + set_engine("keras") + + data <- mtcars + rec <- recipe(mpg ~ ., data = data) + wf <- workflows::workflow(rec, spec) + + expect_no_error(fit_obj <- parsnip::fit(wf, data = data)) + expect_s3_class(fit_obj, "workflow") + + preds <- predict(fit_obj, new_data = data[1:5, ]) + expect_s3_class(preds, "tbl_df") + expect_equal(names(preds), ".pred") + expect_equal(nrow(preds), 5) + expect_true(is.numeric(preds$.pred)) +}) + +test_that("E2E: Block repetition works for functional models", { + skip_if_no_keras() + + input_block <- function(input_shape) keras3::layer_input(shape = input_shape) + dense_block <- function(tensor, units = 8) { + tensor |> keras3::layer_dense(units = units, activation = "relu") + } + output_block <- function(tensor) keras3::layer_dense(tensor, units = 1) + + model_name <- "e2e_func_repeat" + on.exit(suppressMessages(remove_keras_spec(model_name)), add = TRUE) + + create_keras_functional_spec( + model_name = model_name, + layer_blocks = list( + main_input = input_block, + dense_path = inp_spec(dense_block, "main_input"), + output = inp_spec(output_block, "dense_path") + ), + mode = "regression" + ) + + # --- Test with 1 repetition --- + spec_1 <- e2e_func_repeat(num_dense_path = 1, fit_epochs = 1) |> + set_engine("keras") + fit_1 <- fit(spec_1, mpg ~ ., data = mtcars) + model_1_layers <- fit_1 |> + extract_keras_model() |> + pluck("layers") + + # Expect 3 layers: Input, Dense, Output + expect_equal(length(model_1_layers), 3) + + # --- Test with 2 repetitions --- + spec_2 <- e2e_func_repeat(num_dense_path = 2, fit_epochs = 1) |> + set_engine("keras") + fit_2 <- fit(spec_2, mpg ~ ., data = mtcars) + model_2_layers <- fit_2 |> + extract_keras_model() |> + pluck("layers") + # Expect 4 layers: Input, Dense, Dense, Output + expect_equal(length(model_2_layers), 4) + + # --- Test with 0 repetitions --- + spec_3 <- e2e_func_repeat(num_dense_path = 0, fit_epochs = 1) |> + set_engine("keras") + 
fit_3 <- fit(spec_3, mpg ~ ., data = mtcars) + model_3_layers <- fit_3 |> + extract_keras_model() |> + pluck("layers") + # Expect 2 layers: Input, Output + expect_equal(length(model_3_layers), 2) +}) + +test_that("E2E: Multi-input, multi-output functional regression works", { + skip_if_no_keras() + options(kerasnip.show_removal_messages = FALSE) + on.exit(options(kerasnip.show_removal_messages = TRUE), add = TRUE) + + # Define layer blocks + input_block_1 <- function(input_shape) { + layer_input(shape = input_shape, name = "input_1") + } + input_block_2 <- function(input_shape) { + layer_input(shape = input_shape, name = "input_2") + } + dense_path <- function(tensor, units = 16) { + tensor |> layer_dense(units = units, activation = "relu") + } + concat_block <- function(in_1, in_2) layer_concatenate(list(in_1, in_2)) + output_block_1 <- function(tensor) { + layer_dense(tensor, units = 1, name = "output_1") + } + output_block_2 <- function(tensor) { + layer_dense(tensor, units = 1, name = "output_2") + } + + model_name <- "multi_in_out_reg" + on.exit(suppressMessages(remove_keras_spec(model_name)), add = TRUE) + + create_keras_functional_spec( + model_name = model_name, + layer_blocks = list( + input_a = input_block_1, + input_b = input_block_2, + path_a = inp_spec(dense_path, "input_a"), + path_b = inp_spec(dense_path, "input_b"), + concatenated = inp_spec( + concat_block, + c(path_a = "in_1", path_b = "in_2") + ), + output_1 = inp_spec(output_block_1, "concatenated"), + output_2 = inp_spec(output_block_2, "concatenated") + ), + mode = "regression" + ) + + spec <- multi_in_out_reg(fit_epochs = 2) |> set_engine("keras") + + # Prepare dummy data + set.seed(123) + x1 <- matrix(rnorm(100 * 5), ncol = 5) + x2 <- matrix(rnorm(100 * 3), ncol = 3) + y1 <- rnorm(100) + y2 <- rnorm(100) + + train_df <- tibble::tibble( + input_a = lapply(seq_len(nrow(x1)), function(i) x1[i, , drop = FALSE]), + input_b = lapply(seq_len(nrow(x2)), function(i) x2[i, , drop = FALSE]), + output_1 = y1, + output_2 = y2 + ) + + rec <- recipe(output_1 + output_2 ~ input_a + input_b, data = train_df) + wf <- workflows::workflow(rec, spec) + + expect_no_error(fit_obj <- parsnip::fit(wf, data = train_df)) + + new_data_df <- tibble::tibble( + input_a = lapply(seq_len(5), function(i) matrix(rnorm(5), ncol = 5)), + input_b = lapply(seq_len(5), function(i) matrix(rnorm(3), ncol = 3)) + ) + preds <- predict(fit_obj, new_data = new_data_df) + + expect_s3_class(preds, "tbl_df") + expect_equal(names(preds), c(".pred_output_1", ".pred_output_2")) + expect_equal(nrow(preds), 5) + expect_true(is.numeric(preds$.pred_output_1)) + expect_true(is.numeric(preds$.pred_output_2)) +}) \ No newline at end of file diff --git a/tests/testthat/test_e2e_classification.R b/tests/testthat/test_e2e_seq_classification.R similarity index 100% rename from tests/testthat/test_e2e_classification.R rename to tests/testthat/test_e2e_seq_classification.R diff --git a/tests/testthat/test_e2e_regression.R b/tests/testthat/test_e2e_seq_regression.R similarity index 100% rename from tests/testthat/test_e2e_regression.R rename to tests/testthat/test_e2e_seq_regression.R diff --git a/vignettes/functional_api.Rmd b/vignettes/functional_api.Rmd index add9bbf..68fa310 100644 --- a/vignettes/functional_api.Rmd +++ b/vignettes/functional_api.Rmd @@ -34,9 +34,9 @@ There are two special requirements: Let's see this in action. 
-## Example 1: A Fork-Join Regression Model +## Example 1: A Two-Input, Two-Output Regression Model -We will build a model that forks the input, passes it through two separate dense layer paths, and then joins the results with a concatenation layer before producing a final prediction. +This model takes two distinct inputs, processes them separately, and concatenates the results before producing two regression outputs. This demonstrates the functional API's ability to handle multiple inputs and outputs, which is not possible with the sequential API. ### Step 1: Load Libraries @@ -55,68 +55,73 @@ These are the building blocks of our model. Each function represents a node in the graph. -```{r define-blocks-functional} -# The input node. `input_shape` is supplied automatically by the engine. -input_block <- function(input_shape) { - layer_input(shape = input_shape) +```{r define-blocks-functional-two-input} +# Input blocks for two distinct inputs +input_block_1 <- function(input_shape) { + layer_input(shape = input_shape, name = "input_1") } -# A generic block for a dense path. `units` will be a tunable parameter. -path_block <- function(tensor, units = 16) { +input_block_2 <- function(input_shape) { + layer_input(shape = input_shape, name = "input_2") +} + +# Dense paths for each input +dense_path_1 <- function(tensor, units = 16) { + tensor |> layer_dense(units = units, activation = "relu") +} + +dense_path_2 <- function(tensor, units = 16) { tensor |> layer_dense(units = units, activation = "relu") } -# A block to join two tensors. +# A block to join two tensors concat_block <- function(input_a, input_b) { layer_concatenate(list(input_a, input_b)) } -# The final output block for regression. -output_block_reg <- function(tensor) { - layer_dense(tensor, units = 1) +# The final output block for regression +output_block_1 <- function(tensor) { + layer_dense(tensor, units = 1, name = "output_1") +} + +output_block_2 <- function(tensor) { + layer_dense(tensor, units = 1, name = "output_2") } ``` ### Step 3: Create the Model Specification -Now we assemble the blocks into a graph. We use the `inp_spec()` helper to connect the blocks. This avoids writing verbose anonymous functions like `function(main_input, units) path_block(main_input, units)`. `inp_spec()` automatically creates a wrapper that renames the arguments of our blocks to match the node names from the `layer_blocks` list. +Now we assemble the blocks into a graph. The `inp_spec()` helper simplifies connecting these blocks, eliminating the need for verbose anonymous functions. `inp_spec()` automatically creates a wrapper that renames the arguments of our blocks to match the node names defined in the `layer_blocks` list. -```{r create-spec-functional} -model_name <- "forked_reg_spec" +```{r create-spec-functional-two-input} +model_name <- "two_output_reg_spec" # Clean up the spec when the vignette is done knitting on.exit(remove_keras_spec(model_name), add = TRUE) create_keras_functional_spec( model_name = model_name, layer_blocks = list( - # Node names are defined by the list names - main_input = input_block, - - # `inp_spec()` renames the first argument of `path_block` ('tensor') - # to 'main_input' to match the node name. - path_a = inp_spec(path_block, "main_input"), - path_b = inp_spec(path_block, "main_input"), - - # For multiple inputs, `inp_spec()` takes a named vector to map - new argument names to the original block's argument names. 
- concatenated = inp_spec(concat_block, c(path_a = "input_a", path_b = "input_b")), - - # The output block takes the concatenated tensor as its input. - output = inp_spec(output_block_reg, "concatenated") + input_1 = input_block_1, + input_2 = input_block_2, + processed_1 = inp_spec(dense_path_1, "input_1"), + processed_2 = inp_spec(dense_path_2, "input_2"), + concatenated = inp_spec(concat_block, c(processed_1 = "input_a", processed_2 = "input_b")), + output_1 = inp_spec(output_block_1, "concatenated"), + output_2 = inp_spec(output_block_2, "concatenated") ), - mode = "regression" + mode = "regression" # Regression mode; the outcome will have two columns ) ``` ### Step 4: Use and Fit the Model -The new function `forked_reg_spec()` is now available. Its arguments (`path_a_units`, `path_b_units`) were discovered automatically from our block definitions. +The new function `two_output_reg_spec()` is now available. Its arguments (`processed_1_units`, `processed_2_units`) were discovered automatically from our block definitions. -```{r fit-functional} -# We can override the default `units` from `path_block` for each path. -spec <- forked_reg_spec( - path_a_units = 16, - path_b_units = 8, +```{r fit-functional-two-input} +# We can override the default `units` for each path. +spec <- two_output_reg_spec( + processed_1_units = 16, + processed_2_units = 8, fit_epochs = 10, fit_verbose = 0 # Suppress fitting output in vignette ) |> @@ -124,94 +129,106 @@ spec <- forked_reg_spec( print(spec) -# Fit the model on the mtcars dataset -rec <- recipe(mpg ~ ., data = mtcars) +# Prepare dummy data with two inputs and two outputs +set.seed(123) +x_data_1 <- matrix(runif(100 * 5), ncol = 5) +x_data_2 <- matrix(runif(100 * 3), ncol = 3) +y_data_1 <- runif(100) +y_data_2 <- runif(100) + +# For tidymodels, inputs and outputs need to be in a data frame, potentially as lists of matrices +train_df <- tibble::tibble( + input_1 = lapply(seq_len(nrow(x_data_1)), function(i) x_data_1[i, , drop = FALSE]), + input_2 = lapply(seq_len(nrow(x_data_2)), function(i) x_data_2[i, , drop = FALSE]), + output_1 = y_data_1, # outcome columns named after the output blocks + output_2 = y_data_2 +) + +rec <- recipe(output_1 + output_2 ~ input_1 + input_2, data = train_df) # Recipe for two outputs wf <- workflow() |> - add_recipe(rec) |> + add_recipe(rec) |> add_model(spec) +fit_obj <- fit(wf, data = train_df) -fit_obj <- fit(wf, data = mtcars) - -predict(fit_obj, new_data = mtcars[1:5, ]) +# Predict on new data +new_data_df <- tibble::tibble( + input_1 = lapply(seq_len(5), function(i) matrix(runif(5), ncol = 5)), + input_2 = lapply(seq_len(5), function(i) matrix(runif(3), ncol = 3)) +) +predict(fit_obj, new_data = new_data_df) ``` -## Example 2: Tuning a Functional Model's Depth +## A common debugging workflow: `compile_keras_grid()` -A key feature of `kerasnip` is the ability to tune the *depth* of the network by repeating a block multiple times. A block can be repeated if it has **exactly one input tensor** from another block in the graph. +In the original Keras guide, a common workflow is to incrementally add layers and call `summary()` to inspect the architecture. With `kerasnip`, the model is defined declaratively, so we can't inspect it layer-by-layer in the same way. -Let's create a simple functional model and tune both its width (`units`) and its depth (`num_...`). +However, `kerasnip` provides a powerful equivalent: `compile_keras_grid()`. 

-## Example 2: Tuning a Functional Model's Depth
+## A common debugging workflow: `compile_keras_grid()`

-A key feature of `kerasnip` is the ability to tune the *depth* of the network by repeating a block multiple times. A block can be repeated if it has **exactly one input tensor** from another block in the graph.
-
-Let's create a simple functional model and tune both its width (`units`) and its depth (`num_...`).
-
-### Step 1: Define Blocks and Create Spec
-
-This model is architecturally sequential, but we build it with the functional API to demonstrate the repetition feature.
-
-```{r create-tunable-functional-spec}
-dense_block <- function(tensor, units = 16) {
-  tensor |> layer_dense(units = units, activation = "relu")
-}
-output_block_class <- function(tensor, num_classes) {
-  tensor |> layer_dense(units = num_classes, activation = "softmax")
-}
-
-model_name_tune <- "tunable_func_mlp"
-on.exit(remove_keras_spec(model_name_tune), add = TRUE)
-
-create_keras_functional_spec(
-  model_name = model_name_tune,
-  layer_blocks = list(
-    main_input = input_block,
-    # This block has a single input ('main_input'), so it can be repeated.
-    dense_path = inp_spec(dense_block, "main_input"),
-    output = inp_spec(output_block_class, "dense_path")
-  ),
-  mode = "classification"
-)
-```
-
-### Step 2: Set up and Run Tuning
-
-We will tune `dense_path_units` (the width) and `num_dense_path` (the depth). The `num_dense_path` argument was created automatically because `dense_path` is a repeatable block.
-
-```{r tune-functional, cache=TRUE}
-tune_spec <- tunable_func_mlp(
-  dense_path_units = tune(),
-  num_dense_path = tune(),
-  fit_epochs = 5,
-  fit_verbose = 0
-) |>
-  set_engine("keras")
-
-rec <- recipe(Species ~ ., data = iris)
-tune_wf <- workflow() |>
-  add_recipe(rec) |>
-  add_model(tune_spec)
-
-folds <- vfold_cv(iris, v = 2)
-
-# Define the tuning grid
-params <- extract_parameter_set_dials(tune_wf) |>
-  update(
-    dense_path_units = hidden_units(c(8, 32)),
-    num_dense_path = num_terms(c(1, 3)) # Test models with 1, 2, or 3 hidden layers
-  )
-
-grid <- grid_regular(params, levels = 2)
-grid
-
-control <- control_grid(save_pred = FALSE, verbose = FALSE)
-
-tune_res <- tune_grid(
-  tune_wf,
-  resamples = folds,
-  grid = grid,
-  control = control
-)
-
-show_best(tune_res, metric = "accuracy")
-```
-
-The results show that `tidymodels` successfully trained and evaluated models with different numbers of hidden layers, demonstrating that we can tune the very architecture of the network.
+In the original Keras guide, a common workflow is to incrementally add layers and call `summary()` to inspect the architecture. With `kerasnip`, the model is defined declaratively, so we cannot inspect it layer by layer in the same way.
+
+However, `kerasnip` provides a powerful equivalent: `compile_keras_grid()`. This function checks that your `layer_blocks` define a valid Keras model and returns the compiled model structure, all without running a full training cycle. This makes it ideal for debugging your architecture.
+
+Let's see this in action with the `two_output_reg_spec` model:
+
+```{r compile-grid-debug-functional}
+# Create a spec instance
+spec <- two_output_reg_spec(
+  processed_1_units = 16,
+  processed_2_units = 8
+)
+
+# Prepare dummy data with two inputs and two outputs
+x_dummy_1 <- matrix(runif(10 * 5), ncol = 5)
+x_dummy_2 <- matrix(runif(10 * 3), ncol = 3)
+y_dummy_1 <- runif(10)
+y_dummy_2 <- runif(10)
+
+# compile_keras_grid() takes x and y separately: inputs as list-columns of
+# matrices, outputs as numeric columns named after the output blocks.
+x_dummy_df <- tibble::tibble(
+  input_1 = lapply(seq_len(nrow(x_dummy_1)), function(i) x_dummy_1[i, , drop = FALSE]),
+  input_2 = lapply(seq_len(nrow(x_dummy_2)), function(i) x_dummy_2[i, , drop = FALSE])
+)
+y_dummy_df <- tibble::tibble(output_1 = y_dummy_1, output_2 = y_dummy_2)
+
+# Build and compile the model without training it
+compilation_results <- compile_keras_grid(
+  spec = spec,
+  grid = tibble::tibble(),
+  x = x_dummy_df,
+  y = y_dummy_df
+)
+
+# Extract the compiled model and print its summary
+compilation_results |>
+  select(compiled_model) |>
+  pull() |>
+  pluck(1) |>
+  summary()
+```
+
+```{r grid-debug-plot, eval=FALSE}
+compilation_results |>
+  select(compiled_model) |>
+  pull() |>
+  pluck(1) |>
+  plot(show_shapes = TRUE)
+```
+
+![model](images/model_plot_shapes_fAPI.png){fig-alt="A plot of the model graph: two input branches, a concatenation node, and two output layers, annotated with tensor shapes."}
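+
+The `select()`/`pull()`/`pluck()` pipeline above relies on `dplyr` and
+`purrr`. If you prefer base R, the extraction step is equivalent to indexing
+the `compiled_model` list-column directly (assuming, as above, that
+`compilation_results` is a tibble with one row per grid point):
+
+```{r extract-model-base-r, eval=FALSE}
+# Same model object, without the tidyverse pipeline
+model <- compilation_results$compiled_model[[1]]
+summary(model)
+```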

+## When to use the functional API
+
+In general, the functional API is higher-level, easier and safer, and has a number of features that subclassed models do not support.
+
+However, model subclassing provides greater flexibility when building models that are not easily expressible as directed acyclic graphs of layers. For example, you could not implement a Tree-RNN with the functional API and would have to subclass `Model` directly.
+
+### Functional API strengths
+
+* **Less verbose**: There is no `super$initialize()`, no `call = function(...)`, no `self$...`, etc.
+* **Model validation during graph definition**: In the functional API, the input specification (shape and dtype) is created in advance using `layer_input()`. Each time a layer is called, it validates that the input specification matches its assumptions, raising a helpful error message if not.
+* **A functional model is plottable and inspectable**: You can plot the model as a graph, and you can easily access intermediate nodes in this graph.
+* **A functional model can be serialized or cloned**: As a data structure rather than code, a functional model is safely serializable. It can be saved as a single file, allowing you to recreate the exact same model without needing the original code (see the sketch after this list).
+
+### Functional API weakness
+
+* **It does not support dynamic architectures**: The functional API treats models as DAGs of layers. This is true for most deep learning architectures, but not all; for example, recursive networks or Tree-RNNs do not follow this assumption and cannot be implemented in the functional API.
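+
+To make the serialization point concrete, here is a minimal sketch using
+`keras3::save_model()` and `load_model()`. The temp-file path and the reuse
+of the model compiled earlier are illustrative assumptions, not part of the
+original example:
+
+```{r serialize-sketch, eval=FALSE}
+# A functional model round-trips through a single file; the restored model
+# has the same architecture and weights, no original code required.
+model <- compilation_results$compiled_model[[1]]
+path <- tempfile(fileext = ".keras")
+save_model(model, path)
+restored <- load_model(path)
+```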

## Conclusion

diff --git a/vignettes/images/model_plot_shapes_fAPI.png b/vignettes/images/model_plot_shapes_fAPI.png
new file mode 100644
index 0000000..82afe5f
Binary files /dev/null and b/vignettes/images/model_plot_shapes_fAPI.png differ
diff --git a/vignettes/images/model_plot_shapes_s.png b/vignettes/images/model_plot_shapes_s.png
new file mode 100644
index 0000000..0d72c05
Binary files /dev/null and b/vignettes/images/model_plot_shapes_s.png differ
diff --git a/vignettes/sequential_model.Rmd b/vignettes/sequential_model.Rmd
index c36f0c5..2930991 100644
--- a/vignettes/sequential_model.Rmd
+++ b/vignettes/sequential_model.Rmd
@@ -142,116 +142,20 @@ compilation_results <- compile_keras_grid(
)

# Print the summary
-summary(compilation_results$compiled_model[[1]])
+compilation_results |>
+  select(compiled_model) |>
+  pull() |>
+  pluck(1) |>
+  summary()
```

-## Feature Extraction with a Sequential Model
-
-Once a Sequential model has been built, it behaves like a Functional API model. This means that every layer has an input and output attribute. In `kerasnip`, we can get access to this underlying model structure using `compile_keras_grid()`.
-
-This allows us to create a new model that outputs the values of the intermediate layers.
-
-```{r feature-extraction}
-# We can reuse the compilation results from the previous chunk
-keras_model_obj <- compilation_results$compiled_model[[1]]
-
-# Create a new Keras model for feature extraction
-feature_extractor <- keras_model(
-  inputs = keras_model_obj$inputs,
-  outputs = lapply(keras_model_obj$layers, function(x) x$output)
-)
-
-# Call the feature extractor on a dummy input tensor
-x_tensor <- op_ones(c(1, 28, 28, 1))
-features <- feature_extractor(x_tensor)
-
-# Print the shapes of the extracted feature maps
-lapply(features, dim)
-```
-
-## Transfer Learning with a Sequential Model
-
-Transfer learning consists of freezing the bottom layers in a model and only training the top layers. A common blueprint is to use a Sequential model to stack a pre-trained model and some freshly initialized classification layers.
-
-`kerasnip` supports this by allowing a `layer_block` to contain a pre-trained model.
-
-```{r transfer-learning}
-# Define a block that incorporates a pre-trained base
-# This block creates a new sequential model and adds the pre-trained,
-# frozen base model as its first layer.
-pretrained_base_block <- function(model, input_shape) {
-  base_model <- application_xception(
-    weights = "imagenet",
-    include_top = FALSE,
-    pooling = "avg",
-    input_shape = input_shape
-  )
-  # Freeze the weights of the pre-trained base
-  freeze_weights(base_model)
-
-  # The block must return a sequential model
-  keras_model_sequential(input_shape = input_shape) |>
-    base_model()
-}
-
-# Define a new classification head. This block will be appended to the
-# sequential model returned by the previous block.
-classification_head_block <- function(model, num_classes) {
-  model |>
-    layer_dense(units = 1000, activation = "relu") |>
-    layer_dense(units = num_classes, activation = "softmax")
-}
-
-# Create a new kerasnip spec with the pre-trained base and new head
-create_keras_sequential_spec(
-  model_name = "transfer_cnn",
-  layer_blocks = list(
-    base = pretrained_base_block,
-    head = classification_head_block
-  ),
-  mode = "classification"
-)
-
-# Create a spec instance
-transfer_spec <- transfer_cnn(
-  compile_loss = "categorical_crossentropy",
-  compile_optimizer = "adam"
-)
-
-# Prepare dummy data for a 224x224x3 image
-x_dummy_tl_list <- lapply(1:10, function(i) array(runif(224*224*3), dim = c(224, 224, 3)))
-x_dummy_tl_df <- tibble::tibble(x = x_dummy_tl_list)
-y_dummy_tl <- factor(sample(0:9, 10, replace = TRUE), levels = 0:9)
-y_dummy_tl_df <- tibble::tibble(y = y_dummy_tl)
-
-# Use compile_keras_grid to inspect the model and trainable parameters
-compilation_results_tl <- compile_keras_grid(
-  spec = transfer_spec,
-  grid = tibble::tibble(),
-  x = x_dummy_tl_df,
-  y = y_dummy_tl_df
-)
-
-# Print the summary to verify that the base model's parameters are non-trainable
-summary(compilation_results_tl$compiled_model[[1]])
-```
+```{r grid-debug-plot, eval=FALSE}
+compilation_results |>
+  select(compiled_model) |>
+  pull() |>
+  pluck(1) |>
+  plot(show_shapes = TRUE)
+```

+![model](images/model_plot_shapes_s.png){fig-alt="A plot of the sequential model graph with layer names and output shapes."}
\ No newline at end of file