From f4ad6d2f99b9e2aecafa49ab1e282aa7e583e33d Mon Sep 17 00:00:00 2001 From: davidrsch Date: Sun, 17 Aug 2025 10:58:06 +0200 Subject: [PATCH 1/8] Addnig package dependancy in register core model --- R/register_core_model.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/register_core_model.R b/R/register_core_model.R index c8d9fb6..5e4ed57 100644 --- a/R/register_core_model.R +++ b/R/register_core_model.R @@ -21,6 +21,7 @@ register_core_model <- function(model_name, mode) { parsnip::set_model_mode(model_name, mode) parsnip::set_model_engine(model_name, mode, "keras") parsnip::set_dependency(model_name, "keras", "keras3") + parsnip::set_dependency(model_name, "keras", "kerasnip") parsnip::set_encoding( model = model_name, From 1c020b30bf0cb91512ddcfbb902e24adca17a86b Mon Sep 17 00:00:00 2001 From: davidrsch Date: Sun, 17 Aug 2025 10:58:53 +0200 Subject: [PATCH 2/8] Adding custom step to collapse columns in list columns --- NAMESPACE | 4 ++ R/step_collapse.R | 140 +++++++++++++++++++++++++++++++++++++++++++ man/step_collapse.Rd | 71 ++++++++++++++++++++++ 3 files changed, 215 insertions(+) create mode 100644 R/step_collapse.R create mode 100644 man/step_collapse.Rd diff --git a/NAMESPACE b/NAMESPACE index 6f66623..ea17d00 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand +S3method(print,step_collapse) +export(bake.step_collapse) export(compile_keras_grid) export(create_keras_functional_spec) export(create_keras_sequential_spec) @@ -16,6 +18,7 @@ export(keras_metrics) export(keras_optimizers) export(loss_function_keras) export(optimizer_function) +export(prep.step_collapse) export(process_x_functional) export(process_x_sequential) export(process_y_functional) @@ -24,6 +27,7 @@ export(register_keras_loss) export(register_keras_metric) export(register_keras_optimizer) export(remove_keras_spec) +export(step_collapse) importFrom(cli,cli_alert_danger) importFrom(cli,cli_alert_info) importFrom(cli,cli_alert_success) diff 
--git a/R/step_collapse.R b/R/step_collapse.R new file mode 100644 index 0000000..1996620 --- /dev/null +++ b/R/step_collapse.R @@ -0,0 +1,140 @@ +#' Collapse Predictors into a single list-column +#' +#' `step_collapse()` creates a *specification* of a recipe step that will +#' convert a group of predictors into a single list-column. This is useful +#' for custom models that need the predictors in a different format. +#' +#' @param recipe A recipe object. The step will be added to the sequence of +#' operations for this recipe. +#' @param ... One or more selector functions to choose which variables are +#' affected by the step. See [recipes::selections()] for more details. For the +#' `tidy` method, these are not currently used. +#' @param role For model terms created by this step, what analysis role should +#' they be assigned? By default, the new columns are used as predictors. +#' @param trained A logical to indicate if the quantities for preprocessing +#' have been estimated. +#' @param columns A character vector of the selected variable names. This is +#' `NULL` until the step is trained by [recipes::prep()]. +#' @param new_col A character string for the name of the new list-column. The +#' default is "predictor_matrix". +#' @param skip A logical. Should the step be skipped when the recipe is +#' baked by [recipes::bake()]? While all operations are baked when `prep` is +#' run, there may be times when it is desirable to skip a processing step when +#' `bake` is run. +#' @param id A character string that is unique to this step to identify it. +#' +#' @return An updated version of `recipe` with the new step added to the +#' sequence of existing steps (if any). For the `tidy` method, a tibble with +#' columns `terms` which is the columns that are affected and `value` which is +#' the type of collapse. 
+#' +#' @examples +#' library(recipes) +#' +#' # 2 predictors +#' dat <- data.frame( +#' x1 = 1:10, +#' x2 = 11:20, +#' y = 1:10 +#' ) +#' +#' rec <- recipe(y ~ ., data = dat) %>% +#' step_collapse(x1, x2, new_col = "pred") %>% +#' prep() +#' +#' bake(rec, new_data = NULL) +#' @export +step_collapse <- function( + recipe, + ..., + role = "predictor", + trained = FALSE, + columns = NULL, + new_col = "predictor_matrix", + skip = FALSE, + id = recipes::rand_id("collapse") +) { + recipes::add_step( + recipe, + step_collapse_new( + terms = enquos(...), + role = role, + trained = trained, + columns = columns, + new_col = new_col, + skip = skip, + id = id + ) + ) +} + +step_collapse_new <- function( + terms, + role, + trained, + columns, + new_col, + skip, + id +) { + recipes::step( + subclass = "collapse", + terms = terms, + role = role, + trained = trained, + columns = columns, + new_col = new_col, + skip = skip, + id = id + ) +} + +#' @export +prep.step_collapse <- function(x, training, info = NULL, ...) { + col_names <- recipes::recipes_eval_select(x$terms, training, info) + + step_collapse_new( + terms = x$terms, + role = x$role, + trained = TRUE, + columns = col_names, + new_col = x$new_col, + skip = x$skip, + id = x$id + ) +} + +#' @export +bake.step_collapse <- function(object, new_data, ...) { + recipes::check_new_data(object$columns, object, new_data) + + rows_list <- apply( + new_data[, object$columns, drop = FALSE], + 1, + function(row) matrix(row, nrow = 1), + simplify = FALSE + ) + + new_data[[object$new_col]] <- rows_list + + # drop original predictor columns + new_data <- new_data[, setdiff(names(new_data), object$columns), drop = FALSE] + + new_data +} + +#' @export +print.step_collapse <- function(x, ...) 
{ + if (is.null(x$columns)) { + cat("Collapse predictors into list-column (unprepped)\n") + } else { + cat( + "Collapse predictors into list-column:", + paste(x$columns, collapse = ", "), + " → ", + x$new_col, + "\n" + ) + } + invisible(x) +} diff --git a/man/step_collapse.Rd b/man/step_collapse.Rd new file mode 100644 index 0000000..6f40162 --- /dev/null +++ b/man/step_collapse.Rd @@ -0,0 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/step_collapse.R +\name{step_collapse} +\alias{step_collapse} +\title{Collapse Predictors into a single list-column} +\usage{ +step_collapse( + recipe, + ..., + role = "predictor", + trained = FALSE, + columns = NULL, + new_col = "predictor_matrix", + skip = FALSE, + id = recipes::rand_id("collapse") +) +} +\arguments{ +\item{recipe}{A recipe object. The step will be added to the sequence of +operations for this recipe.} + +\item{...}{One or more selector functions to choose which variables are +affected by the step. See \verb{[selections()]} for more details. For the \code{tidy} +method, these are not currently used.} + +\item{role}{For model terms created by this step, what analysis role should +they be assigned?. By default, the new columns are used as predictors.} + +\item{trained}{A logical to indicate if the quantities for preprocessing +have been estimated.} + +\item{columns}{A character string of the selected variable names. This is +\code{NULL} until the step is trained by \verb{[prep.recipe()]}.} + +\item{new_col}{A character string for the name of the new list-column. The +default is "predictor_matrix".} + +\item{skip}{A logical. Should the step be skipped when the recipe is +baked by \verb{[bake.recipe()]}? 
While all operations are baked when \code{prep} is run, +skipping when \code{bake} is run may be other times when it is desirable to +skip a processing step.} + +\item{id}{A character string that is unique to this step to identify it.} +} +\value{ +An updated version of \code{recipe} with the new step added to the +sequence of existing steps (if any). For the \code{tidy} method, a tibble with +columns \code{terms} which is the columns that are affected and \code{value} which is +the type of collapse. +} +\description{ +\code{step_collapse()} creates a a \emph{specification} of a recipe step that will +convert a group of predictors into a single list-column. This is useful +for custom models that need the predictors in a different format. +} +\examples{ +library(recipes) + +# 2 predictors +dat <- data.frame( + x1 = 1:10, + x2 = 11:20, + y = 1:10 +) + +rec <- recipe(y ~ ., data = dat) \%>\% + step_collapse(x1, x2, new_col = "pred") \%>\% + prep() + +bake(rec, new_data = NULL) +} From 7d8af4e6f9796e614e9eb690f07976b031e365da Mon Sep 17 00:00:00 2001 From: davidrsch Date: Sun, 17 Aug 2025 11:05:51 +0200 Subject: [PATCH 3/8] Adding sequential workflow --- vignettes/workflows_sequential.Rmd | 275 +++++++++++++++++++++++++++++ 1 file changed, 275 insertions(+) create mode 100644 vignettes/workflows_sequential.Rmd diff --git a/vignettes/workflows_sequential.Rmd b/vignettes/workflows_sequential.Rmd new file mode 100644 index 0000000..a047a14 --- /dev/null +++ b/vignettes/workflows_sequential.Rmd @@ -0,0 +1,275 @@ +--- +title: "Tidymodels Workflow with Sequential Keras Models" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Tidymodels Workflow with Sequential Keras Models} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + eval = FALSE # Set to TRUE to run all chunks when knitting +) +# Suppress verbose Keras output for the vignette 
+options(keras.fit_verbose = 0) +set.seed(123) +``` + +## Introduction + +This vignette demonstrates a complete `tidymodels` workflow for a classification task using a Keras sequential model defined with `kerasnip`. We will use the Palmer Penguins dataset to predict penguin species based on physical measurements. + +The `kerasnip` package allows you to define Keras models using a modular "layer block" approach, which then integrates seamlessly with the `parsnip` and `tune` packages for model specification and hyperparameter tuning. + +## Setup + +First, we load the necessary packages. + +```{r load-packages} +library(kerasnip) +library(tidymodels) +library(keras3) +library(dplyr) # For data manipulation +library(ggplot2) # For plotting +library(future) # For parallel processing +library(finetune) # For racing +``` + +## Data Preparation + +We'll use the `penguins` dataset from the `modeldata` package. We will clean it by removing rows with missing values and ensuring the `species` column is a factor. + +```{r data-prep} +# Remove rows with missing values +penguins_df <- penguins |> + na.omit() |> + # Convert species to factor for classification + mutate(species = factor(species)) + +# Split data into training and testing sets +set.seed(123) +penguin_split <- initial_split(penguins_df, prop = 0.8, strata = species) +penguin_train <- training(penguin_split) + +penguin_test <- testing(penguin_split) + +# Create cross-validation folds for tuning +penguin_folds <- vfold_cv(penguin_train, v = 5, strata = species) +``` + +## Recipe for Preprocessing + +We will create a `recipes` object to preprocess our data. This recipe will: +* Predict `species` using all other variables. +* Normalize all numeric predictors. +* Create dummy variables for all categorical predictors. 
+ +```{r create-recipe} +penguin_recipe <- recipe(species ~ ., data = penguin_train) |> + step_normalize(all_numeric_predictors()) |> + step_dummy(all_nominal_predictors()) + +# You can prep and bake the recipe to see the processed data +# prep(penguin_recipe) |> bake(new_data = penguin_train) +``` + +## Define Keras Sequential Model with `kerasnip` + +Now, we define our Keras sequential model using `kerasnip`'s layer blocks. We'll create a simple Multi-Layer Perceptron (MLP) with two hidden layers. + +For a sequential Keras model with tabular data, all preprocessed input features are typically combined into a single input layer. The `recipes` package handles this preprocessing, transforming predictors into a single matrix that serves as the input to the Keras model. + +```{r define-kerasnip-model} +# Define layer blocks +input_block <- function(model, input_shape) { + keras_model_sequential(input_shape = input_shape) +} + +hidden_block <- function(model, units = 32, activation = "relu", rate = 0.2) { + model |> + layer_dense(units = units, activation = activation) |> + layer_dropout(rate = rate) +} + +output_block <- function(model, num_classes, activation = "softmax") { + model |> + layer_dense(units = num_classes, activation = activation) +} + +# Create the kerasnip model specification function +create_keras_sequential_spec( + model_name = "penguin_mlp", + layer_blocks = list( + input = input_block, + hidden_1 = hidden_block, + hidden_2 = hidden_block, + output = output_block + ), + mode = "classification" +) + +# Clean up the spec when the vignette is done knitting +on.exit(remove_keras_spec("penguin_mlp"), add = TRUE) +``` + +## Model Specification + +We'll define our `penguin_mlp` model specification and set some hyperparameters to `tune()`, indicating that they should be optimized. We will also set fixed parameters for compilation and fitting. 
+ +```{r define-tune-spec} +# Define the tunable model specification +mlp_spec <- penguin_mlp( + # Tunable parameters for hidden layers + hidden_1_units = tune(), + hidden_1_rate = tune(), + hidden_2_units = tune(), + hidden_2_rate = tune(), + # Fixed compilation and fitting parameters + compile_loss = "categorical_crossentropy", + compile_optimizer = "adam", + compile_metrics = c("accuracy"), + fit_epochs = 20, + fit_batch_size = 32, + fit_validation_split = 0.2, + fit_callbacks = list(callback_early_stopping(monitor = "val_loss", patience = 5)) +) |> + set_engine("keras") + +print(mlp_spec) +``` + +## Create Workflow + +A `workflow` combines the recipe and the model specification. + +```{r create-workflow} +penguin_wf <- workflow() |> + add_recipe(penguin_recipe) |> + add_model(mlp_spec) + +print(penguin_wf) +``` + +## Define Tuning Grid + +We will create a regular grid for our hyperparameters. + +```{r create-tuning-grid} +# Define the tuning grid +params <- extract_parameter_set_dials(penguin_wf) |> + update( + hidden_1_units = hidden_units(range = c(32, 128)), + hidden_1_rate = dropout(range = c(0.1, 0.4)), + hidden_2_units = hidden_units(range = c(16, 64)), + hidden_2_rate = dropout(range = c(0.1, 0.4)) + ) +mlp_grid <- grid_regular(params, levels = 3) + +print(mlp_grid) +``` + +## Tune Model + +Now, we'll use `tune_race_anova()` to perform cross-validation and find the best hyperparameters. + +```{r tune-model} +# Note: Parallel processing with `plan(multisession)` is currently not working +# with Keras models due to backend conflicts. +# plan(multisession) +set.seed(123) + +penguin_tune_results <- tune_race_anova( + penguin_wf, + resamples = penguin_folds, + grid = mlp_grid, + metrics = metric_set(accuracy, roc_auc, f_meas), # Evaluate multiple metrics + control = control_race(save_pred = TRUE, save_workflow = TRUE) +) +``` + +## Inspect Tuning Results + +We can inspect the tuning results to see which hyperparameter combinations performed best. 
+ +```{r inspect-results} +# Show the best performing models based on accuracy +show_best(penguin_tune_results, metric = "accuracy", n = 5) + +# Autoplot the results +# autoplot(penguin_tune_results) # Currently does not work due to a label issue. + +# Select the best hyperparameters +best_mlp_params <- select_best(penguin_tune_results, metric = "accuracy") +print(best_mlp_params) +``` + +## Finalize Workflow and Fit Model + +Once we have the best hyperparameters, we finalize the workflow and fit the model on the entire training dataset. + +```{r finalize-fit} +# Finalize the workflow with the best hyperparameters +final_penguin_wf <- finalize_workflow(penguin_wf, best_mlp_params) + +# Fit the final model on the full training data +final_penguin_fit <- fit(final_penguin_wf, data = penguin_train) + +print(final_penguin_fit) +``` + +### Inspect Final Model + +You can extract the underlying Keras model and its training history for further inspection. + +```{r inspect-final-keras-model-summary} +# Extract the Keras model summary +final_penguin_fit |> + extract_fit_parsnip() |> + extract_keras_model() |> + summary() +``` + +```{r inspect-final-keras-model-plot} +# Plot the Keras model +final_penguin_fit |> + extract_fit_parsnip() |> + extract_keras_model() |> + plot() +``` + +```{r inspect-final-keras-model-history} +# Plot the training history +final_penguin_fit |> + extract_fit_parsnip() |> + extract_keras_history() |> + plot() +``` + +## Make Predictions and Evaluate + +Finally, we will make predictions on the test set and evaluate the model's performance. 
+ +```{r predict-evaluate} +# Make predictions on the test set +penguin_test_pred <- predict(final_penguin_fit, new_data = penguin_test) +penguin_test_prob <- predict(final_penguin_fit, new_data = penguin_test, type = "prob") + +# Combine predictions with actuals +penguin_results <- penguin_test |> + select(species) |> + bind_cols( penguin_test_pred, penguin_test_prob) + +print(head(penguin_results)) + +# Evaluate performance using yardstick metrics +metrics_results <- metric_set(accuracy, roc_auc, f_meas)(penguin_results, truth = species, estimate = .pred_class, .pred_Adelie, .pred_Chinstrap, .pred_Gentoo) + +print(metrics_results) + +# Confusion Matrix +conf_mat(penguin_results, truth = species, estimate = .pred_class) |> + autoplot(type = "heatmap") +``` From 9fffc49d63af599faeaa671c3ee750ed3eb3e4af Mon Sep 17 00:00:00 2001 From: davidrsch Date: Sun, 17 Aug 2025 11:06:12 +0200 Subject: [PATCH 4/8] Adding functional workflow --- vignettes/workflows_functional.Rmd | 315 +++++++++++++++++++++++++++++ 1 file changed, 315 insertions(+) create mode 100644 vignettes/workflows_functional.Rmd diff --git a/vignettes/workflows_functional.Rmd b/vignettes/workflows_functional.Rmd new file mode 100644 index 0000000..3ab453f --- /dev/null +++ b/vignettes/workflows_functional.Rmd @@ -0,0 +1,315 @@ +--- +title: "Tidymodels Workflow with Functional Keras Models (Multi-Input)" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Tidymodels Workflow with Functional Keras Models (Multi-Input)} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + eval = FALSE # Set to TRUE to run all chunks when knitting +) +# Suppress verbose Keras output for the vignette +options(keras.fit_verbose = 0) +set.seed(123) +``` + +## Introduction + +This vignette demonstrates a complete `tidymodels` workflow for a regression task using a Keras functional model defined with 
`kerasnip`. We will use the Ames Housing dataset to predict house prices. A key feature of this example is the use of a multi-input Keras model, where numerical and categorical features are processed through separate input branches. + +`kerasnip` allows you to define complex Keras architectures, including those with multiple inputs, and integrate them seamlessly into the `tidymodels` ecosystem for robust modeling and tuning. + +## Setup + +First, we load the necessary packages. + +```{r load-packages} +library(kerasnip) +library(tidymodels) +library(keras3) +library(dplyr) # For data manipulation +library(ggplot2) # For plotting +library(future) # For parallel processing +library(finetune) # For racing +``` + +## Data Preparation + +We'll use the Ames Housing dataset, which is available in the `modeldata` package. We will then split the data into training and testing sets. + +```{r data-prep} +# Select relevant columns and remove rows with missing values +ames_df <- ames |> + select(Sale_Price, Gr_Liv_Area, Year_Built, Neighborhood, Bldg_Type, Overall_Cond, Total_Bsmt_SF, contains("SF")) |> + na.omit() + +# Split data into training and testing sets +set.seed(123) +ames_split <- initial_split(ames_df, prop = 0.8, strata = Sale_Price) +ames_train <- training(ames_split) +ames_test <- testing(ames_split) + +# Create cross-validation folds for tuning +ames_folds <- vfold_cv(ames_train, v = 5, strata = Sale_Price) +``` + +## Recipe for Preprocessing + +We will create a `recipes` object to preprocess our data. This recipe will: +* Predict `Sale_Price` using all other variables. +* Normalize all numerical predictors. +* Create dummy variables for categorical predictors. +* Collapse each group of predictors into a single matrix column using `step_collapse()`. + +This final step is crucial for the multi-input Keras model, as the `kerasnip` functional API expects a list of matrices for multiple inputs, where each matrix corresponds to a distinct input layer. 
+ +```{r create-recipe} +ames_recipe <- recipe(Sale_Price ~ ., data = ames_train) |> + step_normalize(all_numeric_predictors()) |> + step_collapse(all_numeric_predictors(), new_col = "numerical_input") |> + step_dummy(Neighborhood) |> + step_collapse(starts_with("Neighborhood"), new_col = "neighborhood_input") |> + step_dummy(Bldg_Type) |> + step_collapse(starts_with("Bldg_Type"), new_col = "bldg_input") |> + step_dummy(Overall_Cond) |> + step_collapse(starts_with("Overall_Cond"), new_col = "condition_input") + +# You can prep and bake the recipe to see the processed data +# prep(ames_recipe) |> bake(new_data = ames_train) +``` + +## Define Keras Functional Model with `kerasnip` + +Now, we define our Keras functional model using `kerasnip`'s layer blocks. This model will have four distinct input layers: one for numerical features and three for categorical features. These branches will be processed separately and then concatenated before the final output layer. + +```{r define-kerasnip-model} +# Define layer blocks for multi-input functional model + +# Input blocks for numerical and categorical features +input_numerical <- function(input_shape) { + layer_input(shape = input_shape, name = "numerical_input") +} + +input_neighborhood <- function(input_shape) { + layer_input(shape = input_shape, name = "neighborhood_input") +} + +input_bldg <- function(input_shape) { + layer_input(shape = input_shape, name = "bldg_input") +} + +input_condition <- function(input_shape) { + layer_input(shape = input_shape, name = "condition_input") +} + +# Processing blocks for each input type +dense_numerical <- function(tensor, units = 32, activation = "relu") { + tensor |> + layer_dense(units = units, activation = activation) +} + +dense_categorical <- function(tensor, units = 16, activation = "relu") { + tensor |> + layer_dense(units = units, activation = activation) +} + +# Concatenation block +concatenate_features <- function(numeric, neighborhood, bldg, condition) { + 
layer_concatenate(list(numeric, neighborhood, bldg, condition)) +} + +# Output block for regression +output_regression <- function(tensor) { + layer_dense(tensor, units = 1, name = "output") +} + +# Create the kerasnip model specification function +create_keras_functional_spec( + model_name = "ames_functional_mlp", + layer_blocks = list( + numerical_input = input_numerical, + neighborhood_input = input_neighborhood, + bldg_input = input_bldg, + condition_input = input_condition, + processed_numerical = inp_spec(dense_numerical, "numerical_input"), + processed_neighborhood = inp_spec(dense_categorical, "neighborhood_input"), + processed_bldg = inp_spec(dense_categorical, "bldg_input"), + processed_condition = inp_spec(dense_categorical, "condition_input"), + combined_features = inp_spec( + concatenate_features, + c( + processed_numerical = "numeric", + processed_neighborhood = "neighborhood", + processed_bldg = "bldg", + processed_condition = "condition" + ) + ), + output = inp_spec(output_regression, "combined_features") + ), + mode = "regression" +) + +# Clean up the spec when the vignette is done knitting +on.exit(remove_keras_spec("ames_functional_mlp"), add = TRUE) +``` + +## Model Specification + +We'll define our `ames_functional_mlp` model specification and set some hyperparameters to `tune()`. Note how the arguments are prefixed with their corresponding block names (e.g., `processed_numerical_units`). 
+ +```{r define-tune-spec} +# Define the tunable model specification +functional_mlp_spec <- ames_functional_mlp( + # Tunable parameters for numerical branch + processed_numerical_units = tune(), + # Tunable parameters for categorical branch + processed_neighborhood_units = tune(), + processed_bldg_units = tune(), + processed_condition_units = tune(), + # Fixed compilation and fitting parameters + compile_loss = "mean_squared_error", + compile_optimizer = "adam", + compile_metrics = c("mean_absolute_error"), + fit_epochs = 50, + fit_batch_size = 32, + fit_validation_split = 0.2, + fit_callbacks = list(callback_early_stopping(monitor = "val_loss", patience = 5)) +) |> + set_engine("keras") + +print(functional_mlp_spec) +``` + +## Create Workflow + +A `workflow` combines the recipe and the model specification. + +```{r create-workflow} +ames_wf <- workflow() |> + add_recipe(ames_recipe) |> + add_model(functional_mlp_spec) + +print(ames_wf) +``` + +## Define Tuning Grid + +We will create a regular grid for our hyperparameters. + +```{r create-tuning-grid} +# Define the tuning grid +params <- extract_parameter_set_dials(ames_wf) |> + update( + processed_numerical_units = hidden_units(range = c(32, 128)), + processed_neighborhood_units = hidden_units(range = c(16, 64)), + processed_bldg_units = hidden_units(range = c(16, 64)), + processed_condition_units = hidden_units(range = c(16, 64)) + ) +functional_mlp_grid <- grid_regular(params, levels = 3) + +print(functional_mlp_grid) +``` + +## Tune Model + +Now, we'll use `tune_race_anova()` to perform cross-validation and find the best hyperparameters. + +```{r tune-model} +# Note: Parallel processing with `plan(multisession)` is currently not working +# with Keras models due to backend conflicts. 
+# plan(multisession) + +set.seed(123) +ames_tune_results <- tune_race_anova( + ames_wf, + resamples = ames_folds, + grid = functional_mlp_grid, + metrics = metric_set(rmse, mae, rsq), # Evaluate regression metrics + control = control_race(save_pred = TRUE, save_workflow = TRUE) +) +``` + +## Inspect Tuning Results + +We can inspect the tuning results to see which hyperparameter combinations performed best. + +```{r inspect-results} +# Show the best performing models based on RMSE +show_best(ames_tune_results, metric = "rmse", n = 5) + +# Autoplot the results +# autoplot(ames_tune_results) # Currently does not work due to a label issue. + +# Select the best hyperparameters +best_functional_mlp_params <- select_best(ames_tune_results, metric = "rmse") +print(best_functional_mlp_params) +``` + +## Finalize Workflow and Fit Model + +Once we have the best hyperparameters, we finalize the workflow and fit the model on the entire training dataset. + +```{r finalize-fit} +# Finalize the workflow with the best hyperparameters +final_ames_wf <- finalize_workflow(ames_wf, best_functional_mlp_params) + +# Fit the final model on the full training data +final_ames_fit <- fit(final_ames_wf, data = ames_train) + +print(final_ames_fit) +``` + +### Inspect Final Model + +You can extract the underlying Keras model and its training history for further inspection. + +```{r inspect-final-keras-model-summary} +# Extract the Keras model summary +final_ames_fit |> + extract_fit_parsnip() |> + extract_keras_model() |> + summary() +``` + +```{r inspect-final-keras-model-plot} +# Plot the Keras model +final_ames_fit |> + extract_fit_parsnip() |> + extract_keras_model() |> + plot() +``` + +```{r inspect-final-keras-model-history} +# Plot the training history +final_ames_fit |> + extract_fit_parsnip() |> + extract_keras_history() |> + plot() +``` + +## Make Predictions and Evaluate + +Finally, we will make predictions on the test set and evaluate the model's performance. 
+ +```{r predict-evaluate} +# Make predictions on the test set +ames_test_pred <- predict(final_ames_fit, new_data = ames_test) + +# Combine predictions with actuals +ames_results <- tibble::tibble( + Sale_Price = ames_test$Sale_Price, + .pred = ames_test_pred$.pred +) + +print(head(ames_results)) + +# Evaluate performance using yardstick metrics +metrics_results <- metric_set(rmse, mae, rsq)(ames_results, truth = Sale_Price, estimate = .pred) + +print(metrics_results) +``` From a92d34b795cb329cc8e4b399349623eb0455c63c Mon Sep 17 00:00:00 2001 From: davidrsch Date: Sun, 17 Aug 2025 11:21:51 +0200 Subject: [PATCH 5/8] Update pkgdowm.yml --- _pkgdown.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/_pkgdown.yml b/_pkgdown.yml index ad7eb44..4e3e7a1 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -19,6 +19,8 @@ guides: contents: - sequential_model - functional_api + - workflows_sequential + - workflows_functional # examples: @@ -54,6 +56,12 @@ reference: - extract_keras_model - keras_evaluate + - title: "Custom recipe steps" + desc: > + Custom recipe steps for use with kerasnip model specifications + contents: + - step_collapse + development: mode: auto @@ -75,6 +83,11 @@ navbar: href: articles/sequential_model.html - text: "Functional API" href: articles/functional_api.html + - text: "Workflows" + - text: "Sequential Model" + href: articles/workflows_sequential.html + - text: "Functional API" + href: articles/workflows_functional.html github: icon: fa-github href: https://github.com/davidrsch/kerasnip From c9ed2a1f6fb0f7ed7ba19474d77580f770f346a1 Mon Sep 17 00:00:00 2001 From: davidrsch Date: Sun, 17 Aug 2025 11:49:44 +0200 Subject: [PATCH 6/8] Fixing issues with custom steps --- NAMESPACE | 4 ++-- R/step_collapse.R | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index ea17d00..6c7a688 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,8 @@ # Generated by roxygen2: do not edit by hand 
+S3method(bake,step_collapse) +S3method(prep,step_collapse) S3method(print,step_collapse) -export(bake.step_collapse) export(compile_keras_grid) export(create_keras_functional_spec) export(create_keras_sequential_spec) @@ -18,7 +19,6 @@ export(keras_metrics) export(keras_optimizers) export(loss_function_keras) export(optimizer_function) -export(prep.step_collapse) export(process_x_functional) export(process_x_sequential) export(process_y_functional) diff --git a/R/step_collapse.R b/R/step_collapse.R index 1996620..ffad7ab 100644 --- a/R/step_collapse.R +++ b/R/step_collapse.R @@ -126,14 +126,14 @@ bake.step_collapse <- function(object, new_data, ...) { #' @export print.step_collapse <- function(x, ...) { if (is.null(x$columns)) { - cat("Collapse predictors into list-column (unprepped)\n") + cat("Collapse predictors into list-column (unprepped)\n") } else { cat( "Collapse predictors into list-column:", paste(x$columns, collapse = ", "), - " → ", + " -> ", x$new_col, - "\n" + "\n" ) } invisible(x) From 12826a986cb1263788ce901438f6d26ceaf94592 Mon Sep 17 00:00:00 2001 From: davidrsch Date: Sun, 17 Aug 2025 12:12:33 +0200 Subject: [PATCH 7/8] Fixing issue with step_collapse --- NAMESPACE | 2 ++ R/step_collapse.R | 1 + 2 files changed, 3 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 6c7a688..8af9663 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -41,6 +41,8 @@ importFrom(dplyr,filter) importFrom(dplyr,select) importFrom(keras3,to_categorical) importFrom(parsnip,update_dot_check) +importFrom(recipes,bake) +importFrom(recipes,prep) importFrom(rlang,arg_match) importFrom(rlang,dots_list) importFrom(rlang,enquos) diff --git a/R/step_collapse.R b/R/step_collapse.R index ffad7ab..f7bd245 100644 --- a/R/step_collapse.R +++ b/R/step_collapse.R @@ -43,6 +43,7 @@ #' prep() #' #' bake(rec, new_data = NULL) +#' @importFrom recipes prep bake #' @export step_collapse <- function( recipe, From a184868ded9879ed38e7adb15cbf5d38641032f0 Mon Sep 17 00:00:00 2001 From: davidrsch 
Date: Sun, 17 Aug 2025 12:27:50 +0200 Subject: [PATCH 8/8] Fixing issue with checks --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index d105091..0ab2577 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,7 +18,8 @@ Imports: tibble, purrr, dplyr, - cli + cli, + recipes Suggests: testthat (>= 3.0.0), modeldata,