diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index fc9d2d3..a03b688 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -62,7 +62,11 @@ jobs:
         if: runner.os == 'Linux'
         run: |
           sudo apt-get update
-          sudo apt-get install -y qpdf ghostscript
+          sudo apt-get install -y qpdf ghostscript graphviz
+      - if: runner.os == 'macOS'
+        run: brew install graphviz
+      - if: runner.os == 'Windows'
+        run: choco install graphviz -y
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
diff --git a/.gitignore b/.gitignore
index ae3d780..1a93ee2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@ docs
 .httr-oauth
 .DS_Store
 .quarto
+vignettes/*_cache
\ No newline at end of file
diff --git a/R/generic_functional_fit.R b/R/generic_functional_fit.R
index bab2f70..a1e43b7 100644
--- a/R/generic_functional_fit.R
+++ b/R/generic_functional_fit.R
@@ -128,7 +128,15 @@ generic_functional_fit <- function(
 
   # --- Get Repetition Count ---
   num_repeats_arg <- paste0("num_", block_name)
-  num_repeats <- all_args[[num_repeats_arg]] %||% 1
+  num_repeats_val <- all_args[[num_repeats_arg]]
+
+  # If num_repeats_val is NULL or zapped, default to 1.
+  # Otherwise, use the value provided by the user.
+  if (is.null(num_repeats_val) || inherits(num_repeats_val, "rlang_zap")) {
+    num_repeats <- 1
+  } else {
+    num_repeats <- as.integer(num_repeats_val)
+  }
 
   # --- Get Hyperparameters for this block ---
   # Hyperparameters are formals that are NOT other block names (graph connections)
diff --git a/R/generic_sequential_fit.R b/R/generic_sequential_fit.R
index 6c0009c..f776338 100644
--- a/R/generic_sequential_fit.R
+++ b/R/generic_sequential_fit.R
@@ -118,7 +118,14 @@ generic_sequential_fit <- function(
 
   num_repeats_arg <- paste0("num_", block_name)
   num_repeats_val <- all_args[[num_repeats_arg]]
-  num_repeats <- num_repeats_val %||% 1
+
+  # If num_repeats_val is NULL or zapped, default to 1.
+  # Otherwise, use the value provided by the user.
+  if (is.null(num_repeats_val) || inherits(num_repeats_val, "rlang_zap")) {
+    num_repeats <- 1
+  } else {
+    num_repeats <- as.integer(num_repeats_val)
+  }
 
   # Get the arguments for this specific block from `...`
   block_arg_names <- names(block_fmls)[-1] # Exclude 'model'
diff --git a/R/register_fit_predict.R b/R/register_fit_predict.R
index 6b3b748..5b8eba9 100644
--- a/R/register_fit_predict.R
+++ b/R/register_fit_predict.R
@@ -57,7 +57,7 @@ register_fit_predict <- function(model_name, mode, layer_blocks, functional) {
       func = c(fun = "predict"),
       args = list(
         object = rlang::expr(object$fit$fit),
-        x = rlang::expr(as.matrix(new_data))
+        x = rlang::expr(process_x(new_data)$x_proc)
       )
     )
   )
@@ -74,7 +74,7 @@ register_fit_predict <- function(model_name, mode, layer_blocks, functional) {
       func = c(fun = "predict"),
       args = list(
         object = rlang::expr(object$fit$fit),
-        x = rlang::expr(as.matrix(new_data))
+        x = rlang::expr(process_x(new_data)$x_proc)
       )
     )
   )
@@ -89,7 +89,7 @@ register_fit_predict <- function(model_name, mode, layer_blocks, functional) {
       func = c(fun = "predict"),
       args = list(
         object = rlang::expr(object$fit$fit),
-        x = rlang::expr(as.matrix(new_data))
+        x = rlang::expr(process_x(new_data)$x_proc)
       )
     )
   )
diff --git a/R/utils.R b/R/utils.R
index 5cce55c..cb8a11b 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -220,6 +220,11 @@ process_x <- function(x) {
 #' @importFrom keras3 to_categorical
 #' @noRd
 process_y <- function(y, is_classification = NULL, class_levels = NULL) {
+  # If y is a data frame/tibble, extract the first column
+  if (is.data.frame(y)) {
+    y <- y[[1]]
+  }
+
   if (is.null(is_classification)) {
     is_classification <- is.factor(y)
   }
diff --git a/_pkgdown.yml b/_pkgdown.yml
index ebce87e..c585ff1 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -17,8 +17,8 @@ guides:
 
 - title: "Getting Started"
   navbar: ~
   contents:
-  - getting-started
-  - functional-api
+  - getting_started
+  - functional_api
 
 # examples:
@@ -63,7 +63,7 @@ navbar:
 components:
   intro:
     text: "Getting started"
-    href: guides/getting-started.html
+    href: guides/getting_started.html
   github:
     icon: fa-github
     href: https://github.com/davidrsch/kerasnip
diff --git a/tests/testthat/helper-keras.R b/tests/testthat/helper_keras.R
similarity index 97%
rename from tests/testthat/helper-keras.R
rename to tests/testthat/helper_keras.R
index ce44dee..fc1aded 100644
--- a/tests/testthat/helper-keras.R
+++ b/tests/testthat/helper_keras.R
@@ -6,6 +6,7 @@ library(modeldata)
 library(rsample)
 library(dials)
 library(tune)
+library(purrr)
 
 skip_if_no_keras <- function() {
   testthat::skip_if_not_installed("keras3")
diff --git a/tests/testthat/test-e2e-classification.R b/tests/testthat/test_e2e_classification.R
similarity index 100%
rename from tests/testthat/test-e2e-classification.R
rename to tests/testthat/test_e2e_classification.R
diff --git a/tests/testthat/test-e2e-features.R b/tests/testthat/test_e2e_features.R
similarity index 92%
rename from tests/testthat/test-e2e-features.R
rename to tests/testthat/test_e2e_features.R
index 67e776e..e9d4f85 100644
--- a/tests/testthat/test-e2e-features.R
+++ b/tests/testthat/test_e2e_features.R
@@ -102,14 +102,19 @@ test_that("E2E: Customizing fit arguments works", {
   expect_lt(length(fit_obj$fit$history$metrics$loss), 5)
 })
 
-test_that("E2E: Setting num_blocks = 0 works", {
+test_that("E2E: Setting num_blocks = 0 works for sequential models", {
   skip_if_no_keras()
 
   input_block_zero <- function(model, input_shape) {
     keras3::keras_model_sequential(input_shape = input_shape)
   }
   dense_block_zero <- function(model, units = 16) {
-    model |> keras3::layer_dense(units = units, activation = "relu")
+    model |>
+      keras3::layer_dense(
+        units = units,
+        activation = "relu",
+        name = "i_should_not_exist"
+      )
   }
   output_block_zero <- function(model) {
     model |> keras3::layer_dense(units = 1)
@@ -128,10 +133,18 @@
     mode = "regression"
   )
 
-  spec <- e2e_mlp_zero(num_dense = 0, fit_epochs = 2) |>
+  spec <- e2e_mlp_zero(num_dense = 0, fit_epochs = 1) |>
     parsnip::set_engine("keras")
-  # This should fit a model with only an input and output layer
-  expect_no_error(parsnip::fit(spec, mpg ~ ., data = mtcars))
+
+  fit_obj <- parsnip::fit(spec, mpg ~ ., data = mtcars)
+
+  # Check that the dense layer is NOT in the model
+  keras_model <- fit_obj |> extract_keras_summary()
+  expect_equal(length(keras_model$layers), 1) # Output layer only
+
+  # Check layer names explicitly
+  layer_names <- sapply(keras_model$layers, function(l) l$name)
+  expect_false("i_should_not_exist" %in% layer_names)
 })
 
 test_that("E2E: Error handling for reserved names works", {
diff --git a/tests/testthat/test-e2e-functional.R b/tests/testthat/test_e2e_functional.R
similarity index 77%
rename from tests/testthat/test-e2e-functional.R
rename to tests/testthat/test_e2e_functional.R
index 5ef3fea..704cb92 100644
--- a/tests/testthat/test-e2e-functional.R
+++ b/tests/testthat/test_e2e_functional.R
@@ -182,3 +182,57 @@ test_that("E2E: Functional spec tuning (including repetition) works", {
   expect_s3_class(metrics, "tbl_df")
   expect_true(all(c("num_dense_path", "dense_path_units") %in% names(metrics)))
 })
+
+test_that("E2E: Block repetition works for functional models", {
+  skip_if_no_keras()
+
+  input_block <- function(input_shape) keras3::layer_input(shape = input_shape)
+  dense_block <- function(tensor, units = 8) {
+    tensor |> keras3::layer_dense(units = units, activation = "relu")
+  }
+  output_block <- function(tensor) keras3::layer_dense(tensor, units = 1)
+
+  model_name <- "e2e_func_repeat"
+  on.exit(suppressMessages(remove_keras_spec(model_name)), add = TRUE)
+
+  create_keras_functional_spec(
+    model_name = model_name,
+    layer_blocks = list(
+      main_input = input_block,
+      dense_path = inp_spec(dense_block, "main_input"),
+      output = inp_spec(output_block, "dense_path")
+    ),
+    mode = "regression"
+  )
+
+  # --- Test with 1 repetition ---
+  spec_1 <- e2e_func_repeat(num_dense_path = 1, fit_epochs = 1) |>
+    set_engine("keras")
+  fit_1 <- fit(spec_1, mpg ~ ., data = mtcars)
+  model_1_layers <- fit_1 |>
+    extract_keras_summary() |>
+    pluck("layers")
+
+  # Expect 3 layers: Input, Dense, Output
+  expect_equal(length(model_1_layers), 3)
+
+  # --- Test with 2 repetitions ---
+  spec_2 <- e2e_func_repeat(num_dense_path = 2, fit_epochs = 1) |>
+    set_engine("keras")
+  fit_2 <- fit(spec_2, mpg ~ ., data = mtcars)
+  model_2_layers <- fit_2 |>
+    extract_keras_summary() |>
+    pluck("layers")
+
+  # Expect 4 layers: Input, Dense, Dense, Output
+  expect_equal(length(model_2_layers), 4)
+
+  # --- Test with 0 repetitions ---
+  spec_3 <- e2e_func_repeat(num_dense_path = 0, fit_epochs = 1) |>
+    set_engine("keras")
+  fit_3 <- fit(spec_3, mpg ~ ., data = mtcars)
+  model_3_layers <- fit_3 |>
+    extract_keras_summary() |>
+    pluck("layers")
+
+  # Expect 2 layers: Input, Output
+  expect_equal(length(model_3_layers), 2)
+})
diff --git a/tests/testthat/test-e2e-multiblock-tuning.R b/tests/testthat/test_e2e_multiblock_tuning.R
similarity index 100%
rename from tests/testthat/test-e2e-multiblock-tuning.R
rename to tests/testthat/test_e2e_multiblock_tuning.R
diff --git a/tests/testthat/test-e2e-regression.R b/tests/testthat/test_e2e_regression.R
similarity index 100%
rename from tests/testthat/test-e2e-regression.R
rename to tests/testthat/test_e2e_regression.R
diff --git a/tests/testthat/test-e2e-spec-removal.R b/tests/testthat/test_e2e_spec_removal.R
similarity index 100%
rename from tests/testthat/test-e2e-spec-removal.R
rename to tests/testthat/test_e2e_spec_removal.R
diff --git a/tests/testthat/test-e2e-tuning.R b/tests/testthat/test_e2e_tuning.R
similarity index 100%
rename from tests/testthat/test-e2e-tuning.R
rename to tests/testthat/test_e2e_tuning.R
diff --git a/vignettes/functional-api.Rmd b/vignettes/functional_api.Rmd
similarity index 100%
rename from vignettes/functional-api.Rmd
rename to vignettes/functional_api.Rmd
diff --git a/vignettes/getting-started.Rmd b/vignettes/getting-started.Rmd
deleted file mode 100644
index 7ecc58d..0000000
--- a/vignettes/getting-started.Rmd
+++ /dev/null
@@ -1,243 +0,0 @@
----
-title: "Getting Started with kerasnip"
-output: rmarkdown::html_vignette
-vignette: >
-  %\VignetteIndexEntry{Getting Started with kerasnip}
-  %\VignetteEngine{knitr::rmarkdown}
-  %\VignetteEncoding{UTF-8}
----
-
-## The Core Idea: From Keras Layers to Tidymodels Specs
-
-The `keras3` package allows for building deep learning models layer-by-layer, which is a powerful and flexible approach. However, the `tidymodels` ecosystem is designed around declarative model specifications, where you define what model you want and which of its parameters you want to tune, rather than building it imperatively.
-
-`kerasnip` bridges this gap with a simple but powerful concept: layer blocks. You define the components of your neural network (e.g., an input block, a dense block, a dropout block) as simple R functions. `kerasnip` then uses these blocks as building materials to create a brand new parsnip model specification function for you.
-
-This new function behaves just like any other parsnip model (e.g., `rand_forest()` or `linear_reg()`), making it easy to integrate into workflows and tune with tune.
-
-We’ll start by loading `kerasnip`, `tidymodels` and `keras3`:
-
-```{r load-kerasnip}
-library(kerasnip)
-library(tidymodels)
-library(keras3)
-```
-
-## Example 1: Building and Fitting a Basic MLP
-
-Let's start by building a simple Multi-Layer Perceptron (MLP) for a regression task using the `mtcars` dataset.
-
-### Step 1: Define the Layer Blocks
-
-We need three blocks:
-
-- 1\. An input block to initialize the model and define the input shape. `kerasnip` will automatically pass the input_shape argument during fitting.
-
-- 2\. A dense block for our hidden layers. We'll give it a units argument so we can control the number of neurons.
-
-- 3\. An output block for the final prediction. For regression, this is typically a single neuron with a linear activation.
-
-    ```{r define-blocks}
-    # 1. The input block must initialize the model.
-    # input_shape is passed automatically by the fit engine.
-    mlp_input_block <- function(model, input_shape) {
-      keras_model_sequential(input_shape = input_shape)
-    }
-
-    # 2. A block for hidden layers. units will become a tunable parameter.
-    mlp_dense_block <- function(model, units = 32) {
-      model |>
-        layer_dense(units = units, activation = "relu")
-    }
-
-    # 3. The output block for a regression model.
-    mlp_output_block <- function(model) {
-      model |>
-        layer_dense(units = 1)
-    }
-    ```
-
-### Step 2: Create the Model Specification
-
-Now, we use `create_keras_sequential_spec()` to generate a new model function, which we'll call `basic_mlp()`. We provide our layer blocks in the order they should be assembled.
-
-```{r create-spec}
-create_keras_sequential_spec(
-  model_name = "basic_mlp",
-  layer_blocks = list(
-    input = mlp_input_block,
-    dense = mlp_dense_block,
-    output = mlp_output_block
-  ),
-  mode = "regression"
-)
-```
-
-This function call has a side-effect: a new function `basic_mlp()` is now available in our environment! Notice its arguments: `kerasnip`automatically created `num_dense` (to control the number of dense layers) and `dense_units` (from the units argument in our `mlp_dense_block`).
-
-### Step 3: Use the Spec in a Workflow
-
-We can now use `basic_mlp()` like any other parsnip model. Let's define a model with two hidden layers, each with 64 units, and train it for 50 epochs.
-
-```{r use-spec}
-spec <- basic_mlp(
-  num_dense = 2,
-  dense_units = 64,
-  fit_epochs = 50,
-  learn_rate = 0.01
-) |>
-  set_engine("keras")
-
-print(spec)
-```
-
-We'll use a simple recipe to normalize the predictors and combine it with our model spec in a workflow.
-
-```{r fit-model}
-# Suppress verbose Keras output for the vignette
-options(keras.fit_verbose = 0)
-
-rec <- recipe(mpg ~ ., data = mtcars) |>
-  step_normalize(all_numeric_predictors())
-
-wf <- workflow() |>
-  add_recipe(rec) |>
-  add_model(spec)
-
-set.seed(123)
-fit_obj <- fit(wf, data = mtcars)
-```
-
-### Step 4: Make Predictions
-
-Predictions work just as you'd expect in `tidymodels`.
-
-```{r predict}
-predictions <- predict(fit_obj, new_data = mtcars[1:5, ])
-print(predictions)
-```
-
-## Example 2: Tuning the Model Architecture
-
-The real power of `kerasnip` comes from its ability to tune not just *hyperparameters* (like learning rate or dropout), but the *architecture* of the network itself.
-
-Let's create a more complex *tunable* specification where we let `tune` find the optimal number of dense layers, the number of units in those layers, and the rate for a final dropout layer.
-
-### Step 1: Define Blocks and Create a New Spec
-
-First, we'll define an additional block for dropout and then create a new model specification, `tunable_mlp`, that includes it.
-
-```{r define-tunable-blocks}
-tunable_dropout_block <- function(model, rate = 0.2) {
-  model |>
-    layer_dropout(rate = rate)
-}
-
-create_keras_sequential_spec(
-  model_name = "tunable_mlp",
-  layer_blocks = list(
-    input = mlp_input_block,
-    dense = mlp_dense_block,
-    dropout = tunable_dropout_block,
-    output = mlp_output_block
-  ),
-  mode = "regression"
-)
-```
-
-### Step 2: Define a Tunable Specification
-
-We use our new `tunable_mlp()` function, passing `tune()` to the arguments we want to optimize. We will have one dropout layer before the output.
-
-```{r tune-spec}
-tune_spec <- tunable_mlp(
-  num_dense = tune(),
-  dense_units = tune(),
-  num_dropout = 1,
-  dropout_rate = tune(),
-  fit_epochs = 20
-) |>
-  set_engine("keras")
-
-print(tune_spec)
-```
-
-### Step 3: Set up the Tuning Grid
-
-We create a `workflow` as before. Then, we can use helper functions from `dials` to define the search space for our parameters.
-
-```{r setup-tuning}
-tune_wf <- workflow() |>
-  add_recipe(rec) |>
-  add_model(tune_spec)
-
-# Define the tuning grid.
-# `num_terms()` is the dials function for `num_*` parameters.
-# `hidden_units()` is the dials function for `*_units` parameters.
-params <- extract_parameter_set_dials(tune_wf) |>
-  update(
-    num_dense = dials::num_terms(c(1, 3)),
-    dense_units = dials::hidden_units(c(8, 64)),
-    dropout_rate = dials::dropout(c(0.1, 0.5))
-  )
-grid <- grid_regular(params, levels = 2)
-print(grid)
-```
-
-### Step 4: Run the Tuning
-
-We use `tune_grid()` with resamples to evaluate each combination of architectural parameters.
-
-```{r run-tuning, cache=TRUE}
-set.seed(456)
-folds <- vfold_cv(mtcars, v = 3)
-
-# The control argument is used to prevent saving predictions, which
-# can be large for Keras models.
-tune_res <- tune_grid(
-  tune_wf,
-  resamples = folds,
-  grid = grid,
-  control = control_grid(save_pred = FALSE)
-)
-```
-
-### Step 5: Analyze the Results
-
-We can now see which architecture performed best.
-
-```{r show-best}
-show_best(tune_res, metric = "rmse")
-```
-
-The results show that `tune` has successfully tested different network depths (`num_dense`), widths (`dense_units`), and dropout rates to find the best-performing combination. This demonstrates how `kerasnip` seamlessly integrates complex architectural tuning into the standard `tidymodels` workflow.
-
-## Advanced Customization
-
-`kerasnip` provides a clean API for passing arguments directly to Keras's `compile()` and `fit()` methods.
-
-- **Compile Arguments**: Pass any argument to `keras3::compile()` by prefixing it with `compile_`. For example, to change the loss function you would use `compile_loss = "mae"`.
-- **Fit Arguments**: Pass any argument to `keras3::fit()` by prefixing it with `fit_`. For example, to set a validation split and add a callback, you would use `fit_validation_split = 0.2` and `fit_callbacks = list(...)`.
-
-Here is an example of using these arguments to specify a different loss function, a validation split, and an early stopping callback.
-
-```{r advanced-customization}
-
-adv_spec <- basic_mlp(
-  num_dense = 2,
-  dense_units = 32,
-  fit_epochs = 100,
-  # Arguments for keras3::compile()
-  compile_loss = "mae",
-  # Arguments for keras3::fit()
-  fit_validation_split = 0.2,
-  fit_callbacks = list(
-    keras3::callback_early_stopping(patience = 5)
-  )
-) |>
-  set_engine("keras")
-
-print(adv_spec)
-```
-
-This system gives you full control over the Keras training process while keeping the model specification function signature clean and focused on the *tunable* parameters.
\ No newline at end of file
diff --git a/vignettes/getting_started.Rmd b/vignettes/getting_started.Rmd
new file mode 100644
index 0000000..556dec5
--- /dev/null
+++ b/vignettes/getting_started.Rmd
@@ -0,0 +1,328 @@
+---
+title: "Getting Started with kerasnip"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Getting Started with kerasnip}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+# Suppress verbose Keras output for the vignette
+options(keras.fit_verbose = 0)
+set.seed(123)
+```
+
+## The Core Idea: From Keras Layers to Tidymodels Specs
+
+The `keras3` package allows for building deep learning models layer-by-layer, which is a powerful and flexible approach. However, the `tidymodels` ecosystem is designed around declarative model specifications, where you define *what* model you want and which of its parameters you want to tune, rather than building it imperatively.
+
+`kerasnip` bridges this gap with a simple but powerful concept: **layer blocks**. You define the components of your neural network (e.g., an input block, a dense block, a dropout block) as simple R functions. `kerasnip` then uses these blocks as building materials to create a brand new `parsnip` model specification function for you.
+
+This new function behaves just like any other `parsnip` model (e.g., `rand_forest()` or `linear_reg()`), making it easy to integrate into `tidymodels` workflows.
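+
+Concretely, a layer block is just an ordinary R function: its first argument is the model being built, and every remaining argument with a default becomes a tunable parameter on the generated specification. A minimal sketch (the block and its `units` argument here are purely illustrative; the real blocks for this vignette are defined below):
+
+```r
+# A hypothetical dense block: because `units` has a default,
+# kerasnip will expose it as an argument of the generated spec.
+my_dense_block <- function(model, units = 32) {
+  model |> keras3::layer_dense(units = units, activation = "relu")
+}
+```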
+
+## Installation
+
+You can install the development version of `kerasnip` from GitHub. You will also need `keras3` and a backend (like TensorFlow).
+
+```{r, eval = FALSE}
+# install.packages("pak")
+pak::pak("davidrsch/kerasnip")
+pak::pak("rstudio/keras3")
+
+# Install the backend
+keras3::install_keras()
+```
+
+We’ll start by loading `kerasnip`, `tidymodels` and `keras3`:
+
+```{r load-kerasnip}
+library(kerasnip)
+library(tidymodels)
+library(keras3)
+```
+
+## A `kerasnip` MNIST Example
+
+Let’s replicate the classic Keras introductory example, training a simple MLP on the MNIST dataset, but using the `kerasnip` workflow. This will demonstrate how to translate a standard Keras model into a reusable, modular `parsnip` specification.
+
+If you’re familiar with Keras, you’ll recognize the structure; if not, this is a perfect place to start. We’ll begin by learning the basics through a simple task: recognizing handwritten digits from the MNIST dataset.
+
+The MNIST dataset contains 28×28 pixel grayscale images of handwritten digits, like these:
+
+![MNIST](images/MNIST.png){fig-alt="A picture showing grayscale images of handwritten digits (5, 0, 4 and 1)"}
+
+Each image comes with a label indicating which digit it represents. For example, the labels for the images above might be 5, 0, 4, and 1.
+
+### Preparing the Data
+
+This step is identical to any other Keras model. We load the MNIST dataset, reshape the predictors, and convert the outcome to a factor for `tidymodels`.
+
+```{r prepare-data}
+mnist <- dataset_mnist()
+x_train <- mnist$train$x
+y_train <- mnist$train$y
+x_test <- mnist$test$x
+y_test <- mnist$test$y
+
+# Reshape
+x_train <- array_reshape(x_train, c(nrow(x_train), 784))
+x_test <- array_reshape(x_test, c(nrow(x_test), 784))
+# Rescale
+x_train <- x_train / 255
+x_test <- x_test / 255
+
+# Convert outcomes to factors for tidymodels
+# kerasnip will handle y conversion internally using keras3::to_categorical()
+y_train_factor <- factor(y_train)
+y_test_factor <- factor(y_test)
+
+# For tidymodels, it's best to work with data frames
+# Use I() to keep the matrix structure of x within the data frame
+train_df <- data.frame(x = I(x_train), y = y_train_factor)
+test_df <- data.frame(x = I(x_test), y = y_test_factor)
+```
+
+### The Standard Keras Approach (for comparison)
+
+Before diving into the `kerasnip` workflow, let's quickly look at how this same model is built using standard `keras3` code. This will help highlight the different approach `kerasnip` enables.
+
+```{r keras-standard, eval=FALSE, echo=TRUE, results='hide'}
+# The standard Keras3 approach
+model <- keras_model_sequential(input_shape = 784) |>
+  layer_dense(units = 256, activation = "relu") |>
+  layer_dropout(rate = 0.4) |>
+  layer_dense(units = 128, activation = "relu") |>
+  layer_dropout(rate = 0.3) |>
+  layer_dense(units = 10, activation = "softmax")
+
+summary(model)
+
+model |>
+  compile(
+    loss = "categorical_crossentropy",
+    optimizer = optimizer_rmsprop(),
+    metrics = "accuracy"
+  )
+
+# The model would then be trained with model |> fit(...)
+```
+
+The code above is imperative: you define each layer and add it to the model step-by-step. Now, let's see how `kerasnip` approaches this by defining reusable components for a declarative, `tidymodels`-friendly workflow.
+
+### Defining the Model with Reusable Blocks
+
+The original Keras example interleaves `layer_dense()` and `layer_dropout()`. With `kerasnip`, we can encapsulate this pattern into a single, reusable block. This makes the overall architecture cleaner and more modular.
+
+```{r define-blocks}
+# An input block to initialize the model.
+# The 'model' argument is supplied implicitly by the kerasnip backend.
+mlp_input_block <- function(model, input_shape) {
+  keras_model_sequential(input_shape = input_shape)
+}
+
+# A reusable "module" that combines a dense layer and a dropout layer.
+# All arguments that should be tunable need a default value.
+dense_dropout_block <- function(model, units = 128, rate = 0.1) {
+  model |>
+    layer_dense(units = units, activation = "relu") |>
+    layer_dropout(rate = rate)
+}
+
+# The output block for classification.
+mlp_output_block <- function(model, num_classes) {
+  model |> layer_dense(units = num_classes, activation = "softmax")
+}
+```
+
+Now, we use `create_keras_sequential_spec()` to generate our `parsnip` model function.
+
+```{r create-spec}
+create_keras_sequential_spec(
+  model_name = "mnist_mlp",
+  layer_blocks = list(
+    input = mlp_input_block,
+    hidden_1 = dense_dropout_block,
+    hidden_2 = dense_dropout_block,
+    output = mlp_output_block
+  ),
+  mode = "classification"
+)
+```
+
+### Building and Fitting the Model
+
+We can now use our new `mnist_mlp()` function. Notice how its arguments, such as `hidden_1_units` and `hidden_1_rate`, were automatically generated by `kerasnip`. The names are created by combining the name of the layer block (e.g., `hidden_1`) with the arguments of that block's function (e.g., `units`, `rate`).
+
+To replicate the `keras3` example, we'll use both `hidden` blocks and provide their parameters.
+
+```{r use-spec}
+mlp_spec <- mnist_mlp(
+  hidden_1_units = 256,
+  hidden_1_rate = 0.4,
+  hidden_2_units = 128,
+  hidden_2_rate = 0.3,
+  compile_loss = "categorical_crossentropy",
+  compile_optimizer = optimizer_rmsprop(),
+  compile_metrics = c("accuracy"),
+  fit_epochs = 30,
+  fit_batch_size = 128,
+  fit_validation_split = 0.2
+) |>
+  set_engine("keras")
+
+# Fit the model
+mlp_fit <- fit(mlp_spec, y ~ x, data = train_df)
+```
+
+```{r model-summarize}
+mlp_fit |>
+  extract_keras_summary()
+```
+
+```{r model-plot}
+mlp_fit |>
+  extract_keras_summary() |>
+  plot(show_shapes = TRUE)
+```
+
+```{r model-fit-history}
+mlp_fit |>
+  extract_keras_history() |>
+  plot()
+```
+
+### Evaluating Model Performance
+
+The `keras_evaluate()` function provides a straightforward way to assess the model's performance on a test set, using the underlying `keras3::evaluate()` method. It returns the loss and any other metrics that were specified during the model compilation step.
+
+```{r model-evaluate}
+mlp_fit |> keras_evaluate(x_test, y_test)
+```
+
+### Making Predictions
+
+Once the model is trained, we can use the standard `tidymodels` `predict()` function to generate predictions on new data. By default, `predict()` on a `parsnip` classification model returns the predicted class labels.
+
+```{r model-predict-class}
+# Predict the class for the first few images in the test set
+class_preds <- mlp_fit |>
+  predict(new_data = head(test_df))
+class_preds
+```
+
+To get the underlying probabilities for each class, we can set `type = "prob"`. This returns a tibble with a probability column for each of the 10 classes (0-9).
+
+```{r model-predict-prob}
+# Predict probabilities for the same images
+prob_preds <- mlp_fit |> predict(new_data = head(test_df), type = "prob")
+prob_preds
+```
+
+We can then compare the predicted class to the actual class for these images to see how the model is performing.
+
+```{r model-predict-compare}
+# Combine predictions with actuals for comparison
+comparison <- bind_cols(
+  class_preds,
+  prob_preds
+) |>
+  bind_cols(
+    head(test_df[, "y", drop = FALSE])
+  )
+comparison
+```
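+
+Since `comparison` now holds both the predicted and the true classes, we can also sketch a quick accuracy check on these few rows (`accuracy()` comes from `yardstick`, which is attached with `tidymodels`; a real evaluation would use the full test set, as `keras_evaluate()` did above):
+
+```{r model-predict-accuracy}
+# Overall accuracy on the handful of rows predicted above
+comparison |>
+  accuracy(truth = y, estimate = .pred_class)
+```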
+
+## Example 2: Tuning the Model Architecture
+
+Now we’ll showcase the main strength of `kerasnip`: tuning the network architecture itself. We can treat the number of layers, and the parameters of those layers, as hyperparameters to be optimized by `tune`.
+
+Using the `mnist_mlp` spec we just created, let's define a tunable model.
+
+```{r tune-spec-mnist}
+# Define a tunable specification
+# We set num_hidden_2 = 0 to disable the second hidden block for this tuning example
+tune_spec <- mnist_mlp(
+  num_hidden_1 = tune(),
+  hidden_1_units = tune(),
+  hidden_1_rate = tune(),
+  num_hidden_2 = 0,
+  compile_loss = "categorical_crossentropy",
+  compile_optimizer = optimizer_rmsprop(),
+  compile_metrics = c("accuracy"),
+  fit_epochs = 30,
+  fit_batch_size = 128,
+  fit_validation_split = 0.2
+) |>
+  set_engine("keras")
+
+# Create a workflow
+tune_wf <- workflow(y ~ x, tune_spec)
+```
+
+Next, we define the search space for our tunable parameters using `dials`.
+
+```{r create-grid-mnist}
+# Define the tuning grid
+params <- extract_parameter_set_dials(tune_wf) |>
+  update(
+    num_hidden_1 = dials::num_terms(c(1, 3)),
+    hidden_1_units = dials::hidden_units(c(64, 256)),
+    hidden_1_rate = dials::dropout(c(0.2, 0.4))
+  )
+grid <- grid_regular(params, levels = 3)
+grid
+```
+
+```{r run-tuning-mnist, cache=TRUE}
+folds <- vfold_cv(train_df, v = 3)
+
+tune_res <- tune_grid(
+  tune_wf,
+  resamples = folds,
+  grid = grid,
+  metrics = metric_set(accuracy),
+  control = control_grid(save_pred = FALSE, save_workflow = TRUE)
+)
+```
+
+Finally, we can inspect the results to find which architecture performed best. First, a summary table:
+
+```{r show-best-mnist}
+# Show the summary table of the best models
+show_best(tune_res, metric = "accuracy")
+```
+
+Now that we've identified the best-performing hyperparameters, our final step is to create and train the final model. We use `select_best()` to get the top parameters, `finalize_workflow()` to update our workflow with them, and then `fit()` one last time on our full training dataset.
+
+```{r finalize-best-model}
+# Select the best hyperparameters
+best_hps <- select_best(tune_res, metric = "accuracy")
+
+# Finalize the workflow with the best hyperparameters
+final_wf <- finalize_workflow(tune_wf, best_hps)
+
+# Fit the final model on the full training data
+final_fit <- fit(final_wf, data = train_df)
+```
+
+We can now inspect our final, tuned model.
+
+```{r inspect-final-model}
+# Print the model summary
+final_fit |>
+  extract_fit_parsnip() |>
+  extract_keras_summary()
+
+# Plot the training history
+final_fit |>
+  extract_fit_parsnip() |>
+  extract_keras_history() |>
+  plot()
+```
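+
+As a last sanity check, we can sketch an evaluation of the finalized model on the held-out test set (this reuses `test_df` from above and scores the predictions with `yardstick`'s `accuracy()`):
+
+```{r final-model-test-accuracy}
+# Predict on the held-out test set and score the predictions
+final_test_preds <- predict(final_fit, new_data = test_df) |>
+  bind_cols(test_df["y"])
+
+final_test_preds |>
+  accuracy(truth = y, estimate = .pred_class)
+```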
+
+This result shows that `tune` has tested various network depths, widths, and dropout rates, successfully finding the best-performing combination within the search space. By using `kerasnip`, we were able to integrate this complex architectural tuning directly into a standard `tidymodels` workflow.
\ No newline at end of file
diff --git a/vignettes/images/MNIST.png b/vignettes/images/MNIST.png
new file mode 100644
index 0000000..0a558b7
Binary files /dev/null and b/vignettes/images/MNIST.png differ