apache · paleolimbot · Jul 22, 2022 · Jul 7, 2022 · Jul 7, 2022 · Jul 7, 2022
diff --git a/r/NAMESPACE b/r/NAMESPACE
@@ -348,6 +348,7 @@ export(s3_bucket)
 export(schema)
 export(set_cpu_count)
 export(set_io_thread_count)
+export(show_exec_plan)
 export(starts_with)
 export(string)
 export(struct)

diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
@@ -219,6 +219,31 @@ tail.arrow_dplyr_query <- function(x, n = 6L, ...) {
   x
 }
 
+#' Show the details of an Arrow ExecPlan
+#'
+#' This is a function which gives more details about the `ExecPlan` of an
+#' `arrow_dplyr_query` object. It is similar to `dplyr::show_query()`.
+#'
+#' @param x an `arrow_dplyr_query` to print the ExecPlan for.
+#'
+#' @return The argument, invisibly.
+#' @export
+#'
+#' @examples
+#' library(dplyr)
+#' mtcars %>%
+#'   arrow_table() %>%
+#'   filter(mpg > 20) %>%
+#'   mutate(x = gear/carb) %>%
+#'   show_exec_plan()
+show_exec_plan <- function(x) {
+  adq <- as_adq(x)
+  plan <- ExecPlan$create()
+  plan$Build(adq) # build from the coerced query, not the raw `x`
+  cat(plan$ToString())
+  invisible(x)
+}
+
 ensure_group_vars <- function(x) {
   if (inherits(x, "arrow_dplyr_query")) {
     # Before pulling data from Arrow, make sure all group vars are in the projection

diff --git a/r/R/query-engine.R b/r/R/query-engine.R
@@ -259,6 +259,7 @@ ExecPlan <- R6Class("ExecPlan",
         ...
       )
     },
+    ToString = function() ExecPlan_ToString(self), # plan rendered as a string
     Stop = function() ExecPlan_StopProducing(self)
   )
 )

diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml
@@ -219,6 +219,7 @@ reference:
       - match_arrow
       - value_counts
       - list_compute_functions
+      - show_exec_plan
   - title: Connections to other systems
     contents:
       - to_arrow

diff --git a/r/man/show_exec_plan.Rd b/r/man/show_exec_plan.Rd
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp
@@ -125,6 +125,11 @@ std::shared_ptr<arrow::Schema> ExecNode_output_schema(
   return node->output_schema();
 }
 
+// [[arrow::export]]
+std::string ExecPlan_ToString(const std::shared_ptr<compute::ExecPlan>& plan) {
+  return plan->ToString();  // text rendering; exposed in R as ExecPlan$ToString()
+}
+
 #if defined(ARROW_R_WITH_DATASET)
 
 #include <arrow/dataset/file_base.h>

diff --git a/r/tests/testthat/_snaps/dplyr-query.md b/r/tests/testthat/_snaps/dplyr-query.md
@@ -0,0 +1,22 @@
+# show_exec_plan()
+
+    Code
+      tbl %>% arrow_table() %>% filter(dbl > 2, chr != "e") %>% select(chr, int, lgl) %>%
+        mutate(int_plus_ten = int + 10) %>% show_exec_plan()
+    Output
+      ExecPlan with 3 nodes:
+      2:ProjectNode{projection=[chr, int, lgl, "int_plus_ten": add_checked(cast(int, {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false}), 10)]}
+        1:FilterNode{filter=((dbl > 2) and (chr != "e"))}
+          0:TableSourceNode{}
+
+---
+
+    Code
+      tbl %>% record_batch() %>% filter(dbl > 2, chr != "e") %>% select(chr, int, lgl) %>%
+        mutate(int_plus_ten = int + 10) %>% show_exec_plan()
+    Output
+      ExecPlan with 3 nodes:
+      2:ProjectNode{projection=[chr, int, lgl, "int_plus_ten": add_checked(cast(int, {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false}), 10)]}
+        1:FilterNode{filter=((dbl > 2) and (chr != "e"))}
+          0:TableSourceNode{}
+
diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R
@@ -293,3 +293,23 @@ test_that("No duplicate field names are allowed in an arrow_dplyr_query", {
     )
   )
 })
+
+test_that("show_exec_plan()", { # plan text for Table and RecordBatch input
+  expect_snapshot(
+    tbl %>%
+      arrow_table() %>%
+      filter(dbl > 2, chr != "e") %>%
+      select(chr, int, lgl) %>%
+      mutate(int_plus_ten = int + 10) %>%
+      show_exec_plan()
+  )
+
+  expect_snapshot(
+    tbl %>%
+      record_batch() %>%
+      filter(dbl > 2, chr != "e") %>%
+      select(chr, int, lgl) %>%
+      mutate(int_plus_ten = int + 10) %>%
+      show_exec_plan()
+  )
+})