From 1be5ab857a0a0866373cbaf82864c570dce9a6b3 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Sun, 17 May 2026 08:08:11 -0700 Subject: [PATCH 1/5] Set up Emscripten and R-Universe build --- dev/tasks/r/github.linux.r-universe.wasm.yml | 75 ++++++++++++++++++++ dev/tasks/tasks.yml | 4 ++ r/inst/build_arrow_static.sh | 39 +++++++++- 3 files changed, 116 insertions(+), 2 deletions(-) create mode 100644 dev/tasks/r/github.linux.r-universe.wasm.yml diff --git a/dev/tasks/r/github.linux.r-universe.wasm.yml b/dev/tasks/r/github.linux.r-universe.wasm.yml new file mode 100644 index 000000000000..ee38740bb47d --- /dev/null +++ b/dev/tasks/r/github.linux.r-universe.wasm.yml @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +jobs: + r-universe-wasm: + name: "R-universe Wasm build" + runs-on: ubuntu-latest + timeout-minutes: 60 + + steps: + {{ macros.github_checkout_arrow()|indent }} + + - uses: r-lib/actions/setup-r@v2 + with: + install-r: true + + - name: Build Arrow R source package + shell: bash + run: | + cd arrow/r + make sync-cpp + R CMD build --no-build-vignettes . + + - name: Pull the R-universe Wasm image + shell: bash + run: docker pull ghcr.io/r-universe-org/build-wasm:latest + + - name: Build Wasm binary in the R-universe container + shell: bash + run: | + docker run --rm \ + -v "${PWD}/arrow/r:/work" \ + -w /work \ + ghcr.io/r-universe-org/build-wasm:latest \ + bash -lc ' + set -euxo pipefail + R -q -e "if (!requireNamespace(\"pak\", quietly = TRUE)) install.packages(\"pak\", repos = \"https://cloud.r-project.org\"); if (!requireNamespace(\"rwasm\", quietly = TRUE)) pak::pak(\"r-wasm/rwasm\"); rwasm::build(\".\")" \ + 2>&1 | tee build-wasm.log + ' + + - name: List generated artifacts + if: always() + shell: bash + run: | + ls -lh arrow/r/arrow_*.tar.gz + ls -lh arrow/r/arrow_*.tgz || true + ls -lh arrow/r/build-wasm.log || true + + - name: Upload Wasm build artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: r-wasm-build + path: | + arrow/r/arrow_*.tar.gz + arrow/r/arrow_*.tgz + arrow/r/build-wasm.log diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 9647e5531da3..4b46fad3f0df 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -651,6 +651,10 @@ tasks: env: CMAKE_BUILD_TYPE: MinSizeRel + test-r-r-universe-wasm: + ci: github + template: r/github.linux.r-universe.wasm.yml + test-r-devdocs: ci: github template: r/github.devdocs.yml diff --git a/r/inst/build_arrow_static.sh b/r/inst/build_arrow_static.sh index 241994223d3e..2e49051f469f 100755 --- a/r/inst/build_arrow_static.sh +++ b/r/inst/build_arrow_static.sh @@ -36,7 +36,7 @@ set -x SOURCE_DIR="$(cd "${SOURCE_DIR}" && pwd)" DEST_DIR="$(mkdir -p "${DEST_DIR}" && cd "${DEST_DIR}" && pwd)" -if [ "$N_JOBS" = "" ]; then +if ! [[ "${N_JOBS:-}" =~ ^[1-9][0-9]*$ ]]; then if [ "`uname -s`" = "Darwin" ]; then N_JOBS="$(sysctl -n hw.logicalcpu)" else @@ -62,9 +62,39 @@ case "$CXX" in ;; esac +# When building the R package for webR/rwasm, the R toolchain points CC/CXX to +# emcc/em++, but CMake still needs to be invoked via emcmake so it configures +# Arrow for Emscripten instead of treating the target as a generic wasm32 host. +ARROW_WASM_BUILD="OFF" +CMAKE_WRAPPER="" +case "${CC} ${CXX}" in + *emcc*|*em++*) + ARROW_WASM_BUILD="ON" + CMAKE_WRAPPER="emcmake" + ARROW_DEPENDENCY_SOURCE="BUNDLED" + ARROW_DEPENDENCY_USE_SHARED="OFF" + ARROW_ENABLE_THREADING="OFF" + ARROW_GCS="OFF" + ARROW_JEMALLOC="OFF" + ARROW_MIMALLOC="OFF" + ARROW_RUNTIME_SIMD_LEVEL="NONE" + ARROW_S3="OFF" + ARROW_SIMD_LEVEL="NONE" + ARROW_WITH_BROTLI="OFF" + ARROW_WITH_BZ2="OFF" + ARROW_WITH_ZSTD="OFF" + N_JOBS=2 + ;; +esac + +if [ "${ARROW_WASM_BUILD}" = "ON" ] && ! command -v emcmake >/dev/null 2>&1; then + echo "emcmake is required for Emscripten/webR builds but was not found in PATH" + exit 1 +fi + mkdir -p "${BUILD_DIR}" pushd "${BUILD_DIR}" -${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \ +${CMAKE_WRAPPER} ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \ -DARROW_SNAPPY_USE_SHARED=OFF \ -DARROW_BUILD_TESTS=OFF \ -DARROW_BUILD_SHARED=OFF \ @@ -74,6 +104,9 @@ ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \ -DARROW_CSV=ON \ -DARROW_DATASET=${ARROW_DATASET:-ON} \ -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \ + -DARROW_DEPENDENCY_USE_SHARED=${ARROW_DEPENDENCY_USE_SHARED:-ON} \ + -DARROW_ENABLE_THREADING=${ARROW_ENABLE_THREADING:-ON} \ + -DARROW_FLIGHT=${ARROW_FLIGHT:-OFF} \ -DAWSSDK_SOURCE=${AWSSDK_SOURCE:-} \ -DBoost_SOURCE=${Boost_SOURCE:-} \ -Dlz4_SOURCE=${lz4_SOURCE:-} \ @@ -84,6 +117,8 @@ ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \ -DARROW_JSON=${ARROW_JSON:-ON} \ -DARROW_PARQUET=${ARROW_PARQUET:-ON} \ -DARROW_S3=${ARROW_S3:-$ARROW_DEFAULT_PARAM} \ + -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \ + -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL:-DEFAULT} \ -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI:-$ARROW_DEFAULT_PARAM} \ -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-$ARROW_DEFAULT_PARAM} \ -DARROW_WITH_LZ4=${ARROW_WITH_LZ4:-ON} \ From 5ee845735ebc70610d2597e41917a59895b4b148 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 18 May 2026 01:08:18 +0000 Subject: [PATCH 2/5] clean up --- r/inst/build_arrow_static.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/r/inst/build_arrow_static.sh b/r/inst/build_arrow_static.sh index 2e49051f469f..8cd209758471 100755 --- a/r/inst/build_arrow_static.sh +++ b/r/inst/build_arrow_static.sh @@ -36,7 +36,7 @@ set -x SOURCE_DIR="$(cd "${SOURCE_DIR}" && pwd)" DEST_DIR="$(mkdir -p "${DEST_DIR}" && cd "${DEST_DIR}" && pwd)" -if ! [[ "${N_JOBS:-}" =~ ^[1-9][0-9]*$ ]]; then +if [ "$N_JOBS" = "" ]; then if [ "`uname -s`" = "Darwin" ]; then N_JOBS="$(sysctl -n hw.logicalcpu)" else @@ -62,9 +62,7 @@ case "$CXX" in ;; esac -# When building the R package for webR/rwasm, the R toolchain points CC/CXX to -# emcc/em++, but CMake still needs to be invoked via emcmake so it configures -# Arrow for Emscripten instead of treating the target as a generic wasm32 host. +# Detect and handle Emscripten build for R-Universe ARROW_WASM_BUILD="OFF" CMAKE_WRAPPER="" case "${CC} ${CXX}" in From b37e9d027dd2ccb65acb6f84f63c534c30872d26 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 18 May 2026 04:14:02 +0000 Subject: [PATCH 3/5] Move cmake stuff into nixlibs --- r/inst/build_arrow_static.sh | 31 ++----------------------------- r/tools/nixlibs.R | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 29 deletions(-) diff --git a/r/inst/build_arrow_static.sh b/r/inst/build_arrow_static.sh index 8cd209758471..121211fb01fa 100755 --- a/r/inst/build_arrow_static.sh +++ b/r/inst/build_arrow_static.sh @@ -62,33 +62,8 @@ case "$CXX" in ;; esac -# Detect and handle Emscripten build for R-Universe -ARROW_WASM_BUILD="OFF" -CMAKE_WRAPPER="" -case "${CC} ${CXX}" in - *emcc*|*em++*) - ARROW_WASM_BUILD="ON" - CMAKE_WRAPPER="emcmake" - ARROW_DEPENDENCY_SOURCE="BUNDLED" - ARROW_DEPENDENCY_USE_SHARED="OFF" - ARROW_ENABLE_THREADING="OFF" - ARROW_GCS="OFF" - ARROW_JEMALLOC="OFF" - ARROW_MIMALLOC="OFF" - ARROW_RUNTIME_SIMD_LEVEL="NONE" - ARROW_S3="OFF" - ARROW_SIMD_LEVEL="NONE" - ARROW_WITH_BROTLI="OFF" - ARROW_WITH_BZ2="OFF" - ARROW_WITH_ZSTD="OFF" - N_JOBS=2 - ;; -esac - -if [ "${ARROW_WASM_BUILD}" = "ON" ] && ! command -v emcmake >/dev/null 2>&1; then - echo "emcmake is required for Emscripten/webR builds but was not found in PATH" - exit 1 -fi +# Used for Emscripten +: ${CMAKE_WRAPPER:=""} mkdir -p "${BUILD_DIR}" pushd "${BUILD_DIR}" @@ -115,8 +90,6 @@ ${CMAKE_WRAPPER} ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \ -DARROW_JSON=${ARROW_JSON:-ON} \ -DARROW_PARQUET=${ARROW_PARQUET:-ON} \ -DARROW_S3=${ARROW_S3:-$ARROW_DEFAULT_PARAM} \ - -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \ - -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL:-DEFAULT} \ -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI:-$ARROW_DEFAULT_PARAM} \ -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-$ARROW_DEFAULT_PARAM} \ -DARROW_WITH_LZ4=${ARROW_WITH_LZ4:-ON} \ diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 947bb6c3f5cc..ba705e03ad7e 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -616,6 +616,7 @@ build_libarrow <- function(src_dir, dst_dir) { } env_var_list <- with_cloud_support(env_var_list) + env_var_list <- with_wasm_support(env_var_list) # turn_off_all_optional_features() needs to happen after # with_cloud_support(), since it might turn features ON. @@ -883,6 +884,40 @@ is_feature_requested <- function(env_varname, env_var_list, default = env_is("LI requested } +with_wasm_support <- function(env_var_list) { + cc <- env_var_list[["CC"]] + cxx <- env_var_list[["CXX"]] + if (!grepl("emcc", cc) && !grepl("em\\+\\+", cxx)) { + return(env_var_list) + } + + lg("Emscripten compiler detected; configuring for WASM build", .indent = "****") + + if (!nzchar(Sys.which("emcmake"))) { + stop("emcmake is required for Emscripten/webR builds but was not found in PATH") + } + + wasm_overrides <- c( + CMAKE_WRAPPER = "emcmake", + ARROW_DEPENDENCY_SOURCE = "BUNDLED", + ARROW_DEPENDENCY_USE_SHARED = "OFF", + ARROW_ENABLE_THREADING = "OFF", + ARROW_GCS = "OFF", + ARROW_JEMALLOC = "OFF", + ARROW_MIMALLOC = "OFF", + ARROW_S3 = "OFF", + ARROW_WITH_BROTLI = "OFF", + ARROW_WITH_BZ2 = "OFF", + ARROW_WITH_ZSTD = "OFF", + N_JOBS = "2", + EXTRA_CMAKE_FLAGS = paste( + env_var_list[["EXTRA_CMAKE_FLAGS"]], + "-DARROW_SIMD_LEVEL=NONE -DARROW_RUNTIME_SIMD_LEVEL=NONE" + ) + ) + replace(env_var_list, names(wasm_overrides), wasm_overrides) +} + with_cloud_support <- function(env_var_list) { arrow_s3 <- is_feature_requested("ARROW_S3", env_var_list) arrow_gcs <- is_feature_requested("ARROW_GCS", env_var_list) From 48ffd7eee21e31eae11f812573e008e35beac976 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Tue, 19 May 2026 09:25:27 -0700 Subject: [PATCH 4/5] rename task --- ...thub.linux.r-universe.wasm.yml => github.linux.r-wasm.yml} | 0 dev/tasks/tasks.yml | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename dev/tasks/r/{github.linux.r-universe.wasm.yml => github.linux.r-wasm.yml} (100%) diff --git a/dev/tasks/r/github.linux.r-universe.wasm.yml b/dev/tasks/r/github.linux.r-wasm.yml similarity index 100% rename from dev/tasks/r/github.linux.r-universe.wasm.yml rename to dev/tasks/r/github.linux.r-wasm.yml diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 4b46fad3f0df..2d2b9ae39e41 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -651,9 +651,9 @@ tasks: env: CMAKE_BUILD_TYPE: MinSizeRel - test-r-r-universe-wasm: + test-r-wasm: ci: github - template: r/github.linux.r-universe.wasm.yml + template: r/github.linux.r-wasm.yml test-r-devdocs: ci: github From 7eb72ac2d1927411ddbe9ce5bd328691ce142437 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Tue, 19 May 2026 09:26:42 -0700 Subject: [PATCH 5/5] remove flight line --- r/inst/build_arrow_static.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/r/inst/build_arrow_static.sh b/r/inst/build_arrow_static.sh index 121211fb01fa..349531b75fd9 100755 --- a/r/inst/build_arrow_static.sh +++ b/r/inst/build_arrow_static.sh @@ -79,7 +79,6 @@ ${CMAKE_WRAPPER} ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \ -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \ -DARROW_DEPENDENCY_USE_SHARED=${ARROW_DEPENDENCY_USE_SHARED:-ON} \ -DARROW_ENABLE_THREADING=${ARROW_ENABLE_THREADING:-ON} \ - -DARROW_FLIGHT=${ARROW_FLIGHT:-OFF} \ -DAWSSDK_SOURCE=${AWSSDK_SOURCE:-} \ -DBoost_SOURCE=${Boost_SOURCE:-} \ -Dlz4_SOURCE=${lz4_SOURCE:-} \