Skip to content

Commit

Permalink
rules_r: reproducible builds on demand
Browse files Browse the repository at this point in the history
Summary:
Best effort feature to produce files with identical checksum on
successive builds. Activated with --features=rlang-reproducible.

Reproducible builds are slightly more expensive and more complex because
they need identical src and install paths across builds. They also depend
on the compiler's willingness to produce reproducible .so files.

Known to not work on macOS where the .o files can be at different
positions within the .so, and on packages with C++ cc_deps.

Reviewers: razvanm

Reviewed By: razvanm

Differential Revision: https://phabricator.grailbio.com/D12645

fbshipit-source-id: fadd8e5
  • Loading branch information
Siddhartha Bagaria authored and grailbot committed Apr 17, 2018
1 parent dafe01d commit 5bb812b
Show file tree
Hide file tree
Showing 6 changed files with 156 additions and 14 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ addons:

script:
- tests/run_tests.sh
- tests/run_reproducible_build_tests.sh
13 changes: 11 additions & 2 deletions R/internal/build.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ def _build_impl(ctx):
pkg_src_archive = ctx.outputs.src_archive
package_files = _package_files(ctx)
output_files = package_files + [pkg_lib_path, pkg_bin_archive]
flock = ctx.attr._flock.files_to_run.executable

library_deps = _library_deps(ctx.attr.deps)
cc_deps = _cc_deps(ctx.attr.cc_deps, pkg_src_dir, ctx.bin_dir.path, ctx.genfiles_dir.path)
Expand All @@ -204,7 +205,7 @@ def _build_impl(ctx):
all_input_files = (library_deps["lib_files"] + ctx.files.srcs
+ cc_deps["files"].to_list()
+ build_tools.to_list()
+ [ctx.file.makevars_user])
+ [ctx.file.makevars_user, flock])

if ctx.file.config_override:
all_input_files += [ctx.file.config_override]
Expand All @@ -227,7 +228,10 @@ def _build_impl(ctx):
"INSTALL_ARGS": _sh_quote_args(ctx.attr.install_args),
"EXPORT_ENV_VARS_CMD": "; ".join(_env_vars(ctx.attr.env_vars)),
"BUILD_TOOLS_EXPORT_CMD": _build_path_export(build_tools),
"REPRODUCIBLE_BUILD": "true" if "rlang-no-stamp" in ctx.features else "false",
"FLOCK_PATH": flock.path,
"REPRODUCIBLE_BUILD": "true" if "rlang-reproducible" in ctx.features else "false",
"BAZEL_R_DEBUG": "true" if "rlang-debug" in ctx.features else "false",
"BAZEL_R_VERBOSE": "true" if "rlang-verbose" in ctx.features else "false",
"R": " ".join(_R),
"RSCRIPT": " ".join(_Rscript),
}
Expand Down Expand Up @@ -324,6 +328,11 @@ r_pkg = rule(
executable = True,
cfg = "host",
),
"_flock": attr.label(
default = "@com_grail_rules_r//R/scripts:flock",
executable = True,
cfg = "host",
),
},
doc = ("Rule to install the package and its transitive dependencies" +
"in the Bazel sandbox."),
Expand Down
6 changes: 6 additions & 0 deletions R/scripts/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,9 @@ exports_files([
"test.sh.tpl",
"check.sh.tpl",
])

# Small C helper built from flock.c. It takes a file descriptor number as its
# only argument and tries to acquire an exclusive, non-blocking lock on it.
# The r_pkg rule depends on it through its "_flock" attribute and passes its
# path to build.sh as FLOCK_PATH.
cc_binary(
name = "flock",
srcs = ["flock.c"],
visibility = ["//visibility:public"],
)
77 changes: 65 additions & 12 deletions R/scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

set -euo pipefail

PWD=$(pwd -P)
EXEC_ROOT=$(pwd -P)

TMP_FILES=() # Temporary files to be cleaned up before exiting the script.

Expand All @@ -38,6 +38,39 @@ silent() {
set -e
}

# Prints all of its arguments, separated by single spaces, as one line on
# stdout.
#
# printf is used instead of echo so that a message whose first word looks like
# an echo option (-n, -e, -E) is printed literally rather than being consumed
# as an option.
#
# TODO: Log only when verbose is set.
log() {
  # Pin IFS locally so "$*" always joins with a single space, as echo would.
  local IFS=' '
  printf '%s\n' "$*"
}

# Function to lock the common temp library directory for this package, until we
# have moved out of it.
#
# Globals:
#   FLOCK_PATH (read)   - helper executable invoked as "$FLOCK_PATH 200"; it
#                         must return 0 once fd 200 is locked.
#   TMP_FILES (written) - the lock file is appended so it is cleaned up on exit.
# Arguments:
#   $1 - directory in which the lock file is created.
#   $2 - lock name; the lock file is "<dir>/BZL_LOCK-<name>".
# Returns:
#   0 once the lock is held. Exits the script with status 1 if the lock could
#   not be acquired within max_tries attempts.
lock() {
  local lock_dir="$1"
  local lock_name="$2"
  local lock_file="${lock_dir}/BZL_LOCK-${lock_name}"
  TMP_FILES+=("${lock_file}")
  # Use fd 200 for the lock; the file stays open for as long as this shell is
  # alive, and the lock is released when the fd is closed on process
  # termination.
  exec 200>"${lock_file}"

  # We use a non-blocking lock to define our timeout and messaging strategy here.
  local max_tries=20
  local backoff=10
  local tries=1 # Number of the attempt currently being made.
  while ! "${FLOCK_PATH}" 200; do
    # Give up only after a *failed* final attempt. A success on the last
    # attempt must not be reported as a failure, and there is no point in
    # sleeping when no further attempt will follow.
    if (( tries >= max_tries )); then
      log "Failed to acquire lock on ${lock_file} to build package; is another bazel build running?"
      exit 1
    fi
    log "Failed to acquire lock; will try again in $backoff seconds"
    sleep "${backoff}"
    tries=$(( tries + 1 ))
  done

  # Message only if it took more than one attempt.
  if (( tries > 1 )); then
    log "Acquired lock in $tries attempts"
  fi
}

eval "${EXPORT_ENV_VARS_CMD}"

if "${BAZEL_R_DEBUG:-"false"}"; then
Expand All @@ -63,17 +96,19 @@ if "${BUILD_SRC_ARCHIVE:-"false"}"; then
exit
fi

export PKG_LIBS="${C_LIBS_FLAGS//_EXEC_ROOT_/$PWD/}"
export PKG_CPPFLAGS="${C_CPP_FLAGS//_EXEC_ROOT_/$PWD/}"
export R_MAKEVARS_USER="${PWD}/${R_MAKEVARS_USER}"
export PKG_LIBS="${C_LIBS_FLAGS//_EXEC_ROOT_/${EXEC_ROOT}/}"
export PKG_CPPFLAGS="${C_CPP_FLAGS//_EXEC_ROOT_/${EXEC_ROOT}/}"
export R_MAKEVARS_USER="${EXEC_ROOT}/${R_MAKEVARS_USER}"

# Use R_LIBS in place of R_LIBS_USER because on some systems (e.g., Ubuntu),
# R_LIBS_USER is parameter substituted with a default in .Renviron, which
# imposes length limits.
export R_LIBS="${R_LIBS//_EXEC_ROOT_/$PWD/}"
export R_LIBS="${R_LIBS//_EXEC_ROOT_/${EXEC_ROOT}/}"
export R_LIBS_USER=dummy

mkdir -p "${PKG_LIB_PATH}"

# Easy case -- we allow timestamp and install paths to be stamped inside the package files.
if ! ${REPRODUCIBLE_BUILD}; then
silent "${R}" CMD INSTALL "${INSTALL_ARGS}" --build --library="${PKG_LIB_PATH}" \
"${PKG_SRC_DIR}"
Expand All @@ -84,10 +119,28 @@ if ! ${REPRODUCIBLE_BUILD}; then
exit
fi

# There is additional complexity to ensure that the build produces the same
# file content. This feature is turned off by default and can be enabled on
# the bazel command line by --features=rlang-no-stamp.

# TODO: Implement locking mechanism for reproducible builds.
echo "REPRODUCIBLE_BUILD not implemented yet."
exit 1
# Not so easy case -- we make builds reproducible by asking R to use a constant
# timestamp, and by installing the packages to the same destination, from the
# same source path, to get reproducibility in embedded paths.

# Fixed locations that do not depend on the exec root, so that the source and
# install paths seen by R are the same on every build.
LOCK_DIR="/tmp/bazel/R/locks"
TMP_LIB="/tmp/bazel/R/lib"
TMP_SRC="/tmp/bazel/R/src"
mkdir -p "${LOCK_DIR}"
mkdir -p "${TMP_LIB}"
mkdir -p "${TMP_SRC}"
# Take the per-package lock before touching the shared directories; lock exits
# the script if the lock cannot be acquired.
lock "${LOCK_DIR}" "${PKG_NAME}"

# Start from a fresh copy of the package sources at the fixed source path.
# Errors from removing a previous copy are deliberately ignored.
TMP_SRC_PKG="${TMP_SRC}/${PKG_NAME}"
rm -rf "${TMP_SRC_PKG}" 2>/dev/null || true
cp -a "${EXEC_ROOT}/${PKG_SRC_DIR}" "${TMP_SRC_PKG}"
# Register the copy in TMP_FILES so that it is removed on exit.
TMP_FILES+=("${TMP_SRC_PKG}")

# Install the package to the common temp library.
# --built-timestamp='' is how the constant timestamp mentioned above is requested.
# NOTE(review): --no-lock presumably disables R's own install locking, since
# the lock taken above already serializes access -- confirm.
silent "${R}" CMD INSTALL "${INSTALL_ARGS}" --built-timestamp='' --no-lock --build --library="${TMP_LIB}" "${TMP_SRC_PKG}"
rm -rf "${PKG_LIB_PATH:?}/${PKG_NAME}" # Delete empty directories to make way for move.
mv -f "${TMP_LIB}/${PKG_NAME}" "${PKG_LIB_PATH}/"
# Replace every occurrence of the exec root path inside the installed shared
# objects with the literal placeholder _EXEC_ROOT_. sed -i.orig edits in place
# and leaves a ".orig" backup next to each file.
# NOTE(review): the ".orig" backups are not removed here -- confirm intended.
find "${PKG_LIB_PATH}" -name '*.so' -exec sed -i.orig -e "s|${EXEC_ROOT}|_EXEC_ROOT_|g" {} \;
# The binary archive is picked up from the current working directory.
mv "${PKG_NAME}"*gz "${PKG_BIN_ARCHIVE}" # .tgz on macOS and .tar.gz on Linux.

trap - EXIT
cleanup
50 changes: 50 additions & 0 deletions R/scripts/flock.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Copyright 2018 The Bazel Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Uses the given file descriptor to acquire an exclusive non-blocking lock.

// The lock is released when the file descriptor is closed, i.e. when the
// parent process that opened the corresponding file is terminated for any
// reason. A SIGKILL to the parent process might have a delay of a few
// seconds, but everything else is instantaneous.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/file.h>

// Tries to take an exclusive, non-blocking flock() on an already-open file
// descriptor inherited from the parent process.
//
// Arguments: argv[1] is the decimal file descriptor number.
// Exit status: 0 if the lock was acquired; EXIT_FAILURE on a usage error, an
// invalid descriptor argument, or when the lock could not be taken. A lock
// that is merely held by someone else fails silently so callers can retry;
// any other flock() error is reported on stderr.
int main(int argc, char** argv) {
  long value;
  char* end = NULL;
  int fd;

  if (argc != 2) {
    fprintf(stderr, "Usage: %s [fd]\n", argv[0]);
    exit(EXIT_FAILURE);
  }

  // errno has to be cleared *before* the call: strtol only sets it on error
  // and never resets it, so clearing it afterwards would hide every failure.
  errno = 0;
  value = strtol(argv[1], &end, 10);
  if (errno != 0) {
    perror("strtol");
    exit(EXIT_FAILURE);
  }

  // Reject empty or non-numeric input, trailing garbage, negative numbers and
  // values that do not fit in an int, instead of silently locking some other
  // descriptor (e.g. fd 0 for "abc").
  fd = (int)value;
  if (end == argv[1] || *end != '\0' || value < 0 || (long)fd != value) {
    fprintf(stderr, "%s: invalid file descriptor '%s'\n", argv[0], argv[1]);
    exit(EXIT_FAILURE);
  }

  if (flock(fd, LOCK_EX | LOCK_NB)) {
    // EWOULDBLOCK is the documented errno for a contended non-blocking lock;
    // EAGAIN is checked as well for platforms where the two values differ.
    if (errno != EWOULDBLOCK && errno != EAGAIN) {
      perror("flock");
    }
    exit(EXIT_FAILURE);
  }

  return 0;
}
23 changes: 23 additions & 0 deletions tests/run_reproducible_build_tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash
# Copyright 2018 The Bazel Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Providing features invalidates cache; hence this test suite is separate.

# Abort on errors, unset variables and failures anywhere in a pipeline; -x
# traces each command as it runs.
set -euxo pipefail

# Run from the directory containing this script, so the relative
# ':library_image' target below is looked up there.
cd "$(dirname "${BASH_SOURCE[0]}")"

# Just to see if this mode builds.
bazel build --features=rlang-reproducible :library_image

0 comments on commit 5bb812b

Please sign in to comment.