diff --git a/.bazelrc b/.bazelrc
index 5573f91..ee5c665 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -41,6 +41,9 @@ build --define=use_fast_cpp_protos=true
 build --copt="-Wno-maybe-uninitialized"
 build --copt="-Wno-unused-function"
 
+# Set the C++ language standard.
+build --cxxopt="-std=c++17"
+
 # These are errors coming from the protobuf library itself. We'd like to see
 # them in our own build.
 # build --copt="-Wno-write-strings"
diff --git a/README.md b/README.md
index d091975..bee9ab2 100644
--- a/README.md
+++ b/README.md
@@ -59,18 +59,22 @@ pip install --user google-nucleus==0.3.2
 
 ## Building from source
 
-For Ubuntu 14, Ubuntu 16, Ubuntu 18 and Debian 9 systems, building from source
-is easy. Simply type
+For Ubuntu 18, building from source is easy. Simply type
 
 ```shell
 source install.sh
 ```
 
+This will call `build_clif.sh`, which will build CLIF from scratch as well.
+
 For all other systems, you will need to first install CLIF by following the
 instructions at
 [https://github.com/google/clif#installation](https://github.com/google/clif#installation)
 before running install.sh. You'll need to run this command with Python 3.6 or
-3.7.
+3.7. If you don't want to build CLIF binaries on your own, you can consider
+using pre-built CLIF binaries (see
+[an example here](https://github.com/google/nucleus/blob/v0.5.6/install.sh#L143-L152)). Note that we don't plan to update these pre-built CLIF binaries, so we
+recommend building CLIF binaries from scratch.
 
 Note that install.sh extensively depends on apt-get, so it is unlikely to run
 without extensive modifications on non-Debian-based systems.
diff --git a/WORKSPACE b/WORKSPACE
index a48c3f4..3019abd 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -61,6 +61,17 @@ http_archive(
     ],
 )
 
+http_archive(
+    name = "com_google_glog",
+    sha256 = "1ee310e5d0a19b9d584a855000434bb724aa744745d5b8ab1855c85bff8a8e21",
+    strip_prefix = "glog-028d37889a1e80e8a07da1b8945ac706259e5fd8",
+    urls = [
+        "https://mirror.bazel.build/github.com/google/glog/archive/028d37889a1e80e8a07da1b8945ac706259e5fd8.tar.gz",
+        "https://github.com/google/glog/archive/028d37889a1e80e8a07da1b8945ac706259e5fd8.tar.gz",
+    ],
+)
+
+
 # bazel_skylib is now a required dependency of com_google_protobuf.
 http_archive(
     name = "bazel_skylib",
diff --git a/build_clif.sh b/build_clif.sh
new file mode 100755
index 0000000..b6bf148
--- /dev/null
+++ b/build_clif.sh
@@ -0,0 +1,156 @@
+#!/bin/bash
+# Copyright 2020 Google LLC.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+#    contributors may be used to endorse or promote products derived from this
+#    software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+#
+# Installs what is needed to build CLIF (https://github.com/google/clif): apt
+# packages including LLVM/Clang 11, abseil-cpp, protobuf, googletest and a few
+# Python packages. It then clones CLIF into $HOME and runs its INSTALL.sh.
+#
+# This script is only maintained for Ubuntu 18.04. It uses sudo for apt-get and
+# for the steps that install into system directories.
+
+# Abort on the first failing command. Bash's `set -e` ignores a failure of any
+# command in an `a && b` list other than the last one, so the steps below are
+# written as separate commands instead of `&&` chains wherever a failure must
+# stop the script.
+set -eux -o pipefail
+
+echo ========== This script is only maintained for Ubuntu 18.04.
+echo ========== See https://github.com/google/clif for how to build on different Unix distributions.
+
+UBUNTU_VERSION=18.04
+ABSL_VERSION=20200923
+PROTOBUF_VERSION=3.13.0
+PYTHON_VERSION=3.6
+
+APT_ARGS=(
+"-qq"
+"-y"
+)
+
+
+sudo apt-get update "${APT_ARGS[@]}"
+sudo apt-get install "${APT_ARGS[@]}" --no-install-recommends \
+  autoconf \
+  automake \
+  cmake \
+  curl \
+  gpg-agent \
+  g++ \
+  libtool \
+  make \
+  pkg-config \
+  software-properties-common \
+  wget \
+  unzip
+
+# Configure LLVM 11 apt repository
+wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
+sudo add-apt-repository "deb http://apt.llvm.org/$(lsb_release -sc)/ llvm-toolchain-$(lsb_release -sc)-11 main"
+
+# Install CLIF dependencies
+sudo apt-get update "${APT_ARGS[@]}"
+sudo apt-get install "${APT_ARGS[@]}" \
+  clang-11 \
+  libclang-11-dev \
+  libgoogle-glog-dev \
+  libgtest-dev \
+  libllvm11 \
+  llvm-11-dev \
+  python3-dev \
+  python3-pyparsing \
+  zlib1g-dev
+
+# Install the headers and distutils for the pinned Python version.
+# NOTE(review): this script never adds the deadsnakes PPA, so these packages
+# must come from the apt sources that are already configured.
+sudo apt-get update "${APT_ARGS[@]}"
+sudo apt-get install "${APT_ARGS[@]}" \
+  "python$PYTHON_VERSION-dev" \
+  "python$PYTHON_VERSION-distutils"
+
+# Install latest version of pip since the version on ubuntu could be outdated
+curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
+"python$PYTHON_VERSION" get-pip.py
+rm get-pip.py
+
+# Compile and install absl-cpp from source
+wget "https://github.com/abseil/abseil-cpp/archive/$ABSL_VERSION.tar.gz"
+tar -xf "$ABSL_VERSION.tar.gz"
+mkdir -p "abseil-cpp-$ABSL_VERSION/build"
+cd "abseil-cpp-$ABSL_VERSION/build"
+cmake .. -DCMAKE_POSITION_INDEPENDENT_CODE=true
+sudo make install
+# Go back to the starting directory before cleaning up. `sudo make install`
+# may leave root-owned build outputs behind, hence `sudo rm`.
+cd ../..
+sudo rm -rf "abseil-cpp-$ABSL_VERSION" "$ABSL_VERSION.tar.gz"
+
+# Compile and install protobuf from source
+wget "https://github.com/protocolbuffers/protobuf/releases/download/v$PROTOBUF_VERSION/protobuf-cpp-$PROTOBUF_VERSION.tar.gz"
+tar -xf "protobuf-cpp-$PROTOBUF_VERSION.tar.gz"
+cd "protobuf-$PROTOBUF_VERSION"
+# Configure and install C++ libraries
+./autogen.sh
+./configure
+make -j"$(nproc)"
+sudo make install
+sudo ldconfig
+cd ..
+sudo rm -rf "protobuf-$PROTOBUF_VERSION" "protobuf-cpp-$PROTOBUF_VERSION.tar.gz"
+
+# Install googletest
+cd /usr/src/gtest
+sudo cmake .
+sudo make install
+
+# Install python runtime and test dependencies
+"python$PYTHON_VERSION" -m pip install \
+  absl-py \
+  parameterized \
+  protobuf=="$PROTOBUF_VERSION"
+
+# NOTE(review): kept as an `&&` list on purpose: as before, a failing uninstall
+# only skips the pinned reinstall and does not abort. Confirm this is intended.
+sudo "python$PYTHON_VERSION" -m pip uninstall -y pyparsing && \
+  "python$PYTHON_VERSION" -m pip install -Iv 'pyparsing==2.2.0'
+
+DV_PLATFORM="ubuntu-${UBUNTU_VERSION}"
+
+# Point /usr/local/bin/python3 at the pinned interpreter. The `$` must stay
+# unescaped: `\$PYTHON_VERSION` would link to that literal text, not the version.
+sudo ln -sf "/usr/bin/python$PYTHON_VERSION" /usr/local/bin/python3
+
+# Clone a fresh CLIF checkout into the home directory and run its installer.
+cd
+rm -rf clif
+git clone https://github.com/google/clif.git
+cd clif
+sudo ./INSTALL.sh
diff --git a/install.sh b/install.sh
index 86f1752..5d7d2d9 100755
--- a/install.sh
+++ b/install.sh
@@ -86,7 +86,7 @@ python3 -m pip install --user 'setuptools==49.6.0'
 python3 -m pip install --user 'keras_preprocessing==1.1.2' --no-deps
 python3 -m pip install --user 'h5py==2.10.0'
 python3 -m pip install --user enum34
-python3 -m pip install --user 'protobuf==3.9.2'
+python3 -m pip install --user 'protobuf==3.13.0'
 
 # Install Bazel
 ################################################################################
@@ -122,35 +122,22 @@ ensure_wanted_bazel_version "${NUCLEUS_BAZEL_VERSION}"
 
 note_build_stage "Install CLIF binary"
 
-if [[ -e /usr/local/clif/bin/pyclif ]];
+if [[ -e /usr/local/bin/pyclif ]];
 then
   echo "CLIF already installed."
 else
-  # Figure out which linux installation we are on to fetch an appropriate
-  # version of the pre-built CLIF binary. Note that we only support now Ubuntu
-  # 14, 16, and 18.
-  case "$(lsb_release -d)" in
-    *Ubuntu*18.*.*) PLATFORM="ubuntu-18" ;;
-    *Ubuntu*16.*.*) PLATFORM="ubuntu-16" ;;
-    *Ubuntu*14.*.*) PLATFORM="ubuntu-14" ;;
-    *Debian*9.*) PLATFORM="debian" ;;
-    *Debian*rodete*) PLATFORM="debian" ;;
-    *) echo "CLIF is not installed on this machine and a prebuilt binary is not
-available for this platform. Please install CLIF at
-https://github.com/google/clif before continuing."
-      exit 1
-  esac
-
-  PACKAGE_CURL_PATH="https://storage.googleapis.com/deepvariant/packages"
-  OSS_CLIF_CURL_ROOT="${PACKAGE_CURL_PATH}/oss_clif"
-  OSS_CLIF_PKG="oss_clif.${PLATFORM}.latest.tgz"
-
-  if [[ ! -f "/tmp/${OSS_CLIF_PKG}" ]]; then
-    curl "${OSS_CLIF_CURL_ROOT}/${OSS_CLIF_PKG}" > /tmp/${OSS_CLIF_PKG}
-  fi
-
-  (cd / && sudo tar xzf "/tmp/${OSS_CLIF_PKG}")
-  sudo ldconfig  # Reload shared libraries.
+  # Build the CLIF binary from scratch with build_clif.sh. This installs many
+  # build dependencies on the host, which is not ideal, but it worked fine in a
+  # Dockerfile because build-prereq.sh is not run in the final image.
+  time ./build_clif.sh
+  # TODO(b/181283422): Figure out why these symbolic links are needed and see if
+  # we can do this better.
+  sudo mkdir -p /usr/clang/bin/
+  sudo ln -sf /usr/local/bin/clif-matcher /usr/clang/bin/clif-matcher
+  sudo mkdir -p /usr/local/clif/bin
+  sudo ln -sf /usr/local/bin/pyclif* /usr/local/clif/bin/
+  DIST_PACKAGES_DIR=$(python3 -c "import site; print(site.getsitepackages()[0])")
+  sudo ln -sf "${DIST_PACKAGES_DIR}/clif/python" /usr/local/clif/
 fi
 
 # Download and build TensorFlow
diff --git a/nucleus/io/BUILD b/nucleus/io/BUILD
index 94bb2e2..1646927 100644
--- a/nucleus/io/BUILD
+++ b/nucleus/io/BUILD
@@ -1021,6 +1021,7 @@ cc_library(
     deps = [
         "//nucleus/platform:types",
         "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/strings:cord",
         "@org_tensorflow//tensorflow/core:lib",
         "@org_tensorflow//tensorflow/core/platform/cloud:gcs_file_system",
     ],
@@ -1044,6 +1045,7 @@ cc_library(
     hdrs = ["tfrecord_writer.h"],
     deps = [
         "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/strings:cord",
         "@org_tensorflow//tensorflow/core:lib",
         "@org_tensorflow//tensorflow/core/platform/cloud:gcs_file_system",
     ],
@@ -1055,6 +1057,7 @@ cc_library(
     hdrs = ["gfile.h"],
     deps = [
         "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/strings:cord",
         "@org_tensorflow//tensorflow/core:lib",
     ],
 )
diff --git a/nucleus/pip_package/build_pip_package.sh b/nucleus/pip_package/build_pip_package.sh
index 4175b26..387ff8c 100755
--- a/nucleus/pip_package/build_pip_package.sh
+++ b/nucleus/pip_package/build_pip_package.sh
@@ -30,7 +30,7 @@ set -x
 # egg_files/PKG-INFO.
 NUCLEUS_VERSION="0.5.8"
 PACKAGE_NAME="google_nucleus-${NUCLEUS_VERSION}"
-PYTHON_VERSION="3.5"
+PYTHON_VERSION="3.6"
 
 TMPDIR=$(mktemp -d -t tmp.XXXXXXXXXXX)
 TOPDIR="${TMPDIR}/${PACKAGE_NAME}"
diff --git a/nucleus/util/BUILD b/nucleus/util/BUILD
index e583562..959b303 100644
--- a/nucleus/util/BUILD
+++ b/nucleus/util/BUILD
@@ -132,6 +132,7 @@ cc_library(
     srcs = ["math.cc"],
     hdrs = ["math.h"],
     deps = [
+        "@com_google_absl//absl/strings:cord",
         "@org_tensorflow//tensorflow/core:lib",
     ],
 )
@@ -368,6 +369,7 @@ cc_library(
     deps = [
         ":proto_ptr",
         "@clif//:cpp_runtime",
+        "@com_google_absl//absl/strings:cord",
         "@com_google_protobuf//:proto_api",
         "@com_google_protobuf//:protobuf",
         "@org_tensorflow//tensorflow/core:lib",
diff --git a/nucleus/vendor/BUILD b/nucleus/vendor/BUILD
index fdf813b..dde936f 100644
--- a/nucleus/vendor/BUILD
+++ b/nucleus/vendor/BUILD
@@ -60,6 +60,7 @@ cc_library(
     deps = [
         ":statusor",
         "@clif//:cpp_runtime",
+        "@com_google_absl//absl/strings:cord",
         "@local_config_python//:python_headers",
     ],
 )
diff --git a/third_party/clif.BUILD b/third_party/clif.BUILD
index 17bbe6a..e8da758 100644
--- a/third_party/clif.BUILD
+++ b/third_party/clif.BUILD
@@ -19,6 +19,7 @@ cc_library(
     hdrs = glob(["clif/python/*.h"]),
     visibility = ["//visibility:public"],
     deps = [
+        "@com_google_glog//:glog",
         "@com_google_protobuf//:protobuf",
         "@local_config_python//:python_headers",
     ],
diff --git a/third_party/clif.bzl b/third_party/clif.bzl
index 2a00f99..05356fd 100644
--- a/third_party/clif.bzl
+++ b/third_party/clif.bzl
@@ -9,8 +9,8 @@ CLIF_PROTO = "@clif//:proto"
 # Label for our OSS CLIF C++ runtime headers and sources.
 CLIF_CPP_RUNTIME = "@clif//:cpp_runtime"
 
-# The CLIF generated code only compiles with C++11.
-EXTRA_CC_FLAGS = ["-std=c++11"]
+# The CLIF generated code only compiles with C++17.
+EXTRA_CC_FLAGS = ["-std=c++17"]
 
 _PROTO_LIBRARY_SUFFIX = "_pyclif"
 