From bde172c7911e0572e88a526f4d7e36ee601ff299 Mon Sep 17 00:00:00 2001
From: igorborgest
Date: Sun, 12 Apr 2020 23:44:10 -0300
Subject: [PATCH 1/3] Revisiting the lambda layer building

---
 building/build-lambda-layers.sh        | 43 +++++++------
 building/lambda/Dockerfile             |  3 +-
 building/lambda/build-apache-arrow.sh  | 37 -----------
 building/lambda/build-docker-images.sh |  0
 building/lambda/build-lambda-layer.sh  | 88 ++++++++++++++++++++++++++
 building/lambda/build-layer.sh         | 22 -------
 building/lambda/build-pyarrow.sh       | 23 -------
 setup-dev-env.sh                       |  1 +
 8 files changed, 113 insertions(+), 104 deletions(-)
 delete mode 100755 building/lambda/build-apache-arrow.sh
 mode change 100755 => 100644 building/lambda/build-docker-images.sh
 create mode 100644 building/lambda/build-lambda-layer.sh
 delete mode 100755 building/lambda/build-layer.sh
 delete mode 100755 building/lambda/build-pyarrow.sh

diff --git a/building/build-lambda-layers.sh b/building/build-lambda-layers.sh
index ec6484d84..9936c15d4 100755
--- a/building/build-lambda-layers.sh
+++ b/building/build-lambda-layers.sh
@@ -2,34 +2,35 @@
 set -ex
 
 VERSION=$(python -c "import awswrangler as wr; print(wr.__version__)")
-echo "Building Lambda Layers for AWS Data Wrangler ${VERSION}"
 DIR_NAME=$(dirname "$PWD")
 
+echo "Building Lambda Layers for AWS Data Wrangler ${VERSION}"
+
 pushd lambda
 
 # Building all related docker images
 ./build-docker-images.sh
 
-# Building Apache Arrow binary artifacts
+# Python 3.6
 docker run \
-        --volume "$DIR_NAME":/aws-data-wrangler/ \
-        --workdir /aws-data-wrangler/building/lambda \
-        -it \
-        awswrangler-build-py36 \
-        build-apache-arrow.sh
+    --volume "$DIR_NAME":/aws-data-wrangler/ \
+    --workdir /aws-data-wrangler/building/lambda \
+    -it \
+    awswrangler-build-py36 \
+    build-lambda-layer.sh "${VERSION}-py3.6" "ninja"
 
-# Generating PyArrow Files for Python 3.6
-#docker run \
-#    --volume "$DIR_NAME":/aws-data-wrangler/ \
-#    --workdir /aws-data-wrangler/building/lambda \
-#    -it \
-#    awswrangler-build-py36 \
-#    build-pyarrow.sh
+# Python 3.7
+docker run \
+    --volume "$DIR_NAME":/aws-data-wrangler/ \
+    --workdir /aws-data-wrangler/building/lambda \
+    -it \
+    awswrangler-build-py37 \
+    build-lambda-layer.sh "${VERSION}-py3.7" "ninja"
 
-# Building the AWS Lambda Layer for Python 3.6
-#docker run \
-#    --volume "$DIR_NAME":/aws-data-wrangler/ \
-#    --workdir /aws-data-wrangler/building/lambda \
-#    -it \
-#    awswrangler-build-py36 \
-#    build-layer.sh "${VERSION}-py3.6"
+# Python 3.8
+docker run \
+    --volume "$DIR_NAME":/aws-data-wrangler/ \
+    --workdir /aws-data-wrangler/building/lambda \
+    -it \
+    awswrangler-build-py38 \
+    build-lambda-layer.sh "${VERSION}-py3.8" "ninja-build"
diff --git a/building/lambda/Dockerfile b/building/lambda/Dockerfile
index b1e708bc4..503e6f0a2 100644
--- a/building/lambda/Dockerfile
+++ b/building/lambda/Dockerfile
@@ -9,9 +9,10 @@ RUN yum install -y \
     bison \
     flex \
     autoconf \
+    ninja-build \
     ${py_dev}
 
-RUN pip3 install --upgrade pip six cython cmake
+RUN pip3 install --upgrade pip six cython cmake hypothesis
 
 ADD requirements.txt /root/
 RUN pip3 install -r /root/requirements.txt
diff --git a/building/lambda/build-apache-arrow.sh b/building/lambda/build-apache-arrow.sh
deleted file mode 100755
index 36699a6e2..000000000
--- a/building/lambda/build-apache-arrow.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/env bash
-set -ex
-
-rm -rf arrow dist
-
-git clone \
-    --branch apache-arrow-0.16.0 \
-    --single-branch \
-    https://github.com/apache/arrow.git
-
-export ARROW_HOME=$(pwd)/dist
-export LD_LIBRARY_PATH=$(pwd)/dist/lib:$LD_LIBRARY_PATH
-
-mkdir dist
-mkdir arrow/cpp/build
-pushd arrow/cpp/build
-
-cmake \
-    -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
-    -DCMAKE_INSTALL_LIBDIR=lib \
-    -DARROW_FLIGHT=OFF \
-    -DARROW_GANDIVA=OFF \
-    -DARROW_ORC=OFF \
-    -DARROW_WITH_SNAPPY=ON \
-    -DARROW_WITH_ZLIB=ON \
-    -DARROW_PARQUET=ON \
-    -DARROW_CSV=OFF \
-    -DARROW_PYTHON=ON \
-    -DARROW_PLASMA=OFF \
-    -DARROW_BUILD_TESTS=OFF \
-    ..
-make -j
-make install
-
-popd
-
-rm -rf arrow
diff --git a/building/lambda/build-docker-images.sh b/building/lambda/build-docker-images.sh
old mode 100755
new mode 100644
diff --git a/building/lambda/build-lambda-layer.sh b/building/lambda/build-lambda-layer.sh
new file mode 100644
index 000000000..5c1b94645
--- /dev/null
+++ b/building/lambda/build-lambda-layer.sh
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+set -ex
+
+FILENAME="awswrangler-layer-${1}.zip"
+NINJA=${2}
+
+pushd /aws-data-wrangler
+rm -rf python dist/pyarrow_files "dist/${FILENAME}" "${FILENAME}"
+popd
+
+rm -rf dist arrow
+
+export ARROW_HOME=$(pwd)/dist
+export LD_LIBRARY_PATH=$(pwd)/dist/lib:$LD_LIBRARY_PATH
+
+git clone \
+    --branch apache-arrow-0.16.0 \
+    --single-branch \
+    https://github.com/apache/arrow.git
+
+mkdir dist
+mkdir arrow/cpp/build
+pushd arrow/cpp/build
+
+cmake \
+    -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
+    -DCMAKE_INSTALL_LIBDIR=lib \
+    -DARROW_FLIGHT=OFF \
+    -DARROW_GANDIVA=OFF \
+    -DARROW_ORC=OFF \
+    -DARROW_WITH_SNAPPY=ON \
+    -DARROW_WITH_ZLIB=ON \
+    -DARROW_PARQUET=ON \
+    -DARROW_CSV=OFF \
+    -DARROW_PYTHON=ON \
+    -DARROW_PLASMA=OFF \
+    -DARROW_BUILD_TESTS=OFF \
+    -GNinja \
+    ..
+
+eval $NINJA
+eval "${NINJA} install"
+
+popd
+
+pushd arrow/python
+
+export ARROW_PRE_0_15_IPC_FORMAT=0
+export PYARROW_WITH_HDFS=0
+export PYARROW_WITH_FLIGHT=0
+export PYARROW_WITH_GANDIVA=0
+export PYARROW_WITH_ORC=0
+export PYARROW_WITH_CUDA=0
+export PYARROW_WITH_PLASMA=0
+export PYARROW_WITH_PARQUET=1
+
+python setup.py build_ext \
+    --build-type=release \
+    --bundle-arrow-cpp \
+    bdist_wheel
+
+pip install dist/pyarrow-*.whl -t /aws-data-wrangler/dist/pyarrow_files
+
+popd
+
+pushd /aws-data-wrangler
+
+pip install . -t ./python
+
+rm -rf python/pyarrow*
+rm -rf python/boto*
+
+rm -f /aws-data-wrangler/dist/pyarrow_files/pyarrow/libarrow.so
+rm -f /aws-data-wrangler/dist/pyarrow_files/pyarrow/libparquet.so
+rm -f /aws-data-wrangler/dist/pyarrow_files/pyarrow/libarrow_python.so
+
+cp -r /aws-data-wrangler/dist/pyarrow_files/pyarrow* python/
+
+find python -wholename "*/tests/*" -type f -delete
+
+zip -r9 "${FILENAME}" ./python
+mv "${FILENAME}" dist/
+
+rm -rf python dist/pyarrow_files "${FILENAME}"
+
+popd
+
+rm -rf dist arrow
diff --git a/building/lambda/build-layer.sh b/building/lambda/build-layer.sh
deleted file mode 100755
index 6c029bec3..000000000
--- a/building/lambda/build-layer.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env bash
-set -ex
-
-FILENAME="awswrangler-layer-${1}.zip"
-
-pushd /aws-data-wrangler
-
-mkdir -p dist
-rm -rf python "${FILENAME}" "dist/${FILENAME}"
-
-pip install . -t ./python
-rm -rf python/pyarrow*
-rm -rf python/boto*
-rm -f /aws-data-wrangler/dist/pyarrow_files/pyarrow/libarrow.so
-rm -f /aws-data-wrangler/dist/pyarrow_files/pyarrow/libparquet.so
-rm -f /aws-data-wrangler/dist/pyarrow_files/pyarrow/libarrow_python.so
-cp -r /aws-data-wrangler/dist/pyarrow_files/pyarrow* python/
-find python -wholename "*/tests/*" -type f -delete
-zip -r9 "${FILENAME}" ./python
-mv "${FILENAME}" dist/
-
-rm -rf python
diff --git a/building/lambda/build-pyarrow.sh b/building/lambda/build-pyarrow.sh
deleted file mode 100755
index 7540344a1..000000000
--- a/building/lambda/build-pyarrow.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/env bash
-set -ex
-
-rm -rf /aws-data-wrangler/dist/pyarrow_files
-
-pushd arrow/python
-
-export ARROW_PRE_0_15_IPC_FORMAT=0
-export PYARROW_WITH_HDFS=0
-export PYARROW_WITH_FLIGHT=0
-export PYARROW_WITH_GANDIVA=0
-export PYARROW_WITH_ORC=0
-export PYARROW_WITH_CUDA=0
-export PYARROW_WITH_PARQUET=1
-
-python setup.py build_ext \
-    --build-type=release \
-    --bundle-arrow-cpp \
-    bdist_wheel
-
-pip install dist/pyarrow-*.whl -t /aws-data-wrangler/dist/pyarrow_files
-
-rm -rf dist
diff --git a/setup-dev-env.sh b/setup-dev-env.sh
index 4f595aa15..692724ee0 100755
--- a/setup-dev-env.sh
+++ b/setup-dev-env.sh
@@ -4,3 +4,4 @@ set -ex
 pip install --upgrade pip
 pip install -r requirements-dev.txt
 pip install -r requirements.txt
+pip install -e .

From c91cccb8aad4fec5b6763065469ef76dbc9269f5 Mon Sep 17 00:00:00 2001
From: igorborgest
Date: Sun, 12 Apr 2020 23:51:19 -0300
Subject: [PATCH 2/3] add exec permission for build-docker-images.sh

---
 building/lambda/build-docker-images.sh | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 building/lambda/build-docker-images.sh

diff --git a/building/lambda/build-docker-images.sh b/building/lambda/build-docker-images.sh
old mode 100644
new mode 100755

From 08d200168bd812ed3356c317b4020a2c81bc01bb Mon Sep 17 00:00:00 2001
From: Igor Tavares
Date: Sun, 12 Apr 2020 23:53:22 -0300
Subject: [PATCH 3/3] Limiting Static Checking to run only on py36

---
 .github/workflows/static-checking.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/static-checking.yml b/.github/workflows/static-checking.yml
index d74026e22..2e2e57d71 100644
--- a/.github/workflows/static-checking.yml
+++ b/.github/workflows/static-checking.yml
@@ -12,7 +12,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.6, 3.7, 3.8]
+        python-version: [3.6]
 
     steps:
     - uses: actions/checkout@v2