Skip to content

Commit

Permalink
PARQUET-1300: [C++] Implement encrypted Parquet read and write support
Browse files Browse the repository at this point in the history
Adds encrypted file read and write per the additions to the Parquet
specification.

Closes #4826

Lead-authored-by: Ha Thi Tham <thamht01188@gmail.com>
Co-authored-by: Revital Sur <eres@il.ibm.com>
Co-authored-by: Gidon Gershinsky <gidon@il.ibm.com>
Co-authored-by: <Gal.Lushi@ibm.com>
Co-authored-by: Deepak Majeti <deepak.majeti@microfocus.com>
Co-authored-by: Wes McKinney <wesm+git@apache.org>
Signed-off-by: Wes McKinney <wesm+git@apache.org>
  • Loading branch information
5 people committed Oct 30, 2019
1 parent 0885a64 commit 41753ac
Show file tree
Hide file tree
Showing 41 changed files with 5,518 additions and 233 deletions.
3 changes: 3 additions & 0 deletions .travis.yml
Expand Up @@ -64,6 +64,7 @@ matrix:
- ARROW_TRAVIS_GANDIVA_JAVA=1
- ARROW_TRAVIS_ORC=1
- ARROW_TRAVIS_PARQUET=1
- ARROW_TRAVIS_PARQUET_ENCRYPTION=1
- ARROW_TRAVIS_PLASMA=1
- ARROW_TRAVIS_S3=1
- ARROW_TRAVIS_USE_SYSTEM_JAVA=1
Expand Down Expand Up @@ -129,6 +130,7 @@ matrix:
- ARROW_TRAVIS_ORC=1
- ARROW_TRAVIS_S3=1
- ARROW_TRAVIS_PARQUET=1
- ARROW_TRAVIS_PARQUET_ENCRYPTION=1
# TODO(ARROW-4763): llvm and llvmdev packages are in conflict:
# https://github.com/conda-forge/llvmdev-feedstock/issues/60
# - ARROW_TRAVIS_GANDIVA=1
Expand Down Expand Up @@ -274,6 +276,7 @@ matrix:
dist: xenial
env:
- ARROW_TRAVIS_PARQUET=1
- ARROW_TRAVIS_PARQUET_ENCRYPTION=1
- ARROW_TRAVIS_USE_SYSTEM=1
- ARROW_TRAVIS_MIMALLOC=1
before_install:
Expand Down
1 change: 1 addition & 0 deletions ci/appveyor-cpp-build-mingw.bat
Expand Up @@ -52,6 +52,7 @@ cmake ^
-DARROW_WITH_SNAPPY=ON ^
-DARROW_WITH_BROTLI=ON ^
-DARROW_PARQUET=ON ^
-DPARQUET_REQUIRE_ENCRYPTION=ON ^
-DARROW_PYTHON=ON ^
-DARROW_USE_GLOG=OFF ^
-DCMAKE_BUILD_TYPE=%CMAKE_BUILD_TYPE% ^
Expand Down
1 change: 1 addition & 0 deletions ci/cpp-msvc-build-main.bat
Expand Up @@ -79,6 +79,7 @@ cmake -G "%GENERATOR%" %CMAKE_ARGS% ^
-DARROW_S3=%ARROW_S3% ^
-DARROW_MIMALLOC=ON ^
-DARROW_PARQUET=ON ^
-DPARQUET_REQUIRE_ENCRYPTION=ON ^
-DPARQUET_BUILD_EXECUTABLES=ON ^
-DARROW_PYTHON=ON ^
.. || exit /B
Expand Down
4 changes: 4 additions & 0 deletions ci/travis_before_script_cpp.sh
Expand Up @@ -141,6 +141,10 @@ if [ "$ARROW_TRAVIS_PARQUET" == "1" ]; then
-DPARQUET_BUILD_EXECUTABLES=ON"
fi

# Opt-in: when ARROW_TRAVIS_PARQUET_ENCRYPTION is "1", pass
# -DPARQUET_REQUIRE_ENCRYPTION=ON to CMake via the common flag string.
if [ "${ARROW_TRAVIS_PARQUET_ENCRYPTION}" == "1" ]
then
  CMAKE_COMMON_FLAGS="${CMAKE_COMMON_FLAGS} -DPARQUET_REQUIRE_ENCRYPTION=ON"
fi

if [ "$ARROW_TRAVIS_GANDIVA" == "1" ]; then
CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_GANDIVA=ON"
if [ "$ARROW_TRAVIS_GANDIVA_JAVA" == "1" ]; then
Expand Down
1 change: 1 addition & 0 deletions ci/travis_script_python.sh
Expand Up @@ -141,6 +141,7 @@ cmake -GNinja \
-DARROW_WITH_SNAPPY=ON \
-DARROW_WITH_BROTLI=ON \
-DARROW_PARQUET=on \
-DPARQUET_REQUIRE_ENCRYPTION=on \
-DARROW_PLASMA=on \
-DARROW_TENSORFLOW=on \
-DARROW_PYTHON=on \
Expand Down
4 changes: 4 additions & 0 deletions ci/windows-pkg-arrow-for-r.sh
Expand Up @@ -60,6 +60,10 @@ mkdir deps40 && cd deps40
# double-conversion is only available in the Rtools4.0 builds, but apparently that's ok
wget https://dl.bintray.com/rtools/mingw64/mingw-w64-x86_64-double-conversion-3.1.2-1-any.pkg.tar.xz
wget https://dl.bintray.com/rtools/mingw32/mingw-w64-i686-double-conversion-3.1.2-1-any.pkg.tar.xz

# Fetch the Rtools OpenSSL 1.1.1.a packages for both the 64-bit (x86_64) and
# 32-bit (i686) mingw toolchains.
# NOTE(review): presumably needed for Parquet encryption support, which
# requires OpenSSL -- confirm against the build configuration.
wget https://dl.bintray.com/rtools/mingw64/mingw-w64-x86_64-openssl-1.1.1.a-1-any.pkg.tar.xz
wget https://dl.bintray.com/rtools/mingw32/mingw-w64-i686-openssl-1.1.1.a-1-any.pkg.tar.xz

# These are the other Rtools 4.0 packages, for future reference
# wget https://dl.bintray.com/rtools/mingw32/mingw-w64-i686-boost-1.67.0-9002-any.pkg.tar.xz
# wget https://dl.bintray.com/rtools/mingw64/mingw-w64-x86_64-boost-1.67.0-9002-any.pkg.tar.xz
Expand Down
10 changes: 3 additions & 7 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Expand Up @@ -946,7 +946,6 @@ if(ARROW_WITH_BROTLI)
include_directories(SYSTEM ${BROTLI_INCLUDE_DIR})
endif()

set(ARROW_USE_OPENSSL OFF)
if(PARQUET_REQUIRE_ENCRYPTION AND NOT ARROW_PARQUET)
set(PARQUET_REQUIRE_ENCRYPTION OFF)
endif()
Expand All @@ -959,19 +958,16 @@ if(BREW_BIN AND NOT OPENSSL_ROOT_DIR)
set(OPENSSL_ROOT_DIR ${OPENSSL_BREW_PREFIX})
endif()
endif()

# Decide whether the build uses OpenSSL. Start from OFF; the branches below
# switch ARROW_USE_OPENSSL to ON once OpenSSL has been located.
set(ARROW_USE_OPENSSL OFF)
if(PARQUET_REQUIRE_ENCRYPTION OR ARROW_FLIGHT OR ARROW_S3)
# OpenSSL is mandatory when any of these options is enabled: REQUIRED makes
# the configure step fail if a suitable OpenSSL cannot be found.
find_package(OpenSSL ${ARROW_OPENSSL_REQUIRED_VERSION} REQUIRED)
set(ARROW_USE_OPENSSL ON)
elseif(ARROW_PARQUET)
# Enable Parquet encryption if OpenSSL is there, but don't fail if it's not
# (QUIET suppresses the not-found message; OPENSSL_FOUND reports the result).
find_package(OpenSSL ${ARROW_OPENSSL_REQUIRED_VERSION} QUIET)
if(OPENSSL_FOUND)
set(ARROW_USE_OPENSSL ON)
endif()
endif()

if(ARROW_USE_OPENSSL)
message(STATUS "Found OpenSSL Crypto Library: ${OPENSSL_CRYPTO_LIBRARY}")
message(STATUS "Building with OpenSSL (Version: ${OPENSSL_VERSION}) support")

# OpenSSL::SSL and OpenSSL::Crypto were not added to
Expand Down
15 changes: 15 additions & 0 deletions cpp/examples/parquet/CMakeLists.txt
Expand Up @@ -21,6 +21,15 @@ add_executable(parquet-arrow-example parquet-arrow/reader-writer.cc)
target_include_directories(parquet-low-level-example PRIVATE low-level-api/)
target_include_directories(parquet-low-level-example2 PRIVATE low-level-api/)

if (PARQUET_REQUIRE_ENCRYPTION)
  # The encryption examples are only built when PARQUET_REQUIRE_ENCRYPTION is
  # enabled. Each one gets the low-level-api/ directory on its private include
  # path and is linked against parquet_static.

  # Basic encrypted reader/writer example.
  add_executable(parquet-encryption-example
                 low-level-api/encryption-reader-writer.cc)
  target_include_directories(parquet-encryption-example PRIVATE low-level-api/)
  target_link_libraries(parquet-encryption-example parquet_static)

  # Example exercising all crypto options.
  add_executable(parquet-encryption-example-all-crypto-options
                 low-level-api/encryption-reader-writer-all-crypto-options.cc)
  target_include_directories(parquet-encryption-example-all-crypto-options
                             PRIVATE low-level-api/)
  target_link_libraries(parquet-encryption-example-all-crypto-options
                        parquet_static)
endif()

# Prefer shared linkage but use static if shared build is deactivated
if (ARROW_BUILD_SHARED)
set(PARQUET_EXAMPLE_LINK_LIBS parquet_shared)
Expand All @@ -36,3 +45,9 @@ add_dependencies(parquet
parquet-low-level-example
parquet-low-level-example2
parquet-arrow-example)

if (PARQUET_REQUIRE_ENCRYPTION)
  # When encryption is enabled, make the `parquet` target depend on the
  # encryption example executables so they are built along with it.
  add_dependencies(parquet parquet-encryption-example)
  add_dependencies(parquet parquet-encryption-example-all-crypto-options)
endif()

0 comments on commit 41753ac

Please sign in to comment.