Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gzip pre-decompress w/IAA #6176

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions CMake/resolve_dependency_modules/inteliaa.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Process this module at most once for the whole build, even if it is included
# from several places.
include_guard(GLOBAL)

# Pinned Intel QPL release: version, SHA256 of the release tarball, and the
# GitHub archive URL derived from the version.
set(VELOX_INTELIAA_VERSION 1.3.0)
set(VELOX_INTELIAA_BUILD_SHA256_CHECKSUM
c3eba4d04a9d7aabcf26c9eaf81f6e9b26d19cb1b87a4a5f197a652cfa98f310)
set(VELOX_INTELIAA_SOURCE_URL
"https://github.com/intel/qpl/archive/refs/tags/v${VELOX_INTELIAA_VERSION}.tar.gz"
)

# NOTE(review): resolve_dependency_url() is defined outside this file. It
# presumably lets the URL/checksum be overridden and rewrites the checksum into
# the "SHA256=<hash>" form that URL_HASH expects -- confirm against its
# definition.
resolve_dependency_url(INTELIAA)

message(STATUS "Building Intel IAA from source")

# ExternalProject_Add() is provided by this module; including it again is
# harmless if another dependency module already did so.
include(ExternalProject)

# Everything produced by the QPL build is installed under this prefix inside
# the build tree.
set(QPL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/qpl_ep/install")
set(QPL_STATIC_LIB_NAME
    ${CMAKE_STATIC_LIBRARY_PREFIX}qpl${CMAKE_STATIC_LIBRARY_SUFFIX})
set(QPL_STATIC_LIB "${QPL_PREFIX}/lib/${QPL_STATIC_LIB_NAME}")

# Libraries that consumers of the static QPL archive must link against. They
# are attached to the imported target below instead of being appended to the
# global CMAKE_CXX_FLAGS: "-l"/"-L" are linker inputs, not compiler flags, and
# putting them into CMAKE_CXX_FLAGS leaks them into every target of the project
# and places them before the object files on the link line.
find_library(
  QPL_ACCEL_CONFIG_LIBRARY
  NAMES accel-config
  HINTS /usr/lib64)
if(NOT QPL_ACCEL_CONFIG_LIBRARY)
  # Fall back to the bare name and let the linker search its default paths.
  set(QPL_ACCEL_CONFIG_LIBRARY accel-config)
endif()
set(QPL_LINK_DEPENDENCIES ${QPL_ACCEL_CONFIG_LIBRARY} ${CMAKE_DL_LIBS})

# Build only the library itself; tests and examples are not needed.
set(QPL_CMAKE_ARGS
    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
    -DCMAKE_INSTALL_LIBDIR=${QPL_PREFIX}/lib
    -DCMAKE_INSTALL_PREFIX=${QPL_PREFIX}
    -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
    -DQPL_BUILD_TESTS=OFF
    -DQPL_BUILD_EXAMPLES=OFF
    -DQPL_LIB=ON)

ExternalProject_Add(
  intel_iaa
  URL ${VELOX_INTELIAA_SOURCE_URL}
  URL_HASH ${VELOX_INTELIAA_BUILD_SHA256_CHECKSUM}
  BUILD_BYPRODUCTS "${QPL_STATIC_LIB}"
  CMAKE_ARGS ${QPL_CMAKE_ARGS})

# The include directory of an imported target has to exist at configure time,
# i.e. before the external project has been built and installed.
file(MAKE_DIRECTORY "${QPL_PREFIX}/include")

add_library(iaa::iaa UNKNOWN IMPORTED)
set(QPL_LIBRARIES ${QPL_STATIC_LIB})
set(QPL_INCLUDE_DIRS "${QPL_PREFIX}/include")
set_target_properties(
  iaa::iaa
  PROPERTIES IMPORTED_LOCATION "${QPL_LIBRARIES}"
             INTERFACE_INCLUDE_DIRECTORIES "${QPL_INCLUDE_DIRS}"
             INTERFACE_LINK_LIBRARIES "${QPL_LINK_DEPENDENCIES}")

# Depend on the external project's main target. Per-step targets such as
# "intel_iaa-build" only exist when STEP_TARGETS is requested, which it is not,
# so depending on one would not order the QPL build before its consumers.
add_dependencies(iaa::iaa intel_iaa)
11 changes: 11 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ option(
VELOX_ENABLE_INT64_BUILD_PARTITION_BOUND
"make buildPartitionBounds_ a vector int64 instead of int32 to avoid integer overflow when the hashtable has billions of records"
OFF)
# Opt-in switch for Intel IAA support; it is OFF unless the user enables it.
option(VELOX_ENABLE_INTEL_IAA "Enable Intel IAA support" OFF)

# Explicitly force compilers to generate colored output. Compilers usually do
# this by default if they detect the output is a terminal, but this assumption
Expand Down Expand Up @@ -256,6 +257,11 @@ if(VELOX_ENABLE_PARQUET)
set(VELOX_ENABLE_ARROW ON)
endif()

if(VELOX_ENABLE_INTEL_IAA)
  # Expose the build option to the C++ sources as the VELOX_ENABLE_INTEL_IAA
  # preprocessor macro. add_compile_definitions() matches the style used for
  # the other feature macros in this file and takes the name without "-D".
  add_compile_definitions(VELOX_ENABLE_INTEL_IAA)
  message(STATUS "Intel IAA acceleration enabled")
endif()

# define processor variable for conditional compilation
if(${VELOX_CODEGEN_SUPPORT})
add_compile_definitions(CODEGEN_ENABLED=1)
Expand Down Expand Up @@ -474,6 +480,11 @@ if(NOT ${VELOX_BUILD_MINIMAL})
include_directories(${Protobuf_INCLUDE_DIRS})
endif()

# Only pull in the Intel IAA (QPL) dependency when the feature is enabled.
# NOTE(review): set_source() and resolve_dependency() are defined outside this
# file; presumably they select and include the "inteliaa" dependency module --
# confirm against their definitions.
if(VELOX_ENABLE_INTEL_IAA)
set_source(inteliaa)
resolve_dependency(inteliaa)
endif()

# GCC needs to link a library to enable std::filesystem.
if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
set(FILESYSTEM "stdc++fs")
Expand Down
6 changes: 6 additions & 0 deletions velox/dwio/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,9 @@ target_link_libraries(
Boost::regex
Folly::folly
glog::glog)

# When Intel IAA support is enabled, build the QPL job pool as its own library
# and link it into velox_dwio_common.
if(VELOX_ENABLE_INTEL_IAA)
# Library holding the QPL job pool implementation.
add_library(velox_dwio_qpl QplJobPool.cpp)
# The job pool uses QPL (iaa::iaa) and folly.
target_link_libraries(velox_dwio_qpl iaa::iaa Folly::folly)
# Keyword-less signature, so it can be combined with other keyword-less
# target_link_libraries() calls on velox_dwio_common.
target_link_libraries(velox_dwio_common velox_dwio_qpl)
endif()
128 changes: 128 additions & 0 deletions velox/dwio/common/QplJobPool.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/*
yaqi-zhao marked this conversation as resolved.
Show resolved Hide resolved
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/dwio/common/QplJobPool.h"
#include <folly/Random.h>
#include "velox/common/base/Exceptions.h"

namespace facebook::velox::dwio::common {

// Out-of-class definition of the static per-slot lock flags declared in
// QplJobHWPool. A flag is set to true by tryLockJob() while the slot is in use
// and set back to false by releaseJob(); allocateQPLJob() stores false into the
// flag of every slot it initializes.
std::array<std::atomic<bool>, QplJobHWPool::MAX_JOB_NUMBER>
QplJobHWPool::hwJobPtrLocks;

// Returns the shared pool object. It is a function-local static, so it is
// constructed the first time this function is called.
QplJobHWPool& QplJobHWPool::getInstance() {
  static QplJobHWPool instance;
  return instance;
}

// Creates the pool and tries to initialize all hardware jobs.
//
// iaaJobReady is explicitly initialized to false before use. The flag has no
// initializer of its own, so testing it before allocation (as was done
// previously) read an indeterminate value for any instance without static
// storage duration. allocateQPLJob() sets the flag to true only when every job
// was initialized successfully.
QplJobHWPool::QplJobHWPool() : iaaJobReady(false) {
  allocateQPLJob();
}

// Finalizes every job that was successfully initialized and marks the pool as
// not ready.
//
// The loop walks the pointer vector itself rather than counting to
// MAX_JOB_NUMBER, so it stays in bounds even if the vector was never resized.
// Slots whose initialization failed hold nullptr and are skipped.
QplJobHWPool::~QplJobHWPool() {
  for (auto& job : hwJobPtrPool) {
    if (job != nullptr) {
      qpl_fini_job(job);
      job = nullptr;
    }
  }
  iaaJobReady = false;
}

/**
 * Allocate the backing buffer for MAX_JOB_NUMBER qpl jobs, initialize each job
 * and store its pointer in hwJobPtrPool.
 *
 * On success iaaJobReady is set to true. If the job size cannot be queried or
 * any job fails to initialize, a warning is logged, iaaJobReady stays false and
 * the function returns early; jobs initialized before the failure remain in
 * hwJobPtrPool so that the destructor can finalize them.
 */
void QplJobHWPool::allocateQPLJob() {
  iaaJobReady = false;

  // Size the pointer pool up front (all nullptr) so that it can be indexed
  // safely even when initialization fails below.
  hwJobPtrPool.assign(MAX_JOB_NUMBER, nullptr);

  // Get size required for saving a single qpl job object.
  uint32_t job_size = 0;
  const auto sizeStatus = qpl_get_job_size(qpl_path, &job_size);
  if (sizeStatus != QPL_STS_OK || job_size == 0) {
    LOG(WARNING) << "Failed to query the Intel IAA job size, status: "
                 << sizeStatus
                 << ". Please check if Intel In-Memory Analytics Accelerator "
                    "(IAA) is properly set up!";
    return;
  }

  // Allocate one contiguous buffer for all job objects. The multiplication is
  // done in size_t to avoid 32-bit overflow.
  hwJobsBuffer = std::make_unique<uint8_t[]>(
      static_cast<size_t>(job_size) * MAX_JOB_NUMBER);

  // Each job object lives at a fixed offset inside the buffer.
  for (uint32_t i = 0; i < MAX_JOB_NUMBER; ++i) {
    auto* qplJobPtr = reinterpret_cast<qpl_job*>(
        hwJobsBuffer.get() + static_cast<size_t>(i) * job_size);
    const auto status = qpl_init_job(qpl_path, qplJobPtr);
    if (status != QPL_STS_OK) {
      LOG(WARNING) << "Initialization of hardware IAA failed, status: "
                   << status
                   << ". Please check if Intel In-Memory Analytics Accelerator "
                      "(IAA) is properly set up!";
      return;
    }
    hwJobPtrPool[i] = qplJobPtr;
    hwJobPtrLocks[i].store(false);
  }

  iaaJobReady = true;
}

/**
* Acquire a deflate job.
* QplJobHWPool maintains MAX_JOB_NUMBER job slot to avoid frequently allocate,
* initialize and release job. Random slots is used to select a job and
* tryLockJob will check if the job is free.
* @return job_id and qpl_job pair
*/
std::pair<int, qpl_job*> QplJobHWPool::acquireDeflateJob() {
std::pair<int, qpl_job*> res;
res.first = -1;
res.second = nullptr;
if (!job_ready()) {
return res;
}
uint32_t retry = 0;
uint32_t index = folly::Random::rand32(1, MAX_JOB_NUMBER - 1);
while (!tryLockJob(index)) {
yaqi-zhao marked this conversation as resolved.
Show resolved Hide resolved
index = folly::Random::rand32(1, MAX_JOB_NUMBER - 1);
retry++;
if (retry > MAX_JOB_NUMBER) {
return res;
}
}
res.first = index;
if (index >= MAX_JOB_NUMBER) {
return res;
}
res.second = hwJobPtrPool[index];

return res;
}

// Marks the slot identified by job_id as free again. Ids outside the range
// [1, MAX_JOB_NUMBER - 1] are ignored.
void QplJobHWPool::releaseJob(int job_id) {
  const bool validId =
      job_id > 0 && static_cast<uint32_t>(job_id) < MAX_JOB_NUMBER;
  if (validId) {
    hwJobPtrLocks[job_id].store(false);
  }
}

// Attempts to take the lock flag of slot `index` by atomically switching it
// from false to true. Returns true if this call took the lock, false if the
// slot was already locked. `index` must be below MAX_JOB_NUMBER.
bool QplJobHWPool::tryLockJob(uint32_t index) {
  VELOX_CHECK_LT(index, MAX_JOB_NUMBER);
  bool unlocked = false;
  return hwJobPtrLocks[index].compare_exchange_strong(unlocked, true);
}

} // namespace facebook::velox::dwio::common
97 changes: 97 additions & 0 deletions velox/dwio/common/QplJobPool.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <array>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <mutex>
#include <random>
#include <utility>
#include <vector>

#include "qpl/qpl.h"

namespace facebook::velox::dwio::common {

// QplJobHWPool is a resource pool that provides the jobs to be submitted to
// Intel® IAA. Memory for the Intel® IAA jobs is allocated when the
// QplJobHWPool instance is created.
//
// The Intel® In-Memory Analytics Accelerator (Intel® IAA) is a hardware
// accelerator that provides very high throughput compression and decompression
// combined with primitive analytic functions. It primarily targets applications
// such as big-data and in-memory analytic databases, as well as
// application-transparent usages such as memory page compression. Intel® IAA
// contains two main functional blocks: Compression and Analytics. The Analytics
// pipe contains two sub-blocks: Decompress and Filter. These functions are tied
// together, so that each analytics operation can perform decompress-only,
// filter-only, or decompress-and-filter processing.
//
// Intel QPL is a library that provides an application programming interface
// (API) for interacting with Intel® In-Memory Analytics Accelerator (Intel®
// IAA) hardware.
//
// Intel® IAA:
// https://www.intel.com/content/www/us/en/content-details/780887/intel-in-memory-analytics-accelerator-intel-iaa.html
// Intel QPL:
// https://intel.github.io/qpl/documentation/introduction_docs/introduction.html
class QplJobHWPool {
 public:
  // Returns the shared pool instance.
  static QplJobHWPool& getInstance();

  // Allocates and initializes the QPL jobs; see job_ready() for the outcome.
  QplJobHWPool();

  // Finalizes all initialized QPL jobs.
  ~QplJobHWPool();

  // The pool hands out raw pointers into its own buffer, so it must not be
  // copied.
  QplJobHWPool(const QplJobHWPool&) = delete;
  QplJobHWPool& operator=(const QplJobHWPool&) = delete;

  // Release QPL job by the job_id.
  void releaseJob(int job_id);

  // Return if the QPL jobs were allocated successfully.
  const bool& job_ready() const {
    return iaaJobReady;
  }

  // Try to lock a free job slot. Returns the job id together with the job
  // pointer.
  std::pair<int, qpl_job*> acquireDeflateJob();

  /**
   * Get qpl job by job id
   * @param job_id the job id or index in the qpl job pool
   * @return nullptr if the job id is invalid (<= 0 or not below the number of
   * slots currently held by the pool)
   */
  qpl_job* getJobById(int job_id) {
    // Compare against the actual vector size: it is never larger than
    // MAX_JOB_NUMBER and this also covers a pool whose slots were never
    // allocated.
    if (job_id <= 0 || static_cast<size_t>(job_id) >= hwJobPtrPool.size()) {
      return nullptr;
    }
    return hwJobPtrPool[job_id];
  }

  // Max jobs in QPL_JOB_POOL
  static constexpr uint32_t MAX_JOB_NUMBER = 1024;

 private:
  bool tryLockJob(uint32_t index);
  void allocateQPLJob();

  // Execution path requested from QPL for all jobs of this pool.
  qpl_path_t qpl_path = qpl_path_hardware;

  // Entire buffer for storing all job objects
  std::unique_ptr<uint8_t[]> hwJobsBuffer;

  // Job pool for storing all job object pointers
  std::vector<qpl_job*> hwJobPtrPool;

  // True once all jobs have been initialized successfully. Initialized to
  // false so that it never holds an indeterminate value.
  bool iaaJobReady{false};

  // Locks for accessing each job object pointers
  static std::array<std::atomic<bool>, MAX_JOB_NUMBER> hwJobPtrLocks;
};

} // namespace facebook::velox::dwio::common
yaqi-zhao marked this conversation as resolved.
Show resolved Hide resolved
Loading
Loading