diff --git a/.github/workflows/github-ci.yml b/.github/workflows/github-ci.yml
index 6c29546..ee5e9c0 100644
--- a/.github/workflows/github-ci.yml
+++ b/.github/workflows/github-ci.yml
@@ -9,19 +9,25 @@ on:
     branches:
       - main
       - development
-      - "[0-9]+.[0-9]+.[0-9]+"
+      - "ipcl_v[0-9]+.[0-9]+.[0-9]+"
   push:
     branches:
       - main
       - development
-      - "[0-9]+.[0-9]+.[0-9]+"
+      - "ipcl_v[0-9]+.[0-9]+.[0-9]+"
 
   # Manually run this workflow on any specified branch.
   workflow_dispatch:
 
-##############
-# IceLake CI #
-##############
+###################
+# Define env vars #
+###################
+env:
+  IPCL_VER: 2.0.0
+  IPCL_DIR: ${GITHUB_WORKSPACE}/ipcl_install
+  IPCL_HINT_DIR: >
+      -DIPCL_HINT_DIR=${GITHUB_WORKSPACE}/ipcl_install/lib/cmake/ipcl-${IPCL_VER}
+
 jobs:
   format:
     name: Format check
@@ -37,69 +43,140 @@ jobs:
         run: pre-commit run --all-files
 
   build-and-test:
-    name: Build, test and run kernels - shared
+    name: '${{ matrix.build_type }} qat=${{ matrix.enable_qat }} detect_cpu_runtime=${{ matrix.detect_cpu_runtime }} shared=${{ matrix.shared_lib }}'
     needs: [format]
     runs-on: [self-hosted, linux, x64, icx]
-    # Use environment protection (require review)
-    environment: intel_workflow
     defaults:
       run:
         shell: bash
-        working-directory: .
+    strategy:
+      matrix:
+        build_type: [Release, Debug]
+        shared_lib: [ON, OFF]
+        detect_cpu_runtime: [ON, OFF]
+        # qat disabled for ICX - to be added with SPR runner
+        enable_qat: [OFF]
+        include:
+          # run minimum for debug mode
+          - build_type: Debug
+            benchmark_min_time: "--benchmark_min_time=0.001"
+          - build_type: Release
+            benchmark_min_time: ""
     steps:
       - uses: actions/checkout@v2
-      - name: Validate paths
+      - name: Setup cmake
+        uses: jwlawson/actions-setup-cmake@v1.13
+        with:
+          cmake-version: '3.16.x'
+      - name: Validate environment
         run: |
+          set -x
+          export CC=clang-10
+          export CXX=clang++-10
+
+          # Validate paths
           whoami
           echo $HOME
           echo $GITHUB_WORKSPACE
           echo "Testing from branch:"
           echo $GITHUB_REF
+          cmake --version
           pwd
 
-      # Build library
-      - name: Build the repository
+      - name: Build library
         run: |
-          cmake -S . -B build -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_BUILD_TYPE=Release
-          cmake --build build --target all -j
+          # QAT unavailable in ICX - to be added; IPCL_SHARED follows matrix.shared_lib
+          cmake -S . -B build -DCMAKE_BUILD_TYPE=${{ matrix.build_type}} \
+            -DIPCL_DETECT_CPU_RUNTIME=${{ matrix.detect_cpu_runtime }} \
+            -DIPCL_ENABLE_QAT=${{ matrix.enable_qat }} -DIPCL_SHARED=${{ matrix.shared_lib }} \
+            -DCMAKE_INSTALL_PREFIX=./ipcl_install
+          cmake --build build -j$(nproc)
+          cmake --build build --target install
 
-      # Unit tests and examples
-      - name: Run the unit tests
-        run: ./build/test/unittest_ipcl
-
-      - name: Run the benchmarks
-        run: ./build/benchmark/bench_ipcl
+      - name: Run unittest
+        run: |
+          cmake --build build --target unittest
 
-  build-and-test-static:
-    name: Build, test and run kernels - static
-    needs: [format]
-    runs-on: [self-hosted, linux, x64, icx]
-    # Use environment protection (require review)
-    environment: intel_workflow
-    defaults:
-      run:
-        shell: bash
-        working-directory: .
-    steps:
-      - uses: actions/checkout@v2
-      - name: Validate paths
+      - name: Run benchmark
         run: |
-          whoami
-          echo $HOME
-          echo $GITHUB_WORKSPACE
-          echo "Testing from branch:"
-          echo $GITHUB_REF
-          pwd
+          ./build/benchmark/bench_ipcl \
+            --benchmark_out="${GITHUB_WORKFLOW}_${GITHUB_SHA}" \
+            --benchmark_out_format=csv ${{ matrix.benchmark_min_time }}
 
-      # Build library
-      - name: Build the repository
+      - name: Build and run examples
         run: |
-          cmake -S . -B build -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_BUILD_TYPE=Release -DIPCL_SHARED=OFF
-          cmake --build build --target all -j
+          cd $GITHUB_WORKSPACE/example
+          cmake -S . -B build ${{ env.IPCL_HINT_DIR }}
+          cmake --build build
+          cmake --build build --target run_all_examples
+
+      - name: Archive benchmark results  # artifact name is unique per matrix job
+        uses: actions/upload-artifact@v2
+        with:
+          name: bench_ipcl_${{ matrix.build_type }}_shared-${{ matrix.shared_lib }}_cpu-${{ matrix.detect_cpu_runtime }}_${{ github.sha }}.csv
+          path: ${{ github.workspace }}/${{ github.workflow }}_${{ github.sha }}
+          retention-days: 90
+
+
+  #   name: Build, test and run kernels - shared
+  #   needs: [format]
+  #   runs-on: [self-hosted, linux, x64, icx]
+  #   defaults:
+  #     run:
+  #       shell: bash
+  #       working-directory: .
+  #   steps:
+  #     - uses: actions/checkout@v2
+  #     - name: Validate paths
+  #       run: |
+  #         whoami
+  #         echo $HOME
+  #         echo $GITHUB_WORKSPACE
+  #         echo "Testing from branch:"
+  #         echo $GITHUB_REF
+  #         pwd
+
+  #     # Build library
+  #     - name: Build the repository
+  #       run: |
+  #         cmake -S . -B build -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_BUILD_TYPE=Release -DIPCL_ENABLE_QAT=ON
+  #         cmake --build build --target all -j
+
+  #     # Unit tests and examples
+  #     - name: Run the unit tests
+  #       run: ./build/test/unittest_ipcl
+
+  #     - name: Run the benchmarks
+  #       run: ./build/benchmark/bench_ipcl
+
+  # build-and-test-static:
+  #   name: Build, test and run kernels - static
+  #   needs: [format]
+  #   runs-on: [self-hosted, linux, x64, icx]
+  #   defaults:
+  #     run:
+  #       shell: bash
+  #       working-directory: .
+  #   steps:
+  #     - uses: actions/checkout@v2
+  #     - name: Validate paths
+  #       run: |
+  #         whoami
+  #         echo $HOME
+  #         echo $GITHUB_WORKSPACE
+  #         echo "Testing from branch:"
+  #         echo $GITHUB_REF
+  #         pwd
+
+  #     # Build library
+  #     - name: Build the repository
+  #       run: |
+  #         cmake -S . -B build -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_BUILD_TYPE=Release -DIPCL_ENABLE_QAT=ON -DIPCL_SHARED=OFF
+  #         cmake --build build --target all -j
 
-      # Unit tests and examples
-      - name: Run the unit tests
-        run: ./build/test/unittest_ipcl
+  #     # Unit tests and examples
+  #     - name: Run the unit tests
+  #       run: ./build/test/unittest_ipcl
 
-      - name: Run the benchmarks
-        run: ./build/benchmark/bench_ipcl
+  #     - name: Run the benchmarks
+  #       run: ./build/benchmark/bench_ipcl
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 4ef929a..aecc032 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -45,7 +45,7 @@ repos:
           entry: cpplint
           language: system
           files: \.(cxx|cpp|hpp|hxx)$
-          exclude: ipcl/bignum.cpp|example/test.cpp
+          exclude: ipcl/bignum.cpp|module/heqat/heqat/misc/bignum.cpp
           args:
             - --recursive
             - --filter=-runtime/references,-whitespace/comments,-whitespace/indent
@@ -54,7 +54,7 @@ repos:
           entry: cpplint
           language: system
           files: \.(c|cc|h)$
-          exclude: ipcl/include/ipcl/bignum.h
+          exclude: ipcl/include/ipcl/bignum.h|module/heqat/heqat/include/heqat/misc/bignum.h
           args:
             - --recursive
             - --filter=-runtime/references,-whitespace/comments,-whitespace/indent,-readability/casting,-runtime/int,-runtime/printf
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7196e4c..b9b9030 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,12 +3,15 @@
 
 cmake_minimum_required(VERSION 3.15.1)
 
-project(IPCL VERSION 1.1.4 LANGUAGES C CXX)
+project(IPCL VERSION 2.0.0 LANGUAGES C CXX)
 
+# includes
 include(CMakePackageConfigHelpers)
 include(CheckCCompilerFlag)
 include(CheckCXXCompilerFlag)
 include(GNUInstallDirs)
+set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/ipcl)
+include(ipcl-util)
 
 if(CMAKE_BUILD_TYPE)
   set(RELEASE_TYPES
@@ -40,45 +43,127 @@ if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
   set(CMAKE_INSTALL_PREFIX "/opt/intel/ipcl")
 endif()
 
+if(NOT CMAKE_PREFIX_PATH)
+  set(CMAKE_PREFIX_PATH $ENV{HOME}/intel /opt/intel)
+endif()
+
+# Compiler version check - icx/icpx newer than 2021.3.0 is rejected at configure time
+if(CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
+  if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 2021.3.0)
+    message(FATAL_ERROR
+    " ${CMAKE_CXX_COMPILER_ID}-${CMAKE_CXX_COMPILER_VERSION} is not supported."
+    " Please refer to Intel IPP-Crypto (https://github.com/intel/ipp-crypto)"
+    " for more information.")
+  endif()
+endif()
+
 set(CMAKE_C_FLAGS "-O2 -Wno-error=deprecated-declarations")
 set(CMAKE_CXX_FLAGS "-O2 -fpermissive -Wno-error=deprecated-declarations")
-set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR};${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/ippcrypto")
 
+# Add -Wno-error=deprecated-copy if GNU>=9.1
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+  if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 9.1.0)
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-error=deprecated-copy")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=deprecated-copy")
+  endif()
+endif()
+
+set(CMAKE_INSTALL_RPATH "$ORIGIN;$ORIGIN/${CMAKE_INSTALL_LIBDIR};$ORIGIN/ippcrypto")
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_INSTALL_LIBDIR})
 
 #---------------------------------------------------
 option(IPCL_TEST "Enable testing" ON)
 option(IPCL_BENCHMARK "Enable benchmark" ON)
+option(IPCL_ENABLE_QAT "Enable QAT" OFF)
+option(IPCL_USE_QAT_LITE "Enable uses QAT for base and exponent length different than modulus" OFF)
 option(IPCL_ENABLE_OMP "Enable OpenMP testing/benchmarking" ON)
 option(IPCL_THREAD_COUNT "The max number of threads used by OpenMP(If the value is OFF/0, it is determined at runtime)" OFF)
 option(IPCL_DOCS "Enable document building" OFF)
 option(IPCL_SHARED "Build shared library" ON)
-option(IPCL_DETECT_IFMA_RUNTIME "Detect AVX512/IFMA instructions during runtime" OFF)
-option(IPCL_DEBUG_DISABLE_AVX512IFMA "(Debugging) Disable usage of AVX512IFMA instructions" OFF)
+option(IPCL_DETECT_CPU_RUNTIME "Detect CPU supported instructions during runtime" OFF)
+option(IPCL_INTERNAL_PYTHON_BUILD "Additional steps for IPCL_Python build" OFF)
+
+# Used only for ipcl_python IPCL_INTERNAL_PYTHON_BUILD - additional check if invalid parameters
+if(IPCL_INTERNAL_PYTHON_BUILD)
+  if(NOT DEFINED CMAKE_LIBRARY_OUTPUT_DIRECTORY)
+    set(IPCL_INTERNAL_PYTHON_BUILD OFF)
+  elseif(NOT IS_ABSOLUTE ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
+    set(IPCL_INTERNAL_PYTHON_BUILD OFF)
+  endif()
+endif()
+
+if(IPCL_ENABLE_QAT)
+  ipcl_detect_qat()
+  if(IPCL_FOUND_QAT)
+    add_compile_definitions(IPCL_USE_QAT)
+    set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH};$ORIGIN/../heqat")
+    if(IPCL_USE_QAT_LITE)
+      add_compile_definitions(IPCL_USE_QAT_LITE)
+      message(STATUS "QAT Lite enabled - IPCL_USE_QAT_LITE set to ON")
+    else()
+      message(STATUS "QAT Lite disabled - IPCL_USE_QAT_LITE set to OFF")
+    endif()
+  else()
+    set(IPCL_ENABLE_QAT OFF)
+  endif()
+endif()
+
+if(IPCL_THREAD_COUNT LESS_EQUAL 1)
+  set(IPCL_ENABLE_OMP OFF)
+endif()
+
 if(IPCL_ENABLE_OMP)
 	add_compile_definitions(IPCL_USE_OMP)
-    if(IPCL_THREAD_COUNT)
-	    add_compile_definitions(IPCL_NUM_THREADS=${IPCL_THREAD_COUNT})
+  ipcl_get_core_thread_count(num_cores num_threads num_nodes)
+  if(IPCL_THREAD_COUNT)
+    # if thread_count is invalid, set to maximum threads
+    if(IPCL_THREAD_COUNT GREATER num_threads)
+      set(IPCL_THREAD_COUNT ${num_threads})
     endif()
+    add_compile_definitions(IPCL_NUM_THREADS=${IPCL_THREAD_COUNT})
+  endif()
 endif()
 
-if(CMAKE_BUILD_TYPE STREQUAL "Debug")
-  set(IPCL_DEBUG ON)
+if(IPCL_DETECT_CPU_RUNTIME)
+  # add_compile_definitions(IPCL_RUNTIME_MOD_EXP)
+  add_compile_definitions(IPCL_RUNTIME_DETECT_CPU_FEATURES)
+  set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH};$ORIGIN/cpufeatures")
 else()
-  set(IPCL_DEBUG OFF)
-endif()
+  # set cpu node count parsed from lscpu precompile
+  add_compile_definitions(IPCL_NUM_NODES=${num_nodes})
 
-set(IPCL_CMAKE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/ipcl")
+  # check whether cpu support avx512ifma instructions
+  ipcl_detect_lscpu_flag("avx512ifma")
+  if(IPCL_FOUND_avx512ifma)
+    add_compile_definitions(IPCL_CRYPTO_MB_MOD_EXP)
+  endif()
+
+  # check whether cpu support rdseed/rdrand instructions
+  ipcl_detect_lscpu_flag("rdseed")
+  if(IPCL_FOUND_rdseed)
+    add_compile_definitions(IPCL_RNG_INSTR_RDSEED)
+  else()
+    ipcl_detect_lscpu_flag("rdrand")
+    if(IPCL_FOUND_rdrand)
+      add_compile_definitions(IPCL_RNG_INSTR_RDRAND)
+    else()
+      message(WARNING
+          "CPU doesn't support RDSEED and RDRAND instruction, using IPP-Crypto"
+          " S/W pseudo random number generator"
+      )
+    endif()
+  endif()
+endif()
 
 message(STATUS "CMAKE_BUILD_TYPE:           ${CMAKE_BUILD_TYPE}")
 message(STATUS "CMAKE_C_COMPILER:           ${CMAKE_C_COMPILER}")
 message(STATUS "CMAKE_CXX_COMPILER:         ${CMAKE_CXX_COMPILER}")
 message(STATUS "CMAKE_INSTALL_PREFIX:       ${CMAKE_INSTALL_PREFIX}")
-message(STATUS "CMAKE_INSTALL_LIBDIR:       ${CMAKE_INSTALL_FULL_LIBDIR}")
-message(STATUS "CMAKE_INSTALL_INCLUDEDIR:   ${CMAKE_INSTALL_FULL_INCLUDEDIR}")
+message(STATUS "CMAKE_PREFIX_PATH:          ${CMAKE_PREFIX_PATH}")
 message(STATUS "IPCL_TEST:                  ${IPCL_TEST}")
 message(STATUS "IPCL_BENCHMARK:             ${IPCL_BENCHMARK}")
 message(STATUS "IPCL_ENABLE_OMP:            ${IPCL_ENABLE_OMP}")
+message(STATUS "IPCL_ENABLE_QAT:            ${IPCL_ENABLE_QAT}")
 if (IPCL_ENABLE_OMP)
   message(STATUS "IPCL_THREAD_COUNT:          ${IPCL_THREAD_COUNT}")
 else()
@@ -86,11 +171,10 @@ else()
 endif()
 message(STATUS "IPCL_DOCS:                  ${IPCL_DOCS}")
 message(STATUS "IPCL_SHARED:                ${IPCL_SHARED}")
-message(STATUS "IPCL_DETECT_IFMA_RUNTIME:   ${IPCL_DETECT_IFMA_RUNTIME}")
-
-set(IPCL_ROOT_DIR ${CMAKE_CURRENT_LIST_DIR})
-set(IPCL_SRC_DIR ${IPCL_ROOT_DIR}/ipcl)
-set(IPCL_INC_DIR ${IPCL_SRC_DIR}/include)
+message(STATUS "IPCL_DETECT_CPU_RUNTIME:    ${IPCL_DETECT_CPU_RUNTIME}")
+if(IPCL_INTERNAL_PYTHON_BUILD)
+  message(STATUS "IPCL_INTERNAL_PYTHON_BUILD: ${IPCL_INTERNAL_PYTHON_BUILD}")
+endif()
 
 set(IPCL_FORWARD_CMAKE_ARGS
     -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
@@ -104,59 +188,43 @@ set(IPCL_FORWARD_CMAKE_ARGS
     -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
 )
 
-if(IPCL_DETECT_IFMA_RUNTIME)
-  add_compile_definitions(IPCL_RUNTIME_MOD_EXP)
-  set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH};${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/cpufeatures")
-else()
-  # check whether cpu support avx512 flag
-  if(IPCL_DEBUG_DISABLE_AVX512IFMA)
-    message(STATUS "Support AVX512IFMA: False")
-  else()
-    set(CPU_AVX512_FLAG "avx512ifma")
-    execute_process(COMMAND lscpu COMMAND grep ${CPU_AVX512_FLAG}  OUTPUT_VARIABLE CPU_ENABLE_AVX512)
-    if("${CPU_ENABLE_AVX512}" STREQUAL "")
-      message(STATUS "Support AVX512IFMA: False")
-    else()
-      message(STATUS "Support AVX512IFMA: True")
-      add_compile_definitions(IPCL_CRYPTO_MB_MOD_EXP)
-    endif()
-  endif()
-endif()
-
-# check whether cpu support rdseed or rdrand instruction
-set(CPU_RDSEED_FLAG "rdseed")
-execute_process(COMMAND lscpu COMMAND grep ${CPU_RDSEED_FLAG}  OUTPUT_VARIABLE CPU_ENABLE_RDSEED)
-if("${CPU_ENABLE_RDSEED}" STREQUAL "")
-  set(CPU_RDRAND_FLAG "rdrand")
-  execute_process(COMMAND lscpu COMMAND grep ${CPU_RDRAND_FLAG}  OUTPUT_VARIABLE CPU_ENABLE_RDRAND)
-  if("${CPU_ENABLE_RDRAND}" STREQUAL "")
-    message(WARNING "CPU doesn't support RDSEED and RDRAND instruction, using random generator will cause errors.")
-  else ()
-    message(STATUS "Support RDRAND instruction: True")
-    add_compile_definitions(IPCL_RNG_INSTR_RDRAND)
-  endif()
+# global IPCL folders
+set(IPCL_ROOT_DIR ${CMAKE_CURRENT_LIST_DIR})
+set(IPCL_SRC_DIR ${IPCL_ROOT_DIR}/ipcl)
+set(IPCL_INC_DIR ${IPCL_SRC_DIR}/include)
+set(IPCL_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/ipcl)
+set(IPCL_INSTALL_LIBDIR ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/ipcl)
+set(IPCL_CMAKE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/ipcl")
+if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+  set(IPCL_DEBUG ON)
 else()
-  message(STATUS "Support RDSEED instruction: True")
-  add_compile_definitions(IPCL_RNG_INSTR_RDSEED)
+  set(IPCL_DEBUG OFF)
 endif()
 
-# find package for OpenSSL and Threads
-# set(OPENSSL_USE_STATIC_LIBS TRUE)
+# find package: Threads config
+set(CMAKE_THREAD_PREFER_PTHREAD ON)
 find_package(Threads REQUIRED)
+
+# find package: OpenSSL config
 find_package(OpenSSL REQUIRED)
 
 # External dependencies
-set(CMAKE_THREAD_PREFER_PTHREAD ON)
-set(THREADS_PREFER_PTHREAD_FLAG ON)
-
-set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/ipcl)
-include(ipcl-util)
-
 include(cmake/ippcrypto.cmake)
-if(IPCL_DETECT_IFMA_RUNTIME)
+include(cmake/cereal.cmake)
+
+if(IPCL_DETECT_CPU_RUNTIME)
   include(cmake/cpufeatures.cmake)
 endif()
 
+if(IPCL_ENABLE_QAT)
+  # preset values for including HE_QAT
+  set(HE_QAT_MISC OFF)
+  set(HE_QAT_DOCS ${IPCL_DOCS})
+  set(HE_QAT_SHARED ${IPCL_SHARED})
+  set(HE_QAT_TEST OFF)
+  add_subdirectory(module/heqat)
+endif()
+
 if(IPCL_TEST)
   include(cmake/gtest.cmake)
 endif()
@@ -164,8 +232,7 @@ if(IPCL_BENCHMARK)
   include(cmake/gbenchmark.cmake)
 endif()
 
-
-
+# IPCL main directory
 add_subdirectory(ipcl)
 
 # unit-test and benchmarks
@@ -180,6 +247,7 @@ if(IPCL_BENCHMARK)
   add_custom_target(benchmark COMMAND $<TARGET_FILE:bench_ipcl> DEPENDS bench_ipcl)
 endif()
 
+# doxygen generation
 if(IPCL_DOCS)
   add_subdirectory(docs)
 endif()
diff --git a/README.md b/README.md
index e90903f..03cfcea 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 # Intel Paillier Cryptosystem Library
-Intel Paillier Cryptosystem Library is an open-source library which provides accelerated performance of a partial homomorphic encryption (HE), named Paillier cryptosystem, by utilizing Intel® [Integrated Performance Primitives Cryptography](https://github.com/intel/ipp-crypto) technologies on Intel CPUs supporting the AVX512IFMA instructions. The library is written in modern standard C++ and provides the essential API for the Paillier cryptosystem scheme. Intel Paillier Cryptosystem Library is certified for ISO compliance.
+Intel Paillier Cryptosystem Library is an open-source library which provides accelerated performance of a partial homomorphic encryption (HE), named Paillier cryptosystem, by utilizing Intel® [Integrated Performance Primitives Cryptography](https://github.com/intel/ipp-crypto) technologies on Intel CPUs supporting the AVX512IFMA instructions and Intel® [QuickAssist Technology](https://01.org/intel-quickassist-technology). The library is written in modern standard C++ and provides the essential API for the Paillier cryptosystem scheme. Intel Paillier Cryptosystem Library is certified for ISO compliance.
 
 ## Contents
 - [Intel Paillier Cryptosystem Library](#intel-paillier-cryptosystem-library)
@@ -10,6 +10,7 @@ Intel Paillier Cryptosystem Library is an open-source library which provides acc
     - [Dependencies](#dependencies)
     - [Instructions](#instructions)
     - [Installing and Using Example](#installing-and-using-example)
+  - [Compiling for QAT](#compiling-for-qat)
   - [Testing and Benchmarking](#testing-and-benchmarking)
 - [Python Extension](#python-extension)
 - [Standardization](#standardization)
@@ -26,19 +27,21 @@ As a public key encryption scheme, Paillier cryptosystem has three stages:
  - Encryption with public key
  - Decryption with private key
 
-For increased security, typically the key length is at least 1024 bits, but recommendation is 2048 bits or larger. To handle such large size integers, conventional implementations of the Paillier cryptosystem utilizes the GNU Multiple Precision Arithmetic Library (GMP). The essential computation of the scheme relies on the modular exponentiation, and our library takes advantage of the multi-buffer modular exponentiation function (```mbx_exp_mb8```) of IPP-Crypto library, which is enabled in AVX512IFMA instruction sets supporting SKUs, such as Intel Icelake Xeon CPUs.
+For increased security, typically the key length is at least 1024 bits, but recommendation is 2048 bits or larger. To handle such large size integers, conventional implementations of the Paillier cryptosystem utilize the GNU Multiple Precision Arithmetic Library (GMP). The essential computation of the scheme relies on the modular exponentiation, and our library takes advantage of two Intel features - the multi-buffer modular exponentiation function (```mbx_exp_mb8```) of IPP-Crypto library, which is enabled in AVX512IFMA instruction sets supporting SKUs, such as Intel Icelake/Sapphire Rapids Xeon® scalable processors, and the modular exponentiation operation (```cpaCyLnModExp```) of QuickAssist Technology library for QAT devices.
 
 ## Building the Library
 ### Prerequisites
 For best performance, especially due to the multi-buffer modular exponentiation function, the library is to be used on AVX512IFMA enabled systems, as listed below in Intel CPU codenames:
  - Intel Cannon Lake
  - Intel Ice Lake
+ - Intel Sapphire Rapids
 
-The library can be built and used without AVX512IFMA, as if the instruction set is not detected on the system, it will automatically switch to non multi-buffer modular exponentiation.
+The library can be built and used without AVX512IFMA and/or QAT, if the features are not supported. But for better performance, it is recommended to use the library on Intel Xeon® scalable processors - Ice Lake-SP or Sapphire Rapids-SP Xeon CPUs while fully utilizing the features.
 
 The following operating systems have been tested and deemed to be fully functional.
   - Ubuntu 18.04 and higher
   - Red Hat Enterprise Linux 8.1 and higher
+  - CentOS Stream
 
 We will keep working on adding more supported operating systems.
 ### Dependencies
@@ -47,29 +50,28 @@ Must have dependencies include:
 cmake >= 3.15.1
 git
 pthread
-g++ >= 7.0 or clang >= 10.0
+Intel C++ Compiler Classic 2021.3 for Linux* OS
+Intel oneAPI DPC++/C++ Compiler for Linux* OS >= 2021.3
+g++ >= 8.0
+clang >= 10.0
 ```
 
 The following libraries and tools are also required,
 ```
 nasm >= 2.15
 OpenSSL >= 1.1.0
-numa >= 2.0.12
 ```
 
-For ```nasm```, please refer to the [Netwide Assembler](https://nasm.us/) for installation details.
-
-On Ubuntu, ```OpenSSL``` and ```numa``` can be installed with:
+```OpenSSL``` can be installed with:
 ```bash
-sudo apt update
+# Ubuntu
 sudo apt install libssl-dev
-sudo apt install libnuma-dev
-```
-For RHEL, ```OpenSSL``` needs to be built and installed from source as the static libraries are missing when installed through the package managers. Please refer to [OpenSSL Project](https://github.com/openssl/openssl) for installation details for static libraries. ```numa``` can be installed with:
-```
-sudo yum install numactl-devel
+# Fedora (RHEL 8, CentOS)
+sudo dnf install openssl-devel
 ```
 
+In order to install ```nasm```, please refer to the [Netwide Assembler webpage](https://nasm.us/) for download and installation details.
+
 ### Instructions
 The library can be built using the following commands:
 ```bash
@@ -84,19 +86,32 @@ It is possible to pass additional options to enable more features. The following
 |--------------------------|-----------|---------|-------------------------------------|
 |`IPCL_TEST`               | ON/OFF    | ON      | unit-test                           |
 |`IPCL_BENCHMARK`          | ON/OFF    | ON      | benchmark                           |
+|`IPCL_ENABLE_QAT`         | ON/OFF    | OFF     | enables QAT functionalities         |
 |`IPCL_ENABLE_OMP`         | ON/OFF    | ON      | enables OpenMP functionalities      |
 |`IPCL_THREAD_COUNT`       | Integer   | OFF     | explicitly set max number of threads|
 |`IPCL_DOCS`               | ON/OFF    | OFF     | build doxygen documentation         |
 |`IPCL_SHARED`             | ON/OFF    | ON      | build shared library                |
-|`IPCL_DETECT_IFMA_RUNTIME`| ON/OFF    | OFF     | detects AVX512IFMA during runtime   |
+|`IPCL_DETECT_CPU_RUNTIME` | ON/OFF    | OFF     | detects CPU supported instructions (AVX512IFMA, rdseed, rdrand) during runtime |
 
-If ```IPCL_DETECT_IFMA_RUNTIME``` flag is set to ```ON```, it will determine whether the system supports the AVX512IFMA instructions on runtime. It is still possible to disable IFMA exclusive feature (multi-buffer modular exponentiation) during runtime by setting up the environment variable ```IPCL_DISABLE_AVX512IFMA=1```.
+If the ```IPCL_DETECT_CPU_RUNTIME``` flag is ```ON```, the library determines at runtime whether the system supports the AVX512IFMA instructions. It is still possible to disable the IFMA-exclusive feature (multi-buffer modular exponentiation) at runtime by setting the environment variable ```IPCL_DISABLE_AVX512IFMA=1```.
 
 ### Installing and Using Example
 For installing and using the library externally, see [example/README.md](./example/README.md).
 
+## Compiling for QAT
+
+Install the QAT software stack by following the instructions in [Building the HE QAT Library](./module/heqat/README.md#building-the-library).
+
+```bash
+export IPCL_DIR=$(pwd)
+export ICP_ROOT=$HOME/QAT
+cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DIPCL_ENABLE_QAT=ON
+cmake --build build -j
+```
+For more details, please refer to the [HEQAT Readme](./module/heqat/README.md).
+
 ## Testing and Benchmarking
-To run a set of unit tests via [Googletest](https://github.com/google/googletest), configure and build library with `-DIPCL_TEST=ON` (see [Instructions](#instructions)).
+To run a set of unit tests via [GoogleTest](https://github.com/google/googletest), configure and build library with `-DIPCL_TEST=ON` (see [Instructions](#instructions)).
 Then, run
 ```bash
 cmake --build build --target unittest
@@ -115,8 +130,7 @@ The executables are located at `${IPCL_ROOT}/build/test/unittest_ipcl` and `${IP
 Alongside the Intel Paillier Cryptosystem Library, we provide a Python extension package utilizing this library as a backend. For installation and usage detail, refer to [Intel Paillier Cryptosystem Library - Python](https://github.com/intel/pailliercryptolib_python).
 
 # Standardization
-This library is certified for ISO compliance with the homomorphic encryption standards [ISO/IEC 18033-6](https://www.iso.org/standard/67740.html) by Dekra.
-
+This library is certified for ISO compliance with the homomorphic encryption standards [ISO/IEC 18033-6](https://www.iso.org/standard/67740.html) by [Dekra](https://www.dekra.com).
 # Contributors
 Main contributors to this project, sorted by alphabetical order of last name are:
   - [Flavio Bergamaschi](https://www.linkedin.com/in/flavio-bergamaschi)
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
index 00662dc..cadb1f6 100644
--- a/benchmark/CMakeLists.txt
+++ b/benchmark/CMakeLists.txt
@@ -1,14 +1,20 @@
 # Copyright (C) 2021 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-set(IPCL_BENCH_SRC main.cpp
+set(IPCL_BENCH_SRC
+  main.cpp
   bench_cryptography.cpp
-  bench_ops.cpp)
+  bench_ops.cpp
+)
+
+if(IPCL_ENABLE_QAT)
+  list(APPEND IPCL_BENCH_SRC bench_hybrid.cpp)
+endif()
 
 add_executable(bench_ipcl ${IPCL_BENCH_SRC})
 target_include_directories(bench_ipcl PRIVATE
-    ${CMAKE_CURRENT_SOURCE_DIR}
-    ${IPCL_INC_DIR}
+  ${CMAKE_CURRENT_SOURCE_DIR}
+  ${IPCL_INC_DIR}
 )
 
 target_link_libraries(bench_ipcl PRIVATE
diff --git a/benchmark/bench_cryptography.cpp b/benchmark/bench_cryptography.cpp
index 9f30a53..33b0d12 100644
--- a/benchmark/bench_cryptography.cpp
+++ b/benchmark/bench_cryptography.cpp
@@ -15,7 +15,8 @@
       ->Args({256})                 \
       ->Args({512})                 \
       ->Args({1024})                \
-      ->Args({2048})
+      ->Args({2048})                \
+      ->Args({2100})
 
 constexpr bool Enable_DJN = true;
 
@@ -64,7 +65,7 @@ const BigNumber HS_BN =
 static void BM_KeyGen(benchmark::State& state) {
   int64_t n_length = state.range(0);
   for (auto _ : state) {
-    ipcl::keyPair key = ipcl::generateKeypair(n_length, Enable_DJN);
+    ipcl::KeyPair key = ipcl::generateKeypair(n_length, Enable_DJN);
   }
 }
 BENCHMARK(BM_KeyGen)->Unit(benchmark::kMicrosecond)->ADD_SAMPLE_KEY_LENGTH_ARGS;
@@ -74,12 +75,12 @@ static void BM_Encrypt(benchmark::State& state) {
 
   BigNumber n = P_BN * Q_BN;
   int n_length = n.BitSize();
-  ipcl::PublicKey* pub_key = new ipcl::PublicKey(n, n_length, Enable_DJN);
-  ipcl::PrivateKey* priv_key = new ipcl::PrivateKey(pub_key, P_BN, Q_BN);
+  ipcl::PublicKey pk(n, n_length, Enable_DJN);
+  ipcl::PrivateKey sk(pk, P_BN, Q_BN);
 
   std::vector<BigNumber> r_bn_v(dsize, R_BN);
-  pub_key->setRandom(r_bn_v);
-  pub_key->setHS(HS_BN);
+  pk.setRandom(r_bn_v);
+  pk.setHS(HS_BN);
 
   std::vector<BigNumber> exp_bn_v(dsize);
   for (size_t i = 0; i < dsize; i++)
@@ -88,10 +89,7 @@ static void BM_Encrypt(benchmark::State& state) {
   ipcl::PlainText pt(exp_bn_v);
 
   ipcl::CipherText ct;
-  for (auto _ : state) ct = pub_key->encrypt(pt);
-
-  delete pub_key;
-  delete priv_key;
+  for (auto _ : state) ct = pk.encrypt(pt);
 }
 BENCHMARK(BM_Encrypt)
     ->Unit(benchmark::kMicrosecond)
@@ -102,23 +100,20 @@ static void BM_Decrypt(benchmark::State& state) {
 
   BigNumber n = P_BN * Q_BN;
   int n_length = n.BitSize();
-  ipcl::PublicKey* pub_key = new ipcl::PublicKey(n, n_length, Enable_DJN);
-  ipcl::PrivateKey* priv_key = new ipcl::PrivateKey(pub_key, P_BN, Q_BN);
+  ipcl::PublicKey pk(n, n_length, Enable_DJN);
+  ipcl::PrivateKey sk(pk, P_BN, Q_BN);
 
   std::vector<BigNumber> r_bn_v(dsize, R_BN);
-  pub_key->setRandom(r_bn_v);
-  pub_key->setHS(HS_BN);
+  pk.setRandom(r_bn_v);
+  pk.setHS(HS_BN);
 
   std::vector<BigNumber> exp_bn_v(dsize);
   for (size_t i = 0; i < dsize; i++)
     exp_bn_v[i] = P_BN - BigNumber((unsigned int)(i * 1024));
 
   ipcl::PlainText pt(exp_bn_v), dt;
-  ipcl::CipherText ct = pub_key->encrypt(pt);
-  for (auto _ : state) dt = priv_key->decrypt(ct);
-
-  delete pub_key;
-  delete priv_key;
+  ipcl::CipherText ct = pk.encrypt(pt);
+  for (auto _ : state) dt = sk.decrypt(ct);
 }
 
 BENCHMARK(BM_Decrypt)
diff --git a/benchmark/bench_hybrid.cpp b/benchmark/bench_hybrid.cpp
new file mode 100644
index 0000000..ecab201
--- /dev/null
+++ b/benchmark/bench_hybrid.cpp
@@ -0,0 +1,222 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include <benchmark/benchmark.h>
+
+#include <vector>
+
+#include "ipcl/ipcl.hpp"
+
+#define BENCH_HYBRID_DETAIL 0
+
+#define INPUT_BN_NUM_MAX 256
+#define INPUT_BN_NUM_MIN 16
+#define INPUT_BN_NUM_GROWTH_RATE 2
+
+// QAT ratio scaled from [0, 1] to [0, 100] to fit an integer benchmark arg
+#define HYBRID_QAT_RATIO_MAX 100
+#define HYBRID_QAT_RATIO_MIN 0
+#define HYBRID_QAT_RATIO_STEP 10
+
+constexpr bool Enable_DJN = true;
+
+// P_BN comes from ISO_IEC_18033_6_Compliance
+const BigNumber P_BN =
+    "0xff03b1a74827c746db83d2eaff00067622f545b62584321256e62b01509f10962f9c5c"
+    "8fd0b7f5184a9ce8e81f439df47dda14563dd55a221799d2aa57ed2713271678a5a0b8b4"
+    "0a84ad13d5b6e6599e6467c670109cf1f45ccfed8f75ea3b814548ab294626fe4d14ff76"
+    "4dd8b091f11a0943a2dd2b983b0df02f4c4d00b413";
+
+// Q_BN comes from ISO_IEC_18033_6_Compliance
+const BigNumber Q_BN =
+    "0xdacaabc1dc57faa9fd6a4274c4d588765a1d3311c22e57d8101431b07eb3ddcb05d77d"
+    "9a742ac2322fe6a063bd1e05acb13b0fe91c70115c2b1eee1155e072527011a5f849de70"
+    "72a1ce8e6b71db525fbcda7a89aaed46d27aca5eaeaf35a26270a4a833c5cda681ffd49b"
+    "aa0f610bad100cdf47cc86e5034e2a0b2179e04ec7";
+
+// R_BN comes from ISO_IEC_18033_6_Compliance
+const BigNumber R_BN =
+    "0x57fb19590c31dc7c034b2a889cf4037ce3db799909c1eb0adb6199d8e96791daca9018"
+    "891f34309daff32dced4af7d793d16734d055e28023acab7295956bfbfdf62bf0ccb2ed3"
+    "1d5d176ca8b404e93007565fb6b72c33a512b4dc4f719231d62e27e34c3733929af32247"
+    "f88c20d1ee77096cc80d3d642464054c815b35878ba812349c8bdc3c6b645daf1a0de609"
+    "65f44dcf705681032480f1eeba82243196b96903becdc0df0801d4120cbd6db1c4b2841a"
+    "27991c44a43750c24ed0825718ad14cfb9c6b40b78ff3d25f71741f2def1c9d420d4b0fa"
+    "1e0a02e7851b5ec6a81133a368b80d1500b0f28fc653d2e6ff4366236dbf80ae3b4beae3"
+    "5e04579f2c";
+
+const BigNumber HS_BN =
+    "0x7788f6e8f57d3488cf9e0c7f4c19521de9aa172bf35924c7827a1189d6c688ac078f77"
+    "7efcfc230e34f1fa5ae8d9d2ed5b062257618e0a0a485b0084b3fd39080031ea739bb48c"
+    "dcce4ad41704ed930d40f53a1cc5d7f70bcb379f17a912b0ad14fabe8fc10213dcd1eabd"
+    "9175ee9bf66c31e9af9703c9d92fa5c8d36279459631ba7e9d4571a10960f8e8d031b267"
+    "22f6ae6f618895b9ce4fce926c8f54169168f6bb3e033861e08c2eca2161198481bc7c52"
+    "3a38310be22f4dd7d028dc6b774e5cb8e6f33b24168697743b7deff411510e27694bf2e8"
+    "0258b325fd97370f5110f54d8d7580b45ae3db26da4e3b0409f0cfbc56d9d9856b66d8bf"
+    "46e727dc3148f70362d05faea743621e3841c94c78d53ee7e7fdef61022dd56922368991"
+    "f843ca0aebf8436e5ec7e737c7ce72ac58f138bb11a3035fe96cc5a7b1aa9d565cb8a317"
+    "f42564482dd3c842c5ee9fb523c165a8507ecee1ac4f185bdbcb7a51095c4c46bfe15aec"
+    "3dfd77e1fd2b0003596df83bbb0d5521f16e2301ec2d4aafe25e4479ee965d8bb30a689a"
+    "6f38ba710222fff7cf359d0f317b8e268f40f576c04262a595cdfc9a07b72978b9564ace"
+    "699208291da7024e86b6eeb1458658852f10794c677b53db8577af272233722ad4579d7a"
+    "074e57217e1c57d11862f74486c7f2987e4d09cd6fb2923569b577de50e89e6965a27e18"
+    "7a8a341a7282b385ef";
+
+// Registers (data_size) or, in detail mode, (data_size, qat_ratio) tuples.
+static void customArgs(benchmark::internal::Benchmark* b) {
+  int num_bn = INPUT_BN_NUM_MIN;
+  while (num_bn <= INPUT_BN_NUM_MAX) {
+#if BENCH_HYBRID_DETAIL
+    for (int ratio = HYBRID_QAT_RATIO_MIN; ratio <= HYBRID_QAT_RATIO_MAX;
+         ratio += HYBRID_QAT_RATIO_STEP)
+      b->Args({num_bn, ratio});
+#else
+    b->Args({num_bn});
+#endif
+    num_bn *= INPUT_BN_NUM_GROWTH_RATE;
+  }
+}
+
+static void BM_Hybrid_ModExp(benchmark::State& state) {
+  // Times batched ipcl::modExp(ct, lambda, n^2) under the hybrid settings.
+  ipcl::setHybridOff();
+  int64_t dsize = state.range(0);
+
+  BigNumber n = P_BN * Q_BN;
+  int n_length = n.BitSize();
+  ipcl::PublicKey pk(n, n_length, Enable_DJN);
+  ipcl::PrivateKey sk(pk, P_BN, Q_BN);
+
+  std::vector<BigNumber> r_bn_v(dsize, R_BN);
+  pk.setRandom(r_bn_v);
+  pk.setHS(HS_BN);
+
+  std::vector<BigNumber> exp_bn_v(dsize);
+  for (int64_t i = 0; i < dsize; i++)
+    exp_bn_v[i] = P_BN - BigNumber((unsigned int)(i * 1024));
+
+  ipcl::PlainText pt(exp_bn_v);
+
+  BigNumber lambda = sk.getLambda();
+  std::vector<BigNumber> pow(dsize, lambda);
+  std::vector<BigNumber> m(dsize, n * n);
+
+  ipcl::CipherText ct = pk.encrypt(pt);
+  std::vector<BigNumber> res(dsize);
+
+#if BENCH_HYBRID_DETAIL
+  // range(1) is registered only in detail mode; scale it back to [0, 1].
+  ipcl::setHybridRatio(static_cast<float>(state.range(1) * 0.01));
+#else
+  ipcl::setHybridMode(ipcl::HybridMode::OPTIMAL);
+#endif
+
+  for (auto _ : state) res = ipcl::modExp(ct.getTexts(), pow, m);  // decryptRAW
+}
+BENCHMARK(BM_Hybrid_ModExp)->Unit(benchmark::kMicrosecond)->Apply(customArgs);
+
+static void BM_Hybrid_Encrypt(benchmark::State& state) {
+  // Times pk.encrypt(). Hybrid state is reset first so that settings left by
+  // the previous benchmark (i.e. BM_Hybrid_ModExp) do not affect this one.
+  ipcl::setHybridOff();
+
+  int64_t dsize = state.range(0);
+
+  BigNumber n = P_BN * Q_BN;
+  int n_length = n.BitSize();
+  ipcl::PublicKey pk(n, n_length, Enable_DJN);
+  ipcl::PrivateKey sk(pk, P_BN, Q_BN);
+
+  std::vector<BigNumber> r_bn_v(dsize, R_BN);
+  pk.setRandom(r_bn_v);
+  pk.setHS(HS_BN);
+
+  std::vector<BigNumber> exp_bn_v(dsize);
+  for (int64_t i = 0; i < dsize; i++)
+    exp_bn_v[i] = P_BN - BigNumber((unsigned int)(i * 1024));
+  ipcl::PlainText pt(exp_bn_v);
+  ipcl::CipherText ct;
+
+#if BENCH_HYBRID_DETAIL
+  // range(1) is registered only in detail mode; scale it back to [0, 1].
+  ipcl::setHybridRatio(static_cast<float>(state.range(1) * 0.01));
+#else
+  ipcl::setHybridMode(ipcl::HybridMode::OPTIMAL);
+#endif
+
+  for (auto _ : state) ct = pk.encrypt(pt);
+}
+BENCHMARK(BM_Hybrid_Encrypt)->Unit(benchmark::kMicrosecond)->Apply(customArgs);
+
+static void BM_Hybrid_Decrypt(benchmark::State& state) {
+  // Times sk.decrypt(). Hybrid state is reset first so that settings left by
+  // the previous benchmark (i.e. BM_Hybrid_Encrypt) do not affect this one.
+  ipcl::setHybridOff();
+
+  int64_t dsize = state.range(0);
+
+  BigNumber n = P_BN * Q_BN;
+  int n_length = n.BitSize();
+  ipcl::PublicKey pk(n, n_length, Enable_DJN);
+  ipcl::PrivateKey sk(pk, P_BN, Q_BN);
+
+  std::vector<BigNumber> r_bn_v(dsize, R_BN);
+  pk.setRandom(r_bn_v);
+  pk.setHS(HS_BN);
+
+  std::vector<BigNumber> exp_bn_v(dsize);
+  for (int64_t i = 0; i < dsize; i++)
+    exp_bn_v[i] = P_BN - BigNumber((unsigned int)(i * 1024));
+
+  ipcl::PlainText pt(exp_bn_v), dt;
+  ipcl::CipherText ct = pk.encrypt(pt);
+
+#if BENCH_HYBRID_DETAIL
+  // range(1) is registered only in detail mode; scale it back to [0, 1].
+  ipcl::setHybridRatio(static_cast<float>(state.range(1) * 0.01));
+#else
+  ipcl::setHybridMode(ipcl::HybridMode::OPTIMAL);
+#endif
+
+  for (auto _ : state) dt = sk.decrypt(ct);
+}
+BENCHMARK(BM_Hybrid_Decrypt)->Unit(benchmark::kMicrosecond)->Apply(customArgs);
+
+static void BM_Hybrid_MulCTPT(benchmark::State& state) {
+  // Times ciphertext * plaintext. Hybrid state is reset first so that settings
+  // left by the previous benchmark (i.e. BM_Hybrid_Decrypt) do not carry over.
+  ipcl::setHybridOff();
+
+  int64_t dsize = state.range(0);
+
+  BigNumber n = P_BN * Q_BN;
+  int n_length = n.BitSize();
+  ipcl::PublicKey pk(n, n_length, Enable_DJN);
+  ipcl::PrivateKey sk(pk, P_BN, Q_BN);
+
+  std::vector<BigNumber> r_bn_v(dsize, R_BN);
+  pk.setRandom(r_bn_v);
+  pk.setHS(HS_BN);
+
+  std::vector<BigNumber> exp_bn1_v(dsize), exp_bn2_v(dsize);
+  for (int i = 0; i < dsize; i++) {
+    exp_bn1_v[i] = P_BN - BigNumber((unsigned int)(i * 1024));
+    exp_bn2_v[i] = Q_BN + BigNumber((unsigned int)(i * 1024));
+  }
+
+  ipcl::PlainText pt1(exp_bn1_v);
+  ipcl::PlainText pt2(exp_bn2_v);
+
+  ipcl::CipherText ct1 = pk.encrypt(pt1);
+  ipcl::CipherText product;
+
+#if BENCH_HYBRID_DETAIL
+  // range(1) is registered only in detail mode; scale it back to [0, 1].
+  ipcl::setHybridRatio(static_cast<float>(state.range(1) * 0.01));
+#else
+  ipcl::setHybridMode(ipcl::HybridMode::OPTIMAL);
+#endif
+
+  for (auto _ : state) product = ct1 * pt2;
+}
+BENCHMARK(BM_Hybrid_MulCTPT)->Unit(benchmark::kMicrosecond)->Apply(customArgs);
diff --git a/benchmark/bench_ops.cpp b/benchmark/bench_ops.cpp
index 48fe76c..de97b06 100644
--- a/benchmark/bench_ops.cpp
+++ b/benchmark/bench_ops.cpp
@@ -3,7 +3,6 @@
 
 #include <benchmark/benchmark.h>
 
-#include <iostream>
 #include <vector>
 
 #include "ipcl/ipcl.hpp"
@@ -16,7 +15,8 @@
       ->Args({256})                 \
       ->Args({512})                 \
       ->Args({1024})                \
-      ->Args({2048})
+      ->Args({2048})                \
+      ->Args({2100})
 
 constexpr bool Enable_DJN = true;
 
@@ -67,12 +67,12 @@ static void BM_Add_CTCT(benchmark::State& state) {
 
   BigNumber n = P_BN * Q_BN;
   int n_length = n.BitSize();
-  ipcl::PublicKey* pub_key = new ipcl::PublicKey(n, n_length, Enable_DJN);
-  ipcl::PrivateKey* priv_key = new ipcl::PrivateKey(pub_key, P_BN, Q_BN);
+  ipcl::PublicKey pk(n, n_length, Enable_DJN);
+  ipcl::PrivateKey sk(pk, P_BN, Q_BN);
 
   std::vector<BigNumber> r_bn_v(dsize, R_BN);
-  pub_key->setRandom(r_bn_v);
-  pub_key->setHS(HS_BN);
+  pk.setRandom(r_bn_v);
+  pk.setHS(HS_BN);
 
   std::vector<BigNumber> exp_bn1_v(dsize), exp_bn2_v(dsize);
   for (int i = 0; i < dsize; i++) {
@@ -83,14 +83,11 @@ static void BM_Add_CTCT(benchmark::State& state) {
   ipcl::PlainText pt1(exp_bn1_v);
   ipcl::PlainText pt2(exp_bn2_v);
 
-  ipcl::CipherText ct1 = pub_key->encrypt(pt1);
-  ipcl::CipherText ct2 = pub_key->encrypt(pt2);
+  ipcl::CipherText ct1 = pk.encrypt(pt1);
+  ipcl::CipherText ct2 = pk.encrypt(pt2);
 
   ipcl::CipherText sum;
   for (auto _ : state) sum = ct1 + ct2;
-
-  delete pub_key;
-  delete priv_key;
 }
 BENCHMARK(BM_Add_CTCT)
     ->Unit(benchmark::kMicrosecond)
@@ -101,12 +98,12 @@ static void BM_Add_CTPT(benchmark::State& state) {
 
   BigNumber n = P_BN * Q_BN;
   int n_length = n.BitSize();
-  ipcl::PublicKey* pub_key = new ipcl::PublicKey(n, n_length, Enable_DJN);
-  ipcl::PrivateKey* priv_key = new ipcl::PrivateKey(pub_key, P_BN, Q_BN);
+  ipcl::PublicKey pk(n, n_length, Enable_DJN);
+  ipcl::PrivateKey sk(pk, P_BN, Q_BN);
 
   std::vector<BigNumber> r_bn_v(dsize, R_BN);
-  pub_key->setRandom(r_bn_v);
-  pub_key->setHS(HS_BN);
+  pk.setRandom(r_bn_v);
+  pk.setHS(HS_BN);
 
   std::vector<BigNumber> exp_bn1_v(dsize), exp_bn2_v(dsize);
   for (int i = 0; i < dsize; i++) {
@@ -117,13 +114,10 @@ static void BM_Add_CTPT(benchmark::State& state) {
   ipcl::PlainText pt1(exp_bn1_v);
   ipcl::PlainText pt2(exp_bn2_v);
 
-  ipcl::CipherText ct1 = pub_key->encrypt(pt1);
+  ipcl::CipherText ct1 = pk.encrypt(pt1);
 
   ipcl::CipherText sum;
   for (auto _ : state) sum = ct1 + pt2;
-
-  delete pub_key;
-  delete priv_key;
 }
 BENCHMARK(BM_Add_CTPT)
     ->Unit(benchmark::kMicrosecond)
@@ -133,12 +127,12 @@ static void BM_Mul_CTPT(benchmark::State& state) {
   size_t dsize = state.range(0);
   BigNumber n = P_BN * Q_BN;
   int n_length = n.BitSize();
-  ipcl::PublicKey* pub_key = new ipcl::PublicKey(n, n_length, Enable_DJN);
-  ipcl::PrivateKey* priv_key = new ipcl::PrivateKey(pub_key, P_BN, Q_BN);
+  ipcl::PublicKey pk(n, n_length, Enable_DJN);
+  ipcl::PrivateKey sk(pk, P_BN, Q_BN);
 
   std::vector<BigNumber> r_bn_v(dsize, R_BN);
-  pub_key->setRandom(r_bn_v);
-  pub_key->setHS(HS_BN);
+  pk.setRandom(r_bn_v);
+  pk.setHS(HS_BN);
 
   std::vector<BigNumber> exp_bn1_v(dsize), exp_bn2_v(dsize);
   for (int i = 0; i < dsize; i++) {
@@ -149,13 +143,10 @@ static void BM_Mul_CTPT(benchmark::State& state) {
   ipcl::PlainText pt1(exp_bn1_v);
   ipcl::PlainText pt2(exp_bn2_v);
 
-  ipcl::CipherText ct1 = pub_key->encrypt(pt1);
+  ipcl::CipherText ct1 = pk.encrypt(pt1);
 
   ipcl::CipherText product;
   for (auto _ : state) product = ct1 * pt2;
-
-  delete pub_key;
-  delete priv_key;
 }
 BENCHMARK(BM_Mul_CTPT)
     ->Unit(benchmark::kMicrosecond)
diff --git a/benchmark/main.cpp b/benchmark/main.cpp
index da89895..d411fb7 100644
--- a/benchmark/main.cpp
+++ b/benchmark/main.cpp
@@ -1,11 +1,20 @@
 // Copyright (C) 2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 
-#include <benchmark/benchmark.h>
+#include "benchmark/benchmark.h"
+#include "ipcl/ipcl.hpp"
 
 int main(int argc, char** argv) {
+#ifdef IPCL_USE_QAT
+  ipcl::initializeContext("QAT");  // used when IPCL_USE_QAT is defined
+#else
+  ipcl::initializeContext("default");  // used in all other builds
+#endif  // IPCL_USE_QAT
+
   benchmark::Initialize(&argc, argv);
   benchmark::RunSpecifiedBenchmarks();
 
+  ipcl::terminateContext();  // pairs with initializeContext() above
+
   return 0;
 }
diff --git a/cmake/cereal.cmake b/cmake/cereal.cmake
new file mode 100644
index 0000000..ff3c44d
--- /dev/null
+++ b/cmake/cereal.cmake
@@ -0,0 +1,23 @@
+# Copyright (C) 2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+include(ExternalProject)
+message(STATUS "Configuring cereal")
+set(CEREAL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/ext_cereal)
+set(CEREAL_GIT_REPO_URL https://github.com/USCiLab/cereal.git)
+set(CEREAL_GIT_LABEL ebef1e929807629befafbb2918ea1a08c7194554) # cereal - v1.3.2
+
+ExternalProject_Add(
+    ext_cereal
+    PREFIX ${CEREAL_PREFIX}
+    GIT_REPOSITORY ${CEREAL_GIT_REPO_URL}
+    GIT_TAG ${CEREAL_GIT_LABEL}
+    UPDATE_COMMAND ""  # skip the update step
+    EXCLUDE_FROM_ALL  # NOTE(review): documented as taking a <bool>; bare form may be a no-op - confirm
+    CONFIGURE_COMMAND ""  # configure, build and install are all empty:
+    BUILD_COMMAND ""  # the project is only checked out, never built
+    INSTALL_COMMAND ""
+)
+
+ExternalProject_Get_Property(ext_cereal SOURCE_DIR BINARY_DIR)  # BINARY_DIR is not used below
+set(CEREAL_INC_DIR ${SOURCE_DIR}/include)  # include dir inside the git checkout
diff --git a/cmake/cpufeatures.cmake b/cmake/cpufeatures.cmake
index 478fa10..7c67092 100644
--- a/cmake/cpufeatures.cmake
+++ b/cmake/cpufeatures.cmake
@@ -3,7 +3,7 @@
 
 include(ExternalProject)
 
-message(STATUS "configuring cpu_features")
+message(STATUS "Configuring cpu_features")
 set(CPUFEATURES_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/ext_cpufeatures)
 set(CPUFEATURES_DESTDIR ${CPUFEATURES_PREFIX}/cpufeatures_install)
 set(CPUFEATURES_GIT_REPO_URL https://github.com/google/cpu_features.git)
@@ -25,7 +25,6 @@ ExternalProject_Add(
   INSTALL_COMMAND make DESTDIR=${CPUFEATURES_DESTDIR} install
   )
 
-
 set(CPUFEATURES_INC_DIR ${CPUFEATURES_DESTDIR}/${CMAKE_INSTALL_PREFIX}/include)
 set(CPUFEATURES_LIB_DIR ${CPUFEATURES_DESTDIR}/${CMAKE_INSTALL_PREFIX}/lib)
 
@@ -35,12 +34,23 @@ if(IPCL_SHARED)
 
   target_include_directories(libcpu_features SYSTEM
                             INTERFACE ${CPUFEATURES_INC_DIR})
-  target_link_libraries(libcpu_features
-                        INTERFACE ${CPUFEATURES_LIB_DIR}/libcpu_features.a)
+  # ipcl python build
+  if(IPCL_INTERNAL_PYTHON_BUILD)
+    target_link_libraries(libcpu_features INTERFACE
+      ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/cpufeatures/libcpu_features.a)
+
+    add_custom_command(TARGET ext_cpufeatures
+      POST_BUILD
+      COMMAND ${CMAKE_COMMAND} -E copy_directory ${CPUFEATURES_LIB_DIR} ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/cpufeatures
+    )
+  else()
+    target_link_libraries(libcpu_features INTERFACE
+      ${CPUFEATURES_LIB_DIR}/libcpu_features.a)
+  endif()
 
   install(
     DIRECTORY ${CPUFEATURES_LIB_DIR}/
-    DESTINATION "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/cpufeatures"
+    DESTINATION "${IPCL_INSTALL_LIBDIR}/cpufeatures"
     USE_SOURCE_PERMISSIONS
   )
 else()
diff --git a/cmake/gbenchmark.cmake b/cmake/gbenchmark.cmake
index 65dc933..94011be 100644
--- a/cmake/gbenchmark.cmake
+++ b/cmake/gbenchmark.cmake
@@ -4,7 +4,6 @@
 include(ExternalProject)
 
 set(GBENCHMARK_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/ext_gbenchmark)
-
 set(GBENCHMARK_SRC_DIR ${GBENCHMARK_PREFIX}/src/ext_gbenchmark/)
 set(GBENCHMARK_BUILD_DIR ${GBENCHMARK_PREFIX}/src/ext_gbenchmark-build/)
 set(GBENCHMARK_REPO_URL https://github.com/google/benchmark.git)
@@ -22,8 +21,8 @@ ExternalProject_Add(
              -DCMAKE_INSTALL_PREFIX=${GBENCHMARK_PREFIX}
              -DBENCHMARK_ENABLE_GTEST_TESTS=OFF
              -DBENCHMARK_ENABLE_TESTING=OFF
-             -DCMAKE_INSTALL_LIBDIR=lib
              -DCMAKE_BUILD_TYPE=Release
+             -DCMAKE_INSTALL_LIBDIR=lib
   BUILD_BYPRODUCTS ${GBENCHMARK_PATHS}
   # Skip updates
   UPDATE_COMMAND ""
diff --git a/cmake/gtest.cmake b/cmake/gtest.cmake
index b5d9dc5..09ad1d1 100644
--- a/cmake/gtest.cmake
+++ b/cmake/gtest.cmake
@@ -2,10 +2,11 @@
 # SPDX-License-Identifier: Apache-2.0
 
 include(ExternalProject)
+include(GNUInstallDirs)
 
 set(GTEST_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/ext_gtest)
 set(GTEST_GIT_REPO_URL https://github.com/google/googletest.git)
-set(GTEST_GIT_LABEL release-1.10.0)
+set(GTEST_GIT_LABEL release-1.12.1)
 set(GTEST_CXX_FLAGS "${IPCL_FORWARD_CMAKE_ARGS} -fPIC")
 
 ExternalProject_Add(
@@ -21,12 +22,6 @@ ExternalProject_Add(
   INSTALL_COMMAND ""
 )
 
-# install(
-#   DIRECTORY ${GTEST_DESTDIR}/${CMAKE_INSTALL_PREFIX}/
-#   DESTINATION "."
-#   USE_SOURCE_PERMISSIONS
-# )
-
 ExternalProject_Get_Property(ext_gtest SOURCE_DIR BINARY_DIR)
 
 add_library(libgtest INTERFACE)
diff --git a/cmake/ipcl/ipcl-util.cmake b/cmake/ipcl/ipcl-util.cmake
index e9d2cfe..65c1a74 100644
--- a/cmake/ipcl/ipcl-util.cmake
+++ b/cmake/ipcl/ipcl-util.cmake
@@ -29,3 +29,116 @@ function(ipcl_create_archive target dependency)
     message(WARNING "Unsupported compiler ${CMAKE_CXX_COMPILER_ID}")
   endif()
 endfunction()
+
+
+function(ipcl_detect_lscpu_flag flag)
+  # Sets IPCL_FOUND_<flag> in the caller's scope: TRUE if `lscpu | grep <flag>` prints anything
+  set(lscpu_match ${flag})
+  execute_process(COMMAND lscpu COMMAND grep ${flag} OUTPUT_VARIABLE lscpu_match)
+  if(NOT "${lscpu_match}" STREQUAL "")
+    message(STATUS "Support ${flag}: True")
+    set(IPCL_FOUND_${flag} TRUE PARENT_SCOPE)
+  else()
+    message(STATUS "Support ${flag}: False")
+    set(IPCL_FOUND_${flag} FALSE PARENT_SCOPE)
+  endif()
+endfunction()
+
+function(ipcl_detect_qat)
+  # Detect SPR based QAT. Sets IPCL_FOUND_QAT in the caller's scope: TRUE only if
+  # ICP_ROOT is valid and lspci lists both device IDs; other paths warn and keep FALSE
+  message(STATUS "Detecting QAT...... ")
+  set(IPCL_FOUND_QAT FALSE PARENT_SCOPE)
+  if(DEFINED ENV{ICP_ROOT})
+    # Validate environment variable ICP_ROOT
+    set(tmp_ICP_ROOT $ENV{ICP_ROOT})
+    get_filename_component(tmp_ICP_ROOT_fullpath "${tmp_ICP_ROOT}" REALPATH)
+    if(EXISTS "${tmp_ICP_ROOT_fullpath}" AND
+       EXISTS "${tmp_ICP_ROOT_fullpath}/build" AND
+       EXISTS "${tmp_ICP_ROOT_fullpath}/quickassist")
+      message(STATUS "Environment variable ICP_ROOT is defined as ${tmp_ICP_ROOT_fullpath}.")
+      execute_process(COMMAND lspci -d 8086:4940 COMMAND wc -l OUTPUT_VARIABLE QAT_PHYSICAL OUTPUT_STRIP_TRAILING_WHITESPACE)
+      if(QAT_PHYSICAL GREATER_EQUAL 1)  # name form: empty output is false, not an error
+        message(STATUS "Detected ${QAT_PHYSICAL} physical QAT processes")
+        execute_process(COMMAND lspci -d 8086:4941 COMMAND wc -l OUTPUT_VARIABLE QAT_VIRTUAL OUTPUT_STRIP_TRAILING_WHITESPACE)
+        if(QAT_VIRTUAL GREATER_EQUAL 1)  # name form: empty output is false, not an error
+          message(STATUS "Detected ${QAT_VIRTUAL} virtual QAT processes")
+          ipcl_check_qat_service_status()
+          set(IPCL_FOUND_QAT TRUE PARENT_SCOPE)
+        else()
+          message(WARNING "NO virtual QAT processors - IPCL_ENABLE_QAT set to OFF")
+        endif()
+      else()
+        message(WARNING "NO physical QAT processors - IPCL_ENABLE_QAT set to OFF")
+      endif()
+    else()
+      message(WARNING "Environment variable ICP_ROOT is incorrect - IPCL_ENABLE_QAT set to OFF")
+    endif()
+  else()
+    message(WARNING "Environment variable ICP_ROOT must be defined - IPCL_ENABLE_QAT set to OFF")
+  endif()
+endfunction()
+
+
+function(ipcl_check_qat_service_status)
+  # Warn (without failing) when systemctl does not report qat_service as active
+  execute_process(COMMAND systemctl status qat_service.service COMMAND grep "Active: active" COMMAND wc -l OUTPUT_VARIABLE QAT_SERVICE_STATUS OUTPUT_STRIP_TRAILING_WHITESPACE)
+  if(QAT_SERVICE_STATUS EQUAL 1)  # name form: empty output is false, not an error
+    message(STATUS "qat_service is ACTIVE")
+  else()
+    message(WARNING
+      " qat_service is NOT ACTIVE!\n"
+      " Since QAT is detected, compilation will continue however the"
+      " qat_service need to be active to use the library.\n"
+      " To start the service, issue the following command --"
+      " \$ sudo systemctl start qat_service.service"
+    )
+  endif()
+endfunction()
+
+function(ipcl_define_icp_variables OutVariable)
+  # Sets ${OutVariable} in the caller's scope to a list of include directories
+  # built from $ENV{ICP_ROOT}. ICP_ROOT is not validated here.
+  # Only the path variables that feed the list are defined.
+  set(ICP_ROOT             $ENV{ICP_ROOT})
+  set(ICP_API_DIR          ${ICP_ROOT}/quickassist)
+  set(ICP_LAC_DIR          ${ICP_ROOT}/quickassist/lookaside/access_layer)
+  set(ICP_ADF_DIR          ${ICP_ROOT}/quickassist/lookaside/access_layer/src/qat_direct)
+  set(CMN_ROOT             ${ICP_ROOT}/quickassist/utilities/libusdm_drv)
+
+  set(${OutVariable} ${ICP_API_DIR}/include
+                  ${ICP_LAC_DIR}/include
+                  ${ICP_ADF_DIR}/include
+                  ${CMN_ROOT}
+                  ${ICP_API_DIR}/include/dc
+                  ${ICP_API_DIR}/include/lac
+                  PARENT_SCOPE)
+endfunction()
+
+function(ipcl_get_core_thread_count cores threads nodes)
+  # Writes thread, physical core and socket counts into the caller-scope
+  # variables named by `threads`, `cores` and `nodes`, and logs each value.
+  include(ProcessorCount)
+
+  # Thread count as reported by CMake's ProcessorCount module
+  ProcessorCount(thread_count)
+  message(STATUS "# of threads:               ${thread_count}")
+  set(${threads} ${thread_count} PARENT_SCOPE)
+
+  # When smt/active reads 1, the core count is half the thread count
+  execute_process(COMMAND cat /sys/devices/system/cpu/smt/active OUTPUT_VARIABLE smt_active OUTPUT_STRIP_TRAILING_WHITESPACE)
+  set(core_count ${thread_count})
+  if("${smt_active}" STREQUAL "1")
+    math(EXPR core_count "${thread_count} / 2")
+  endif()
+
+  message(STATUS "# of physical cores:        ${core_count}")
+  set(${cores} ${core_count} PARENT_SCOPE)
+
+  # Node count: last space-separated token of the lscpu output matching "Socket"
+  execute_process(COMMAND lscpu COMMAND grep Socket OUTPUT_VARIABLE socket_line OUTPUT_STRIP_TRAILING_WHITESPACE)
+  string(REGEX MATCHALL "([^\ ]+\ |[^\ ]+$)" socket_tokens "${socket_line}")
+  list(GET socket_tokens -1 node_count)
+  message(STATUS "# of nodes:                 ${node_count}")
+  set(${nodes} ${node_count} PARENT_SCOPE)
+endfunction()
diff --git a/cmake/ippcrypto.cmake b/cmake/ippcrypto.cmake
index 55381a0..e0dc364 100644
--- a/cmake/ippcrypto.cmake
+++ b/cmake/ippcrypto.cmake
@@ -4,75 +4,127 @@
 include(ExternalProject)
 message(STATUS "Configuring ipp-crypto")
 
-set(IPPCRYPTO_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/ext_ipp-crypto)
-set(IPPCRYPTO_DESTDIR ${IPPCRYPTO_PREFIX}/ippcrypto_install)
-set(IPPCRYPTO_GIT_REPO_URL https://github.com/intel/ipp-crypto.git)
-set(IPPCRYPTO_GIT_LABEL ippcp_2021.6)
-set(IPPCRYPTO_SRC_DIR ${IPPCRYPTO_PREFIX}/src/ext_ipp-crypto/)
-
-set(IPPCRYPTO_CXX_FLAGS "${IPCL_FORWARD_CMAKE_ARGS} -DNONPIC_LIB:BOOL=off -DMERGED_BLD:BOOL=on")
-
-set(IPPCRYPTO_ARCH intel64)
-set(BUILD_x64 ON)
-if(BUILD_x64)
-  if(NOT ${BUILD_x64})
-    set(IPPCRYPTO_ARCH ia32)
+set(IPPCRYPTO_VERSION 11.4)
+set(IPPCRYPTO_GIT_LABEL ippcp_2021.6) #ippcp version 11.4
+
+if(CMAKE_PREFIX_PATH)
+  if(IPCL_SHARED)
+    set(IPPCP_SHARED ON)
+  else()
+    set(IPPCP_SHARED OFF)
   endif()
+  find_package(ippcp ${IPPCRYPTO_VERSION})
 endif()
 
-ExternalProject_Add(
-  ext_ipp-crypto
-  GIT_REPOSITORY ${IPPCRYPTO_GIT_REPO_URL}
-  GIT_TAG ${IPPCRYPTO_GIT_LABEL}
-  PREFIX ${IPPCRYPTO_PREFIX}
-  INSTALL_DIR ${IPPCRYPTO_PREFIX}
-  CMAKE_ARGS ${IPPCRYPTO_CXX_FLAGS}
-             -DCMAKE_INSTALL_PREFIX=${IPPCRYPTO_PREFIX}
-             -DARCH=${IPPCRYPTO_ARCH}
-             -DCMAKE_ASM_NASM_COMPILER=nasm
-             -DCMAKE_BUILD_TYPE=Release
-             -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
-             -DCMAKE_INSTALL_LIBDIR=lib
-  UPDATE_COMMAND ""
-  PATCH_COMMAND git apply ${CMAKE_CURRENT_LIST_DIR}/patch/ippcrypto_patch.patch
-  INSTALL_COMMAND make DESTDIR=${IPPCRYPTO_DESTDIR} install
-)
-
-set(IPPCRYPTO_INC_DIR ${IPPCRYPTO_DESTDIR}/${CMAKE_INSTALL_PREFIX}/include)
-set(IPPCRYPTO_LIB_DIR ${IPPCRYPTO_DESTDIR}/${CMAKE_INSTALL_PREFIX}/lib/${IPPCRYPTO_ARCH})
-if(IPCL_SHARED)
-  add_library(libippcrypto INTERFACE)
-  add_dependencies(libippcrypto ext_ipp-crypto)
-
-  ExternalProject_Get_Property(ext_ipp-crypto SOURCE_DIR BINARY_DIR)
-
-  target_link_libraries(libippcrypto INTERFACE
-  ${IPPCRYPTO_LIB_DIR}/libippcp.so
-  ${IPPCRYPTO_LIB_DIR}/libcrypto_mb.so)
-  target_include_directories(libippcrypto SYSTEM INTERFACE ${IPPCRYPTO_INC_DIR})
-
+if(ippcp_FOUND)
+  message(STATUS "IPP-Crypto ${IPPCRYPTO_VERSION} found at ${ippcp_DIR}")
+  get_target_property(IPPCRYPTO_INC_DIR IPPCP::ippcp INTERFACE_INCLUDE_DIRECTORIES)
+  get_target_property(IPPCRYPTO_IMPORTED_LOCATION IPPCP::ippcp IMPORTED_LOCATION)
+  get_filename_component(IPPCRYPTO_LIB_DIR ${IPPCRYPTO_IMPORTED_LOCATION} DIRECTORY)
   install(
     DIRECTORY ${IPPCRYPTO_LIB_DIR}/
-    DESTINATION "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/ippcrypto"
+    DESTINATION "${IPCL_INSTALL_LIBDIR}/ippcrypto"
     USE_SOURCE_PERMISSIONS
   )
+
 else()
+  message(STATUS "IPP-Crypto NOT found - building from source")
 
-  add_library(libippcrypto::ippcp STATIC IMPORTED GLOBAL)
-  add_library(libippcrypto::crypto_mb STATIC IMPORTED GLOBAL)
+  set(IPPCRYPTO_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/ext_ipp-crypto)
+  set(IPPCRYPTO_DESTDIR ${IPPCRYPTO_PREFIX}/ippcrypto_install)
+  set(IPPCRYPTO_DEST_INCLUDE_DIR include/ippcrypto)
+  set(IPPCRYPTO_GIT_REPO_URL https://github.com/intel/ipp-crypto.git)
+  set(IPPCRYPTO_SRC_DIR ${IPPCRYPTO_PREFIX}/src/ext_ipp-crypto/)
 
-  add_dependencies(libippcrypto::ippcp ext_ipp-crypto)
-  add_dependencies(libippcrypto::crypto_mb ext_ipp-crypto)
+  set(IPPCRYPTO_CXX_FLAGS "${IPCL_FORWARD_CMAKE_ARGS} -DNONPIC_LIB:BOOL=off -DMERGED_BLD:BOOL=on")
 
-  find_package(OpenSSL REQUIRED)
+  set(IPPCRYPTO_ARCH intel64)
+  set(BUILD_x64 ON)
+  if(BUILD_x64)
+    if(NOT ${BUILD_x64})
+      set(IPPCRYPTO_ARCH ia32)
+    endif()
+  endif()
 
-  set_target_properties(libippcrypto::ippcp PROPERTIES
-            IMPORTED_LOCATION ${IPPCRYPTO_LIB_DIR}/libippcp.a
-            INCLUDE_DIRECTORIES ${IPPCRYPTO_INC_DIR}
+  ExternalProject_Add(
+    ext_ipp-crypto
+    GIT_REPOSITORY ${IPPCRYPTO_GIT_REPO_URL}
+    GIT_TAG ${IPPCRYPTO_GIT_LABEL}
+    PREFIX ${IPPCRYPTO_PREFIX}
+    INSTALL_DIR ${IPPCRYPTO_PREFIX}
+    CMAKE_ARGS ${IPPCRYPTO_CXX_FLAGS}
+              -DCMAKE_INSTALL_PREFIX=${IPPCRYPTO_PREFIX}
+              -DARCH=${IPPCRYPTO_ARCH}
+              -DCMAKE_ASM_NASM_COMPILER=nasm
+              -DCMAKE_BUILD_TYPE=Release
+              -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}  # NOTE(review): also passed above with another value - confirm which is intended
+    UPDATE_COMMAND ""  # skip the update step
+    PATCH_COMMAND git apply ${CMAKE_CURRENT_LIST_DIR}/patch/ippcrypto_patch.patch
+    INSTALL_COMMAND make DESTDIR=${IPPCRYPTO_DESTDIR} install  # staged install under IPPCRYPTO_DESTDIR
+  )
 
-  set_target_properties(libippcrypto::crypto_mb PROPERTIES
-            IMPORTED_LOCATION ${IPPCRYPTO_LIB_DIR}/libcrypto_mb.a
-            INCLUDE_DIRECTORIES ${IPPCRYPTO_INC_DIR}
-  )
+  set(IPPCRYPTO_INC_DIR ${IPPCRYPTO_DESTDIR}/${CMAKE_INSTALL_PREFIX}/include)
+  set(IPPCRYPTO_LIB_DIR ${IPPCRYPTO_DESTDIR}/${CMAKE_INSTALL_PREFIX}/lib/${IPPCRYPTO_ARCH})
+  if(IPCL_SHARED)
+    add_library(IPPCP_ippcp INTERFACE)
+    add_library(IPPCP::ippcp ALIAS IPPCP_ippcp)
+
+    add_library(IPPCP_crypto_mb INTERFACE)
+    add_library(IPPCP::crypto_mb ALIAS IPPCP_crypto_mb)
+
+    add_dependencies(IPPCP_ippcp ext_ipp-crypto)
+    add_dependencies(IPPCP_crypto_mb ext_ipp-crypto)
+
+    target_include_directories(IPPCP_ippcp SYSTEM INTERFACE ${IPPCRYPTO_INC_DIR})
+    target_include_directories(IPPCP_crypto_mb SYSTEM INTERFACE ${IPPCRYPTO_INC_DIR})
+
+    # if ipcl python build
+    if(IPCL_INTERNAL_PYTHON_BUILD)
+      target_link_libraries(IPPCP_ippcp INTERFACE
+        ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/ippcrypto/libippcp.so
+      )
+      target_link_libraries(IPPCP_crypto_mb INTERFACE
+        ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/ippcrypto/libcrypto_mb.so
+      )
+
+      add_custom_command(TARGET ext_ipp-crypto
+        POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E copy_directory ${IPPCRYPTO_LIB_DIR} ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/ippcrypto
+      )
+    else()
+      target_link_libraries(IPPCP_ippcp INTERFACE
+        ${IPPCRYPTO_LIB_DIR}/libippcp.so
+      )
+      target_link_libraries(IPPCP_crypto_mb INTERFACE
+        ${IPPCRYPTO_LIB_DIR}/libcrypto_mb.so
+      )
+
+    endif()
+
+    install(
+      DIRECTORY ${IPPCRYPTO_LIB_DIR}/
+      DESTINATION "${IPCL_INSTALL_LIBDIR}/ippcrypto"
+      USE_SOURCE_PERMISSIONS
+    )
+
+  else()
+
+    add_library(IPPCP::ippcp STATIC IMPORTED GLOBAL)
+    add_library(IPPCP::crypto_mb STATIC IMPORTED GLOBAL)
+
+    add_dependencies(IPPCP::ippcp ext_ipp-crypto)
+    add_dependencies(IPPCP::crypto_mb ext_ipp-crypto)
+
+    find_package(OpenSSL REQUIRED)
+
+    set_target_properties(IPPCP::ippcp PROPERTIES
+              IMPORTED_LOCATION ${IPPCRYPTO_LIB_DIR}/libippcp.a
+              INCLUDE_DIRECTORIES ${IPPCRYPTO_INC_DIR}
+    )
+
+    set_target_properties(IPPCP::crypto_mb PROPERTIES
+              IMPORTED_LOCATION ${IPPCRYPTO_LIB_DIR}/libcrypto_mb.a
+              INCLUDE_DIRECTORIES ${IPPCRYPTO_INC_DIR}
+    )
+  endif()
 endif()
diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt
index cb66be8..67fe74a 100644
--- a/example/CMakeLists.txt
+++ b/example/CMakeLists.txt
@@ -9,7 +9,16 @@ set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)
 
-find_package(IPCL 1.1.4 REQUIRED HINTS ${IPCL_HINT_DIR})
+find_package(IPCL 2.0.0 REQUIRED HINTS ${IPCL_HINT_DIR})
 
-add_executable(test test.cpp)
-target_link_libraries(test PRIVATE IPCL::ipcl)
+set(examples encrypt_decrypt add_mul hybridmode)  # each <name> is built from example_<name>.cpp
+
+add_custom_target(run_all_examples)  # building this target runs every example
+
+foreach(ex IN LISTS examples)
+  add_executable(example_${ex} example_${ex}.cpp)
+  target_link_libraries(example_${ex} PRIVATE IPCL::ipcl)
+  add_custom_command(TARGET run_all_examples POST_BUILD
+    COMMAND $<TARGET_FILE:example_${ex}>)  # run the freshly built example binary
+  add_dependencies(run_all_examples example_${ex})  # build the example before it is run
+endforeach()
diff --git a/example/README.md b/example/README.md
index 0bfe98c..ca69b52 100644
--- a/example/README.md
+++ b/example/README.md
@@ -7,8 +7,9 @@ This document provides an example program for using the Intel Paillier Cryptosys
   - [Installation](#installation)
   - [Linking and Running Applications](#linking-and-running-applications)
     - [Building with CMake](#building-with-cmake)
-    - [Manually Compiling](#manually-compiling)
   - [Using Intel Paillier Cryptosystem Library](#using-intel-paillier-cryptosystem-library)
+    - [Enabling QAT usage](#enabling-qat-usage)
+    - [Hybrid mode configuration](#hybrid-mode-configuration)
     - [Data handling](#data-handling)
       - [```ipcl::PlainText``` Constructor](#ipclplaintext-constructor)
       - [Accessing data](#accessing-data)
@@ -34,7 +35,7 @@ For more details about the build configuration options, please refer to the buil
 
 Before proceeding after the library is installed, it is useful to setup an environment variable to point to the installation location.
 ```bash
-export IPCL_DIR=/path/to/install/
+export IPCL_DIR=/path/to/ipcl/install/
 ```
 
 ### Building with CMake
@@ -42,27 +43,23 @@ A more convenient way to use the library is via the `find_package` functionality
 In your external applications, add the following lines to your `CMakeLists.txt`.
 
 ```bash
-find_package(IPCL 1.1.4
+find_package(IPCL 2.0.0
     HINTS ${IPCL_HINT_DIR}
     REQUIRED)
 target_link_libraries(${TARGET} IPCL::ipcl)
 ```
 
-If the library is installed globally, `IPCL_DIR` or `IPCL_HINT_DIR` flag is not needed. If `IPCL_DIR` is properly set, `IPCL_HINT_DIR` is not needed as well. Otherwise `IPCL_HINT_DIR` should be the directory containing `IPCLCOnfig.cmake`, under `${CMAKE_INSTALL_PREFIX}/lib/cmake/ipcl-1.1.4/`
-
-### Manually Compiling
-In order to directly use `g++` or `clang++` to compile an example code, it can be done by:
-```bash
-# gcc
-g++ test.cpp -o test -L${IPCL_DIR}/lib -I${IPCL_DIR}/include -lipcl -fopenmp -lnuma -lcrypto
-
-# clang
-clang++ test.cpp -o test -L${IPCL_DIR}/lib -I${IPCL_DIR}/include -lipcl -fopenmp -lnuma -lcrypto
-```
-
+If the library is installed globally, neither the `IPCL_DIR` environment variable nor the `IPCL_HINT_DIR` flag is needed. If the environment variable `IPCL_DIR` is set, `IPCL_HINT_DIR` is not needed either. Otherwise, `IPCL_HINT_DIR` should be the directory containing `IPCLConfig.cmake`, i.e. `${CMAKE_INSTALL_PREFIX}/lib/cmake/ipcl-2.0.0/`.
 
 ## Using Intel Paillier Cryptosystem Library
 
+### Key Generation
+The public key and private key pair can be generated by using the ```ipcl::generateKeypair``` function.
+```C++
+// key.pub_key, key.priv_key
+ipcl::KeyPair key = ipcl::generateKeypair(2048, true); // previously ipcl::keyPair
+```
+- Note: With version ```v2.0.0``` and beyond, the key pair struct type has been changed from ```ipcl::keyPair``` to ```ipcl::KeyPair``` (uppercase ```K```).
 ### Data handling
 The library uses a container - ```ipcl::PlainText``` for encryption inputs and decryption outputs as well as plaintext HE operations.
 
@@ -101,24 +98,15 @@ pt_copy.clear(); // empty the container
 ```
 FOr more details, please refer to the [```base_text.hpp```](../ipcl/include/ipcl/base_text.hpp) and [```plaintext.hpp```](../ipcl/include/ipcl/plaintext.hpp).
 
-### Key Generation
-The public key and private key pair can be generated by using the ```ipcl::generateKeypair``` function.
-```C++
-// key.pub_key, key.priv_key
-ipcl::keyPair key = ipcl::generateKeypair(2048, true);
-// After computation, need to delete the key objects
-delete key.pub_key;
-delete key.priv_key;
-```
-
 ### Encryption and Decryption
 The public key is used to encrypt ```ipcl::PlainText``` objects for ```ipcl::CipherText``` outputs.
 In the same way, the private key is used to decrypt ```ipcl::CipherText``` objects for ```ipcl::PlainText``` outputs.
 ```C++
 ipcl::PlainText pt(raw_data);
-ipcl::CipherText ct = key.pub_key->encrypt(pt);
-ipcl::PlainText dec_pt = key.priv_key->decrypt(ct);
+ipcl::CipherText ct = key.pub_key.encrypt(pt); // previously key.pub_key->encrypt(pt)
+ipcl::PlainText dec_pt = key.priv_key.decrypt(ct);  // previously key.priv_key->decrypt(ct)
 ```
+- Note: With version ```v2.0.0``` and beyond, the keys in ```ipcl::KeyPair``` are objects rather than pointers, hence the changes from ```key.pub_key->encrypt(pt)``` and ```key.priv_key->decrypt(ct)``` to ```key.pub_key.encrypt(pt)``` and ```key.priv_key.decrypt(ct)```, respectively.
 
 ### HE Operations
 Since the Intel Paillier Cryptosystem Library being a partially homomorphic encryption scheme, while addition is supports both ciphertext operands, multiplication only supports single ciphertext operand.
@@ -127,8 +115,8 @@ Since the Intel Paillier Cryptosystem Library being a partially homomorphic encr
 // setup
 ipcl::PlainText a, b;
 
-ipcl::CipherText ct_a = key.pub_key->encrypt(a);
-ipcl::CipherText ct_b = key.pub_key->encrypt(b);
+ipcl::CipherText ct_a = key.pub_key.encrypt(a);
+ipcl::CipherText ct_b = key.pub_key.encrypt(b);
 
 // Addition (all three end up being same values after decryption)
 ipcl::CipherText ct_c1 = ct_a + ct_b; // ciphertext + ciphertext
@@ -139,3 +127,32 @@ ipcl::CipherText ct_c3 = ct_b + a; // ciphertext + plaintext
 ipcl::CipherText ct_d1 = ct_a * b; // ciphertext * plaintext
 ipcl::CipherText ct_d2 = ct_b * a;
 ```
+
+### Enabling QAT usage
+When QAT is enabled while building the library with the flag ```IPCL_ENABLE_QAT=ON```, it is essential to initialize and release the HE QAT context.
+```C++
+// Initialize HE QAT context
+ipcl::initializeContext("QAT");
+
+// perform IPCL operations
+auto ct = key.pub_key.encrypt(pt);
+auto dec_pt = key.priv_key.decrypt(ct);
+
+// Release HE QAT context
+ipcl::terminateContext();
+```
+If QAT is disabled, the ```ipcl::initializeContext("QAT")``` statement does nothing, so it is safe to include in any code that uses the library.
+
+### Hybrid mode configuration
+The main accelerated operation - modular exponentiation - can be performed by either IPP-Crypto or the HE QAT. Our library provides a configurable method to distribute the workload between these two methods.
+```C++
+// Use optimal mode
+ipcl::setHybridMode(ipcl::HybridMode::OPTIMAL);
+
+// Use IPP-Crypto modexp only
+ipcl::setHybridMode(ipcl::HybridMode::IPP);
+
+// Use QAT modexp only
+ipcl::setHybridMode(ipcl::HybridMode::QAT);
+```
+By default, the hybrid mode is set to ```ipcl::HybridMode::OPTIMAL```. For more details about the modes, please refer to [```mod_exp.hpp```](../ipcl/include/ipcl/mod_exp.hpp#L16).
diff --git a/example/example_add_mul.cpp b/example/example_add_mul.cpp
new file mode 100644
index 0000000..b06fd15
--- /dev/null
+++ b/example/example_add_mul.cpp
@@ -0,0 +1,104 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+/*
+  Example of homomorphic addition and multiplication
+*/
+#include <climits>
+#include <iostream>
+#include <random>
+#include <vector>
+
+#include "ipcl/ipcl.hpp"
+
+int main() {
+  std::cout << std::endl;
+  std::cout << "==============================================" << std::endl;
+  std::cout << "Example: Addition and Multiplication with IPCL" << std::endl;
+  std::cout << "==============================================" << std::endl;
+
+  ipcl::initializeContext("QAT");
+
+  const uint32_t num_total = 20;
+
+  ipcl::KeyPair key = ipcl::generateKeypair(2048, true);
+
+  std::vector<uint32_t> x(num_total);
+  std::vector<uint32_t> y(num_total);
+
+  std::random_device dev;
+  std::mt19937 rng(dev());
+  std::uniform_int_distribution<std::mt19937::result_type> dist(0,
+                                                                UINT_MAX >> 16);
+
+  for (int i = 0; i < num_total; i++) {
+    x[i] = dist(rng);
+    y[i] = dist(rng);
+  }
+
+  ipcl::PlainText pt_x = ipcl::PlainText(x);
+  ipcl::PlainText pt_y = ipcl::PlainText(y);
+
+  ipcl::setHybridMode(ipcl::HybridMode::OPTIMAL);
+
+  ipcl::CipherText ct_x = key.pub_key.encrypt(pt_x);
+  ipcl::CipherText ct_y = key.pub_key.encrypt(pt_y);
+
+  // Perform enc(x) + enc(y)
+  std::cout << "--- IPCL CipherText + CipherText ---" << std::endl;
+  ipcl::CipherText ct_add_ctx_cty = ct_x + ct_y;
+  ipcl::PlainText dt_add_ctx_cty = key.priv_key.decrypt(ct_add_ctx_cty);
+
+  // verify result
+  bool verify = true;
+  for (int i = 0; i < num_total; i++) {
+    std::vector<uint32_t> v = dt_add_ctx_cty.getElementVec(i);
+    if (v[0] != (x[i] + y[i])) {
+      verify = false;
+      break;
+    }
+  }
+  std::cout << "Test (x + y) == dec(enc(x) + enc(y)) -- "
+            << (verify ? "pass" : "fail") << std::endl
+            << std::endl;
+
+  // Perform enc(x) + y
+  std::cout << "--- IPCL CipherText + PlainText ---" << std::endl;
+  ipcl::CipherText ct_add_ctx_pty = ct_x + pt_y;
+  ipcl::PlainText dt_add_ctx_pty = key.priv_key.decrypt(ct_add_ctx_pty);
+
+  // verify result
+  verify = true;
+  for (int i = 0; i < num_total; i++) {
+    std::vector<uint32_t> v = dt_add_ctx_pty.getElementVec(i);
+    if (v[0] != (x[i] + y[i])) {
+      verify = false;
+      break;
+    }
+  }
+  std::cout << "Test (x + y) == dec(enc(x) + y) -- "
+            << (verify ? "pass" : "fail") << std::endl
+            << std::endl;
+
+  // Perform enc(x) * y
+  std::cout << "--- IPCL CipherText * PlainText ---" << std::endl;
+  ipcl::CipherText ct_mul_ctx_pty = ct_x * pt_y;
+  ipcl::PlainText dt_mul_ctx_pty = key.priv_key.decrypt(ct_mul_ctx_pty);
+
+  // verify result
+  verify = true;
+  for (int i = 0; i < num_total; i++) {
+    std::vector<uint32_t> v = dt_mul_ctx_pty.getElementVec(i);
+    if (v[0] != (x[i] * y[i])) {
+      verify = false;
+      break;
+    }
+  }
+  std::cout << "Test (x * y) == dec(enc(x) * y) -- "
+            << (verify ? "pass" : "fail") << std::endl;
+
+  ipcl::setHybridOff();
+
+  ipcl::terminateContext();
+  std::cout << "Complete!" << std::endl;
+}
diff --git a/example/example_encrypt_decrypt.cpp b/example/example_encrypt_decrypt.cpp
new file mode 100644
index 0000000..e959a01
--- /dev/null
+++ b/example/example_encrypt_decrypt.cpp
@@ -0,0 +1,59 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+/*
+  Example of encryption and decryption
+*/
+#include <climits>
+#include <iostream>
+#include <random>
+#include <vector>
+
+#include "ipcl/ipcl.hpp"
+
+int main() {
+  std::cout << std::endl;
+  std::cout << "======================================" << std::endl;
+  std::cout << "Example: Encrypt and Decrypt with IPCL" << std::endl;
+  std::cout << "======================================" << std::endl;
+
+  ipcl::initializeContext("QAT");
+
+  const uint32_t num_total = 20;
+
+  ipcl::KeyPair key = ipcl::generateKeypair(2048, true);
+
+  std::vector<uint32_t> exp_value(num_total);
+
+  std::random_device dev;
+  std::mt19937 rng(dev());
+  std::uniform_int_distribution<std::mt19937::result_type> dist(0, UINT_MAX);
+
+  for (int i = 0; i < num_total; i++) {
+    exp_value[i] = dist(rng);
+  }
+
+  ipcl::PlainText pt = ipcl::PlainText(exp_value);
+
+  ipcl::setHybridMode(ipcl::HybridMode::OPTIMAL);
+
+  ipcl::CipherText ct = key.pub_key.encrypt(pt);
+  ipcl::PlainText dt = key.priv_key.decrypt(ct);
+
+  ipcl::setHybridOff();
+
+  // verify result
+  bool verify = true;
+  for (int i = 0; i < num_total; i++) {
+    std::vector<uint32_t> v = dt.getElementVec(i);
+    if (v[0] != exp_value[i]) {
+      verify = false;
+      break;
+    }
+  }
+  std::cout << "Test pt == dec(enc(pt)) -- " << (verify ? "pass" : "fail")
+            << std::endl;
+
+  ipcl::terminateContext();
+  std::cout << "Complete!" << std::endl << std::endl;
+}
diff --git a/example/example_hybridmode.cpp b/example/example_hybridmode.cpp
new file mode 100644
index 0000000..4ac1f91
--- /dev/null
+++ b/example/example_hybridmode.cpp
@@ -0,0 +1,100 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+/*
+  Example of hybrid mode usage (IPP-Crypto, QAT and OPTIMAL modes)
+*/
+#include <chrono>  // NOLINT [build/c++11]
+#include <climits>
+#include <iostream>
+#include <random>
+#include <vector>
+
+#include "ipcl/ipcl.hpp"
+
+typedef std::chrono::high_resolution_clock::time_point tVar;
+#define tNow() std::chrono::high_resolution_clock::now()
+#define tStart(t) t = tNow()
+#define tEnd(t) \
+  std::chrono::duration_cast<std::chrono::milliseconds>(tNow() - t).count()
+
+int main() {
+  std::cout << std::endl;
+  std::cout << "===================================" << std::endl;
+  std::cout << "Example: Hybrid Mode usage with QAT" << std::endl;
+  std::cout << "===================================" << std::endl;
+
+  ipcl::initializeContext("QAT");
+  tVar t;
+  double elapsed(0.);
+
+  const uint32_t num_total = 64;
+
+  ipcl::KeyPair key = ipcl::generateKeypair(2048, true);
+
+  std::vector<BigNumber> exp_value(num_total);
+
+  std::random_device dev;
+  std::mt19937 rng(dev());
+  std::uniform_int_distribution<std::mt19937::result_type> dist(0, UINT_MAX);
+  BigNumber bigNum =
+      "0xff03b1a74827c746db83d2eaff00067622f545b62584321256e62b01509f10962f9c5c"
+      "8fd0b7f5184a9ce8e81f439df47dda14563dd55a221799d2aa57ed2713271678a5a0b8b4"
+      "0a84ad13d5b6e6599e6467c670109cf1f45ccfed8f75ea3b814548ab294626fe4d14ff76"
+      "4dd8b091f11a0943a2dd2b983b0df02f4c4d00b413acaabc1dc57faa9fd6a4274c4d5887"
+      "65a1d3311c22e57d8101431b07eb3ddcb05d77d9a742ac2322fe6a063bd1e05acb13b0fe"
+      "91c70115c2b1eee1155e072527011a5f849de7072a1ce8e6b71db525fbcda7a89aaed46d"
+      "27aca5eaeaf35a26270a4a833c5cda681ffd49baa0f610bad100cdf47cc86e5034e2a0b2"
+      "179e04ec7";
+
+  for (int i = 0; i < num_total; i++) {
+    exp_value[i] = bigNum - dist(rng);
+  }
+
+  ipcl::PlainText pt = ipcl::PlainText(exp_value);
+
+  // Encrypt/Decrypt - IPP-Crypto only mode
+  ipcl::setHybridMode(ipcl::HybridMode::IPP);
+  tStart(t);
+  ipcl::CipherText ct = key.pub_key.encrypt(pt);
+  elapsed = tEnd(t);
+  std::cout << " Encrypt - HybridMode::IPP     = " << elapsed << "ms"
+            << std::endl;
+  tStart(t);
+  ipcl::PlainText dt = key.priv_key.decrypt(ct);
+  elapsed = tEnd(t);
+  std::cout << " Decrypt - HybridMode::IPP     = " << elapsed << "ms"
+            << std::endl
+            << std::endl;
+
+  // Encrypt/Decrypt - QAT only mode
+  ipcl::setHybridMode(ipcl::HybridMode::QAT);
+  tStart(t);
+  ct = key.pub_key.encrypt(pt);
+  elapsed = tEnd(t);
+  std::cout << " Encrypt - HybridMode::QAT     = " << elapsed << "ms"
+            << std::endl;
+  tStart(t);
+  dt = key.priv_key.decrypt(ct);
+  elapsed = tEnd(t);
+  std::cout << " Decrypt - HybridMode::QAT     = " << elapsed << "ms"
+            << std::endl
+            << std::endl;
+
+  // Encrypt/Decrypt - OPTIMAL mode
+  ipcl::setHybridMode(ipcl::HybridMode::OPTIMAL);
+  tStart(t);
+  ct = key.pub_key.encrypt(pt);
+  elapsed = tEnd(t);
+  std::cout << " Encrypt - HybridMode::OPTIMAL = " << elapsed << "ms"
+            << std::endl;
+  tStart(t);
+  dt = key.priv_key.decrypt(ct);
+  elapsed = tEnd(t);
+  std::cout << " Decrypt - HybridMode::OPTIMAL = " << elapsed << "ms"
+            << std::endl
+            << std::endl;
+
+  ipcl::terminateContext();
+  std::cout << "Complete!" << std::endl << std::endl;
+}
diff --git a/example/test.cpp b/example/test.cpp
deleted file mode 100644
index 5ac7a31..0000000
--- a/example/test.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright (C) 2022 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include <climits>
-#include <iostream>
-#include <ipcl/ipcl.hpp>
-#include <random>
-#include <vector>
-
-int main() {
-  const uint32_t num_values = 9;
-
-  ipcl::keyPair key = ipcl::generateKeypair(2048, true);
-
-  std::vector<uint32_t> exp_value(num_values);
-  ipcl::PlainText pt;
-  ipcl::CipherText ct;
-  ipcl::PlainText dt;
-
-  std::random_device dev;
-  std::mt19937 rng(dev());
-  std::uniform_int_distribution<std::mt19937::result_type> dist(0, UINT_MAX);
-
-  for (int i = 0; i < num_values; i++) {
-    exp_value[i] = dist(rng);
-  }
-
-  pt = ipcl::PlainText(exp_value);
-  ct = key.pub_key->encrypt(pt);
-  dt = key.priv_key->decrypt(ct);
-
-  for (int i = 0; i < num_values; i++) {
-    std::vector<uint32_t> v = dt.getElementVec(i);
-    bool chk = v[0] == exp_value[i];
-    std::cout << (chk ? "pass" : "fail") << std::endl;
-  }
-
-  delete key.pub_key;
-  delete key.priv_key;
-}
diff --git a/ipcl/CMakeLists.txt b/ipcl/CMakeLists.txt
index 1966aae..a601caf 100644
--- a/ipcl/CMakeLists.txt
+++ b/ipcl/CMakeLists.txt
@@ -2,18 +2,19 @@
 # SPDX-License-Identifier: Apache-2.0
 
 set(IPCL_SRCS pri_key.cpp
-							pub_key.cpp
-							keygen.cpp
-							bignum.cpp
-							mod_exp.cpp
-                            base_text.cpp
-                            plaintext.cpp
-                            ciphertext.cpp
-                            util.cpp
-                            common.cpp
+              pub_key.cpp
+              keygen.cpp
+              bignum.cpp
+              mod_exp.cpp
+              base_text.cpp
+              plaintext.cpp
+              ciphertext.cpp
+              utils/context.cpp
+              utils/util.cpp
+              utils/common.cpp
+              utils/parse_cpuinfo.cpp
 )
 
-
 if(IPCL_SHARED)
 	add_library(ipcl SHARED ${IPCL_SRCS})
 else()
@@ -22,48 +23,82 @@ endif()
 
 add_library(IPCL::ipcl ALIAS ipcl)
 
+set_target_properties(ipcl PROPERTIES
+    BUILD_WITH_INSTALL_RPATH FALSE
+    LINK_FLAGS "-Wl,-rpath,'$ORIGIN' -Wl,-rpath,'$ORIGIN'/ippcrypto -Wl,-rpath,'$ORIGIN'/cpufeatures")
+
+target_include_directories(ipcl PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
+
+# include and install definition of IPCL
 target_include_directories(ipcl
-	PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
 	PUBLIC $<BUILD_INTERFACE:${IPCL_INC_DIR}>
-	PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+	PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ipcl>
+)
+
+install(DIRECTORY ${IPCL_INC_DIR}/
+	DESTINATION ${IPCL_INSTALL_INCLUDEDIR}
+	FILES_MATCHING
+	PATTERN "*.hpp"
+	PATTERN "*.h"
 )
 
+# CEREAL (third party dep): include and install definition
+add_dependencies(ipcl ext_cereal)
+target_include_directories(ipcl
+    PUBLIC $<BUILD_INTERFACE:${CEREAL_INC_DIR}>
+	PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ipcl>
+)
+
+install(DIRECTORY ${CEREAL_INC_DIR}/
+    DESTINATION ${IPCL_INSTALL_INCLUDEDIR}
+    FILES_MATCHING
+    PATTERN "*.hpp"
+    PATTERN "*.h"
+)
 
+# IPP-Crypto (third party dep): include and install definition
 target_include_directories(ipcl
-	PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
-	PUBLIC $<BUILD_INTERFACE:${IPPCRYPTO_INC_DIR}>
-	PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+    PUBLIC $<BUILD_INTERFACE:${IPPCRYPTO_INC_DIR}>
+    PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ipcl/ippcrypto>
 )
 
-if(IPCL_DETECT_IFMA_RUNTIME)
+install(DIRECTORY ${IPPCRYPTO_INC_DIR}/
+    DESTINATION ${IPCL_INSTALL_INCLUDEDIR}/ippcrypto
+    FILES_MATCHING
+    PATTERN "*.hpp"
+    PATTERN "*.h"
+)
+
+# include and install definition of cpu_features
+if(IPCL_DETECT_CPU_RUNTIME)
 	target_include_directories(ipcl
-		PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
-		PUBLIC $<BUILD_INTERFACE:${CPUFEATURES_INC_DIR}>
-		PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+        PUBLIC $<BUILD_INTERFACE:${CPUFEATURES_INC_DIR}>
+		PRIVATE $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ipcl>
 	)
 	install(DIRECTORY ${CPUFEATURES_INC_DIR}/
-		DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}
+		DESTINATION ${IPCL_INSTALL_INCLUDEDIR}
 		FILES_MATCHING
 		PATTERN "*.hpp"
-		PATTERN "*.h")
-
+		PATTERN "*.h"
+    )
 endif()
 
-install(DIRECTORY ${IPCL_INC_DIR}/
-        DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}
-        FILES_MATCHING
-        PATTERN "*.hpp"
-        PATTERN "*.h")
+# include and install definition of he_qat
+if(IPCL_ENABLE_QAT)
+    ipcl_define_icp_variables(icp_inc_dir)
+    target_include_directories(ipcl
+        PRIVATE "$<BUILD_INTERFACE:${icp_inc_dir}>"
+    )
 
-install(DIRECTORY ${IPPCRYPTO_INC_DIR}/
-        DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}
-        FILES_MATCHING
-        PATTERN "*.hpp"
-        PATTERN "*.h")
+    target_include_directories(ipcl
+        PRIVATE "$<BUILD_INTERFACE:${HE_QAT_INC_DIR}>"
+        PRIVATE $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/heqat>  # GNUInstallDirs spelling, as used by the other calls
+    )
+endif()
 
 find_package(OpenSSL REQUIRED)
 find_package(Threads REQUIRED)
-target_link_libraries(ipcl PUBLIC OpenSSL::SSL OpenSSL::Crypto Threads::Threads -lnuma)
+target_link_libraries(ipcl PUBLIC OpenSSL::SSL OpenSSL::Crypto Threads::Threads)
 
 if(IPCL_ENABLE_OMP)
   find_package(OpenMP REQUIRED)
@@ -71,33 +106,34 @@ if(IPCL_ENABLE_OMP)
 endif()
 
 if(IPCL_SHARED)
-	target_link_libraries(ipcl PRIVATE libippcrypto)
-	if(IPCL_DETECT_IFMA_RUNTIME)
-		target_link_libraries(ipcl PRIVATE libcpu_features)
-		target_include_directories(ipcl PRIVATE ${CPUFEATURES_INC_DIR})
+	target_link_libraries(ipcl PRIVATE IPPCP::ippcp IPPCP::crypto_mb)
+	if(IPCL_DETECT_CPU_RUNTIME)
+        target_link_libraries(ipcl PRIVATE libcpu_features)
+	endif()
+	if(IPCL_ENABLE_QAT)
+	    target_link_libraries(ipcl PRIVATE he_qat udev z)
 	endif()
-	target_include_directories(ipcl PRIVATE ${IPPCRYPTO_INC_DIR})
 else()
-	ipcl_create_archive(ipcl libippcrypto::ippcp)
-	ipcl_create_archive(ipcl libippcrypto::crypto_mb)
-	if(IPCL_DETECT_IFMA_RUNTIME)
-		ipcl_create_archive(ipcl libcpu_features)
-		target_include_directories(ipcl PRIVATE ${CPUFEATURES_INC_DIR})
+    ipcl_create_archive(ipcl IPPCP::crypto_mb)
+    ipcl_create_archive(ipcl IPPCP::ippcp)
+	if(IPCL_ENABLE_QAT)
+        ipcl_create_archive(ipcl he_qat)
+        target_link_libraries(ipcl PRIVATE udev z)
+    endif()
+
+	if(IPCL_DETECT_CPU_RUNTIME)
+        ipcl_create_archive(ipcl libcpu_features)
 	endif()
-	target_include_directories(ipcl PRIVATE ${IPPCRYPTO_INC_DIR})
 endif()
 
 set_target_properties(ipcl PROPERTIES POSITION_INDEPENDENT_CODE ON)
 set_target_properties(ipcl PROPERTIES VERSION ${IPCL_VERSION})
-
 if(IPCL_DEBUG)
 	set_target_properties(ipcl PROPERTIES OUTPUT_NAME "ipcl_debug")
 else()
 	set_target_properties(ipcl PROPERTIES OUTPUT_NAME "ipcl")
 endif()
 
-install(TARGETS ipcl DESTINATION ${CMAKE_INSTALL_LIBDIR})
-
 # config cmake config and target file
 set(IPCL_TARGET_FILENAME ${CMAKE_CURRENT_BINARY_DIR}/cmake/ipcl-${IPCL_VERSION}/IPCLTargets.cmake)
 set(IPCL_CONFIG_IN_FILENAME ${IPCL_CMAKE_PATH}/IPCLConfig.cmake.in)
@@ -125,9 +161,9 @@ configure_package_config_file(
 install(
     TARGETS ipcl
     EXPORT IPCLTargets
-    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
-    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
-    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+    ARCHIVE DESTINATION ${IPCL_INSTALL_LIBDIR}
+    LIBRARY DESTINATION ${IPCL_INSTALL_LIBDIR}
+    RUNTIME DESTINATION ${IPCL_INSTALL_LIBDIR}
     )
 
 install(FILES ${IPCL_CONFIG_FILENAME}
diff --git a/ipcl/base_text.cpp b/ipcl/base_text.cpp
index 4a1b0a6..ab770da 100644
--- a/ipcl/base_text.cpp
+++ b/ipcl/base_text.cpp
@@ -3,7 +3,7 @@
 
 #include "ipcl/base_text.hpp"
 
-#include "ipcl/util.hpp"
+#include "ipcl/utils/util.hpp"
 
 namespace ipcl {
 
diff --git a/ipcl/bignum.cpp b/ipcl/bignum.cpp
index 7a685db..03045e6 100644
--- a/ipcl/bignum.cpp
+++ b/ipcl/bignum.cpp
@@ -18,6 +18,7 @@
 
 #include <cstdlib>
 #include <cstring>
+#include <iostream>
 
 //////////////////////////////////////////////////////////////////////
 //
@@ -506,3 +507,59 @@ void BigNumber::num2char(std::vector<Ipp8u>& dest) const {
   int len = (bnBitLen + 7) >> 3;
   dest.assign(bnData, bnData + len);
 }
+
+bool BigNumber::fromBin(BigNumber& bn, const unsigned char* data, int len) {
+  if (NULL == data || len <= 0) return false;
+
+  // Stage the bytes in a zeroed word buffer whose size is rounded up, so a len
+  // that is not a multiple of 4 cannot write past the storage of the number
+  std::vector<Ipp32u> words((len + 3) / 4, 0);
+
+  // Convert the big endian input to little endian format
+  unsigned char* data_ = reinterpret_cast<unsigned char*>(words.data());
+  for (int i = 0; i < len; i++) data_[i] = data[len - 1 - i];
+
+  // Create BigNumber from the reordered words
+  bn = BigNumber(words.data(), static_cast<int>(words.size()));
+  return true;
+}
+bool BigNumber::toBin(unsigned char* data, int len, const BigNumber& bn) {
+  if (NULL == data || len <= 0) return false;
+
+  // Extract raw vector of data in little endian format
+  int bitSize = 0;
+  Ipp32u* ref_bn_data_ = NULL;
+  ippsRef_BN(NULL, &bitSize, &ref_bn_data_, BN(bn));
+
+  // Revert it to big endian format; fail if bn does not fit in len bytes
+  int bitSizeLen = BITSIZE_WORD(bitSize) * 4;
+  if ((bitSize + 7) / 8 > len) return false;
+  unsigned char* data_ = reinterpret_cast<unsigned char*>(ref_bn_data_);
+  for (int i = 0; i < bitSizeLen && i < len; i++) data[len - 1 - i] = data_[i];
+  return true;
+}
+
+// Writes bn as big endian bytes into a malloc'ed buffer (bin[0], *len bytes).
+bool BigNumber::toBin(unsigned char** bin, int* len, const BigNumber& bn) {
+  if (NULL == bin || NULL == len) return false;
+
+  // Extract raw vector of data in little endian format
+  int bitSize = 0;
+  Ipp32u* ref_bn_data_ = NULL;
+  ippsRef_BN(NULL, &bitSize, &ref_bn_data_, BN(bn));
+
+  // Revert it to big endian format
+  int bitSizeLen = BITSIZE_WORD(bitSize) * 4;
+  *len = bitSizeLen;
+  bin[0] = reinterpret_cast<unsigned char*>(
+      malloc(bitSizeLen * sizeof(unsigned char)));
+  if (NULL == bin[0]) return false;  // must precede memset: no NULL write
+  memset(bin[0], 0, *len);
+
+  unsigned char* data_out = bin[0];
+  unsigned char* bn_data_ = reinterpret_cast<unsigned char*>(ref_bn_data_);
+  for (int i = 0; i < bitSizeLen; i++)
+    data_out[bitSizeLen - 1 - i] = bn_data_[i];
+
+  return true;
+}
diff --git a/ipcl/ciphertext.cpp b/ipcl/ciphertext.cpp
index 685b264..e0d4b23 100644
--- a/ipcl/ciphertext.cpp
+++ b/ipcl/ciphertext.cpp
@@ -8,27 +8,25 @@
 #include "ipcl/mod_exp.hpp"
 
 namespace ipcl {
-CipherText::CipherText(const PublicKey* pub_key, const uint32_t& n)
-    : BaseText(n), m_pubkey(pub_key) {}
+CipherText::CipherText(const PublicKey& pk, const uint32_t& n)
+    : BaseText(n), m_pk(std::make_shared<PublicKey>(pk)) {}
 
-CipherText::CipherText(const PublicKey* pub_key,
-                       const std::vector<uint32_t>& n_v)
-    : BaseText(n_v), m_pubkey(pub_key) {}
+CipherText::CipherText(const PublicKey& pk, const std::vector<uint32_t>& n_v)
+    : BaseText(n_v), m_pk(std::make_shared<PublicKey>(pk)) {}
 
-CipherText::CipherText(const PublicKey* pub_key, const BigNumber& bn)
-    : BaseText(bn), m_pubkey(pub_key) {}
+CipherText::CipherText(const PublicKey& pk, const BigNumber& bn)
+    : BaseText(bn), m_pk(std::make_shared<PublicKey>(pk)) {}
 
-CipherText::CipherText(const PublicKey* pub_key,
-                       const std::vector<BigNumber>& bn_v)
-    : BaseText(bn_v), m_pubkey(pub_key) {}
+CipherText::CipherText(const PublicKey& pk, const std::vector<BigNumber>& bn_v)
+    : BaseText(bn_v), m_pk(std::make_shared<PublicKey>(pk)) {}
 
 CipherText::CipherText(const CipherText& ct) : BaseText(ct) {
-  this->m_pubkey = ct.m_pubkey;
+  this->m_pk = ct.m_pk;
 }
 
 CipherText& CipherText::operator=(const CipherText& other) {
   BaseText::operator=(other);
-  this->m_pubkey = other.m_pubkey;
+  this->m_pk = other.m_pk;
 
   return *this;
 }
@@ -38,7 +36,7 @@ CipherText CipherText::operator+(const CipherText& other) const {
   std::size_t b_size = other.getSize();
   ERROR_CHECK(this->m_size == b_size || b_size == 1,
               "CT + CT error: Size mismatch!");
-  ERROR_CHECK(m_pubkey->getN() == other.m_pubkey->getN(),
+  ERROR_CHECK(*(m_pk->getN()) == *(other.m_pk->getN()),
               "CT + CT error: 2 different public keys detected!");
 
   const auto& a = *this;
@@ -46,7 +44,7 @@ CipherText CipherText::operator+(const CipherText& other) const {
 
   if (m_size == 1) {
     BigNumber sum = a.raw_add(a.m_texts.front(), b.getTexts().front());
-    return CipherText(m_pubkey, sum);
+    return CipherText(*m_pk, sum);
   } else {
     std::vector<BigNumber> sum(m_size);
 
@@ -69,14 +67,14 @@ CipherText CipherText::operator+(const CipherText& other) const {
       for (std::size_t i = 0; i < m_size; i++)
         sum[i] = a.raw_add(a.m_texts[i], b.m_texts[i]);
     }
-    return CipherText(m_pubkey, sum);
+    return CipherText(*m_pk, sum);
   }
 }
 
 // CT + PT
 CipherText CipherText::operator+(const PlainText& other) const {
   // convert PT to CT
-  CipherText b = this->m_pubkey->encrypt(other, false);
+  CipherText b = this->m_pk->encrypt(other, false);
   // calculate CT + CT
   return this->operator+(b);
 }
@@ -92,7 +90,7 @@ CipherText CipherText::operator*(const PlainText& other) const {
 
   if (m_size == 1) {
     BigNumber product = a.raw_mul(a.m_texts.front(), b.getTexts().front());
-    return CipherText(m_pubkey, product);
+    return CipherText(*m_pk, product);
   } else {
     std::vector<BigNumber> product;
     if (b_size == 1) {
@@ -103,7 +101,7 @@ CipherText CipherText::operator*(const PlainText& other) const {
       // multiply vector by vector
       product = a.raw_mul(a.m_texts, b.getTexts());
     }
-    return CipherText(m_pubkey, product);
+    return CipherText(*m_pk, product);
   }
 }
 
@@ -111,10 +109,10 @@ CipherText CipherText::getCipherText(const size_t& idx) const {
   ERROR_CHECK((idx >= 0) && (idx < m_size),
               "CipherText::getCipherText index is out of range");
 
-  return CipherText(m_pubkey, m_texts[idx]);
+  return CipherText(*m_pk, m_texts[idx]);
 }
 
-const PublicKey* CipherText::getPubKey() const { return m_pubkey; }
+std::shared_ptr<PublicKey> CipherText::getPubKey() const { return m_pk; }
 
 CipherText CipherText::rotate(int shift) const {
   ERROR_CHECK(m_size != 1, "rotate: Cannot rotate single CipherText");
@@ -122,7 +120,7 @@ CipherText CipherText::rotate(int shift) const {
               "rotate: Cannot shift more than the test size");
 
   if (shift == 0 || shift == m_size || shift == (-1) * static_cast<int>(m_size))
-    return CipherText(m_pubkey, m_texts);
+    return CipherText(*m_pk, m_texts);
 
   if (shift > 0)
     shift = m_size - shift;
@@ -131,25 +129,36 @@ CipherText CipherText::rotate(int shift) const {
 
   std::vector<BigNumber> new_bn = getTexts();
   std::rotate(std::begin(new_bn), std::begin(new_bn) + shift, std::end(new_bn));
-  return CipherText(m_pubkey, new_bn);
+  return CipherText(*m_pk, new_bn);
 }
 
 BigNumber CipherText::raw_add(const BigNumber& a, const BigNumber& b) const {
   // Hold a copy of nsquare for multi-threaded
-  const BigNumber& sq = m_pubkey->getNSQ();
+  // The BigNumber % operator is not thread safe
+  // const BigNumber& sq = *(m_pk->getNSQ());
+  const BigNumber sq = *(m_pk->getNSQ());
   return a * b % sq;
 }
 
 BigNumber CipherText::raw_mul(const BigNumber& a, const BigNumber& b) const {
-  const BigNumber& sq = m_pubkey->getNSQ();
-  return ipcl::ippModExp(a, b, sq);
+  const BigNumber& sq = *(m_pk->getNSQ());
+  return modExp(a, b, sq);
 }
 
 std::vector<BigNumber> CipherText::raw_mul(
     const std::vector<BigNumber>& a, const std::vector<BigNumber>& b) const {
   std::size_t v_size = a.size();
-  std::vector<BigNumber> sq(v_size, m_pubkey->getNSQ());
-  return ipcl::ippModExp(a, b, sq);
+  std::vector<BigNumber> sq(v_size, *(m_pk->getNSQ()));
+
+  // If hybrid OPTIMAL mode is used, use a special ratio
+  if (isHybridOptimal()) {
+    float qat_ratio = (v_size <= IPCL_WORKLOAD_SIZE_THRESHOLD)
+                          ? IPCL_HYBRID_MODEXP_RATIO_FULL
+                          : IPCL_HYBRID_MODEXP_RATIO_MULTIPLY;
+    setHybridRatio(qat_ratio, false);
+  }
+
+  return modExp(a, b, sq);
 }
 
 }  // namespace ipcl
diff --git a/ipcl/include/ipcl/bignum.h b/ipcl/include/ipcl/bignum.h
index b97c0c2..27760a0 100644
--- a/ipcl/include/ipcl/bignum.h
+++ b/ipcl/include/ipcl/bignum.h
@@ -14,15 +14,18 @@
  * limitations under the License.
  *******************************************************************************/
 
-#ifndef _BIGNUM_H_
-#define _BIGNUM_H_
-
-#include <ippcp.h>
+//#ifndef _BIGNUM_H_
+//#define _BIGNUM_H_
+#if !defined _BIGNUMBER_H_
+#define _BIGNUMBER_H_
 
 #include <ostream>
 #include <vector>
 
-class BigNumber {
+#include "ipcl/utils/serialize.hpp"
+#include "ippcp.h"
+
+class BigNumber : public ipcl::serializer::serializerBase {
  public:
   BigNumber(Ipp32u value = 0);
   BigNumber(Ipp32s value);
@@ -120,7 +123,30 @@ class BigNumber {
   friend std::ostream& operator<<(std::ostream& os, const BigNumber& a);
   void num2char(std::vector<Ipp8u>& dest) const;
 
+  // Support QAT data format
+  static bool fromBin(BigNumber& bn, const unsigned char* data, int len);
+  static bool toBin(unsigned char* data, int len, const BigNumber& bn);
+  static bool toBin(unsigned char** data, int* len, const BigNumber& bn);
+
  protected:
+  friend class cereal::access;
+  template <class Archive>
+  void save(Archive& ar, const Ipp32u version) const {
+    std::vector<Ipp32u> vec;
+    num2vec(vec);
+    ar(cereal::make_nvp("BigNumber", vec));
+  }
+
+  template <class Archive>
+  void load(Archive& ar, const Ipp32u version) {
+    std::vector<Ipp32u> vec;
+    ar(cereal::make_nvp("BigNumber", vec));
+    create(vec.data(), vec.size(), IppsBigNumPOS);
+  }
+
+  std::string serializedName() const { return "BigNumber"; }
+  static Ipp32u serializedVersion() { return 1; }
+
   bool create(const Ipp32u* pData, int length,
               IppsBigNumSGN sgn = IppsBigNumPOS);
   IppsBigNumState* m_pBN;
diff --git a/ipcl/include/ipcl/ciphertext.hpp b/ipcl/include/ipcl/ciphertext.hpp
index a742921..8c116af 100644
--- a/ipcl/include/ipcl/ciphertext.hpp
+++ b/ipcl/include/ipcl/ciphertext.hpp
@@ -4,11 +4,12 @@
 #ifndef IPCL_INCLUDE_IPCL_CIPHERTEXT_HPP_
 #define IPCL_INCLUDE_IPCL_CIPHERTEXT_HPP_
 
+#include <memory>
 #include <vector>
 
 #include "ipcl/plaintext.hpp"
 #include "ipcl/pub_key.hpp"
-#include "ipcl/util.hpp"
+#include "ipcl/utils/util.hpp"
 
 namespace ipcl {
 
@@ -20,10 +21,10 @@ class CipherText : public BaseText {
   /**
    * CipherText constructors
    */
-  CipherText(const PublicKey* pub_key, const uint32_t& n);
-  CipherText(const PublicKey* pub_key, const std::vector<uint32_t>& n_v);
-  CipherText(const PublicKey* pub_key, const BigNumber& bn);
-  CipherText(const PublicKey* pub_key, const std::vector<BigNumber>& bn_vec);
+  CipherText(const PublicKey& pk, const uint32_t& n);
+  CipherText(const PublicKey& pk, const std::vector<uint32_t>& n_v);
+  CipherText(const PublicKey& pk, const BigNumber& bn);
+  CipherText(const PublicKey& pk, const std::vector<BigNumber>& bn_vec);
 
   /**
    * CipherText copy constructor
@@ -49,7 +50,7 @@ class CipherText : public BaseText {
   /**
    * Get public key
    */
-  const PublicKey* getPubKey() const;
+  std::shared_ptr<PublicKey> getPubKey() const;
 
   /**
    * Rotate CipherText
@@ -63,7 +64,7 @@ class CipherText : public BaseText {
   std::vector<BigNumber> raw_mul(const std::vector<BigNumber>& a,
                                  const std::vector<BigNumber>& b) const;
 
-  const PublicKey* m_pubkey;  ///< Public key used to encrypt big number
+  std::shared_ptr<PublicKey> m_pk;  ///< Public key used to encrypt big number
 };
 
 }  // namespace ipcl
diff --git a/ipcl/include/ipcl/ipcl.hpp b/ipcl/include/ipcl/ipcl.hpp
index f82ad31..4aca630 100644
--- a/ipcl/include/ipcl/ipcl.hpp
+++ b/ipcl/include/ipcl/ipcl.hpp
@@ -4,18 +4,21 @@
 #ifndef IPCL_INCLUDE_IPCL_IPCL_HPP_
 #define IPCL_INCLUDE_IPCL_IPCL_HPP_
 
+#include "ipcl/mod_exp.hpp"
 #include "ipcl/pri_key.hpp"
+#include "ipcl/utils/context.hpp"
+#include "ipcl/utils/serialize.hpp"
 
 namespace ipcl {
 
 /**
  * Paillier key structure contains a public key and private key
- * pub_key: paillier public key
- * priv_key: paillier private key
+ * pub_key: paillier public key
+ * priv_key: paillier private key
  */
-struct keyPair {
-  PublicKey* pub_key;
-  PrivateKey* priv_key;
+struct KeyPair {
+  PublicKey pub_key;
+  PrivateKey priv_key;
 };
 
 /**
@@ -31,7 +34,7 @@ BigNumber getPrimeBN(int maxBitSize);
  * @param[in] enable_DJN Enable DJN (default=true)
  * @return The function return the public and private key pair
  */
-keyPair generateKeypair(int64_t n_length, bool enable_DJN = true);
+KeyPair generateKeypair(int64_t n_length, bool enable_DJN = true);
 
 }  // namespace ipcl
 #endif  // IPCL_INCLUDE_IPCL_IPCL_HPP_
diff --git a/ipcl/include/ipcl/mod_exp.hpp b/ipcl/include/ipcl/mod_exp.hpp
index 0419e60..2c9fc04 100644
--- a/ipcl/include/ipcl/mod_exp.hpp
+++ b/ipcl/include/ipcl/mod_exp.hpp
@@ -9,8 +9,81 @@
 #include "ipcl/bignum.h"
 
 namespace ipcl {
+
+/**
+ * Hybrid mode type
+ */
+enum class HybridMode {
+  OPTIMAL = 95,
+  QAT = 100,
+  PREF_QAT90 = 90,
+  PREF_QAT80 = 80,
+  PREF_QAT70 = 70,
+  PREF_QAT60 = 60,
+  HALF = 50,
+  PREF_IPP60 = 40,
+  PREF_IPP70 = 30,
+  PREF_IPP80 = 20,
+  PREF_IPP90 = 10,
+  IPP = 0,
+  UNDEFINED = -1
+};
+
+/**
+ * Set hybrid mode
+ * @param[in] mode The type of hybrid mode
+ */
+void setHybridMode(HybridMode mode);
+
+/**
+ * Set the ratio of mod exp operations calculated with QAT
+ * @param[in] qat_ratio Proportion calculated with QAT
+ * @param[in] reset_mode Whether reset the mode to UNDEFINED(default is true)
+ */
+void setHybridRatio(float qat_ratio, bool reset_mode = true);
+
+/**
+ * Turn off hybrid mod exp.
+ */
+void setHybridOff();
+
+/**
+ * Get current hybrid qat ratio
+ */
+float getHybridRatio();
+
+/**
+ * Get current hybrid mode
+ */
+HybridMode getHybridMode();
+
+/**
+ * Check current hybrid mode is OPTIMAL
+ */
+bool isHybridOptimal();
+
 /**
- * Modular exponentiation for multi buffer
+ * Modular exponentiation for multi BigNumber
+ * @param[in] base base of the exponentiation
+ * @param[in] exp pow of the exponentiation
+ * @param[in] mod modular
+ * @return the modular exponentiation result of type BigNumber
+ */
+std::vector<BigNumber> modExp(const std::vector<BigNumber>& base,
+                              const std::vector<BigNumber>& exp,
+                              const std::vector<BigNumber>& mod);
+/**
+ * Modular exponentiation for single BigNumber
+ * @param[in] base base of the exponentiation
+ * @param[in] exp pow of the exponentiation
+ * @param[in] mod modular
+ * @return the modular exponentiation result of type BigNumber
+ */
+BigNumber modExp(const BigNumber& base, const BigNumber& exp,
+                 const BigNumber& mod);
+
+/**
+ * IPP modular exponentiation for multi buffer
  * @param[in] base base of the exponentiation
  * @param[in] exp pow of the exponentiation
  * @param[in] mod modular
@@ -21,7 +94,7 @@ std::vector<BigNumber> ippModExp(const std::vector<BigNumber>& base,
                                  const std::vector<BigNumber>& mod);
 
 /**
- * Modular exponentiation for single buffer
+ * IPP modular exponentiation for single buffer
  * @param[in] base base of the exponentiation
  * @param[in] exp pow of the exponentiation
  * @param[in] mod modular
@@ -30,5 +103,16 @@ std::vector<BigNumber> ippModExp(const std::vector<BigNumber>& base,
 BigNumber ippModExp(const BigNumber& base, const BigNumber& exp,
                     const BigNumber& mod);
 
+/**
+ * QAT modular exponentiation for multi BigNumber
+ * @param[in] base base of the exponentiation
+ * @param[in] exp pow of the exponentiation
+ * @param[in] mod modular
+ * @return the modular exponentiation result of type BigNumber
+ */
+std::vector<BigNumber> qatModExp(const std::vector<BigNumber>& base,
+                                 const std::vector<BigNumber>& exp,
+                                 const std::vector<BigNumber>& mod);
+
 }  // namespace ipcl
 #endif  // IPCL_INCLUDE_IPCL_MOD_EXP_HPP_
diff --git a/ipcl/include/ipcl/plaintext.hpp b/ipcl/include/ipcl/plaintext.hpp
index fcdb49d..63618f7 100644
--- a/ipcl/include/ipcl/plaintext.hpp
+++ b/ipcl/include/ipcl/plaintext.hpp
@@ -58,29 +58,29 @@ class PlainText : public BaseText {
    * User define implicit type conversion
    * Convert 1st element to uint32_t vector.
    */
-  operator std::vector<uint32_t>();
+  operator std::vector<uint32_t>() const;
 
   /**
-   * PT + CT
+   * User define implicit type conversion
+   * Convert 1st element to type BigNumber.
    */
-  CipherText operator+(const CipherText& other) const;
+  operator BigNumber() const;
 
   /**
-   * PT * CT
+   * User define implicit type conversion
+   * Convert all element to type BigNumber.
    */
-  CipherText operator*(const CipherText& other) const;
+  operator std::vector<BigNumber>() const;
 
   /**
-   * User define implicit type conversion
-   * Convert 1st element to type BigNumber.
+   * PT + CT
    */
-  operator BigNumber();
+  CipherText operator+(const CipherText& other) const;
 
   /**
-   * User define implicit type conversion
-   * Convert all element to type BigNumber.
+   * PT * CT
    */
-  operator std::vector<BigNumber>();
+  CipherText operator*(const CipherText& other) const;
 
   /**
    * Rotate PlainText
diff --git a/ipcl/include/ipcl/pri_key.hpp b/ipcl/include/ipcl/pri_key.hpp
index d351d03..6e79e0c 100644
--- a/ipcl/include/ipcl/pri_key.hpp
+++ b/ipcl/include/ipcl/pri_key.hpp
@@ -4,6 +4,8 @@
 #ifndef IPCL_INCLUDE_IPCL_PRI_KEY_HPP_
 #define IPCL_INCLUDE_IPCL_PRI_KEY_HPP_
 
+#include <memory>
+#include <utility>
 #include <vector>
 
 #include "ipcl/ciphertext.hpp"
@@ -13,14 +15,24 @@ namespace ipcl {
 
 class PrivateKey {
  public:
+  PrivateKey() = default;
+  ~PrivateKey() = default;
+
   /**
    * PrivateKey constructor
-   * @param[in] public_key paillier public key
+   * @param[in] pk paillier public key
    * @param[in] p p of private key in paillier scheme
    * @param[in] q q of private key in paillier scheme
    */
-  PrivateKey(const PublicKey* public_key, const BigNumber& p,
-             const BigNumber& q);
+  PrivateKey(const PublicKey& pk, const BigNumber& p, const BigNumber& q);
+
+  /**
+   * PrivateKey constructor
+   * @param[in] n n of public key in paillier scheme
+   * @param[in] p p of private key in paillier scheme
+   * @param[in] q q of private key in paillier scheme
+   */
+  PrivateKey(const BigNumber& n, const BigNumber& p, const BigNumber& q);
 
   /**
    * Enable Chinese Remainder Theorem
@@ -40,27 +52,17 @@ class PrivateKey {
   /**
    * Get N of public key in paillier scheme
    */
-  BigNumber getN() const { return m_n; }
+  std::shared_ptr<BigNumber> getN() const { return m_n; }
 
   /**
    * Get p of private key in paillier scheme
    */
-  BigNumber getP() const { return m_p; }
+  std::shared_ptr<BigNumber> getP() const { return m_p; }
 
   /**
    * Get q of private key in paillier scheme
    */
-  BigNumber getQ() const { return m_q; }
-
-  /**
-   * Get bits of key
-   */
-  int getBits() const { return m_bits; }
-
-  /**
-   * Get public key
-   */
-  const PublicKey* getPubKey() const { return m_pubkey; }
+  std::shared_ptr<BigNumber> getQ() const { return m_q; }
 
   /**
    * @brief Support function for ISO/IEC 18033-6 compliance check
@@ -69,13 +71,21 @@ class PrivateKey {
    */
   BigNumber getLambda() const { return m_lambda; }
 
+  /**
+   * Check whether priv key is initialized (usable on const keys)
+   */
+  bool isInitialized() const { return m_isInitialized; }
+
  private:
-  const PublicKey* m_pubkey;
-  BigNumber m_n;
-  BigNumber m_nsquare;
-  BigNumber m_g;
-  BigNumber m_p;
-  BigNumber m_q;
+  bool m_isInitialized = false;
+  bool m_enable_crt = false;
+
+  std::shared_ptr<BigNumber> m_n;
+  std::shared_ptr<BigNumber> m_nsquare;
+  std::shared_ptr<BigNumber> m_g;
+  std::shared_ptr<BigNumber> m_p;
+  std::shared_ptr<BigNumber> m_q;
+
   BigNumber m_pminusone;
   BigNumber m_qminusone;
   BigNumber m_psquare;
@@ -85,9 +95,6 @@ class PrivateKey {
   BigNumber m_hq;
   BigNumber m_lambda;
   BigNumber m_x;
-  int m_bits;
-  int m_dwords;
-  bool m_enable_crt;
 
   /**
    * Compute L function in paillier scheme
diff --git a/ipcl/include/ipcl/pub_key.hpp b/ipcl/include/ipcl/pub_key.hpp
index 130c5bd..377a916 100644
--- a/ipcl/include/ipcl/pub_key.hpp
+++ b/ipcl/include/ipcl/pub_key.hpp
@@ -4,6 +4,8 @@
 #ifndef IPCL_INCLUDE_IPCL_PUB_KEY_HPP_
 #define IPCL_INCLUDE_IPCL_PUB_KEY_HPP_
 
+#include <memory>
+#include <utility>
 #include <vector>
 
 #include "ipcl/bignum.h"
@@ -15,6 +17,9 @@ class CipherText;
 
 class PublicKey {
  public:
+  PublicKey() = default;
+  ~PublicKey() = default;
+
   /**
    * PublicKey constructor
    * @param[in] n n of public key in paillier scheme
@@ -30,6 +35,7 @@ class PublicKey {
    * @param[in] bits bit length of public key(default value is 1024)
    * @param[in] enableDJN_ enables DJN scheme(default value is false)
    */
+
   explicit PublicKey(const Ipp32u n, int bits = 1024, bool enableDJN_ = false)
       : PublicKey(BigNumber(n), bits, enableDJN_) {}
 
@@ -56,17 +62,17 @@ class PublicKey {
   /**
    * Get N of public key in paillier scheme
    */
-  BigNumber getN() const { return m_n; }
+  std::shared_ptr<BigNumber> getN() const { return m_n; }
 
   /**
    * Get NSQ of public key in paillier scheme
    */
-  BigNumber getNSQ() const { return m_nsquare; }
+  std::shared_ptr<BigNumber> getNSQ() const { return m_nsquare; }
 
   /**
    * Get G of public key in paillier scheme
    */
-  BigNumber getG() const { return m_g; }
+  std::shared_ptr<BigNumber> getG() const { return m_g; }
 
   /**
    * Get bits of key
@@ -113,17 +119,53 @@ class PublicKey {
     return -1;
   }
 
+  /**
+   * Check whether pub key is initialized (usable on const keys)
+   */
+  bool isInitialized() const { return m_isInitialized; }
+
+  void create(const BigNumber& n, int bits, bool enableDJN_ = false);
+  void create(const BigNumber& n, int bits, const BigNumber& hs, int randbits);
+
   const void* addr = static_cast<const void*>(this);
 
  private:
-  BigNumber m_n;
-  BigNumber m_g;
-  BigNumber m_nsquare;
-  BigNumber m_hs;
+  friend class cereal::access;
+  template <class Archive>
+  void save(Archive& ar, const Ipp32u version) const {
+    ar(::cereal::make_nvp("n", *m_n));
+    ar(::cereal::make_nvp("bits", m_bits));
+    ar(::cereal::make_nvp("enable_DJN", m_enable_DJN));
+    ar(::cereal::make_nvp("hs", m_hs));
+    ar(::cereal::make_nvp("randbits", m_randbits));
+  }
+
+  template <class Archive>
+  void load(Archive& ar, const Ipp32u version) {
+    BigNumber n, hs;
+    bool enable_DJN;
+    int bits, randbits;
+    // Read into locals: m_n is null on a default-constructed key.
+    ar(::cereal::make_nvp("n", n));
+    ar(::cereal::make_nvp("bits", bits));
+    ar(::cereal::make_nvp("enable_DJN", enable_DJN));
+    ar(::cereal::make_nvp("hs", hs));
+    ar(::cereal::make_nvp("randbits", randbits));
+    // Hand the decoded values to create(), which sets up the members.
+    if (enable_DJN)
+      create(n, bits, hs, randbits);
+    else
+      create(n, bits);
+  }
+
+  bool m_isInitialized = false;
+  std::shared_ptr<BigNumber> m_n;
+  std::shared_ptr<BigNumber> m_g;
+  std::shared_ptr<BigNumber> m_nsquare;
   int m_bits;
-  int m_randbits;
   int m_dwords;
-  unsigned int m_init_seed;
+  BigNumber m_hs;
+  int m_randbits;
   bool m_enable_DJN;
   std::vector<BigNumber> m_r;
   bool m_testv;
diff --git a/ipcl/include/ipcl/common.hpp b/ipcl/include/ipcl/utils/common.hpp
similarity index 69%
rename from ipcl/include/ipcl/common.hpp
rename to ipcl/include/ipcl/utils/common.hpp
index e15d44e..67a26ad 100644
--- a/ipcl/include/ipcl/common.hpp
+++ b/ipcl/include/ipcl/utils/common.hpp
@@ -1,14 +1,22 @@
 // Copyright (C) 2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 
-#ifndef IPCL_INCLUDE_IPCL_COMMON_HPP_
-#define IPCL_INCLUDE_IPCL_COMMON_HPP_
+#ifndef IPCL_INCLUDE_IPCL_UTILS_COMMON_HPP_
+#define IPCL_INCLUDE_IPCL_UTILS_COMMON_HPP_
 
 #include "ipcl/bignum.h"
 
 namespace ipcl {
 
 constexpr int IPCL_CRYPTO_MB_SIZE = 8;
+constexpr int IPCL_QAT_MODEXP_BATCH_SIZE = 1024;
+
+constexpr int IPCL_WORKLOAD_SIZE_THRESHOLD = 128;
+
+constexpr float IPCL_HYBRID_MODEXP_RATIO_FULL = 1.0;
+constexpr float IPCL_HYBRID_MODEXP_RATIO_ENCRYPT = 0.25;
+constexpr float IPCL_HYBRID_MODEXP_RATIO_DECRYPT = 0.12;
+constexpr float IPCL_HYBRID_MODEXP_RATIO_MULTIPLY = 0.18;
 
 /**
  * Random generator wrapper.Generates a random unsigned Big Number of the
@@ -38,4 +46,4 @@ IppStatus ippGenRandomBN(IppsBigNumState* rand, int bits, void* ctx);
 BigNumber getRandomBN(int bits);
 
 }  // namespace ipcl
-#endif  // IPCL_INCLUDE_IPCL_COMMON_HPP_
+#endif  // IPCL_INCLUDE_IPCL_UTILS_COMMON_HPP_
diff --git a/ipcl/include/ipcl/utils/context.hpp b/ipcl/include/ipcl/utils/context.hpp
new file mode 100644
index 0000000..b70315b
--- /dev/null
+++ b/ipcl/include/ipcl/utils/context.hpp
@@ -0,0 +1,47 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#ifndef IPCL_INCLUDE_IPCL_UTILS_CONTEXT_HPP_
+#define IPCL_INCLUDE_IPCL_UTILS_CONTEXT_HPP_
+
+#include <string>
+
+namespace ipcl {
+
+/**
+ * Initialize device (CPU, QAT, or both) runtime context for the Paillier crypto
+ * services.
+ * @details It must be called if there is intent of using QAT devices for
+ * compute acceleration.
+ * @param[in] runtime_choice Acceptable values are "CPU", "cpu", "QAT", "qat",
+ * "HYBRID", "hybrid", "DEFAULT", "default". Anything other than the accepted
+ * values, including typos and absence thereof, will default to the "DEFAULT"
+ * runtime choice.
+ * @return true if runtime context has been properly initialized, false
+ * otherwise.
+ */
+bool initializeContext(const std::string runtime_choice);
+
+/**
+ * Terminate runtime context.
+ * @return true if runtime context has been properly terminated, false
+ * otherwise.
+ */
+bool terminateContext(void);
+
+/**
+ * Determine if QAT instances are running for IPCL.
+ * @return true if QAT instances are active and running, false otherwise.
+ */
+bool isQATRunning(void);
+
+/**
+ * Determine if QAT instances are active for IPCL.
+ * @return true if QAT instances are active, false otherwise.
+ */
+bool isQATActive(void);
+
+}  // namespace ipcl
+#endif  // IPCL_INCLUDE_IPCL_UTILS_CONTEXT_HPP_
diff --git a/ipcl/include/ipcl/utils/parse_cpuinfo.hpp b/ipcl/include/ipcl/utils/parse_cpuinfo.hpp
new file mode 100644
index 0000000..5d81163
--- /dev/null
+++ b/ipcl/include/ipcl/utils/parse_cpuinfo.hpp
@@ -0,0 +1,77 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef IPCL_INCLUDE_IPCL_UTILS_PARSE_CPUINFO_HPP_
+#define IPCL_INCLUDE_IPCL_UTILS_PARSE_CPUINFO_HPP_
+
+#include <algorithm>
+#include <exception>
+#include <fstream>
+#include <sstream>
+#include <string>
+
+namespace ipcl {
+// trim from start (in place)
+static inline void ltrim(std::string& s) {
+  s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) {
+            return !std::isspace(ch);
+          }));
+}
+
+// trim from end (in place)
+static inline void rtrim(std::string& s) {
+  s.erase(std::find_if(s.rbegin(), s.rend(),
+                       [](unsigned char ch) { return !std::isspace(ch); })
+              .base(),
+          s.end());
+}
+
+static inline void trim(std::string& s) {
+  ltrim(s);
+  rtrim(s);
+}
+
+typedef struct {
+  int n_processors;
+  int n_cores;
+  int n_nodes;
+} linuxCPUInfo;
+
+static void parseCPUInfo(linuxCPUInfo& info) {
+  std::ifstream cpuinfo;
+  cpuinfo.exceptions(std::ifstream::badbit);
+  info.n_cores = 0;
+  info.n_processors = 0;
+  info.n_nodes = 0;
+
+  try {
+    cpuinfo.open("/proc/cpuinfo", std::ios::in);
+    std::string line;
+    while (std::getline(cpuinfo, line)) {
+      std::stringstream ss(line);
+      std::string key, val;
+      if (std::getline(ss, key, ':') && std::getline(ss, val)) {
+        trim(key);
+        trim(val);
+        if (key == "processor")
+          info.n_processors++;
+        else if (key == "core id")
+          info.n_cores = std::max(info.n_cores, std::stoi(val));
+        else if (key == "physical id")
+          info.n_nodes = std::max(info.n_nodes, std::stoi(val));
+      }
+    }
+    info.n_nodes++;
+    info.n_cores = (info.n_cores + 1) * info.n_nodes;
+  } catch (const std::ifstream::failure& e) {
+    std::ostringstream log;
+    log << "\nFile: " << __FILE__ << "\nLine: " << __LINE__ << "\nError: "
+        << "cannot parse /proc/cpuinfo";
+    throw std::runtime_error(log.str());
+  }
+}
+linuxCPUInfo getLinuxCPUInfoImpl(void);
+
+}  // namespace ipcl
+
+#endif  // IPCL_INCLUDE_IPCL_UTILS_PARSE_CPUINFO_HPP_
diff --git a/ipcl/include/ipcl/utils/serialize.hpp b/ipcl/include/ipcl/utils/serialize.hpp
new file mode 100644
index 0000000..59863d7
--- /dev/null
+++ b/ipcl/include/ipcl/utils/serialize.hpp
@@ -0,0 +1,69 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef IPCL_INCLUDE_IPCL_UTILS_SERIALIZE_HPP_
+#define IPCL_INCLUDE_IPCL_UTILS_SERIALIZE_HPP_
+
+#include <cstdlib>
+#include <exception>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "cereal/archives/portable_binary.hpp"
+#include "cereal/types/vector.hpp"
+
+namespace ipcl {
+
+class PublicKey;
+class PrivateKey;
+class CipherText;
+
+namespace serializer {
+
+template <typename T>
+void serialize(std::ostream& ss, const T& obj) {
+  cereal::PortableBinaryOutputArchive archive(ss);
+  archive(obj);
+}
+
+template <typename T>
+void deserialize(std::istream& ss, T& obj) {
+  cereal::PortableBinaryInputArchive archive(ss);
+  archive(obj);
+}
+
+template <typename T>
+bool serializeToFile(const std::string& fn, const T& obj) {
+  std::ofstream ofs(fn, std::ios::out | std::ios::binary);
+  if (ofs.is_open()) {
+    serializer::serialize(ofs, obj);  // serialize() takes the stream first
+    ofs.close();
+    return true;
+  }
+  return false;
+}
+
+template <typename T>
+bool deserializeFromFile(const std::string& fn, T& obj) {
+  std::ifstream ifs(fn, std::ios::in | std::ios::binary);
+  if (ifs.is_open()) {
+    serializer::deserialize(ifs, obj);  // deserialize() takes the stream first
+    ifs.close();
+    return true;
+  }
+  return false;
+}
+
+class serializerBase {
+ public:
+  virtual ~serializerBase() {}
+  virtual std::string serializedName() const = 0;
+};
+
+};  // namespace serializer
+
+}  // namespace ipcl
+
+#endif  // IPCL_INCLUDE_IPCL_UTILS_SERIALIZE_HPP_
diff --git a/ipcl/include/ipcl/util.hpp b/ipcl/include/ipcl/utils/util.hpp
similarity index 66%
rename from ipcl/include/ipcl/util.hpp
rename to ipcl/include/ipcl/utils/util.hpp
index 202e277..17c6c4e 100644
--- a/ipcl/include/ipcl/util.hpp
+++ b/ipcl/include/ipcl/utils/util.hpp
@@ -1,12 +1,14 @@
 // Copyright (C) 2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 
-#ifndef IPCL_INCLUDE_IPCL_UTIL_HPP_
-#define IPCL_INCLUDE_IPCL_UTIL_HPP_
+#ifndef IPCL_INCLUDE_IPCL_UTILS_UTIL_HPP_
+#define IPCL_INCLUDE_IPCL_UTILS_UTIL_HPP_
 
-#ifdef IPCL_RUNTIME_MOD_EXP
+#ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES
 #include <cpu_features/cpuinfo_x86.h>
-#endif  // IPCL_RUNTIME_MOD_EXP
+
+#include "ipcl/utils/parse_cpuinfo.hpp"
+#endif  // IPCL_RUNTIME_DETECT_CPU_FEATURES
 
 #include <cstdlib>
 #include <exception>
@@ -14,7 +16,7 @@
 #include <string>
 #include <vector>
 
-#include "ipcl/common.hpp"
+#include "ipcl/utils/common.hpp"
 
 namespace ipcl {
 
@@ -41,6 +43,22 @@ inline void vec_size_check(const std::vector<T>& v, const char* file,
 
 #define VEC_SIZE_CHECK(v) vec_size_check(v, __FILE__, __LINE__)
 
+#ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES
+static const bool disable_avx512ifma =
+    (std::getenv("IPCL_DISABLE_AVX512IFMA") != nullptr);
+static const bool prefer_rdrand =
+    (std::getenv("IPCL_PREFER_RDRAND") != nullptr);
+static const bool prefer_ipp_prng =
+    (std::getenv("IPCL_PREFER_IPP_PRNG") != nullptr);
+static const cpu_features::X86Features features =
+    cpu_features::GetX86Info().features;
+static const bool has_avx512ifma = features.avx512ifma && !disable_avx512ifma;
+static const bool has_rdseed =
+    features.rdseed && !prefer_rdrand && !prefer_ipp_prng;
+static const bool has_rdrand = features.rdrnd && prefer_rdrand;
+
+#endif  // IPCL_RUNTIME_DETECT_CPU_FEATURES
+
 #ifdef IPCL_USE_OMP
 class OMPUtilities {
  public:
@@ -57,9 +75,20 @@ class OMPUtilities {
   }
 
  private:
+#ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES
+  static const linuxCPUInfo cpuinfo;
+  static const linuxCPUInfo getLinuxCPUInfo() { return getLinuxCPUInfoImpl(); }
+#endif
   static const int nodes;
   static const int cpus;
 
+  static int getNodes() {
+#ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES
+    return cpuinfo.n_nodes;
+#else
+    return IPCL_NUM_NODES;
+#endif  // IPCL_RUNTIME_DETECT_CPU_FEATURES
+  }
   static int getMaxThreads() {
 #ifdef IPCL_NUM_THREADS
     return IPCL_NUM_THREADS;
@@ -71,14 +100,6 @@ class OMPUtilities {
 
 #endif  // IPCL_USE_OMP
 
-#ifdef IPCL_RUNTIME_MOD_EXP
-static const bool disable_avx512ifma =
-    (std::getenv("IPCL_DISABLE_AVX512IFMA") != nullptr);
-static const cpu_features::X86Features features =
-    cpu_features::GetX86Info().features;
-static const bool has_avx512ifma = features.avx512ifma && !disable_avx512ifma;
-#endif  // IPCL_RUNTIME_MOD_EXP
-
 }  // namespace ipcl
 
-#endif  // IPCL_INCLUDE_IPCL_UTIL_HPP_
+#endif  // IPCL_INCLUDE_IPCL_UTILS_UTIL_HPP_
diff --git a/ipcl/keygen.cpp b/ipcl/keygen.cpp
index f2d712e..fe064bb 100644
--- a/ipcl/keygen.cpp
+++ b/ipcl/keygen.cpp
@@ -17,7 +17,7 @@ BigNumber getPrimeBN(int max_bits) {
   ippsPrimeInit(max_bits, reinterpret_cast<IppsPrimeState*>(prime_ctx.data()));
 
 #if defined(IPCL_RNG_INSTR_RDSEED) || defined(IPCL_RNG_INSTR_RDRAND)
-  bool rand_param = NULL;
+  Ipp8u* rand_param = NULL;
 #else
   auto buff = std::vector<Ipp8u>(prime_size);
   auto rand_param = buff.data();
@@ -83,7 +83,7 @@ static void getDJNBN(int64_t n_length, BigNumber& p, BigNumber& q, BigNumber& n,
            isClosePrimeBN(p, q, ref_dist));  // gcd(p-1,q-1)=2
 }
 
-keyPair generateKeypair(int64_t n_length, bool enable_DJN) {
+KeyPair generateKeypair(int64_t n_length, bool enable_DJN) {
   /*
   https://www.intel.com/content/www/us/en/develop/documentation/ipp-crypto-reference/top/multi-buffer-cryptography-functions/modular-exponentiation/mbx-exp-1024-2048-3072-4096-mb8.html
   modulus size = n * n (keySize * keySize )
@@ -104,10 +104,10 @@ keyPair generateKeypair(int64_t n_length, bool enable_DJN) {
   else
     getNormalBN(n_length, p, q, n, ref_dist);
 
-  PublicKey* public_key = new PublicKey(n, n_length, enable_DJN);
-  PrivateKey* private_key = new PrivateKey(public_key, p, q);
+  PublicKey pk(n, n_length, enable_DJN);
+  PrivateKey sk(pk, p, q);
 
-  return keyPair{public_key, private_key};
+  return KeyPair{pk, sk};
 }
 
 }  // namespace ipcl
diff --git a/ipcl/mod_exp.cpp b/ipcl/mod_exp.cpp
index 1f885a4..d56dfc5 100644
--- a/ipcl/mod_exp.cpp
+++ b/ipcl/mod_exp.cpp
@@ -3,16 +3,323 @@
 
 #include "ipcl/mod_exp.hpp"
 
-#include <crypto_mb/exp.h>
-
 #include <algorithm>
 #include <cstring>
 #include <iostream>
+#include <thread>  //NOLINT
+
+#include "crypto_mb/exp.h"
 
-#include "ipcl/util.hpp"
+#ifdef IPCL_USE_QAT
+#include <heqat/bnops.h>
+#include <heqat/common.h>
+#endif
+
+#include "ipcl/utils/util.hpp"
 
 namespace ipcl {
 
+static thread_local struct {
+  float ratio;
+  HybridMode mode;
+} g_hybrid_params = {0.0, HybridMode::OPTIMAL};
+
+static inline float scale_down(int value, float scale = 100.0) {
+  return value / scale;
+}
+
+static inline int scale_up(float value, int scale = 100) {
+  return value * scale;
+}
+
+void setHybridRatio(float ratio, bool reset_mode) {
+#ifdef IPCL_USE_QAT
+  ERROR_CHECK((ratio <= 1.0) && (ratio >= 0),
+              "setHybridRatio: Hybrid modexp qat ratio is NOT correct");
+  g_hybrid_params.ratio = ratio;
+  if (reset_mode) g_hybrid_params.mode = HybridMode::UNDEFINED;
+#endif  // IPCL_USE_QAT
+}
+
+void setHybridMode(HybridMode mode) {
+#ifdef IPCL_USE_QAT
+  int mode_value = static_cast<std::underlying_type<HybridMode>::type>(mode);
+  // UNDEFINED is -1: clamp so the stored ratio never drops below 0.
+  g_hybrid_params = {std::max(0.0f, scale_down(mode_value)), mode};
+#endif  // IPCL_USE_QAT
+}
+
+void setHybridOff() {
+#ifdef IPCL_USE_QAT
+  g_hybrid_params = {0.0, HybridMode::UNDEFINED};
+#endif  // IPCL_USE_QAT
+}
+
+float getHybridRatio() { return g_hybrid_params.ratio; }
+
+HybridMode getHybridMode() { return g_hybrid_params.mode; }
+
+bool isHybridOptimal() {
+  return g_hybrid_params.mode == HybridMode::OPTIMAL;  // already a bool
+}
+
+#ifdef IPCL_USE_QAT
+// Multiple input QAT ModExp interface to offload computation to QAT
+static std::vector<BigNumber> heQatBnModExp(
+    const std::vector<BigNumber>& base, const std::vector<BigNumber>& exponent,
+    const std::vector<BigNumber>& modulus, unsigned int batch_size) {
+  static unsigned int counter = 0;
+  int nbits = modulus.front().BitSize();
+  int length = BITSIZE_WORD(nbits) * 4;
+  nbits = 8 * length;
+
+  // Check if QAT Exec Env supports requested batch size
+  unsigned int worksize = base.size();
+  unsigned int nslices = worksize / batch_size;
+  unsigned int residue = worksize % batch_size;
+
+  if (0 == nslices) {
+    nslices = 1;
+    residue = 0;
+    batch_size = worksize;
+  }
+
+  HE_QAT_STATUS status = HE_QAT_STATUS_FAIL;
+
+  // TODO(fdiasmor): Try replace calloc by alloca to see impact on performance.
+  unsigned char* bn_base_data_[batch_size];
+  unsigned char* bn_exponent_data_[batch_size];
+  unsigned char* bn_modulus_data_[batch_size];
+  unsigned char* bn_remainder_data_[batch_size];
+
+#if defined(IPCL_USE_QAT_LITE)
+  // int base_len_[batch_size];
+  std::vector<int> base_len_(batch_size, 0);
+  // int exp_len_[batch_size];
+  std::vector<int> exp_len_(batch_size, 0);
+#endif
+
+  // Pre-allocate memory used to batch input data
+  for (int i = 0; i < batch_size; i++) {
+#if !defined(IPCL_USE_QAT_LITE)
+    bn_base_data_[i] = reinterpret_cast<unsigned char*>(
+        malloc(length * sizeof(unsigned char)));
+    bn_exponent_data_[i] = reinterpret_cast<unsigned char*>(
+        malloc(length * sizeof(unsigned char)));
+#endif
+
+    bn_modulus_data_[i] = reinterpret_cast<unsigned char*>(
+        malloc(length * sizeof(unsigned char)));
+    bn_remainder_data_[i] = reinterpret_cast<unsigned char*>(
+        malloc(length * sizeof(unsigned char)));
+
+#if !defined(IPCL_USE_QAT_LITE)
+    ERROR_CHECK(
+        bn_base_data_[i] != nullptr && bn_exponent_data_[i] != nullptr &&
+            bn_modulus_data_[i] != nullptr && bn_remainder_data_[i] != nullptr,
+        "qatMultiBuffExp: alloc memory for error");
+#else
+    ERROR_CHECK(
+        bn_modulus_data_[i] != nullptr && bn_remainder_data_[i] != nullptr,
+        "qatMultiBuffExp: alloc memory for error");
+#endif
+  }  // End preparing input containers
+
+  // Container to hold total number of outputs to be returned
+  std::vector<BigNumber> remainder(worksize, 0);
+
+  for (unsigned int j = 0; j < nslices; j++) {
+    // Prepare batch of input data
+    for (unsigned int i = 0; i < batch_size; i++) {
+      bool ret = false;
+
+#if !defined(IPCL_USE_QAT_LITE)
+      memset(bn_base_data_[i], 0, length);
+      memset(bn_exponent_data_[i], 0, length);
+      memset(bn_modulus_data_[i], 0, length);
+      memset(bn_remainder_data_[i], 0, length);
+
+      ret =
+          BigNumber::toBin(bn_base_data_[i], length, base[j * batch_size + i]);
+      if (!ret) {
+        printf("bn_base_data_: failed at bigNumberToBin()\n");
+        exit(1);
+      }
+
+      ret = BigNumber::toBin(bn_exponent_data_[i], length,
+                             exponent[j * batch_size + i]);
+      if (!ret) {
+        printf("bn_exponent_data_: failed at bigNumberToBin()\n");
+        exit(1);
+      }
+#else
+      base_len_[i] = 0;
+      ret = BigNumber::toBin(&bn_base_data_[i], &base_len_[i],
+                             base[j * batch_size + i]);
+      if (!ret) {
+        printf("bn_base_data_: failed at bigNumberToBin()\n");
+        exit(1);
+      }
+      exp_len_[i] = 0;
+      ret = BigNumber::toBin(&bn_exponent_data_[i], &exp_len_[i],
+                             exponent[j * batch_size + i]);
+      if (!ret) {
+        printf("bn_exponent_data_: failed at bigNumberToBin()\n");
+        exit(1);
+      }
+#endif
+
+      ret = BigNumber::toBin(bn_modulus_data_[i], length,
+                             modulus[j * batch_size + i]);
+      if (!ret) {
+        printf("bn_modulus_data_: failed at bigNumberToBin()\n");
+        exit(1);
+      }
+    }  // End input setup
+
+    // Process batch of input
+    for (unsigned int i = 0; i < batch_size; i++) {
+#if !defined(IPCL_USE_QAT_LITE)
+      // Assumes all inputs and the output have the same length
+      status =
+          HE_QAT_bnModExp(bn_remainder_data_[i], bn_base_data_[i],
+                          bn_exponent_data_[i], bn_modulus_data_[i], nbits);
+#else
+      // Base and exponent can be of variable length (for more or less)
+      status = HE_QAT_bnModExp_lite(bn_remainder_data_[i], bn_base_data_[i],
+                                    base_len_[i], bn_exponent_data_[i],
+                                    exp_len_[i], bn_modulus_data_[i], nbits);
+#endif
+      if (HE_QAT_STATUS_SUCCESS != status) {
+        HE_QAT_PRINT_ERR("\nQAT bnModExp with BigNumber failed\n");
+      }
+    }
+    getBnModExpRequest(batch_size);
+
+    // Collect results and pack them into BigNumber
+    for (unsigned int i = 0; i < batch_size; i++) {
+      bool ret = BigNumber::fromBin(remainder[j * batch_size + i],
+                                    bn_remainder_data_[i], length);
+      if (!ret) {
+        printf("bn_remainder_data_: failed at bignumbertobin()\n");
+        exit(1);
+      }
+#if defined(IPCL_USE_QAT_LITE)
+      free(bn_base_data_[i]);
+      bn_base_data_[i] = NULL;
+      free(bn_exponent_data_[i]);
+      bn_exponent_data_[i] = NULL;
+#endif
+    }
+  }  // Batch Process
+
+  // Takes care of remaining
+  if (residue) {
+    for (unsigned int i = 0; i < residue; i++) {
+#if !defined(IPCL_USE_QAT_LITE)
+      memset(bn_base_data_[i], 0, length);
+      memset(bn_exponent_data_[i], 0, length);
+#endif
+      memset(bn_modulus_data_[i], 0, length);
+      memset(bn_remainder_data_[i], 0, length);
+    }
+
+    for (unsigned int i = 0; i < residue; i++) {
+      bool ret = false;
+#if !defined(IPCL_USE_QAT_LITE)
+      ret = BigNumber::toBin(bn_base_data_[i], length,
+                             base[nslices * batch_size + i]);
+      if (!ret) {
+        printf("bn_base_data_: failed at bigNumberToBin()\n");
+        exit(1);
+      }
+      ret = BigNumber::toBin(bn_exponent_data_[i], length,
+                             exponent[nslices * batch_size + i]);
+      if (!ret) {
+        printf("bn_exponent_data_: failed at bigNumberToBin()\n");
+        exit(1);
+      }
+#else
+      base_len_[i] = 0;
+      ret = BigNumber::toBin(&bn_base_data_[i], &base_len_[i],
+                             base[nslices * batch_size + i]);
+      if (!ret) {
+        printf("bn_base_data_: failed at bigNumberToBin()\n");
+        exit(1);
+      }
+      exp_len_[i] = 0;
+      ret = BigNumber::toBin(&bn_exponent_data_[i], &exp_len_[i],
+                             exponent[nslices * batch_size + i]);
+      if (!ret) {
+        printf("bn_exponent_data_: failed at bigNumberToBin()\n");
+        exit(1);
+      }
+#endif
+
+      ret = BigNumber::toBin(bn_modulus_data_[i], length,
+                             modulus[nslices * batch_size + i]);
+      if (!ret) {
+        printf("bn_modulus_data_: failed at bigNumberToBin()\n");
+        exit(1);
+      }
+    }  //
+
+    for (unsigned int i = 0; i < residue; i++) {
+#if !defined(IPCL_USE_QAT_LITE)
+      // Assumes all inputs and the output have the same length
+      status =
+          HE_QAT_bnModExp(bn_remainder_data_[i], bn_base_data_[i],
+                          bn_exponent_data_[i], bn_modulus_data_[i], nbits);
+#else
+      // Base and exponent can be of variable length (for more or less)
+      status = HE_QAT_bnModExp_lite(bn_remainder_data_[i], bn_base_data_[i],
+                                    base_len_[i], bn_exponent_data_[i],
+                                    exp_len_[i], bn_modulus_data_[i], nbits);
+#endif
+      if (HE_QAT_STATUS_SUCCESS != status) {
+        HE_QAT_PRINT_ERR("\nQAT bnModExp with BigNumber failed\n");
+      }
+    }
+    getBnModExpRequest(residue);
+
+    // Collect results and pack them into BigNumber
+    for (unsigned int i = 0; i < residue; i++) {
+      unsigned char* bn_remainder_ = bn_remainder_data_[i];
+      bool ret = BigNumber::fromBin(remainder[nslices * batch_size + i],
+                                    bn_remainder_, length);
+      if (!ret) {
+        printf("residue bn_remainder_data_: failed at BigNumber::fromBin()\n");
+        exit(1);
+      }
+#if defined(IPCL_USE_QAT_LITE)
+      free(bn_base_data_[i]);
+      bn_base_data_[i] = NULL;
+      free(bn_exponent_data_[i]);
+      bn_exponent_data_[i] = NULL;
+#endif
+    }
+  }
+
+  for (unsigned int i = 0; i < batch_size; i++) {
+#if !defined(IPCL_USE_QAT_LITE)
+    free(bn_base_data_[i]);
+    bn_base_data_[i] = NULL;
+    free(bn_exponent_data_[i]);
+    bn_exponent_data_[i] = NULL;
+#endif
+    free(bn_modulus_data_[i]);
+    bn_modulus_data_[i] = NULL;
+  }
+
+  for (unsigned int i = 0; i < batch_size; i++) {
+    free(bn_remainder_data_[i]);
+    bn_remainder_data_[i] = NULL;
+  }
+
+  return remainder;
+}
+#endif  // IPCL_USE_QAT
+
 static std::vector<BigNumber> ippMBModExp(const std::vector<BigNumber>& base,
                                           const std::vector<BigNumber>& exp,
                                           const std::vector<BigNumber>& mod) {
@@ -154,74 +461,21 @@ static BigNumber ippSBModExp(const BigNumber& base, const BigNumber& exp,
   return res;
 }
 
-std::vector<BigNumber> ippModExp(const std::vector<BigNumber>& base,
+std::vector<BigNumber> qatModExp(const std::vector<BigNumber>& base,
                                  const std::vector<BigNumber>& exp,
                                  const std::vector<BigNumber>& mod) {
-  std::size_t v_size = base.size();
-  std::vector<BigNumber> res(v_size);
-
-#ifdef IPCL_RUNTIME_MOD_EXP
-
-  // If there is only 1 big number, we don't need to use MBModExp
-  if (v_size == 1) {
-    res[0] = ippSBModExp(base[0], exp[0], mod[0]);
-    return res;
-  }
-
-  if (has_avx512ifma) {
-    std::size_t remainder = v_size % IPCL_CRYPTO_MB_SIZE;
-    std::size_t num_chunk =
-        (v_size + IPCL_CRYPTO_MB_SIZE - 1) / IPCL_CRYPTO_MB_SIZE;
-#ifdef IPCL_USE_OMP
-    int omp_remaining_threads = OMPUtilities::MaxThreads;
-#pragma omp parallel for num_threads( \
-    OMPUtilities::assignOMPThreads(omp_remaining_threads, num_chunk))
-#endif  // IPCL_USE_OMP
-    for (std::size_t i = 0; i < num_chunk; i++) {
-      std::size_t chunk_size = IPCL_CRYPTO_MB_SIZE;
-      if ((i == (num_chunk - 1)) && (remainder > 0)) chunk_size = remainder;
-
-      std::size_t chunk_offset = i * IPCL_CRYPTO_MB_SIZE;
-
-      auto base_start = base.begin() + chunk_offset;
-      auto base_end = base_start + chunk_size;
-
-      auto exp_start = exp.begin() + chunk_offset;
-      auto exp_end = exp_start + chunk_size;
-
-      auto mod_start = mod.begin() + chunk_offset;
-      auto mod_end = mod_start + chunk_size;
-
-      auto base_chunk = std::vector<BigNumber>(base_start, base_end);
-      auto exp_chunk = std::vector<BigNumber>(exp_start, exp_end);
-      auto mod_chunk = std::vector<BigNumber>(mod_start, mod_end);
-
-      auto tmp = ippMBModExp(base_chunk, exp_chunk, mod_chunk);
-      std::copy(tmp.begin(), tmp.end(), res.begin() + chunk_offset);
-    }
-
-    return res;
-
-  } else {
-#ifdef IPCL_USE_OMP
-    int omp_remaining_threads = OMPUtilities::MaxThreads;
-#pragma omp parallel for num_threads( \
-    OMPUtilities::assignOMPThreads(omp_remaining_threads, v_size))
-#endif  // IPCL_USE_OMP
-    for (int i = 0; i < v_size; i++)
-      res[i] = ippSBModExp(base[i], exp[i], mod[i]);
-    return res;
-  }
-
+#ifdef IPCL_USE_QAT
+  return heQatBnModExp(base, exp, mod, IPCL_QAT_MODEXP_BATCH_SIZE);
 #else
+  ERROR_CHECK(false, "qatModExp: Need to turn on IPCL_ENABLE_QAT");
+#endif  // IPCL_USE_QAT
+}
 
-#ifdef IPCL_CRYPTO_MB_MOD_EXP
-
-  // If there is only 1 big number, we don't need to use MBModExp
-  if (v_size == 1) {
-    res[0] = ippSBModExp(base[0], exp[0], mod[0]);
-    return res;
-  }
+static std::vector<BigNumber> ippMBModExpWrapper(
+    const std::vector<BigNumber>& base, const std::vector<BigNumber>& exp,
+    const std::vector<BigNumber>& mod) {
+  std::size_t v_size = base.size();
+  std::vector<BigNumber> res(v_size);
 
   std::size_t remainder = v_size % IPCL_CRYPTO_MB_SIZE;
   std::size_t num_chunk =
@@ -256,8 +510,13 @@ std::vector<BigNumber> ippModExp(const std::vector<BigNumber>& base,
   }
 
   return res;
+}
 
-#else
+static std::vector<BigNumber> ippSBModExpWrapper(
+    const std::vector<BigNumber>& base, const std::vector<BigNumber>& exp,
+    const std::vector<BigNumber>& mod) {
+  std::size_t v_size = base.size();
+  std::vector<BigNumber> res(v_size);
 
 #ifdef IPCL_USE_OMP
   int omp_remaining_threads = OMPUtilities::MaxThreads;
@@ -266,15 +525,103 @@ std::vector<BigNumber> ippModExp(const std::vector<BigNumber>& base,
 #endif  // IPCL_USE_OMP
   for (int i = 0; i < v_size; i++)
     res[i] = ippSBModExp(base[i], exp[i], mod[i]);
+
   return res;
+}
 
-#endif  // IPCL_CRYPTO_MB_MOD_EXP
+// Vector modular exponentiation on the CPU through IPP-Crypto.
+//
+// Element i of the result is computed from base[i], exp[i] and mod[i], so the
+// three vectors are indexed in lockstep; exp and mod are read at every index
+// that is valid for base.
+std::vector<BigNumber> ippModExp(const std::vector<BigNumber>& base,
+                                 const std::vector<BigNumber>& exp,
+                                 const std::vector<BigNumber>& mod) {
+  // A single element gains nothing from the multi-buffer kernel. The result
+  // vector is built only here, so larger batches skip an unused allocation.
+  if (base.size() == 1)
+    return std::vector<BigNumber>(1, ippSBModExp(base[0], exp[0], mod[0]));
+
+#ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES
+  // Use the multi-buffer kernel only when has_avx512ifma is set.
+  return has_avx512ifma ? ippMBModExpWrapper(base, exp, mod)
+                        : ippSBModExpWrapper(base, exp, mod);
+#elif defined(IPCL_CRYPTO_MB_MOD_EXP)
+  // defined() keeps this branch valid even if the macro expands to nothing.
+  return ippMBModExpWrapper(base, exp, mod);
+#else
+  return ippSBModExpWrapper(base, exp, mod);
+#endif  // IPCL_RUNTIME_DETECT_CPU_FEATURES
+}
+
+// Batched modular exponentiation. Depending on the build flags and on
+// g_hybrid_params.ratio, the batch runs on QAT, on IPP-Crypto, or on both.
+std::vector<BigNumber> modExp(const std::vector<BigNumber>& base,
+                              const std::vector<BigNumber>& exp,
+                              const std::vector<BigNumber>& mod) {
+#ifdef IPCL_USE_QAT
+// if QAT is ON, OMP is OFF --> use QAT only
+#if !defined(IPCL_USE_OMP)
+  return qatModExp(base, exp, mod);
+#else
+  ERROR_CHECK(g_hybrid_params.ratio >= 0.0 && g_hybrid_params.ratio <= 1.0,
+              "modExp: hybrid modexp qat ratio is incorrect");
+  std::size_t v_size = base.size();
+  std::size_t hybrid_qat_size =
+      static_cast<std::size_t>(g_hybrid_params.ratio * v_size);
+
+  if (hybrid_qat_size == v_size) {
+    // use QAT only
+    return qatModExp(base, exp, mod);
+  } else if (hybrid_qat_size == 0) {
+    // use IPP only
+    return ippModExp(base, exp, mod);
+  } else {
+    // use QAT & IPP together: QAT gets the head of the batch, IPP the tail
+    std::vector<BigNumber> res(v_size);
+
+    std::vector<BigNumber> qat_base(base.begin(),
+                                    base.begin() + hybrid_qat_size);
+    std::vector<BigNumber> qat_exp(exp.begin(), exp.begin() + hybrid_qat_size);
+    std::vector<BigNumber> qat_mod(mod.begin(), mod.begin() + hybrid_qat_size);
+
+    std::vector<BigNumber> ipp_base(base.begin() + hybrid_qat_size, base.end());
+    std::vector<BigNumber> ipp_exp(exp.begin() + hybrid_qat_size, exp.end());
+    std::vector<BigNumber> ipp_mod(mod.begin() + hybrid_qat_size, mod.end());
+
+    // The worker writes only res[0, hybrid_qat_size); the calling thread
+    // writes only the tail, so the two copies never touch the same element.
+    std::thread qat_thread([&] {
+      std::vector<BigNumber> qat_res = qatModExp(qat_base, qat_exp, qat_mod);
+      std::copy(qat_res.begin(), qat_res.end(), res.begin());
+    });
+    try {
+      std::vector<BigNumber> ipp_res = ippModExp(ipp_base, ipp_exp, ipp_mod);
+      std::copy(ipp_res.begin(), ipp_res.end(), res.begin() + hybrid_qat_size);
+    } catch (...) {
+      // Destroying a joinable std::thread calls std::terminate, so wait for
+      // the QAT worker before letting the exception leave this function.
+      qat_thread.join();
+      throw;
+    }
+    qat_thread.join();
+    return res;
+  }
+#endif  // IPCL_USE_OMP
+#else
+  return ippModExp(base, exp, mod);
+#endif  // IPCL_USE_QAT
+}
 
-#endif  // IPCL_RUNTIME_MOD_EXP
+BigNumber modExp(const BigNumber& base, const BigNumber& exp,
+                 const BigNumber& mod) {
+  // QAT mod exp is NOT needed, when there is only 1 BigNumber.
+  return ippModExp(base, exp, mod);
 }
 
 BigNumber ippModExp(const BigNumber& base, const BigNumber& exp,
                     const BigNumber& mod) {
+  // IPP multi buffer mod exp is NOT needed, when there is only 1 BigNumber.
   return ippSBModExp(base, exp, mod);
 }
 
diff --git a/ipcl/plaintext.cpp b/ipcl/plaintext.cpp
index 4e4498d..7273719 100644
--- a/ipcl/plaintext.cpp
+++ b/ipcl/plaintext.cpp
@@ -6,7 +6,7 @@
 #include <algorithm>
 
 #include "ipcl/ciphertext.hpp"
-#include "ipcl/util.hpp"
+#include "ipcl/utils/util.hpp"
 
 namespace ipcl {
 
@@ -34,7 +34,7 @@ CipherText PlainText::operator*(const CipherText& other) const {
   return other.operator*(*this);
 }
 
-PlainText::operator std::vector<uint32_t>() {
+PlainText::operator std::vector<uint32_t>() const {
   ERROR_CHECK(m_size > 0,
               "PlainText: type conversion to uint32_t vector error");
   std::vector<uint32_t> v;
@@ -43,12 +43,12 @@ PlainText::operator std::vector<uint32_t>() {
   return v;
 }
 
-PlainText::operator BigNumber() {
+PlainText::operator BigNumber() const {
   ERROR_CHECK(m_size > 0, "PlainText: type conversion to BigNumber error");
   return m_texts[0];
 }
 
-PlainText::operator std::vector<BigNumber>() {
+PlainText::operator std::vector<BigNumber>() const {
   ERROR_CHECK(m_size > 0,
               "PlainText: type conversion to BigNumber vector error");
   return m_texts;
diff --git a/ipcl/pri_key.cpp b/ipcl/pri_key.cpp
index 060e858..923686d 100644
--- a/ipcl/pri_key.cpp
+++ b/ipcl/pri_key.cpp
@@ -3,12 +3,11 @@
 
 #include "ipcl/pri_key.hpp"
 
-#include <crypto_mb/exp.h>
-
 #include <cstring>
 
+#include "crypto_mb/exp.h"
 #include "ipcl/mod_exp.hpp"
-#include "ipcl/util.hpp"
+#include "ipcl/utils/util.hpp"
 
 namespace ipcl {
 /**
@@ -23,38 +22,61 @@ static inline BigNumber lcm(const BigNumber& p, const BigNumber& q) {
   return p * q / gcd;
 }
 
-PrivateKey::PrivateKey(const PublicKey* public_key, const BigNumber& p,
+// Builds the private key for the public key `pk` from its two prime factors.
+// The factors are stored so that m_p holds the smaller and m_q the larger.
+PrivateKey::PrivateKey(const PublicKey& pk, const BigNumber& p,
+                       const BigNumber& q)
+    : m_n(pk.getN()),
+      m_nsquare(pk.getNSQ()),
+      m_g(pk.getG()),
+      m_enable_crt(true),
+      m_p(std::make_shared<BigNumber>((q < p) ? q : p)),
+      m_q(std::make_shared<BigNumber>((q < p) ? p : q)),
+      m_pminusone((*m_p) - 1),
+      m_qminusone((*m_q) - 1),
+      m_psquare((*m_p) * (*m_p)),
+      m_qsquare((*m_q) * (*m_q)),
+      m_pinverse(m_q->InverseMul(*m_p)),
+      m_hp(computeHfun(*m_p, m_psquare)),
+      m_hq(computeHfun(*m_q, m_qsquare)),
+      m_lambda(lcm(m_pminusone, m_qminusone)),
+      m_x(m_n->InverseMul((modExp(*m_g, m_lambda, *m_nsquare) - 1) / (*m_n))) {
+  // Validation runs after the derived values above have been computed.
+  ERROR_CHECK((*m_p) * (*m_q) == (*m_n),
+              "PrivateKey ctor: Public key does not match p * q.");
+  ERROR_CHECK((*m_p) != (*m_q), "PrivateKey ctor: p and q are same");
+  m_isInitialized = true;
+}
+
+PrivateKey::PrivateKey(const BigNumber& n, const BigNumber& p,
                        const BigNumber& q)
-    : m_pubkey(public_key),
-      m_n(m_pubkey->getN()),
-      m_nsquare(m_pubkey->getNSQ()),
-      m_g(m_pubkey->getG()),
-      m_p((q < p) ? q : p),
-      m_q((q < p) ? p : q),
-      m_pminusone(m_p - 1),
-      m_qminusone(m_q - 1),
-      m_psquare(m_p * m_p),
-      m_qsquare(m_q * m_q),
-      m_pinverse(m_q.InverseMul(m_p)),
-      m_hp(computeHfun(m_p, m_psquare)),
-      m_hq(computeHfun(m_q, m_qsquare)),
-      // lcm(P-1,Q-1) = (P-1)*(Q-1)/gcd(P-1,Q-1), gcd in ipp-crypto is
-      // ippsGcd_BN
+    : m_n(std::make_shared<BigNumber>(n)),
+      m_nsquare(std::make_shared<BigNumber>((*m_n) * (*m_n))),
+      m_g(std::make_shared<BigNumber>((*m_n) + 1)),
+      m_enable_crt(true),
+      m_p((q < p) ? std::make_shared<BigNumber>(q)
+                  : std::make_shared<BigNumber>(p)),
+      m_q((q < p) ? std::make_shared<BigNumber>(p)
+                  : std::make_shared<BigNumber>(q)),
+      m_pminusone(*m_p - 1),
+      m_qminusone(*m_q - 1),
+      m_psquare((*m_p) * (*m_p)),
+      m_qsquare((*m_q) * (*m_q)),
+      m_pinverse((*m_q).InverseMul(*m_p)),
+      m_hp(computeHfun(*m_p, m_psquare)),
+      m_hq(computeHfun(*m_q, m_qsquare)),
       m_lambda(lcm(m_pminusone, m_qminusone)),
-      // TODO(bwang30): check if ippsModInv_BN does the same thing with
-      // mpz_invert
-      m_x(m_n.InverseMul((ipcl::ippModExp(m_g, m_lambda, m_nsquare) - 1) /
-                         m_n)),
-      m_bits(m_pubkey->getBits()),
-      m_dwords(m_pubkey->getDwords()),
-      m_enable_crt(true) {
-  ERROR_CHECK(p * q == m_n,
+      m_x((*m_n).InverseMul((modExp(*m_g, m_lambda, *m_nsquare) - 1) /
+                            (*m_n))) {
+  ERROR_CHECK((*m_p) * (*m_q) == *m_n,
               "PrivateKey ctor: Public key does not match p * q.");
-  ERROR_CHECK(p != q, "PrivateKey ctor: p and q are same");
+  ERROR_CHECK(*m_p != *m_q, "PrivateKey ctor: p and q are same");
+  m_isInitialized = true;
 }
 
 PlainText PrivateKey::decrypt(const CipherText& ct) const {
-  ERROR_CHECK(ct.getPubKey()->getN() == m_pubkey->getN(),
+  ERROR_CHECK(m_isInitialized, "decrypt: Private key is NOT initialized.");
+  ERROR_CHECK(*(ct.getPubKey()->getN()) == *(this->getN()),
               "decrypt: The value of N in public key mismatch.");
 
   std::size_t ct_size = ct.getSize();
@@ -63,6 +85,14 @@ PlainText PrivateKey::decrypt(const CipherText& ct) const {
   std::vector<BigNumber> pt_bn(ct_size);
   std::vector<BigNumber> ct_bn = ct.getTexts();
 
+  // If hybrid OPTIMAL mode is used, use a special ratio
+  if (isHybridOptimal()) {
+    float qat_ratio = (ct_size <= IPCL_WORKLOAD_SIZE_THRESHOLD)
+                          ? IPCL_HYBRID_MODEXP_RATIO_FULL
+                          : IPCL_HYBRID_MODEXP_RATIO_DECRYPT;
+    setHybridRatio(qat_ratio, false);
+  }
+
   if (m_enable_crt)
     decryptCRT(pt_bn, ct_bn);
   else
@@ -76,8 +106,8 @@ void PrivateKey::decryptRAW(std::vector<BigNumber>& plaintext,
   std::size_t v_size = plaintext.size();
 
   std::vector<BigNumber> pow_lambda(v_size, m_lambda);
-  std::vector<BigNumber> modulo(v_size, m_nsquare);
-  std::vector<BigNumber> res = ipcl::ippModExp(ciphertext, pow_lambda, modulo);
+  std::vector<BigNumber> modulo(v_size, *m_nsquare);
+  std::vector<BigNumber> res = modExp(ciphertext, pow_lambda, modulo);
 
 #ifdef IPCL_USE_OMP
   int omp_remaining_threads = OMPUtilities::MaxThreads;
@@ -85,7 +115,7 @@ void PrivateKey::decryptRAW(std::vector<BigNumber>& plaintext,
     OMPUtilities::assignOMPThreads(omp_remaining_threads, v_size))
 #endif  // IPCL_USE_OMP
   for (int i = 0; i < v_size; i++) {
-    BigNumber nn = m_n;
+    BigNumber nn = *m_n;
     BigNumber xx = m_x;
     BigNumber m = ((res[i] - 1) / nn) * xx;
     plaintext[i] = m % nn;
@@ -112,8 +142,8 @@ void PrivateKey::decryptCRT(std::vector<BigNumber>& plaintext,
   }
 
   // Based on the fact a^b mod n = (a mod n)^b mod n
-  std::vector<BigNumber> resp = ipcl::ippModExp(basep, pm1, psq);
-  std::vector<BigNumber> resq = ipcl::ippModExp(baseq, qm1, qsq);
+  std::vector<BigNumber> resp = modExp(basep, pm1, psq);
+  std::vector<BigNumber> resq = modExp(baseq, qm1, qsq);
 
 #ifdef IPCL_USE_OMP
   omp_remaining_threads = OMPUtilities::MaxThreads;
@@ -121,16 +151,16 @@ void PrivateKey::decryptCRT(std::vector<BigNumber>& plaintext,
     OMPUtilities::assignOMPThreads(omp_remaining_threads, v_size))
 #endif  // IPCL_USE_OMP
   for (int i = 0; i < v_size; i++) {
-    BigNumber dp = computeLfun(resp[i], m_p) * m_hp % m_p;
-    BigNumber dq = computeLfun(resq[i], m_q) * m_hq % m_q;
+    BigNumber dp = computeLfun(resp[i], *m_p) * m_hp % (*m_p);
+    BigNumber dq = computeLfun(resq[i], *m_q) * m_hq % (*m_q);
     plaintext[i] = computeCRT(dp, dq);
   }
 }
 
 BigNumber PrivateKey::computeCRT(const BigNumber& mp,
                                  const BigNumber& mq) const {
-  BigNumber u = (mq - mp) * m_pinverse % m_q;
-  return mp + (u * m_p);
+  BigNumber u = (mq - mp) * m_pinverse % (*m_q);
+  return mp + (u * (*m_p));
 }
 
 BigNumber PrivateKey::computeLfun(const BigNumber& a,
@@ -142,8 +172,8 @@ BigNumber PrivateKey::computeHfun(const BigNumber& a,
                                   const BigNumber& b) const {
   // Based on the fact a^b mod n = (a mod n)^b mod n
   BigNumber xm = a - 1;
-  BigNumber base = m_g % b;
-  BigNumber pm = ipcl::ippModExp(base, xm, b);
+  BigNumber base = *m_g % b;
+  BigNumber pm = modExp(base, xm, b);
   BigNumber lcrt = computeLfun(pm, a);
   return a.InverseMul(lcrt);
 }
diff --git a/ipcl/pub_key.cpp b/ipcl/pub_key.cpp
index 01f8124..d5b2c88 100644
--- a/ipcl/pub_key.cpp
+++ b/ipcl/pub_key.cpp
@@ -3,52 +3,46 @@
 
 #include "ipcl/pub_key.hpp"
 
-#include <crypto_mb/exp.h>
-
 #include <algorithm>
 #include <climits>
 #include <cstring>
 #include <random>
 
+#include "crypto_mb/exp.h"
 #include "ipcl/ciphertext.hpp"
 #include "ipcl/mod_exp.hpp"
-#include "ipcl/util.hpp"
+#include "ipcl/utils/util.hpp"
 
 namespace ipcl {
 
-static inline auto randomUniformUnsignedInt() {
-  std::random_device dev;
-  std::mt19937 rng(dev());
-  std::uniform_int_distribution<std::mt19937::result_type> dist(0, UINT_MAX);
-  return dist(rng);
-}
-
 PublicKey::PublicKey(const BigNumber& n, int bits, bool enableDJN_)
-    : m_n(n),
-      m_g(n + 1),
-      m_nsquare(n * n),
+    : m_n(std::make_shared<BigNumber>(n)),
+      m_g(std::make_shared<BigNumber>(*m_n + 1)),
+      m_nsquare(std::make_shared<BigNumber>((*m_n) * (*m_n))),
       m_bits(bits),
-      m_dwords(BITSIZE_DWORD(bits * 2)),
-      m_init_seed(randomUniformUnsignedInt()),
+      m_dwords(BITSIZE_DWORD(m_bits * 2)),
       m_enable_DJN(false),
-      m_testv(false) {
+      m_testv(false),
+      m_hs(0),
+      m_randbits(0) {
   if (enableDJN_) this->enableDJN();  // sets m_enable_DJN
+  m_isInitialized = true;
 }
 
 void PublicKey::enableDJN() {
   BigNumber gcd;
   BigNumber rmod;
   do {
-    int rand_bit = m_n.BitSize();
+    int rand_bit = (*m_n).BitSize();
     BigNumber rand = getRandomBN(rand_bit + 128);
-    rmod = rand % m_n;
-    gcd = rand.gcd(m_n);
+    rmod = rand % (*m_n);
+    gcd = rand.gcd(*m_n);
   } while (gcd.compare(1));
 
   BigNumber rmod_sq = rmod * rmod;
   BigNumber rmod_neg = rmod_sq * -1;
-  BigNumber h = rmod_neg % m_n;
-  m_hs = ipcl::ippModExp(h, m_n, m_nsquare);
+  BigNumber h = rmod_neg % (*m_n);
+  m_hs = modExp(h, *m_n, *m_nsquare);
   m_randbits = m_bits >> 1;  // bits/2
 
   m_enable_DJN = true;
@@ -57,7 +51,7 @@ void PublicKey::enableDJN() {
 std::vector<BigNumber> PublicKey::getDJNObfuscator(std::size_t sz) const {
   std::vector<BigNumber> r(sz);
   std::vector<BigNumber> base(sz, m_hs);
-  std::vector<BigNumber> sq(sz, m_nsquare);
+  std::vector<BigNumber> sq(sz, *m_nsquare);
 
   if (m_testv) {
     r = m_r;
@@ -66,30 +60,30 @@ std::vector<BigNumber> PublicKey::getDJNObfuscator(std::size_t sz) const {
       r_ = getRandomBN(m_randbits);
     }
   }
-  return ipcl::ippModExp(base, r, sq);
+  return modExp(base, r, sq);
 }
 
 std::vector<BigNumber> PublicKey::getNormalObfuscator(std::size_t sz) const {
   std::vector<BigNumber> r(sz);
-  std::vector<BigNumber> sq(sz, m_nsquare);
-  std::vector<BigNumber> pown(sz, m_n);
+  std::vector<BigNumber> sq(sz, *m_nsquare);
+  std::vector<BigNumber> pown(sz, *m_n);
 
   if (m_testv) {
     r = m_r;
   } else {
     for (int i = 0; i < sz; i++) {
       r[i] = getRandomBN(m_bits);
-      r[i] = r[i] % (m_n - 1) + 1;
+      r[i] = r[i] % (*m_n - 1) + 1;
     }
   }
-  return ipcl::ippModExp(r, pown, sq);
+  return modExp(r, pown, sq);
 }
 
 void PublicKey::applyObfuscator(std::vector<BigNumber>& ciphertext) const {
   std::size_t sz = ciphertext.size();
   std::vector<BigNumber> obfuscator =
       m_enable_DJN ? getDJNObfuscator(sz) : getNormalObfuscator(sz);
-  BigNumber sq = m_nsquare;
+  BigNumber sq = *m_nsquare;
 
   for (std::size_t i = 0; i < sz; ++i)
     ciphertext[i] = sq.ModMul(ciphertext[i], obfuscator[i]);
@@ -109,7 +103,7 @@ std::vector<BigNumber> PublicKey::raw_encrypt(const std::vector<BigNumber>& pt,
   std::vector<BigNumber> ct(pt_size);
 
   for (std::size_t i = 0; i < pt_size; i++)
-    ct[i] = (m_n * pt[i] + 1) % m_nsquare;
+    ct[i] = (*m_n * pt[i] + 1) % (*m_nsquare);
 
   if (make_secure) applyObfuscator(ct);
 
@@ -117,12 +111,22 @@ std::vector<BigNumber> PublicKey::raw_encrypt(const std::vector<BigNumber>& pt,
 }
 
 CipherText PublicKey::encrypt(const PlainText& pt, bool make_secure) const {
+  ERROR_CHECK(m_isInitialized, "encrypt: Public key is NOT initialized.");
+
   std::size_t pt_size = pt.getSize();
   ERROR_CHECK(pt_size > 0, "encrypt: Cannot encrypt empty PlainText");
   std::vector<BigNumber> ct_bn_v(pt_size);
 
+  // If hybrid OPTIMAL mode is used, use a special ratio
+  if (isHybridOptimal()) {
+    float qat_ratio = (pt_size <= IPCL_WORKLOAD_SIZE_THRESHOLD)
+                          ? IPCL_HYBRID_MODEXP_RATIO_FULL
+                          : IPCL_HYBRID_MODEXP_RATIO_ENCRYPT;
+    setHybridRatio(qat_ratio, false);
+  }
+
   ct_bn_v = raw_encrypt(pt.getTexts(), make_secure);
-  return CipherText(this, ct_bn_v);
+  return CipherText(*this, ct_bn_v);
 }
 
 void PublicKey::setDJN(const BigNumber& hs, int randbit) {
@@ -132,4 +136,31 @@ void PublicKey::setDJN(const BigNumber& hs, int randbit) {
   m_randbits = randbit;
   m_enable_DJN = true;
 }
+
+// Re-initializes the key from n; DJN values are generated if enableDJN_.
+void PublicKey::create(const BigNumber& n, int bits, bool enableDJN_) {
+  m_n = std::make_shared<BigNumber>(n);
+  m_g = std::make_shared<BigNumber>(*m_n + 1);
+  m_nsquare = std::make_shared<BigNumber>((*m_n) * (*m_n));
+  m_bits = bits;
+  m_dwords = BITSIZE_DWORD(m_bits * 2);
+  m_enable_DJN = enableDJN_;
+  if (enableDJN_) {
+    this->enableDJN();  // computes m_hs and m_randbits from m_n and m_bits
+  } else {
+    m_hs = BigNumber::Zero();
+    m_randbits = 0;
+  }
+  m_testv = false;
+  m_isInitialized = true;
+}
+
+void PublicKey::create(const BigNumber& n, int bits, const BigNumber& hs,
+                       int randbits) {
+  this->create(n, bits, false);  // no DJN generation; values set below
+  m_hs = hs;
+  m_randbits = randbits;
+  m_enable_DJN = true;
+}
+
 }  // namespace ipcl
diff --git a/ipcl/util.cpp b/ipcl/util.cpp
deleted file mode 100644
index 1de2b49..0000000
--- a/ipcl/util.cpp
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright (C) 2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "ipcl/util.hpp"
-
-#ifdef IPCL_USE_OMP
-#include <numa.h>
-#endif  // IPCL_USE_OMP
-
-namespace ipcl {
-
-#ifdef IPCL_USE_OMP
-const int OMPUtilities::nodes = numa_num_configured_nodes();
-const int OMPUtilities::cpus = numa_num_configured_cpus();
-const int OMPUtilities::MaxThreads = OMPUtilities::getMaxThreads();
-
-#endif  // IPCL_USE_OMP
-
-}  // namespace ipcl
diff --git a/ipcl/common.cpp b/ipcl/utils/common.cpp
similarity index 69%
rename from ipcl/common.cpp
rename to ipcl/utils/common.cpp
index 90209eb..e0130aa 100644
--- a/ipcl/common.cpp
+++ b/ipcl/utils/common.cpp
@@ -1,15 +1,22 @@
 // Copyright (C) 2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 
-#include "ipcl/common.hpp"
+#include "ipcl/utils/common.hpp"
 
-#include <crypto_mb/exp.h>
-
-#include "ipcl/util.hpp"
+#include "crypto_mb/exp.h"
+#include "ipcl/utils/util.hpp"
 
 namespace ipcl {
 
 IppStatus ippGenRandom(Ipp32u* rand, int bits, void* ctx) {
+#ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES
+  if (has_rdseed)
+    return ippsTRNGenRDSEED(rand, bits, ctx);
+  else if (has_rdrand)
+    return ippsPRNGenRDRAND(rand, bits, ctx);
+  else
+    return ippsPRNGen(rand, bits, ctx);
+#else
 #ifdef IPCL_RNG_INSTR_RDSEED
   return ippsTRNGenRDSEED(rand, bits, ctx);
 #elif defined(IPCL_RNG_INSTR_RDRAND)
@@ -17,9 +24,18 @@ IppStatus ippGenRandom(Ipp32u* rand, int bits, void* ctx) {
 #else
   return ippsPRNGen(rand, bits, ctx);
 #endif
+#endif  // IPCL_RUNTIME_DETECT_CPU_FEATURES
 }
 
 IppStatus ippGenRandomBN(IppsBigNumState* rand, int bits, void* ctx) {
+#ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES
+  if (has_rdseed)
+    return ippsTRNGenRDSEED_BN(rand, bits, ctx);
+  else if (has_rdrand)
+    return ippsPRNGenRDRAND_BN(rand, bits, ctx);
+  else
+    return ippsPRNGen_BN(rand, bits, ctx);
+#else
 #ifdef IPCL_RNG_INSTR_RDSEED
   return ippsTRNGenRDSEED_BN(rand, bits, ctx);
 #elif defined(IPCL_RNG_INSTR_RDRAND)
@@ -27,6 +43,7 @@ IppStatus ippGenRandomBN(IppsBigNumState* rand, int bits, void* ctx) {
 #else
   return ippsPRNGen_BN(rand, bits, ctx);
 #endif
+#endif  // IPCL_RUNTIME_DETECT_CPU_FEATURES
 }
 
 BigNumber getRandomBN(int bits) {
diff --git a/ipcl/utils/context.cpp b/ipcl/utils/context.cpp
new file mode 100644
index 0000000..7f9b93f
--- /dev/null
+++ b/ipcl/utils/context.cpp
@@ -0,0 +1,88 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "ipcl/utils/context.hpp"
+
+#include <map>
+#include <string>
+
+#ifdef IPCL_USE_QAT
+#include <heqat/context.h>
+#endif
+
+namespace ipcl {
+
+///>  Default behavior is selected at runtime and implementation dependent
+enum class RuntimeValue { DEFAULT, CPU, QAT, HYBRID };
+const std::map<std::string, RuntimeValue> runtimeMap = {
+    {"DEFAULT", RuntimeValue::DEFAULT}, {"default", RuntimeValue::DEFAULT},
+    {"CPU", RuntimeValue::CPU},         {"cpu", RuntimeValue::CPU},
+    {"QAT", RuntimeValue::QAT},         {"qat", RuntimeValue::QAT},
+    {"HYBRID", RuntimeValue::HYBRID},   {"hybrid", RuntimeValue::HYBRID}};
+
+enum class FeatureValue { AVX512IFMA, QAT4XXX };
+const std::map<std::string, FeatureValue> hasFeatureMap = {
+    {"avx512", FeatureValue::AVX512IFMA},
+    {"avx512ifma", FeatureValue::AVX512IFMA},
+    {"4xxx", FeatureValue::QAT4XXX},
+    {"qat_4xxx", FeatureValue::QAT4XXX}};
+
+#ifdef IPCL_USE_QAT
+bool hasQAT = false;             // set to true by initializeContext()
+static bool isUsingQAT = false;  // true while the QAT devices are acquired
+static bool initializeQATContext() {
+  if (isUsingQAT) return false;  // devices were already acquired earlier
+  isUsingQAT = (HE_QAT_STATUS_SUCCESS == acquire_qat_devices());
+  return isUsingQAT;
+}
+#endif
+
+// Prepares the runtime named by runtime_choice (a key of runtimeMap).
+// Returns true on success; false if QAT initialization fails or, in QAT
+// builds, if runtime_choice is not a recognized name.
+bool initializeContext(const std::string runtime_choice) {
+#ifdef IPCL_USE_QAT
+  hasQAT = true;
+  // find() instead of at(): an unknown name must not throw std::out_of_range.
+  const auto choice = runtimeMap.find(runtime_choice);
+  if (choice == runtimeMap.end()) return false;
+  // Only QAT needs device set-up; CPU, HYBRID and DEFAULT succeed as-is.
+  if (choice->second == RuntimeValue::QAT) return initializeQATContext();
+  return true;
+#else   // Default behavior: CPU choice
+  return true;
+#endif  // IPCL_USE_QAT
+}
+
+// Releases the QAT devices if this context acquired them.
+// Returns false only when the release call reports a failure.
+bool terminateContext() {
+#ifdef IPCL_USE_QAT
+  // Nothing to release unless initializeQATContext() succeeded earlier.
+  if (!isUsingQAT) return true;
+  // On failure the in-use flag stays set, exactly as before the call.
+  const bool released = (HE_QAT_STATUS_SUCCESS == release_qat_devices());
+  if (released) isUsingQAT = false;
+  return released;
+#else   // Default behavior: CPU choice
+  return true;
+#endif  // IPCL_USE_QAT
+}
+
+bool isQATRunning() {
+#ifndef IPCL_USE_QAT
+  return false;  // built without QAT support
+#else
+  return (get_qat_context_state() == HE_QAT_STATUS_RUNNING);
+#endif  // IPCL_USE_QAT
+}
+
+bool isQATActive() {
+#ifndef IPCL_USE_QAT
+  return false;  // built without QAT support
+#else
+  return (get_qat_context_state() == HE_QAT_STATUS_ACTIVE);
+#endif  // IPCL_USE_QAT
+}
+
+}  // namespace ipcl
diff --git a/ipcl/utils/parse_cpuinfo.cpp b/ipcl/utils/parse_cpuinfo.cpp
new file mode 100644
index 0000000..a8b6141
--- /dev/null
+++ b/ipcl/utils/parse_cpuinfo.cpp
@@ -0,0 +1,13 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "ipcl/utils/parse_cpuinfo.hpp"
+
+#include <fstream>
+#include <sstream>
+
+ipcl::linuxCPUInfo ipcl::getLinuxCPUInfoImpl(void) {
+  ipcl::linuxCPUInfo info;
+  ipcl::parseCPUInfo(info);  // presumably fills `info`; declared elsewhere
+  return info;
+}
diff --git a/ipcl/utils/util.cpp b/ipcl/utils/util.cpp
new file mode 100644
index 0000000..d0ffdfe
--- /dev/null
+++ b/ipcl/utils/util.cpp
@@ -0,0 +1,19 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "ipcl/utils/util.hpp"
+
+#include <thread>  // NOLINT [build/c++11]
+
+namespace ipcl {
+
+#ifdef IPCL_USE_OMP
+#ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES
+const linuxCPUInfo OMPUtilities::cpuinfo = OMPUtilities::getLinuxCPUInfo();
+#endif  // IPCL_RUNTIME_DETECT_CPU_FEATURES
+const int OMPUtilities::cpus = std::thread::hardware_concurrency();
+const int OMPUtilities::nodes = OMPUtilities::getNodes();
+const int OMPUtilities::MaxThreads = OMPUtilities::getMaxThreads();
+#endif  // IPCL_USE_OMP
+
+}  // namespace ipcl
diff --git a/module/heqat.cmake b/module/heqat.cmake
new file mode 100644
index 0000000..a26a272
--- /dev/null
+++ b/module/heqat.cmake
@@ -0,0 +1,71 @@
+# Copyright (C) 2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+include(ExternalProject)
+MESSAGE(STATUS "Configuring HE QAT")
+set(HEQAT_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/ext_he_qat)
+set(HEQAT_DESTDIR ${HEQAT_PREFIX}/heqat_install)
+set(HEQAT_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/module/heqat)
+set(HEQAT_CXX_FLAGS "${IPCL_FORWARD_CMAKE_ARGS}")
+
+set(HEQAT_BUILD_TYPE Release)
+if (CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
+  set(HEQAT_BUILD_TYPE Debug)
+endif()
+
+# Build HE QAT as an external project; its install tree is staged in HEQAT_DESTDIR.
+ExternalProject_Add(ext_he_qat
+  SOURCE_DIR ${HEQAT_SRC_DIR}
+  PREFIX ${HEQAT_PREFIX}
+  CMAKE_ARGS ${HEQAT_CXX_FLAGS}
+             -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
+             -DCMAKE_BUILD_TYPE=${HEQAT_BUILD_TYPE}
+             -DHE_QAT_MISC=OFF -DHE_QAT_TEST=OFF
+             -DHE_QAT_DOCS=${IPCL_DOCS} -DHE_QAT_SHARED=${IPCL_SHARED}
+  UPDATE_COMMAND ""
+  EXCLUDE_FROM_ALL TRUE
+  # Install through CMake (not a hard-coded `make`) so any generator works.
+  INSTALL_COMMAND ${CMAKE_COMMAND} -E env DESTDIR=${HEQAT_DESTDIR}
+                  ${CMAKE_COMMAND} --build . --target install
+)
+add_dependencies(ext_he_qat ext_ipp-crypto)
+
+set(HEQAT_INC_DIR ${HEQAT_DESTDIR}/${CMAKE_INSTALL_PREFIX}/include)
+set(HEQAT_LIB_DIR ${HEQAT_DESTDIR}/${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
+
+# Create heqat library interface.
+# The library name follows the build type passed to the HE QAT sub-build
+# (HEQAT_BUILD_TYPE), which is Debug for RelWithDebInfo as well as Debug.
+if(HEQAT_BUILD_TYPE STREQUAL "Debug")
+  set(HEQAT_LIB_NAME he_qat_debug)
+else()
+  set(HEQAT_LIB_NAME he_qat)
+endif()
+
+if(IPCL_SHARED)
+  add_library(he_qat INTERFACE)
+  add_dependencies(he_qat ext_he_qat)
+
+  ExternalProject_Get_Property(ext_he_qat SOURCE_DIR BINARY_DIR)
+
+  target_link_libraries(he_qat
+                        INTERFACE ${HEQAT_LIB_DIR}/lib${HEQAT_LIB_NAME}.so)
+  target_include_directories(he_qat SYSTEM INTERFACE ${HEQAT_INC_DIR})
+
+  install(
+    DIRECTORY ${HEQAT_LIB_DIR}/
+    DESTINATION "${IPCL_INSTALL_LIBDIR}/heqat"
+    USE_SOURCE_PERMISSIONS
+    PATTERN "cmake" EXCLUDE
+  )
+else()
+  # Static build: import the archive produced by ext_he_qat.
+  add_library(he_qat STATIC IMPORTED GLOBAL)
+  add_dependencies(he_qat ext_he_qat)
+
+  ExternalProject_Get_Property(ext_he_qat SOURCE_DIR BINARY_DIR)
+
+  set_target_properties(he_qat PROPERTIES
+    IMPORTED_LOCATION ${HEQAT_LIB_DIR}/lib${HEQAT_LIB_NAME}.a
+    INCLUDE_DIRECTORIES ${HEQAT_INC_DIR})
+endif()
diff --git a/module/heqat/.clang-format b/module/heqat/.clang-format
new file mode 100644
index 0000000..c16dae7
--- /dev/null
+++ b/module/heqat/.clang-format
@@ -0,0 +1,9 @@
+BasedOnStyle: Google
+Language: Cpp
+DerivePointerAlignment: false
+PointerAlignment: Left
+IndentWidth: 4
+AccessModifierOffset: -4
+IndentCaseLabels: false
+SortIncludes: false
+ColumnLimit: 80
diff --git a/module/heqat/.gitignore b/module/heqat/.gitignore
new file mode 100644
index 0000000..0e2a8ef
--- /dev/null
+++ b/module/heqat/.gitignore
@@ -0,0 +1,12 @@
+.vscode/
+.vs/
+
+
+build*/
+install
+
+cmake/he_qat-*.*.*/HE_QATConfig.cmake
+*.log
+Doxyfile
+
+**.swp
diff --git a/module/heqat/CMakeLists.txt b/module/heqat/CMakeLists.txt
new file mode 100644
index 0000000..cd0c123
--- /dev/null
+++ b/module/heqat/CMakeLists.txt
@@ -0,0 +1,209 @@
+# Copyright (C) 2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+cmake_minimum_required(VERSION 3.13)
+
+# The hekat or heqat (transcribed HqA.t) was an ancient Egyptian volume unit
+# used to measure grain, bread, and beer.
+project(HE_QAT VERSION 1.3.2 LANGUAGES C CXX)
+
+include(CheckCCompilerFlag)
+include(CheckCXXCompilerFlag)
+include(CMakePackageConfigHelpers)
+include(GNUInstallDirs)
+
+# Standalone when this is the top-level project; OFF when built under IPCL.
+if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
+  set(HE_QAT_STANDALONE ON)
+else()
+  set(HE_QAT_STANDALONE OFF)
+endif()
+
+if(HE_QAT_STANDALONE)
+  # Default to Release; otherwise accept only the standard CMake build types.
+  if(NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE Release)
+  else()
+    set(RELEASE_TYPES Debug Release RelWithDebInfo MinSizeRel)
+    list(FIND RELEASE_TYPES ${CMAKE_BUILD_TYPE} INDEX_FOUND)
+    if(INDEX_FOUND LESS 0)
+      message(
+        FATAL_ERROR
+          "CMAKE_BUILD_TYPE must be one of Debug, Release, RelWithDebInfo, or MinSizeRel"
+        )
+    endif()
+  endif()
+
+  # Language standards (C99, C++11) and general build settings.
+  set(CMAKE_C_STANDARD 99)
+  set(CMAKE_C_STANDARD_REQUIRED ON)
+  set(CMAKE_CXX_STANDARD 11)
+  set(CMAKE_CXX_STANDARD_REQUIRED ON)
+  set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+  set(CMAKE_INSTALL_MESSAGE LAZY)
+
+  # These assignments replace any previously set compiler flags.
+  set(CMAKE_C_FLAGS "-O2 -Wunused-variable -Wunused-function")
+  set(CMAKE_CXX_FLAGS "-O2 -Wunused-variable -Wunused-function -fpermissive")
+  if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+    set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_LIST_DIR}/install)  # default: <this dir>/install
+  endif()
+endif()
+
+set(CMAKE_INSTALL_RPATH "$ORIGIN;$ORIGIN/${CMAKE_INSTALL_LIBDIR}")
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_INSTALL_LIBDIR})
+
+# -------------------------------------------------------------------
+# Options available in both standalone and IPCL-embedded builds.
+option(HE_QAT_SYNC "Enable synchronous mode execution" OFF)
+option(HE_QAT_MT "Enable interfaces for multithreaded programs" ON)
+option(HE_QAT_PERF "Show request performance" OFF)
+option(HE_QAT_OMP "Enable tests using OpenMP" ON)
+
+if(NOT HE_QAT_STANDALONE)
+  # Built under IPCL: reuse IPCL_FORWARD_CMAKE_ARGS (not defined in this file).
+  set(HE_QAT_FORWARD_CMAKE_ARGS ${IPCL_FORWARD_CMAKE_ARGS})
+else()
+  # Options that exist only for standalone builds.
+  option(HE_QAT_MISC "Enable miscellaneous features" ON)
+  option(HE_QAT_TEST "Enable testing" ON)
+  option(HE_QAT_DOCS "Enable document building" ON)
+  option(HE_QAT_SHARED "Build shared library" ON)
+
+  set(HE_QAT_FORWARD_CMAKE_ARGS
+    -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+    -DCMAKE_C_STANDARD=${CMAKE_C_STANDARD}
+    -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
+    -DCMAKE_CXX_STANDARD=${CMAKE_CXX_STANDARD}
+    -DCMAKE_CXX_STANDARD_REQUIRED=${CMAKE_CXX_STANDARD_REQUIRED}
+    -DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}
+    -DCMAKE_EXPORT_COMPILE_COMMANDS=${CMAKE_EXPORT_COMPILE_COMMANDS}
+    -DCMAKE_POSITION_INDEPENDENT_CODE=${CMAKE_POSITION_INDEPENDENT_CODE}
+    -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
+    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+  )
+endif()
+
+# HE_QAT_DEBUG follows the build type; Debug builds also define the macro.
+set(HE_QAT_DEBUG OFF)
+if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+  set(HE_QAT_DEBUG ON)
+  add_definitions(-DHE_QAT_DEBUG)
+endif()
+
+if(HE_QAT_STANDALONE)  # print a summary of the effective configuration
+  message(STATUS "CMAKE_BUILD_TYPE:           ${CMAKE_BUILD_TYPE}")
+  message(STATUS "CMAKE_C_COMPILER:           ${CMAKE_C_COMPILER}")
+  message(STATUS "CMAKE_CXX_COMPILER:         ${CMAKE_CXX_COMPILER}")
+  message(STATUS "CMAKE_INSTALL_PREFIX:       ${CMAKE_INSTALL_PREFIX}")
+  message(STATUS "HE_QAT_MISC:                ${HE_QAT_MISC}")
+  message(STATUS "HE_QAT_SYNC:                ${HE_QAT_SYNC}")
+  message(STATUS "HE_QAT_MT:                  ${HE_QAT_MT}")
+  message(STATUS "HE_QAT_PERF:                ${HE_QAT_PERF}")
+  message(STATUS "HE_QAT_TEST:                ${HE_QAT_TEST}")
+  message(STATUS "HE_QAT_OMP:                 ${HE_QAT_OMP}")
+  message(STATUS "HE_QAT_DOCS:                ${HE_QAT_DOCS}")
+  message(STATUS "HE_QAT_SHARED:              ${HE_QAT_SHARED}")
+endif()
+
+if(HE_QAT_MISC)  # expose the feature switch to the sources as a macro
+  add_definitions(-DHE_QAT_MISC)
+endif()
+
+# Locations inside the HE QAT source tree and the install layout.
+set(HE_QAT_ROOT_DIR ${CMAKE_CURRENT_LIST_DIR})
+set(HE_QAT_CMAKE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/he_qat")
+set(HE_QAT_SRC_DIR ${HE_QAT_ROOT_DIR}/heqat)
+set(HE_QAT_INC_DIR ${HE_QAT_SRC_DIR}/include)
+if(NOT HE_QAT_STANDALONE)
+  set(HE_QAT_INC_DIR ${HE_QAT_INC_DIR} PARENT_SCOPE)  # also set it in the parent scope
+endif()
+set(HE_QAT_INSTALL_LIBDIR ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/heqat)
+set(HE_QAT_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})
+
+if(HE_QAT_OMP)
+  # Both the C++ and the C OpenMP imported targets must exist.
+  find_package(OpenMP REQUIRED)
+  foreach(omp_target OpenMP::OpenMP_CXX OpenMP::OpenMP_C)
+    if(NOT TARGET ${omp_target})
+      message(FATAL_ERROR "Missing ${omp_target}.")
+    endif()
+  endforeach()
+endif()
+
+if(HE_QAT_MT)
+  add_definitions(-DHE_QAT_MT)
+  message(STATUS "Compile with multithreaded interfaces.")
+endif()
+
+if(HE_QAT_MISC)  # IPP Crypto include/lib paths are only resolved when MISC is enabled
+  if(HE_QAT_STANDALONE)
+    # IPP Crypto installation: honor IPPCP_PREFIX_PATH if given, else /opt/ipp-crypto
+    if(IPPCP_PREFIX_PATH)
+      list(APPEND CMAKE_PREFIX_PATH "${IPPCP_PREFIX_PATH}")
+      set(IPPCP_DIR "${IPPCP_PREFIX_PATH}/../../../")
+      message(STATUS "IPPCP_DIR=${IPPCP_DIR}")
+    else()
+      # No prefix given: fall back to the default install location
+      set(IPPCP_DIR "/opt/ipp-crypto")
+      set(IPPCP_PREFIX_PATH "${IPPCP_DIR}/lib/cmake")
+      list(APPEND CMAKE_PREFIX_PATH "${IPPCP_PREFIX_PATH}")
+      message(STATUS "Else IPPCP_DIR=${IPPCP_DIR}")
+    endif()
+    find_package(IPPCP REQUIRED)  # NOTE(review): find_package also uses/sets <Pkg>_DIR, i.e. IPPCP_DIR
+    message(STATUS "IPPCP_LIBRARIES ${IPPCP_LIBRARIES}")
+    set(IPPCP_INC_DIR ${IPPCP_DIR}/../../../include)  # NOTE(review): confirm which IPPCP_DIR value this expects
+    set(IPPCP_LIB_DIR ${IPPCP_DIR}/lib/intel64)
+  else()  # built under IPCL: IPPCRYPTO_* are not set in this file, presumably by the parent project
+    set(IPPCP_INC_DIR ${IPPCRYPTO_INC_DIR}/ippcrypto)
+    set(IPPCP_LIB_DIR ${IPPCRYPTO_LIB_DIR})
+  endif()
+endif()
+
+if(HE_QAT_SYNC)  # synchronous mode is selected at compile time via a macro
+  add_definitions(-DHE_QAT_SYNC_MODE)
+endif()
+
+if(HE_QAT_PERF)  # request performance reporting is selected at compile time via a macro
+  add_definitions(-DHE_QAT_PERF)
+endif()
+
+# OpenSSL installation
+find_package(OpenSSL REQUIRED)
+
+# External dependencies (pthread preferences must be set before find_package(Threads) reads them)
+set(CMAKE_THREAD_PREFER_PTHREAD ON)
+set(THREADS_PREFER_PTHREAD_FLAG ON)
+find_package(Threads REQUIRED)
+
+set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/he_qat)  # lets include() find heqat-util
+include(heqat-util)
+
+if(NOT CMAKE_INSTALL_PREFIX)  # fallback when no install prefix is set at all
+  set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_LIST_DIR}/install)
+endif()
+
+# Pull in the QAT library API configuration
+include(cmake/qatconfig.cmake)
+
+# The HE_QAT library itself
+add_subdirectory(heqat)
+
+# Validation test examples (optional)
+if(HE_QAT_TEST)
+  add_subdirectory(test)
+endif()
+
+# API documentation (optional). Doxygen can be installed with
+# `sudo apt-get install doxygen`.
+if(HE_QAT_DOCS)
+  find_package(Doxygen)
+  option(BUILD_DOCS "Create and install the HTML based API docs (requires Doxygen)" ${DOXYGEN_FOUND})
+  # BUILD_DOCS defaults to DOXYGEN_FOUND; enabling it without Doxygen is fatal.
+  if(BUILD_DOCS AND NOT DOXYGEN_FOUND)
+    message(FATAL_ERROR "Doxygen was not found (Required)")
+  elseif(BUILD_DOCS)
+    add_subdirectory(doc)
+  endif()
+endif()
diff --git a/module/heqat/LICENSE b/module/heqat/LICENSE
new file mode 100644
index 0000000..261eeb9
--- /dev/null
+++ b/module/heqat/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/module/heqat/README.md b/module/heqat/README.md
new file mode 100644
index 0000000..4f22caa
--- /dev/null
+++ b/module/heqat/README.md
@@ -0,0 +1,332 @@
+# Intel Homomorphic Encryption (HE) Acceleration Library for Quick Assist Technology (QAT)
+Intel Homomorphic Encryption Acceleration Library for QAT (HE QAT Lib) is an open-source library which provides accelerated performance for homomorphic encryption (HE) math functions involving multi-precision numbers and modular arithmetic. This library is written in C99.
+
+## Contents
+- [Intel Homomorphic Encryption (HE) Acceleration Library for Quick Assist Technology (QAT)](#intel-homomorphic-encryption-he-acceleration-library-for-quick-assist-technology-qat)
+  - [Contents](#contents)
+  - [Introduction](#introduction)
+  - [Building the HE QAT Library](#building-the-he-qat-library)
+    - [Requirements](#requirements)
+    - [Dependencies](#dependencies)
+    - [Instructions](#instructions)
+      - [Installing Dependencies](#installing-dependencies)
+      - [Installing OpenSSL](#installing-openssl)
+      - [Installing QAT Software Stack](#installing-qat-software-stack)
+      - [Setup Environment](#setup-environment)
+      - [Building the Library](#building-the-library)
+      - [Configuring QAT endpoints](#configuring-qat-endpoints)
+      - [Configuration Options](#configuration-options)
+      - [Running Samples](#running-samples)
+      - [Running All Samples](#running-all-samples)
+  - [Troubleshooting](#troubleshooting)
+  - [Testing and Benchmarking](#testing-and-benchmarking)
+- [Contributors](#contributors)
+<!-- - [Standardization](#standardization) -->
+- [Contributors](#contributors)
+
+## Introduction
+
+This library currently only offers acceleration of modular exponentiation of multi-precision numbers, i.e. large numbers whose precision ranges from 1024 to 8192 bits. The current implementation supports modular exponentiation of big numbers encoded with the OpenSSL `BIGNUM` data type, `ippcrypto`'s `BigNumber` class, and octet strings encoded with `unsigned char`. More details about the modes of operation and characteristics of the execution flow are described below:
+
+ - Synchronous: API calls will submit requests that will be executed in the order they are first issued by the host caller, i.e. a series of modular exponentiation operation requests will be offloaded for processing by the accelerator in the order they are issued.
+
+ - Asynchronous: API calls will submit requests that will NOT necessarily be executed in the order they are first issued by the host caller, i.e. a sequence of multiple requests for the modular exponentiation operation could be scheduled out of order and executed concurrently by the accelerator; thus, completed out of order.
+
+ - Blocking: API calls will be blocked until work request processing completion. Internally, the next buffered work request waits for completion of the processing of the most recently offloaded request to the accelerator.
+
+ - Non-Blocking: API calls are non-blocking, i.e. they return without waiting for the work request to complete. After multiple non-blocking calls to the API, a blocking function must be called to wait for the requests to complete processing. Internally, non-blocking request submissions are scheduled to the accelerator asynchronously. When there are multiple requests from concurrent API callers, the requests are not guaranteed to be processed in order of arrival.
+
+ - Batch Support: The internal buffers are set to accommodate up to 1024 requests at a time, so the maximum number of outstanding non-blocking API calls is 1024 for each concurrent thread caller. Therefore, only up to 1024 requests can be exercised asynchronously from the application side, be it from a single `for loop` or static code block. Finally, in order to collect the requests, a call to the `getBnModExpRequest()` function must be performed to wait for completion of all submitted asynchronous requests. In multithreaded mode, the blocking function to be called at the end of the code block shall be `release_bnModExp_buffer()`.
+
+ - Multithreading Support: This feature permits the API to be called concurrently by threads running on the host. Effective multithreading support relies on a separate buffer that admits outstanding work requests. This buffer is acquired before an API call to submit work requests to the accelerator. This is accomplished by first calling `acquire_bnModExp_buffer()` to reserve an internal buffer to store outstanding requests from the host API caller.
+
+ - Multiple Instances: The library accesses all logical instances from all visible and configured QAT endpoints at the creation of the QAT runtime context. Therefore, if 8 QAT endpoints are available, it will attempt to use them all, including all the total number of logical instances configured per process.
+
+>> _**Note**_: Current implementation does not verify if the instance/endpoint has the capabilities needed by the library. For example, the library needs access to the _asym_ capabilities like `CyLnModExp`, therefore if the configuration file of an endpoint happens to be configured to not offer it, the application will exit with an error at some point during execution.
+
+## Building the HE QAT Library
+
+### Requirements
+The hardware requirement to use the library is the following:
+ - Intel 4xxx co-processor
+<!-- - Intel C62XX acceleration card -->
+
+As for the operating systems, the library has been tested and confirmed to work on Ubuntu 20.04 and CentOS 7.9.
+
+### Dependencies
+
+Required dependencies include:
+
+```
+cmake >=3.15.1
+git
+yasm
+libboost >= 1.14
+libudev >= 1.47
+pthread
+OpenSSL >=1.1.0
+gcc >= 9.1
+QAT20.L.0.8.0-00071.tar.gz (qatlib and QAT drivers)
+nasm >= 2.15
+ipp-crypto
+```
+
+### Instructions
+
+Before attempting to build the library, please check if the platform has the QAT hardware.
+
+```
+$ sudo lspci -d 8086:4940
+6b:00.0 Co-processor: Intel Corporation Device 4940 (rev 30)
+70:00.0 Co-processor: Intel Corporation Device 4940 (rev 30)
+75:00.0 Co-processor: Intel Corporation Device 4940 (rev 30)
+7a:00.0 Co-processor: Intel Corporation Device 4940 (rev 30)
+e8:00.0 Co-processor: Intel Corporation Device 4940 (rev 30)
+ed:00.0 Co-processor: Intel Corporation Device 4940 (rev 30)
+f2:00.0 Co-processor: Intel Corporation Device 4940 (rev 30)
+f7:00.0 Co-processor: Intel Corporation Device 4940 (rev 30)
+```
+
+In the example above, the platform is a dual-socket server with Sapphire Rapids (SPR) CPU and it shows 8 QAT endpoints, 4 on each socket.
+
+#### Installing Dependencies
+
+```
+sudo apt install yasm zlib1g
+sudo apt update -y
+sudo apt install -y libsystemd-dev
+sudo apt install -y pciutils  # tested with version=3.6.4
+sudo apt install -y libudev-dev
+sudo apt install -y libreadline-dev
+sudo apt install -y libxml2-dev
+sudo apt install -y libboost-dev
+sudo apt install -y elfutils libelf-dev
+sudo apt install -y libnl-3-dev
+sudo apt install -y linux-headers-$(uname -r)
+sudo apt install -y build-essential
+sudo apt install -y libboost-regex-dev
+sudo apt install -y pkg-config
+```
+
+#### Installing OpenSSL
+
+```
+$ git clone https://github.com/openssl/openssl.git
+$ cd openssl/
+$ git checkout OpenSSL_1_1_1-stable
+$ ./Configure --prefix=/opt/openssl
+$ make
+$ sudo make install
+```
+
+#### Installing QAT Software Stack
+
+```
+$ cd $HOME
+$ mkdir QAT
+$ mv QAT20.L.0.8.0-00071.tar.gz QAT/
+$ cd QAT
+$ tar zxvf QAT20.L.0.8.0-00071.tar.gz
+$ ./configure
+$ sudo make -j
+$ sudo make install
+```
+
+Add `$USER` to the `qat` group. You must log out and log back in for the change to take effect.
+
+```
+$ sudo usermod -aG qat $USER
+```
+
+> _**Note**_: Please contact the QAT team listed at [https://01.org/intel-quickassist-technology](https://01.org/intel-quickassist-technology) to obtain the latest `QAT20.L.0.8.0-00071.tar.gz` package.
+
+Verify the QAT installation by checking the QAT service status:
+
+ - Ubuntu
+```
+sudo service qat_service status
+```
+ - CentOS
+```
+sudo systemctl status qat_service.service
+```
+
+If everything checks out, follow the instructions below to build the HE QAT library.
+
+#### Setup Environment
+
+This step is required. Note that if the step [Installing QAT Software Stack](#installing-qat-software-stack) has just been performed, then the exact path of the installation is known, i.e.
+
+```
+export ICP_ROOT=$HOME/QAT
+```
+
+Alternatively, if the system has a pre-built QAT software stack installed, the script `auto_find_qat_install.sh` can be used to help automatically find the path where it was installed (see command below). The script `auto_find_qat_install.sh` assumes that the QAT package is installed in a single location; if multiple installations are available at different locations, the script may behave unpredictably.
+
+ - Explicit way:
+```
+export ICP_ROOT=$(./auto_find_qat_install.sh)
+```
+ - Implicit way:
+```
+source setup_env.sh
+```
+
+#### Building the Library
+
+Follow the steps in the sections [Installing QAT Software Stack](#installing-qat-software-stack) and [Setup Environment](#setup-environment) before attempting to build the library.
+
+- How to build without `BigNumber` support
+
+```
+$ git clone https://github.com/intel-sandbox/libraries.security.cryptography.homomorphic-encryption.glade.project-destiny.git
+$ git checkout development
+$ cmake -S . -B build -DHE_QAT_MISC=OFF
+$ cmake --build build
+$ cmake --install build
+```
+
+- How to build with `BigNumber` support
+
+The `cmake` configuration variable `HE_QAT_MISC=ON` enables `BigNumber` resources and samples, requiring IPP Crypto installation as a dependency. If usage of the utility functions that support `BigNumber` data type is needed, follow the building instructions below to install IPP Crypto and then rebuild the library with the cmake flag `HE_QAT_MISC=ON`:
+
+- Installing `nasm-2.15`
+
+```
+$ wget -c https://www.nasm.us/pub/nasm/releasebuilds/2.15.05/nasm-2.15.05.tar.xz
+$ tar -xf nasm-2.15.05.tar.xz
+$ cd nasm-2.15.05/
+$ ./configure --prefix=/opt/nasm-2.15
+$ make -j
+$ sudo make install
+```
+
+- Installing `ippcrypto`
+
+```
+$ cd ~
+$ git clone https://github.com/intel/ipp-crypto.git
+$ cd ipp-crypto
+$ CC=gcc CXX=g++ cmake CMakeLists.txt -B_build -DARCH=intel64 -DMERGED_BLD:BOOL=ON -DCMAKE_INSTALL_PREFIX=/opt/ipp-crypto -DOPENSSL_INCLUDE_DIR=/opt/openssl/include -DOPENSSL_LIBRARIES=/opt/openssl/lib -DOPENSSL_ROOT_DIR=/opt/openssl -DCMAKE_ASM_NASM_COMPILER=/opt/nasm-2.15/bin/nasm
+$ cmake --build _build -j
+$ sudo cmake --install _build
+```
+
+#### Configuring QAT endpoints
+
+Before trying to run any application or example that uses the HE QAT Lib, the QAT endpoints must be configured.
+The default configuration provided in this release is the optimal configuration to provide computing acceleration support for [IPCL](https://github.com/intel/pailliercryptolib).
+The boilerplate configurations can be found in the `config` directory.
+
+```
+./scripts/setup_devices.sh
+```
+
+The script above will configure the QAT devices to perform asymmetric functions only.
+
+#### Configuration Options
+
+In addition to the standard CMake configuration options, Intel HE Acceleration Library for QAT supports several cmake options to configure the build. For convenience, they are listed below:
+
+| CMake option                  | Values                 | Description                                             |
+| ------------------------------| ---------------------- | ------------------------------------------------------- |
+| HE_QAT_MISC                   | ON / OFF (default ON)  | Enable/Disable BigNumber conversion functions.          |
+| HE_QAT_DEBUG                  | ON / OFF (default OFF) | Enable/Disable debug log at large runtime penalty.      |
+| HE_QAT_SAMPLES                | ON / OFF (default ON)  | Enable/Disable building of samples.                     |
+| HE_QAT_DOCS                   | ON / OFF (default ON)  | Enable/Disable building of documentation.               |
+| HE_QAT_SYNC                   | ON / OFF (default OFF) | Enable/Disable synchronous mode execution.              |
+| HE_QAT_MT                     | ON / OFF (default ON)  | Enable/Disable interfaces for multithreaded programs.   |
+| HE_QAT_PERF                   | ON / OFF (default OFF) | Enable/Disable display of measured request performance. |
+| HE_QAT_TEST                   | ON / OFF (default ON)  | Enable/Disable testing.                                 |
+| HE_QAT_OMP                    | ON / OFF (default ON)  | Enable/Disable tests using OpenMP.                      |
+| HE_QAT_SHARED                 | ON / OFF (default ON)  | Enable/Disable building shared library.                 |
+
+#### Running Samples
+
+Test showing creation and teardown of the QAT runtime environment:
+
+```
+./build/samples/sample_context
+```
+
+Test showing functional correctness and performance using BIGNUM data as input:
+
+```
+./build/samples/sample_BIGNUMModExp
+```
+
+If built with `HE_QAT_MISC=ON`, then the following samples below are also available to try.
+
+Test showing data conversion between `BigNumber` and `CpaFlatBuffer` formats:
+
+```
+./build/samples/sample_bnConversion
+```
+
+Test showing functional correctness and performance using `BigNumber` data types:
+
+```
+./build/samples/sample_bnModExp
+```
+
+Test showing functional correctness and performance of multithreading support:
+
+```
+./build/samples/sample_bnModExp_MT
+```
+#### Running All Samples
+
+```
+HEQATLIB_ROOT_DIR=$PWD ./scripts/run.sh
+```
+
+## Troubleshooting
+
+- **Issue #1**
+
+```
+xuser@ubuntu-guest:~/heqat$ cmake -S . -B build -DCMAKE_BUILD_TYPE=RelWithDebInfo -DHE_QAT_MISC=ON
+-- CMAKE_INSTALL_PREFIX: /usr/local
+-- CMAKE_PREFIX_PATH /home/xuser/ipp-crypto/_build/
+-- Missed required Intel IPP Cryptography component: ippcp
+--   library not found:
+   /opt/ipp-crypto/lib/intel64/libippcp.a
+CMake Error at CMakeLists.txt:93 (find_package):
+  Found package configuration file:
+
+    /opt/ipp-crypto/lib/cmake/ippcp/ippcp-config.cmake
+
+  but it set IPPCP_FOUND to FALSE so package "IPPCP" is considered to be NOT
+  FOUND.
+```
+
+To resolve the error above, simply create the symbolic link `/opt/ipp-crypto/lib/intel64/libippcp.a` from the appropriate static ippcp library that was compiled. For example:
+
+```
+xuser@ubuntu-guest:/opt/ipp-crypto/lib/intel64$ ls -lha
+total 7.3M
+drwxr-xr-x 2 root root 4.0K Jun  3 16:29 .
+drwxr-xr-x 5 root root 4.0K Jun  3 16:29 ..
+-rw-r--r-- 1 root root 1.6M Jun  3 16:28 libcrypto_mb.a
+lrwxrwxrwx 1 root root   18 Jun  3 16:29 libcrypto_mb.so -> libcrypto_mb.so.11
+lrwxrwxrwx 1 root root   20 Jun  3 16:29 libcrypto_mb.so.11 -> libcrypto_mb.so.11.5
+-rw-r--r-- 1 root root 1.3M Jun  3 16:28 libcrypto_mb.so.11.5
+lrwxrwxrwx 1 root root   16 Jun  3 16:29 libippcpmx.so -> libippcpmx.so.11
+lrwxrwxrwx 1 root root   18 Jun  3 16:29 libippcpmx.so.11 -> libippcpmx.so.11.5
+-rw-r--r-- 1 root root 1.7M Jun  3 16:28 libippcpmx.so.11.5
+-rw-r--r-- 1 root root 2.9M Jun  3 16:28 libippcp_s_mx.a
+xuser@ubuntu-guest:/opt/ipp-crypto/lib/intel64$ sudo ln -s libippcp_s_mx.a libippcp.a
+```
+
+## Testing and Benchmarking
+
+TODO
+
+# Contributors
+
+Main contributors to this project, sorted by alphabetical order of last name are:
+  - [Fillipe Dias M. de Souza](https://www.linkedin.com/in/fillipe-d-m-de-souza-a8281820) (lead)
+  - [Xiaoran Fang](https://github.com/fangxiaoran)
+  - [Jingyi Jin](https://www.linkedin.com/in/jingyi-jin-655735)
+  - [Sejun Kim](https://www.linkedin.com/in/sejun-kim-2b1b4866)
+  - [Pengfei Zhao](https://github.com/justalittlenoob)
diff --git a/module/heqat/cmake/he_qat/HE_QATConfig.cmake.in b/module/heqat/cmake/he_qat/HE_QATConfig.cmake.in
new file mode 100644
index 0000000..5267fd1
--- /dev/null
+++ b/module/heqat/cmake/he_qat/HE_QATConfig.cmake.in
@@ -0,0 +1,29 @@
+# Copyright (C) 2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Package configuration template for HE_QAT. Placeholders delimited by an
+# at-sign on both sides are replaced when this file is configured.
+
+@PACKAGE_INIT@
+
+include(CMakeFindDependencyMacro)
+
+include(${CMAKE_CURRENT_LIST_DIR}/he_qatTargets.cmake)
+
+# Report whether including the targets file defined HE_QAT::he_qat.
+if(TARGET HE_QAT::he_qat)
+  set(HE_QAT_FOUND TRUE)
+  message(STATUS "Intel Homomorphic Encryption Acceleration Library for QAT found")
+else()
+  message(STATUS "Intel Homomorphic Encryption Acceleration Library for QAT not found")
+endif()
+
+# A placeholder is only expanded when it is closed by a trailing at-sign.
+# NOTE(review): assumes HE_QAT_VERSION_MAJOR/MINOR/PATCH are set by the
+# configuring project - confirm.
+set(HE_QAT_VERSION "@HE_QAT_VERSION@")
+set(HE_QAT_VERSION_MAJOR "@HE_QAT_VERSION_MAJOR@")
+set(HE_QAT_VERSION_MINOR "@HE_QAT_VERSION_MINOR@")
+set(HE_QAT_VERSION_PATCH "@HE_QAT_VERSION_PATCH@")
+
+set(HE_QAT_DEBUG "@HE_QAT_DEBUG@")
diff --git a/module/heqat/cmake/he_qat/heqat-util.cmake b/module/heqat/cmake/he_qat/heqat-util.cmake
new file mode 100644
index 0000000..8034239
--- /dev/null
+++ b/module/heqat/cmake/he_qat/heqat-util.cmake
@@ -0,0 +1,37 @@
+# Copyright (C) 2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# heqat_create_archive(<target> <dependency>)
+#
+# Merges the objects of the static library <dependency> into the archive of
+# <target> in a post-build step.
+function(heqat_create_archive target dependency)
+  # For proper export of HE_QATConfig.cmake / he_qatTargets.cmake,
+  # we avoid explicitly linking dependencies via target_link_libraries, since
+  # this would add dependencies to the exported he_qat target.
+  add_dependencies(${target} ${dependency})
+
+  if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
+    # Extract both archives into the binary directory, append every object
+    # to the target archive, then delete the extracted objects.
+    # DEPENDS is not a valid keyword for the TARGET form of
+    # add_custom_command(); ordering comes from add_dependencies() above.
+    add_custom_command(TARGET ${target} POST_BUILD
+                      COMMAND ar -x $<TARGET_FILE:${target}>
+                      COMMAND ar -x $<TARGET_FILE:${dependency}>
+                      COMMAND ar -qcs $<TARGET_FILE:${target}> *.o
+                      COMMAND rm -f *.o
+                      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+      )
+  elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+    # lib.exe rewrites the target archive from itself plus the dependency.
+    add_custom_command(TARGET ${target} POST_BUILD
+                       COMMAND lib.exe /OUT:$<TARGET_FILE:${target}>
+                        $<TARGET_FILE:${target}>
+                        $<TARGET_FILE:${dependency}>
+                        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+  )
+  else()
+    message(WARNING "Unsupported compiler ${CMAKE_CXX_COMPILER_ID}")
+  endif()
+endfunction()
diff --git a/module/heqat/cmake/qatconfig.cmake b/module/heqat/cmake/qatconfig.cmake
new file mode 100644
index 0000000..da52322
--- /dev/null
+++ b/module/heqat/cmake/qatconfig.cmake
@@ -0,0 +1,55 @@
+# Copyright (C) 2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Locates the QAT driver tree through the ICP_ROOT environment variable and
+# declares imported targets for the static libraries under its build folder.
+
+# Setup ICP variables
+if(DEFINED ENV{ICP_ROOT})
+  message(STATUS "Environment variable ICP_ROOT is defined as $ENV{ICP_ROOT}.")
+else()
+  message(FATAL_ERROR "Environment variable ICP_ROOT must be defined. Try export ICP_ROOT=<QAT_ROOT_PATH>")
+endif()
+
+set(ICP_ROOT             $ENV{ICP_ROOT})
+set(ICP_BUILDOUTPUT_PATH ${ICP_ROOT}/build)
+set(ICP_BUILDSYSTEM_PATH ${ICP_ROOT}/quickassist/build_system)
+set(ICP_API_DIR          ${ICP_ROOT}/quickassist)
+set(ICP_LAC_DIR          ${ICP_ROOT}/quickassist/lookaside/access_layer)
+set(ICP_OSAL_DIR         ${ICP_ROOT}/quickassist/utilities/osal)
+set(ICP_ADF_DIR          ${ICP_ROOT}/quickassist/lookaside/access_layer/src/qat_direct)
+set(CMN_ROOT             ${ICP_ROOT}/quickassist/utilities/libusdm_drv)
+
+# Include directories of the QAT API headers
+set(ICP_INC_DIR ${ICP_API_DIR}/include
+                ${ICP_LAC_DIR}/include
+                ${ICP_ADF_DIR}/include
+                ${CMN_ROOT}
+                ${ICP_API_DIR}/include/dc
+                ${ICP_API_DIR}/include/lac)
+
+#add_definitions(-DDO_CRYPTO)
+add_definitions(-DUSER_SPACE)
+add_compile_options(-fPIC)
+
+# Imported targets wrapping the static libraries in ICP_BUILDOUTPUT_PATH
+add_library(libadf_static STATIC IMPORTED GLOBAL)
+add_library(libosal_static STATIC IMPORTED GLOBAL)
+add_library(libqat_static STATIC IMPORTED GLOBAL)
+add_library(libusdm_drv_static STATIC IMPORTED GLOBAL)
+
+set_target_properties(libadf_static PROPERTIES
+  IMPORTED_LOCATION ${ICP_BUILDOUTPUT_PATH}/libadf.a
+)
+
+set_target_properties(libosal_static PROPERTIES
+  IMPORTED_LOCATION ${ICP_BUILDOUTPUT_PATH}/libosal.a
+)
+
+set_target_properties(libqat_static PROPERTIES
+  IMPORTED_LOCATION ${ICP_BUILDOUTPUT_PATH}/libqat.a
+)
+
+set_target_properties(libusdm_drv_static PROPERTIES
+  IMPORTED_LOCATION ${ICP_BUILDOUTPUT_PATH}/libusdm_drv.a
+)
diff --git a/module/heqat/config/4xxx_dev0.conf b/module/heqat/config/4xxx_dev0.conf
new file mode 100755
index 0000000..fcaa5f2
--- /dev/null
+++ b/module/heqat/config/4xxx_dev0.conf
@@ -0,0 +1,182 @@
+################################################################
+# This file is provided under a dual BSD/GPLv2 license.  When using or
+#   redistributing this file, you may do so under either license.
+#
+#   GPL LICENSE SUMMARY
+#
+#   Copyright(c) 2007-2021 Intel Corporation. All rights reserved.
+#
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of version 2 of the GNU General Public License as
+#   published by the Free Software Foundation.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#   General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the Free Software
+#   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+#   The full GNU General Public License is included in this distribution
+#   in the file called LICENSE.GPL.
+#
+#   Contact Information:
+#   Intel Corporation
+#
+#   BSD LICENSE
+#
+#   Copyright(c) 2007-2021 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+#
+#  version: QAT20.L.0.8.0-00071
+################################################################
+[GENERAL]
+ServicesEnabled = asym;dc
+
+ConfigVersion = 2
+
+#Default value for FW Auth loading
+FirmwareAuthEnabled = 1
+
+#Default values for number of concurrent requests
+CyNumConcurrentSymRequests = 512
+CyNumConcurrentAsymRequests = 64
+
+#Statistics, valid values: 1,0
+statsGeneral = 1
+statsDh = 1
+statsDrbg = 1
+statsDsa = 1
+statsEcc = 1
+statsKeyGen = 1
+statsDc = 1
+statsLn = 1
+statsPrime = 1
+statsRsa = 1
+statsSym = 1
+
+# This flag is to enable SSF features (CNV and BnP)
+StorageEnabled = 0
+
+# Disable public key crypto and prime number
+# services by specifying a value of 1 (default is 0)
+PkeServiceDisabled = 0
+
+# This flag is to enable device auto reset on heartbeat error
+AutoResetOnError = 0
+
+# Default value for power management idle interrupt delay
+PmIdleInterruptDelay = 0
+
+# This flag is to enable power management idle support
+PmIdleSupport = 1
+
+# This flag is to enable key protection technology
+KptEnabled = 1
+
+# Define the maximum SWK count per function can have
+# Default value is 1, the maximum value is 128
+KptMaxSWKPerFn = 1
+
+# Define the maximum SWK count per pasid can have
+# Default value is 1, the maximum value is 128
+KptMaxSWKPerPASID = 1
+
+# Define the maximum SWK lifetime in second
+# Default value is 0 (eternal of life)
+# The maximum value is 31536000 (one year)
+KptMaxSWKLifetime = 31536000
+
+# Flag to define whether to allow SWK to be shared among processes
+# Default value is 0 (shared mode is off)
+KptSWKShared = 0
+
+##############################################
+# Kernel Instances Section
+##############################################
+[KERNEL]
+NumberCyInstances = 1
+NumberDcInstances = 0
+
+# Crypto - Kernel instance #0
+Cy0Name = "IPSec0"
+Cy0IsPolled = 0
+Cy0CoreAffinity = 0
+
+# Data Compression - Kernel instance #0
+Dc0Name = "IPComp0"
+Dc0IsPolled = 0
+Dc0CoreAffinity = 0
+
+##############################################
+# ADI Section for Scalable IOV
+##############################################
+[SIOV]
+NumberAdis = 0
+
+##############################################
+# User Process Instance Section
+##############################################
+[SSL]
+NumberCyInstances = 1
+NumberDcInstances = 2
+NumProcesses = 1
+LimitDevAccess = 0
+
+# Crypto - User instance #0
+Cy0Name = "SSL0"
+Cy0IsPolled = 1
+# List of core affinities
+Cy0CoreAffinity = 1
+
+## Crypto - User instance #1
+#Cy1Name = "SSL1"
+#Cy1IsPolled = 1
+## List of core affinities
+#Cy1CoreAffinity = 2
+#
+## Crypto - User instance #2
+#Cy2Name = "SSL2"
+#Cy2IsPolled = 1
+## List of core affinities
+#Cy2CoreAffinity = 3
+
+# Data Compression - User instance #0
+Dc0Name = "Dc0"
+Dc0IsPolled = 1
+# List of core affinities
+Dc0CoreAffinity = 1
+
+# Data Compression - User instance #1
+Dc1Name = "Dc1"
+Dc1IsPolled = 1
+# List of core affinities
+Dc1CoreAffinity = 2
diff --git a/module/heqat/config/4xxxvf_dev0.conf b/module/heqat/config/4xxxvf_dev0.conf
new file mode 100755
index 0000000..a633579
--- /dev/null
+++ b/module/heqat/config/4xxxvf_dev0.conf
@@ -0,0 +1,133 @@
+################################################################
+# This file is provided under a dual BSD/GPLv2 license.  When using or
+#   redistributing this file, you may do so under either license.
+#
+#   GPL LICENSE SUMMARY
+#
+#   Copyright(c) 2007-2021 Intel Corporation. All rights reserved.
+#
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of version 2 of the GNU General Public License as
+#   published by the Free Software Foundation.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#   General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the Free Software
+#   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+#   The full GNU General Public License is included in this distribution
+#   in the file called LICENSE.GPL.
+#
+#   Contact Information:
+#   Intel Corporation
+#
+#   BSD LICENSE
+#
+#   Copyright(c) 2007-2021 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+#
+#  version: QAT20.L.0.8.0-00071
+################################################################
+[GENERAL]
+ServicesEnabled = asym;dc
+
+ConfigVersion = 2
+
+#Default values for number of concurrent requests
+CyNumConcurrentSymRequests = 512
+CyNumConcurrentAsymRequests = 64
+
+#Statistics, valid values: 1,0
+statsGeneral = 1
+statsDh = 1
+statsDrbg = 1
+statsDsa = 1
+statsEcc = 1
+statsKeyGen = 1
+statsDc = 1
+statsLn = 1
+statsPrime = 1
+statsRsa = 1
+statsSym = 1
+
+# This flag is to enable SSF features (CNV and BnP)
+StorageEnabled = 0
+
+# Disable public key crypto and prime number
+# services by specifying a value of 1 (default is 0)
+PkeServiceDisabled = 0
+
+# This flag is to enable device auto reset on heartbeat error
+AutoResetOnError = 0
+
+# Disable Address translation services
+ATEnabled = 0
+##############################################
+# Kernel Instances Section
+##############################################
+[KERNEL]
+NumberCyInstances = 0
+NumberDcInstances = 0
+
+##############################################
+# User Process Instance Section
+##############################################
+[SSL]
+NumberCyInstances = 1
+NumberDcInstances = 2
+NumProcesses = 1
+LimitDevAccess = 0
+
+# Crypto - User instance #0
+Cy0Name = "SSL0"
+Cy0IsPolled = 1
+# List of core affinities
+Cy0CoreAffinity = 1
+
+## Crypto - User instance #1
+#Cy1Name = "SSL1"
+#Cy1IsPolled = 1
+## List of core affinities
+#Cy1CoreAffinity = 2
+
+# Data Compression - User instance #0
+Dc0Name = "Dc0"
+Dc0IsPolled = 1
+# List of core affinities
+Dc0CoreAffinity = 1
+
+# Data Compression - User instance #1
+Dc1Name = "Dc1"
+Dc1IsPolled = 1
+# List of core affinities
+Dc1CoreAffinity = 2
diff --git a/module/heqat/doc/CMakeLists.txt b/module/heqat/doc/CMakeLists.txt
new file mode 100644
index 0000000..822e556
--- /dev/null
+++ b/module/heqat/doc/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Copyright (C) 2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Build Doxygen documentation
+set(DOXYGEN_MIN_VERSION "1.8.5")
+find_package(Doxygen ${DOXYGEN_MIN_VERSION} REQUIRED)
+
+# Must match OUTPUT_DIRECTORY in Doxyfile.in; otherwise the declared output
+# of the custom command never appears and the docs are rebuilt every time.
+set(DOXYGEN_OUTPUT_DIR ${CMAKE_BINARY_DIR}/doc/doxygen)
+set(DOXYGEN_INDEX_FILE ${DOXYGEN_OUTPUT_DIR}/xml/index.xml)
+set(DOXYFILE_IN ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in)
+set(DOXYFILE_OUT ${CMAKE_BINARY_DIR}/Doxyfile)
+
+# Create Doxyfile
+configure_file(${DOXYFILE_IN} ${DOXYFILE_OUT} @ONLY)
+
+# MAIN_DEPENDENCY accepts a single file; further inputs go under DEPENDS.
+add_custom_command(OUTPUT ${DOXYGEN_INDEX_FILE}
+                   COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYFILE_OUT}
+                   MAIN_DEPENDENCY ${DOXYFILE_OUT}
+                   DEPENDS ${DOXYFILE_IN}
+                   COMMENT "Generating Doxygen documentation")
+
+add_custom_target(docs ALL DEPENDS ${DOXYGEN_INDEX_FILE})
+
+install(DIRECTORY
+    ${DOXYGEN_OUTPUT_DIR}
+    DESTINATION doc)
diff --git a/module/heqat/doc/Doxyfile.in b/module/heqat/doc/Doxyfile.in
new file mode 100644
index 0000000..be375b4
--- /dev/null
+++ b/module/heqat/doc/Doxyfile.in
@@ -0,0 +1,36 @@
+# Copyright (C) 2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+PROJECT_NAME           = "Intel HE Acceleration Library for QAT"
+PROJECT_BRIEF          = "Intel Homomorphic Encryption Acceleration Library for QAT, accelerating the modular arithmetic operations used in partial homomorphic encryption on Intel QAT."
+
+OUTPUT_DIRECTORY       = @CMAKE_BINARY_DIR@/doc/doxygen
+INPUT                  = @CMAKE_SOURCE_DIR@/heqat/include/heqat \
+                         @CMAKE_SOURCE_DIR@/heqat/include/heqat/common \
+                         @CMAKE_SOURCE_DIR@/heqat/include/heqat/misc \
+                         @CMAKE_SOURCE_DIR@/heqat \
+                         @CMAKE_SOURCE_DIR@/heqat/misc \
+                         @CMAKE_SOURCE_DIR@/heqat/common \
+                         @CMAKE_SOURCE_DIR@/samples \
+                         @CMAKE_SOURCE_DIR@/README.md
+RECURSIVE              = YES
+USE_MDFILE_AS_MAINPAGE = @CMAKE_SOURCE_DIR@/README.md
+USE_MATHJAX            = YES
+FULL_PATH_NAMES        = NO
+
+GENERATE_XML           = YES
+EXTRACT_ALL            = YES
+EXTRACT_PRIVATE        = NO
+SHOW_NAMESPACES        = YES
+GENERATE_LATEX         = YES
+
+WARNINGS               = YES
+WARN_IF_UNDOCUMENTED   = YES
+WARN_IF_DOC_ERROR      = YES
+WARN_NO_PARAMDOC       = YES
+WARN_AS_ERROR          = YES
+
+QUIET                  = NO
+
+SEARCHENGINE           = YES
+SERVER_BASED_SEARCH    = NO
diff --git a/module/heqat/doc/index.html b/module/heqat/doc/index.html
new file mode 100644
index 0000000..c344624
--- /dev/null
+++ b/module/heqat/doc/index.html
@@ -0,0 +1,2 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html><head><meta http-equiv=Refresh content="0;url=doxygen/html/index.html"></head></html>
diff --git a/module/heqat/doc/index.rst b/module/heqat/doc/index.rst
new file mode 100644
index 0000000..860a83a
--- /dev/null
+++ b/module/heqat/doc/index.rst
@@ -0,0 +1,5 @@
+## Intel HE Acceleration Library for QAT Documentation ##
+.. toctree::
+      api
+
+.. mdinclude:: ../README.md
diff --git a/module/heqat/example/CMakeLists.txt b/module/heqat/example/CMakeLists.txt
new file mode 100644
index 0000000..11f2aab
--- /dev/null
+++ b/module/heqat/example/CMakeLists.txt
@@ -0,0 +1,37 @@
+# Copyright (C) 2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# cmake_minimum_required() has to come before project() so that the
+# requested policies are in effect when the project is set up.
+cmake_minimum_required(VERSION 3.13)
+
+project(he_qat_example LANGUAGES C CXX)
+
+set(CMAKE_CXX_STANDARD 11)
+set(HE_QAT_HINT_DIR ${CMAKE_PREFIX_PATH})
+message(STATUS "CMAKE_PREFIX_PATH ${HE_QAT_HINT_DIR}")
+
+# Locate the HE_QAT package; REQUIRED stops configuration if it is missing
+find_package(HE_QAT 1.3.2
+  HINTS ${HE_QAT_HINT_DIR}
+  REQUIRED)
+if(NOT TARGET HE_QAT::he_qat)
+  message(FATAL_ERROR "TARGET HE_QAT::he_qat not found")
+endif()
+
+# Define HE_QAT_DEBUG in Debug builds (example.cpp guards extra output with it)
+if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+  add_definitions(-DHE_QAT_DEBUG)
+endif()
+
+find_package(OpenSSL REQUIRED)
+# The pthread preferences are read by find_package(Threads), so they must
+# be set before that call.
+set(CMAKE_THREAD_PREFER_PTHREAD ON)
+set(THREADS_PREFER_PTHREAD_FLAG ON)
+find_package(Threads REQUIRED)
+
+add_definitions(-fpermissive)
+add_executable(example example.cpp)
+target_link_libraries(example PRIVATE HE_QAT::he_qat)
+target_link_libraries(example PRIVATE OpenSSL::SSL)
diff --git a/module/heqat/example/README.md b/module/heqat/example/README.md
new file mode 100644
index 0000000..9bd68d2
--- /dev/null
+++ b/module/heqat/example/README.md
@@ -0,0 +1,7 @@
+# Building and running
+
+```
+cmake -S . -B build -DCMAKE_PREFIX_PATH=../install/lib/cmake
+cmake --build build
+./build/example
+```
diff --git a/module/heqat/example/example.cpp b/module/heqat/example/example.cpp
new file mode 100644
index 0000000..b3460b0
--- /dev/null
+++ b/module/heqat/example/example.cpp
@@ -0,0 +1,158 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include <time.h>
+#include <openssl/bn.h>
+#include <openssl/err.h>
+#include <openssl/rand.h>
+#include <sys/time.h>
+
+#include "heqat/heqat.h"
+
+#define LEN_OF_1024_BITS 128
+#define LEN_OF_2048_BITS 256
+#define msb_CAN_BE_ZERO -1
+#define msb_IS_ONE 0
+#define EVEN_RND_NUM 0
+#define ODD_RND_NUM 1
+#define BATCH_SIZE 1
+
+struct timeval start_time, end_time;
+double time_taken = 0.0;
+
+/// @brief Compares modular exponentiation on QAT against OpenSSL.
+/// @details For each trial, random operands are generated, BN_mod_exp() and
+///          HE_QAT_BIGNUMModExp() are timed on the same inputs, running
+///          averages are printed and the two results are compared.
+/// @return Status of the last HE_QAT_BIGNUMModExp() call, cast to int.
+int main(int argc, const char** argv) {
+    const int bit_length = 4096;  // 1024;
+    const size_t num_trials = 100;
+
+    double avg_speed_up = 0.0;
+    double ssl_avg_time = 0.0;
+    double qat_avg_time = 0.0;
+
+    double ssl_elapsed = 0.0;
+    double qat_elapsed = 0.0;
+
+    HE_QAT_STATUS status = HE_QAT_STATUS_FAIL;
+
+    // Set up QAT runtime context
+    acquire_qat_devices();
+
+    // Set up OpenSSL context (as baseline)
+    BN_CTX* ctx = BN_CTX_new();
+    if (!ctx) {
+        HE_QAT_PRINT_ERR("Failed to allocate OpenSSL BN_CTX.\n");
+        release_qat_devices();
+        return static_cast<int>(HE_QAT_STATUS_FAIL);
+    }
+    BN_CTX_start(ctx);
+
+    for (size_t mod = 0; mod < num_trials; mod++) {
+        BIGNUM* bn_mod = generateTestBNData(bit_length);
+
+        if (!bn_mod) continue;
+
+#ifdef HE_QAT_DEBUG
+        char* bn_str = BN_bn2hex(bn_mod);
+        HE_QAT_PRINT("Generated modulus: %s num_bytes: %d num_bits: %d\n",
+                     bn_str, BN_num_bytes(bn_mod), BN_num_bits(bn_mod));
+        OPENSSL_free(bn_str);
+#endif
+        // bn_exponent in [0..bn_mod)
+        BIGNUM* bn_exponent = BN_new();
+        if (!bn_exponent || !BN_rand_range(bn_exponent, bn_mod)) {
+            // BN_free() accepts NULL, so this also covers allocation failure.
+            BN_free(bn_exponent);
+            BN_free(bn_mod);
+            continue;
+        }
+
+        BIGNUM* bn_base = generateTestBNData(bit_length);
+        if (!bn_base) {
+            BN_free(bn_exponent);
+            BN_free(bn_mod);
+            continue;
+        }
+
+        // Perform OpenSSL ModExp Op
+        BIGNUM* ssl_res = BN_new();
+        gettimeofday(&start_time, NULL);
+        BN_mod_exp(ssl_res, bn_base, bn_exponent, bn_mod, ctx);
+        gettimeofday(&end_time, NULL);
+        time_taken = (end_time.tv_sec - start_time.tv_sec) * 1e6;
+        time_taken =
+            (time_taken + (end_time.tv_usec - start_time.tv_usec));  //*1e-6;
+        ssl_elapsed = time_taken;
+
+        if (!ERR_get_error()) {
+#ifdef HE_QAT_DEBUG
+            bn_str = BN_bn2hex(ssl_res);
+            HE_QAT_PRINT("SSL BN mod exp: %s num_bytes: %d num_bits: %d\n",
+                         bn_str, BN_num_bytes(ssl_res), BN_num_bits(ssl_res));
+            showHexBN(ssl_res, bit_length);
+            OPENSSL_free(bn_str);
+#endif
+        } else {
+            HE_QAT_PRINT_ERR("Modular exponentiation failed.\n");
+            exit(1);
+        }
+
+        HE_QAT_PRINT_DBG("\nStarting QAT bnModExp...\n");
+
+        // Perform QAT ModExp Op
+        BIGNUM* qat_res = BN_new();
+        gettimeofday(&start_time, NULL);
+        for (unsigned int j = 0; j < BATCH_SIZE; j++)
+            status = HE_QAT_BIGNUMModExp(qat_res, bn_base, bn_exponent, bn_mod,
+                                         bit_length);
+        getBnModExpRequest(BATCH_SIZE);
+        gettimeofday(&end_time, NULL);
+        time_taken = (end_time.tv_sec - start_time.tv_sec) * 1e6;
+        time_taken =
+            (time_taken + (end_time.tv_usec - start_time.tv_usec));  //*1e-6;
+        qat_elapsed = time_taken;
+
+        // Incremental running averages, weighted by the trial index
+        ssl_avg_time = (mod * ssl_avg_time + ssl_elapsed) / (mod + 1);
+        qat_avg_time =
+            (mod * qat_avg_time + qat_elapsed / BATCH_SIZE) / (mod + 1);
+        avg_speed_up =
+            (mod * avg_speed_up + (ssl_elapsed) / (qat_elapsed / BATCH_SIZE)) /
+            (mod + 1);
+
+        HE_QAT_PRINT(
+            "Trial #%03lu\tOpenSSL: %.1lfus\tQAT: %.1lfus\tSpeed Up:%.1lfx\t",
+            (mod + 1), ssl_avg_time, qat_avg_time, avg_speed_up);
+
+        if (HE_QAT_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT_ERR("\nQAT bnModExpOp failed\n");
+            exit(1);
+        }
+
+        if (BN_cmp(qat_res, ssl_res) != 0)
+            HE_QAT_PRINT("\t** FAIL **\n");
+        else
+            HE_QAT_PRINT("\t** PASS **\n");
+
+        HE_QAT_PRINT_DBG("\nQAT bnModExpOp finished\n");
+
+        BN_free(ssl_res);
+        BN_free(qat_res);
+
+        BN_free(bn_mod);
+        BN_free(bn_base);
+        BN_free(bn_exponent);
+    }
+
+    // Tear down OpenSSL context
+    BN_CTX_end(ctx);
+    BN_CTX_free(ctx);
+
+    // Tear down QAT runtime context
+    release_qat_devices();
+
+    return static_cast<int>(status);
+}
diff --git a/module/heqat/heqat/CMakeLists.txt b/module/heqat/heqat/CMakeLists.txt
new file mode 100644
index 0000000..d05886a
--- /dev/null
+++ b/module/heqat/heqat/CMakeLists.txt
@@ -0,0 +1,117 @@
+# Copyright (C) 2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# HE QAT Lib source code (C sources that are always compiled into he_qat)
+set(HE_QAT_SRC ${HE_QAT_SRC_DIR}/cb.c
+	       ${HE_QAT_SRC_DIR}/context.c
+	       ${HE_QAT_SRC_DIR}/ctrl.c
+	       ${HE_QAT_SRC_DIR}/bnops.c
+         ${HE_QAT_SRC_DIR}/common/utils.c
+)
+
+# Helper functions for ippcrypto's BigNumber class (added only if HE_QAT_MISC is set)
+if(HE_QAT_MISC)
+  list(APPEND HE_QAT_SRC          ${HE_QAT_SRC_DIR}/misc/misc.cpp
+  ${HE_QAT_SRC_DIR}/misc/utils.cpp
+  ${HE_QAT_SRC_DIR}/misc/bignum.cpp
+)
+endif()
+
+if(HE_QAT_SHARED)  # library type follows the HE_QAT_SHARED switch
+  add_library(he_qat SHARED ${HE_QAT_SRC})
+else()
+  add_library(he_qat STATIC ${HE_QAT_SRC})
+endif()
+
+add_library(HE_QAT::he_qat ALIAS he_qat)  # namespaced alias of the target
+
+target_include_directories(he_qat
+	PUBLIC $<BUILD_INTERFACE:${HE_QAT_INC_DIR}> #Public headers
+	PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}> #Public headers
+	PUBLIC ${ICP_INC_DIR}
+)
+
+install(DIRECTORY ${HE_QAT_INC_DIR}/
+	DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+	FILES_MATCHING
+	PATTERN "*.hpp"
+	PATTERN "*.h")
+
+target_link_directories(he_qat PUBLIC ${ICP_BUILDOUTPUT_PATH})
+
+target_link_libraries(he_qat PRIVATE udev z)
+target_link_libraries(he_qat PRIVATE OpenSSL::SSL)
+target_link_libraries(he_qat PRIVATE Threads::Threads)
+if(HE_QAT_SHARED)  # shared build links the QAT libraries directly
+  target_link_libraries(he_qat PRIVATE qat_s)
+  target_link_libraries(he_qat PRIVATE usdm_drv_s)
+else()  # static build merges the QAT static libraries via heqat_create_archive()
+  heqat_create_archive(he_qat libadf_static)
+  heqat_create_archive(he_qat libosal_static)
+  heqat_create_archive(he_qat libqat_static)
+  heqat_create_archive(he_qat libusdm_drv_static)
+endif()
+
+if(NOT HE_QAT_STANDALONE)  # NOTE(review): IPPCP::ippcp presumably comes from the parent build - confirm
+  add_dependencies(he_qat IPPCP::ippcp)
+endif()
+
+if(HE_QAT_MISC)
+  target_include_directories(he_qat PRIVATE ${IPPCP_INC_DIR})
+  target_link_directories(he_qat PRIVATE ${IPPCP_LIB_DIR})
+  if(HE_QAT_SHARED)
+    target_link_libraries(he_qat PRIVATE IPPCP::ippcp)
+  else()
+    heqat_create_archive(he_qat IPPCP::ippcp)
+  endif()
+endif()
+
+set_target_properties(he_qat PROPERTIES POSITION_INDEPENDENT_CODE ON)
+set_target_properties(he_qat PROPERTIES VERSION ${HE_QAT_VERSION})
+
+if(HE_QAT_DEBUG)  # debug builds produce a library named he_qat_debug
+  set_target_properties(he_qat PROPERTIES OUTPUT_NAME "he_qat_debug")
+else()
+  set_target_properties(he_qat PROPERTIES OUTPUT_NAME "he_qat")
+endif()
+
+include(CMakePackageConfigHelpers)  # provides the two package helper commands used below
+
+# Paths of the generated package config, version and targets files
+set(HE_QAT_TARGET_FILENAME ${CMAKE_CURRENT_BINARY_DIR}/cmake/he_qat-${HE_QAT_VERSION}/he_qatTargets.cmake)
+set(HE_QAT_CONFIG_IN_FILENAME ${HE_QAT_CMAKE_PATH}/HE_QATConfig.cmake.in)
+set(HE_QAT_CONFIG_FILENAME ${HE_QAT_ROOT_DIR}/cmake/he_qat-${HE_QAT_VERSION}/HE_QATConfig.cmake)  # NOTE(review): not under CMAKE_CURRENT_BINARY_DIR like its siblings - confirm intended
+set(HE_QAT_CONFIG_VERSION_FILENAME ${CMAKE_CURRENT_BINARY_DIR}/cmake/he_qat-${HE_QAT_VERSION}/HE_QATConfigVersion.cmake)
+set(HE_QAT_CONFIG_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR}/cmake/he_qat-${HE_QAT_VERSION}/)
+
+install(
+	EXPORT he_qatTargets
+	NAMESPACE HE_QAT::
+	DESTINATION ${HE_QAT_CONFIG_INSTALL_DIR}
+)
+
+write_basic_package_version_file(
+    ${HE_QAT_CONFIG_VERSION_FILENAME}
+    VERSION ${HE_QAT_VERSION}
+    COMPATIBILITY ExactVersion  # only an exactly matching requested version is accepted
+)
+
+configure_package_config_file(
+		${HE_QAT_CONFIG_IN_FILENAME} ${HE_QAT_CONFIG_FILENAME}
+		INSTALL_DESTINATION ${HE_QAT_CONFIG_INSTALL_DIR}
+)
+
+install(
+    TARGETS he_qat
+    EXPORT he_qatTargets
+    ARCHIVE DESTINATION ${HE_QAT_INSTALL_LIBDIR}
+    LIBRARY DESTINATION ${HE_QAT_INSTALL_LIBDIR}
+    RUNTIME DESTINATION ${HE_QAT_INSTALL_LIBDIR}
+    )
+
+install(FILES ${HE_QAT_CONFIG_FILENAME}
+              ${HE_QAT_CONFIG_VERSION_FILENAME}
+        DESTINATION ${HE_QAT_CONFIG_INSTALL_DIR})
+
+export(EXPORT he_qatTargets  # writes the targets file into the build tree as well
+        FILE ${HE_QAT_TARGET_FILENAME})
diff --git a/module/heqat/heqat/bnops.c b/module/heqat/heqat/bnops.c
new file mode 100644
index 0000000..65c7983
--- /dev/null
+++ b/module/heqat/heqat/bnops.c
@@ -0,0 +1,540 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+/// @file heqat/bnops.c
+
+#include <cpa.h>
+#include <cpa_cy_im.h>
+#include <cpa_cy_ln.h>
+#include <icp_sal_poll.h>
+
+#include "heqat/bnops.h"
+#include "heqat/common/consts.h"
+#include "heqat/common/types.h"
+#include "heqat/common/utils.h"
+
+#ifdef HE_QAT_PERF
+#include <sys/time.h>
+#endif
+
+#include <stdio.h>
+#include <pthread.h>
+#include <assert.h>
+#include <string.h>
+#include <openssl/bn.h>
+
+#ifdef HE_QAT_SYNC_MODE
+#pragma message "Synchronous execution mode."
+#else
+#pragma message "Asynchronous execution mode."
+#endif
+
+// Global buffer for the runtime environment
+extern HE_QAT_RequestBuffer he_qat_buffer;
+extern HE_QAT_OutstandingBuffer outstanding;
+
+// Callback functions
+extern void HE_QAT_BIGNUMModExpCallback(void* pCallbackTag, CpaStatus status,
+                                        void* pOpData, CpaFlatBuffer* pOut);
+extern void HE_QAT_bnModExpCallback(void* pCallbackTag, CpaStatus status,
+                                    void* pOpData, CpaFlatBuffer* pOut);
+
+/// @brief Thread-safe producer implementation for the shared request buffer.
+/// @details Fill internal or outstanding buffer with incoming work requests.
+///          This function is implemented in he_qat_ctrl.c.
+extern void submit_request(HE_QAT_RequestBuffer* _buffer, void* args);
+
+/*
+ * **************************************************************************
+ *  Implementation of Functions for the Single Interface Support
+ * **************************************************************************
+ */
+
+/// @brief Queue a modular exponentiation request on byte-array operands.
+/// @details Copies the base, exponent and modulus into contiguous buffers,
+///          packs them into an HE_QAT_TaskRequest and submits the request to
+///          the shared he_qat_buffer. The function returns once the request
+///          is queued; HE_QAT_bnModExpCallback is registered to copy the
+///          result into @p r. On any failure every resource acquired so far
+///          is released before returning.
+/// @param[out] r Destination of the result; the registered callback copies
+///               (nbits + 7) / 8 bytes into it.
+/// @param[in] b Base, read as (nbits + 7) / 8 bytes.
+/// @param[in] e Exponent, read as (nbits + 7) / 8 bytes.
+/// @param[in] m Modulus, read as (nbits + 7) / 8 bytes.
+/// @param[in] nbits Operand size in bits; must be positive.
+/// @return HE_QAT_STATUS_SUCCESS if the request was queued,
+///         HE_QAT_STATUS_INVALID_PARAM on a NULL pointer or non-positive
+///         nbits, HE_QAT_STATUS_FAIL if an allocation failed.
+HE_QAT_STATUS HE_QAT_bnModExp(unsigned char* r, unsigned char* b,
+                              unsigned char* e, unsigned char* m, int nbits) {
+    static unsigned long long req_count = 0;
+
+    if (NULL == r || NULL == b || NULL == e || NULL == m)
+        return HE_QAT_STATUS_INVALID_PARAM;
+    // A non-positive bit length would give a zero or negative byte count to
+    // the allocations and memcpy() calls below.
+    if (nbits <= 0) return HE_QAT_STATUS_INVALID_PARAM;
+
+    // Operand length in bytes, rounded up to a whole byte
+    int len = (nbits + 7) >> 3;
+
+    Cpa8U* pBase = NULL;
+    Cpa8U* pModulus = NULL;
+    Cpa8U* pExponent = NULL;
+    HE_QAT_TaskRequest* request = NULL;
+    CpaCyLnModExpOpData* op_data = NULL;
+
+    // Unpack data and copy to QAT friendly memory space
+    HE_QAT_STATUS status = HE_QAT_STATUS_FAIL;
+    status = HE_QAT_MEM_ALLOC_CONTIG(&pBase, len, BYTE_ALIGNMENT_8);
+    if (HE_QAT_STATUS_SUCCESS != status || NULL == pBase) {
+        HE_QAT_PRINT_ERR("Contiguous memory allocation failed for pBase.\n");
+        goto cleanup;
+    }
+    memcpy(pBase, b, len);
+
+    status = HE_QAT_MEM_ALLOC_CONTIG(&pExponent, len, BYTE_ALIGNMENT_8);
+    if (HE_QAT_STATUS_SUCCESS != status || NULL == pExponent) {
+        HE_QAT_PRINT_ERR(
+            "Contiguous memory allocation failed for pExponent.\n");
+        goto cleanup;
+    }
+    memcpy(pExponent, e, len);
+
+    status = HE_QAT_MEM_ALLOC_CONTIG(&pModulus, len, BYTE_ALIGNMENT_8);
+    if (HE_QAT_STATUS_SUCCESS != status || NULL == pModulus) {
+        HE_QAT_PRINT_ERR("Contiguous memory allocation failed for pModulus.\n");
+        goto cleanup;
+    }
+    memcpy(pModulus, m, len);
+
+    // Pack it as a QAT Task Request
+    request = (HE_QAT_TaskRequest*)calloc(1, sizeof(HE_QAT_TaskRequest));
+    if (NULL == request) {
+        HE_QAT_PRINT_ERR(
+            "HE_QAT_TaskRequest memory allocation failed in "
+            "bnModExpPerformOp.\n");
+        goto cleanup;
+    }
+
+    op_data = (CpaCyLnModExpOpData*)calloc(1, sizeof(CpaCyLnModExpOpData));
+    if (NULL == op_data) {
+        HE_QAT_PRINT_ERR(
+            "Cpa memory allocation failed in bnModExpPerformOp.\n");
+        goto cleanup;
+    }
+
+    status = HE_QAT_MEM_ALLOC_CONTIG(&request->op_result.pData, len,
+                                     BYTE_ALIGNMENT_8);
+    if (HE_QAT_STATUS_SUCCESS != status || NULL == request->op_result.pData) {
+        HE_QAT_PRINT_ERR(
+            "CpaFlatBuffer.pData memory allocation failed in "
+            "bnModExpPerformOp.\n");
+        goto cleanup;
+    }
+    request->op_result.dataLenInBytes = len;
+
+    // All allocations succeeded: wire the operands into the request
+    op_data->base.pData = pBase;
+    op_data->base.dataLenInBytes = len;
+    op_data->exponent.pData = pExponent;
+    op_data->exponent.dataLenInBytes = len;
+    op_data->modulus.pData = pModulus;
+    op_data->modulus.dataLenInBytes = len;
+    request->op_data = (void*)op_data;
+
+    request->op_type = HE_QAT_OP_MODEXP;
+    request->callback_func = (void*)HE_QAT_bnModExpCallback;
+    request->op_status = status;
+    request->op_output = (void*)r;
+
+    // NOTE(review): req_count is a plain static counter; concurrent callers
+    // would race on it.
+    request->id = req_count++;
+
+    // Ensure calls are synchronized at exit (blocking)
+    pthread_mutex_init(&request->mutex, NULL);
+    pthread_cond_init(&request->ready, NULL);
+
+    HE_QAT_PRINT_DBG("BN ModExp interface call for request #%llu\n",
+                     request->id);
+
+    // Submit request using producer function. From here on the request and
+    // its buffers are owned by the request buffer; getBnModExpRequest() frees
+    // them once the result is ready.
+    submit_request(&he_qat_buffer, (void*)request);
+
+    return HE_QAT_STATUS_SUCCESS;
+
+cleanup:
+    // Nothing has been submitted yet, so this function still owns every
+    // resource acquired so far and must release it.
+    if (NULL != request && NULL != request->op_result.pData) {
+        HE_QAT_MEM_FREE_CONTIG(request->op_result.pData);
+    }
+    free(op_data);
+    free(request);
+    if (NULL != pModulus) {
+        HE_QAT_MEM_FREE_CONTIG(pModulus);
+    }
+    if (NULL != pExponent) {
+        HE_QAT_MEM_FREE_CONTIG(pExponent);
+    }
+    if (NULL != pBase) {
+        HE_QAT_MEM_FREE_CONTIG(pBase);
+    }
+    return HE_QAT_STATUS_FAIL;
+}
+
+/// @brief Queue a modular exponentiation request on OpenSSL BIGNUM operands.
+/// @details Serializes the base, exponent and modulus into zero-padded
+///          contiguous buffers of (nbits + 7) / 8 bytes, packs them into an
+///          HE_QAT_TaskRequest and submits it to the shared he_qat_buffer.
+///          HE_QAT_BIGNUMModExpCallback is registered to convert the result
+///          back into @p r. On any failure every resource acquired so far is
+///          released before returning.
+/// @param[out] r BIGNUM that receives the result via the registered callback.
+/// @param[in] b Base.
+/// @param[in] e Exponent.
+/// @param[in] m Modulus.
+/// @param[in] nbits Operand size in bits; must be positive and large enough
+///                  to hold each operand.
+/// @return HE_QAT_STATUS_SUCCESS if the request was queued,
+///         HE_QAT_STATUS_INVALID_PARAM on a NULL pointer or non-positive
+///         nbits, HE_QAT_STATUS_FAIL if an allocation or serialization
+///         failed.
+HE_QAT_STATUS HE_QAT_BIGNUMModExp(BIGNUM* r, BIGNUM* b, BIGNUM* e, BIGNUM* m,
+                                  int nbits) {
+    static unsigned long long req_count = 0;
+
+    if (NULL == r || NULL == b || NULL == e || NULL == m)
+        return HE_QAT_STATUS_INVALID_PARAM;
+    if (nbits <= 0) return HE_QAT_STATUS_INVALID_PARAM;
+
+    // Operand length in bytes, rounded up to a whole byte
+    int len = (nbits + 7) >> 3;
+
+    Cpa8U* pBase = NULL;
+    Cpa8U* pModulus = NULL;
+    Cpa8U* pExponent = NULL;
+    CpaCyLnModExpOpData* op_data = NULL;
+    HE_QAT_STATUS status = HE_QAT_STATUS_FAIL;
+
+    HE_QAT_TaskRequest* request =
+        (HE_QAT_TaskRequest*)calloc(1, sizeof(HE_QAT_TaskRequest));
+    if (NULL == request) {
+        HE_QAT_PRINT_ERR(
+            "HE_QAT_TaskRequest memory allocation failed in "
+            "bnModExpPerformOp.\n");
+        return HE_QAT_STATUS_FAIL;
+    }
+
+    // Unpack data and copy to QAT friendly memory space.
+    // BN_bn2binpad() returns -1 when the value does not fit in len bytes, so
+    // failure must be detected with "< 0" (a "!" test never fires on -1).
+    status = HE_QAT_MEM_ALLOC_CONTIG(&pBase, len, BYTE_ALIGNMENT_8);
+    if (HE_QAT_STATUS_SUCCESS != status || NULL == pBase) {
+        HE_QAT_PRINT_ERR("Contiguous memory allocation failed for pBase.\n");
+        goto cleanup;
+    }
+    if (BN_bn2binpad(b, pBase, len) < 0) {
+        HE_QAT_PRINT_ERR("BN_bn2binpad (base) failed in bnModExpPerformOp.\n");
+        goto cleanup;
+    }
+
+    status = HE_QAT_MEM_ALLOC_CONTIG(&pExponent, len, BYTE_ALIGNMENT_8);
+    if (HE_QAT_STATUS_SUCCESS != status || NULL == pExponent) {
+        HE_QAT_PRINT_ERR(
+            "Contiguous memory allocation failed for pExponent.\n");
+        goto cleanup;
+    }
+    if (BN_bn2binpad(e, pExponent, len) < 0) {
+        HE_QAT_PRINT_ERR(
+            "BN_bn2binpad (exponent) failed in bnModExpPerformOp.\n");
+        goto cleanup;
+    }
+
+    status = HE_QAT_MEM_ALLOC_CONTIG(&pModulus, len, BYTE_ALIGNMENT_8);
+    if (HE_QAT_STATUS_SUCCESS != status || NULL == pModulus) {
+        HE_QAT_PRINT_ERR("Contiguous memory allocation failed for pModulus.\n");
+        goto cleanup;
+    }
+    if (BN_bn2binpad(m, pModulus, len) < 0) {
+        HE_QAT_PRINT_ERR(
+            "BN_bn2binpad (modulus) failed in bnModExpPerformOp.\n");
+        goto cleanup;
+    }
+
+    // Pack it as a QAT Task Request
+    op_data = (CpaCyLnModExpOpData*)calloc(1, sizeof(CpaCyLnModExpOpData));
+    if (NULL == op_data) {
+        HE_QAT_PRINT_ERR(
+            "Cpa memory allocation failed in bnModExpPerformOp.\n");
+        goto cleanup;
+    }
+
+    status = HE_QAT_MEM_ALLOC_CONTIG(&request->op_result.pData, len,
+                                     BYTE_ALIGNMENT_8);
+    if (HE_QAT_STATUS_SUCCESS != status || NULL == request->op_result.pData) {
+        HE_QAT_PRINT_ERR(
+            "CpaFlatBuffer.pData memory allocation failed in "
+            "bnModExpPerformOp.\n");
+        goto cleanup;
+    }
+    request->op_result.dataLenInBytes = len;
+
+    // All allocations succeeded: wire the operands into the request
+    op_data->base.pData = pBase;
+    op_data->base.dataLenInBytes = len;
+    op_data->exponent.pData = pExponent;
+    op_data->exponent.dataLenInBytes = len;
+    op_data->modulus.pData = pModulus;
+    op_data->modulus.dataLenInBytes = len;
+    request->op_data = (void*)op_data;
+
+    request->op_type = HE_QAT_OP_MODEXP;
+    request->callback_func = (void*)HE_QAT_BIGNUMModExpCallback;
+    request->op_status = status;
+    request->op_output = (void*)r;
+
+    // NOTE(review): req_count is a plain static counter; concurrent callers
+    // would race on it.
+    request->id = req_count++;
+
+    // Ensure calls are synchronized at exit (blocking)
+    pthread_mutex_init(&request->mutex, NULL);
+    pthread_cond_init(&request->ready, NULL);
+
+    // Submit request using producer function. From here on the request and
+    // its buffers are owned by the request buffer; getBnModExpRequest() frees
+    // them once the result is ready.
+    submit_request(&he_qat_buffer, (void*)request);
+
+    return HE_QAT_STATUS_SUCCESS;
+
+cleanup:
+    // Nothing has been submitted yet, so this function still owns every
+    // resource acquired so far and must release it.
+    if (NULL != request->op_result.pData) {
+        HE_QAT_MEM_FREE_CONTIG(request->op_result.pData);
+    }
+    free(op_data);
+    free(request);
+    if (NULL != pModulus) {
+        HE_QAT_MEM_FREE_CONTIG(pModulus);
+    }
+    if (NULL != pExponent) {
+        HE_QAT_MEM_FREE_CONTIG(pExponent);
+    }
+    if (NULL != pBase) {
+        HE_QAT_MEM_FREE_CONTIG(pBase);
+    }
+    return HE_QAT_STATUS_FAIL;
+}
+
+/// @brief Wait for and retire up to batch_size requests from he_qat_buffer.
+/// @details Starting at an index that persists across calls, blocks on each
+///          request until its request_status becomes HE_QAT_STATUS_READY,
+///          then frees the request's contiguous buffers, destroys its
+///          synchronization objects, frees the request itself and clears the
+///          buffer slot.
+/// @param[in] batch_size Number of loop iterations to perform. The loop is a
+///            do-while, so one iteration runs even when batch_size is 0.
+void getBnModExpRequest(unsigned int batch_size) {
+    // Position of the next slot to retire; kept across calls
+    static unsigned long block_at_index = 0;
+    unsigned int j = 0;
+
+#ifdef HE_QAT_PERF
+    struct timeval start_time, end_time;
+    double time_taken = 0.0;
+    gettimeofday(&start_time, NULL);
+#endif
+    do {
+        // Buffer read may be safe for single-threaded blocking calls only.
+        // Note: Not tested on multithreaded environment.
+        HE_QAT_TaskRequest* task =
+            (HE_QAT_TaskRequest*)he_qat_buffer.data[block_at_index];
+
+        // NOTE(review): "continue" in a do-while jumps to the loop condition,
+        // so an empty slot still consumes one iteration and block_at_index
+        // is not advanced - confirm this is intended.
+        if (NULL == task) continue;
+
+        // Block and synchronize: Wait for the most recently offloaded request
+        // to complete processing
+        pthread_mutex_lock(
+            &task->mutex);  // mutex only needed for the conditional variable
+        // NOTE(review): only HE_QAT_STATUS_READY ends the wait; a request
+        // marked HE_QAT_STATUS_FAIL by its callback keeps this loop blocked.
+        while (HE_QAT_STATUS_READY != task->request_status)
+            pthread_cond_wait(&task->ready, &task->mutex);
+
+#ifdef HE_QAT_PERF
+        time_taken = (task->end.tv_sec - task->start.tv_sec) * 1e6;
+        time_taken =
+            (time_taken + (task->end.tv_usec - task->start.tv_usec));  //*1e-6;
+        HE_QAT_PRINT("%u time: %.1lfus\n", j, time_taken);
+#endif
+
+        // Free up QAT temporary memory
+        CpaCyLnModExpOpData* op_data = (CpaCyLnModExpOpData*)task->op_data;
+        if (op_data) {
+            HE_QAT_MEM_FREE_CONTIG(op_data->base.pData);
+            HE_QAT_MEM_FREE_CONTIG(op_data->exponent.pData);
+            HE_QAT_MEM_FREE_CONTIG(op_data->modulus.pData);
+        }
+        free(task->op_data);
+        task->op_data = NULL;
+        if (task->op_result.pData) {
+            HE_QAT_MEM_FREE_CONTIG(task->op_result.pData);
+        }
+
+        // Move forward to wait for the next request that will be offloaded
+        pthread_mutex_unlock(&task->mutex);
+
+        // The request is about to be freed: release the synchronization
+        // objects created with pthread_*_init() when it was submitted.
+        pthread_cond_destroy(&task->ready);
+        pthread_mutex_destroy(&task->mutex);
+
+        free(he_qat_buffer.data[block_at_index]);
+        he_qat_buffer.data[block_at_index] = NULL;
+
+        block_at_index = (block_at_index + 1) % HE_QAT_BUFFER_SIZE;
+    } while (++j < batch_size);
+
+#ifdef HE_QAT_PERF
+    gettimeofday(&end_time, NULL);
+    time_taken = (end_time.tv_sec - start_time.tv_sec) * 1e6;
+    time_taken = (time_taken + (end_time.tv_usec - start_time.tv_usec));
+    HE_QAT_PRINT("Batch Wall Time: %.1lfus\n", time_taken);
+#endif
+
+    return;
+}
+
+/*
+ * **************************************************************************
+ *  Implementation of Functions for the Multithreading Interface Support
+ * **************************************************************************
+ */
+
+/// @brief Queue a modular exponentiation request on byte-array operands into
+///        a specific outstanding buffer.
+/// @details Same packing as HE_QAT_bnModExp(), but the request is submitted
+///          to outstanding.buffer[_buffer_id] instead of the shared
+///          he_qat_buffer. HE_QAT_bnModExpCallback is registered to copy the
+///          result into @p r. On any failure every resource acquired so far
+///          is released before returning.
+/// @param[in] _buffer_id Index of the outstanding buffer to submit to; must
+///                       be below HE_QAT_BUFFER_COUNT.
+/// @param[out] r Destination of the result; the registered callback copies
+///               (nbits + 7) / 8 bytes into it.
+/// @param[in] b Base, read as (nbits + 7) / 8 bytes.
+/// @param[in] e Exponent, read as (nbits + 7) / 8 bytes.
+/// @param[in] m Modulus, read as (nbits + 7) / 8 bytes.
+/// @param[in] nbits Operand size in bits; must be positive.
+/// @return HE_QAT_STATUS_SUCCESS if the request was queued,
+///         HE_QAT_STATUS_INVALID_PARAM on a NULL pointer, non-positive nbits
+///         or out-of-range buffer id, HE_QAT_STATUS_FAIL if an allocation
+///         failed.
+HE_QAT_STATUS HE_QAT_bnModExp_MT(unsigned int _buffer_id, unsigned char* r,
+                                 unsigned char* b, unsigned char* e,
+                                 unsigned char* m, int nbits) {
+    static unsigned long long req_count = 0;
+
+    if (NULL == r || NULL == b || NULL == e || NULL == m)
+        return HE_QAT_STATUS_INVALID_PARAM;
+    // A non-positive bit length would give a zero or negative byte count to
+    // the allocations and memcpy() calls below.
+    if (nbits <= 0) return HE_QAT_STATUS_INVALID_PARAM;
+    // Ids handed out by acquire_bnModExp_buffer() are always below
+    // HE_QAT_BUFFER_COUNT; anything else would index past the buffers.
+    if (_buffer_id >= HE_QAT_BUFFER_COUNT) return HE_QAT_STATUS_INVALID_PARAM;
+
+    // Operand length in bytes, rounded up to a whole byte
+    int len = (nbits + 7) >> 3;
+
+    Cpa8U* pBase = NULL;
+    Cpa8U* pModulus = NULL;
+    Cpa8U* pExponent = NULL;
+    HE_QAT_TaskRequest* request = NULL;
+    CpaCyLnModExpOpData* op_data = NULL;
+
+    // Unpack data and copy to QAT friendly memory space
+    HE_QAT_STATUS status = HE_QAT_STATUS_FAIL;
+    status = HE_QAT_MEM_ALLOC_CONTIG(&pBase, len, BYTE_ALIGNMENT_8);
+    if (HE_QAT_STATUS_SUCCESS != status || NULL == pBase) {
+        HE_QAT_PRINT_ERR("Contiguous memory allocation failed for pBase.\n");
+        goto cleanup;
+    }
+    memcpy(pBase, b, len);
+
+    status = HE_QAT_MEM_ALLOC_CONTIG(&pExponent, len, BYTE_ALIGNMENT_8);
+    if (HE_QAT_STATUS_SUCCESS != status || NULL == pExponent) {
+        HE_QAT_PRINT_ERR(
+            "Contiguous memory allocation failed for pExponent.\n");
+        goto cleanup;
+    }
+    memcpy(pExponent, e, len);
+
+    status = HE_QAT_MEM_ALLOC_CONTIG(&pModulus, len, BYTE_ALIGNMENT_8);
+    if (HE_QAT_STATUS_SUCCESS != status || NULL == pModulus) {
+        HE_QAT_PRINT_ERR("Contiguous memory allocation failed for pModulus.\n");
+        goto cleanup;
+    }
+    memcpy(pModulus, m, len);
+
+    // Pack it as a QAT Task Request
+    request = (HE_QAT_TaskRequest*)calloc(1, sizeof(HE_QAT_TaskRequest));
+    if (NULL == request) {
+        HE_QAT_PRINT_ERR(
+            "HE_QAT_TaskRequest memory allocation failed in "
+            "bnModExpPerformOp.\n");
+        goto cleanup;
+    }
+
+    op_data = (CpaCyLnModExpOpData*)calloc(1, sizeof(CpaCyLnModExpOpData));
+    if (NULL == op_data) {
+        HE_QAT_PRINT_ERR(
+            "Cpa memory allocation failed in bnModExpPerformOp.\n");
+        goto cleanup;
+    }
+
+    status = HE_QAT_MEM_ALLOC_CONTIG(&request->op_result.pData, len,
+                                     BYTE_ALIGNMENT_8);
+    if (HE_QAT_STATUS_SUCCESS != status || NULL == request->op_result.pData) {
+        HE_QAT_PRINT_ERR(
+            "CpaFlatBuffer.pData memory allocation failed in "
+            "bnModExpPerformOp.\n");
+        goto cleanup;
+    }
+    request->op_result.dataLenInBytes = len;
+
+    // All allocations succeeded: wire the operands into the request
+    op_data->base.pData = pBase;
+    op_data->base.dataLenInBytes = len;
+    op_data->exponent.pData = pExponent;
+    op_data->exponent.dataLenInBytes = len;
+    op_data->modulus.pData = pModulus;
+    op_data->modulus.dataLenInBytes = len;
+    request->op_data = (void*)op_data;
+
+    request->op_type = HE_QAT_OP_MODEXP;
+    request->callback_func = (void*)HE_QAT_bnModExpCallback;
+    request->op_status = status;
+    request->op_output = (void*)r;
+
+    // NOTE(review): req_count is a plain static counter shared by all
+    // threads calling this interface; the increment is not synchronized.
+    request->id = req_count++;
+
+    // Ensure calls are synchronized at exit (blocking)
+    pthread_mutex_init(&request->mutex, NULL);
+    pthread_cond_init(&request->ready, NULL);
+
+    HE_QAT_PRINT_DBG("BN ModExp interface call for request #%llu\n",
+                     request->id);
+
+    // Submit request using producer function. From here on the request and
+    // its buffers are owned by the outstanding buffer;
+    // release_bnModExp_buffer() frees them once the result is ready.
+    submit_request(&outstanding.buffer[_buffer_id], (void*)request);
+
+    return HE_QAT_STATUS_SUCCESS;
+
+cleanup:
+    // Nothing has been submitted yet, so this function still owns every
+    // resource acquired so far and must release it.
+    if (NULL != request && NULL != request->op_result.pData) {
+        HE_QAT_MEM_FREE_CONTIG(request->op_result.pData);
+    }
+    free(op_data);
+    free(request);
+    if (NULL != pModulus) {
+        HE_QAT_MEM_FREE_CONTIG(pModulus);
+    }
+    if (NULL != pExponent) {
+        HE_QAT_MEM_FREE_CONTIG(pExponent);
+    }
+    if (NULL != pBase) {
+        HE_QAT_MEM_FREE_CONTIG(pBase);
+    }
+    return HE_QAT_STATUS_FAIL;
+}
+
+/// @brief Reserve one outstanding buffer for the calling thread.
+/// @details Blocks on outstanding.any_free_buffer while all
+///          HE_QAT_BUFFER_COUNT buffers are busy, then scans free_buffer[]
+///          starting at outstanding.next_free_buffer, marks the first free
+///          slot as taken and ready, and signals outstanding.any_ready_buffer.
+/// @param[out] _buffer_id Receives the index of the reserved buffer.
+/// @return HE_QAT_STATUS_SUCCESS on success, HE_QAT_STATUS_INVALID_PARAM if
+///         _buffer_id is NULL, HE_QAT_STATUS_FAIL if no slot is marked free
+///         although busy_count says one should be.
+HE_QAT_STATUS acquire_bnModExp_buffer(unsigned int* _buffer_id) {
+    if (NULL == _buffer_id) return HE_QAT_STATUS_INVALID_PARAM;
+
+    pthread_mutex_lock(&outstanding.mutex);
+
+    // Wait until next outstanding buffer becomes available for use
+    while (outstanding.busy_count >= HE_QAT_BUFFER_COUNT)
+        pthread_cond_wait(&outstanding.any_free_buffer, &outstanding.mutex);
+
+    assert(outstanding.busy_count < HE_QAT_BUFFER_COUNT);
+
+    // Find next outstanding buffer available
+    unsigned int next_free_buffer = outstanding.next_free_buffer;
+    int found = 0;
+    for (unsigned int i = 0; i < HE_QAT_BUFFER_COUNT; i++) {
+        if (outstanding.free_buffer[next_free_buffer]) {
+            outstanding.free_buffer[next_free_buffer] = 0;
+            *_buffer_id = next_free_buffer;
+            found = 1;
+            break;
+        }
+        next_free_buffer = (next_free_buffer + 1) % HE_QAT_BUFFER_COUNT;
+    }
+
+    if (!found) {
+        // busy_count and free_buffer[] disagree; bail out rather than use an
+        // id that was never assigned.
+        pthread_mutex_unlock(&outstanding.mutex);
+        HE_QAT_PRINT_ERR("No free outstanding buffer found.\n");
+        return HE_QAT_STATUS_FAIL;
+    }
+
+    outstanding.next_free_buffer = (*_buffer_id + 1) % HE_QAT_BUFFER_COUNT;
+    outstanding.next_ready_buffer = *_buffer_id;
+    outstanding.ready_buffer[*_buffer_id] = 1;
+    outstanding.busy_count++;
+    // busy meaning:
+    // taken by a thread, enqueued requests, in processing, waiting results
+
+    pthread_cond_signal(&outstanding.any_ready_buffer);
+    pthread_mutex_unlock(&outstanding.mutex);
+
+    // Report the id only after it has been assigned, printed as a number
+    HE_QAT_PRINT_DBG("acquire_bnModExp_buffer #%u\n", *_buffer_id);
+
+    return HE_QAT_STATUS_SUCCESS;
+}
+
+/// @brief Retire _batch_size requests from an outstanding buffer and hand the
+///        buffer back for use by another thread.
+/// @details Starting at the buffer's next_data_out position, blocks on each
+///          request until its request_status becomes HE_QAT_STATUS_READY,
+///          frees its contiguous buffers, destroys its synchronization
+///          objects, frees the request and clears the slot. Afterwards the
+///          buffer is marked free under outstanding.mutex and
+///          outstanding.any_free_buffer is signaled.
+/// @param[in] _buffer_id Index of the outstanding buffer to release; must be
+///                       below HE_QAT_BUFFER_COUNT.
+/// @param[in] _batch_size Number of requests to wait for and retire.
+void release_bnModExp_buffer(unsigned int _buffer_id,
+                             unsigned int _batch_size) {
+    // Ids handed out by acquire_bnModExp_buffer() are always below
+    // HE_QAT_BUFFER_COUNT; anything else would index past the buffers.
+    if (_buffer_id >= HE_QAT_BUFFER_COUNT) {
+        HE_QAT_PRINT_ERR("release_bnModExp_buffer: invalid buffer #%u\n",
+                         _buffer_id);
+        return;
+    }
+
+    unsigned int next_data_out = outstanding.buffer[_buffer_id].next_data_out;
+    unsigned int j = 0;
+
+    HE_QAT_PRINT_DBG("release_bnModExp_buffer #%u\n", _buffer_id);
+
+#ifdef HE_QAT_PERF
+    struct timeval start_time, end_time;
+    double time_taken = 0.0;
+    gettimeofday(&start_time, NULL);
+#endif
+
+    while (j < _batch_size) {
+        HE_QAT_TaskRequest* task =
+            (HE_QAT_TaskRequest*)outstanding.buffer[_buffer_id]
+                .data[next_data_out];
+
+        // NOTE(review): neither j nor next_data_out changes here, so this
+        // spins until the slot is filled by another thread - confirm that
+        // this busy-wait is intended.
+        if (NULL == task) continue;
+
+        HE_QAT_PRINT_DBG("BatchSize %u Buffer #%u Request #%u Waiting\n",
+                         _batch_size, _buffer_id, j);
+
+        // Block and synchronize: Wait for the most recently offloaded request
+        // to complete processing. Mutex only needed for the conditional
+        // variable.
+        pthread_mutex_lock(&task->mutex);
+        // NOTE(review): only HE_QAT_STATUS_READY ends the wait; a request
+        // marked HE_QAT_STATUS_FAIL by its callback keeps this loop blocked.
+        while (HE_QAT_STATUS_READY != task->request_status)
+            pthread_cond_wait(&task->ready, &task->mutex);
+
+#ifdef HE_QAT_PERF
+        time_taken = (task->end.tv_sec - task->start.tv_sec) * 1e6;
+        time_taken =
+            (time_taken + (task->end.tv_usec - task->start.tv_usec));  //*1e-6;
+        HE_QAT_PRINT("%u time: %.1lfus\n", j, time_taken);
+#endif
+
+        // Free up QAT temporary memory
+        CpaCyLnModExpOpData* op_data = (CpaCyLnModExpOpData*)task->op_data;
+        if (op_data) {
+            HE_QAT_MEM_FREE_CONTIG(op_data->base.pData);
+            HE_QAT_MEM_FREE_CONTIG(op_data->exponent.pData);
+            HE_QAT_MEM_FREE_CONTIG(op_data->modulus.pData);
+        }
+        free(task->op_data);
+        task->op_data = NULL;
+        if (task->op_result.pData) {
+            HE_QAT_MEM_FREE_CONTIG(task->op_result.pData);
+        }
+
+        // Move forward to wait for the next request that will be offloaded
+        pthread_mutex_unlock(&task->mutex);
+
+        // The request is about to be freed: release the synchronization
+        // objects created with pthread_*_init() when it was submitted.
+        pthread_cond_destroy(&task->ready);
+        pthread_mutex_destroy(&task->mutex);
+
+        HE_QAT_PRINT_DBG("Buffer #%u Request #%u Completed\n", _buffer_id, j);
+
+        // outstanding.buffer[_buffer_id].count--;
+
+        free(outstanding.buffer[_buffer_id].data[next_data_out]);
+        outstanding.buffer[_buffer_id].data[next_data_out] = NULL;
+
+        // Update for next thread on the next external iteration
+        next_data_out = (next_data_out + 1) % HE_QAT_BUFFER_SIZE;
+
+        j++;
+    }
+
+#ifdef HE_QAT_PERF
+    gettimeofday(&end_time, NULL);
+    time_taken = (end_time.tv_sec - start_time.tv_sec) * 1e6;
+    time_taken =
+        (time_taken + (end_time.tv_usec - start_time.tv_usec));  //*1e-6;
+    HE_QAT_PRINT("Batch Wall Time: %.1lfus\n", time_taken);
+#endif
+
+    // Remember where the next batch on this buffer starts
+    outstanding.buffer[_buffer_id].next_data_out = next_data_out;
+
+    // Release outstanding buffer for usage by another thread
+    pthread_mutex_lock(&outstanding.mutex);
+
+    outstanding.next_free_buffer = _buffer_id;
+    outstanding.ready_buffer[_buffer_id] = 0;
+    outstanding.free_buffer[_buffer_id] = 1;
+    outstanding.busy_count--;
+
+    pthread_cond_signal(&outstanding.any_free_buffer);
+    pthread_mutex_unlock(&outstanding.mutex);
+
+    return;
+}
diff --git a/module/heqat/heqat/cb.c b/module/heqat/heqat/cb.c
new file mode 100644
index 0000000..5151996
--- /dev/null
+++ b/module/heqat/heqat/cb.c
@@ -0,0 +1,129 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+/// @file heqat/cb.c
+
+// QAT-API headers
+#include <cpa.h>
+
+// C support libraries
+#include <pthread.h>
+#include <string.h>
+#include <openssl/bn.h>
+
+// Local headers
+#include "heqat/common/types.h"
+#include "heqat/common/utils.h"
+
+// Global variables
+// The mutex is file-local and no pthread_mutex_init() call exists for it in
+// this file, so it is initialized statically.
+static pthread_mutex_t response_mutex =
+    PTHREAD_MUTEX_INITIALIZER;  ///< It protects against race condition on
+                                ///< response_count due to concurrent callback
+                                ///< events.
+extern volatile unsigned long
+    response_count;  ///< It counts the number of requests completed by the
+                     ///< accelerator.
+
+/// @brief Callback implementation for the API HE_QAT_BIGNUMModExp(...)
+/// Callback function for the interface HE_QAT_BIGNUMModExp(). It counts the
+/// response, records the operation status in the request and, on success,
+/// converts the result bytes in request->op_result into the BIGNUM stored in
+/// request->op_output. The waiter on request->ready is then signaled.
+/// @param[in] pCallbackTag work request package (HE_QAT_TaskRequest)
+/// containing the original input data and other resources for
+/// post-processing. Nothing is done if it is NULL.
+/// @param[in] status CPA_STATUS of the performed operation, e.g. CyLnModExp().
+/// @param[in] pOpData original input data passed to accelerator to perform the
+/// target operation; it must match request->op_data.
+/// @param[out] pOut output returned by the accelerator. Unused here: the
+/// result is read from request->op_result instead.
+void HE_QAT_BIGNUMModExpCallback(void* pCallbackTag, CpaStatus status,
+                                 void* pOpData, CpaFlatBuffer* pOut) {
+    (void)pOut;
+
+    // Without a request there is nothing to update or signal
+    if (NULL == pCallbackTag) return;
+
+    // Read request data
+    HE_QAT_TaskRequest* request = (HE_QAT_TaskRequest*)pCallbackTag;
+
+    pthread_mutex_lock(&response_mutex);
+    // Global track of responses by accelerator
+    response_count += 1;
+    pthread_mutex_unlock(&response_mutex);
+
+    pthread_mutex_lock(&request->mutex);
+    request->op_status = status;
+    if (CPA_STATUS_SUCCESS != status) {
+        // Device reported an error: record the failure instead of leaving
+        // whatever request_status was set before.
+        // NOTE(review): the waiters in bnops.c block until
+        // HE_QAT_STATUS_READY, so a failed request still leaves them waiting.
+        request->request_status = HE_QAT_STATUS_FAIL;
+    } else if (pOpData != request->op_data) {
+        request->request_status = HE_QAT_STATUS_FAIL;
+    } else {
+        // Convert the result bytes into the caller's BIGNUM
+        BIGNUM* r = BN_bin2bn(request->op_result.pData,
+                              request->op_result.dataLenInBytes,
+                              (BIGNUM*)request->op_output);
+        // Mark request as complete (ready to be used) or failed
+        request->request_status =
+            (NULL != r) ? HE_QAT_STATUS_READY : HE_QAT_STATUS_FAIL;
+#ifdef HE_QAT_PERF
+        gettimeofday(&request->end, NULL);
+#endif
+    }
+    // Make it synchronous and blocking
+    pthread_cond_signal(&request->ready);
+    pthread_mutex_unlock(&request->mutex);
+#ifdef HE_QAT_SYNC_MODE
+    COMPLETE((struct COMPLETION_STRUCT*)&request->callback);
+#endif
+
+    return;
+}
+
+/// @brief Callback implementation for the API HE_QAT_bnModExp(...)
+/// Callback function for the interface HE_QAT_bnModExp(). It counts the
+/// response, records the operation status in the request and, on success,
+/// copies the result bytes in request->op_result into the byte array stored
+/// in request->op_output. The waiter on request->ready is then signaled.
+/// @param[in] pCallbackTag work request package (HE_QAT_TaskRequest)
+/// containing the original input data and other resources for
+/// post-processing. Nothing is done if it is NULL.
+/// @param[in] status CPA_STATUS of the performed operation, e.g. CyLnModExp().
+/// @param[in] pOpData original input data passed to accelerator to perform the
+/// target operation; it must match request->op_data.
+/// @param[out] pOut output returned by the accelerator. Unused here: the
+/// result is read from request->op_result instead.
+void HE_QAT_bnModExpCallback(void* pCallbackTag, CpaStatus status,
+                             void* pOpData, CpaFlatBuffer* pOut) {
+    (void)pOut;
+
+    // Without a request there is nothing to update or signal
+    if (NULL == pCallbackTag) return;
+
+    // Read request data
+    HE_QAT_TaskRequest* request = (HE_QAT_TaskRequest*)pCallbackTag;
+
+    pthread_mutex_lock(&response_mutex);
+    // Global track of responses by accelerator
+    response_count += 1;
+    pthread_mutex_unlock(&response_mutex);
+
+    pthread_mutex_lock(&request->mutex);
+    request->op_status = status;
+    if (CPA_STATUS_SUCCESS != status) {
+        // Device reported an error: record the failure instead of leaving
+        // whatever request_status was set before.
+        // NOTE(review): the waiters in bnops.c block until
+        // HE_QAT_STATUS_READY, so a failed request still leaves them waiting.
+        request->request_status = HE_QAT_STATUS_FAIL;
+    } else if (pOpData != request->op_data) {
+        request->request_status = HE_QAT_STATUS_FAIL;
+    } else {
+        // Mark request as complete or ready to be used
+        request->request_status = HE_QAT_STATUS_READY;
+        // Copy compute results to output destination
+        memcpy(request->op_output, request->op_result.pData,
+               request->op_result.dataLenInBytes);
+#ifdef HE_QAT_PERF
+        gettimeofday(&request->end, NULL);
+#endif
+    }
+    // Make it synchronous and blocking
+    pthread_cond_signal(&request->ready);
+    pthread_mutex_unlock(&request->mutex);
+#ifdef HE_QAT_SYNC_MODE
+    COMPLETE((struct COMPLETION_STRUCT*)&request->callback);
+#endif
+
+    return;
+}
diff --git a/module/heqat/heqat/common/utils.c b/module/heqat/heqat/common/utils.c
new file mode 100644
index 0000000..81fcb73
--- /dev/null
+++ b/module/heqat/heqat/common/utils.c
@@ -0,0 +1,69 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include <openssl/err.h>
+#include <openssl/rand.h>
+
+#include "heqat/common/utils.h"
+#include "heqat/common/types.h"
+
+/// @brief Generate a random BIGNUM of the requested bit length for testing.
+/// @param[in] nbits Number of bits passed to BN_rand().
+/// @return A newly allocated BIGNUM that the caller must release with
+///         BN_free(), or NULL if the PRNG is not seeded, the allocation
+///         fails or random generation fails.
+BIGNUM* generateTestBNData(int nbits) {
+    if (!RAND_status()) return NULL;
+
+    HE_QAT_PRINT_DBG("PRNG properly seeded.\n");
+
+    BIGNUM* bn = BN_new();
+    // BN_new() returns NULL on allocation failure; BN_rand() must not be
+    // handed a NULL destination.
+    if (NULL == bn) {
+        HE_QAT_PRINT_ERR("Error while allocating BN: %lu\n", ERR_get_error());
+        return NULL;
+    }
+
+    if (!BN_rand(bn, nbits, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY)) {
+        BN_free(bn);
+        HE_QAT_PRINT_ERR("Error while generating BN random number: %lu\n",
+                         ERR_get_error());
+        return NULL;
+    }
+
+    return bn;
+}
+
+/// @brief Serialize a BIGNUM into a zero-padded byte array of nbits / 8 bytes.
+/// @param[in] bn Number to serialize.
+/// @param[in] nbits Target size in bits; nbits / 8 bytes are produced.
+/// @return A buffer allocated with OPENSSL_zalloc() that the caller must
+///         release with OPENSSL_free(), or NULL if bn is NULL, if bn already
+///         occupies nbits / 8 bytes or more (no padding needed or possible),
+///         or if allocation or serialization fails.
+unsigned char* paddingZeros(BIGNUM* bn, int nbits) {
+    if (!bn) return NULL;
+
+    // Only pad when the number is strictly shorter than the target size
+    int num_bytes = BN_num_bytes(bn);
+    int bytes_left = nbits / 8 - num_bytes;
+    if (bytes_left <= 0) return NULL;
+
+    int len = bytes_left + num_bytes;
+    unsigned char* bin = (unsigned char*)OPENSSL_zalloc(len);
+    if (!bin) return NULL;
+
+    HE_QAT_PRINT_DBG("Padding bn with %d bytes to total %d bytes\n", bytes_left,
+                     len);
+
+    // BN_bn2binpad() reports failure through its return value (-1). Checking
+    // the error queue instead would trip over stale entries left by earlier,
+    // unrelated OpenSSL calls.
+    if (BN_bn2binpad(bn, bin, len) < 0) {
+        OPENSSL_free(bin);
+        return NULL;
+    }
+
+    return bin;
+}
+
+/// @brief Print a BIGNUM as nbits / 8 zero-padded bytes in hexadecimal,
+///        followed by a newline.
+/// @details Nothing is printed if bn is NULL, if nbits / 8 is not positive,
+///          if the temporary buffer cannot be allocated or if bn does not fit
+///          in nbits / 8 bytes.
+/// @param[in] bn Number to print.
+/// @param[in] nbits Size in bits used to derive the printed byte count.
+void showHexBN(BIGNUM* bn, int nbits) {
+    if (!bn) return;
+
+    int len = nbits / 8;
+    if (len <= 0) return;
+
+    unsigned char* bin = (unsigned char*)OPENSSL_zalloc(len);
+    if (!bin) return;
+
+    // BN_bn2binpad() returns the number of bytes written, or -1 when bn does
+    // not fit; -1 is non-zero, so success must be tested explicitly.
+    if (BN_bn2binpad(bn, bin, len) == len) {
+        for (int i = 0; i < len; i++) HE_QAT_PRINT("%2.2x", bin[i]);
+        HE_QAT_PRINT("\n");
+    }
+    OPENSSL_free(bin);
+    return;
+}
+
+/// @brief Print len bytes of a buffer in hexadecimal, followed by a newline.
+/// @details Nothing is printed if bin is NULL or len is negative. A len of 0
+///          prints only the newline.
+/// @param[in] bin Bytes to print.
+/// @param[in] len Number of bytes to print.
+void showHexBin(unsigned char* bin, int len) {
+    // A negative len compared against an unsigned index would turn into a
+    // huge bound and read far past the buffer.
+    if (!bin || len < 0) return;
+    for (int i = 0; i < len; i++) HE_QAT_PRINT("%2.2x", bin[i]);
+    HE_QAT_PRINT("\n");
+    return;
+}
diff --git a/module/heqat/heqat/context.c b/module/heqat/heqat/context.c
new file mode 100644
index 0000000..eb0cd9b
--- /dev/null
+++ b/module/heqat/heqat/context.c
@@ -0,0 +1,302 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+/// @file heqat/context.c
+
+#define _GNU_SOURCE
+
+#include <icp_sal_user.h>
+#include <icp_sal_poll.h>
+#include <qae_mem.h>
+
+#include <pthread.h>
+#include <stdint.h>
+#include <unistd.h>
+
+#include "heqat/common/types.h"
+#include "heqat/common/utils.h"
+#include "heqat/context.h"
+
+// Capacity of the static instance-handle table used by get_qat_instance()
+#ifdef USER_SPACE
+#define MAX_INSTANCES 1024
+#else
+#define MAX_INSTANCES 1
+#endif
+
+// Utilities functions from qae_mem.h header
+extern CpaStatus qaeMemInit(void);
+extern void qaeMemDestroy(void);
+
+// Context state shared with the buffer manager thread, guarded by
+// context_lock in acquire_qat_devices()/release_qat_devices().
+static volatile HE_QAT_STATUS context_state = HE_QAT_STATUS_INACTIVE;
+// Statically initialized: the lock is taken by acquire_qat_devices() and
+// release_qat_devices() and no pthread_mutex_init() call exists for it in
+// this file.
+static pthread_mutex_t context_lock = PTHREAD_MUTEX_INITIALIZER;
+
+// Global variable declarations
+static pthread_t buffer_manager;
+static pthread_t he_qat_runner;
+static pthread_attr_t he_qat_inst_attr[HE_QAT_NUM_ACTIVE_INSTANCES];
+static HE_QAT_InstConfig he_qat_inst_config[HE_QAT_NUM_ACTIVE_INSTANCES];
+static HE_QAT_Config* he_qat_config = NULL;
+
+// External global variables
+extern HE_QAT_RequestBuffer he_qat_buffer;
+extern HE_QAT_OutstandingBuffer outstanding;
+
+/***********           Internal Services          ***********/
+// Start scheduler of work requests (consumer)
+extern void* schedule_requests(void* state);
+// Activate cpaCyInstances to run on background and poll responses from QAT
+// accelerator
+extern void* start_instances(void* _inst_config);
+// Stop a running group of cpaCyInstances started with the "start_instances"
+// service
+extern void stop_instances(HE_QAT_Config* _config);
+// Stop running individual QAT instances from a list of cpaCyInstances (called
+// by "stop_instances")
+extern void stop_perform_op(void* _inst_config, unsigned num_inst);
+// Activate a cpaCyInstance to run on background and poll responses from QAT
+// accelerator WARNING: Deprecated when "start_instances" becomes default.
+extern void* start_perform_op(void* _inst_config);
+
+// Number of discovered instances (0 means "not discovered yet") and the
+// round-robin cursor used by get_qat_instance()
+static Cpa16U numInstances = 0;
+static Cpa16U nextInstance = 0;
+
+/// @brief Hand out a QAT crypto instance handle, discovering instances on
+/// first use.
+/// @details While numInstances is 0 the function queries the number of
+/// instances, clamps it to MAX_INSTANCES and HE_QAT_NUM_ACTIVE_INSTANCES,
+/// fetches the handles into a static table and returns the entry at
+/// nextInstance. Once instances are known, each call advances nextInstance
+/// round-robin and returns that entry. Any discovery failure resets
+/// numInstances to 0 so the next call retries discovery instead of serving
+/// handles from a table that was never validated.
+/// @return An instance handle, or NULL if discovery fails or finds nothing.
+static CpaInstanceHandle get_qat_instance() {
+    static CpaInstanceHandle cyInstHandles[MAX_INSTANCES];
+    CpaStatus status = CPA_STATUS_SUCCESS;
+    CpaInstanceInfo2 info = {0};
+
+    if (0 == numInstances) {
+        status = cpaCyGetNumInstances(&numInstances);
+        if (numInstances >= MAX_INSTANCES) {
+            numInstances = MAX_INSTANCES;
+        }
+        if (numInstances >= HE_QAT_NUM_ACTIVE_INSTANCES) {
+            numInstances = HE_QAT_NUM_ACTIVE_INSTANCES;
+        }
+
+        if (CPA_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT_ERR("No CyInstances Found (%d).\n", numInstances);
+            numInstances = 0;
+            return NULL;
+        }
+
+        HE_QAT_PRINT_DBG("Found %d CyInstances.\n", numInstances);
+
+        if (0 == numInstances) {
+            HE_QAT_PRINT_ERR("No instances found for 'SSL'\n");
+            HE_QAT_PRINT_ERR("Please check your section names");
+            HE_QAT_PRINT_ERR(" in the config file.\n");
+            HE_QAT_PRINT_ERR("Also make sure to use config file version 2.\n");
+            return NULL;
+        }
+
+        // Check this status on its own: the per-instance query below reuses
+        // the variable and would otherwise mask a failed fetch.
+        status = cpaCyGetInstances(numInstances, cyInstHandles);
+        if (CPA_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT_ERR("Failed to get CyInstances.\n");
+            numInstances = 0;
+            return NULL;
+        }
+
+        // List instances and their characteristics
+        for (unsigned int i = 0; i < numInstances; i++) {
+            status = cpaCyInstanceGetInfo2(cyInstHandles[i], &info);
+            if (CPA_STATUS_SUCCESS != status) {
+                numInstances = 0;
+                return NULL;
+            }
+#ifdef HE_QAT_DEBUG
+            HE_QAT_PRINT("Vendor Name: %s\n", info.vendorName);
+            HE_QAT_PRINT("Part Name: %s\n", info.partName);
+            HE_QAT_PRINT("Inst Name: %s\n", info.instName);
+            HE_QAT_PRINT("Inst ID: %s\n", info.instID);
+            HE_QAT_PRINT("Node Affinity: %u\n", info.nodeAffinity);
+            HE_QAT_PRINT("Physical Instance:\n");
+            HE_QAT_PRINT("\tpackageId: %d\n", info.physInstId.packageId);
+            HE_QAT_PRINT("\tacceleratorId: %d\n",
+                         info.physInstId.acceleratorId);
+            HE_QAT_PRINT("\texecutionEngineId: %d\n",
+                         info.physInstId.executionEngineId);
+            HE_QAT_PRINT("\tbusAddress: %d\n", info.physInstId.busAddress);
+            HE_QAT_PRINT("\tkptAcHandle: %d\n", info.physInstId.kptAcHandle);
+#endif
+        }
+        HE_QAT_PRINT_DBG("Next Instance: %d.\n", nextInstance);
+
+        return cyInstHandles[nextInstance];
+    }
+
+    // Instances already discovered: serve them round-robin
+    nextInstance = ((nextInstance + 1) % numInstances);
+    HE_QAT_PRINT_DBG("Next Instance: %d.\n", nextInstance);
+
+    return cyInstHandles[nextInstance];
+}
+
+/// @brief Undo the driver-level setup done by acquire_qat_devices().
+/// Stops the SAL user process, destroys the QAT memory allocator and resets
+/// the cached instance counters so a later acquire rediscovers instances.
+static void rollback_qat_startup(void) {
+    icp_sal_userStop();
+    qaeMemDestroy();
+    numInstances = 0;
+    nextInstance = 0;
+}
+
+/// @brief
+/// Acquire QAT instances and set up QAT execution environment.
+/// @details Runs under context_lock. Returns right away when the context is
+/// already up; otherwise it initializes the QAT memory allocator, starts the
+/// "SSL" SAL user process, collects HE_QAT_NUM_ACTIVE_INSTANCES instance
+/// handles, resets the request buffers, and launches the detached processing
+/// thread (start_instances) and buffer manager thread (schedule_requests).
+/// @return HE_QAT_STATUS_SUCCESS when the context is (or already was) up,
+/// HE_QAT_STATUS_FAIL when any setup step fails.
+HE_QAT_STATUS acquire_qat_devices() {
+    CpaStatus status = CPA_STATUS_FAIL;
+
+    pthread_mutex_lock(&context_lock);
+
+    // Handle cases where acquire_qat_devices() is called when already active
+    // and running. The buffer manager thread moves the state from ACTIVE to
+    // RUNNING once it starts, so both values mean "already acquired".
+    if (HE_QAT_STATUS_ACTIVE == context_state ||
+        HE_QAT_STATUS_RUNNING == context_state) {
+        pthread_mutex_unlock(&context_lock);
+        return HE_QAT_STATUS_SUCCESS;
+    }
+
+    // Initialize QAT memory pool allocator
+    status = qaeMemInit();
+    if (CPA_STATUS_SUCCESS != status) {
+        pthread_mutex_unlock(&context_lock);
+        HE_QAT_PRINT_ERR("Failed to initialized memory driver.\n");
+        return HE_QAT_STATUS_FAIL;
+    }
+    HE_QAT_PRINT_DBG("QAT memory successfully initialized.\n");
+
+    status = icp_sal_userStartMultiProcess("SSL", CPA_FALSE);
+    if (CPA_STATUS_SUCCESS != status) {
+        qaeMemDestroy();
+        pthread_mutex_unlock(&context_lock);
+        HE_QAT_PRINT_ERR("Failed to start SAL user process SSL\n");
+        return HE_QAT_STATUS_FAIL;
+    }
+    HE_QAT_PRINT_DBG("SAL user process successfully started.\n");
+
+    CpaInstanceHandle _inst_handle[HE_QAT_NUM_ACTIVE_INSTANCES];
+    for (unsigned int i = 0; i < HE_QAT_NUM_ACTIVE_INSTANCES; i++) {
+        _inst_handle[i] = get_qat_instance();
+        if (_inst_handle[i] == NULL) {
+            // SAL and the memory driver are already up: shut them down so a
+            // failed acquire does not leave them running.
+            rollback_qat_startup();
+            pthread_mutex_unlock(&context_lock);
+            HE_QAT_PRINT_ERR("Failed to find QAT endpoints.\n");
+            return HE_QAT_STATUS_FAIL;
+        }
+    }
+
+    HE_QAT_PRINT_DBG("Found QAT endpoints.\n");
+
+    // Initialize QAT buffer synchronization attributes
+    he_qat_buffer.count = 0;
+    he_qat_buffer.next_free_slot = 0;
+    he_qat_buffer.next_data_slot = 0;
+
+    // Initialize QAT memory buffer
+    for (int i = 0; i < HE_QAT_BUFFER_SIZE; i++) {
+        he_qat_buffer.data[i] = NULL;
+    }
+
+    // Initialize QAT outstanding buffers
+    outstanding.busy_count = 0;
+    outstanding.next_free_buffer = 0;
+    outstanding.next_ready_buffer = 0;
+    for (int i = 0; i < HE_QAT_BUFFER_COUNT; i++) {
+        outstanding.free_buffer[i] = 1;
+        outstanding.ready_buffer[i] = 0;
+        outstanding.buffer[i].count = 0;
+        outstanding.buffer[i].next_free_slot = 0;
+        outstanding.buffer[i].next_data_slot = 0;
+        outstanding.buffer[i].next_data_out = 0;
+        for (int j = 0; j < HE_QAT_BUFFER_SIZE; j++) {
+            outstanding.buffer[i].data[j] = NULL;
+        }
+        pthread_mutex_init(&outstanding.buffer[i].mutex, NULL);
+        pthread_cond_init(&outstanding.buffer[i].any_more_data, NULL);
+        pthread_cond_init(&outstanding.buffer[i].any_free_slot, NULL);
+    }
+    pthread_mutex_init(&outstanding.mutex, NULL);
+    pthread_cond_init(&outstanding.any_free_buffer, NULL);
+    pthread_cond_init(&outstanding.any_ready_buffer, NULL);
+
+    // Creating QAT instances (consumer threads) to process op requests
+    cpu_set_t cpus;
+    for (int i = 0; i < HE_QAT_NUM_ACTIVE_INSTANCES; i++) {
+        // Thread attributes for instance i carry an affinity mask holding
+        // only CPU i
+        CPU_ZERO(&cpus);
+        CPU_SET(i, &cpus);
+        pthread_attr_init(&he_qat_inst_attr[i]);
+        pthread_attr_setaffinity_np(&he_qat_inst_attr[i], sizeof(cpu_set_t),
+                                    &cpus);
+
+        // configure thread
+        he_qat_inst_config[i].active = 0;   // HE_QAT_STATUS_INACTIVE
+        he_qat_inst_config[i].polling = 0;  // HE_QAT_STATUS_INACTIVE
+        he_qat_inst_config[i].running = 0;
+        he_qat_inst_config[i].status = CPA_STATUS_FAIL;
+        pthread_mutex_init(&he_qat_inst_config[i].mutex, NULL);
+        pthread_cond_init(&he_qat_inst_config[i].ready, NULL);
+        he_qat_inst_config[i].inst_handle = _inst_handle[i];
+        he_qat_inst_config[i].inst_id = i;
+        he_qat_inst_config[i].attr = &he_qat_inst_attr[i];
+    }
+
+    // NOTE(review): release_qat_devices() does not free this allocation, so
+    // each acquire/release cycle appears to leak one HE_QAT_Config. Confirm
+    // whether stop_instances() guarantees the runner thread is done with it
+    // before adding a free().
+    he_qat_config = (HE_QAT_Config*)malloc(sizeof(HE_QAT_Config));
+    if (NULL == he_qat_config) {
+        rollback_qat_startup();
+        pthread_mutex_unlock(&context_lock);
+        HE_QAT_PRINT_ERR("Failed to allocate QAT configuration.\n");
+        return HE_QAT_STATUS_FAIL;
+    }
+    he_qat_config->inst_config = he_qat_inst_config;
+    he_qat_config->count = HE_QAT_NUM_ACTIVE_INSTANCES;
+    he_qat_config->running = 0;
+    he_qat_config->active = 0;
+
+    if (0 != pthread_create(&he_qat_runner, NULL, start_instances,
+                            (void*)he_qat_config)) {
+        // No thread ever saw the configuration, so it is safe to drop it
+        free(he_qat_config);
+        he_qat_config = NULL;
+        rollback_qat_startup();
+        pthread_mutex_unlock(&context_lock);
+        HE_QAT_PRINT_ERR("Failed to create QAT processing thread.\n");
+        return HE_QAT_STATUS_FAIL;
+    }
+    HE_QAT_PRINT_DBG("Created processing threads.\n");
+
+    // Dispatch the qat instances to run independently in the background
+    pthread_detach(he_qat_runner);
+    HE_QAT_PRINT_DBG("Detached processing threads.\n");
+
+    // Set context state to active
+    context_state = HE_QAT_STATUS_ACTIVE;
+
+    // Launch buffer manager thread to schedule incoming requests
+    if (0 != pthread_create(&buffer_manager, NULL, schedule_requests,
+                            (void*)&context_state)) {
+        // release_qat_devices() takes context_lock itself: unlock first
+        pthread_mutex_unlock(&context_lock);
+        release_qat_devices();
+        HE_QAT_PRINT_ERR(
+            "Failed to complete QAT initialization while creating buffer "
+            "manager thread.\n");
+        return HE_QAT_STATUS_FAIL;
+    }
+
+    if (0 != pthread_detach(buffer_manager)) {
+        pthread_mutex_unlock(&context_lock);
+        release_qat_devices();
+        HE_QAT_PRINT_ERR(
+            "Failed to complete QAT initialization while launching buffer "
+            "manager thread.\n");
+        return HE_QAT_STATUS_FAIL;
+    }
+
+    pthread_mutex_unlock(&context_lock);
+
+    return HE_QAT_STATUS_SUCCESS;
+}
+
+/// @brief
+/// Release QAT instances and tear down QAT execution environment.
+/// @details Runs under context_lock. When the context is already inactive it
+/// returns without doing anything; otherwise it stops the instances, marks
+/// the context inactive, stops the SAL user process, destroys the QAT memory
+/// allocator and resets the cached instance counters.
+/// @return HE_QAT_STATUS_SUCCESS on every path shown here.
+HE_QAT_STATUS release_qat_devices() {
+    pthread_mutex_lock(&context_lock);
+
+    // Nothing was acquired (or it was already released)
+    if (HE_QAT_STATUS_INACTIVE == context_state) {
+        pthread_mutex_unlock(&context_lock);
+        return HE_QAT_STATUS_SUCCESS;
+    }
+
+    // NOTE(review): he_qat_config is allocated with malloc() in
+    // acquire_qat_devices() and is not freed here -- confirm whether that is
+    // intentional (e.g. the runner thread may still read it).
+    stop_instances(he_qat_config);
+    HE_QAT_PRINT_DBG("Stopped polling and processing threads.\n");
+
+    // Deactivate context (this is the flag the buffer manager thread polls;
+    // presumably it terminates once it observes the change -- verify, since
+    // that thread may be blocked waiting for requests)
+    context_state = HE_QAT_STATUS_INACTIVE;
+
+    // Stop QAT SSL service
+    icp_sal_userStop();
+    HE_QAT_PRINT_DBG("Stopped SAL user process.\n");
+
+    // Release QAT allocated memory
+    qaeMemDestroy();
+    HE_QAT_PRINT_DBG("Release QAT memory.\n");
+
+    // Force get_qat_instance() to rediscover instances on the next acquire
+    numInstances = 0;
+    nextInstance = 0;
+
+    pthread_mutex_unlock(&context_lock);
+
+    return HE_QAT_STATUS_SUCCESS;
+}
+
+/// @brief  Report the current state of the QAT context.
+/// @return The value held in the file-level context_state at the time of the
+///         call; documented values are HE_QAT_STATUS_ACTIVE,
+///         HE_QAT_STATUS_RUNNING, and HE_QAT_STATUS_INACTIVE.
+HE_QAT_STATUS get_qat_context_state() {
+    const HE_QAT_STATUS current_state = context_state;
+    return current_state;
+}
diff --git a/module/heqat/heqat/ctrl.c b/module/heqat/heqat/ctrl.c
new file mode 100644
index 0000000..7c968f6
--- /dev/null
+++ b/module/heqat/heqat/ctrl.c
@@ -0,0 +1,818 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+/// @file heqat/ctrl.c
+
+// C support libraries
+#include <stdio.h>
+#include <pthread.h>
+#include <assert.h>
+#include <openssl/bn.h>
+
+// Global variables used to hold measured performance numbers.
+#ifdef HE_QAT_PERF
+#include <sys/time.h>
+struct timeval start_time, end_time;
+double time_taken = 0.0;
+#endif
+
+// QAT-API headers
+#include <cpa.h>
+#include <cpa_cy_im.h>
+#include <cpa_cy_ln.h>
+#include <icp_sal_poll.h>
+
+// Local headers
+#include "heqat/common/utils.h"
+#include "heqat/common/consts.h"
+#include "heqat/common/types.h"
+
+// Warn user on selected execution mode
+#ifdef HE_QAT_SYNC_MODE
+#pragma message "Synchronous execution mode."
+#else
+#pragma message "Asynchronous execution mode."
+#endif
+
+// Global buffers and throttling counters for the runtime environment
+HE_QAT_RequestBuffer
+    he_qat_buffer;  ///< This is the internal buffer that holds and serializes
+                    ///< the requests to the accelerator.
+HE_QAT_OutstandingBuffer
+    outstanding;  ///< This is the data structure that holds outstanding
+                  ///< requests from separate active threads calling the API.
+volatile unsigned long response_count =
+    0;  ///< Counter of processed requests; it is used to help control
+        ///< throttling.
+static volatile unsigned long request_count =
+    0;  ///< Counter of requests successfully submitted to the accelerator;
+        ///< it is used to help control throttling.
+static unsigned long restart_threshold =
+    NUM_PKE_SLICES *
+    HE_QAT_NUM_ACTIVE_INSTANCES;  ///< Number of concurrent requests allowed to
+                                  ///< be sent to accelerator at once.
+static unsigned long max_pending =
+    (2 * NUM_PKE_SLICES *
+     HE_QAT_NUM_ACTIVE_INSTANCES);  ///< Upper bound on the number of requests
+                                    ///< sent to the accelerator that are
+                                    ///< pending completion.
+
+/// @brief Enqueue a single work request into a request buffer.
+/// @details Producer side of the ring buffer used by the main APIs. The call
+/// takes the buffer mutex, blocks on `any_free_slot` while the buffer already
+/// holds HE_QAT_BUFFER_SIZE entries, stores the request at `next_free_slot`,
+/// advances that index with wrap-around, and signals `any_more_data`.
+/// @param[out] _buffer Either `he_qat_buffer` or `outstanding` buffer.
+/// @param[in] args Work request packaged in a custom data structure.
+void submit_request(HE_QAT_RequestBuffer* _buffer, void* args) {
+    HE_QAT_PRINT_DBG("Lock write request\n");
+
+    pthread_mutex_lock(&_buffer->mutex);
+
+    HE_QAT_PRINT_DBG("Wait lock write request. [buffer size: %d]\n",
+                     _buffer->count);
+
+    // Park here until a consumer frees at least one slot
+    for (;;) {
+        if (_buffer->count < HE_QAT_BUFFER_SIZE) break;
+        pthread_cond_wait(&_buffer->any_free_slot, &_buffer->mutex);
+    }
+
+    assert(_buffer->count < HE_QAT_BUFFER_SIZE);
+
+    // Store the request, then step the write index around the ring
+    _buffer->data[_buffer->next_free_slot] = args;
+    _buffer->next_free_slot =
+        (_buffer->next_free_slot + 1) % HE_QAT_BUFFER_SIZE;
+    _buffer->count += 1;
+
+    pthread_cond_signal(&_buffer->any_more_data);
+    pthread_mutex_unlock(&_buffer->mutex);
+
+    HE_QAT_PRINT_DBG("Unlocked write request. [buffer size: %d]\n",
+                     _buffer->count);
+}
+
+/// @brief Move a batch of work requests into a request buffer.
+/// @details Called by the request scheduler thread as the producer for the
+/// shared internal buffer. An empty batch returns immediately. Otherwise the
+/// call takes the buffer mutex, blocks on `any_free_slot` until the buffer has
+/// room for the whole batch, copies every request into the ring, clears the
+/// batch (entries set to NULL, count set to 0) and signals `any_more_data`.
+/// @param[out] _buffer reference pointer to the internal buffer
+/// `he_qat_buffer`.
+/// @param[in] _requests list of requests retrieved from the buffer
+/// (`outstanding`) holding outstanding requests.
+static void submit_request_list(HE_QAT_RequestBuffer* _buffer,
+                                HE_QAT_TaskRequestList* _requests) {
+    HE_QAT_PRINT_DBG("Lock submit request list\n");
+
+    // Nothing to hand over
+    if (0 == _requests->count) return;
+
+    pthread_mutex_lock(&_buffer->mutex);
+
+    HE_QAT_PRINT_DBG(
+        "Wait lock submit request list. [internal buffer size: %d] [num "
+        "requests: %u]\n",
+        _buffer->count, _requests->count);
+
+    // Wait until buffer can accommodate the number of input requests
+    while (!(_buffer->count < HE_QAT_BUFFER_SIZE &&
+             (HE_QAT_BUFFER_SIZE - _buffer->count) >= _requests->count)) {
+        pthread_cond_wait(&_buffer->any_free_slot, &_buffer->mutex);
+    }
+
+    assert(_buffer->count < HE_QAT_BUFFER_SIZE);
+
+    // Transfer ownership of each request to the ring, emptying the batch
+    unsigned int moved = 0;
+    while (moved < _requests->count) {
+        _buffer->data[_buffer->next_free_slot] = _requests->request[moved];
+        _buffer->next_free_slot =
+            (_buffer->next_free_slot + 1) % HE_QAT_BUFFER_SIZE;
+        _requests->request[moved] = NULL;
+        moved++;
+    }
+    _buffer->count += _requests->count;
+    _requests->count = 0;
+
+    pthread_cond_signal(&_buffer->any_more_data);
+    pthread_mutex_unlock(&_buffer->mutex);
+
+    HE_QAT_PRINT_DBG(
+        "Unlocked submit request list. [internal buffer size: %d]\n",
+        _buffer->count);
+}
+
+/// @brief Retrieve multiple requests from a request buffer.
+/// @details Consumer side of the ring buffer. The call takes the buffer mutex,
+/// blocks on `any_more_data` while the buffer is empty, then removes up to
+/// `max_requests` entries in FIFO order into `_requests` and signals
+/// `any_free_slot`. A NULL `_requests` makes the call a no-op.
+/// @param[out] _requests list receiving the requests taken from `_buffer`;
+/// its count is set to the number of requests retrieved.
+/// @param[in] _buffer buffer of type HE_QAT_RequestBuffer, typically the
+/// internal buffer in current implementation.
+/// @param[in] max_requests maximum number of requests to retrieve from the
+/// buffer, if available.
+static void read_request_list(HE_QAT_TaskRequestList* _requests,
+                              HE_QAT_RequestBuffer* _buffer,
+                              unsigned int max_requests) {
+    if (NULL == _requests) return;
+
+    pthread_mutex_lock(&_buffer->mutex);
+
+    // Sleep until a producer has put something in the ring
+    while (!(_buffer->count > 0)) {
+        pthread_cond_wait(&_buffer->any_more_data, &_buffer->mutex);
+    }
+
+    assert(_buffer->count > 0);
+    assert(_buffer->count <= HE_QAT_BUFFER_SIZE);
+
+    // Take whatever is available, capped at max_requests
+    unsigned int count = max_requests;
+    if (_buffer->count < max_requests) count = _buffer->count;
+
+    unsigned int taken = 0;
+    while (taken < count) {
+        _requests->request[taken] = _buffer->data[_buffer->next_data_slot];
+        _buffer->next_data_slot =
+            (_buffer->next_data_slot + 1) % HE_QAT_BUFFER_SIZE;
+        taken++;
+    }
+    _requests->count = count;
+    _buffer->count -= count;
+
+    pthread_cond_signal(&_buffer->any_free_slot);
+    pthread_mutex_unlock(&_buffer->mutex);
+}
+
+/// @brief Read requests from the outstanding buffer.
+/// @details Consumer for the outstanding request buffer. The call waits on
+/// `any_ready_buffer` until `busy_count` is positive, picks the lowest-index
+/// buffer that is flagged ready and holds requests, then removes up to
+/// `max_num_requests` entries from it in FIFO order into `_requests` and
+/// signals that buffer's `any_free_slot`. When no buffer qualifies the call
+/// returns with `_requests->count` set to 0. A NULL `_requests` is a no-op.
+/// @param[out] _requests list of work requests retrieved from outstanding
+/// buffer.
+/// @param[in] _outstanding_buffer outstanding buffer holding requests in
+/// ready-to-be-scheduled state.
+/// @param[in] max_num_requests maximum number of requests to retrieve from
+/// outstanding buffer if available.
+static void pull_outstanding_requests(
+    HE_QAT_TaskRequestList* _requests,
+    HE_QAT_OutstandingBuffer* _outstanding_buffer,
+    unsigned int max_num_requests) {
+    if (NULL == _requests) return;
+    _requests->count = 0;
+
+    // For now, only one thread can change next_ready_buffer
+    // so no need for synchronization objects
+
+    // Select an outstanding buffer to pull requests and add them into the
+    // processing queue (internal buffer)
+    pthread_mutex_lock(&_outstanding_buffer->mutex);
+    // Wait until next outstanding buffer becomes available for use. The
+    // predicate reads the same object whose mutex is held (the parameter),
+    // not the file-level `outstanding` global.
+    while (_outstanding_buffer->busy_count <= 0)
+        pthread_cond_wait(&_outstanding_buffer->any_ready_buffer,
+                          &_outstanding_buffer->mutex);
+
+    // The scan always starts at slot 0, so the lowest-index ready buffer
+    // wins. NOTE(review): this is not round-robin; the rotation through
+    // next_ready_buffer is still disabled below.
+    int any_ready = 0;
+    unsigned int index = 0;
+    for (unsigned int i = 0; i < HE_QAT_BUFFER_COUNT; i++) {
+        index = i;
+        if (_outstanding_buffer->ready_buffer[index] &&
+            _outstanding_buffer->buffer[index]
+                .count) {  // sync with mutex at interface
+            any_ready = 1;
+            break;
+        }
+        // index = (index + 1) % HE_QAT_BUFFER_COUNT;
+    }
+    // Ensures it gets picked once only
+    pthread_mutex_unlock(&_outstanding_buffer->mutex);
+
+    if (!any_ready) return;
+
+    // Extract outstanding requests from outstanding buffer
+    // (this is the only function that reads from outstanding buffer,
+    // from a single thread)
+    pthread_mutex_lock(&_outstanding_buffer->buffer[index].mutex);
+
+    // This conditional waiting may not be required
+    // Wait while buffer is empty
+    while (_outstanding_buffer->buffer[index].count <= 0) {
+        pthread_cond_wait(&_outstanding_buffer->buffer[index].any_more_data,
+                          &_outstanding_buffer->buffer[index].mutex);
+    }
+    assert(_outstanding_buffer->buffer[index].count > 0);
+
+    // Take whatever is queued, capped at max_num_requests
+    unsigned int num_requests =
+        (_outstanding_buffer->buffer[index].count < max_num_requests)
+            ? _outstanding_buffer->buffer[index].count
+            : max_num_requests;
+
+    assert(num_requests <= HE_QAT_BUFFER_SIZE);
+
+    for (unsigned int i = 0; i < num_requests; i++) {
+        _requests->request[i] =
+            _outstanding_buffer->buffer[index]
+                .data[_outstanding_buffer->buffer[index].next_data_slot];
+        _outstanding_buffer->buffer[index].count--;
+        _outstanding_buffer->buffer[index].next_data_slot++;
+        _outstanding_buffer->buffer[index].next_data_slot %= HE_QAT_BUFFER_SIZE;
+    }
+    _requests->count = num_requests;
+
+    pthread_cond_signal(&_outstanding_buffer->buffer[index].any_free_slot);
+    pthread_mutex_unlock(&_outstanding_buffer->buffer[index].mutex);
+
+    // ---------------------------------------------------------------------------
+    // Notify there is an outstanding buffer in ready for the processing queue
+    //    pthread_mutex_lock(&_outstanding_buffer->mutex);
+    //
+    //    _outstanding_buffer->ready_count--;
+    //    _outstanding_buffer->ready_buffer[index] = 0;
+    //
+    //    pthread_cond_signal(&_outstanding_buffer->any_free_buffer);
+    //    pthread_mutex_unlock(&_outstanding_buffer->mutex);
+
+    return;
+}
+
+/// @brief Schedule outstanding requests to the internal buffer and be ready for
+/// processing.
+/// @details Thread entry point. It marks the state as HE_QAT_STATUS_RUNNING
+/// and then, until the state reads HE_QAT_STATUS_INACTIVE, repeatedly pulls a
+/// batch of requests from the outstanding buffers and submits it to the
+/// internal buffer `he_qat_buffer`, from which requests are ready to be
+/// submitted to the device for processing.
+/// @param[in] context_state Pointer to the volatile state variable used to
+/// activate or deactivate the scheduler; a NULL pointer ends the thread.
+/// @return Does not return a value; the thread ends through pthread_exit().
+void* schedule_requests(void* context_state) {
+    if (NULL == context_state) {
+        HE_QAT_PRINT_DBG("Failed at buffer_manager: argument is NULL.\n");
+        pthread_exit(NULL);
+    }
+
+    // Keep the volatile qualifier: the flag is written by another thread and
+    // must be re-read on every loop iteration.
+    volatile HE_QAT_STATUS* active = (volatile HE_QAT_STATUS*)context_state;
+
+    HE_QAT_TaskRequestList outstanding_requests;
+    for (unsigned int i = 0; i < HE_QAT_BUFFER_SIZE; i++) {
+        outstanding_requests.request[i] = NULL;
+    }
+    outstanding_requests.count = 0;
+
+    // This thread should receive signal from context to exit
+    *active = HE_QAT_STATUS_RUNNING;
+    while (HE_QAT_STATUS_INACTIVE != *active) {
+        // Collect a set of requests from the outstanding buffer
+        pull_outstanding_requests(&outstanding_requests, &outstanding,
+                                  HE_QAT_BUFFER_SIZE);
+        // Submit them to the HE QAT buffer for offloading
+        submit_request_list(&he_qat_buffer, &outstanding_requests);
+    }
+
+    pthread_exit(NULL);
+}
+
+/// @brief Poll responses from a specific QAT instance.
+/// @details Thread entry point. It sets the instance's `polling` flag and
+/// then, for as long as the flag stays set, polls the instance and sleeps
+/// 50 microseconds between polls. A NULL argument or a NULL instance handle
+/// ends the thread without polling.
+/// @param[in] _inst_config Instance configuration containing the parameter
+/// values to start and poll responses from the accelerator.
+static void* start_inst_polling(void* _inst_config) {
+    if (NULL == _inst_config) {
+        HE_QAT_PRINT_ERR(
+            "Failed at start_inst_polling: argument is NULL.\n");
+        pthread_exit(NULL);
+    }
+
+    HE_QAT_InstConfig* inst = (HE_QAT_InstConfig*)_inst_config;
+
+    // Nothing to poll without a handle
+    if (NULL == inst->inst_handle) return NULL;
+
+    HE_QAT_PRINT_DBG("Instance ID %d Polling\n", inst->inst_id);
+
+    // What is harmful for polling without performing any operation?
+    inst->polling = 1;
+    for (;;) {
+        // Another thread clears the flag to stop this loop
+        if (!inst->polling) break;
+        icp_sal_CyPollInstance(inst->inst_handle, 0);
+        HE_QAT_SLEEP(50, HE_QAT_MICROSEC);
+    }
+
+    pthread_exit(NULL);
+}
+
+/// @brief
+/// Initialize and start multiple instances, their polling thread,
+/// and a single processing thread.
+///
+/// @details
+/// It initializes multiple QAT instances and launches their respective
+/// independent polling threads that will listen to responses to requests sent
+/// to the accelerators concurrently. Then, it becomes the thread that collect
+/// the incoming requests stored in a shared buffer and send them to the
+/// accelerator for processing. This is the only processing thread for requests
+/// handled by multiple instances -- unlike when using multiple instances with
+/// the `start_perform_op` function, in which case each instance has a separate
+/// processing thread. The implementation of the multiple instance support using
+/// `start_perform_op` is obsolete and slower. The way is using this function,
+/// which delivers better performance. The scheduling of request offloads uses a
+/// round-robin approach. It collects multiple requests from the internal buffer
+/// and then  send them to the multiple accelerator instances to process in a
+/// round-robin fashion. It was designed to support processing requests of
+/// different operation types but currently only supports Modular
+/// Exponentiation.
+///
+/// @param[in] _config Data structure containing the configuration of multiple
+/// instances.
+void* start_instances(void* _config) {
+    static unsigned int instance_count = 0;
+    static unsigned int next_instance = 0;
+
+    if (NULL == _config) {
+        HE_QAT_PRINT_ERR("Failed in start_instances: _config is NULL.\n");
+        pthread_exit(NULL);
+    }
+
+    HE_QAT_Config* config = (HE_QAT_Config*)_config;
+    instance_count = config->count;
+
+    HE_QAT_PRINT_DBG("Instance Count: %d\n", instance_count);
+    pthread_t* polling_thread =
+        (pthread_t*)malloc(sizeof(pthread_t) * instance_count);
+    if (NULL == polling_thread) {
+        HE_QAT_PRINT_ERR(
+            "Failed in start_instances: polling_thread is NULL.\n");
+        pthread_exit(NULL);
+    }
+
+    unsigned* request_count_per_instance =
+        (unsigned*)malloc(sizeof(unsigned) * instance_count);
+    if (NULL == request_count_per_instance) {
+        HE_QAT_PRINT_ERR(
+            "Failed in start_instances: polling_thread is NULL.\n");
+        pthread_exit(NULL);
+    }
+    for (unsigned i = 0; i < instance_count; i++) {
+        request_count_per_instance[i] = 0;
+    }
+
+    CpaStatus status = CPA_STATUS_FAIL;
+    for (unsigned int j = 0; j < config->count; j++) {
+        // Start from zero or restart after stop_perform_op
+        pthread_mutex_lock(&config->inst_config[j].mutex);
+        while (config->inst_config[j].active)
+            pthread_cond_wait(&config->inst_config[j].ready,
+                              &config->inst_config[j].mutex);
+
+        // assert(0 == config->active);
+        // assert(NULL == config->inst_handle);
+
+        status = cpaCyStartInstance(config->inst_config[j].inst_handle);
+        config->inst_config[j].status = status;
+        if (CPA_STATUS_SUCCESS == status) {
+            HE_QAT_PRINT_DBG("Cpa CyInstance has successfully started.\n");
+            status = cpaCySetAddressTranslation(
+                config->inst_config[j].inst_handle, HE_QAT_virtToPhys);
+        }
+
+        pthread_cond_signal(&config->inst_config[j].ready);
+        pthread_mutex_unlock(&config->inst_config[j].mutex);
+
+        if (CPA_STATUS_SUCCESS != status) pthread_exit(NULL);
+
+        HE_QAT_PRINT_DBG("Instance ID: %d\n", config->inst_config[j].inst_id);
+
+        // Start QAT instance and start polling
+        if (pthread_create(&polling_thread[j], config->inst_config[j].attr,
+                           start_inst_polling,
+                           (void*)&(config->inst_config[j])) != 0) {
+            HE_QAT_PRINT_ERR(
+                "Failed at creating and starting polling thread.\n");
+            pthread_exit(NULL);
+        }
+
+        if (pthread_detach(polling_thread[j]) != 0) {
+            HE_QAT_PRINT_ERR("Failed at detaching polling thread.\n");
+            pthread_exit(NULL);
+        }
+
+        config->inst_config[j].active = 1;
+        config->inst_config[j].running = 1;
+    }  // for loop
+
+    HE_QAT_TaskRequestList outstanding_requests;
+    for (unsigned int i = 0; i < HE_QAT_BUFFER_SIZE; i++) {
+        outstanding_requests.request[i] = NULL;
+    }
+    outstanding_requests.count = 0;
+
+    config->running = 1;
+    config->active = 1;
+    while (config->running) {
+        HE_QAT_PRINT_DBG("Try reading request from buffer. Inst #%d\n",
+                         next_instance);
+
+        unsigned long pending = request_count - response_count;
+        unsigned long available =
+            max_pending - ((pending < max_pending) ? pending : max_pending);
+
+        HE_QAT_PRINT_DBG(
+            "[CHECK] request_count: %lu response_count: %lu pending: %lu "
+            "available: %lu\n",
+            request_count, response_count, pending, available);
+
+        while (available < restart_threshold) {
+            HE_QAT_PRINT_DBG("[WAIT]\n");
+
+            // argument passed in microseconds
+            HE_QAT_SLEEP(RESTART_LATENCY_MICROSEC, HE_QAT_MICROSEC);
+            pending = request_count - response_count;
+            available =
+                max_pending - ((pending < max_pending) ? pending : max_pending);
+            HE_QAT_PRINT_DBG(
+                "[CHECK] request_count: %lu response_count: %lu pending: %lu "
+                "available: %lu\n",
+                request_count, response_count, pending, available);
+        }
+        HE_QAT_PRINT_DBG(
+            "[SUBMIT] request_count: %lu response_count: %lu pending: %lu "
+            "available: %lu\n",
+            request_count, response_count, pending, available);
+
+        unsigned int max_requests = available;
+
+        // Try consume maximum amount of data from butter to perform requested
+        // operation
+        read_request_list(&outstanding_requests, &he_qat_buffer, max_requests);
+
+        HE_QAT_PRINT_DBG("Offloading %u requests to the accelerator.\n",
+                         outstanding_requests.count);
+
+        for (unsigned int i = 0; i < outstanding_requests.count; i++) {
+            HE_QAT_TaskRequest* request = outstanding_requests.request[i];
+#ifdef HE_QAT_SYNC_MODE
+            COMPLETION_INIT(&request->callback);
+#endif
+
+            unsigned retry = 0;
+            do {
+                // Realize the type of operation from data
+                switch (request->op_type) {
+                // Select appropriate action
+                case HE_QAT_OP_MODEXP:
+                    HE_QAT_PRINT_DBG("Offload request using instance #%d\n",
+                                     next_instance);
+#ifdef HE_QAT_PERF
+                    gettimeofday(&request->start, NULL);
+#endif
+                    status = cpaCyLnModExp(
+                        config->inst_config[next_instance].inst_handle,
+                        (CpaCyGenFlatBufCbFunc)
+                            request->callback_func,  // lnModExpCallback,
+                        (void*)request, (CpaCyLnModExpOpData*)request->op_data,
+                        &request->op_result);
+                    retry++;
+                    break;
+                case HE_QAT_OP_NONE:
+                default:
+                    HE_QAT_PRINT_DBG("HE_QAT_OP_NONE to instance #%d\n",
+                                     next_instance);
+                    retry = HE_QAT_MAX_RETRY;
+                    break;
+                }
+
+                if (CPA_STATUS_RETRY == status) {
+                    HE_QAT_PRINT_DBG("CPA requested RETRY\n");
+                    HE_QAT_PRINT_DBG("RETRY count = %u\n", retry);
+                    pthread_exit(NULL);  // halt the whole system
+                }
+            } while (CPA_STATUS_RETRY == status && retry < HE_QAT_MAX_RETRY);
+
+            // Ensure every call to perform operation is blocking for each
+            // endpoint
+            if (CPA_STATUS_SUCCESS == status) {
+                // Global tracking of number of requests
+                request_count += 1;
+                request_count_per_instance[next_instance] += 1;
+                next_instance = (next_instance + 1) % instance_count;
+
+                // Wake up any blocked call to stop_perform_op, signaling that
+                // now it is safe to terminate running instances. Check if this
+                // detereorate performance.
+                // TODO(fdiasmor): Check if prone to the lost wake-up problem.
+                pthread_cond_signal(&config->inst_config[next_instance].ready);
+
+#ifdef HE_QAT_SYNC_MODE
+                // Wait until the callback function has been called
+                if (!COMPLETION_WAIT(&request->callback, TIMEOUT_MS)) {
+                    request->op_status = CPA_STATUS_FAIL;
+                    request->request_status = HE_QAT_STATUS_FAIL;  // Review it
+                    HE_QAT_PRINT_ERR("Failed in COMPLETION WAIT\n");
+                }
+
+                // Destroy synchronization object
+                COMPLETION_DESTROY(&request->callback);
+#endif
+            } else {
+                request->op_status = CPA_STATUS_FAIL;
+                request->request_status = HE_QAT_STATUS_FAIL;  // Review it
+                HE_QAT_PRINT_ERR("Request Submission FAILED\n");
+            }
+
+            HE_QAT_PRINT_DBG("Offloading completed by instance #%d\n",
+                             next_instance - 1);
+
+            // Reset pointer
+            outstanding_requests.request[i] = NULL;
+            request = NULL;
+        }  // for loop over batch of requests
+        outstanding_requests.count = 0;
+    }
+    pthread_exit(NULL);
+}
+
+/// @brief
+///  Start the processing loop and the polling thread of one QAT instance.
+///
+/// @details
+///  Starts the QAT instance, installs the address translation function and
+///  launches a detached polling thread (start_inst_polling). It then loops
+///  while `running` is set: it waits until enough request slots are
+///  available, reads a batch of requests from the internal buffer and
+///  submits each one to the accelerator. Other operation types were
+///  anticipated, but only Modular Exponentiation is currently offloaded.
+///  The thread terminates through pthread_exit(NULL) on every path.
+///
+/// @param[in] _inst_config Pointer to the HE_QAT_InstConfig of the instance
+///  to run; if NULL, an error is printed and the thread exits.
+/// @return Never returns a value; the thread always ends via pthread_exit().
+void* start_perform_op(void* _inst_config) {
+    if (NULL == _inst_config) {
+        HE_QAT_PRINT_ERR("Failed in start_perform_op: _inst_config is NULL.\n");
+        pthread_exit(NULL);
+    }
+
+    HE_QAT_InstConfig* config = (HE_QAT_InstConfig*)_inst_config;
+
+    CpaStatus status = CPA_STATUS_FAIL;
+
+    // Start from zero or restart after stop_perform_op: wait while active.
+    pthread_mutex_lock(&config->mutex);
+    while (config->active) pthread_cond_wait(&config->ready, &config->mutex);
+
+    // NOTE: config->status records only the cpaCyStartInstance() result; the
+    // result of cpaCySetAddressTranslation() is kept in the local `status`.
+
+    status = cpaCyStartInstance(config->inst_handle);
+    config->status = status;
+    if (CPA_STATUS_SUCCESS == status) {
+        HE_QAT_PRINT_DBG("Cpa CyInstance has successfully started.\n");
+        status =
+            cpaCySetAddressTranslation(config->inst_handle, HE_QAT_virtToPhys);
+    }
+
+    pthread_cond_signal(&config->ready);
+    pthread_mutex_unlock(&config->mutex);
+
+    if (CPA_STATUS_SUCCESS != status) pthread_exit(NULL);
+
+    // Launch the polling thread (start_inst_polling) and detach it.
+    pthread_t polling_thread;
+    if (pthread_create(&polling_thread, config->attr, start_inst_polling,
+                       (void*)config) != 0) {
+        HE_QAT_PRINT_ERR("Failed at creating and starting polling thread.\n");
+        pthread_exit(NULL);
+    }
+
+    if (pthread_detach(polling_thread) != 0) {
+        HE_QAT_PRINT_ERR("Failed at detaching polling thread.\n");
+        pthread_exit(NULL);
+    }
+
+    HE_QAT_TaskRequestList outstanding_requests;
+    for (unsigned int i = 0; i < HE_QAT_BUFFER_SIZE; i++) {
+        outstanding_requests.request[i] = NULL;
+    }
+    outstanding_requests.count = 0;
+
+    config->running = 1;
+    config->active = 1;  // NOTE(review): written without config->mutex held.
+    while (config->running) {
+        HE_QAT_PRINT_DBG("Try reading request from buffer. Inst #%d\n",
+                         config->inst_id);
+
+        unsigned long pending = request_count - response_count;
+        unsigned long available =
+            max_pending - ((pending < max_pending) ? pending : max_pending);
+
+        HE_QAT_PRINT_DBG(
+            "[CHECK] request_count: %lu response_count: %lu pending: %lu "
+            "available: %lu\n",
+            request_count, response_count, pending, available);
+
+        while (available < restart_threshold) {
+            HE_QAT_PRINT_DBG("[WAIT]\n");
+
+            HE_QAT_SLEEP(650, HE_QAT_MICROSEC);
+
+            pending = request_count - response_count;
+            available =
+                max_pending - ((pending < max_pending) ? pending : max_pending);
+        }
+        HE_QAT_PRINT_DBG(
+            "[SUBMIT] request_count: %lu response_count: %lu pending: %lu "
+            "available: %lu\n",
+            request_count, response_count, pending, available);
+
+        unsigned int max_requests = available;
+
+        // Try to consume up to max_requests requests from the buffer for
+        // offloading.
+        read_request_list(&outstanding_requests, &he_qat_buffer, max_requests);
+
+        // An empty batch (count == 0) skips the submission loop below and
+        // falls through to the `ready` signal at the end of this iteration.
+        //
+        // NOTE(review): an earlier, commented-out variant read one request at
+        // a time via read_request() and signalled `ready` when it got NULL.
+        // Whether read_request_list() blocks while the buffer is empty is not
+        // visible from this function - confirm against its definition.
+        //
+        HE_QAT_PRINT_DBG("Offloading %u requests to the accelerator.\n",
+                         outstanding_requests.count);
+
+        for (unsigned int i = 0; i < outstanding_requests.count; i++) {
+            HE_QAT_TaskRequest* request = outstanding_requests.request[i];
+#ifdef HE_QAT_SYNC_MODE
+            COMPLETION_INIT(&request->callback);
+#endif
+            unsigned retry = 0;
+            do {
+                // NOTE(review): a non-MODEXP op_type leaves `status` unchanged.
+                switch (request->op_type) {
+                // Only modular exponentiation is submitted to QAT.
+                case HE_QAT_OP_MODEXP:
+                    // `request` itself is passed as the opaque callback
+                    // argument (third parameter of cpaCyLnModExp).
+                    HE_QAT_PRINT_DBG("Offload request using instance #%d\n",
+                                     config->inst_id);
+#ifdef HE_QAT_PERF
+                    gettimeofday(&request->start, NULL);
+#endif
+                    status = cpaCyLnModExp(
+                        config->inst_handle,
+                        (CpaCyGenFlatBufCbFunc)
+                            request->callback_func,  // lnModExpCallback,
+                        (void*)request, (CpaCyLnModExpOpData*)request->op_data,
+                        &request->op_result);
+                    retry++;
+                    break;
+                case HE_QAT_OP_NONE:
+                default:
+                    HE_QAT_PRINT_DBG("HE_QAT_OP_NONE to instance #%d\n",
+                                     config->inst_id);
+                    retry = HE_QAT_MAX_RETRY;
+                    break;
+                }
+
+                if (CPA_STATUS_RETRY == status) {
+                    HE_QAT_PRINT_DBG("CPA requested RETRY\n");
+                    HE_QAT_PRINT_DBG("RETRY count: %u\n", retry);
+                    HE_QAT_SLEEP(600, HE_QAT_MICROSEC);
+                }
+            } while (CPA_STATUS_RETRY == status && retry < HE_QAT_MAX_RETRY);
+
+            // On success, count the request; in HE_QAT_SYNC_MODE also block
+            // until its completion event has been posted.
+            if (CPA_STATUS_SUCCESS == status) {
+                // Global count of submitted requests
+                request_count += 1;
+
+                HE_QAT_PRINT_DBG("request_count = %lu\n", request_count);
+#ifdef HE_QAT_SYNC_MODE
+                // Wait until the callback function has been called
+                if (!COMPLETION_WAIT(&request->callback, TIMEOUT_MS)) {
+                    request->op_status = CPA_STATUS_FAIL;
+                    request->request_status = HE_QAT_STATUS_FAIL;  // Review it
+                    HE_QAT_PRINT_ERR("Failed in COMPLETION WAIT\n");
+                }
+
+                // Destroy synchronization object
+                COMPLETION_DESTROY(&request->callback);
+#endif
+            } else {
+                request->op_status = CPA_STATUS_FAIL;
+                request->request_status = HE_QAT_STATUS_FAIL;  // Review it
+            }
+
+            // Clear the slot so the list holds no stale pointer
+            outstanding_requests.request[i] = NULL;
+            request = NULL;
+        }  // for loop over batch of requests
+        outstanding_requests.count = 0;
+
+        // Wake up any blocked call to stop_perform_op, signaling that now it is
+        // safe to terminate running instances. Check if this deteriorates
+        // performance.
+        // TODO(fdiasmor): Check if prone to the lost wake-up problem.
+        pthread_cond_signal(&config->ready);
+
+        HE_QAT_PRINT_DBG("Offloading completed by instance #%d\n",
+                         config->inst_id);
+    }
+    pthread_exit(NULL);
+}
+
+/// @brief
+///  Stop the first `num_inst` QAT instances.
+///
+/// @details
+///  For each instance, under its mutex: wait on `ready` until it is marked
+///  active; if it started successfully, clear its `polling` and `running`
+///  flags and call cpaCyStopInstance() on a non-NULL handle. `ready` is
+///  signalled and the mutex released for every instance.
+///
+/// @param[in] config Array of QAT instance configurations; NULL is a no-op.
+/// @param[in] num_inst Number of leading entries of `config` to stop; the
+///    array must hold at least that many entries.
+void stop_perform_op(HE_QAT_InstConfig* config, unsigned num_inst) {
+    if (NULL == config) return;
+
+    CpaStatus status = CPA_STATUS_FAIL;
+    for (unsigned i = 0; i < num_inst; i++) {
+        pthread_mutex_lock(&config[i].mutex);
+
+        HE_QAT_PRINT_DBG("Try teardown HE QAT instance #%u.\n", i);
+
+        // Block until the instance has been marked active.
+        while (0 == config[i].active) {
+            pthread_cond_wait(&config[i].ready, &config[i].mutex);
+        }
+
+        if (CPA_STATUS_SUCCESS == config[i].status && config[i].active) {
+            HE_QAT_PRINT_DBG("Stop polling and running threads #%u\n", i);
+
+            config[i].polling = 0;
+            config[i].running = 0;
+
+            HE_QAT_SLEEP(10, HE_QAT_MICROSEC);
+            HE_QAT_PRINT_DBG("Stop cpaCyInstance #%u\n", i);
+            // Skip only the stop call for a NULL handle; a `continue` here
+            // would leave this instance's mutex locked.
+            if (config[i].inst_handle != NULL) {
+                HE_QAT_PRINT_DBG("cpaCyStopInstance\n");
+                status = cpaCyStopInstance(config[i].inst_handle);
+                if (CPA_STATUS_SUCCESS != status) {
+                    HE_QAT_PRINT_ERR("Failed to stop QAT instance #%u\n", i);
+                }
+            }
+        }
+        pthread_cond_signal(&config[i].ready);
+        pthread_mutex_unlock(&config[i].mutex);
+    }
+}
+
+/// @brief Shut down every instance tracked by `_config`.
+/// @details
+///  Clears the global `active` and `running` flags of `_config` if they are
+///  set, then hands all `count` instance configurations to stop_perform_op().
+/// @param[in] _config Global QAT configuration; a NULL pointer is ignored.
+void stop_instances(HE_QAT_Config* _config) {
+    if (_config != NULL) {
+        if (0 != _config->active) _config->active = 0;
+        if (0 != _config->running) _config->running = 0;
+        stop_perform_op(_config->inst_config, _config->count);
+    }
+}
diff --git a/module/heqat/heqat/include/heqat/bnops.h b/module/heqat/heqat/include/heqat/bnops.h
new file mode 100644
index 0000000..346c9a4
--- /dev/null
+++ b/module/heqat/heqat/include/heqat/bnops.h
@@ -0,0 +1,154 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+/// @file heqat/bnops.h
+///
+/// @details
+///     In this file, functions for Big Number operations accelerated by the
+///     QuickAssist (QAT) co-processor are specified.
+///
+/// @note
+///     Unless otherwise specified, Big numbers are represented by octet strings
+///     and stored in memory as pointers of type unsigned char*. On the QAT API
+///     the octet string is copied into a data structure of type
+///     CpaFlatBuffer. The octet strings representing Big Numbers are encoded
+///     in compliance with PKCS#1 v2.1, section 4, which is consistent with
+///     ASN.1 syntax.
+///     The largest number supported here has 8192 bits, i.e. numbers from 0 to
+///     2^(8192)-1. If the number is N, then the bit length is defined by n =
+///     floor(log2(N))+1. The memory buffer b to hold such number N needs to
+///     have at least M = ceiling(n/8) bytes allocated. In general, it will be
+///     larger and a power of 2, e.g. total bytes allocated is T=128 for
+///     numbers having up to n=1024 bits, total bytes allocated is T=256 for
+///     numbers having up to n=2048 bits, and so forth. Finally, the big number
+///     N is stored in `big endian` format, i.e. the least significant byte
+///     (LSB) is located at index [T-1], whereas the most significant byte is
+///     stored at [T-M].
+///
+///     The API client is responsible for allocation and release of their memory
+///     spaces of the function arguments. Allocated memory spaces must be
+///     contiguous. Once a function is called, the ownership of the memory
+///     spaces is transferred to the function until their completion such
+///     that concurrent usage by the client during execution may result in
+///     undefined behavior.
+
+// New compilers
+#pragma once
+
+// Legacy compilers
+#ifndef _HE_QAT_BN_OPS_H_
+#define _HE_QAT_BN_OPS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <openssl/bn.h>
+
+#include "heqat/common/types.h"
+
+/// @brief Performs modular exponentiation using BIGNUM data structure.
+///
+/// @details
+/// Perform big number modular exponentiation operation accelerated with QAT for
+/// input data using OpenSSL BIGNUM data structure. Create QAT contiguous memory
+/// space. Copy BIGNUM binary data and package it into a request (HE_QAT_Request
+/// data structure) and call producer function to submit request to the internal
+/// buffer.
+///
+/// @param[out] r Remainder number of the modular exponentiation operation.
+/// @param[in] b Base number of the modular exponentiation operation.
+/// @param[in] e Exponent number of the modular exponentiation operation.
+/// @param[in] m Modulus number of the modular exponentiation operation.
+/// @param[in] nbits Number of bits (bit precision) of input/output big numbers.
+HE_QAT_STATUS HE_QAT_BIGNUMModExp(BIGNUM* r, BIGNUM* b, BIGNUM* e, BIGNUM* m,
+                                  int nbits);
+
+/// @brief Performs big number modular exponentiation for input data (an octet
+/// string) in primitive type format (unsigned char *).
+///
+/// @details
+/// Perform big number modular exponentiation operation accelerated with QAT for
+/// input data as an octet string of unsigned chars. Create QAT contiguous
+/// memory space. Upon call it copies input data and package it into a request,
+/// then calls producer function to submit request to internal buffer.
+///
+/// @param[out] r Remainder number of the modular exponentiation operation.
+/// @param[in] b Base number of the modular exponentiation operation.
+/// @param[in] e Exponent number of the modular exponentiation operation.
+/// @param[in] m Modulus number of the modular exponentiation operation.
+/// @param[in] nbits Number of bits (bit precision) of input/output big numbers.
+HE_QAT_STATUS HE_QAT_bnModExp(unsigned char* r, unsigned char* b,
+                              unsigned char* e, unsigned char* m, int nbits);
+
+/// @brief
+/// It waits for number of requests sent by HE_QAT_bnModExp or
+/// HE_QAT_BIGNUMModExp to complete.
+///
+/// @details
+/// This function is blocking and works as a barrier. The purpose of this
+/// function is to wait for all outstanding requests to complete processing. It
+/// will also release all temporary memory allocated to support the
+/// submission and processing of the requests. It monitors outstanding requests
+/// to be completed and then it deallocates buffer holding outstanding request.
+///
+/// @param[in] num_requests Number of requests to wait for processing
+/// completion.
+void getBnModExpRequest(unsigned int num_requests);
+
+/**
+ *
+ * Interfaces for the Multithreading Support
+ *
+ **/
+
+/// @brief Performs big number modular exponentiation for input data (an octet
+/// string) in primitive type format (unsigned char *). Same as HE_QAT_bnModExp
+/// with multithreading support.
+///
+/// @details
+/// Perform big number modular exponentiation operation accelerated with QAT for
+/// input data as an octet string of unsigned chars. Create QAT contiguous
+/// memory space. Upon call it copies input data and package it into a request,
+/// then calls producer function to submit request to internal buffer.
+///
+/// @param[in] _buffer_id Buffer ID of the reserved buffer for the caller's
+/// thread.
+/// @param[out] r Remainder number of the modular exponentiation operation.
+/// @param[in] b Base number of the modular exponentiation operation.
+/// @param[in] e Exponent number of the modular exponentiation operation.
+/// @param[in] m Modulus number of the modular exponentiation operation.
+/// @param[in] nbits Number of bits (bit precision) of input/output big numbers.
+HE_QAT_STATUS HE_QAT_bnModExp_MT(unsigned int _buffer_id, unsigned char* r,
+                                 unsigned char* b, unsigned char* e,
+                                 unsigned char* m, int nbits);
+
+/// @brief Reserve/acquire buffer for multithreading support.
+///
+/// @details Try to acquire an available buffer to store outstanding work
+/// requests sent by caller.
+///          If none is available, it blocks further processing and waits until
+///          another caller's concurrent thread releases one. This function must
+///          be called before calling HE_QAT_bnModExp_MT(.).
+///
+/// @param[out] _buffer_id Memory space address allocated by caller to hold the
+/// buffer ID of the buffer used to store caller's outstanding requests.
+HE_QAT_STATUS acquire_bnModExp_buffer(unsigned int* _buffer_id);
+
+/// @brief Wait for request processing to complete and release previously
+/// acquired buffer.
+///
+/// @details Caution: It assumes acquire_bnModExp_buffer(&_buffer_id) to be
+/// called first to secure and be assigned an outstanding buffer for the target
+/// thread. Equivalent to getBnModExpRequest() for the multithreading support
+/// interfaces.
+///
+/// @param[in] _buffer_id ID of the buffer to release/unlock for reuse by the
+/// next concurrent thread.
+/// @param[in] _batch_size Number of requests to complete before the release.
+void release_bnModExp_buffer(unsigned int _buffer_id, unsigned int _batch_size);
+
+#ifdef __cplusplus
+}  // extern "C" {
+#endif
+
+#endif
diff --git a/module/heqat/heqat/include/heqat/common.h b/module/heqat/heqat/include/heqat/common.h
new file mode 100644
index 0000000..2c94a58
--- /dev/null
+++ b/module/heqat/heqat/include/heqat/common.h
@@ -0,0 +1,18 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+/// @file heqat/common.h
+
+#ifndef MODULE_HEQAT_HEQAT_INCLUDE_HEQAT_COMMON_H_
+#define MODULE_HEQAT_HEQAT_INCLUDE_HEQAT_COMMON_H_
+
+#include "heqat/common/consts.h"
+#include "heqat/common/types.h"
+#include "heqat/common/utils.h"
+
+#ifdef __cplusplus
+#ifdef HE_QAT_MISC
+#include "heqat/misc.h"
+#endif
+#endif
+
+#endif  // MODULE_HEQAT_HEQAT_INCLUDE_HEQAT_COMMON_H_
diff --git a/module/heqat/heqat/include/heqat/common/consts.h b/module/heqat/heqat/include/heqat/common/consts.h
new file mode 100644
index 0000000..4b7c63d
--- /dev/null
+++ b/module/heqat/heqat/include/heqat/common/consts.h
@@ -0,0 +1,18 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+/// @file heqat/common/consts.h
+
+#pragma once
+
+#ifndef _HE_QAT_CONST_H_
+#define _HE_QAT_CONST_H_
+
+// Local constants shared by the HE QAT sources.
+#define HE_QAT_NUM_ACTIVE_INSTANCES 8  // instance count (TODO confirm)
+#define HE_QAT_BUFFER_SIZE 1024  // slots per request buffer / request list
+#define HE_QAT_BUFFER_COUNT HE_QAT_NUM_ACTIVE_INSTANCES  // outstanding buffers
+#define HE_QAT_MAX_RETRY 100  // max cpaCyLnModExp attempts per request
+#define RESTART_LATENCY_MICROSEC 600  // sleep (us) while waiting for capacity
+#define NUM_PKE_SLICES 6  // NOTE(review): usage not visible here - confirm
+
+#endif  // _HE_QAT_CONST_H_
diff --git a/module/heqat/heqat/include/heqat/common/types.h b/module/heqat/heqat/include/heqat/common/types.h
new file mode 100644
index 0000000..e3790f8
--- /dev/null
+++ b/module/heqat/heqat/include/heqat/common/types.h
@@ -0,0 +1,186 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+/// @file heqat/common/types.h
+
+#pragma once
+
+#ifndef _HE_QAT_TYPES_H_
+#define _HE_QAT_TYPES_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// C Libraries
+#include <pthread.h>
+#include <semaphore.h>
+#ifdef HE_QAT_PERF
+#include <sys/time.h>
+#endif
+
+// QATLib Headers
+#include <cpa.h>
+#include <cpa_cy_im.h>
+#include <cpa_cy_ln.h>
+
+#include "heqat/common/consts.h"
+
+struct completion_struct {
+    sem_t semaphore;
+};
+
+// Type definitions
+typedef enum { HE_QAT_SYNC = 1, HE_QAT_ASYNC = 2 } HE_QAT_EXEC_MODE;
+
+typedef enum {
+    HE_QAT_STATUS_ACTIVE = 3,
+    HE_QAT_STATUS_RUNNING = 4,
+    HE_QAT_STATUS_INVALID_PARAM = 2,
+    HE_QAT_STATUS_READY = 1,
+    HE_QAT_STATUS_SUCCESS = 0,
+    HE_QAT_STATUS_FAIL = -1,
+    HE_QAT_STATUS_INACTIVE = -2
+} HE_QAT_STATUS;
+
+typedef enum {
+    HE_QAT_OP_NONE = 0,   ///< No Operation (NO OP)
+    HE_QAT_OP_MODEXP = 1  ///< QAT Modular Exponentiation
+} HE_QAT_OP;
+
+typedef enum {
+    HE_QAT_NANOSEC = 1000000000,  ///< Nanoseconds per second.
+    HE_QAT_MICROSEC = 1000000,    ///< Microseconds per second.
+    HE_QAT_MILLISEC = 1000,       ///< Milliseconds per second.
+    HE_QAT_SEC = 1                ///< Seconds per second (identity).
+} HE_QAT_TIME_UNIT;
+
+typedef pthread_t HE_QAT_Inst;
+
+typedef struct {
+    void* data[HE_QAT_BUFFER_SIZE];  ///< Stores work requests ready to be sent
+                                     ///< to the accelerator.
+    volatile unsigned int count;     ///< Tracks the number of occupied
+                                     ///< entries/slots in the data[] buffer.
+    // nextin index of the next free slot for a request
+    unsigned int next_free_slot;  ///< Index of the next slot available to store
+                                  ///< a new request.
+    // nextout index of next request to be processed
+    unsigned int next_data_slot;  ///< Index of the next slot containing request
+                                  ///< ready to be consumed for processing.
+    // index of next output data to be read by a thread waiting
+    // for all the request to complete processing
+    unsigned int
+        next_data_out;      ///< Index of the next slot containing request whose
+                            ///< processing has been completed and its output is
+                            ///< ready to be consumed by the caller.
+    pthread_mutex_t mutex;  ///< Synchronization object used to control access
+                            ///< to the buffer.
+    pthread_cond_t
+        any_more_data;  ///< Conditional variable used to synchronize the
+                        ///< consumption of data in buffer (wait until more data
+                        ///< is available to be consumed).
+    pthread_cond_t any_free_slot;  ///< Conditional variable used to synchronize
+                                   ///< the provision of free slots in buffer
+                                   ///< (wait until enough slots are available
+                                   ///< to add more data in buffer).
+} HE_QAT_RequestBuffer;
+
+typedef struct {
+    HE_QAT_RequestBuffer
+        buffer[HE_QAT_BUFFER_COUNT];  ///< Buffers to support concurrent threads
+                                      ///< with less sync overhead. Stores
+                                      ///< incoming requests from different
+                                      ///< threads.
+    unsigned int busy_count;  ///< Counts number of currently occupied buffers.
+    unsigned int next_free_buffer;  ///< Next in: index of the next free slot
+                                    ///< for a request.
+    int free_buffer
+        [HE_QAT_BUFFER_COUNT];  ///< Keeps track of buffers that are available
+                                ///< (any value > 0 means the buffer at index i
+                                ///< is available). The next_free_buffer does
+                                ///< not necessarily mean that the buffer is
+                                ///< already released from usage.
+    unsigned int next_ready_buffer;  ///< Next out: index of next request to be
+                                     ///< processed.
+    int ready_buffer
+        [HE_QAT_BUFFER_COUNT];  ///< Keeps track of buffers that are ready (any
+                                ///< value > 0 means the buffer at index i is
+                                ///< ready). The next_ready_buffer does not
+                                ///< necessarily mean that the buffer is not
+                                ///< busy at any given instant.
+    pthread_mutex_t mutex;  ///< Used to synchronize concurrent access to an
+                            ///< object of this type.
+    pthread_cond_t
+        any_ready_buffer;  ///< Condition variable used to synchronize the
+                           ///< consumption of the contents in the buffers
+                           ///< storing outstanding requests and ready to be
+                           ///< scheduled to move to the internal buffer.
+    pthread_cond_t
+        any_free_buffer;  ///< Condition variable used to synchronize the
+                          ///< provisioning of buffers to store incoming
+                          ///< requests from concurrent threads.
+} HE_QAT_OutstandingBuffer;
+
+typedef struct {
+    int inst_id;                          ///< QAT instance ID.
+    CpaInstanceHandle inst_handle;        ///< Handle of this QAT instance.
+    pthread_attr_t* attr;                 ///< Polling thread attributes.
+    HE_QAT_RequestBuffer* he_qat_buffer;  ///< Unused member.
+    pthread_mutex_t mutex;  ///< Held while starting/stopping the instance.
+    pthread_cond_t ready;   ///< Signalled on start/stop state changes.
+    volatile int active;  ///< Non-zero while this QAT instance is active.
+    volatile int
+        polling;  ///< State of this QAT instance's polling thread (any value
+                  ///< different from 0 indicates that it is running).
+    volatile int
+        running;  ///< State of this QAT instance's processing thread (any value
+                  ///< different from 0 indicates that it is running).
+    CpaStatus status;  ///< Status of the latest activity by this QAT instance.
+} HE_QAT_InstConfig;
+
+typedef struct {
+    HE_QAT_InstConfig*
+        inst_config;       ///< List of the QAT instance's configurations.
+    volatile int active;   ///< Value different from 0 indicates all QAT
+                           ///< instances are created and active.
+    volatile int running;  ///< Value different from 0 indicates all created QAT
+                           ///< instances are running.
+    unsigned int count;    ///< Total number of created QAT instances.
+} HE_QAT_Config;
+
+// One for each consumer
+typedef struct {
+    unsigned long long id;  ///< Work request ID.
+    struct completion_struct callback;  ///< Semaphore-based completion event.
+    HE_QAT_OP
+    op_type;  ///< Work type: type of operation to be offloaded to QAT.
+    CpaStatus op_status;      ///< Status of the operation after completion.
+    CpaFlatBuffer op_result;  ///< Output of the operation in contiguous memory.
+    void* op_data;    ///< Input data packaged in QAT's data structure for the
+                      ///< target type of operation (cast to
+                      ///< CpaCyLnModExpOpData* for HE_QAT_OP_MODEXP).
+    void* op_output;  ///< Pointer to the memory space where to store the output
+                      ///< for the caller.
+    void* callback_func;  ///< Callback handed to cpaCyLnModExp() on submit.
+    volatile HE_QAT_STATUS request_status;  ///< Request state; set to
+                                            ///< HE_QAT_STATUS_FAIL on errors.
+    pthread_mutex_t mutex;  ///< NOTE(review): no use visible here - confirm.
+    pthread_cond_t ready;   ///< NOTE(review): no use visible here - confirm.
+#ifdef HE_QAT_PERF
+    struct timeval
+        start;  ///< Time when the request was first received from the caller.
+    struct timeval end;  ///< Time when the request completed processing and
+                         ///< callback function was triggered.
+#endif
+} HE_QAT_TaskRequest;
+
+typedef struct {
+    HE_QAT_TaskRequest* request[HE_QAT_BUFFER_SIZE];  ///< Batched requests.
+    unsigned int count;  ///< Number of valid entries in request[].
+} HE_QAT_TaskRequestList;
+
+#ifdef __cplusplus
+}  // close the extern "C" {
+#endif
+
+#endif  // _HE_QAT_TYPES_H_
diff --git a/module/heqat/heqat/include/heqat/common/utils.h b/module/heqat/heqat/include/heqat/common/utils.h
new file mode 100644
index 0000000..e3e400d
--- /dev/null
+++ b/module/heqat/heqat/include/heqat/common/utils.h
@@ -0,0 +1,169 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+/// @file heqat/common/utils.h
+
+#pragma once
+
+// The guard name follows this file's path (heqat/common/utils.h). It must not
+// reuse the guard of heqat/misc/utils.h: with a shared macro, whichever of
+// the two headers is included second would be silently skipped.
+#ifndef MODULE_HEQAT_HEQAT_INCLUDE_HEQAT_COMMON_UTILS_H_
+#define MODULE_HEQAT_HEQAT_INCLUDE_HEQAT_COMMON_UTILS_H_
+
+#ifdef __cplusplus
+#define HE_QAT_RESTRICT __restrict__
+#else
+#define HE_QAT_RESTRICT restrict
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <openssl/bn.h>
+#include <errno.h>
+
+#include <qae_mem.h>
+
+#include "heqat/common/types.h"
+
+#ifndef BYTE_ALIGNMENT_8
+#define BYTE_ALIGNMENT_8 (8)
+#endif
+#ifndef BYTE_ALIGNMENT_64
+#define BYTE_ALIGNMENT_64 (64)
+#endif
+
+// 5 seconds
+#ifndef TIMEOUT_MS
+#define TIMEOUT_MS 5000
+#endif
+
+// Printing Functions
+
+// HE_QAT_PRINT_DBG(fmt, ...): printf-style debug trace prefixed with the name
+// of the calling function and flushed immediately. It expands to an empty
+// statement unless HE_QAT_DEBUG is defined. Both expansions use the
+// do { } while (0) form so the macro behaves like a single statement and can
+// be followed by ';' anywhere, including between `if` and `else`.
+#ifdef HE_QAT_DEBUG
+#define HE_QAT_PRINT_DBG(args...)   \
+    do {                            \
+        printf("%s(): ", __func__); \
+        printf(args);               \
+        fflush(stdout);             \
+    } while (0)
+#else
+#define HE_QAT_PRINT_DBG(args...) \
+    do {                          \
+    } while (0)
+#endif
+
+// HE_QAT_PRINT(fmt, ...): printf-style message written to stdout. Can be
+// overridden by defining HE_QAT_PRINT before including this header.
+#ifndef HE_QAT_PRINT
+#define HE_QAT_PRINT(args...) \
+    do {                      \
+        printf(args);         \
+    } while (0)
+#endif
+
+// HE_QAT_PRINT_ERR(fmt, ...): printf-style error message prefixed with the
+// name of the calling function. Note that it is written to stdout (via
+// printf), not stderr. Can be overridden by defining HE_QAT_PRINT_ERR before
+// including this header.
+#ifndef HE_QAT_PRINT_ERR
+#define HE_QAT_PRINT_ERR(args...)   \
+    do {                            \
+        printf("%s(): ", __func__); \
+        printf(args);               \
+    } while (0)
+#endif
+
+// Use semaphores to signal completion of events.
+// Each macro takes a pointer `s` to an object that has a `semaphore` member.
+//   COMPLETION_INIT    - initialise the semaphore, unshared, with count 0.
+//                        The expansion already ends in ';'.
+//   COMPLETION_WAIT    - block on the semaphore; evaluates to non-zero when
+//                        sem_wait() succeeded. The `timeout` argument is
+//                        currently ignored: the wait is unbounded.
+//   COMPLETE           - post the semaphore.
+//   COMPLETION_DESTROY - destroy the semaphore.
+#define COMPLETION_STRUCT completion_struct
+#define COMPLETION_INIT(s) sem_init(&((s)->semaphore), 0, 0);
+#define COMPLETION_WAIT(s, timeout) (sem_wait(&((s)->semaphore)) == 0)
+#define COMPLETE(s) sem_post(&((s)->semaphore))
+#define COMPLETION_DESTROY(s) sem_destroy(&((s)->semaphore))
+
+/// @brief
+///      This function and associated macro allocates the memory for the given
+///      size for the given alignment and stores the address of the memory
+///      allocated in the pointer. Memory allocated by this function is
+///      guaranteed to be physically contiguous.
+///
+/// @param[out] ppMemAddr    address of pointer where address will be stored;
+///                          a NULL address is rejected with
+///                          HE_QAT_STATUS_FAIL
+/// @param[in] sizeBytes     the size of the memory to be allocated
+/// @param[in] alignment     the alignment of the memory to be allocated
+/// (non-zero)
+/// @param[in] node          the allocate memory that is local to cpu(node)
+///
+/// @retval HE_QAT_STATUS_FAIL      Failed to allocate memory.
+/// @retval HE_QAT_STATUS_SUCCESS   Memory successfully allocated.
+static __inline HE_QAT_STATUS HE_QAT_memAllocContig(void** ppMemAddr,
+                                                    Cpa32U sizeBytes,
+                                                    Cpa32U alignment,
+                                                    Cpa32U node) {
+    // Nowhere to store the result: fail instead of dereferencing NULL.
+    if (NULL == ppMemAddr) {
+        return HE_QAT_STATUS_FAIL;
+    }
+    *ppMemAddr = qaeMemAllocNUMA(sizeBytes, node, alignment);
+    return (NULL == *ppMemAddr) ? HE_QAT_STATUS_FAIL : HE_QAT_STATUS_SUCCESS;
+}
+// Convenience wrapper that always allocates on node 0. The explicit (void**)
+// cast lets callers pass the address of any pointer type from both C and C++
+// (C++ has no implicit void* -> void** conversion).
+#define HE_QAT_MEM_ALLOC_CONTIG(ppMemAddr, sizeBytes, alignment) \
+    HE_QAT_memAllocContig((void**)(ppMemAddr), (sizeBytes), (alignment), 0)
+
+/// @brief
+///      This function and associated macro frees the memory at the given
+///      address and resets the pointer to NULL. The memory must have been
+///      allocated by the function HE_QAT_memAllocContig().
+///
+/// @param[out] ppMemAddr    address of pointer where mem address is stored.
+///                          If either this address or the pointer it refers
+///                          to is NULL, the function will exit silently
+static __inline void HE_QAT_memFreeContig(void** ppMemAddr) {
+    if ((NULL != ppMemAddr) && (NULL != *ppMemAddr)) {
+        qaeMemFreeNUMA(ppMemAddr);
+        *ppMemAddr = NULL;
+    }
+}
+// Takes the pointer variable itself (not its address). The argument is
+// parenthesised so that expressions such as `obj->buf` expand correctly, and
+// the (void**) cast keeps the macro usable from C++ as well as C.
+#define HE_QAT_MEM_FREE_CONTIG(pMemAddr) \
+    HE_QAT_memFreeContig((void**)&(pMemAddr))
+
+/// @brief Sleep for time unit.
+/// @param[in] time Unsigned integer representing
+/// amount of time.
+/// @param[in] unit Time unit of the amount of time
+/// passed in the first parameter. Unit values can be
+/// HE_QAT_NANOSEC (nano seconds), HE_QAT_MICROSEC
+/// (micro seconds), HE_QAT_MILLISEC (milli seconds),
+/// or HE_QAT_SEC (seconds).
+/// @retval HE_QAT_STATUS_SUCCESS The full interval elapsed.
+/// @retval HE_QAT_STATUS_FAIL    `unit` is not positive, or nanosleep()
+///                               failed for a reason other than EINTR.
+static __inline HE_QAT_STATUS HE_QAT_sleep(unsigned int time,
+                                           HE_QAT_TIME_UNIT unit) {
+    int ret = 0;
+    struct timespec resTime, remTime;
+
+    // `unit` is used as a divisor below; reject values that would divide by
+    // zero or produce a negative interval.
+    if ((int)unit <= 0) {
+        HE_QAT_PRINT_ERR("invalid time unit %d\n", (int)unit);
+        return HE_QAT_STATUS_FAIL;
+    }
+
+    // Split the interval into whole seconds plus leftover nanoseconds.
+    // NOTE(review): assumes each unit value is the number of units per second
+    // (so that HE_QAT_NANOSEC / unit is nanoseconds per unit) -- confirm
+    // against the HE_QAT_TIME_UNIT definition.
+    resTime.tv_sec = time / unit;
+    resTime.tv_nsec = (time % unit) * (HE_QAT_NANOSEC / unit);
+
+    // Resume with the remaining time whenever a signal interrupts the sleep.
+    // remTime is only filled in on EINTR, so it is only copied in that case.
+    while ((0 != (ret = nanosleep(&resTime, &remTime))) && (EINTR == errno)) {
+        resTime = remTime;
+    }
+
+    if (0 != ret) {
+        // nanosleep() returns -1 on failure; the reason is in errno. Capture
+        // it first, since the printing macro itself may modify errno.
+        const int err = errno;
+        HE_QAT_PRINT_ERR("nano sleep failed with code %d\n", err);
+        return HE_QAT_STATUS_FAIL;
+    }
+    return HE_QAT_STATUS_SUCCESS;
+}
+#define HE_QAT_SLEEP(time, timeUnit) HE_QAT_sleep((time), (timeUnit))
+
+/// @brief
+///      Translate a virtual address into the corresponding physical
+///      address by delegating to qaeVirtToPhysNUMA(). 0 is returned
+///      when the translation fails.
+/// @param[in] virtAddr     Virtual address to translate
+/// @retval CpaPhysicalAddr Physical address or 0 in
+/// case of error
+static __inline CpaPhysicalAddr HE_QAT_virtToPhys(void* virtAddr) {
+    const CpaPhysicalAddr physAddr =
+        (CpaPhysicalAddr)qaeVirtToPhysNUMA(virtAddr);
+    return physAddr;
+}
+
+// Helper routines working on OpenSSL BIGNUM values and raw byte buffers. The
+// implementations are not part of this header; the notes below are inferred
+// from the names and signatures only and should be confirmed against them.
+
+/// NOTE(review): presumably creates a BIGNUM of `nbits` bits holding test
+/// data; ownership of the returned object is not visible here -- confirm.
+BIGNUM* generateTestBNData(int nbits);
+
+/// NOTE(review): presumably returns the bytes of `bn` padded with zeros to
+/// `nbits` bits; ownership of the returned buffer is not visible here --
+/// confirm.
+unsigned char* paddingZeros(BIGNUM* bn, int nbits);
+
+/// NOTE(review): presumably prints `bn` in hexadecimal -- confirm.
+void showHexBN(BIGNUM* bn, int nbits);
+
+/// NOTE(review): presumably prints `len` bytes of `bin` in hexadecimal --
+/// confirm.
+void showHexBin(unsigned char* bin, int len);
+
+#ifdef __cplusplus
+}  // extern "C" {
+#endif
+
+#endif  // include guard of heqat/common/utils.h
diff --git a/module/heqat/heqat/include/heqat/context.h b/module/heqat/heqat/include/heqat/context.h
new file mode 100644
index 0000000..abe9be5
--- /dev/null
+++ b/module/heqat/heqat/include/heqat/context.h
@@ -0,0 +1,32 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+/// @file heqat/context.h
+
+#pragma once
+
+#ifndef _HE_QAT_CONTEXT_H_
+#define _HE_QAT_CONTEXT_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "heqat/common/types.h"
+
+// The prototypes below use (void): in C an empty parameter list declares a
+// function with unspecified arguments rather than one taking none, which
+// disables argument checking at call sites.
+
+/// @brief
+/// Configure and initialize QAT runtime environment.
+/// @return HE_QAT_STATUS code reporting the outcome of the operation.
+HE_QAT_STATUS acquire_qat_devices(void);
+
+/// @brief
+/// Release and free resources of the QAT runtime environment.
+/// @return HE_QAT_STATUS code reporting the outcome of the operation.
+HE_QAT_STATUS release_qat_devices(void);
+
+/// @brief
+/// Probe context status of the QAT runtime environment.
+/// @return HE_QAT_STATUS code describing the current context state.
+HE_QAT_STATUS get_qat_context_state(void);
+
+#ifdef __cplusplus
+}  // extern "C" {
+#endif
+
+#endif
diff --git a/module/heqat/heqat/include/heqat/heqat.h b/module/heqat/heqat/include/heqat/heqat.h
new file mode 100644
index 0000000..6c5e4a8
--- /dev/null
+++ b/module/heqat/heqat/include/heqat/heqat.h
@@ -0,0 +1,12 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+/// @file heqat/heqat.h
+
+#ifndef MODULE_HEQAT_HEQAT_INCLUDE_HEQAT_HEQAT_H_
+#define MODULE_HEQAT_HEQAT_INCLUDE_HEQAT_HEQAT_H_
+
+#include "heqat/common.h"
+#include "heqat/context.h"
+#include "heqat/bnops.h"
+
+#endif  // MODULE_HEQAT_HEQAT_INCLUDE_HEQAT_HEQAT_H_
diff --git a/module/heqat/heqat/include/heqat/misc.h b/module/heqat/heqat/include/heqat/misc.h
new file mode 100644
index 0000000..9a396bc
--- /dev/null
+++ b/module/heqat/heqat/include/heqat/misc.h
@@ -0,0 +1,38 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+/// @file heqat/misc.h
+
+#pragma once
+
+#ifndef HE_QAT_MISC_H_
+#define HE_QAT_MISC_H_
+
+#include "heqat/common/consts.h"
+#include "heqat/common/types.h"
+
+#ifdef __cplusplus
+
+#include "heqat/misc/bignum.h"
+
+/// @brief
+/// Convert QAT large number into little endian format and encapsulate it into a
+/// BigNumber object.
+/// @param[out] bn   BigNumber object representing multi-precision number in
+/// little endian format.
+/// @param[in]  data Large number of nbits precision in big endian format.
+/// @param[in]  nbits Number of bits. Has to be power of 2, e.g. 1024, 2048,
+/// 4096, etc.
+/// @retval HE_QAT_STATUS_INVALID_PARAM The arguments were rejected (e.g.
+/// nbits is not positive).
+/// @retval HE_QAT_STATUS_SUCCESS       bn holds the converted number.
+HE_QAT_STATUS binToBigNumber(BigNumber& bn, const unsigned char* data,
+                             int nbits);
+/// @brief
+/// Convert BigNumber object into raw data compatible with QAT.
+/// @param[out] data  BigNumber object's raw data in big endian format. The
+/// buffer must provide room for (nbits + 7) / 8 bytes.
+/// @param[in]  nbits Number of bits. Has to be power of 2, e.g. 1024, 2048,
+/// 4096, etc.
+/// @param[in]  bn    BigNumber object holding a multi-precision that can be
+/// represented in nbits.
+/// @retval HE_QAT_STATUS_INVALID_PARAM The arguments were rejected (e.g.
+/// nbits is not positive).
+/// @retval HE_QAT_STATUS_SUCCESS       data holds the converted number.
+HE_QAT_STATUS bigNumberToBin(unsigned char* data, int nbits,
+                             const BigNumber& bn);
+#endif  // __cplusplus
+
+#endif  // HE_QAT_MISC_H_
diff --git a/module/heqat/heqat/include/heqat/misc/bignum.h b/module/heqat/heqat/include/heqat/misc/bignum.h
new file mode 100644
index 0000000..1e83f01
--- /dev/null
+++ b/module/heqat/heqat/include/heqat/misc/bignum.h
@@ -0,0 +1,112 @@
+/*******************************************************************************
+ * Copyright 2019-2021 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+#if !defined _BIGNUMBER_H_
+#define _BIGNUMBER_H_
+
+#include "ippcp.h"
+
+#include <iostream>
+#include <vector>
+#include <iterator>
+
+using namespace std;
+
+// Signed multi-precision integer backed by an IppsBigNumState context.
+// Each object owns its context buffer (m_pBN): it is allocated by create()
+// and released by the destructor; copies and assignments allocate a fresh
+// buffer rather than sharing the source's.
+class BigNumber {
+public:
+    // Construct from a single unsigned 32-bit word (defaults to zero).
+    BigNumber(Ipp32u value = 0);
+    // Construct from a signed 32-bit value; the sign is carried separately.
+    BigNumber(Ipp32s value);
+    // Construct a copy of the value held in an existing IPP context.
+    BigNumber(const IppsBigNumState* pBN);
+    // Construct from `length` 32-bit words; pData may be null, in which case
+    // the context is allocated and initialised but no value is set.
+    BigNumber(const Ipp32u* pData, int length = 1,
+              IppsBigNumSGN sgn = IppsBigNumPOS);
+    BigNumber(const BigNumber& bn);
+    // Parse a string: optional leading '-', optional "0x"/"0X" prefix for
+    // hexadecimal, decimal otherwise.
+    BigNumber(const char* s);
+    virtual ~BigNumber();
+
+    // set value
+    void Set(const Ipp32u* pData, int length = 1,
+             IppsBigNumSGN sgn = IppsBigNumPOS);
+    // conversion to IppsBigNumState
+    friend IppsBigNumState* BN(const BigNumber& bn) { return bn.m_pBN; }
+    operator IppsBigNumState*() const { return m_pBN; }
+
+    // some useful constants
+    static const BigNumber& Zero();
+    static const BigNumber& One();
+    static const BigNumber& Two();
+
+    // arithmetic operators probably need
+    BigNumber& operator=(const BigNumber& bn);
+    BigNumber& operator+=(const BigNumber& bn);
+    BigNumber& operator-=(const BigNumber& bn);
+    BigNumber& operator*=(Ipp32u n);
+    BigNumber& operator*=(const BigNumber& bn);
+    BigNumber& operator/=(const BigNumber& bn);
+    BigNumber& operator%=(const BigNumber& bn);
+    friend BigNumber operator+(const BigNumber& a, const BigNumber& b);
+    friend BigNumber operator-(const BigNumber& a, const BigNumber& b);
+    friend BigNumber operator*(const BigNumber& a, const BigNumber& b);
+    friend BigNumber operator*(const BigNumber& a, Ipp32u);
+    friend BigNumber operator%(const BigNumber& a, const BigNumber& b);
+    friend BigNumber operator/(const BigNumber& a, const BigNumber& b);
+
+    // modulo arithmetic: in all of these *this acts as the modulus.
+    // Modulo(a) returns a % *this; ModAdd/ModMul reduce a + b / a * b;
+    // ModSub reduces a + InverseAdd(b); InverseAdd(a) is the additive inverse
+    // of a (zero stays zero); InverseMul(a) is computed with ippsModInv_BN.
+    BigNumber Modulo(const BigNumber& a) const;
+    BigNumber ModAdd(const BigNumber& a, const BigNumber& b) const;
+    BigNumber ModSub(const BigNumber& a, const BigNumber& b) const;
+    BigNumber ModMul(const BigNumber& a, const BigNumber& b) const;
+    BigNumber InverseAdd(const BigNumber& a) const;
+    BigNumber InverseMul(const BigNumber& a) const;
+
+    // comparisons (all built on compare(), i.e. on the sign of a - b)
+    friend bool operator<(const BigNumber& a, const BigNumber& b);
+    friend bool operator>(const BigNumber& a, const BigNumber& b);
+    friend bool operator==(const BigNumber& a, const BigNumber& b);
+    friend bool operator!=(const BigNumber& a, const BigNumber& b);
+    friend bool operator<=(const BigNumber& a, const BigNumber& b) {
+        return !(a > b);
+    }
+    friend bool operator>=(const BigNumber& a, const BigNumber& b) {
+        return !(a < b);
+    }
+
+    // easy tests
+    bool IsOdd() const;
+    bool IsEven() const { return !IsOdd(); }
+
+    // size of BigNumber
+    // MSB()/LSB(): zero-based index of the highest/lowest set bit; both
+    // return 0 when the value is zero, so BitSize() of zero is 1.
+    int MSB() const;
+    int LSB() const;
+    int BitSize() const { return MSB() + 1; }
+    int DwordSize() const { return (BitSize() + 31) >> 5; }
+    // Non-zero when bit n is set in the little endian word vector v.
+    friend int Bit(const vector<Ipp32u>& v, int n);
+
+    // conversion and output
+    void num2hex(string& s) const;          // convert to hex string
+    void num2vec(vector<Ipp32u>& v) const;  // convert to 32-bit word vector
+    friend ostream& operator<<(ostream& os, const BigNumber& a);
+
+protected:
+    // Allocate and initialise m_pBN for `length` words, optionally loading
+    // pData into it. Does not free a previously held buffer.
+    bool create(const Ipp32u* pData, int length,
+                IppsBigNumSGN sgn = IppsBigNumPOS);
+    // Three-way comparison: negative, zero or positive as *this is less
+    // than, equal to or greater than the argument.
+    int compare(const BigNumber&) const;
+    // Owned IPP big-number context (allocated as an Ipp8u array).
+    IppsBigNumState* m_pBN;
+};
+
+// convert bit size into 32-bit words
+#define BITSIZE_WORD(n) ((((n) + 31) >> 5))
+
+#endif  // _BIGNUMBER_H_
diff --git a/module/heqat/heqat/include/heqat/misc/utils.h b/module/heqat/heqat/include/heqat/misc/utils.h
new file mode 100644
index 0000000..3d0ae49
--- /dev/null
+++ b/module/heqat/heqat/include/heqat/misc/utils.h
@@ -0,0 +1,34 @@
+/*******************************************************************************
+ * Copyright 2021 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+#ifndef MODULE_HEQAT_HEQAT_INCLUDE_HEQAT_MISC_UTILS_H_
+#define MODULE_HEQAT_HEQAT_INCLUDE_HEQAT_MISC_UTILS_H_
+
+#include <stddef.h>
+
+#define RSIZE_MAX_STR (4UL << 10) /* 4Kb */
+
+/**
+ * \brief
+ * The strlen_safe function computes the length of the string pointed to by
+ * dest, scanning at most dmax characters.
+ * \param[in] dest pointer to string
+ * \param[in] dmax restricted maximum length. (default 4Kb)
+ * \return size_t
+ * The function returns the string length, excluding  the terminating
+ * null character, capped at dmax. If dest is NULL, dmax is 0 or dmax is
+ * greater than RSIZE_MAX_STR, then strlen_safe returns 0.
+ */
+size_t strlen_safe(const char* dest, size_t dmax = RSIZE_MAX_STR);
+
+#endif  // MODULE_HEQAT_HEQAT_INCLUDE_HEQAT_MISC_UTILS_H_
diff --git a/module/heqat/heqat/misc/bignum.cpp b/module/heqat/heqat/misc/bignum.cpp
new file mode 100644
index 0000000..c47a278
--- /dev/null
+++ b/module/heqat/heqat/misc/bignum.cpp
@@ -0,0 +1,395 @@
+/*******************************************************************************
+ * Copyright 2019-2021 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+#include "heqat/misc/bignum.h"
+#include "heqat/misc/utils.h"
+
+#include <cstring>
+#include <cstdlib>
+
+//////////////////////////////////////////////////////////////////////
+//
+// BigNumber
+//
+//////////////////////////////////////////////////////////////////////
+// Release the context buffer. create() allocates it with new Ipp8u[], hence
+// the cast back to Ipp8u* before delete[].
+BigNumber::~BigNumber() { delete[](Ipp8u*) m_pBN; }
+
+// Allocate and initialise the IPP big-number context for `length` 32-bit
+// words and, when pData is non-null, load the value (sign `sgn`) into it.
+// Any buffer previously referenced by m_pBN is NOT released here; callers
+// that re-create an object are responsible for freeing the old one.
+//
+// Returns false, leaving m_pBN null, when IPP rejects `length`; returns false
+// with the buffer allocated when initialising or loading the value fails.
+// m_pBN is only overwritten once the new buffer has been allocated, so it is
+// left untouched if `new` throws.
+bool BigNumber::create(const Ipp32u* pData, int length, IppsBigNumSGN sgn) {
+    int size = 0;
+    if (ippStsNoErr != ippsBigNumGetSize(length, &size)) {
+        // `size` is meaningless here; never allocate from it.
+        m_pBN = NULL;
+        return false;
+    }
+
+    // operator new[] reports failure by throwing, so no null check is needed.
+    IppsBigNumState* pBN = (IppsBigNumState*)(new Ipp8u[size]);
+    m_pBN = pBN;
+
+    if (ippStsNoErr != ippsBigNumInit(length, m_pBN)) return false;
+    if (pData && (ippStsNoErr != ippsSet_BN(sgn, length, pData, m_pBN)))
+        return false;
+    return true;
+}
+
+//
+// constructors
+//
+// Construct a non-negative number from a single 32-bit word.
+BigNumber::BigNumber(Ipp32u value) { create(&value, 1, IppsBigNumPOS); }
+
+// Construct from a signed 32-bit value: the magnitude is stored as one word
+// and the sign is passed to IPP separately.
+BigNumber::BigNumber(Ipp32s value) {
+    // Take the magnitude in unsigned arithmetic. abs() overflows for the
+    // most negative value, whereas 0u - (Ipp32u)value is well defined and
+    // yields 2^31 in that case.
+    Ipp32u magnitude = (value < 0) ? (0u - (Ipp32u)value) : (Ipp32u)value;
+    create(&magnitude, 1, (value < 0) ? IppsBigNumNEG : IppsBigNumPOS);
+}
+
+// Construct a copy of the value stored in an existing IPP context: query its
+// sign, bit length and word data, then create a context of matching size.
+BigNumber::BigNumber(const IppsBigNumState* pBN) {
+    IppsBigNumSGN sign;
+    int bitLength;
+    Ipp32u* words;
+    ippsRef_BN(&sign, &bitLength, &words, pBN);
+
+    const int wordCount = BITSIZE_WORD(bitLength);
+    create(words, wordCount, sign);
+}
+
+// Construct from `length` 32-bit words with sign `sgn`. A null pData only
+// allocates and initialises the context without setting a value (see
+// create()).
+BigNumber::BigNumber(const Ipp32u* pData, int length, IppsBigNumSGN sgn) {
+    create(pData, length, sgn);
+}
+
+static char HexDigitList[] = "0123456789ABCDEF";
+
+// Construct from a textual representation.
+//
+// Accepted syntax: an optional leading '-', then either "0x"/"0X" followed by
+// hexadecimal digits (upper or lower case) or a run of decimal digits.
+// Parsing stops at the first character that is not a valid digit for the
+// selected base; a null or empty string yields zero.
+BigNumber::BigNumber(const char* s) {
+    if (!s) s = "";  // treat a null pointer like an empty string
+    bool neg = '-' == s[0];
+    if (neg) s++;
+    bool hex = ('0' == s[0]) && (('x' == s[1]) || ('X' == s[1]));
+
+    int dataLen;
+    Ipp32u base;
+    if (hex) {
+        s += 2;
+        base = 0x10;
+        dataLen = (int)(strlen_safe(s) + 7) / 8;
+    } else {
+        base = 10;
+        dataLen = (int)(strlen_safe(s) + 9) / 10;
+    }
+    // An empty digit string would otherwise request a zero-word context.
+    if (dataLen < 1) dataLen = 1;
+
+    create(0, dataLen);
+    *(this) = Zero();
+    for (; *s; s++) {
+        char c = *s;
+        // HexDigitList only holds upper-case letters; fold lower case first.
+        if (c >= 'a' && c <= 'f') c = (char)(c - 'a' + 'A');
+
+        // strcspn() yields the digit's index in HexDigitList, or 16 when the
+        // character is not listed. Anything >= base is not a digit here.
+        char tmp[2] = {c, 0};
+        Ipp32u digit = (Ipp32u)strcspn(HexDigitList, tmp);
+        if (digit >= base) break;
+
+        *this = (*this) * base + BigNumber(digit);
+    }
+
+    if (neg) (*this) = Zero() - (*this);
+}
+
+// Copy constructor: allocates a new context sized for the source's current
+// bit length and copies its sign and words into it.
+BigNumber::BigNumber(const BigNumber& bn) {
+    IppsBigNumSGN sign;
+    int bitLength;
+    Ipp32u* words;
+    ippsRef_BN(&sign, &bitLength, &words, bn);
+
+    const int wordCount = BITSIZE_WORD(bitLength);
+    create(words, wordCount, sign);
+}
+
+//
+// set value
+//
+// Overwrite the value in place with `length` words from pData and sign `sgn`.
+// The existing context is reused (no reallocation); the status returned by
+// ippsSet_BN is not checked.
+void BigNumber::Set(const Ipp32u* pData, int length, IppsBigNumSGN sgn) {
+    ippsSet_BN(sgn, length, pData, BN(*this));
+}
+
+//
+// constants
+//
+// Each accessor returns a reference to a function-local static constant that
+// is constructed on first use.
+const BigNumber& BigNumber::Zero() {
+    static const BigNumber zero(0);
+    return zero;
+}
+
+const BigNumber& BigNumber::One() {
+    static const BigNumber one(1);
+    return one;
+}
+
+const BigNumber& BigNumber::Two() {
+    static const BigNumber two(2);
+    return two;
+}
+
+//
+// arithmetic operators
+//
+// Copy assignment: replaces this object's context with a freshly allocated
+// copy of bn's value.
+//
+// The new context is created before the old one is released, so if the
+// allocation throws, m_pBN still refers to the old, valid buffer instead of
+// memory that has already been freed.
+BigNumber& BigNumber::operator=(const BigNumber& bn) {
+    if (this != &bn) {  // prevent self copy
+        IppsBigNumSGN bnSgn;
+        int bnBitLen;
+        Ipp32u* bnData;
+        ippsRef_BN(&bnSgn, &bnBitLen, &bnData, bn);
+
+        Ipp8u* oldStorage = (Ipp8u*)m_pBN;
+        create(bnData, BITSIZE_WORD(bnBitLen), bnSgn);  // repoints m_pBN
+        delete[] oldStorage;
+    }
+    return *this;
+}
+
+// In-place addition. The sum is computed into a temporary sized for the
+// longer operand plus one carry bit, then assigned to *this.
+BigNumber& BigNumber::operator+=(const BigNumber& bn) {
+    int lhsBits;
+    int rhsBits;
+    ippsRef_BN(NULL, &lhsBits, NULL, *this);
+    ippsRef_BN(NULL, &rhsBits, NULL, bn);
+
+    const int sumBits = IPP_MAX(lhsBits, rhsBits) + 1;
+    BigNumber sum(0, BITSIZE_WORD(sumBits));
+    ippsAdd_BN(*this, bn, sum);
+
+    return *this = sum;
+}
+
+// In-place subtraction. The difference is computed into a temporary and then
+// assigned to *this.
+//
+// BigNumber values are signed: when the operands have opposite signs the
+// magnitudes are added, so the result can need one bit more than the longer
+// operand (e.g. 0xFFFFFFFF - (-1)). The temporary therefore reserves a carry
+// bit, exactly as operator+= does.
+BigNumber& BigNumber::operator-=(const BigNumber& bn) {
+    int aBitLen;
+    ippsRef_BN(NULL, &aBitLen, NULL, *this);
+    int bBitLen;
+    ippsRef_BN(NULL, &bBitLen, NULL, bn);
+    int rBitLen = IPP_MAX(aBitLen, bBitLen) + 1;
+
+    BigNumber result(0, BITSIZE_WORD(rBitLen));
+    ippsSub_BN(*this, bn, result);
+    *this = result;
+    return *this;
+}
+
+// In-place multiplication by another BigNumber. The product is computed into
+// a temporary whose bit capacity is the sum of both operands' bit lengths.
+BigNumber& BigNumber::operator*=(const BigNumber& bn) {
+    int lhsBits;
+    int rhsBits;
+    ippsRef_BN(NULL, &lhsBits, NULL, *this);
+    ippsRef_BN(NULL, &rhsBits, NULL, bn);
+
+    const int productBits = lhsBits + rhsBits;
+    BigNumber product(0, BITSIZE_WORD(productBits));
+    ippsMul_BN(*this, bn, product);
+
+    return *this = product;
+}
+
+// In-place multiplication by a 32-bit word. The product is computed into a
+// temporary that has 32 bits more capacity than the current value.
+BigNumber& BigNumber::operator*=(Ipp32u n) {
+    int lhsBits;
+    ippsRef_BN(NULL, &lhsBits, NULL, *this);
+
+    BigNumber product(0, BITSIZE_WORD(lhsBits + 32));
+    BigNumber factor(n);
+    ippsMul_BN(*this, factor, product);
+
+    return *this = product;
+}
+
+// In-place modulo. The output object starts as a copy of the modulus, which
+// gives it the modulus' capacity for receiving the remainder.
+BigNumber& BigNumber::operator%=(const BigNumber& bn) {
+    BigNumber rem(bn);
+    ippsMod_BN(BN(*this), BN(bn), BN(rem));
+    return *this = rem;
+}
+
+// In-place division keeping the quotient. The quotient buffer starts as a
+// copy of the dividend and the (discarded) remainder buffer as a copy of the
+// divisor, which sizes both outputs.
+BigNumber& BigNumber::operator/=(const BigNumber& bn) {
+    BigNumber quot(*this);
+    BigNumber rem(bn);
+    ippsDiv_BN(BN(*this), BN(bn), BN(quot), BN(rem));
+    return *this = quot;
+}
+
+// Binary arithmetic operators. Each one copies the left operand, applies the
+// matching compound assignment to the copy and returns it by value.
+BigNumber operator+(const BigNumber& a, const BigNumber& b) {
+    BigNumber sum(a);
+    sum += b;
+    return sum;
+}
+
+BigNumber operator-(const BigNumber& a, const BigNumber& b) {
+    BigNumber difference(a);
+    difference -= b;
+    return difference;
+}
+
+BigNumber operator*(const BigNumber& a, const BigNumber& b) {
+    BigNumber product(a);
+    product *= b;
+    return product;
+}
+
+BigNumber operator*(const BigNumber& a, Ipp32u n) {
+    BigNumber product(a);
+    product *= n;
+    return product;
+}
+
+BigNumber operator/(const BigNumber& a, const BigNumber& b) {
+    BigNumber quotient(a);
+    quotient /= b;
+    return quotient;
+}
+
+// Remainder of a modulo b. The output object starts as a copy of b, which
+// gives it the modulus' capacity for receiving the remainder.
+BigNumber operator%(const BigNumber& a, const BigNumber& b) {
+    BigNumber rem(b);
+    ippsMod_BN(BN(a), BN(b), BN(rem));
+    return rem;
+}
+
+//
+// modulo arithmetic
+//
+// In every function of this group *this plays the role of the modulus.
+
+// a reduced modulo *this.
+BigNumber BigNumber::Modulo(const BigNumber& a) const { return a % *this; }
+
+// Additive inverse of a: zero stays zero, otherwise modulus - (a mod modulus).
+BigNumber BigNumber::InverseAdd(const BigNumber& a) const {
+    BigNumber reduced = Modulo(a);
+    if (reduced == BigNumber::Zero()) return reduced;
+    return *this - reduced;
+}
+
+// Multiplicative inverse of a, computed by ippsModInv_BN. The output object
+// starts as a copy of the modulus so that it has the modulus' capacity.
+BigNumber BigNumber::InverseMul(const BigNumber& a) const {
+    BigNumber inverse(*this);
+    ippsModInv_BN(BN(a), BN(*this), BN(inverse));
+    return inverse;
+}
+
+// (a + b) reduced modulo *this.
+BigNumber BigNumber::ModAdd(const BigNumber& a, const BigNumber& b) const {
+    return this->Modulo(a + b);
+}
+
+// (a - b) reduced modulo *this, evaluated as a + InverseAdd(b).
+BigNumber BigNumber::ModSub(const BigNumber& a, const BigNumber& b) const {
+    return this->Modulo(a + this->InverseAdd(b));
+}
+
+// (a * b) reduced modulo *this.
+BigNumber BigNumber::ModMul(const BigNumber& a, const BigNumber& b) const {
+    return this->Modulo(a * b);
+}
+
+//
+// comparison
+//
+// Three-way comparison based on the sign of (*this - bn): returns 0 when the
+// values are equal, 1 when *this is greater and -1 when it is smaller.
+int BigNumber::compare(const BigNumber& bn) const {
+    const BigNumber difference = *this - bn;
+
+    Ipp32u sign;
+    ippsCmpZero_BN(BN(difference), &sign);
+
+    if (sign == IS_ZERO) return 0;
+    return (sign == GREATER_THAN_ZERO) ? 1 : -1;
+}
+
+// Relational operators: each one interprets the three-way result of
+// BigNumber::compare().
+bool operator<(const BigNumber& a, const BigNumber& b) {
+    const int order = a.compare(b);
+    return order < 0;
+}
+bool operator>(const BigNumber& a, const BigNumber& b) {
+    const int order = a.compare(b);
+    return order > 0;
+}
+bool operator==(const BigNumber& a, const BigNumber& b) {
+    const int order = a.compare(b);
+    return order == 0;
+}
+bool operator!=(const BigNumber& a, const BigNumber& b) {
+    const int order = a.compare(b);
+    return order != 0;
+}
+
+// easy tests
+//
+// True when the lowest bit of the least significant word is set.
+bool BigNumber::IsOdd() const {
+    Ipp32u* words;
+    ippsRef_BN(NULL, NULL, &words, *this);
+    const Ipp32u lowBit = words[0] & 1;
+    return lowBit != 0;
+}
+
+//
+// size of BigNumber
+//
+// Zero-based index of the lowest set bit; 0 is returned for the value zero.
+int BigNumber::LSB() const {
+    if (*this == BigNumber::Zero()) return 0;
+
+    vector<Ipp32u> words;
+    num2vec(words);
+
+    int position = 0;
+    for (size_t idx = 0; idx < words.size(); ++idx) {
+        Ipp32u word = words[idx];
+        if (word != 0) {
+            // First non-zero word: count its trailing zero bits and stop.
+            for (; (word & 1) == 0; word >>= 1) ++position;
+            break;
+        }
+        // An all-zero word contributes 32 zero bits.
+        position += 32;
+    }
+    return position;
+}
+
+// Zero-based index of the highest set bit; 0 is returned for the value zero.
+int BigNumber::MSB() const {
+    if (*this == BigNumber::Zero()) return 0;
+
+    vector<Ipp32u> words;
+    num2vec(words);
+
+    // Start at the top bit of the most significant word and walk downwards.
+    int position = (int)words.size() * 32 - 1;
+    for (size_t remaining = words.size(); remaining > 0; --remaining) {
+        Ipp32u word = words[remaining - 1];
+        if (word != 0) {
+            // First non-zero word: skip its leading zero bits and stop.
+            for (; (word & 0x80000000) == 0; word <<= 1) --position;
+            break;
+        }
+        // An all-zero word contributes 32 zero bits.
+        position -= 32;
+    }
+    return position;
+}
+
+// Test bit n of a little endian vector of 32-bit words.
+// Returns 1 when the bit is set and 0 otherwise; a position outside the
+// vector (negative n, or beyond the last word) reads as 0 instead of indexing
+// out of bounds.
+int Bit(const vector<Ipp32u>& v, int n) {
+    if (n < 0 || (size_t)(n >> 5) >= v.size()) return 0;
+    // Build the mask as an unsigned word: shifting a signed 1 into bit 31 is
+    // not portable.
+    const Ipp32u mask = (Ipp32u)1 << (n & 0x1F);
+    return 0 != (v[n >> 5] & mask);
+}
+
+//
+// conversions and output
+//
+// Append the value's 32-bit words to v, least significant word first. v is
+// not cleared beforehand.
+void BigNumber::num2vec(vector<Ipp32u>& v) const {
+    int bitLength;
+    Ipp32u* words;
+    ippsRef_BN(NULL, &bitLength, &words, *this);
+
+    const int wordCount = BITSIZE_WORD(bitLength);
+    int idx = 0;
+    while (idx < wordCount) {
+        v.push_back(words[idx]);
+        ++idx;
+    }
+}
+
+// Append the hexadecimal representation to s: a sign character ('-' for
+// negative values, ' ' otherwise), the prefix "0x", then eight upper-case hex
+// digits per 32-bit word, most significant word first.
+void BigNumber::num2hex(string& s) const {
+    IppsBigNumSGN sign;
+    int bitLength;
+    Ipp32u* words;
+    ippsRef_BN(&sign, &bitLength, &words, *this);
+
+    const int wordCount = BITSIZE_WORD(bitLength);
+
+    s.push_back((sign == ippBigNumNEG) ? '-' : ' ');
+    s.push_back('0');
+    s.push_back('x');
+    for (int idx = wordCount - 1; idx >= 0; --idx) {
+        const Ipp32u word = words[idx];
+        // Emit the nibbles from the highest (bits 31..28) to the lowest.
+        for (int shift = 28; shift >= 0; shift -= 4) {
+            s.push_back(HexDigitList[(word >> shift) & 0xF]);
+        }
+    }
+}
+
+// Stream the hexadecimal representation produced by num2hex().
+ostream& operator<<(ostream& os, const BigNumber& a) {
+    string text;
+    a.num2hex(text);
+    return os << text.c_str();
+}
diff --git a/module/heqat/heqat/misc/misc.cpp b/module/heqat/heqat/misc/misc.cpp
new file mode 100644
index 0000000..5116003
--- /dev/null
+++ b/module/heqat/heqat/misc/misc.cpp
@@ -0,0 +1,41 @@
+/// @file heqat/misc/misc.cpp
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "heqat/misc.h"
+
+#include <openssl/err.h>
+#include <openssl/rand.h>
+
+// Convert a big endian byte string of nbits bits into a BigNumber.
+//
+// The byte-reversed image is assembled in a zero-initialised word buffer
+// first and the BigNumber is constructed from it afterwards. This matters
+// for two reasons:
+//  - the BigNumber derives its length from the data it is constructed from,
+//    so it has to see the final (little endian) words rather than have its
+//    internal buffer patched after construction;
+//  - exactly (nbits + 7) / 8 bytes of `data` are read, even when nbits is not
+//    a multiple of 32; the unused high bytes of the last word are zero.
+// As before, the word buffer is filled byte-wise, i.e. the host is assumed to
+// be little endian.
+//
+// Returns HE_QAT_STATUS_INVALID_PARAM when data is NULL or nbits is not
+// positive, HE_QAT_STATUS_SUCCESS otherwise.
+HE_QAT_STATUS binToBigNumber(BigNumber& bn, const unsigned char* data,
+                             int nbits) {
+    if (NULL == data || nbits <= 0) return HE_QAT_STATUS_INVALID_PARAM;
+    const int len_ = (nbits + 7) >> 3;  // number of input bytes
+
+    // Reverse the bytes into little endian order.
+    std::vector<Ipp32u> words_(BITSIZE_WORD(nbits), 0);
+    unsigned char* bytes_ = reinterpret_cast<unsigned char*>(&words_[0]);
+    for (int i = 0; i < len_; i++) bytes_[i] = data[len_ - 1 - i];
+
+    // Create BigNumber containing the converted data
+    bn = BigNumber(&words_[0], static_cast<int>(words_.size()));
+
+    return HE_QAT_STATUS_SUCCESS;
+}
+
+// Write bn as a big endian byte string of (nbits + 7) / 8 bytes into data.
+//
+// Only the words that the number actually occupies are read from the
+// BigNumber; when the number is shorter than nbits the leading output bytes
+// are filled with zeros instead of reading past the number's words. When the
+// number is longer than nbits, only its low (nbits + 7) / 8 bytes are
+// written, as before. The internal words are read byte-wise, i.e. the host is
+// assumed to be little endian.
+//
+// Returns HE_QAT_STATUS_INVALID_PARAM when data is NULL or nbits is not
+// positive, HE_QAT_STATUS_FAIL when the BigNumber's data cannot be accessed,
+// and HE_QAT_STATUS_SUCCESS otherwise.
+HE_QAT_STATUS bigNumberToBin(unsigned char* data, int nbits,
+                             const BigNumber& bn) {
+    if (NULL == data || nbits <= 0) return HE_QAT_STATUS_INVALID_PARAM;
+    const int len_ = (nbits + 7) >> 3;  // number of output bytes
+
+    // Extract raw vector of data in little endian format
+    int bn_bits_ = 0;
+    Ipp32u* ref_bn_data_ = NULL;
+    if (ippStsNoErr != ippsRef_BN(NULL, &bn_bits_, &ref_bn_data_, BN(bn)) ||
+        NULL == ref_bn_data_) {
+        return HE_QAT_STATUS_FAIL;
+    }
+
+    // Number of bytes backed by the words the value occupies.
+    const int avail_ =
+        BITSIZE_WORD(bn_bits_) * static_cast<int>(sizeof(Ipp32u));
+
+    // Revert it to big endian format, zero-extending beyond the value.
+    const unsigned char* data_ =
+        reinterpret_cast<const unsigned char*>(ref_bn_data_);
+    for (int i = 0; i < len_; i++) {
+        const int src_ = len_ - 1 - i;
+        data[i] = (src_ < avail_) ? data_[src_] : 0;
+    }
+
+    return HE_QAT_STATUS_SUCCESS;
+}
diff --git a/module/heqat/heqat/misc/utils.cpp b/module/heqat/heqat/misc/utils.cpp
new file mode 100644
index 0000000..e0ffc4f
--- /dev/null
+++ b/module/heqat/heqat/misc/utils.cpp
@@ -0,0 +1,45 @@
+/*******************************************************************************
+ * Copyright 2021 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+#include "heqat/misc/utils.h"
+
+/* Bounded string length: counts the characters of dest up to, but not
+ * including, the terminating null character, looking at no more than dmax
+ * characters. Returns 0 when dest is NULL, when dmax is 0, or when dmax
+ * exceeds RSIZE_MAX_STR (4Kb). */
+size_t strlen_safe(const char* dest, size_t dmax) {
+    /* reject a null string, a zero limit, or a limit above 4Kb */
+    if (NULL == dest || 0UL == dmax || dmax > RSIZE_MAX_STR) {
+        return 0UL;
+    }
+
+    size_t length = 0UL;
+    for (const char* cursor = dest; *cursor != '\0' && length < dmax;
+         ++cursor) {
+        ++length;
+    }
+    return length;
+}
diff --git a/module/heqat/scripts/auto_find_qat_install.sh b/module/heqat/scripts/auto_find_qat_install.sh
new file mode 100755
index 0000000..07e5968
--- /dev/null
+++ b/module/heqat/scripts/auto_find_qat_install.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+# Copyright (C) 2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# shellcheck disable=SC2143
+for item in $(locate QAT/build); do [ -d "$item" ] && [ "$(echo "$item" | grep "$HOME")" ] && echo "${item%/*}"; done
diff --git a/module/heqat/scripts/reset_asym_buffer_size.sh b/module/heqat/scripts/reset_asym_buffer_size.sh
new file mode 100755
index 0000000..a4d523d
--- /dev/null
+++ b/module/heqat/scripts/reset_asym_buffer_size.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+# Copyright (C) 2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# shellcheck disable=SC2199
+if [[ -z "${@}" ]]; then
+  echo "Usage: ./reset_asym_buffer_size <old buffer size> <new buffer size>"
+  exit
+fi
+
+OLD_BUFFER_SIZE=$1
+
+if [[ -z ${2} ]]; then
+  echo "Error: second parameter missing"
+  echo "Usage: ./reset_asym_buffer_size <old buffer size> <new buffer size>"
+  exit
+fi
+
+NEW_BUFFER_SIZE=$2
+
+num_phys_dev=$(lspci -d 8086:4940 | wc -l)
+num_virt_dev=$(lspci -d 8086:4941 | wc -l)
+
+# Update physical device configuration files
+i=0
+while [ $i -lt "$num_phys_dev" ]; do
+  sudo sed -i /etc/4xxx_dev$i.conf -e "s/CyNumConcurrentAsymRequests = $OLD_BUFFER_SIZE/CyNumConcurrentAsymRequests = $NEW_BUFFER_SIZE/g"
+  i=$((i + 1))
+done
+
+# Update virtual function configuration files
+i=0
+while [ $i -lt "$num_virt_dev" ]; do
+  sudo sed -i /etc/4xxxvf_dev$i.conf -e "s/CyNumConcurrentAsymRequests = $OLD_BUFFER_SIZE/CyNumConcurrentAsymRequests = $NEW_BUFFER_SIZE/g"
+  i=$((i + 1))
+done
+
+## Power Off PFs
+#i=0;
+#while [ $i -lt $num_phys_dev ];
+#do
+#	sudo adf_ctl qat_dev$i down;
+#	i=`expr $i + 1`;
+#done
+#
+## Power On PFs
+#i=0;
+#while [ $i -lt $num_phys_dev ];
+#do
+#	sudo adf_ctl qat_dev$i up;
+#	i=`expr $i + 1`;
+#done
+#
+## Restart QAT service (This will bring up PFs and VFs)
+#echo "sudo service qat_service restart"
+#sudo service qat_service restart
+#
+## Power Off All VFs
+#i=$num_phys_dev;
+#vf_per_pf=`expr $num_virt_dev / $num_phys_dev`
+#n=`expr $vf_per_pf \\* $num_phys_dev`
+#n=`expr $n + $num_phys_dev`
+#while [ $i -lt $n ];
+#do
+#	sudo adf_ctl qat_dev$i down;
+#	i=`expr $i + 1`;
+#done
+#
+## Power On One QAT VF per QAT PF
+#i=$num_phys_dev;
+#while [ $i -lt $n ];
+#do
+#	sudo adf_ctl qat_dev$i up;
+#	i=`expr $i + $vf_per_pf`;
+#done
diff --git a/module/heqat/scripts/restart_devices.sh b/module/heqat/scripts/restart_devices.sh
new file mode 100755
index 0000000..c563a25
--- /dev/null
+++ b/module/heqat/scripts/restart_devices.sh
@@ -0,0 +1,150 @@
+#!/bin/bash
+
+# Copyright (C) 2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Restarts the QAT service and power-cycles the QAT physical functions (PFs)
+# and virtual functions (VFs) through adf_ctl.
+#
+# Usage: ./restart_devices.sh [num_phys_dev] [conf_virt_func]
+#   num_phys_dev:   PFs to leave active (default: every PF lspci reports).
+#   conf_virt_func: VFs to start per active PF (default: 1 when VFs are
+#                   present, otherwise 0).
+# Both values are clamped to what lspci reports.
+
+# Reject non-numeric arguments before touching any device; they would otherwise
+# make the integer tests below fail halfway through the reconfiguration.
+for arg in "$1" "$2"; do
+  case $arg in
+    *[!0-9]*)
+      echo "Error: arguments must be non-negative integers (got '$arg')" >&2
+      exit 1
+      ;;
+  esac
+done
+
+# Refresh
+echo "sudo service qat_service restart"
+sudo service qat_service restart
+
+# PFs are counted by PCI ID 8086:4940.
+num_phys_dev=$(lspci -d 8086:4940 | wc -l)
+if [ "$num_phys_dev" -eq 0 ]; then
+  echo "No QAT Device Found !"
+  # Report the failure to the caller instead of exiting with status 0.
+  exit 1
+else
+  echo "$num_phys_dev QAT Devices Found !"
+fi
+
+# VFs are counted by PCI ID 8086:4941; num_virt_func is the count per PF.
+total_virt_func=$(lspci -d 8086:4941 | wc -l)
+num_virt_func=$((total_virt_func / num_phys_dev))
+dev_step=1
+
+# Without arguments, print the usage as a hint and continue with the defaults.
+if [ $# -eq 0 ]; then
+  echo "Usage: ./restart_devices.sh [num_phys_dev] [conf_virt_func]"
+  echo "   Parameters:"
+  echo "   -- num_phys_dev:   Number of physical devices to be active. (default: auto)"
+  echo "   -- conf_virt_func: Number of virtual functions to be active per device. (default: auto)"
+fi
+
+nphysdev=$num_phys_dev
+if [ -n "$1" ]; then
+  # Force base 10 so a value such as "08" is not rejected as invalid octal.
+  nphysdev=$((10#$1))
+  if [ "$nphysdev" -gt "$num_phys_dev" ]; then
+    nphysdev=$num_phys_dev
+  fi
+fi
+
+conf_virt_func=0
+# Check if virtual function is enabled
+if [ "$num_virt_func" -gt 0 ]; then
+  conf_virt_func=1
+fi
+
+if [ -n "$2" ]; then
+  conf_virt_func=$((10#$2))
+  # if user attempts to request higher than available
+  if [ "$conf_virt_func" -gt "$num_virt_func" ]; then
+    conf_virt_func=$num_virt_func
+  fi
+fi
+
+# Shutdown QAT PFs
+i=0
+while [ $i -lt "$num_phys_dev" ]; do
+  echo "sudo adf_ctl qat_dev$i down"
+  sudo adf_ctl qat_dev$i down
+  i=$((i + 1))
+done
+
+# Reconfigure Target QAT PFs
+i=0
+n=$nphysdev
+while [ $i -lt "$n" ]; do
+  echo "sudo adf_ctl qat_dev$i up"
+  sudo adf_ctl qat_dev$i up
+  i=$((i + 1))
+done
+
+# Refresh
+echo "sudo systemctl restart qat_service"
+sudo systemctl restart qat_service
+
+# If Virtualization Mode Enabled
+start=0
+if [ "$num_virt_func" -gt 0 ]; then
+  if [ "$conf_virt_func" -gt 0 ]; then
+    start=$num_phys_dev
+    dev_step=$num_virt_func
+  fi
+fi
+
+# Shutdown QAT VFs
+i=$start
+stop=$((num_phys_dev * num_virt_func))
+stop=$((start + stop))
+while [ "$i" -lt "$stop" ]; do
+  echo "sudo adf_ctl qat_dev$i down"
+  sudo adf_ctl qat_dev"$i" down
+  i=$((i + 1))
+done
+
+i=0
+while [ $i -lt "$nphysdev" ]; do
+  # Start QAT PF
+  echo "sudo adf_ctl qat_dev$i up"
+  sudo adf_ctl qat_dev$i up
+  i=$((i + 1))
+done
+
+start=$num_phys_dev
+i=$start
+stop=$((nphysdev * num_virt_func))
+stop=$((start + stop))
+while [ "$i" -lt "$stop" ]; do
+  # Start QAT VF
+  echo "adf_ctl qat_dev$i up"
+  sudo adf_ctl qat_dev"$i" up
+  # Start up additional instances mapped to the same physical device
+  j=1
+  while [ $j -lt "$conf_virt_func" ]; do
+    dev_id=$((i + j))
+    # Start QAT VF
+    echo "adf_ctl qat_dev$dev_id up"
+    sudo adf_ctl qat_dev"$dev_id" up
+    j=$((j + 1))
+  done
+  i=$((i + dev_step))
+done
+
+# Shutdown Unused QAT PFs
+i=$nphysdev
+while [ "$i" -lt "$num_phys_dev" ]; do
+  echo "sudo adf_ctl qat_dev$i down"
+  sudo adf_ctl qat_dev"$i" down
+  i=$((i + 1))
+done
diff --git a/module/heqat/scripts/run.sh b/module/heqat/scripts/run.sh
new file mode 100755
index 0000000..b4a5090
--- /dev/null
+++ b/module/heqat/scripts/run.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Copyright (C) 2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Runs every test_* sample installed under $HEQATLIB_ROOT_DIR/install/bin and
+# exits non-zero if any of them fails. HEQATLIB_ROOT_DIR must be set.
+
+# Stop early when the root is unset or empty; otherwise every path below would
+# silently resolve relative to "/".
+: "${HEQATLIB_ROOT_DIR:?HEQATLIB_ROOT_DIR must be set}"
+
+export HEQATLIB_INSTALL_DIR=$HEQATLIB_ROOT_DIR/install
+ICP_ROOT=$("$HEQATLIB_ROOT_DIR"/scripts/auto_find_qat_install.sh)
+export ICP_ROOT
+# Append the previous value only when it is non-empty: a trailing ':' adds an
+# empty entry, which the dynamic loader resolves to the current directory.
+export LD_LIBRARY_PATH=$HEQATLIB_INSTALL_DIR/lib:$ICP_ROOT/build${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
+
+pushd "$HEQATLIB_INSTALL_DIR"/bin || exit
+
+failed=0
+for app in test_*; do
+  # An unmatched glob stays literal, so skip anything that is not a runnable file.
+  if [ ! -f "$app" ] || [ ! -x "$app" ]; then
+    continue
+  fi
+  echo "*****************************************************************"
+  echo "* [START]            RUNNING TEST SAMPLE $app                   *"
+  echo "*****************************************************************"
+  # Keep running the remaining samples, but remember the failure.
+  ./"$app" || failed=1
+  echo "*****************************************************************"
+  echo "* [STOP]             RUNNING TEST SAMPLE $app                   *"
+  echo "*****************************************************************"
+done
+
+popd || exit
+
+exit "$failed"
diff --git a/module/heqat/scripts/setup_devices.sh b/module/heqat/scripts/setup_devices.sh
new file mode 100755
index 0000000..57f56ee
--- /dev/null
+++ b/module/heqat/scripts/setup_devices.sh
@@ -0,0 +1,175 @@
+#!/bin/bash
+
+# Copyright (C) 2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Installs the template configuration files from config/ into /etc for the QAT
+# physical functions (PFs) and virtual functions (VFs) and power-cycles the
+# devices through adf_ctl. The config/ paths are relative to the current
+# working directory.
+#
+# Usage: ./setup_devices.sh [num_phys_dev] [conf_virt_func]
+#   num_phys_dev:   PFs to leave active (default: every PF lspci reports).
+#   conf_virt_func: VFs to start per active PF (default: 1 when VFs are
+#                   present, otherwise 0).
+# Both values are clamped to what lspci reports.
+
+# Reject non-numeric arguments before touching any device; they would otherwise
+# make the integer tests below fail halfway through the reconfiguration.
+for arg in "$1" "$2"; do
+  case $arg in
+    *[!0-9]*)
+      echo "Error: arguments must be non-negative integers (got '$arg')" >&2
+      exit 1
+      ;;
+  esac
+done
+
+# Refresh
+echo "sudo systemctl restart qat_service.service"
+#sudo service qat_service restart
+sudo systemctl restart qat_service.service
+
+# PFs are counted by PCI ID 8086:4940.
+num_phys_dev=$(lspci -d 8086:4940 | wc -l)
+if [ "$num_phys_dev" -eq 0 ]; then
+  echo "No QAT Device Found !"
+  # Report the failure to the caller instead of exiting with status 0.
+  exit 1
+else
+  echo "$num_phys_dev QAT Devices Found !"
+fi
+
+# VFs are counted by PCI ID 8086:4941; num_virt_func is the count per PF.
+total_virt_func=$(lspci -d 8086:4941 | wc -l)
+num_virt_func=$((total_virt_func / num_phys_dev))
+dev_step=1
+
+# Without arguments, print the usage as a hint and continue with the defaults.
+if [ $# -eq 0 ]; then
+  echo "Usage: ./setup_devices.sh [num_phys_dev] [conf_virt_func]"
+  echo "   Parameters:"
+  echo "   -- num_phys_dev:   Number of physical devices to be active. (default: auto)"
+  echo "   -- conf_virt_func: Number of virtual functions to be active per device. (default: auto)"
+fi
+
+nphysdev=$num_phys_dev
+if [ -n "$1" ]; then
+  # Force base 10 so a value such as "08" is not rejected as invalid octal.
+  nphysdev=$((10#$1))
+  if [ "$nphysdev" -gt "$num_phys_dev" ]; then
+    nphysdev=$num_phys_dev
+  fi
+fi
+
+conf_virt_func=0
+# Check if virtual function is enabled
+if [ "$num_virt_func" -gt 0 ]; then
+  conf_virt_func=1
+fi
+
+if [ -n "$2" ]; then
+  conf_virt_func=$((10#$2))
+  # if user attempts to request higher than available
+  if [ "$conf_virt_func" -gt "$num_virt_func" ]; then
+    conf_virt_func=$num_virt_func
+  fi
+fi
+
+# Shutdown QAT PFs
+i=0
+while [ $i -lt "$num_phys_dev" ]; do
+  echo "sudo adf_ctl qat_dev$i down"
+  sudo adf_ctl qat_dev$i down
+  i=$((i + 1))
+done
+
+# Reconfigure Target QAT PFs
+i=0
+n=$nphysdev
+while [ $i -lt "$n" ]; do
+  echo "sudo cp config/4xxx_dev0.conf /etc/4xxx_dev$i.conf"
+  sudo cp config/4xxx_dev0.conf /etc/4xxx_dev$i.conf
+  echo "sudo adf_ctl qat_dev$i up"
+  sudo adf_ctl qat_dev$i up
+  i=$((i + 1))
+done
+
+# Refresh
+echo "sudo systemctl restart qat_service.service"
+#sudo service qat_service restart
+sudo systemctl restart qat_service.service
+
+# If Virtualization Mode Enabled
+start=0
+if [ "$num_virt_func" -gt 0 ]; then
+  if [ "$conf_virt_func" -gt 0 ]; then
+    start=$num_phys_dev
+    dev_step=$num_virt_func
+  fi
+fi
+
+# Shutdown QAT VFs
+i=$start
+stop=$((num_phys_dev * num_virt_func))
+stop=$((start + stop))
+while [ "$i" -lt "$stop" ]; do
+  echo "sudo adf_ctl qat_dev$i down"
+  sudo adf_ctl qat_dev"$i" down
+  i=$((i + 1))
+done
+
+#i=0
+#while [ $i -lt $total_virt_func ];
+#do
+#  echo "sudo cp config/4xxxvf_dev0.conf /etc/4xxxvf_dev$i.conf";
+#  sudo cp config/4xxxvf_dev0.conf /etc/4xxxvf_dev$i.conf;
+#  i=`expr $i + 1`;
+#done
+
+i=0
+while [ $i -lt "$nphysdev" ]; do
+  # Reconfigure QAT PF
+  echo "sudo cp config/4xxx_dev0.conf /etc/4xxx_dev$i.conf"
+  sudo cp config/4xxx_dev0.conf /etc/4xxx_dev$i.conf
+  # Start QAT PF
+  echo "sudo adf_ctl qat_dev$i up"
+  sudo adf_ctl qat_dev$i up
+  i=$((i + 1))
+done
+
+start=$num_phys_dev
+i=$start
+stop=$((nphysdev * num_virt_func))
+stop=$((start + stop))
+while [ "$i" -lt "$stop" ]; do
+  k=$((i - start))
+  # Reconfigure QAT VF (must match PF's config)
+  echo "sudo cp config/4xxxvf_dev0.conf /etc/4xxxvf_dev$k.conf"
+  sudo cp config/4xxxvf_dev0.conf /etc/4xxxvf_dev"$k".conf
+  # Start QAT VF
+  echo "adf_ctl qat_dev$i up"
+  sudo adf_ctl qat_dev"$i" up
+  # Start up additional instances mapped to the same physical device
+  j=1
+  while [ $j -lt "$conf_virt_func" ]; do
+    dev_id=$((i + j))
+    k=$((dev_id - start))
+    # Reconfigure QAT VF (must match PF's config)
+    echo "sudo cp config/4xxxvf_dev0.conf /etc/4xxxvf_dev$k.conf"
+    sudo cp config/4xxxvf_dev0.conf /etc/4xxxvf_dev"$k".conf
+    # Start QAT VF
+    echo "adf_ctl qat_dev$dev_id up"
+    sudo adf_ctl qat_dev"$dev_id" up
+    j=$((j + 1))
+  done
+  i=$((i + dev_step))
+done
+
+# Shutdown Unused QAT PFs
+i=$nphysdev
+while [ "$i" -lt "$num_phys_dev" ]; do
+  echo "sudo adf_ctl qat_dev$i down"
+  sudo adf_ctl qat_dev"$i" down
+  i=$((i + 1))
+done
diff --git a/module/heqat/setup_env.sh b/module/heqat/setup_env.sh
new file mode 100755
index 0000000..05d4dbc
--- /dev/null
+++ b/module/heqat/setup_env.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Exports HEQATLIB_ROOT_DIR (the current working directory) and ICP_ROOT (the
+# output of scripts/auto_find_qat_install.sh, looked up under that directory).
+# NOTE(review): presumably meant to be sourced from module/heqat so the
+# exports reach the calling shell - confirm.
+HEQATLIB_ROOT_DIR="$(pwd)"
+export HEQATLIB_ROOT_DIR
+ICP_ROOT="$("${PWD}/scripts/auto_find_qat_install.sh")"
+export ICP_ROOT
diff --git a/module/heqat/test/CMakeLists.txt b/module/heqat/test/CMakeLists.txt
new file mode 100644
index 0000000..2bd391f
--- /dev/null
+++ b/module/heqat/test/CMakeLists.txt
@@ -0,0 +1,74 @@
+# Copyright (C) 2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+###############################################################################
+
+# heqat_create_executable(<test_case> <language> <dependencies>)
+#
+# Builds the executable test_<test_case> from test_<test_case>.c (language C or
+# c) or test_<test_case>.cpp (language CXX or cxx), links it against he_qat and
+# installs it into ${CMAKE_INSTALL_BINDIR}.
+#
+#   test_case     suffix shared by the target name and its source file
+#   language      C, c, CXX or cxx; any other value is a fatal error
+#   dependencies  NAME of a list variable with extra link libraries, or "" for
+#                 none
+#
+# Being a macro, it leaves `extension` and `target` set in the caller's scope.
+macro(heqat_create_executable test_case language dependencies)
+  # Quote the expansions so if() always sees a left-hand operand, even when the
+  # argument is empty.
+  if("${language}" STREQUAL "C" OR "${language}" STREQUAL "c")
+    set(extension "c")
+  elseif("${language}" STREQUAL "CXX" OR "${language}" STREQUAL "cxx")
+    set(extension "cpp")
+  else()
+    message(FATAL_ERROR "Error language not supported. Options: C or CXX.")
+  endif()
+
+  set(target test_${test_case})
+  add_executable(${target} test_${test_case}.${extension})
+
+  target_include_directories(${target} PUBLIC ${HE_QAT_INC_DIR})
+
+  target_link_libraries(${target} PUBLIC he_qat)
+
+  # `dependencies` holds a variable NAME, hence the double expansion below.
+  # Unquoted, an empty argument left if() without a left-hand operand for
+  # STREQUAL; quoting makes the empty case an explicit comparison.
+  if(NOT "${dependencies}" STREQUAL "")
+    message(STATUS "Target: ${target} Additional Dependencies: ${${dependencies}}")
+    target_link_libraries(${target} PUBLIC ${${dependencies}})
+  endif()
+
+  install(TARGETS ${target} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+endmacro()
+
+##############################################################################
+
+set(EXECUTABLE_DEPENDENCIES OpenSSL::SSL)
+
+# Sample testing the robustness of the heqatlib context functions
+heqat_create_executable(context c "")
+
+# Sample demonstrating how to use API for BIGNUM inputs
+heqat_create_executable(BIGNUMModExp C EXECUTABLE_DEPENDENCIES)
+
+if(HE_QAT_MISC)
+  add_compile_options(-fpermissive)
+
+  list(APPEND EXECUTABLE_DEPENDENCIES IPPCP::ippcp)
+
+  # Sample showing how to convert from/to BigNumber to/from CpaFlatBuffer
+  heqat_create_executable(bnConversion cxx EXECUTABLE_DEPENDENCIES)
+
+  # Sample showing how to use bnModExp API
+  heqat_create_executable(bnModExp CXX EXECUTABLE_DEPENDENCIES)
+
+  if(OpenMP_CXX_FOUND)
+    list(APPEND EXECUTABLE_DEPENDENCIES OpenMP::OpenMP_CXX)
+    # Sample showing how to use bnModExp_MT API for multithreaded applications
+    heqat_create_executable(bnModExp_MT CXX EXECUTABLE_DEPENDENCIES)
+  endif()
+endif()
+
+###############################################################################
diff --git a/module/heqat/test/test_BIGNUMModExp.c b/module/heqat/test/test_BIGNUMModExp.c
new file mode 100644
index 0000000..612bfb3
--- /dev/null
+++ b/module/heqat/test/test_BIGNUMModExp.c
@@ -0,0 +1,156 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+// Sample comparing BIGNUM modular exponentiation computed by OpenSSL
+// (BN_mod_exp) with the same operation issued through HE_QAT_BIGNUMModExp.
+// It prints running-average timings and whether both results match.
+
+#include <time.h>
+#include <openssl/bn.h>
+#include <openssl/err.h>
+#include <openssl/rand.h>
+#include <sys/time.h>
+
+#include "heqat/heqat.h"
+
+struct timeval start_time, end_time;
+double time_taken = 0.0;
+
+// Number of HE_QAT_BIGNUMModExp calls issued per trial.
+const unsigned int BATCH_SIZE = 1;
+
+// Runs num_trials randomized trials; returns the status of the last QAT call
+// (HE_QAT_STATUS_FAIL if the OpenSSL context cannot be created).
+int main(int argc, const char** argv) {
+    const int bit_length = 4096;  // 1024;
+    const size_t num_trials = 100;
+
+    double avg_speed_up = 0.0;
+    double ssl_avg_time = 0.0;
+    double qat_avg_time = 0.0;
+
+    double ssl_elapsed = 0.0;
+    double qat_elapsed = 0.0;
+
+    HE_QAT_STATUS status = HE_QAT_STATUS_FAIL;
+
+    // Set up QAT runtime context
+    acquire_qat_devices();
+
+    // Set up OpenSSL context (as baseline)
+    BN_CTX* ctx = BN_CTX_new();
+    if (!ctx) {
+        HE_QAT_PRINT_ERR("Failed to allocate BN_CTX\n");
+        release_qat_devices();
+        return (int)status;
+    }
+    BN_CTX_start(ctx);
+
+    for (size_t mod = 0; mod < num_trials; mod++) {
+        BIGNUM* bn_mod = generateTestBNData(bit_length);
+
+        if (!bn_mod) continue;
+
+#ifdef HE_QAT_DEBUG
+        char* bn_str = BN_bn2hex(bn_mod);
+        HE_QAT_PRINT("Generated modulus: %s num_bytes: %d num_bits: %d\n",
+                     bn_str, BN_num_bytes(bn_mod), BN_num_bits(bn_mod));
+        OPENSSL_free(bn_str);
+#endif
+        // bn_exponent in [0..bn_mod]
+        BIGNUM* bn_exponent = BN_new();
+        if (!bn_exponent || !BN_rand_range(bn_exponent, bn_mod)) {
+            // BN_free() accepts NULL, so this covers both failure modes.
+            BN_free(bn_exponent);
+            BN_free(bn_mod);
+            continue;
+        }
+
+        BIGNUM* bn_base = generateTestBNData(bit_length);
+        if (!bn_base) {
+            // Same handling as a failed modulus: skip the trial, leak nothing.
+            BN_free(bn_exponent);
+            BN_free(bn_mod);
+            continue;
+        }
+
+        // Perform OpenSSL ModExp Op
+        BIGNUM* ssl_res = BN_new();
+        // start = clock();
+        gettimeofday(&start_time, NULL);
+        BN_mod_exp(ssl_res, bn_base, bn_exponent, bn_mod, ctx);
+        // ssl_elapsed = clock() - start;
+        gettimeofday(&end_time, NULL);
+        time_taken = (end_time.tv_sec - start_time.tv_sec) * 1e6;
+        time_taken =
+            (time_taken + (end_time.tv_usec - start_time.tv_usec));  //*1e-6;
+        ssl_elapsed = time_taken;
+
+        if (!ERR_get_error()) {
+#ifdef HE_QAT_DEBUG
+            bn_str = BN_bn2hex(ssl_res);
+            HE_QAT_PRINT("SSL BN mod exp: %s num_bytes: %d num_bits: %d\n",
+                         bn_str, BN_num_bytes(ssl_res), BN_num_bits(ssl_res));
+            showHexBN(ssl_res, bit_length);
+            OPENSSL_free(bn_str);
+#endif
+        } else {
+            HE_QAT_PRINT_DBG("Modular exponentiation failed.\n");
+        }
+        HE_QAT_PRINT_DBG("\nStarting QAT bnModExp...\n");
+
+        // Perform QAT ModExp Op
+        BIGNUM* qat_res = BN_new();
+        // start = clock();
+        gettimeofday(&start_time, NULL);
+        for (unsigned int j = 0; j < BATCH_SIZE; j++)
+            status = HE_QAT_BIGNUMModExp(qat_res, bn_base, bn_exponent, bn_mod,
+                                         bit_length);
+        getBnModExpRequest(BATCH_SIZE);
+        // qat_elapsed = clock() - start;
+        gettimeofday(&end_time, NULL);
+        time_taken = (end_time.tv_sec - start_time.tv_sec) * 1e6;
+        time_taken =
+            (time_taken + (end_time.tv_usec - start_time.tv_usec));  //*1e-6;
+        qat_elapsed = time_taken;
+
+        ssl_avg_time = (mod * ssl_avg_time + ssl_elapsed) / (mod + 1);
+        qat_avg_time =
+            (mod * qat_avg_time + qat_elapsed / BATCH_SIZE) / (mod + 1);
+        avg_speed_up =
+            (mod * avg_speed_up + (ssl_elapsed) / (qat_elapsed / BATCH_SIZE)) /
+            (mod + 1);
+
+        // mod is a size_t, hence %zu rather than %lu.
+        HE_QAT_PRINT(
+            "Trial #%03zu\tOpenSSL: %.1lfus\tQAT: %.1lfus\tSpeed Up:%.1lfx\t",
+            (mod + 1), ssl_avg_time, qat_avg_time, avg_speed_up);
+
+        if (HE_QAT_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT_ERR("\nQAT bnModExpOp failed\n");
+        } else {
+            HE_QAT_PRINT_DBG("\nQAT bnModExpOp finished\n");
+        }
+
+        if (BN_cmp(qat_res, ssl_res) != 0)
+            HE_QAT_PRINT("\t** FAIL **\n");
+        else
+            HE_QAT_PRINT("\t** PASS **\n");
+
+        BN_free(ssl_res);
+        BN_free(qat_res);
+
+        BN_free(bn_mod);
+        BN_free(bn_base);
+        BN_free(bn_exponent);
+    }
+
+    // Tear down OpenSSL context
+    BN_CTX_end(ctx);
+    BN_CTX_free(ctx);
+
+    // Tear down QAT runtime context
+    release_qat_devices();
+
+    return (int)status;
+}
diff --git a/module/heqat/test/test_bnConversion.cpp b/module/heqat/test/test_bnConversion.cpp
new file mode 100644
index 0000000..38d0dfa
--- /dev/null
+++ b/module/heqat/test/test_bnConversion.cpp
@@ -0,0 +1,118 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+// Sample converting random OpenSSL BIGNUMs to BigNumber with binToBigNumber()
+// and back with bigNumberToBin(), printing each representation and the time
+// taken by each conversion.
+
+#include <time.h>
+#include <openssl/bn.h>
+#include <openssl/err.h>
+#include <openssl/rand.h>
+#include <sys/time.h>
+
+#include <string>
+
+#include <iomanip>
+
+#include "heqat/heqat.h"
+
+struct timeval start_time, end_time;
+double time_taken = 0.0;
+
+// Runs num_trials conversions; returns the status of the last conversion and
+// exits with 1 on any allocation or conversion failure.
+int main(int argc, const char** argv) {
+    const int bit_length = 1024;
+    const size_t num_trials = 4;
+
+    // Despite the names, ssl_elapsed times binToBigNumber() and qat_elapsed
+    // times bigNumberToBin().
+    double ssl_elapsed = 0.0;
+    double qat_elapsed = 0.0;
+
+    HE_QAT_STATUS status = HE_QAT_STATUS_FAIL;
+
+    // Set up OpenSSL context (as baseline)
+    BN_CTX* ctx = BN_CTX_new();
+    if (NULL == ctx) exit(1);
+    BN_CTX_start(ctx);
+
+    for (unsigned int mod = 0; mod < num_trials; mod++) {
+        BIGNUM* bn_mod = generateTestBNData(bit_length);
+
+        if (!bn_mod) continue;
+
+        char* bn_str = BN_bn2hex(bn_mod);
+        HE_QAT_PRINT("BIGNUM: %s num_bytes: %d num_bits: %d\n", bn_str,
+                     BN_num_bytes(bn_mod), BN_num_bits(bn_mod));
+        OPENSSL_free(bn_str);
+
+        int len_ = (bit_length + 7) >> 3;
+
+        unsigned char* bn_mod_data_ =
+            (unsigned char*)calloc(len_, sizeof(unsigned char));
+        if (NULL == bn_mod_data_) exit(1);
+        BN_bn2binpad(bn_mod, bn_mod_data_, len_);
+
+        BN_free(bn_mod);
+
+        BigNumber big_num((Ipp32u)0);
+
+        gettimeofday(&start_time, NULL);
+        status = binToBigNumber(big_num, bn_mod_data_, bit_length);
+        if (HE_QAT_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT("Failed at binToBigNumber()\n");
+            exit(1);
+        }
+        gettimeofday(&end_time, NULL);
+        time_taken = (end_time.tv_sec - start_time.tv_sec) * 1e6;
+        time_taken =
+            (time_taken + (end_time.tv_usec - start_time.tv_usec));  //*1e-6;
+        ssl_elapsed = time_taken;
+        HE_QAT_PRINT("Conversion to BigNumber has completed in %.1lfus.\n",
+                     (ssl_elapsed));
+
+        int bit_len = 0;
+        ippsRef_BN(NULL, &bit_len, NULL, BN(big_num));
+        std::string str;
+        big_num.num2hex(str);
+        HE_QAT_PRINT("BigNumber:  %s num_bytes: %d num_bits: %d\n", str.c_str(),
+                     len_, bit_len);
+
+        gettimeofday(&start_time, NULL);
+        unsigned char* ref_bn_data_ =
+            (unsigned char*)calloc(len_, sizeof(unsigned char));
+        if (NULL == ref_bn_data_) exit(1);
+        status = bigNumberToBin(ref_bn_data_, bit_length, big_num);
+        if (HE_QAT_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT("Failed at bigNumberToBin()\n");
+            exit(1);
+        }
+        gettimeofday(&end_time, NULL);
+        time_taken = (end_time.tv_sec - start_time.tv_sec) * 1e6;
+        time_taken =
+            (time_taken + (end_time.tv_usec - start_time.tv_usec));  //*1e-6;
+        qat_elapsed = time_taken;
+        HE_QAT_PRINT("Conversion from BigNumber has completed %.1lfus.\n",
+                     (qat_elapsed));
+
+        BIGNUM* ref_bin_ = BN_new();
+        BN_bin2bn(ref_bn_data_, len_, ref_bin_);
+        bn_str = BN_bn2hex(ref_bin_);
+        HE_QAT_PRINT("Bin: %s num_bytes(%d) num_bits(%d)\n", bn_str,
+                     BN_num_bytes(ref_bin_), BN_num_bits(ref_bin_));
+        HE_QAT_PRINT("-----------------------\n");
+
+        OPENSSL_free(bn_str);
+        free(bn_mod_data_);
+        free(ref_bn_data_);
+        BN_free(ref_bin_);
+    }
+
+    // Tear down OpenSSL context
+    BN_CTX_end(ctx);
+    BN_CTX_free(ctx);
+
+    return static_cast<int>(status);
+}
diff --git a/module/heqat/test/test_bnModExp.cpp b/module/heqat/test/test_bnModExp.cpp
new file mode 100644
index 0000000..9aae63e
--- /dev/null
+++ b/module/heqat/test/test_bnModExp.cpp
@@ -0,0 +1,251 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+// Sample comparing OpenSSL BN_mod_exp against HE_QAT_bnModExp on operands that
+// are round-tripped through the BigNumber representation. It prints the
+// conversion overhead, running-average timings and whether both results match.
+
+#include <time.h>
+#include <openssl/bn.h>
+#include <openssl/err.h>
+#include <openssl/rand.h>
+
+#include <iomanip>
+#include <cstring>
+#include <chrono>  // NOLINT [build/c++11]
+
+#include "heqat/heqat.h"
+
+// Number of HE_QAT_bnModExp calls issued per trial.
+const unsigned int BATCH_SIZE = 48;
+
+// Runs num_trials randomized trials; returns the status of the last conversion
+// and exits with 1 on any allocation or conversion failure.
+int main(int argc, const char** argv) {
+    const int bit_length = 4096;
+    const size_t num_trials = 100;
+
+    double avg_speed_up = 0.0;
+    double ssl_avg_time = 0.0;
+    double qat_avg_time = 0.0;
+
+    HE_QAT_STATUS status = HE_QAT_STATUS_FAIL;
+
+    // Set up QAT runtime context
+    acquire_qat_devices();
+
+    // Set up OpenSSL context (as baseline)
+    BN_CTX* ctx = BN_CTX_new();
+    if (NULL == ctx) exit(1);
+    BN_CTX_start(ctx);
+
+    for (unsigned int mod = 0; mod < num_trials; mod++) {
+        // Generate modulus number
+        BIGNUM* bn_mod = generateTestBNData(bit_length);
+
+        if (!bn_mod) continue;
+
+        char* bn_str = BN_bn2hex(bn_mod);
+#ifdef HE_QAT_DEBUG
+        HE_QAT_PRINT("BIGNUM: %s num_bytes: %d num_bits: %d\n", bn_str,
+                     BN_num_bytes(bn_mod), BN_num_bits(bn_mod));
+#endif
+        OPENSSL_free(bn_str);
+
+        // Generate exponent in [0..bn_mod]
+        BIGNUM* bn_exponent = BN_new();
+        if (!bn_exponent || !BN_rand_range(bn_exponent, bn_mod)) {
+            // BN_free() accepts NULL, so this covers both failure modes.
+            BN_free(bn_exponent);
+            BN_free(bn_mod);
+            continue;
+        }
+
+        // Generate base number
+        BIGNUM* bn_base = generateTestBNData(bit_length);
+        if (!bn_base) {
+            // Same handling as a failed modulus: skip the trial, leak nothing.
+            BN_free(bn_exponent);
+            BN_free(bn_mod);
+            continue;
+        }
+
+        // Perform OpenSSL ModExp Op
+        BIGNUM* ssl_res = BN_new();
+        auto start = std::chrono::high_resolution_clock::now();
+        BN_mod_exp(ssl_res, bn_base, bn_exponent, bn_mod, ctx);
+        auto stop = std::chrono::high_resolution_clock::now();
+        auto ssl_duration =
+            std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
+
+        int len_ = (bit_length + 7) >> 3;
+
+        // Start QAT timer (including data conversion overhead)
+        start = std::chrono::high_resolution_clock::now();
+        unsigned char* bn_base_data_ =
+            (unsigned char*)calloc(len_, sizeof(unsigned char));
+        if (NULL == bn_base_data_) exit(1);
+        BN_bn2binpad(bn_base, bn_base_data_, len_);
+        unsigned char* bn_mod_data_ =
+            (unsigned char*)calloc(len_, sizeof(unsigned char));
+        if (NULL == bn_mod_data_) exit(1);
+        BN_bn2binpad(bn_mod, bn_mod_data_, len_);
+        unsigned char* bn_exponent_data_ =
+            (unsigned char*)calloc(len_, sizeof(unsigned char));
+        if (NULL == bn_exponent_data_) exit(1);
+        BN_bn2binpad(bn_exponent, bn_exponent_data_, len_);
+        unsigned char* bn_remainder_data_ =
+            (unsigned char*)calloc(len_, sizeof(unsigned char));
+        if (NULL == bn_remainder_data_) exit(1);
+        stop = std::chrono::high_resolution_clock::now();
+        auto cvt_duration =
+            std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
+
+        // Simulate input number in BigNumber representation
+        BigNumber big_num_base((Ipp32u)0);
+        BigNumber big_num_mod((Ipp32u)0);
+        BigNumber big_num_exponent((Ipp32u)0);
+        status = binToBigNumber(big_num_base, bn_base_data_, bit_length);
+        if (HE_QAT_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT_ERR("Failed at binToBigNumber()\n");
+            exit(1);
+        }
+        status = binToBigNumber(big_num_mod, bn_mod_data_, bit_length);
+        if (HE_QAT_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT_ERR("Failed at binToBigNumber()\n");
+            exit(1);
+        }
+        status =
+            binToBigNumber(big_num_exponent, bn_exponent_data_, bit_length);
+        if (HE_QAT_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT_ERR("Failed at binToBigNumber()\n");
+            exit(1);
+        }
+
+        // Reset numbers to 0
+        memset(bn_base_data_, 0, len_);
+        memset(bn_mod_data_, 0, len_);
+        memset(bn_exponent_data_, 0, len_);
+        // Make sure variables are reset
+        if (memcmp(bn_base_data_, bn_mod_data_, len_) ||
+            memcmp(bn_base_data_, bn_exponent_data_, len_)) {
+            HE_QAT_PRINT_ERR("Pointers are not reset to zero!");
+            exit(1);
+        }
+
+        start = std::chrono::high_resolution_clock::now();
+        status = bigNumberToBin(bn_base_data_, bit_length, big_num_base);
+        if (HE_QAT_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT_ERR("bn_base_data_: failed at bigNumberToBin()\n");
+            exit(1);
+        }
+        status = bigNumberToBin(bn_mod_data_, bit_length, big_num_mod);
+        if (HE_QAT_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT_ERR("bn_mod_data_: failed at bigNumberToBin()\n");
+            exit(1);
+        }
+        status =
+            bigNumberToBin(bn_exponent_data_, bit_length, big_num_exponent);
+        if (HE_QAT_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT_ERR("bn_exponent_data_: failed at bigNumberToBin()\n");
+            exit(1);
+        }
+        cvt_duration += std::chrono::duration_cast<std::chrono::microseconds>(
+            std::chrono::high_resolution_clock::now() - start);
+
+        // Perform BigNumber modular exponentiation on QAT
+        start = std::chrono::high_resolution_clock::now();
+        for (unsigned int b = 0; b < BATCH_SIZE; b++)
+            status =
+                HE_QAT_bnModExp(bn_remainder_data_, bn_base_data_,
+                                bn_exponent_data_, bn_mod_data_, bit_length);
+        getBnModExpRequest(BATCH_SIZE);
+        stop = std::chrono::high_resolution_clock::now();
+        auto qat_duration =
+            std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
+
+        ssl_avg_time =
+            (mod * ssl_avg_time + (static_cast<double>(ssl_duration.count()))) /
+            (mod + 1);
+        qat_avg_time =
+            (mod * qat_avg_time +
+             (static_cast<double>(qat_duration.count())) / BATCH_SIZE) /
+            (mod + 1);
+        // Cast before dividing: the integer quotient count() / BATCH_SIZE
+        // truncates and is 0 for batches faster than BATCH_SIZE microseconds.
+        avg_speed_up =
+            (mod * avg_speed_up +
+             (ssl_duration.count() /
+              (static_cast<double>(qat_duration.count()) / BATCH_SIZE))) /
+            (mod + 1);
+        HE_QAT_PRINT("Request #%u\t", mod + 1);
+        // count() is a signed integer; print it with a matching conversion.
+        HE_QAT_PRINT("Overhead: %lldus",
+                     static_cast<long long>(cvt_duration.count()));
+        HE_QAT_PRINT("\tOpenSSL: %.1lfus", ssl_avg_time);
+        HE_QAT_PRINT("\tQAT: %.1lfus", qat_avg_time);
+        HE_QAT_PRINT("\tSpeed-up: %.1lfx", avg_speed_up);
+
+        BIGNUM* qat_res = BN_new();
+        BN_bin2bn(bn_remainder_data_, len_, qat_res);
+
+        if (HE_QAT_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT_ERR("\nQAT bnModExp with BigNumber failed\n");
+        }
+#ifdef HE_QAT_DEBUG
+        else
+            HE_QAT_PRINT("\nQAT bnModExpOp finished\n");
+#endif
+
+        BigNumber big_num((Ipp32u)0);
+        status = binToBigNumber(big_num, bn_remainder_data_, bit_length);
+        if (HE_QAT_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT_ERR(
+                "bn_remainder_data_: Failed at binToBigNumber()\n");
+            exit(1);
+        }
+
+#ifdef HE_QAT_DEBUG
+        bn_str = BN_bn2hex(qat_res);
+        HE_QAT_PRINT("Bin: %s num_bytes(%d) num_bits(%d)\n", bn_str,
+                     BN_num_bytes(qat_res), BN_num_bits(qat_res));
+        OPENSSL_free(bn_str);
+#endif
+
+#ifdef HE_QAT_DEBUG
+        int bit_len = 0;
+        ippsRef_BN(NULL, &bit_len, NULL, BN(big_num));
+        std::string str;
+        big_num.num2hex(str);
+        HE_QAT_PRINT("BigNumber:  %s num_bytes: %d num_bits: %d\n", str.c_str(),
+                     len_, bit_len);
+        HE_QAT_PRINT(
+            "---------------------################-----------------------\n");
+#endif
+
+        if (BN_cmp(qat_res, ssl_res) != 0)
+            HE_QAT_PRINT("\t** FAIL **\n");
+        else
+            HE_QAT_PRINT("\t** PASS **\n");
+
+        BN_free(bn_mod);
+        BN_free(bn_base);
+        BN_free(bn_exponent);
+        BN_free(qat_res);
+        BN_free(ssl_res);
+
+        free(bn_mod_data_);
+        free(bn_base_data_);
+        free(bn_exponent_data_);
+        free(bn_remainder_data_);
+    }
+
+    // Tear down OpenSSL context
+    BN_CTX_end(ctx);
+    BN_CTX_free(ctx);
+
+    // Tear down QAT runtime context
+    release_qat_devices();
+
+    return static_cast<int>(status);
+}
diff --git a/module/heqat/test/test_bnModExp_MT.cpp b/module/heqat/test/test_bnModExp_MT.cpp
new file mode 100644
index 0000000..b511982
--- /dev/null
+++ b/module/heqat/test/test_bnModExp_MT.cpp
@@ -0,0 +1,270 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include <time.h>
+#include <omp.h>
+
+#include <openssl/bn.h>
+#include <openssl/err.h>
+#include <openssl/rand.h>
+
+#include <cstring>
+#include <iomanip>
+#include <chrono>  // NOLINT [build/c++11]
+
+#include "heqat/heqat.h"
+
+const unsigned int BATCH_SIZE = 4096;
+
+// Multi-threaded QAT bnModExp test: for each random 4096-bit trial, compute
+// the reference with OpenSSL BN_mod_exp, submit BATCH_SIZE identical requests
+// to QAT split across OpenMP threads, then compare thread 0's result and
+// print running timing averages. Returns the last HE_QAT_STATUS as an int.
+int main(int argc, const char** argv) {
+    const int bit_length = 4096;
+    const size_t num_trials = 20;
+
+    double avg_speed_up = 0.0;
+    double ssl_avg_time = 0.0;
+    double qat_avg_time = 0.0;
+
+    HE_QAT_STATUS status = HE_QAT_STATUS_FAIL;
+
+    // Set up QAT runtime context
+    acquire_qat_devices();
+
+    // Set up OpenSSL context (as baseline)
+    BN_CTX* ctx = BN_CTX_new();
+    BN_CTX_start(ctx);
+
+    int nthreads = 4;
+    for (unsigned int mod = 0; mod < num_trials; mod++) {
+        // Generate modulus number
+        BIGNUM* bn_mod = generateTestBNData(bit_length);
+
+        if (!bn_mod) continue;
+
+        char* bn_str = BN_bn2hex(bn_mod);
+
+        HE_QAT_PRINT_DBG("BIGNUM: %s num_bytes: %d num_bits: %d\n", bn_str,
+                         BN_num_bytes(bn_mod), BN_num_bits(bn_mod));
+
+        OPENSSL_free(bn_str);
+
+        // Generate base number and exponent in [0..bn_mod); skip the trial
+        // when either is unavailable (BN_free() ignores NULL pointers)
+        BIGNUM* bn_base = generateTestBNData(bit_length);
+        BIGNUM* bn_exponent = BN_new();
+        if (!bn_base || !bn_exponent || !BN_rand_range(bn_exponent, bn_mod)) {
+            BN_free(bn_mod);
+            BN_free(bn_base);
+            BN_free(bn_exponent);
+            continue;
+        }
+
+        // Perform OpenSSL ModExp Op
+        BIGNUM* ssl_res = BN_new();
+        auto start = std::chrono::high_resolution_clock::now();
+        BN_mod_exp(ssl_res, bn_base, bn_exponent, bn_mod, ctx);
+        auto stop = std::chrono::high_resolution_clock::now();
+        auto ssl_duration =
+            std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
+
+        int len_ = (bit_length + 7) >> 3;
+
+        // Start QAT timer (including data conversion overhead)
+        start = std::chrono::high_resolution_clock::now();
+        unsigned char* bn_base_data_ =
+            (unsigned char*)calloc(len_, sizeof(unsigned char));
+        if (NULL == bn_base_data_) exit(1);
+        BN_bn2binpad(bn_base, bn_base_data_, len_);
+        unsigned char* bn_mod_data_ =
+            (unsigned char*)calloc(len_, sizeof(unsigned char));
+        if (NULL == bn_mod_data_) exit(1);
+        BN_bn2binpad(bn_mod, bn_mod_data_, len_);
+        unsigned char* bn_exponent_data_ =
+            (unsigned char*)calloc(len_, sizeof(unsigned char));
+        if (NULL == bn_exponent_data_) exit(1);
+        BN_bn2binpad(bn_exponent, bn_exponent_data_, len_);
+        unsigned char* bn_remainder_data_ =
+            (unsigned char*)calloc(nthreads * len_, sizeof(unsigned char));
+        if (NULL == bn_remainder_data_) exit(1);
+        stop = std::chrono::high_resolution_clock::now();
+        auto cvt_duration =
+            std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
+
+        // Simulate input number in BigNumber representation
+        BigNumber big_num_base((Ipp32u)0);
+        BigNumber big_num_mod((Ipp32u)0);
+        BigNumber big_num_exponent((Ipp32u)0);
+        status = binToBigNumber(big_num_base, bn_base_data_, bit_length);
+        if (HE_QAT_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT_ERR("Failed at binToBigNumber()\n");
+            exit(1);
+        }
+        status = binToBigNumber(big_num_mod, bn_mod_data_, bit_length);
+        if (HE_QAT_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT_ERR("Failed at binToBigNumber()\n");
+            exit(1);
+        }
+        status =
+            binToBigNumber(big_num_exponent, bn_exponent_data_, bit_length);
+        if (HE_QAT_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT_ERR("Failed at binToBigNumber()\n");
+            exit(1);
+        }
+
+        // Reset numbers to 0
+        memset(bn_base_data_, 0, len_);
+        memset(bn_mod_data_, 0, len_);
+        memset(bn_exponent_data_, 0, len_);
+        // Make sure variables are reset
+        if (memcmp(bn_base_data_, bn_mod_data_, len_) ||
+            memcmp(bn_base_data_, bn_exponent_data_, len_)) {
+            HE_QAT_PRINT_ERR("Pointers are not reset to zero!");
+            exit(1);
+        }
+
+        start = std::chrono::high_resolution_clock::now();
+        status = bigNumberToBin(bn_base_data_, bit_length, big_num_base);
+        if (HE_QAT_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT_ERR("bn_base_data_: failed at bigNumberToBin()\n");
+            exit(1);
+        }
+        status = bigNumberToBin(bn_mod_data_, bit_length, big_num_mod);
+        if (HE_QAT_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT_ERR("bn_mod_data_: failed at bigNumberToBin()\n");
+            exit(1);
+        }
+        status =
+            bigNumberToBin(bn_exponent_data_, bit_length, big_num_exponent);
+        if (HE_QAT_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT_ERR("bn_exponent_data_: failed at bigNumberToBin()\n");
+            exit(1);
+        }
+        cvt_duration += std::chrono::duration_cast<std::chrono::microseconds>(
+            std::chrono::high_resolution_clock::now() - start);
+
+        omp_set_num_threads(nthreads);
+
+        // Perform BigNumber modular exponentiation on QAT
+        start = std::chrono::high_resolution_clock::now();
+
+        // status is private per thread, so its value never leaves the region;
+        // submission failures are therefore handled inside the loop below
+#pragma omp parallel private(status)
+        {
+            int thread_id = omp_get_thread_num();
+            unsigned int buffer_id = thread_id;
+
+            // Secure one of the distributed outstanding buffers
+            status = acquire_bnModExp_buffer(&buffer_id);
+            if (HE_QAT_STATUS_SUCCESS != status) {
+                HE_QAT_PRINT_ERR("Failed to acquire_bnModExp_buffer()\n");
+                exit(1);
+            }
+
+            HE_QAT_PRINT_DBG("Thread #%d HE QAT ACQUIRED BUFFER ID: %u\n",
+                             thread_id, buffer_id);
+
+            // Divide work among threads
+            unsigned int worksize = BATCH_SIZE / nthreads;
+            unsigned int begin = thread_id * worksize;
+            unsigned int end = begin + worksize;
+
+            HE_QAT_PRINT_DBG("Thread #%d Begin: %u End: %u\n", thread_id, begin,
+                             end);
+
+            // For local thread, schedule work execution
+            for (unsigned int b = begin; b < end; b++) {
+                status = HE_QAT_bnModExp_MT(
+                    buffer_id, bn_remainder_data_ + thread_id * len_,
+                    bn_base_data_, bn_exponent_data_, bn_mod_data_, bit_length);
+                if (HE_QAT_STATUS_SUCCESS != status) {
+                    HE_QAT_PRINT_ERR("\nQAT bnModExp with BigNumber failed\n");
+                    exit(1);
+                }
+            }
+
+            HE_QAT_PRINT_DBG("Thread #%d Waiting\n", thread_id);
+
+            // Wait for the request to complete
+            release_bnModExp_buffer(buffer_id, BATCH_SIZE / nthreads);
+
+            HE_QAT_PRINT_DBG("Thread #%d Completed\n", thread_id);
+        }  // pragma omp parallel
+
+        stop = std::chrono::high_resolution_clock::now();
+        auto qat_duration =
+            std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
+
+        // Per-request times in microseconds; divide in floating point so short
+        // QAT batches are not truncated to zero before computing the speed-up
+        const double ssl_us = static_cast<double>(ssl_duration.count());
+        const double qat_us =
+            static_cast<double>(qat_duration.count()) / BATCH_SIZE;
+        ssl_avg_time = (mod * ssl_avg_time + ssl_us) / (mod + 1);
+        qat_avg_time = (mod * qat_avg_time + qat_us) / (mod + 1);
+        avg_speed_up = (mod * avg_speed_up + ssl_us / qat_us) / (mod + 1);
+
+        HE_QAT_PRINT("Request #%u\t", mod + 1);
+        HE_QAT_PRINT("Overhead: %lldus",
+                     static_cast<long long>(cvt_duration.count()));
+        HE_QAT_PRINT("\tOpenSSL: %.1lfus", ssl_avg_time);
+        HE_QAT_PRINT("\tQAT: %.1lfus", qat_avg_time);
+        HE_QAT_PRINT("\tSpeed-up: %.1lfx", avg_speed_up);
+
+        BIGNUM* qat_res = BN_new();
+        BN_bin2bn(bn_remainder_data_, len_, qat_res);
+
+        HE_QAT_PRINT_DBG("\nQAT bnModExpOp finished\n");
+
+        BigNumber big_num((Ipp32u)0);
+        status = binToBigNumber(big_num, bn_remainder_data_, bit_length);
+        if (HE_QAT_STATUS_SUCCESS != status) {
+            HE_QAT_PRINT_ERR(
+                "bn_remainder_data_: Failed at binToBigNumber()\n");
+            exit(1);
+        }
+
+#ifdef HE_QAT_DEBUG
+        bn_str = BN_bn2hex(qat_res);
+        HE_QAT_PRINT_DBG("Bin: %s num_bytes(%d) num_bits(%d)\n", bn_str,
+                         BN_num_bytes(qat_res), BN_num_bits(qat_res));
+        OPENSSL_free(bn_str);
+        int bit_len = 0;
+        ippsRef_BN(NULL, &bit_len, NULL, BN(big_num));
+        std::string str;
+        big_num.num2hex(str);
+        HE_QAT_PRINT_DBG("BigNumber:  %s num_bytes: %d num_bits: %d\n",
+                         str.c_str(), len_, bit_len);
+        HE_QAT_PRINT_DBG(
+            "---------------------################-----------------------\n");
+#endif
+
+        if (BN_cmp(qat_res, ssl_res) != 0)
+            HE_QAT_PRINT("\t** FAIL **\n");
+        else
+            HE_QAT_PRINT("\t** PASS **\n");
+
+        BN_free(bn_mod);
+        BN_free(bn_base);
+        BN_free(bn_exponent);
+        BN_free(qat_res);
+        BN_free(ssl_res);
+
+        free(bn_mod_data_);
+        free(bn_base_data_);
+        free(bn_exponent_data_);
+        free(bn_remainder_data_);
+    }
+
+    // Tear down OpenSSL context
+    BN_CTX_end(ctx);
+    BN_CTX_free(ctx);
+
+    // Tear down QAT runtime context
+    release_qat_devices();
+
+    return static_cast<int>(status);
+}
diff --git a/module/heqat/test/test_context.c b/module/heqat/test/test_context.c
new file mode 100644
index 0000000..2d4846f
--- /dev/null
+++ b/module/heqat/test/test_context.c
@@ -0,0 +1,52 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "heqat/heqat.h"
+
+// Exercises the QAT context lifecycle: release before any acquire, acquire,
+// acquire again, then release twice; any non-success status exits with 1.
+int main() {
+    // Release with no prior acquire in this program
+    HE_QAT_STATUS status = release_qat_devices();
+    if (HE_QAT_STATUS_SUCCESS != status) {
+        printf("release_qat_devices() failed.\n");
+        exit(1);
+    }
+    printf("Nothing to do by release_qat_devices().\n");
+
+    // First acquire
+    status = acquire_qat_devices();
+    if (HE_QAT_STATUS_SUCCESS != status) {
+        printf("acquire_qat_devices() failed.\n");
+        exit(1);
+    }
+    printf("Completed acquire_qat_devices() successfully.\n");
+
+    // Second acquire without an intervening release
+    status = acquire_qat_devices();
+    if (HE_QAT_STATUS_SUCCESS != status) {
+        printf("acquire_qat_devices() failed.\n");
+        exit(1);
+    }
+    printf("QAT context already exists.\n");
+
+    HE_QAT_SLEEP(5000, HE_QAT_MILLISEC);
+
+    // First release after the acquires
+    status = release_qat_devices();
+    if (HE_QAT_STATUS_SUCCESS != status) {
+        printf("release_qat_devices() failed.\n");
+        exit(1);
+    }
+    printf("Completed release_qat_devices() successfully.\n");
+
+    // Second release in a row
+    status = release_qat_devices();
+    if (HE_QAT_STATUS_SUCCESS != status) {
+        printf("release_qat_devices() failed.\n");
+        exit(1);
+    }
+    printf("Nothing to do by release_qat_devices().\n");
+
+    return 0;
+}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index fe6c6e9..4d7550d 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -2,9 +2,11 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # Unit tests
-set(IPCL_UNITTEST_SRC main.cpp
-test_cryptography.cpp
-test_ops.cpp)
+set(IPCL_UNITTEST_SRC
+  main.cpp
+  test_cryptography.cpp
+  test_ops.cpp
+)
 
 add_executable(unittest_ipcl ${IPCL_UNITTEST_SRC})
 target_include_directories(unittest_ipcl PRIVATE
diff --git a/test/main.cpp b/test/main.cpp
index 0ec6f12..d0930de 100644
--- a/test/main.cpp
+++ b/test/main.cpp
@@ -1,13 +1,45 @@
 // Copyright (C) 2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 
-#include <gtest/gtest.h>
-
 #include <random>
 
+#include "gtest/gtest.h"
+#include "ipcl/ipcl.hpp"
+
 int main(int argc, char** argv) {
+#ifdef IPCL_USE_QAT
+  ipcl::initializeContext("QAT");  // state is printed below, not enforced
+
+  if (ipcl::isQATActive())
+    std::cout << "QAT Context: ACTIVE" << std::endl;
+  else
+    std::cout << "Error: QAT Context INACTIVE." << std::endl;
+
+  if (ipcl::isQATRunning())
+    std::cout << "QAT Instances: RUNNING" << std::endl;
+  else
+    std::cout << "Error: QAT Instances NOT RUNNING." << std::endl;
+#else
+  ipcl::initializeContext("default");
+#endif  // IPCL_USE_QAT
+
   // Use system clock for seed
   srand(time(nullptr));
   ::testing::InitGoogleTest(&argc, argv);
-  return RUN_ALL_TESTS();
+  int status = RUN_ALL_TESTS();  // saved so the context is torn down first
+
+  ipcl::terminateContext();
+
+#ifdef IPCL_USE_QAT
+  if (!ipcl::isQATActive())
+    std::cout << "QAT Context: INACTIVE" << std::endl;
+  else
+    std::cout << "Error: QAT Context ACTIVE." << std::endl;
+  if (!ipcl::isQATRunning())
+    std::cout << "QAT Instances: NOT RUNNING" << std::endl;
+  else
+    std::cout << "Error: QAT Instances STILL RUNNING." << std::endl;
+#endif  // IPCL_USE_QAT
+
+  return status;  // test result is the process exit code
 }
diff --git a/test/test_cryptography.cpp b/test/test_cryptography.cpp
index bac47db..db233e1 100644
--- a/test/test_cryptography.cpp
+++ b/test/test_cryptography.cpp
@@ -2,19 +2,20 @@
 // SPDX-License-Identifier: Apache-2.0
 
 #include <climits>
-#include <iostream>
 #include <random>
 #include <vector>
 
 #include "gtest/gtest.h"
 #include "ipcl/ipcl.hpp"
 
-constexpr int SELF_DEF_NUM_VALUES = 9;
+constexpr int SELF_DEF_NUM_VALUES = 18;
+constexpr float SELF_DEF_HYBRID_QAT_RATIO = 0.5;
 
 TEST(CryptoTest, CryptoTest) {
   const uint32_t num_values = SELF_DEF_NUM_VALUES;
+  const float qat_ratio = SELF_DEF_HYBRID_QAT_RATIO;
 
-  ipcl::keyPair key = ipcl::generateKeypair(2048, true);
+  ipcl::KeyPair key = ipcl::generateKeypair(2048, true);
 
   std::vector<uint32_t> exp_value(num_values);
   ipcl::PlainText pt;
@@ -30,16 +31,16 @@ TEST(CryptoTest, CryptoTest) {
   }
 
   pt = ipcl::PlainText(exp_value);
-  ct = key.pub_key->encrypt(pt);
-  dt = key.priv_key->decrypt(ct);
+
+  ipcl::setHybridRatio(qat_ratio);
+
+  ct = key.pub_key.encrypt(pt);
+  dt = key.priv_key.decrypt(ct);
 
   for (int i = 0; i < num_values; i++) {
     std::vector<uint32_t> v = dt.getElementVec(i);
     EXPECT_EQ(v[0], exp_value[i]);
   }
-
-  delete key.pub_key;
-  delete key.priv_key;
 }
 
 TEST(CryptoTest, ISO_IEC_18033_6_ComplianceTest) {
@@ -61,10 +62,10 @@ TEST(CryptoTest, ISO_IEC_18033_6_ComplianceTest) {
   BigNumber n = p * q;
   int n_length = n.BitSize();
 
-  ipcl::PublicKey* public_key = new ipcl::PublicKey(n, n_length);
-  ipcl::PrivateKey* private_key = new ipcl::PrivateKey(public_key, p, q);
+  ipcl::PublicKey pk(n, n_length);
+  ipcl::PrivateKey sk(pk, p, q);
 
-  ipcl::keyPair key = {public_key, private_key};
+  ipcl::KeyPair key = {pk, sk};
 
   std::vector<BigNumber> pt_bn_v(num_values);
   std::vector<BigNumber> ir_bn_v(num_values);
@@ -151,13 +152,15 @@ TEST(CryptoTest, ISO_IEC_18033_6_ComplianceTest) {
   ipcl::PlainText pt;
   ipcl::CipherText ct;
 
-  key.pub_key->setRandom(ir_bn_v);
+  ipcl::setHybridOff();
+
+  key.pub_key.setRandom(ir_bn_v);
 
   pt = ipcl::PlainText(pt_bn_v);
-  ct = key.pub_key->encrypt(pt);
+  ct = key.pub_key.encrypt(pt);
 
   ipcl::PlainText dt;
-  dt = key.priv_key->decrypt(ct);
+  dt = key.priv_key.decrypt(ct);
   for (int i = 0; i < num_values; i++) {
     EXPECT_EQ(dt.getElement(i), pt_bn_v[i]);
   }
@@ -179,10 +182,7 @@ TEST(CryptoTest, ISO_IEC_18033_6_ComplianceTest) {
   std::string str4;
   ipcl::PlainText dt_sum;
 
-  dt_sum = key.priv_key->decrypt(sum);
+  dt_sum = key.priv_key.decrypt(sum);
   m1m2.num2hex(str4);
   EXPECT_EQ(str4, dt_sum.getElementHex(0));
-
-  delete key.pub_key;
-  delete key.priv_key;
 }
diff --git a/test/test_ops.cpp b/test/test_ops.cpp
index 74064fe..2484c59 100644
--- a/test/test_ops.cpp
+++ b/test/test_ops.cpp
@@ -2,17 +2,17 @@
 // SPDX-License-Identifier: Apache-2.0
 
 #include <climits>
-#include <iostream>
 #include <random>
 #include <vector>
 
 #include "gtest/gtest.h"
 #include "ipcl/ipcl.hpp"
 
-constexpr int SELF_DEF_NUM_VALUES = 7;
+constexpr int SELF_DEF_NUM_VALUES = 14;
+constexpr float SELF_DEF_HYBRID_QAT_RATIO = 0.5;
 
 void CtPlusCt(ipcl::CipherText& res, const ipcl::CipherText& ct1,
-              const ipcl::CipherText& ct2, const ipcl::keyPair key) {
+              const ipcl::CipherText& ct2, const ipcl::KeyPair key) {
   int size = ct1.getSize();
   std::vector<BigNumber> sum_bn_v(size);
 
@@ -26,12 +26,12 @@ void CtPlusCt(ipcl::CipherText& res, const ipcl::CipherText& ct1,
 }
 
 void CtPlusCtArray(ipcl::CipherText& res, const ipcl::CipherText& ct1,
-                   const ipcl::CipherText& ct2, const ipcl::keyPair key) {
+                   const ipcl::CipherText& ct2) {
   res = ct1 + ct2;
 }
 
 void CtPlusPt(ipcl::CipherText& res, const ipcl::CipherText& ct1,
-              const ipcl::PlainText& pt2, const ipcl::keyPair key) {
+              const ipcl::PlainText& pt2, const ipcl::KeyPair key) {
   int size = ct1.getSize();
   std::vector<BigNumber> sum_bn_v(size);
 
@@ -45,12 +45,12 @@ void CtPlusPt(ipcl::CipherText& res, const ipcl::CipherText& ct1,
 }
 
 void CtPlusPtArray(ipcl::CipherText& res, const ipcl::CipherText& ct1,
-                   const ipcl::PlainText& pt2, const ipcl::keyPair key) {
+                   const ipcl::PlainText& pt2) {
   res = ct1 + pt2;
 }
 
 void CtMultiplyPt(ipcl::CipherText& res, const ipcl::CipherText& ct1,
-                  const ipcl::PlainText& pt2, const ipcl::keyPair key) {
+                  const ipcl::PlainText& pt2, const ipcl::KeyPair key) {
   int size = ct1.getSize();
   std::vector<BigNumber> product_bn_v(size);
 
@@ -64,12 +64,12 @@ void CtMultiplyPt(ipcl::CipherText& res, const ipcl::CipherText& ct1,
 }
 
 void CtMultiplyPtArray(ipcl::CipherText& res, const ipcl::CipherText& ct1,
-                       const ipcl::PlainText& pt2, const ipcl::keyPair key) {
+                       const ipcl::PlainText& pt2) {
   res = ct1 * pt2;
 }
 
 void AddSub(ipcl::CipherText& res, const ipcl::CipherText& ct1,
-            const ipcl::CipherText& ct2, const ipcl::keyPair key) {
+            const ipcl::CipherText& ct2, const ipcl::KeyPair key) {
   int size = ct1.getSize();
   std::vector<BigNumber> sum_bn_v(size);
 
@@ -86,7 +86,7 @@ void AddSub(ipcl::CipherText& res, const ipcl::CipherText& ct1,
 }
 
 void PtPlusCt(ipcl::CipherText& res, const ipcl::PlainText& pt2,
-              const ipcl::CipherText& ct1, const ipcl::keyPair key) {
+              const ipcl::CipherText& ct1, const ipcl::KeyPair key) {
   int size = ct1.getSize();
   std::vector<BigNumber> sum_bn_v(size);
 
@@ -100,12 +100,12 @@ void PtPlusCt(ipcl::CipherText& res, const ipcl::PlainText& pt2,
 }
 
 void PtPlusCtArray(ipcl::CipherText& res, const ipcl::PlainText& pt2,
-                   const ipcl::CipherText& ct1, const ipcl::keyPair key) {
+                   const ipcl::CipherText& ct1) {
   res = pt2 + ct1;
 }
 
 void PtMultiplyCt(ipcl::CipherText& res, const ipcl::PlainText& pt2,
-                  const ipcl::CipherText& ct1, const ipcl::keyPair key) {
+                  const ipcl::CipherText& ct1, const ipcl::KeyPair key) {
   int size = ct1.getSize();
   std::vector<BigNumber> product_bn_v(size);
 
@@ -119,14 +119,15 @@ void PtMultiplyCt(ipcl::CipherText& res, const ipcl::PlainText& pt2,
 }
 
 void PtMultiplyCtArray(ipcl::CipherText& res, const ipcl::PlainText& pt2,
-                       const ipcl::CipherText& ct1, const ipcl::keyPair key) {
+                       const ipcl::CipherText& ct1) {
   res = pt2 * ct1;
 }
 
 TEST(OperationTest, CtPlusCtTest) {
   const uint32_t num_values = SELF_DEF_NUM_VALUES;
+  const float qat_ratio = SELF_DEF_HYBRID_QAT_RATIO;
 
-  ipcl::keyPair key = ipcl::generateKeypair(2048);
+  ipcl::KeyPair key = ipcl::generateKeypair(2048);
 
   std::vector<uint32_t> exp_value1(num_values), exp_value2(num_values);
   ipcl::PlainText pt1, pt2, dt_sum;
@@ -143,12 +144,14 @@ TEST(OperationTest, CtPlusCtTest) {
   pt1 = ipcl::PlainText(exp_value1);
   pt2 = ipcl::PlainText(exp_value2);
 
-  ct1 = key.pub_key->encrypt(pt1);
-  ct2 = key.pub_key->encrypt(pt2);
+  ipcl::setHybridRatio(qat_ratio);
+
+  ct1 = key.pub_key.encrypt(pt1);
+  ct2 = key.pub_key.encrypt(pt2);
 
   CtPlusCt(ct_sum, ct1, ct2, key);
 
-  dt_sum = key.priv_key->decrypt(ct_sum);
+  dt_sum = key.priv_key.decrypt(ct_sum);
 
   for (int i = 0; i < num_values; i++) {
     std::vector<uint32_t> v = dt_sum.getElementVec(i);
@@ -159,15 +162,13 @@ TEST(OperationTest, CtPlusCtTest) {
 
     EXPECT_EQ(sum, exp_sum);
   }
-
-  delete key.pub_key;
-  delete key.priv_key;
 }
 
 TEST(OperationTest, CtPlusCtArrayTest) {
   const uint32_t num_values = SELF_DEF_NUM_VALUES;
+  const float qat_ratio = SELF_DEF_HYBRID_QAT_RATIO;
 
-  ipcl::keyPair key = ipcl::generateKeypair(2048);
+  ipcl::KeyPair key = ipcl::generateKeypair(2048);
 
   std::vector<uint32_t> exp_value1(num_values), exp_value2(num_values);
   ipcl::PlainText pt1, pt2, dt_sum;
@@ -184,12 +185,14 @@ TEST(OperationTest, CtPlusCtArrayTest) {
   pt1 = ipcl::PlainText(exp_value1);
   pt2 = ipcl::PlainText(exp_value2);
 
-  ct1 = key.pub_key->encrypt(pt1);
-  ct2 = key.pub_key->encrypt(pt2);
+  ipcl::setHybridRatio(qat_ratio);
+
+  ct1 = key.pub_key.encrypt(pt1);
+  ct2 = key.pub_key.encrypt(pt2);
 
-  CtPlusCtArray(ct_sum, ct1, ct2, key);
+  CtPlusCtArray(ct_sum, ct1, ct2);
 
-  dt_sum = key.priv_key->decrypt(ct_sum);
+  dt_sum = key.priv_key.decrypt(ct_sum);
 
   for (int i = 0; i < num_values; i++) {
     std::vector<uint32_t> v = dt_sum.getElementVec(i);
@@ -200,15 +203,13 @@ TEST(OperationTest, CtPlusCtArrayTest) {
 
     EXPECT_EQ(sum, exp_sum);
   }
-
-  delete key.pub_key;
-  delete key.priv_key;
 }
 
 TEST(OperationTest, CtPlusPtTest) {
   const uint32_t num_values = SELF_DEF_NUM_VALUES;
+  const float qat_ratio = SELF_DEF_HYBRID_QAT_RATIO;
 
-  ipcl::keyPair key = ipcl::generateKeypair(2048);
+  ipcl::KeyPair key = ipcl::generateKeypair(2048);
 
   std::vector<uint32_t> exp_value1(num_values), exp_value2(num_values);
   ipcl::PlainText pt1, pt2, dt_sum;
@@ -225,11 +226,13 @@ TEST(OperationTest, CtPlusPtTest) {
   pt1 = ipcl::PlainText(exp_value1);
   pt2 = ipcl::PlainText(exp_value2);
 
-  ct1 = key.pub_key->encrypt(pt1);
+  ipcl::setHybridRatio(qat_ratio);
+
+  ct1 = key.pub_key.encrypt(pt1);
 
   CtPlusPt(ct_sum, ct1, pt2, key);
 
-  dt_sum = key.priv_key->decrypt(ct_sum);
+  dt_sum = key.priv_key.decrypt(ct_sum);
 
   for (int i = 0; i < num_values; i++) {
     std::vector<uint32_t> v = dt_sum.getElementVec(i);
@@ -240,15 +243,13 @@ TEST(OperationTest, CtPlusPtTest) {
 
     EXPECT_EQ(sum, exp_sum);
   }
-
-  delete key.pub_key;
-  delete key.priv_key;
 }
 
 TEST(OperationTest, CtPlusPtArrayTest) {
   const uint32_t num_values = SELF_DEF_NUM_VALUES;
+  const float qat_ratio = SELF_DEF_HYBRID_QAT_RATIO;
 
-  ipcl::keyPair key = ipcl::generateKeypair(2048);
+  ipcl::KeyPair key = ipcl::generateKeypair(2048);
 
   std::vector<uint32_t> exp_value1(num_values), exp_value2(num_values);
   ipcl::PlainText pt1, pt2, dt_sum;
@@ -265,11 +266,13 @@ TEST(OperationTest, CtPlusPtArrayTest) {
   pt1 = ipcl::PlainText(exp_value1);
   pt2 = ipcl::PlainText(exp_value2);
 
-  ct1 = key.pub_key->encrypt(pt1);
+  ipcl::setHybridRatio(qat_ratio);
 
-  CtPlusPtArray(ct_sum, ct1, pt2, key);
+  ct1 = key.pub_key.encrypt(pt1);
 
-  dt_sum = key.priv_key->decrypt(ct_sum);
+  CtPlusPtArray(ct_sum, ct1, pt2);
+
+  dt_sum = key.priv_key.decrypt(ct_sum);
 
   for (int i = 0; i < num_values; i++) {
     std::vector<uint32_t> v = dt_sum.getElementVec(i);
@@ -280,15 +283,13 @@ TEST(OperationTest, CtPlusPtArrayTest) {
 
     EXPECT_EQ(sum, exp_sum);
   }
-
-  delete key.pub_key;
-  delete key.priv_key;
 }
 
 TEST(OperationTest, CtMultiplyPtTest) {
   const uint32_t num_values = SELF_DEF_NUM_VALUES;
+  const float qat_ratio = SELF_DEF_HYBRID_QAT_RATIO;
 
-  ipcl::keyPair key = ipcl::generateKeypair(2048);
+  ipcl::KeyPair key = ipcl::generateKeypair(2048);
 
   std::vector<uint32_t> exp_value1(num_values), exp_value2(num_values);
   ipcl::PlainText pt1, pt2, dt_product;
@@ -305,11 +306,13 @@ TEST(OperationTest, CtMultiplyPtTest) {
   pt1 = ipcl::PlainText(exp_value1);
   pt2 = ipcl::PlainText(exp_value2);
 
-  ct1 = key.pub_key->encrypt(pt1);
+  ipcl::setHybridRatio(qat_ratio);
+
+  ct1 = key.pub_key.encrypt(pt1);
 
   CtMultiplyPt(ct_product, ct1, pt2, key);
 
-  dt_product = key.priv_key->decrypt(ct_product);
+  dt_product = key.priv_key.decrypt(ct_product);
 
   for (int i = 0; i < num_values; i++) {
     std::vector<uint32_t> v = dt_product.getElementVec(i);
@@ -320,15 +323,13 @@ TEST(OperationTest, CtMultiplyPtTest) {
 
     EXPECT_EQ(product, exp_product);
   }
-
-  delete key.pub_key;
-  delete key.priv_key;
 }
 
 TEST(OperationTest, CtMultiplyZeroPtTest) {
   const uint32_t num_values = SELF_DEF_NUM_VALUES;
+  const float qat_ratio = SELF_DEF_HYBRID_QAT_RATIO;
 
-  ipcl::keyPair key = ipcl::generateKeypair(2048);
+  ipcl::KeyPair key = ipcl::generateKeypair(2048);
 
   std::vector<uint32_t> exp_value1(num_values), exp_value2(num_values);
   ipcl::PlainText pt1, pt2, dt_product;
@@ -346,11 +347,13 @@ TEST(OperationTest, CtMultiplyZeroPtTest) {
   pt1 = ipcl::PlainText(exp_value1);
   pt2 = ipcl::PlainText(exp_value2);
 
-  ct1 = key.pub_key->encrypt(pt1);
+  ipcl::setHybridRatio(qat_ratio);
+
+  ct1 = key.pub_key.encrypt(pt1);
 
   CtMultiplyPt(ct_product, ct1, pt2, key);
 
-  dt_product = key.priv_key->decrypt(ct_product);
+  dt_product = key.priv_key.decrypt(ct_product);
 
   for (int i = 0; i < num_values; i++) {
     std::vector<uint32_t> v = dt_product.getElementVec(i);
@@ -361,15 +364,13 @@ TEST(OperationTest, CtMultiplyZeroPtTest) {
 
     EXPECT_EQ(product, exp_product);
   }
-
-  delete key.pub_key;
-  delete key.priv_key;
 }
 
 TEST(OperationTest, CtMultiplyPtArrayTest) {
   const uint32_t num_values = SELF_DEF_NUM_VALUES;
+  const float qat_ratio = SELF_DEF_HYBRID_QAT_RATIO;
 
-  ipcl::keyPair key = ipcl::generateKeypair(2048);
+  ipcl::KeyPair key = ipcl::generateKeypair(2048);
 
   std::vector<uint32_t> exp_value1(num_values), exp_value2(num_values);
   ipcl::PlainText pt1, pt2, dt_product;
@@ -386,11 +387,13 @@ TEST(OperationTest, CtMultiplyPtArrayTest) {
   pt1 = ipcl::PlainText(exp_value1);
   pt2 = ipcl::PlainText(exp_value2);
 
-  ct1 = key.pub_key->encrypt(pt1);
+  ipcl::setHybridRatio(qat_ratio);
 
-  CtMultiplyPtArray(ct_product, ct1, pt2, key);
+  ct1 = key.pub_key.encrypt(pt1);
 
-  dt_product = key.priv_key->decrypt(ct_product);
+  CtMultiplyPtArray(ct_product, ct1, pt2);
+
+  dt_product = key.priv_key.decrypt(ct_product);
 
   for (int i = 0; i < num_values; i++) {
     std::vector<uint32_t> v = dt_product.getElementVec(i);
@@ -401,15 +404,13 @@ TEST(OperationTest, CtMultiplyPtArrayTest) {
 
     EXPECT_EQ(product, exp_product);
   }
-
-  delete key.pub_key;
-  delete key.priv_key;
 }
 
 TEST(OperationTest, AddSubTest) {
   const uint32_t num_values = SELF_DEF_NUM_VALUES;
+  const float qat_ratio = SELF_DEF_HYBRID_QAT_RATIO;
 
-  ipcl::keyPair key = ipcl::generateKeypair(2048);
+  ipcl::KeyPair key = ipcl::generateKeypair(2048);
 
   std::vector<uint32_t> exp_value1(num_values), exp_value2(num_values);
   ipcl::PlainText pt1, pt2, dt_sum;
@@ -426,12 +427,14 @@ TEST(OperationTest, AddSubTest) {
   pt1 = ipcl::PlainText(exp_value1);
   pt2 = ipcl::PlainText(exp_value2);
 
-  ct1 = key.pub_key->encrypt(pt1);
-  ct2 = key.pub_key->encrypt(pt2);
+  ipcl::setHybridRatio(qat_ratio);
+
+  ct1 = key.pub_key.encrypt(pt1);
+  ct2 = key.pub_key.encrypt(pt2);
 
   AddSub(ct_sum, ct1, ct2, key);
 
-  dt_sum = key.priv_key->decrypt(ct_sum);
+  dt_sum = key.priv_key.decrypt(ct_sum);
 
   for (int i = 0; i < num_values; i++) {
     std::vector<uint32_t> v = dt_sum.getElementVec(i);
@@ -442,15 +445,13 @@ TEST(OperationTest, AddSubTest) {
 
     EXPECT_EQ(sum, exp_sum);
   }
-
-  delete key.pub_key;
-  delete key.priv_key;
 }
 
 TEST(OperationTest, PtPlusCtTest) {
   const uint32_t num_values = SELF_DEF_NUM_VALUES;
+  const float qat_ratio = SELF_DEF_HYBRID_QAT_RATIO;
 
-  ipcl::keyPair key = ipcl::generateKeypair(2048);
+  ipcl::KeyPair key = ipcl::generateKeypair(2048);
 
   std::vector<uint32_t> exp_value1(num_values), exp_value2(num_values);
   ipcl::PlainText pt1, pt2, dt_sum;
@@ -467,11 +468,13 @@ TEST(OperationTest, PtPlusCtTest) {
   pt1 = ipcl::PlainText(exp_value1);
   pt2 = ipcl::PlainText(exp_value2);
 
-  ct1 = key.pub_key->encrypt(pt1);
+  ipcl::setHybridRatio(qat_ratio);
+
+  ct1 = key.pub_key.encrypt(pt1);
 
   PtPlusCt(ct_sum, pt2, ct1, key);
 
-  dt_sum = key.priv_key->decrypt(ct_sum);
+  dt_sum = key.priv_key.decrypt(ct_sum);
 
   for (int i = 0; i < num_values; i++) {
     std::vector<uint32_t> v = dt_sum.getElementVec(i);
@@ -482,15 +485,13 @@ TEST(OperationTest, PtPlusCtTest) {
 
     EXPECT_EQ(sum, exp_sum);
   }
-
-  delete key.pub_key;
-  delete key.priv_key;
 }
 
 TEST(OperationTest, PtPlusCtArrayTest) {
   const uint32_t num_values = SELF_DEF_NUM_VALUES;
+  const float qat_ratio = SELF_DEF_HYBRID_QAT_RATIO;
 
-  ipcl::keyPair key = ipcl::generateKeypair(2048);
+  ipcl::KeyPair key = ipcl::generateKeypair(2048);
 
   std::vector<uint32_t> exp_value1(num_values), exp_value2(num_values);
   ipcl::PlainText pt1, pt2, dt_sum;
@@ -507,11 +508,13 @@ TEST(OperationTest, PtPlusCtArrayTest) {
   pt1 = ipcl::PlainText(exp_value1);
   pt2 = ipcl::PlainText(exp_value2);
 
-  ct1 = key.pub_key->encrypt(pt1);
+  ipcl::setHybridRatio(qat_ratio);
 
-  PtPlusCtArray(ct_sum, pt2, ct1, key);
+  ct1 = key.pub_key.encrypt(pt1);
 
-  dt_sum = key.priv_key->decrypt(ct_sum);
+  PtPlusCtArray(ct_sum, pt2, ct1);
+
+  dt_sum = key.priv_key.decrypt(ct_sum);
 
   for (int i = 0; i < num_values; i++) {
     std::vector<uint32_t> v = dt_sum.getElementVec(i);
@@ -522,15 +525,13 @@ TEST(OperationTest, PtPlusCtArrayTest) {
 
     EXPECT_EQ(sum, exp_sum);
   }
-
-  delete key.pub_key;
-  delete key.priv_key;
 }
 
 TEST(OperationTest, PtMultiplyCtTest) {
   const uint32_t num_values = SELF_DEF_NUM_VALUES;
+  const float qat_ratio = SELF_DEF_HYBRID_QAT_RATIO;
 
-  ipcl::keyPair key = ipcl::generateKeypair(2048);
+  ipcl::KeyPair key = ipcl::generateKeypair(2048);
 
   std::vector<uint32_t> exp_value1(num_values), exp_value2(num_values);
   ipcl::PlainText pt1, pt2, dt_product;
@@ -547,11 +548,13 @@ TEST(OperationTest, PtMultiplyCtTest) {
   pt1 = ipcl::PlainText(exp_value1);
   pt2 = ipcl::PlainText(exp_value2);
 
-  ct1 = key.pub_key->encrypt(pt1);
+  ipcl::setHybridRatio(qat_ratio);
+
+  ct1 = key.pub_key.encrypt(pt1);
 
   PtMultiplyCt(ct_product, pt2, ct1, key);
 
-  dt_product = key.priv_key->decrypt(ct_product);
+  dt_product = key.priv_key.decrypt(ct_product);
 
   for (int i = 0; i < num_values; i++) {
     std::vector<uint32_t> v = dt_product.getElementVec(i);
@@ -562,15 +565,13 @@ TEST(OperationTest, PtMultiplyCtTest) {
 
     EXPECT_EQ(product, exp_product);
   }
-
-  delete key.pub_key;
-  delete key.priv_key;
 }
 
 TEST(OperationTest, PtMultiplyCtArrayTest) {
   const uint32_t num_values = SELF_DEF_NUM_VALUES;
+  const float qat_ratio = SELF_DEF_HYBRID_QAT_RATIO;
 
-  ipcl::keyPair key = ipcl::generateKeypair(2048);
+  ipcl::KeyPair key = ipcl::generateKeypair(2048);
 
   std::vector<uint32_t> exp_value1(num_values), exp_value2(num_values);
   ipcl::PlainText pt1, pt2, dt_product;
@@ -587,11 +588,13 @@ TEST(OperationTest, PtMultiplyCtArrayTest) {
   pt1 = ipcl::PlainText(exp_value1);
   pt2 = ipcl::PlainText(exp_value2);
 
-  ct1 = key.pub_key->encrypt(pt1);
+  ipcl::setHybridRatio(qat_ratio);
+
+  ct1 = key.pub_key.encrypt(pt1);
 
-  PtMultiplyCtArray(ct_product, pt2, ct1, key);
+  PtMultiplyCtArray(ct_product, pt2, ct1);
 
-  dt_product = key.priv_key->decrypt(ct_product);
+  dt_product = key.priv_key.decrypt(ct_product);
 
   for (int i = 0; i < num_values; i++) {
     std::vector<uint32_t> v = dt_product.getElementVec(i);
@@ -602,7 +605,4 @@ TEST(OperationTest, PtMultiplyCtArrayTest) {
 
     EXPECT_EQ(product, exp_product);
   }
-
-  delete key.pub_key;
-  delete key.priv_key;
 }