diff --git a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
index bd3494f75ab0..2e04912d9c70 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
@@ -215,14 +215,12 @@ object VeloxConfig extends ConfigRegistry {
       .bytesConf(ByteUnit.BYTE)
       .createWithDefaultString("32MB")
 
-  val COLUMNAR_VELOX_ASYNC_TIMEOUT =
+  val COLUMNAR_VELOX_ASYNC_TIMEOUT_ON_TASK_STOPPING =
     buildStaticConf("spark.gluten.sql.columnar.backend.velox.asyncTimeoutOnTaskStopping")
-      .doc(
-        "Timeout for asynchronous execution when task is being stopped in Velox backend. " +
-          "It's recommended to set to a number larger than network connection timeout that the " +
-          "possible async tasks are relying on.")
+      .doc("Timeout in milliseconds when waiting for runtime-scoped async work to finish during" +
+        " teardown.")
       .timeConf(TimeUnit.MILLISECONDS)
-      .createWithDefault(30000)
+      .createWithDefault(30000L)
 
   val COLUMNAR_VELOX_SPLIT_PRELOAD_PER_DRIVER =
     buildConf("spark.gluten.sql.columnar.backend.velox.SplitPreloadPerDriver")
diff --git a/cpp/core/jni/JniCommon.h b/cpp/core/jni/JniCommon.h
index ee16412e3a1f..a4edd2c57e86 100644
--- a/cpp/core/jni/JniCommon.h
+++ b/cpp/core/jni/JniCommon.h
@@ -21,9 +21,6 @@
 #include <arrow/ipc/writer.h>
 #include <execinfo.h>
 #include <jni.h>
-#include <thread>
-
-#include <folly/executors/thread_factory/ThreadFactory.h>
 
 #include "compute/ProtobufUtils.h"
 #include "compute/Runtime.h"
@@ -165,11 +162,6 @@ class JniCommonState {
 
   jmethodID runtimeAwareCtxHandle();
 
-  JavaVM* vm() const {
-    assertInitialized();
-    return vm_;
-  }
-
  private:
   void initialize(JNIEnv* env);
 
@@ -187,56 +179,6 @@ inline JniCommonState* getJniCommonState() {
   return &jniCommonState;
 }
 
-/// A folly::ThreadFactory for spill thread pools. Attaches each thread to the
-/// JVM as a daemon at creation and calls DetachCurrentThread inside the thread
-/// function body — after all work completes but before any pthread_key destructor
-/// fires — to prevent unbounded JavaThread accumulation.
-///
-/// INVARIANT: threads created by this factory must never call libhdfs. libhdfs
-/// registers hdfsThreadDestructor via pthread_key on first HDFS call; that
-/// destructor calls DetachCurrentThread at actual thread exit. Calling it
-/// earlier (inside the thread body) would invalidate libhdfs's cached JNIEnv*,
-/// causing SIGSEGV on the next HDFS call.
-///
-/// REQUIRES: JniCommonState::ensureInitialized() must have been called before
-/// constructing this factory (i.e. after JNI_OnLoad completes).
-class JniAwareThreadFactory : public folly::ThreadFactory {
- public:
-  JniAwareThreadFactory() : vm_(getJniCommonState()->vm()) {}
-
-  std::thread newThread(folly::Func&& func) override {
-    return std::thread([vm = vm_, f = std::move(func)]() mutable {
-      JNIEnv* env = nullptr;
-      bool weAttached = (vm->GetEnv(reinterpret_cast<void**>(&env), jniVersion) == JNI_EDETACHED);
-      if (weAttached) {
-        if (vm->AttachCurrentThreadAsDaemon(reinterpret_cast<void**>(&env), nullptr) != JNI_OK) {
-          LOG(WARNING) << "JniAwareThreadFactory: failed to attach thread to JVM";
-          weAttached = false;
-        }
-      }
-      // RAII guard: ensures DetachCurrentThread is called even if f() throws.
-      struct DetachGuard {
-        JavaVM* vm;
-        bool active;
-        ~DetachGuard() {
-          if (active) {
-            vm->DetachCurrentThread();
-          }
-        }
-      } guard{vm, weAttached};
-      f();
-    });
-  }
-
-  const std::string& getNamePrefix() const override {
-    static const std::string kEmpty;
-    return kEmpty;
-  }
-
- private:
-  JavaVM* vm_;
-};
-
 Runtime* getRuntime(JNIEnv* env, jobject runtimeAware);
 
 // Safe version of JNI {Get|Release}<PrimitiveType>ArrayElements routines.
diff --git a/cpp/velox/compute/VeloxBackend.cc b/cpp/velox/compute/VeloxBackend.cc
index e36bc6a1c609..fe49b37d2784 100644
--- a/cpp/velox/compute/VeloxBackend.cc
+++ b/cpp/velox/compute/VeloxBackend.cc
@@ -68,9 +68,6 @@ DECLARE_bool(velox_ssd_odirect);
 DECLARE_bool(velox_memory_pool_capacity_transfer_across_tasks);
 DECLARE_int32(cache_prefetch_min_pct);
 
-DECLARE_int32(gluten_velox_async_timeout_on_task_stopping);
-DEFINE_int32(gluten_velox_async_timeout_on_task_stopping, 30000, "Async timout when task is being stopped");
-
 using namespace facebook;
 
 namespace gluten {
@@ -146,14 +143,10 @@ void VeloxBackend::init(
   // Set velox_memory_use_hugepages.
   FLAGS_velox_memory_use_hugepages = backendConf_->get<bool>(kMemoryUseHugePages, kMemoryUseHugePagesDefault);
 
-  // Async timeout.
-  FLAGS_gluten_velox_async_timeout_on_task_stopping =
-      backendConf_->get<int32_t>(kVeloxAsyncTimeoutOnTaskStopping, kVeloxAsyncTimeoutOnTaskStoppingDefault);
-
   // Set cache_prefetch_min_pct default as 0 to force all loads are prefetched in DirectBufferInput.
   FLAGS_cache_prefetch_min_pct = backendConf_->get<int>(kCachePrefetchMinPct, 0);
 
-  auto hiveConf = createHiveConnectorConfig(backendConf_);
+  hiveConnectorConfig_ = createHiveConnectorConfig(backendConf_);
 
   // Setup and register.
   velox::filesystems::registerLocalFileSystem();
@@ -169,7 +162,7 @@ void VeloxBackend::init(
 #endif
 #ifdef ENABLE_ABFS
   velox::filesystems::registerAbfsFileSystem();
-  velox::filesystems::registerAzureClientProvider(*hiveConf);
+  velox::filesystems::registerAzureClientProvider(*hiveConnectorConfig_);
 #endif
 
 #ifdef GLUTEN_ENABLE_GPU
@@ -190,8 +183,20 @@ void VeloxBackend::init(
   }
 #endif
 
+  const auto spillThreadNum = backendConf_->get<uint32_t>(kSpillThreadNum, kSpillThreadNumDefaultValue);
+  if (spillThreadNum > 0) {
+    spillExecutor_ = std::make_unique<folly::CPUThreadPoolExecutor>(spillThreadNum);
+  }
+  auto ioThreads = backendConf_->get<int32_t>(kVeloxIOThreads, kVeloxIOThreadsDefault);
+  GLUTEN_CHECK(
+      ioThreads >= 0,
+      kVeloxIOThreads + " was set to negative number " + std::to_string(ioThreads) + ", this should not happen.");
+  if (ioThreads > 0) {
+    ioExecutor_ = std::make_unique<folly::CPUThreadPoolExecutor>(
+        ioThreads, std::make_unique<folly::UnboundedBlockingQueue<folly::CPUThreadPoolExecutor::CPUTask>>());
+  }
+
   initJolFilesystem();
-  initConnector(hiveConf);
 
   velox::dwio::common::registerFileSinks();
   velox::parquet::registerParquetReaderFactory();
@@ -312,33 +317,26 @@ void VeloxBackend::initCache() {
   }
 }
 
-void VeloxBackend::initConnector(const std::shared_ptr<velox::config::ConfigBase>& hiveConf) {
-  auto ioThreads = backendConf_->get<int32_t>(kVeloxIOThreads, kVeloxIOThreadsDefault);
-  GLUTEN_CHECK(
-      ioThreads >= 0,
-      kVeloxIOThreads + " was set to negative number " + std::to_string(ioThreads) + ", this should not happen.");
-  if (ioThreads > 0) {
-    ioExecutor_ = std::make_unique<folly::CPUThreadPoolExecutor>(
-        ioThreads, std::make_unique<folly::UnboundedBlockingQueue<folly::CPUThreadPoolExecutor::CPUTask>>());
-  }
-  velox::connector::registerConnector(
-      std::make_shared<velox::connector::hive::HiveConnector>(kHiveConnectorId, hiveConf, ioExecutor_.get()));
+std::shared_ptr<facebook::velox::connector::Connector> VeloxBackend::createHiveConnector(
+    const std::string& connectorId,
+    folly::Executor* ioExecutor) const { // Creates, but does not register, a hive connector with the given id.
+  return std::make_shared<velox::connector::hive::HiveConnector>(connectorId, hiveConnectorConfig_, ioExecutor);
+}
 
-  // Register value-stream connector for runtime iterator-based inputs
-  auto valueStreamDynamicFilterEnabled =
-      backendConf_->get<bool>(kValueStreamDynamicFilterEnabled, kValueStreamDynamicFilterEnabledDefault);
-  velox::connector::registerConnector(
-      std::make_shared<ValueStreamConnector>(kIteratorConnectorId, hiveConf, valueStreamDynamicFilterEnabled));
+std::shared_ptr<facebook::velox::connector::Connector> VeloxBackend::createValueStreamConnector(
+    const std::string& connectorId,
+    bool dynamicFilterEnabled) const { // Creates, but does not register, a value-stream connector.
+  return std::make_shared<ValueStreamConnector>(connectorId, hiveConnectorConfig_, dynamicFilterEnabled);
+}
 
 #ifdef GLUTEN_ENABLE_GPU
-  if (backendConf_->get<bool>(kCudfEnableTableScan, kCudfEnableTableScanDefault) &&
-      backendConf_->get<bool>(kCudfEnabled, kCudfEnabledDefault)) {
-    facebook::velox::cudf_velox::connector::hive::CudfHiveConnectorFactory factory;
-    auto hiveConnector = factory.newConnector(kCudfHiveConnectorId, hiveConf, ioExecutor_.get());
-    facebook::velox::connector::registerConnector(hiveConnector);
-  }
-#endif
+std::shared_ptr<facebook::velox::connector::Connector> VeloxBackend::createCudfHiveConnector(
+    const std::string& connectorId,
+    folly::Executor* ioExecutor) const { // Creates, but does not register, a cuDF hive connector via its factory.
+  facebook::velox::cudf_velox::connector::hive::CudfHiveConnectorFactory factory;
+  return factory.newConnector(connectorId, hiveConnectorConfig_, ioExecutor);
 }
+#endif
 
 void VeloxBackend::initUdf() {
   auto got = backendConf_->get<std::string>(kVeloxUdfLibraryPaths, "");
@@ -378,7 +376,10 @@ void VeloxBackend::tearDown() {
   // Destruct IOThreadPoolExecutor will join all threads.
   // On threads exit, thread local variables can be constructed with referencing global variables.
   // So, we need to destruct IOThreadPoolExecutor and stop the threads before global variables get destructed.
+  executor_.reset();
+  spillExecutor_.reset();
   ioExecutor_.reset();
+  ssdCacheExecutor_.reset();
   globalMemoryManager_.reset();
 
   // dump cache stats on exit if enabled
diff --git a/cpp/velox/compute/VeloxBackend.h b/cpp/velox/compute/VeloxBackend.h
index c6fbf965cf08..68791ec0f995 100644
--- a/cpp/velox/compute/VeloxBackend.h
+++ b/cpp/velox/compute/VeloxBackend.h
@@ -27,6 +27,7 @@
 #include "velox/common/caching/AsyncDataCache.h"
 #include "velox/common/config/Config.h"
 #include "velox/common/memory/MmapAllocator.h"
+#include "velox/connectors/Connector.h"
 
 #include "jni/JniHashTable.h"
 #include "memory/VeloxMemoryManager.h"
@@ -58,9 +59,31 @@ class VeloxBackend {
   }
 
   folly::Executor* executor() const {
+    return executor_.get(); // NOTE(review): nothing visible in this change assigns executor_ - confirm it is set.
+  }
+
+  folly::Executor* spillExecutor() const { // Spill pool created in init(); null when kSpillThreadNum is 0.
+    return spillExecutor_.get();
+  }
+
+  folly::Executor* ioExecutor() const { // IO pool created in init(); null when kVeloxIOThreads is 0.
     return ioExecutor_.get();
   }
 
+  std::shared_ptr<facebook::velox::connector::Connector> createHiveConnector(
+      const std::string& connectorId,
+      folly::Executor* ioExecutor) const;
+
+  std::shared_ptr<facebook::velox::connector::Connector> createValueStreamConnector(
+      const std::string& connectorId,
+      bool dynamicFilterEnabled) const;
+
+#ifdef GLUTEN_ENABLE_GPU
+  std::shared_ptr<facebook::velox::connector::Connector> createCudfHiveConnector(
+      const std::string& connectorId,
+      folly::Executor* ioExecutor) const;
+#endif
+
   void tearDown();
 
  private:
@@ -72,7 +95,6 @@ class VeloxBackend {
 
   void init(std::unique_ptr<AllocationListener> listener, const std::unordered_map<std::string, std::string>& conf);
   void initCache();
-  void initConnector(const std::shared_ptr<facebook::velox::config::ConfigBase>& hiveConf);
   void initUdf();
   std::unique_ptr<facebook::velox::cache::SsdCache> initSsdCache(uint64_t ssdSize);
 
@@ -89,9 +111,12 @@ class VeloxBackend {
   // Instance of AsyncDataCache used for all large allocations.
   std::shared_ptr<facebook::velox::cache::AsyncDataCache> asyncDataCache_;
 
-  std::unique_ptr<folly::Executor> ssdCacheExecutor_;
+  std::unique_ptr<folly::Executor> executor_;
+  std::unique_ptr<folly::Executor> spillExecutor_;
   std::unique_ptr<folly::Executor> ioExecutor_;
+  std::unique_ptr<folly::Executor> ssdCacheExecutor_;
   std::shared_ptr<facebook::velox::memory::MmapAllocator> cacheAllocator_;
+  std::shared_ptr<facebook::velox::config::ConfigBase> hiveConnectorConfig_;
 
   std::string cachePathPrefix_;
   std::string cacheFilePrefix_;
diff --git a/cpp/velox/compute/VeloxConnectorIds.h b/cpp/velox/compute/VeloxConnectorIds.h
new file mode 100644
index 000000000000..e6082bae8bdf
--- /dev/null
+++ b/cpp/velox/compute/VeloxConnectorIds.h
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <string>
+
+namespace gluten {
+
+struct VeloxConnectorIds { // Connector ids used by one runtime, plus flags recording which ones got registered.
+  std::string hive; // Id of the hive connector.
+  std::string iterator; // Id of the iterator (value-stream) connector.
+  std::string cudfHive; // Id of the cuDF hive connector.
+  bool hiveRegistered{false}; // Set from the result of registering `hive`; cleared on unregistration.
+  bool iteratorRegistered{false}; // Set from the result of registering `iterator`; cleared on unregistration.
+  bool cudfHiveRegistered{false}; // Set from the result of registering `cudfHive`; cleared on unregistration.
+};
+
+} // namespace gluten
diff --git a/cpp/velox/compute/VeloxPlanConverter.cc b/cpp/velox/compute/VeloxPlanConverter.cc
index 627bd396b7df..f3ffab59a6a8 100644
--- a/cpp/velox/compute/VeloxPlanConverter.cc
+++ b/cpp/velox/compute/VeloxPlanConverter.cc
@@ -30,12 +30,20 @@ VeloxPlanConverter::VeloxPlanConverter(
     velox::memory::MemoryPool* veloxPool,
     const facebook::velox::config::ConfigBase* veloxCfg,
     const std::vector<std::shared_ptr<ResultIterator>>& rowVectors,
+    VeloxConnectorIds connectorIds,
     const std::optional<std::string> writeFilesTempPath,
     const std::optional<std::string> writeFileName,
     bool validationMode)
     : validationMode_(validationMode),
       veloxCfg_(veloxCfg),
-      substraitVeloxPlanConverter_(veloxPool, veloxCfg, rowVectors, writeFilesTempPath, writeFileName, validationMode) {
+      substraitVeloxPlanConverter_(
+          veloxPool,
+          veloxCfg,
+          rowVectors,
+          std::move(connectorIds),
+          writeFilesTempPath,
+          writeFileName,
+          validationMode) {
   VELOX_USER_CHECK_NOT_NULL(veloxCfg_);
 }
 
diff --git a/cpp/velox/compute/VeloxPlanConverter.h b/cpp/velox/compute/VeloxPlanConverter.h
index 0b597a91f9ed..1aee2c36bd12 100644
--- a/cpp/velox/compute/VeloxPlanConverter.h
+++ b/cpp/velox/compute/VeloxPlanConverter.h
@@ -21,6 +21,7 @@
 #include <velox/core/PlanNode.h>
 #include <velox/exec/Split.h>
 
+#include "compute/VeloxConnectorIds.h"
 #include "substrait/SubstraitToVeloxPlan.h"
 #include "substrait/plan.pb.h"
 
@@ -33,6 +34,7 @@ class VeloxPlanConverter {
       facebook::velox::memory::MemoryPool* veloxPool,
       const facebook::velox::config::ConfigBase* veloxCfg,
       const std::vector<std::shared_ptr<ResultIterator>>& rowVectors,
+      VeloxConnectorIds connectorIds,
       const std::optional<std::string> writeFilesTempPath = std::nullopt,
       const std::optional<std::string> writeFileName = std::nullopt,
       bool validationMode = false);
diff --git a/cpp/velox/compute/VeloxRuntime.cc b/cpp/velox/compute/VeloxRuntime.cc
index 7c1276f49abf..c95e967e8a0b 100644
--- a/cpp/velox/compute/VeloxRuntime.cc
+++ b/cpp/velox/compute/VeloxRuntime.cc
@@ -20,19 +20,26 @@
 #include <operators/plannodes/RowVectorStream.h>
 
 #include <algorithm>
+#include <condition_variable>
 #include <filesystem>
+#include <mutex>
+#include <unordered_map>
+
+#include <folly/ScopeGuard.h>
 
 #include "VeloxBackend.h"
 #include "compute/ResultIterator.h"
 #include "compute/Runtime.h"
 #include "compute/VeloxPlanConverter.h"
 #include "config/VeloxConfig.h"
+#include "operators/plannodes/IteratorSplit.h"
 #include "operators/serializer/VeloxRowToColumnarConverter.h"
 #include "shuffle/VeloxShuffleReader.h"
 #include "shuffle/VeloxShuffleWriter.h"
 #include "utils/ConfigExtractor.h"
 #include "utils/VeloxArrowUtils.h"
 #include "utils/VeloxWholeStageDumper.h"
+#include "velox/common/process/StackTrace.h"
 
 DECLARE_bool(velox_exception_user_stacktrace_enabled);
 DECLARE_bool(velox_memory_use_hugepages);
@@ -62,6 +69,146 @@ using namespace facebook;
 
 namespace gluten {
 
+namespace {
+
+/// Forwards tasks to a parent executor while counting them, so that the destructor can wait (for at most
+/// joinTimeout) until every task submitted through this wrapper has finished and released its captures.
+class HookedExecutor final : public folly::Executor {
+ public:
+  HookedExecutor(folly::Executor* parent, std::string name, bool debug, std::chrono::milliseconds joinTimeout)
+      : parent_(parent), name_(std::move(name)), debug_(debug), joinTimeout_(joinTimeout) {}
+
+  ~HookedExecutor() override {
+    if (!join()) {
+      LOG(WARNING) << "Timed out waiting for hooked executor " << name_ << " to finish after " << joinTimeout_.count()
+                   << " ms.";
+      if (debug_) {
+        dumpOutstandingTasks();
+      }
+    }
+  }
+
+  uint8_t getNumPriorities() const override {
+    return parent_ == nullptr ? 1 : parent_->getNumPriorities();
+  }
+
+  const std::string& name() const {
+    return name_;
+  }
+
+  void dumpOutstandingTasks() const {
+    if (!debug_) {
+      return;
+    }
+    std::lock_guard<std::mutex> lock(taskMutex_);
+    if (inFlightTasks_.empty()) {
+      LOG(WARNING) << "Hooked executor " << name_ << " timed out with no tracked in-flight tasks.";
+      return;
+    }
+    for (const auto& [taskId, info] : inFlightTasks_) {
+      const auto elapsedMs =
+          std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - info.enqueueTime)
+              .count();
+      LOG(WARNING) << "Outstanding task in hooked executor " << name_ << ": taskId=" << taskId
+                   << ", elapsedMs=" << elapsedMs << ", priority=" << static_cast<int32_t>(info.priority)
+                   << ", submitStacktrace:\n"
+                   << info.submitStacktrace;
+    }
+  }
+
+ private:
+  bool join() { // Waits up to joinTimeout_ for inFlight_ to reach zero; returns false on timeout.
+    std::unique_lock<std::mutex> lock(mutex_);
+    return cv_.wait_for(lock, joinTimeout_, [&] { return inFlight_.load(std::memory_order_acquire) == 0; });
+  }
+
+ public:
+  void add(folly::Func func) override {
+    GLUTEN_CHECK(parent_ != nullptr, "Parent executor is null.");
+    inFlight_.fetch_add(1, std::memory_order_relaxed);
+    parent_->add(wrap(std::move(func), 0));
+  }
+
+  void addWithPriority(folly::Func func, int8_t priority) override {
+    GLUTEN_CHECK(parent_ != nullptr, "Parent executor is null.");
+    inFlight_.fetch_add(1, std::memory_order_relaxed);
+    parent_->addWithPriority(wrap(std::move(func), priority), priority);
+  }
+
+  struct TaskInfo {
+    std::chrono::steady_clock::time_point enqueueTime;
+    int8_t priority;
+    std::string submitStacktrace;
+  };
+
+  folly::Func wrap(folly::Func func, int8_t priority) {
+    auto* self = this;
+    const auto taskId = nextTaskId_.fetch_add(1, std::memory_order_relaxed);
+    if (debug_) {
+      TaskInfo info{
+          .enqueueTime = std::chrono::steady_clock::now(),
+          .priority = priority,
+          .submitStacktrace = velox::process::StackTrace().toString()};
+      std::lock_guard<std::mutex> lock(taskMutex_);
+      inFlightTasks_[taskId] = std::move(info);
+    }
+    return [func = std::move(func), self, taskId]() mutable {
+      auto markDone = folly::makeGuard([&] {
+        if (self->debug_) {
+          std::lock_guard<std::mutex> lock(self->taskMutex_);
+          self->inFlightTasks_.erase(taskId);
+        }
+        std::lock_guard<std::mutex> lock(self->mutex_); // Held across the decrement; see the note below.
+        if (self->inFlight_.fetch_sub(1, std::memory_order_acq_rel) == 1) {
+          self->cv_.notify_all();
+        }
+      });
+      // markDone is declared before localFunc, so the submitted callable and its captures are destroyed
+      // before inFlight_ drops. Some tasks capture AsyncLoadHolder, which keeps a MemoryPool alive; counting
+      // the task as done earlier would let VeloxRuntime teardown race with those task-owned resources.
+      // The decrement happens under mutex_ because join() checks its predicate under the same mutex: the
+      // destructor cannot see zero and free this object while a task still has to touch mutex_ or cv_.
+      auto localFunc = std::move(func);
+      localFunc();
+    };
+  }
+
+  folly::Executor* parent_;
+  std::string name_;
+  bool debug_;
+  std::chrono::milliseconds joinTimeout_;
+  std::atomic<uint64_t> nextTaskId_{0};
+  std::atomic<size_t> inFlight_{0};
+  std::mutex mutex_;
+  std::condition_variable cv_;
+  mutable std::mutex taskMutex_;
+  std::unordered_map<uint64_t, TaskInfo> inFlightTasks_;
+};
+
+std::unique_ptr<folly::Executor> makeHookedExecutor(
+    folly::Executor* parent,
+    const std::string& name,
+    bool debug,
+    std::chrono::milliseconds joinTimeout) {
+  // Wraps `parent` in a HookedExecutor labelled `name`. When there is no parent executor to
+  // forward to, no wrapper is created and the caller receives a null pointer instead.
+  // `debug` and `joinTimeout` are handed to the wrapper unchanged.
+  return parent == nullptr ? nullptr : std::make_unique<HookedExecutor>(parent, name, debug, joinTimeout);
+}
+
+std::string makeScopedConnectorId(const std::string& base, uint64_t runtimeId) {
+  return base + "-runtime-" + std::to_string(runtimeId); // Yields "<base>-runtime-<runtimeId>".
+}
+
+VeloxConnectorIds makeScopedConnectorIds(uint64_t runtimeId) {
+  return VeloxConnectorIds{ // Only the ids are set here; the *Registered flags keep their default of false.
+      .hive = makeScopedConnectorId(kHiveConnectorId, runtimeId),
+      .iterator = makeScopedConnectorId(kIteratorConnectorId, runtimeId),
+      .cudfHive = makeScopedConnectorId(kCudfHiveConnectorId, runtimeId)};
+}
+
+} // namespace
+
 VeloxRuntime::VeloxRuntime(
     const std::string& kind,
     VeloxMemoryManager* vmm,
@@ -80,6 +227,79 @@ VeloxRuntime::VeloxRuntime(
   FLAGS_velox_memory_use_hugepages = veloxCfg_->get<bool>(kMemoryUseHugePages, FLAGS_velox_memory_use_hugepages);
   FLAGS_velox_memory_pool_capacity_transfer_across_tasks = veloxCfg_->get<bool>(
       kMemoryPoolCapacityTransferAcrossTasks, FLAGS_velox_memory_pool_capacity_transfer_across_tasks);
+
+  static std::atomic<uint64_t> runtimeId{0};
+  connectorIds_ = makeScopedConnectorIds(runtimeId++);
+
+  initializeExecutors();
+  registerConnectors();
+}
+
+VeloxRuntime::~VeloxRuntime() {
+  unregisterConnectors(); // Runs first: the scoped hive connectors were created with ioExecutor_.get().
+  executor_.reset(); // Each reset destroys a HookedExecutor (if set); its destructor waits for in-flight tasks.
+  spillExecutor_.reset();
+  ioExecutor_.reset();
+}
+
+void VeloxRuntime::initializeExecutors() {
+  // All three wrappers share one join timeout, read in milliseconds from the runtime config.
+  const std::chrono::milliseconds timeout{
+      veloxCfg_->get<int32_t>(kVeloxAsyncTimeoutOnTaskStopping, kVeloxAsyncTimeoutOnTaskStoppingDefault)};
+  auto* backend = VeloxBackend::get();
+  executor_ = makeHookedExecutor(backend->executor(), kind_ + ".executor", debugModeEnabled_, timeout);
+  spillExecutor_ = makeHookedExecutor(backend->spillExecutor(), kind_ + ".spill", debugModeEnabled_, timeout);
+  ioExecutor_ = makeHookedExecutor(backend->ioExecutor(), kind_ + ".io", debugModeEnabled_, timeout);
+}
+
+void VeloxRuntime::registerConnectors() { // Registers this runtime's connectors under its scoped ids.
+  auto* backend = VeloxBackend::get(); // The backend builds each connector from its shared hive config.
+  connectorIds_.hiveRegistered =
+      velox::connector::registerConnector(backend->createHiveConnector(connectorIds_.hive, ioExecutor_.get()));
+  GLUTEN_CHECK(connectorIds_.hiveRegistered, "Failed to register scoped hive connector: " + connectorIds_.hive);
+  GLUTEN_CHECK(
+      velox::connector::hasConnector(connectorIds_.hive),
+      "Scoped hive connector not found after registration: " + connectorIds_.hive);
+  // Value-stream connector for the iterator id; its dynamic-filter switch is read from the runtime config.
+  const auto valueStreamDynamicFilterEnabled =
+      veloxCfg_->get<bool>(kValueStreamDynamicFilterEnabled, kValueStreamDynamicFilterEnabledDefault);
+  connectorIds_.iteratorRegistered = velox::connector::registerConnector(
+      backend->createValueStreamConnector(connectorIds_.iterator, valueStreamDynamicFilterEnabled));
+  GLUTEN_CHECK(
+      connectorIds_.iteratorRegistered, "Failed to register scoped iterator connector: " + connectorIds_.iterator);
+  GLUTEN_CHECK(
+      velox::connector::hasConnector(connectorIds_.iterator),
+      "Scoped iterator connector not found after registration: " + connectorIds_.iterator);
+  // The cuDF hive connector is optional: GPU builds only, and only when both cuDF switches are enabled.
+#ifdef GLUTEN_ENABLE_GPU
+  if (veloxCfg_->get<bool>(kCudfEnableTableScan, kCudfEnableTableScanDefault) &&
+      veloxCfg_->get<bool>(kCudfEnabled, kCudfEnabledDefault)) {
+    connectorIds_.cudfHiveRegistered = velox::connector::registerConnector(
+        backend->createCudfHiveConnector(connectorIds_.cudfHive, ioExecutor_.get()));
+    GLUTEN_CHECK(
+        connectorIds_.cudfHiveRegistered, "Failed to register scoped cudf hive connector: " + connectorIds_.cudfHive);
+    GLUTEN_CHECK(
+        velox::connector::hasConnector(connectorIds_.cudfHive),
+        "Scoped cudf hive connector not found after registration: " + connectorIds_.cudfHive);
+  }
+#endif
+}
+
+void VeloxRuntime::unregisterConnectors() { // Undoes registerConnectors() in reverse registration order.
+#ifdef GLUTEN_ENABLE_GPU
+  if (connectorIds_.cudfHiveRegistered) {
+    velox::connector::unregisterConnector(connectorIds_.cudfHive);
+    connectorIds_.cudfHiveRegistered = false; // Cleared so that a repeated call skips this connector.
+  }
+#endif
+  if (connectorIds_.iteratorRegistered) {
+    velox::connector::unregisterConnector(connectorIds_.iterator);
+    connectorIds_.iteratorRegistered = false;
+  }
+  if (connectorIds_.hiveRegistered) {
+    velox::connector::unregisterConnector(connectorIds_.hive);
+    connectorIds_.hiveRegistered = false;
+  }
 }
 
 void VeloxRuntime::parsePlan(const uint8_t* data, int32_t size) {
@@ -153,7 +373,8 @@ void VeloxRuntime::getInfoAndIds(
 
 std::string VeloxRuntime::planString(bool details, const std::unordered_map<std::string, std::string>& sessionConf) {
   auto veloxMemoryPool = gluten::defaultLeafVeloxMemoryPool();
-  VeloxPlanConverter veloxPlanConverter(veloxMemoryPool.get(), veloxCfg_.get(), {}, std::nullopt, std::nullopt, true);
+  VeloxPlanConverter veloxPlanConverter(
+      veloxMemoryPool.get(), veloxCfg_.get(), {}, connectorIds_, std::nullopt, std::nullopt, true);
   auto veloxPlan = veloxPlanConverter.toVeloxPlan(substraitPlan_, localFiles_);
   return veloxPlan->toString(details, true);
 }
@@ -173,6 +394,7 @@ std::shared_ptr<ResultIterator> VeloxRuntime::createResultIterator(
       memoryManager()->getLeafMemoryPool().get(),
       veloxCfg_.get(),
       inputs,
+      connectorIds_,
       *localWriteFilesTempPath(),
       *localWriteFileName());
   veloxPlan_ = veloxPlanConverter.toVeloxPlan(substraitPlan_, std::move(localFiles_));
@@ -194,6 +416,9 @@ std::shared_ptr<ResultIterator> VeloxRuntime::createResultIterator(
       scanIds,
       scanInfos,
       streamIds,
+      executor_.get(),
+      spillExecutor_.get(),
+      connectorIds_,
       spillDir,
       veloxCfg_,
       taskInfo_.has_value() ? taskInfo_.value() : SparkTaskInfo{});
diff --git a/cpp/velox/compute/VeloxRuntime.h b/cpp/velox/compute/VeloxRuntime.h
index 2cb75c5a124c..37f4da33439b 100644
--- a/cpp/velox/compute/VeloxRuntime.h
+++ b/cpp/velox/compute/VeloxRuntime.h
@@ -19,9 +19,11 @@
 
 #include "WholeStageResultIterator.h"
 #include "compute/Runtime.h"
+#include "compute/VeloxConnectorIds.h"
 #ifdef GLUTEN_ENABLE_ENHANCED_FEATURES
 #include "iceberg/IcebergWriter.h"
 #endif
+#include <folly/Executor.h>
 #include "memory/VeloxMemoryManager.h"
 #include "operators/serializer/VeloxColumnarBatchSerializer.h"
 #include "operators/serializer/VeloxColumnarToRowConverter.h"
@@ -42,6 +44,8 @@ class VeloxRuntime final : public Runtime {
       VeloxMemoryManager* vmm,
       const std::unordered_map<std::string, std::string>& confMap);
 
+  ~VeloxRuntime() override;
+
   void setSparkTaskInfo(SparkTaskInfo taskInfo) override {
     static std::atomic<uint32_t> vtId{0};
     taskInfo.vId = vtId++;
@@ -120,6 +124,22 @@ class VeloxRuntime final : public Runtime {
     return debugModeEnabled_;
   }
 
+  folly::Executor* executor() const { // Wrapper built by initializeExecutors(); null if the backend has no pool.
+    return executor_.get();
+  }
+
+  folly::Executor* spillExecutor() const { // Wrapper over the backend spill pool; null if the backend has none.
+    return spillExecutor_.get();
+  }
+
+  folly::Executor* ioExecutor() const { // Wrapper over the backend IO pool; null if the backend has none.
+    return ioExecutor_.get();
+  }
+
+  const VeloxConnectorIds& connectorIds() const { // Connector ids generated for this runtime instance.
+    return connectorIds_;
+  }
+
   static void getInfoAndIds(
       const std::unordered_map<facebook::velox::core::PlanNodeId, std::shared_ptr<SplitInfo>>& splitInfoMap,
       const std::unordered_set<facebook::velox::core::PlanNodeId>& leafPlanNodeIds,
@@ -128,9 +148,17 @@ class VeloxRuntime final : public Runtime {
       std::vector<facebook::velox::core::PlanNodeId>& streamIds);
 
  private:
+  void initializeExecutors();
+  void registerConnectors();
+  void unregisterConnectors();
+
   std::shared_ptr<const facebook::velox::core::PlanNode> veloxPlan_;
   std::shared_ptr<facebook::velox::config::ConfigBase> veloxCfg_;
   bool debugModeEnabled_{false};
+  std::unique_ptr<folly::Executor> executor_;
+  std::unique_ptr<folly::Executor> spillExecutor_;
+  std::unique_ptr<folly::Executor> ioExecutor_;
+  VeloxConnectorIds connectorIds_;
 
   std::unordered_map<int32_t, std::shared_ptr<VeloxColumnarBatch>> emptySchemaBatchLoopUp_;
 };
diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc
index 356913b5f795..2c1effba20c3 100644
--- a/cpp/velox/compute/WholeStageResultIterator.cc
+++ b/cpp/velox/compute/WholeStageResultIterator.cc
@@ -19,7 +19,6 @@
 #include "VeloxPlanConverter.h"
 #include "VeloxRuntime.h"
 #include "config/VeloxConfig.h"
-#include "jni/JniCommon.h"
 #include "utils/ConfigExtractor.h"
 #include "velox/connectors/hive/HiveConfig.h"
 #include "velox/connectors/hive/HiveConnectorSplit.h"
@@ -76,6 +75,9 @@ WholeStageResultIterator::WholeStageResultIterator(
     const std::vector<facebook::velox::core::PlanNodeId>& scanNodeIds,
     const std::vector<std::shared_ptr<SplitInfo>>& scanInfos,
     const std::vector<facebook::velox::core::PlanNodeId>& streamIds,
+    folly::Executor* executor,
+    folly::Executor* spillExecutor,
+    VeloxConnectorIds connectorIds,
     const std::string spillDir,
     const std::shared_ptr<facebook::velox::config::ConfigBase>& veloxCfg,
     const SparkTaskInfo& taskInfo)
@@ -85,21 +87,14 @@ WholeStageResultIterator::WholeStageResultIterator(
       enableCudf_(veloxCfg_->get<bool>(kCudfEnabled, kCudfEnabledDefault)),
 #endif
       taskInfo_(taskInfo),
+      executor_(executor),
       veloxPlan_(planNode),
+      spillExecutor_(spillExecutor),
+      connectorIds_(std::move(connectorIds)),
       scanNodeIds_(scanNodeIds),
       scanInfos_(scanInfos),
       streamIds_(streamIds) {
   spillStrategy_ = veloxCfg_->get<std::string>(kSpillStrategy, kSpillStrategyDefaultValue);
-  auto spillThreadNum = veloxCfg_->get<uint32_t>(kSpillThreadNum, kSpillThreadNumDefaultValue);
-  if (spillThreadNum > 0) {
-    // INVARIANT: spillExecutor_ threads must never call libhdfs.
-    // JniAwareThreadFactory calls DetachCurrentThread at thread exit (inside the
-    // thread fn body, before any pthread_key destructor). If libhdfs were used on
-    // these threads, hdfsThreadDestructor would fire afterward with a stale JNIEnv*,
-    // causing SIGSEGV. Spill always uses local or heap-over-local filesystem.
-    spillExecutor_ = std::make_shared<folly::CPUThreadPoolExecutor>(
-        spillThreadNum, std::make_shared<gluten::JniAwareThreadFactory>());
-  }
   getOrderedNodeIds(veloxPlan_, orderedNodeIds_);
 
   auto fileSystem = velox::filesystems::getFileSystem(spillDir, nullptr);
@@ -166,7 +161,7 @@ WholeStageResultIterator::WholeStageResultIterator(
         std::unordered_map<std::string, std::string> customSplitInfo{{"table_format", "hive-iceberg"}};
         auto deleteFiles = icebergSplitInfo->deleteFilesVec[idx];
         split = std::make_shared<velox::connector::hive::iceberg::HiveIcebergSplit>(
-            kHiveConnectorId,
+            connectorIds_.hive,
             paths[idx],
             format,
             starts[idx],
@@ -180,11 +175,11 @@ WholeStageResultIterator::WholeStageResultIterator(
             metadataColumn,
             properties[idx]);
       } else {
-        auto connectorId = kHiveConnectorId;
+        auto connectorId = connectorIds_.hive;
 #ifdef GLUTEN_ENABLE_GPU
         if (canUseCudfConnector && enableCudf_ &&
             veloxCfg_->get<bool>(kCudfEnableTableScan, kCudfEnableTableScanDefault)) {
-          connectorId = kCudfHiveConnectorId;
+          connectorId = connectorIds_.cudfHive;
         }
 #endif
         split = std::make_shared<velox::connector::hive::HiveConnectorSplit>(
@@ -219,14 +214,21 @@ WholeStageResultIterator::WholeStageResultIterator(
 
 std::shared_ptr<velox::core::QueryCtx> WholeStageResultIterator::createNewVeloxQueryCtx() {
   std::unordered_map<std::string, std::shared_ptr<velox::config::ConfigBase>> connectorConfigs;
-  connectorConfigs[kHiveConnectorId] = createHiveConnectorSessionConfig(veloxCfg_);
+  auto hiveSessionConfig = createHiveConnectorSessionConfig(veloxCfg_);
+  connectorConfigs[connectorIds_.hive] = hiveSessionConfig;
+  connectorConfigs[connectorIds_.iterator] = hiveSessionConfig;
+#ifdef GLUTEN_ENABLE_GPU
+  if (!connectorIds_.cudfHive.empty()) {
+    connectorConfigs[connectorIds_.cudfHive] = hiveSessionConfig;
+  }
+#endif
   std::shared_ptr<velox::core::QueryCtx> ctx = velox::core::QueryCtx::create(
-      nullptr,
+      executor_,
       facebook::velox::core::QueryConfig{getQueryContextConf()},
       connectorConfigs,
       gluten::VeloxBackend::get()->getAsyncDataCache(),
       memoryManager_->getAggregateMemoryPool(),
-      spillExecutor_.get(),
+      spillExecutor_,
       fmt::format(
           "Gluten_Stage_{}_TID_{}_VTID_{}",
           std::to_string(taskInfo_.stageId),
@@ -372,7 +374,7 @@ void WholeStageResultIterator::addIteratorSplits(const std::vector<std::shared_p
     if (inputIterators[i] == nullptr) {
       continue;
     }
-    auto connectorSplit = std::make_shared<IteratorConnectorSplit>(kIteratorConnectorId, inputIterators[i]);
+    auto connectorSplit = std::make_shared<IteratorConnectorSplit>(connectorIds_.iterator, inputIterators[i]);
     exec::Split split(folly::copy(connectorSplit), -1);
     task_->addSplit(streamIds_[i], std::move(split));
   }
diff --git a/cpp/velox/compute/WholeStageResultIterator.h b/cpp/velox/compute/WholeStageResultIterator.h
index 9bb6ef8b11f7..4fcc002ffd22 100644
--- a/cpp/velox/compute/WholeStageResultIterator.h
+++ b/cpp/velox/compute/WholeStageResultIterator.h
@@ -16,7 +16,9 @@
  */
 #pragma once
 
+#include <folly/Executor.h>
 #include "compute/Runtime.h"
+#include "compute/VeloxConnectorIds.h"
 #include "iceberg/IcebergPlanConverter.h"
 #include "memory/SplitAwareColumnarBatchIterator.h"
 #include "memory/VeloxColumnarBatch.h"
@@ -41,14 +43,22 @@ class WholeStageResultIterator : public SplitAwareColumnarBatchIterator {
       const std::vector<facebook::velox::core::PlanNodeId>& scanNodeIds,
       const std::vector<std::shared_ptr<SplitInfo>>& scanInfos,
       const std::vector<facebook::velox::core::PlanNodeId>& streamIds,
+      folly::Executor* executor,
+      folly::Executor* spillExecutor,
+      VeloxConnectorIds connectorIds,
       const std::string spillDir,
       const std::shared_ptr<facebook::velox::config::ConfigBase>& veloxCfg,
       const SparkTaskInfo& taskInfo);
 
   virtual ~WholeStageResultIterator() {
-    if (task_ != nullptr && task_->isRunning()) {
-      // calling .wait() may take no effect in single thread execution mode
-      task_->requestCancel().wait();
+    if (task_ != nullptr) {
+      if (task_->isRunning()) {
+        // calling .wait() may have no effect in single-threaded execution mode
+        task_->requestCancel().wait();
+      }
+      auto deletionFuture = task_->taskDeletionFuture();
+      task_.reset();
+      deletionFuture.wait();
     }
 #ifdef GLUTEN_ENABLE_GPU
     if (enableCudf_) {
@@ -126,12 +136,14 @@ class WholeStageResultIterator : public SplitAwareColumnarBatchIterator {
   const bool enableCudf_;
 #endif
   const SparkTaskInfo taskInfo_;
+  folly::Executor* executor_;
   std::shared_ptr<facebook::velox::exec::Task> task_;
   std::shared_ptr<const facebook::velox::core::PlanNode> veloxPlan_;
 
   /// Spill.
   std::string spillStrategy_;
-  std::shared_ptr<folly::Executor> spillExecutor_ = nullptr;
+  folly::Executor* spillExecutor_ = nullptr;
+  VeloxConnectorIds connectorIds_;
 
   /// Metrics
   std::unique_ptr<Metrics> metrics_{};
diff --git a/cpp/velox/cudf/CudfPlanValidator.cc b/cpp/velox/cudf/CudfPlanValidator.cc
index 346620096697..f93b5d5ded51 100644
--- a/cpp/velox/cudf/CudfPlanValidator.cc
+++ b/cpp/velox/cudf/CudfPlanValidator.cc
@@ -47,7 +47,7 @@ bool CudfPlanValidator::validate(const ::substrait::Plan& substraitPlan) {
   std::shared_ptr<facebook::velox::config::ConfigBase> veloxCfg = std::make_shared<facebook::velox::config::ConfigBase>(
       std::unordered_map<std::string, std::string>{{kCudfEnabled, "true"}});
   VeloxPlanConverter veloxPlanConverter(
-      veloxMemoryPool.get(), veloxCfg.get(), inputs, std::nullopt, std::nullopt, true);
+      veloxMemoryPool.get(), veloxCfg.get(), inputs, {}, std::nullopt, std::nullopt, true);
   auto planNode = veloxPlanConverter.toVeloxPlan(substraitPlan, localFiles);
   std::unordered_set<velox::core::PlanNodeId> emptySet;
   velox::core::PlanFragment planFragment{planNode, velox::core::ExecutionStrategy::kUngrouped, 1, emptySet};
diff --git a/cpp/velox/memory/VeloxMemoryManager.cc b/cpp/velox/memory/VeloxMemoryManager.cc
index d829516e0dde..f628b8cdaede 100644
--- a/cpp/velox/memory/VeloxMemoryManager.cc
+++ b/cpp/velox/memory/VeloxMemoryManager.cc
@@ -31,8 +31,6 @@
 #include "memory/ArrowMemoryPool.h"
 #include "utils/Exception.h"
 
-DECLARE_int32(gluten_velox_async_timeout_on_task_stopping);
-
 namespace gluten {
 
 using namespace facebook;
@@ -443,26 +441,9 @@ bool VeloxMemoryManager::tryDestructSafe() {
 }
 
 VeloxMemoryManager::~VeloxMemoryManager() {
-  static const uint32_t kWaitTimeoutMs = FLAGS_gluten_velox_async_timeout_on_task_stopping; // 30s by default
-  uint32_t accumulatedWaitMs = 0UL;
-  bool destructed = false;
-  for (int32_t tryCount = 0; accumulatedWaitMs < kWaitTimeoutMs; tryCount++) {
-    destructed = tryDestructSafe();
-    if (destructed) {
-      if (tryCount > 0) {
-        LOG(INFO) << "All the outstanding memory resources successfully released. ";
-      }
-      break;
-    }
-    uint32_t waitMs = 50 * static_cast<uint32_t>(pow(1.5, tryCount)); // 50ms, 75ms, 112.5ms ...
-    LOG(INFO) << "There are still outstanding Velox memory allocations. Waiting for " << waitMs
-              << " ms to let possible async tasks done... ";
-    usleep(waitMs * 1000);
-    accumulatedWaitMs += waitMs;
-  }
+  bool destructed = tryDestructSafe();
   if (!destructed) {
-    LOG(ERROR) << "Failed to release Velox memory manager after " << accumulatedWaitMs
-               << "ms as there are still outstanding memory resources. ";
+    LOG(ERROR) << "Failed to release Velox memory manager as there are still outstanding memory resources. ";
   }
 #ifdef ENABLE_JEMALLOC_STATS
   malloc_stats_print(nullptr, nullptr, nullptr);
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
index ec17594cc094..5477176ce85f 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
@@ -874,7 +874,7 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
   const auto& compressionKind =
       writerOptions->compressionKind.value_or(common::CompressionKind::CompressionKind_SNAPPY);
   std::shared_ptr<core::InsertTableHandle> tableHandle = std::make_shared<core::InsertTableHandle>(
-      kHiveConnectorId,
+      connectorIds_.hive,
       makeHiveInsertTableHandle(
           tableColumnNames, /*inputType->names() clolumn name is different*/
           inputType->children(),
@@ -1408,7 +1408,7 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::constructValueStreamNode(
   // Create TableHandle
   bool dynamicFilterEnabled =
       veloxCfg_->get<bool>(kValueStreamDynamicFilterEnabled, kValueStreamDynamicFilterEnabledDefault);
-  auto tableHandle = std::make_shared<ValueStreamTableHandle>(kIteratorConnectorId, dynamicFilterEnabled);
+  auto tableHandle = std::make_shared<ValueStreamTableHandle>(connectorIds_.iterator, dynamicFilterEnabled);
 
   // Create column assignments
   connector::ColumnHandleMap assignments;
@@ -1573,11 +1573,11 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
 
   connector::ConnectorTableHandlePtr tableHandle;
   auto remainingFilter = readRel.has_filter() ? exprConverter_->toVeloxExpr(readRel.filter(), baseSchema) : nullptr;
-  auto connectorId = kHiveConnectorId;
+  auto connectorId = connectorIds_.hive;
   if (useCudfTableHandle(splitInfos_) && veloxCfg_->get<bool>(kCudfEnableTableScan, kCudfEnableTableScanDefault) &&
       veloxCfg_->get<bool>(kCudfEnabled, kCudfEnabledDefault)) {
 #ifdef GLUTEN_ENABLE_GPU
-    connectorId = kCudfHiveConnectorId;
+    connectorId = connectorIds_.cudfHive;
 #endif
   }
   common::SubfieldFilters subfieldFilters;
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.h b/cpp/velox/substrait/SubstraitToVeloxPlan.h
index 47bf3a0525b1..373601916d4d 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.h
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.h
@@ -19,6 +19,7 @@
 
 #include "SubstraitToVeloxExpr.h"
 #include "TypeUtils.h"
+#include "compute/VeloxConnectorIds.h"
 #include "velox/connectors/hive/FileProperties.h"
 #include "velox/connectors/hive/TableHandle.h"
 #include "velox/core/PlanNode.h"
@@ -80,12 +81,14 @@ class SubstraitToVeloxPlanConverter {
       memory::MemoryPool* pool,
       const facebook::velox::config::ConfigBase* veloxCfg,
       const std::vector<std::shared_ptr<ResultIterator>>& inputIters,
+      VeloxConnectorIds connectorIds,
       const std::optional<std::string> writeFilesTempPath = std::nullopt,
       const std::optional<std::string> writeFileName = std::nullopt,
       bool validationMode = false)
       : pool_(pool),
         veloxCfg_(veloxCfg),
         inputIters_(inputIters),
+        connectorIds_(std::move(connectorIds)),
         writeFilesTempPath_(writeFilesTempPath),
         writeFileName_(writeFileName),
         validationMode_(validationMode) {
@@ -308,6 +311,8 @@ class SubstraitToVeloxPlanConverter {
   /// Input row-vectors for query trace mode (ValuesNode / cuDF ValueStream support)
   std::vector<std::shared_ptr<ResultIterator>> inputIters_;
 
+  VeloxConnectorIds connectorIds_;
+
   /// The temporary path used to write files.
   std::optional<std::string> writeFilesTempPath_;
   std::optional<std::string> writeFileName_;
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
index 6bfca5ec36b8..55d2d2c96843 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
@@ -19,6 +19,8 @@
 
 #include <unordered_map>
 #include "SubstraitToVeloxPlan.h"
+#include "config/VeloxConfig.h"
+#include "operators/plannodes/IteratorSplit.h"
 #include "velox/core/QueryCtx.h"
 
 using namespace facebook;
@@ -34,7 +36,13 @@ class SubstraitToVeloxPlanValidator {
         {velox::core::QueryConfig::kSparkPartitionId, "0"}, {velox::core::QueryConfig::kSessionTimezone, "UTC"}};
     veloxCfg_ = std::make_shared<facebook::velox::config::ConfigBase>(std::move(configs));
     planConverter_ = std::make_unique<SubstraitToVeloxPlanConverter>(
-        pool, veloxCfg_.get(), std::vector<std::shared_ptr<ResultIterator>>{}, std::nullopt, std::nullopt, true);
+        pool,
+        veloxCfg_.get(),
+        std::vector<std::shared_ptr<ResultIterator>>{},
+        VeloxConnectorIds{.hive = kHiveConnectorId, .iterator = kIteratorConnectorId, .cudfHive = kCudfHiveConnectorId},
+        std::nullopt,
+        std::nullopt,
+        true);
     queryCtx_ = velox::core::QueryCtx::create(nullptr, velox::core::QueryConfig(veloxCfg_->rawConfigs()));
     // An execution context used for function validation.
     execCtx_ = std::make_unique<velox::core::ExecCtx>(pool, queryCtx_.get());
diff --git a/cpp/velox/tests/FunctionTest.cc b/cpp/velox/tests/FunctionTest.cc
index 2eb13402cb46..74be36ee0f54 100644
--- a/cpp/velox/tests/FunctionTest.cc
+++ b/cpp/velox/tests/FunctionTest.cc
@@ -47,7 +47,8 @@ class FunctionTest : public ::testing::Test, public test::VectorTestBase {
       std::make_shared<gluten::SubstraitToVeloxPlanConverter>(
           pool(),
           veloxCfg_.get(),
-          std::vector<std::shared_ptr<ResultIterator>>());
+          std::vector<std::shared_ptr<ResultIterator>>{},
+          VeloxConnectorIds{});
 };
 
 TEST_F(FunctionTest, makeNames) {
diff --git a/cpp/velox/tests/Substrait2VeloxPlanConversionTest.cc b/cpp/velox/tests/Substrait2VeloxPlanConversionTest.cc
index 8222f74caae6..a1d1cea02b77 100644
--- a/cpp/velox/tests/Substrait2VeloxPlanConversionTest.cc
+++ b/cpp/velox/tests/Substrait2VeloxPlanConversionTest.cc
@@ -71,8 +71,11 @@ class Substrait2VeloxPlanConversionTest : public exec::test::HiveConnectorTestBa
   std::shared_ptr<exec::test::TempDirectoryPath> tmpDir_{exec::test::TempDirectoryPath::create()};
   std::shared_ptr<facebook::velox::config::ConfigBase> veloxCfg_ =
       std::make_shared<facebook::velox::config::ConfigBase>(std::unordered_map<std::string, std::string>());
-  std::shared_ptr<VeloxPlanConverter> planConverter_ =
-      std::make_shared<VeloxPlanConverter>(pool(), veloxCfg_.get(), std::vector<std::shared_ptr<ResultIterator>>());
+  std::shared_ptr<VeloxPlanConverter> planConverter_ = std::make_shared<VeloxPlanConverter>(
+      pool(),
+      veloxCfg_.get(),
+      std::vector<std::shared_ptr<ResultIterator>>{},
+      VeloxConnectorIds{.hive = facebook::velox::exec::test::kHiveConnectorId});
 };
 
 // This test will firstly generate mock TPC-H lineitem ORC file. Then, Velox's
diff --git a/cpp/velox/tests/Substrait2VeloxValuesNodeConversionTest.cc b/cpp/velox/tests/Substrait2VeloxValuesNodeConversionTest.cc
index f903481ee972..38d0ebcef7bf 100644
--- a/cpp/velox/tests/Substrait2VeloxValuesNodeConversionTest.cc
+++ b/cpp/velox/tests/Substrait2VeloxValuesNodeConversionTest.cc
@@ -43,7 +43,13 @@ TEST_F(Substrait2VeloxValuesNodeConversionTest, valuesNode) {
   JsonToProtoConverter::readFromFile(planPath, substraitPlan);
   auto veloxCfg = std::make_shared<facebook::velox::config::ConfigBase>(std::unordered_map<std::string, std::string>());
   std::shared_ptr<SubstraitToVeloxPlanConverter> planConverter_ = std::make_shared<SubstraitToVeloxPlanConverter>(
-      pool_.get(), veloxCfg.get(), std::vector<std::shared_ptr<ResultIterator>>(), std::nullopt, std::nullopt, true);
+      pool_.get(),
+      veloxCfg.get(),
+      std::vector<std::shared_ptr<ResultIterator>>{},
+      VeloxConnectorIds{},
+      std::nullopt,
+      std::nullopt,
+      true);
   auto veloxPlan = planConverter_->toVeloxPlan(substraitPlan);
 
   RowVectorPtr expectedData = makeRowVector(
diff --git a/cpp/velox/tests/VeloxSubstraitRoundTripTest.cc b/cpp/velox/tests/VeloxSubstraitRoundTripTest.cc
index f8b4b04ca243..61add0d7bd32 100644
--- a/cpp/velox/tests/VeloxSubstraitRoundTripTest.cc
+++ b/cpp/velox/tests/VeloxSubstraitRoundTripTest.cc
@@ -75,7 +75,8 @@ class VeloxSubstraitRoundTripTest : public OperatorTestBase {
         std::make_shared<SubstraitToVeloxPlanConverter>(
             pool_.get(),
             veloxCfg.get(),
-            std::vector<std::shared_ptr<ResultIterator>>(),
+            std::vector<std::shared_ptr<ResultIterator>>{},
+            VeloxConnectorIds{},
             std::nullopt,
             std::nullopt,
             true);
@@ -102,7 +103,8 @@ class VeloxSubstraitRoundTripTest : public OperatorTestBase {
           std::make_shared<SubstraitToVeloxPlanConverter>(
               pool_.get(),
               veloxCfg.get(),
-              std::vector<std::shared_ptr<ResultIterator>>(),
+              std::vector<std::shared_ptr<ResultIterator>>{},
+              VeloxConnectorIds{},
               std::nullopt,
               std::nullopt,
               true);
diff --git a/docs/velox-configuration.md b/docs/velox-configuration.md
index 767875bb167e..ec929a3f0d96 100644
--- a/docs/velox-configuration.md
+++ b/docs/velox-configuration.md
@@ -15,7 +15,7 @@ nav_order: 16
 | spark.gluten.sql.columnar.backend.velox.SplitPreloadPerDriver                    | 2                 | The split preload per task                                                                                                                                                                                                                                                                                                                                                                                                                            |
 | spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct          | 90                | If partial aggregation aggregationPct greater than this value, partial aggregation may be early abandoned. Note: this option only works when flushable partial aggregation is enabled. Ignored when spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false.                                                                                                                                                                        |
 | spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows         | 100000            | If partial aggregation input rows number greater than this value,  partial aggregation may be early abandoned. Note: this option only works when flushable partial aggregation is enabled. Ignored when spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false.                                                                                                                                                                    |
-| spark.gluten.sql.columnar.backend.velox.asyncTimeoutOnTaskStopping               | 30000ms           | Timeout for asynchronous execution when task is being stopped in Velox backend. It's recommended to set to a number larger than network connection timeout that the possible async tasks are relying on.                                                                                                                                                                                                                                              |
+| spark.gluten.sql.columnar.backend.velox.asyncTimeoutOnTaskStopping               | 30000ms           | Timeout in milliseconds when waiting for runtime-scoped async work to finish during teardown.                                                                                                                                                                                                                                                                                                                                                         |
 | spark.gluten.sql.columnar.backend.velox.cacheEnabled                             | false             | Enable Velox cache, default off. It's recommended to enablesoft-affinity as well when enable velox cache.                                                                                                                                                                                                                                                                                                                                             |
 | spark.gluten.sql.columnar.backend.velox.cachePrefetchMinPct                      | 0                 | Set prefetch cache min pct for velox file scan                                                                                                                                                                                                                                                                                                                                                                                                        |
 | spark.gluten.sql.columnar.backend.velox.checkUsageLeak                           | true              | Enable check memory usage leak.                                                                                                                                                                                                                                                                                                                                                                                                                       |