feat: ExaTrkX edge building KDTree on CPU + fixes + refactor + tests (#…

…2360) * Replaces brute force edge building with `Acts::KDTree` method * Abstracts `std::vector` to `torch::Tensor` conversions * Fixes bug in edge duplicate removal * Add more unit tests and enable in CI
acts-project · Aug 16, 2023 · 0486e42 · 0486e42
1 parent 9cd59fd
commit 0486e42
Show file tree

Hide file tree

Showing 12 changed files with 640 additions and 288 deletions.
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -74,7 +74,6 @@ build_exatrkx:
       - build/
     exclude:
       - build/**/*.o
-      - build/bin/ActsUnitTest*
       - build/bin/ActsIntegrationTest*
 
   script:
@@ -96,13 +95,24 @@ build_exatrkx:
       -DCMAKE_CUDA_ARCHITECTURES="75;86"
       -DACTS_BUILD_PLUGIN_EXATRKX=ON
       -DACTS_BUILD_EXAMPLES_EXATRKX=ON
+      -DACTS_BUILD_UNITTESTS=ON
       -DACTS_EXATRKX_ENABLE_TORCH=ON
       -DACTS_EXATRKX_ENABLE_ONNX=ON
       -DACTS_BUILD_EXAMPLES_PYTHON_BINDINGS=ON
       -DACTS_ENABLE_LOG_FAILURE_THRESHOLD=ON
     - cmake --build build -- -j3
 
-test_exatrkx:
+test_exatrkx_unittests:
+  stage: test
+  needs:
+    - build_exatrkx
+  image: ghcr.io/acts-project/ubuntu2004_exatrkx:v41
+  tags:
+    - docker-gpu-nvidia
+  script:
+    - ctest --test-dir build -R ExaTrkX
+
+test_exatrkx_python:
   stage: test
   needs:
     - build_exatrkx

diff --git a/Examples/Python/tests/root_file_hashes.txt b/Examples/Python/tests/root_file_hashes.txt
@@ -86,7 +86,7 @@ test_root_material_writer__material.root: e3b0c44298fc1c149afbf4c8996fb92427ae41
 test_root_clusters_writer[configPosConstructor]__clusters.root: 97f04fdd2c0eef4d37dc8732dd25ab49a90bb51925b2638d94826becf5059fae
 test_root_clusters_writer[configKwConstructor]__clusters.root: 97f04fdd2c0eef4d37dc8732dd25ab49a90bb51925b2638d94826becf5059fae
 test_root_clusters_writer[kwargsConstructor]__clusters.root: 97f04fdd2c0eef4d37dc8732dd25ab49a90bb51925b2638d94826becf5059fae
-test_exatrkx[cpu-torch]__performance_track_finding.root: 926d5056c290f1f35d0564e3781c5a1953f35c7f03095ce6420e8814b6e0ab84
+test_exatrkx[cpu-torch]__performance_track_finding.root: 25c8169fe0a0f12aced3dcd729d15a666c9795514cfc62d68a5567af0bc2a262
 test_exatrkx[gpu-onnx]__performance_track_finding.root: c232d638e53f0f5394d94e8343d1c4f34cf551aaab13db3f8ade4b1fb48b26dd
 test_exatrkx[gpu-torch]__performance_track_finding.root: 25c8169fe0a0f12aced3dcd729d15a666c9795514cfc62d68a5567af0bc2a262
 test_ML_Ambiguity_Solver__performance_ambiML.root: 080e183e758b8593a9c233e2d1b4d213f28fdcb18d82acefdac7c9a5a5763bfc
diff --git a/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/buildEdges.hpp b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/buildEdges.hpp
diff --git a/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/detail/TensorVectorConversion.hpp b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/detail/TensorVectorConversion.hpp
@@ -0,0 +1,92 @@
+// This file is part of the Acts project.
+//
+// Copyright (C) 2023 CERN for the benefit of the Acts project
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#pragma once
+
+#include "Acts/Utilities/Concepts.hpp"
+
+#include <cstdint>
+#include <vector>
+
+#include <torch/torch.h>
+
+namespace Acts::detail {
+
+/// Compile-time map from a C++ scalar type to the matching torch::Dtype.
+/// Specialized below for int64_t, int32_t, int16_t, int8_t, float and double.
+/// The primary template has no `type` member, so accessing
+/// `TorchTypeMap<T>::type` for an unmapped T fails to compile.
+template <typename T>
+struct TorchTypeMap {};
+
+template <>
+struct TorchTypeMap<int64_t> {
+  constexpr static torch::Dtype type = torch::kInt64;
+};
+
+template <>
+struct TorchTypeMap<int32_t> {
+  constexpr static torch::Dtype type = torch::kInt32;
+};
+
+template <>
+struct TorchTypeMap<int16_t> {
+  constexpr static torch::Dtype type = torch::kInt16;
+};
+
+template <>
+struct TorchTypeMap<int8_t> {
+  constexpr static torch::Dtype type = torch::kInt8;
+};
+
+template <>
+struct TorchTypeMap<float> {
+  constexpr static torch::Dtype type = torch::kFloat32;
+};
+
+template <>
+struct TorchTypeMap<double> {
+  constexpr static torch::Dtype type = torch::kFloat64;
+};
+
+/// Converts vector to 2D tensor of shape (vec.size() / cols, cols) on CPU.
+/// Make sure the number of elements in your vector is a multiple of `cols`
+/// (only checked via assert) and that `cols` is not zero!
+/// @Note Input must be mutable, due to torch API.
+/// @Note Tensor does not take ownership! `.clone()` afterwards to get
+/// ownership of the data; until then `vec` must outlive the returned tensor.
+/// NOTE(review): `assert` is used but <cassert> is not included directly by
+/// this header; `cols == 0` would make the modulo/division below undefined.
+template <typename T>
+at::Tensor vectorToTensor2D(std::vector<T> &vec, std::size_t cols) {
+  assert(vec.size() % cols == 0);
+
+  // dtype is looked up from the element type; the data always lives on CPU
+  auto opts =
+      at::TensorOptions().dtype(TorchTypeMap<T>::type).device(torch::kCPU);
+
+  // NOTE(review): `long` is 32 bit on some platforms (e.g. LLP64) while torch
+  // sizes are 64 bit — consider int64_t here.
+  return torch::from_blob(
+      vec.data(),
+      {static_cast<long>(vec.size() / cols), static_cast<long>(cols)}, opts);
+}
+
+/// Converts 2D tensor to a flat vector holding all `numel()` elements in the
+/// memory order of the contiguous tensor.
+/// The tensor must be 2-dimensional (only checked via assert).
+/// @Note Automatically converts tensor to target type!
+template <typename T>
+std::vector<T> tensor2DToVector(const at::Tensor &tensor) {
+  assert(tensor.sizes().size() == 2);
+
+  // bring to CPU
+  // convert to requested type
+  // ensure the tensor is contiguous (e.g. not the case if indexed with step)
+  // (there is no explicit clone: the vector built below copies the data)
+
+  at::Tensor transformedTensor =
+      tensor.to(torch::kCPU).to(TorchTypeMap<T>::type).contiguous();
+
+  // copy the range [data_ptr, data_ptr + numel) into a vector that owns it
+  std::vector<T> edgeIndex(
+      transformedTensor.template data_ptr<T>(),
+      transformedTensor.template data_ptr<T>() + transformedTensor.numel());
+
+  return edgeIndex;
+}
+
+}  // namespace Acts::detail
diff --git a/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/detail/buildEdges.hpp b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/detail/buildEdges.hpp
@@ -0,0 +1,47 @@
+// This file is part of the Acts project.
+//
+// Copyright (C) 2022 CERN for the benefit of the Acts project
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#pragma once
+
+#include <cstdint>
+
+namespace at {
+class Tensor;
+}
+
+namespace Acts {
+namespace detail {
+
+/// Post process edges
+/// NOTE(review): the implementation is not part of this header; judging by
+/// the flag names each flag presumably toggles one step (self-loop removal,
+/// duplicate removal, direction flipping) — confirm against the source file.
+at::Tensor postprocessEdgeTensor(at::Tensor edges, bool removeSelfLoops = true,
+                                 bool removeDuplicates = true,
+                                 bool flipDirections = false);
+
+/// Edge building using FRNN and CUDA.
+/// Raises an exception if not built with CUDA
+at::Tensor buildEdgesFRNN(at::Tensor& embedFeatures, float rVal, int kVal,
+                          bool flipDirections = false);
+
+/// Edge building using the Acts KD-Tree implementation
+/// Note that this implementation has no maximum number of neighbours
+/// in the NN search. kVal is only a hint for reserving memory
+at::Tensor buildEdgesKDTree(at::Tensor& embedFeatures, float rVal, int kVal,
+                            bool flipDirections = false);
+
+/// Dispatches either to FRNN or KD-Tree based edge building
+///
+/// @param embedFeatures Tensor of shape (n_nodes, embedding_dim)
+/// @param rVal radius for NN search
+/// @param kVal max number of neighbours in NN search (when the KD-Tree
+/// implementation is used this is only a hint for reserving memory, see
+/// buildEdgesKDTree above)
+/// @param flipDirections if we want to randomly flip directions of the
+/// edges after the edge building
+at::Tensor buildEdges(at::Tensor& embedFeatures, float rVal, int kVal,
+                      bool flipDirections = false);
+
+}  // namespace detail
+}  // namespace Acts
diff --git a/Plugins/ExaTrkX/src/OnnxMetricLearning.cpp b/Plugins/ExaTrkX/src/OnnxMetricLearning.cpp
@@ -8,7 +8,7 @@
 
 #include "Acts/Plugins/ExaTrkX/OnnxMetricLearning.hpp"
 
-#include "Acts/Plugins/ExaTrkX/buildEdges.hpp"
+#include "Acts/Plugins/ExaTrkX/detail/buildEdges.hpp"
 
 #include <onnxruntime_cxx_api.h>
 #include <torch/script.h>
@@ -44,10 +44,9 @@ void OnnxMetricLearning::buildEdgesWrapper(std::vector<float>& embedFeatures,
 
   torch::Tensor embedTensor =
       torch::tensor(embedFeatures, options)
-          .reshape({1, numSpacepoints, m_cfg.embeddingDim});
+          .reshape({numSpacepoints, m_cfg.embeddingDim});
 
-  auto stackedEdges = buildEdges(embedTensor, numSpacepoints,
-                                 m_cfg.embeddingDim, m_cfg.rVal, m_cfg.knnVal);
+  auto stackedEdges = detail::buildEdges(embedTensor, m_cfg.rVal, m_cfg.knnVal);
 
   stackedEdges = stackedEdges.toType(torch::kInt64).to(torch::kCPU);
 

diff --git a/Plugins/ExaTrkX/src/TorchMetricLearning.cpp b/Plugins/ExaTrkX/src/TorchMetricLearning.cpp
@@ -8,13 +8,16 @@
 
 #include "Acts/Plugins/ExaTrkX/TorchMetricLearning.hpp"
 
-#include "Acts/Plugins/ExaTrkX/buildEdges.hpp"
+#include "Acts/Plugins/ExaTrkX/detail/TensorVectorConversion.hpp"
+#include "Acts/Plugins/ExaTrkX/detail/buildEdges.hpp"
 
 #include <torch/script.h>
 #include <torch/torch.h>
 
 #include "printCudaMemInfo.hpp"
 
+using namespace torch::indexing;
+
 namespace Acts {
 
 TorchMetricLearning::TorchMetricLearning(const Config &cfg,
@@ -58,18 +61,22 @@ std::tuple<std::any, std::any> TorchMetricLearning::operator()(
                << *std::min_element(inputValues.begin(), inputValues.end()))
   printCudaMemInfo(logger());
 
+  auto inputTensor =
+      detail::vectorToTensor2D(inputValues, m_cfg.spacepointFeatures);
+
+  // If we are on CPU, clone to get ownership (is this necessary?), else bring
+  // to device.
+  if (inputTensor.options().device() == device) {
+    inputTensor = inputTensor.clone();
+  } else {
+    inputTensor = inputTensor.to(device);
+  }
+
   // **********
   // Embedding
   // **********
 
-  const int64_t numSpacepoints = inputValues.size() / m_cfg.spacepointFeatures;
   std::vector<torch::jit::IValue> inputTensors;
-  auto opts = torch::TensorOptions().dtype(torch::kFloat32);
-  torch::Tensor inputTensor =
-      torch::from_blob(inputValues.data(),
-                       {numSpacepoints, m_cfg.spacepointFeatures}, opts)
-          .to(torch::kFloat32)
-          .to(device);
 
   // Clone models (solve memory leak? members can be const...)
   auto model = m_model->clone();
@@ -87,8 +94,7 @@ std::tuple<std::any, std::any> TorchMetricLearning::operator()(
   // Building Edges
   // ****************
 
-  auto edgeList = buildEdges(output, numSpacepoints, m_cfg.embeddingDim,
-                             m_cfg.rVal, m_cfg.knnVal);
+  auto edgeList = detail::buildEdges(output, m_cfg.rVal, m_cfg.knnVal);
 
   ACTS_VERBOSE("Shape of built edges: (" << edgeList.size(0) << ", "
                                          << edgeList.size(1));