-
Notifications
You must be signed in to change notification settings - Fork 4.6k
Integrating PyTorch in Alpaka heterogeneous core #47984
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
f38496d
78aa2eb
b81a950
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| #ifndef DataFormats_PortableTestObjects_interface_TorchTestHostCollection_h | ||
| #define DataFormats_PortableTestObjects_interface_TorchTestHostCollection_h | ||
|
|
||
| #include "DataFormats/Portable/interface/PortableHostCollection.h" | ||
| #include "DataFormats/PortableTestObjects/interface/TorchTestSoA.h" | ||
|
|
||
| namespace torchportabletest { | ||
|
|
||
| using ParticleHostCollection = PortableHostCollection<ParticleSoA>; | ||
| using SimpleNetHostCollection = PortableHostCollection<SimpleNetSoA>; | ||
| using MultiHeadNetHostCollection = PortableHostCollection<MultiHeadNetSoA>; | ||
| using ImageHostCollection = PortableHostCollection<Image>; | ||
| using LogitsHostCollection = PortableHostCollection<Logits>; | ||
|
Comment on lines
+9
to
+13
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would prefer the common approach of one data structure per file, naming the file after the data structure. So @makortel, your opinion ? |
||
|
|
||
| } // namespace torchportabletest | ||
|
|
||
| #endif // DataFormats_PortableTestObjects_interface_TorchTestHostCollection_h | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| #ifndef DataFormats_PortableTestObjects_interface_TorchTestSoA_h | ||
| #define DataFormats_PortableTestObjects_interface_TorchTestSoA_h | ||
|
|
||
| #include <Eigen/Core> | ||
| #include <Eigen/Dense> | ||
|
|
||
| #include "DataFormats/Common/interface/StdArray.h" | ||
| #include "DataFormats/SoATemplate/interface/SoACommon.h" | ||
| #include "DataFormats/SoATemplate/interface/SoALayout.h" | ||
|
|
||
| namespace torchportabletest { | ||
|
|
||
| GENERATE_SOA_LAYOUT(ParticleLayout, SOA_COLUMN(float, pt), SOA_COLUMN(float, eta), SOA_COLUMN(float, phi)) | ||
| using ParticleSoA = ParticleLayout<>; | ||
|
|
||
| GENERATE_SOA_LAYOUT(SimpleNetLayout, SOA_COLUMN(float, reco_pt)) | ||
| using SimpleNetSoA = SimpleNetLayout<>; | ||
|
|
||
| using ClassificationHead = Eigen::Vector<float, 3>; | ||
| GENERATE_SOA_LAYOUT(MultiHeadNetLayout, | ||
| SOA_COLUMN(float, regression_head), | ||
| SOA_EIGEN_COLUMN(ClassificationHead, classification_head)) | ||
| using MultiHeadNetSoA = MultiHeadNetLayout<>; | ||
|
|
||
| using ColorChannel = Eigen::Matrix<float, 9, 9>; | ||
| GENERATE_SOA_LAYOUT(ImageLayout, | ||
| SOA_EIGEN_COLUMN(ColorChannel, r), | ||
| SOA_EIGEN_COLUMN(ColorChannel, g), | ||
| SOA_EIGEN_COLUMN(ColorChannel, b)) | ||
| using Image = ImageLayout<>; | ||
|
|
||
| using LogitsType = Eigen::Vector<float, 10>; | ||
| GENERATE_SOA_LAYOUT(LogitsLayout, SOA_EIGEN_COLUMN(LogitsType, logits)) | ||
| using Logits = LogitsLayout<>; | ||
|
|
||
| } // namespace torchportabletest | ||
|
|
||
| #endif // DataFormats_PortableTestObjects_interface_TorchTestSoA_h |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| #ifndef DataFormats_PortableTestObjects_interface_alpaka_TorchTestDeviceCollection_h | ||
| #define DataFormats_PortableTestObjects_interface_alpaka_TorchTestDeviceCollection_h | ||
|
|
||
| #include "DataFormats/Portable/interface/alpaka/PortableCollection.h" | ||
| #include "DataFormats/PortableTestObjects/interface/TorchTestHostCollection.h" | ||
| #include "DataFormats/PortableTestObjects/interface/TorchTestSoA.h" | ||
| #include "HeterogeneousCore/AlpakaInterface/interface/config.h" | ||
|
|
||
| namespace ALPAKA_ACCELERATOR_NAMESPACE { | ||
|
|
||
| namespace torchportabletest { | ||
|
|
||
| // make the names from the top-level torchportabletest namespace visible for unqualified lookup | ||
| // inside the ALPAKA_ACCELERATOR_NAMESPACE::torchportabletest namespace | ||
| using namespace ::torchportabletest; | ||
|
|
||
| using ParticleDeviceCollection = PortableCollection<ParticleSoA>; | ||
| using SimpleNetDeviceCollection = PortableCollection<SimpleNetSoA>; | ||
| using MultiHeadNetDeviceCollection = PortableCollection<MultiHeadNetSoA>; | ||
| using ImageDeviceCollection = PortableCollection<Image>; | ||
| using LogitsDeviceCollection = PortableCollection<Logits>; | ||
|
|
||
| } // namespace torchportabletest | ||
|
|
||
| } // namespace ALPAKA_ACCELERATOR_NAMESPACE | ||
|
|
||
| // heterogeneous ml data checks | ||
| ASSERT_DEVICE_MATCHES_HOST_COLLECTION(torchportabletest::ParticleDeviceCollection, | ||
| torchportabletest::ParticleHostCollection); | ||
| ASSERT_DEVICE_MATCHES_HOST_COLLECTION(torchportabletest::SimpleNetDeviceCollection, | ||
| torchportabletest::SimpleNetHostCollection); | ||
| ASSERT_DEVICE_MATCHES_HOST_COLLECTION(torchportabletest::MultiHeadNetDeviceCollection, | ||
| torchportabletest::MultiHeadNetHostCollection); | ||
| ASSERT_DEVICE_MATCHES_HOST_COLLECTION(torchportabletest::ImageDeviceCollection, torchportabletest::ImageHostCollection); | ||
| ASSERT_DEVICE_MATCHES_HOST_COLLECTION(torchportabletest::LogitsDeviceCollection, | ||
| torchportabletest::LogitsHostCollection); | ||
|
|
||
| #endif // DataFormats_PortableTestObjects_interface_alpaka_TorchTestDeviceCollection_h |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,8 +1,5 @@ | ||
| <use name="FWCore/Framework"/> | ||
| <use name="FWCore/MessageLogger"/> | ||
| <use name="FWCore/Utilities"/> | ||
| <use name="FWCore/ServiceRegistry"/> | ||
| <use name="pytorch"/> | ||
| <use name="FWCore/Utilities"/> | ||
| <export> | ||
| <lib name="1"/> | ||
| </export> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| # PhysicsTools/PyTorch | ||
| The torch interface is split into a general torch wrapper and an [Alpaka supported interface](../PyTorchAlpaka). A full ML CMSSW pipeline is implemented and tested in [PyTorchAlpakaTest](../PyTorchAlpakaTest) and serves as a tutorial how to run direct inference with `Portable` modules. | ||
|
|
||
| This package enables seamless integration between PyTorch and the CMSSW SoA implementation. It provides: | ||
| - Support for automatic conversion of optimized SoA to torch tensors, with reuse of memory blobs. | ||
| - Support for both just-in-time (JIT) and ahead-of-time (AOT) model execution (Beta version for AOT). | ||
|
|
||
| ## PyTorchService | ||
| To avoid interfering with the CMSSW threading model, `PyTorchService` **MUST** be included in the `cmsRun` configuration path whenever PyTorch is used. The service will disable the internal threading of PyTorch. | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not a request for changes to this PR, more of a suggestion to consider for the future developments. Would it make sense to move the functionality used to load Torch models (either JIT or AOT) into the |
||
| An example setup can be found in [PyTorchAlpakaTest](../PyTorchAlpakaTest/test/testPyTorchAlpakaHeterogeneousPipeline.py). More on PyTorch threading model: [CPU threading and TorchScript inference](https://docs.pytorch.org/docs/stable/notes/cpu_threading_torchscript_inference.html). | ||
|
|
||
| ## Inference: JIT and AOT Model Execution | ||
| A wrapper class, `Model`, is provided for torch models stored with Just-in-Time (JIT) compilation, enabling inference with native torch `Tensor` objects. To run direct inference on CMSSW PortableObjects and SoAs, the custom wrapper (see: [AlpakaModel.h](../PyTorchAlpaka/interface/alpaka/AlpakaModel.h)) has to be used. | ||
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you document the fact that |
||
| ### Just-in-Time: | ||
| - Loads `torch::jit::script::Module` at runtime. | ||
| - Compiles model on-the-fly. | ||
| - Introduces warm-up overhead without additional optimization. | ||
| - When storing the model through tracing, compatibility and correctness have to be checked. | ||
|
|
||
| Example how to export models from PyTorch Python API, more can be found in [PyTorchAlpakaTest/python](../PyTorchAlpakaTest/python/): | ||
| ```py | ||
| batch_size = 10 | ||
| input_tensor = torch.randn(batch_size, shape) | ||
| tm = torch.jit.trace(simplenet.eval(), input_tensor) | ||
| tm.save("traced_model.pt") | ||
| ``` | ||
|
|
||
| ### Ahead-of-Time (beta version not production ready): | ||
| - Uses PyTorch AOT compiler to generate `.cpp` and `.so` files. (prerequisite done manually by end-user) | ||
| - The package provides helper scripts to automate the compilation process with CMSSW-provided tools to some extent | ||
| - Loads compiled model via [AOTIModelPackageLoader](https://github.com/pytorch/pytorch/blob/v2.6.0/torch/csrc/inductor/aoti_package/model_package_loader.h). | ||
| - Eliminates JIT overhead and enables optimizations, but requires architecture-specific handling | ||
|
|
||
| More in depth introduction to the concepts used with AOT compilation see: https://dev-discuss.pytorch.org/t/torchinductor-a-pytorch-native-compiler-with-define-by-run-ir-and-symbolic-shapes/747 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| #ifndef PhysicsTools_PyTorch_interface_Model_h | ||
| #define PhysicsTools_PyTorch_interface_Model_h | ||
|
|
||
| #include <string> | ||
| #include <vector> | ||
|
|
||
| #include "PhysicsTools/PyTorch/interface/ScriptModuleLoad.h" | ||
| #include "PhysicsTools/PyTorch/interface/TorchInterface.h" | ||
|
|
||
| namespace cms::torch { | ||
|
|
||
| // Wrapper of torch::jit::script::Module: | ||
| // - https://docs.pytorch.org/cppdocs/api/classtorch_1_1nn_1_1_module.html#class-module | ||
| class Model { | ||
| public: | ||
| explicit Model(const std::string &model_path) : model_(cms::torch::load(model_path)), device_(::torch::kCPU) {} | ||
|
|
||
| explicit Model(const std::string &model_path, ::torch::Device dev) | ||
| : model_(cms::torch::load(model_path, dev)), device_(dev) {} | ||
|
|
||
| // Move model to specified device memory space. Async load by specifying `non_blocking` (in default stream if not overridden by the caller) | ||
| void to(::torch::Device dev, const bool non_blocking = false) { | ||
| if (dev == device_) | ||
| return; | ||
| model_.to(dev, non_blocking); | ||
| device_ = dev; | ||
| } | ||
|
|
||
| // Forward pass (inference) of model, returns torch::IValue (multi output support). Match native torchlib interface. | ||
| ::torch::IValue forward(std::vector<::torch::IValue> &inputs) { return model_.forward(inputs); } | ||
|
|
||
| // Get model current device information. | ||
| ::torch::Device device() const { return device_; } | ||
|
|
||
| protected: | ||
| ::torch::jit::script::Module model_; // underlying JIT model | ||
| ::torch::Device device_; // device where the model is allocated (default CPU) | ||
| }; | ||
|
|
||
| } // namespace cms::torch | ||
|
|
||
| #endif // PhysicsTools_PyTorch_interface_Model_h |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just
portabletest?