-
Notifications
You must be signed in to change notification settings - Fork 4.6k
Integrating PyTorch in Alpaka heterogeneous core #47984
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
f38496d
78aa2eb
b81a950
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| #ifndef DataFormats_PortableTestObjects_interface_TorchTestHostCollection_h | ||
| #define DataFormats_PortableTestObjects_interface_TorchTestHostCollection_h | ||
|
|
||
| #include "DataFormats/Portable/interface/PortableHostCollection.h" | ||
| #include "DataFormats/PortableTestObjects/interface/TorchTestSoA.h" | ||
|
|
||
| namespace torchportabletest { | ||
|
|
||
| using ParticleHostCollection = PortableHostCollection<ParticleSoA>; | ||
| using SimpleNetHostCollection = PortableHostCollection<SimpleNetSoA>; | ||
| using MultiHeadNetHostCollection = PortableHostCollection<MultiHeadNetSoA>; | ||
| using ImageHostCollection = PortableHostCollection<Image>; | ||
| using LogitsHostCollection = PortableHostCollection<Logits>; | ||
|
Comment on lines
+9
to
+13
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would prefer the common approach of one data structure per file, naming the file after the data structure. So @makortel, your opinion ? |
||
|
|
||
| } // namespace torchportabletest | ||
|
|
||
| #endif // DataFormats_PortableTestObjects_interface_TorchTestHostCollection_h | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| #ifndef DataFormats_PortableTestObjects_interface_TorchTestSoA_h | ||
| #define DataFormats_PortableTestObjects_interface_TorchTestSoA_h | ||
|
|
||
| #include <Eigen/Core> | ||
| #include <Eigen/Dense> | ||
|
|
||
| #include "DataFormats/Common/interface/StdArray.h" | ||
| #include "DataFormats/SoATemplate/interface/SoACommon.h" | ||
| #include "DataFormats/SoATemplate/interface/SoALayout.h" | ||
|
|
||
| namespace torchportabletest { | ||
|
|
||
| GENERATE_SOA_LAYOUT(ParticleLayout, SOA_COLUMN(float, pt), SOA_COLUMN(float, eta), SOA_COLUMN(float, phi)) | ||
| using ParticleSoA = ParticleLayout<>; | ||
|
|
||
| GENERATE_SOA_LAYOUT(SimpleNetLayout, SOA_COLUMN(float, reco_pt)) | ||
| using SimpleNetSoA = SimpleNetLayout<>; | ||
|
|
||
| using ClassificationHead = Eigen::Vector<float, 3>; | ||
| GENERATE_SOA_LAYOUT(MultiHeadNetLayout, | ||
| SOA_COLUMN(float, regression_head), | ||
| SOA_EIGEN_COLUMN(ClassificationHead, classification_head)) | ||
| using MultiHeadNetSoA = MultiHeadNetLayout<>; | ||
|
|
||
| using ColorChannel = Eigen::Matrix<float, 9, 9>; | ||
| GENERATE_SOA_LAYOUT(ImageLayout, | ||
| SOA_EIGEN_COLUMN(ColorChannel, r), | ||
| SOA_EIGEN_COLUMN(ColorChannel, g), | ||
| SOA_EIGEN_COLUMN(ColorChannel, b)) | ||
| using Image = ImageLayout<>; | ||
|
|
||
| using LogitsType = Eigen::Vector<float, 10>; | ||
| GENERATE_SOA_LAYOUT(LogitsLayout, SOA_EIGEN_COLUMN(LogitsType, logits)) | ||
| using Logits = LogitsLayout<>; | ||
|
|
||
| } // namespace torchportabletest | ||
|
|
||
| #endif // DataFormats_PortableTestObjects_interface_TorchTestSoA_h |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| #ifndef DataFormats_PortableTestObjects_interface_alpaka_TorchTestDeviceCollection_h | ||
| #define DataFormats_PortableTestObjects_interface_alpaka_TorchTestDeviceCollection_h | ||
|
|
||
| #include "DataFormats/Portable/interface/alpaka/PortableCollection.h" | ||
| #include "DataFormats/PortableTestObjects/interface/TorchTestHostCollection.h" | ||
| #include "DataFormats/PortableTestObjects/interface/TorchTestSoA.h" | ||
| #include "HeterogeneousCore/AlpakaInterface/interface/config.h" | ||
|
|
||
| namespace ALPAKA_ACCELERATOR_NAMESPACE { | ||
|
|
||
| namespace torchportabletest { | ||
|
|
||
| // make the names from the top-level torchportabletest namespace visible for unqualified lookup | ||
| // inside the ALPAKA_ACCELERATOR_NAMESPACE::torchportabletest namespace | ||
| using namespace ::torchportabletest; | ||
|
|
||
| using ParticleDeviceCollection = PortableCollection<ParticleSoA>; | ||
| using SimpleNetDeviceCollection = PortableCollection<SimpleNetSoA>; | ||
| using MultiHeadNetDeviceCollection = PortableCollection<MultiHeadNetSoA>; | ||
| using ImageDeviceCollection = PortableCollection<Image>; | ||
| using LogitsDeviceCollection = PortableCollection<Logits>; | ||
|
|
||
| } // namespace torchportabletest | ||
|
|
||
| } // namespace ALPAKA_ACCELERATOR_NAMESPACE | ||
|
|
||
| // heterogeneous ml data checks | ||
| ASSERT_DEVICE_MATCHES_HOST_COLLECTION(torchportabletest::ParticleDeviceCollection, | ||
| torchportabletest::ParticleHostCollection); | ||
| ASSERT_DEVICE_MATCHES_HOST_COLLECTION(torchportabletest::SimpleNetDeviceCollection, | ||
| torchportabletest::SimpleNetHostCollection); | ||
| ASSERT_DEVICE_MATCHES_HOST_COLLECTION(torchportabletest::MultiHeadNetDeviceCollection, | ||
| torchportabletest::MultiHeadNetHostCollection); | ||
| ASSERT_DEVICE_MATCHES_HOST_COLLECTION(torchportabletest::ImageDeviceCollection, torchportabletest::ImageHostCollection); | ||
| ASSERT_DEVICE_MATCHES_HOST_COLLECTION(torchportabletest::LogitsDeviceCollection, | ||
| torchportabletest::LogitsHostCollection); | ||
|
|
||
| #endif // DataFormats_PortableTestObjects_interface_alpaka_TorchTestDeviceCollection_h |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,8 +1,5 @@ | ||
| <use name="FWCore/Framework"/> | ||
| <use name="FWCore/MessageLogger"/> | ||
| <use name="FWCore/Utilities"/> | ||
| <use name="FWCore/ServiceRegistry"/> | ||
| <use name="pytorch"/> | ||
| <use name="FWCore/Utilities"/> | ||
| <export> | ||
| <lib name="1"/> | ||
| </export> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| # PhysicsTools/PyTorch | ||
| The torch interface is split into a general torch wrapper and an [Alpaka supported interface](../PyTorchAlpaka). A full ML CMSSW pipeline is implemented and tested in [PyTorchAlpakaTest](../PyTorchAlpakaTest) and serves as a tutorial how to run direct inference with `Portable` modules. | ||
|
|
||
| This package enables seamless integration between PyTorch and the CMSSW SoA implementation. It provides: | ||
| - Support for automatic conversion of optimized SoA to torch tensors, with reuse of memory blobs. | ||
| - Support for both just-in-time (JIT) and ahead-of-time (AOT) model execution (Beta version for AOT). | ||
|
|
||
| ## PyTorchService | ||
| To avoid interfering with the CMSSW threading model, `PyTorchService` **MUST** be included in the `cmsRun` configuration path whenever PyTorch is used. The service will disable the internal threading of PyTorch. | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not a request for changes to this PR, more of a suggestion to consider for the future developments. Would it make sense to move the functionality used to load Torch models (either JIT or AOT) into the |
||
| An example setup can be found in [PyTorchAlpakaTest](../PyTorchAlpakaTest/test/testPyTorchAlpakaHeterogeneousPipeline.py). More on PyTorch threading model: [CPU threading and TorchScript inference](https://docs.pytorch.org/docs/stable/notes/cpu_threading_torchscript_inference.html). | ||
|
|
||
| ## Inference: JIT and AOT Model Execution | ||
| A wrapper class, `Model`, is provided for torch models stored with Just-in-Time (JIT) compilation, enabling inference with native torch `Tensor` objects. To run direct inference on CMSSW PortableObjects and SoAs, the custom wrapper (see: [AlpakaModel.h](../PyTorchAlpaka/interface/alpaka/AlpakaModel.h)) has to be used. | ||
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you document the fact that |
||
| ### Just-in-Time: | ||
| - Loads `torch::jit::script::Module` at runtime. | ||
| - Compiles model on-the-fly. | ||
| - Introduces warm-up overhead without additional optimization. | ||
| - When storing the model through tracing, compatibility and correctness have to be checked. | ||
|
|
||
| Example how to export models from PyTorch Python API, more can be found in [PyTorchAlpakaTest/python](../PyTorchAlpakaTest/python/): | ||
| ```py | ||
| batch_size = 10 | ||
| input_tensor = torch.randn(batch_size, shape) | ||
| tm = torch.jit.trace(simplenet.eval(), input_tensor) | ||
| tm.save("traced_model.pt") | ||
| ``` | ||
|
|
||
| ### Ahead-of-Time (beta version not production ready): | ||
| - Uses PyTorch AOT compiler to generate `.cpp` and `.so` files. (prerequisite done manually by end-user) | ||
| - The package provides helper scripts to automate the compilation process with CMSSW-provided tools to some extent | ||
| - Loads compiled model via [AOTIModelPackageLoader](https://github.com/pytorch/pytorch/blob/v2.6.0/torch/csrc/inductor/aoti_package/model_package_loader.h). | ||
| - Eliminates JIT overhead and enables optimizations, but requires architecture-specific handling | ||
|
|
||
| More in depth introduction to the concepts used with AOT compilation see: https://dev-discuss.pytorch.org/t/torchinductor-a-pytorch-native-compiler-with-define-by-run-ir-and-symbolic-shapes/747 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| #ifndef PhysicsTools_PyTorch_interface_Model_h | ||
| #define PhysicsTools_PyTorch_interface_Model_h | ||
|
|
||
| #include <string> | ||
| #include <vector> | ||
|
|
||
| #include "PhysicsTools/PyTorch/interface/ScriptModuleLoad.h" | ||
| #include "PhysicsTools/PyTorch/interface/TorchInterface.h" | ||
|
|
||
| namespace cms::torch { | ||
|
|
||
| // Wrapper of torch::jit::script::Module: | ||
| // - https://docs.pytorch.org/cppdocs/api/classtorch_1_1nn_1_1_module.html#class-module | ||
| class Model { | ||
| public: | ||
| explicit Model(const std::string &model_path) : model_(cms::torch::load(model_path)), device_(::torch::kCPU) {} | ||
|
|
||
| explicit Model(const std::string &model_path, ::torch::Device dev) | ||
| : model_(cms::torch::load(model_path, dev)), device_(dev) {} | ||
|
|
||
| // Move model to specified device memory space. Async load by specifying `non_blocking` (in default stream if not overridden by the caller) | ||
| void to(::torch::Device dev, const bool non_blocking = false) { | ||
| if (dev == device_) | ||
| return; | ||
| model_.to(dev, non_blocking); | ||
| device_ = dev; | ||
| } | ||
|
|
||
| // Forward pass (inference) of model, returns torch::IValue (multi output support). Match native torchlib interface. | ||
| ::torch::IValue forward(std::vector<::torch::IValue> &inputs) { return model_.forward(inputs); } | ||
|
|
||
| // Get model current device information. | ||
| ::torch::Device device() const { return device_; } | ||
|
|
||
| protected: | ||
| ::torch::jit::script::Module model_; // underlying JIT model | ||
| ::torch::Device device_; // device where the model is allocated (default CPU) | ||
| }; | ||
|
|
||
| } // namespace cms::torch | ||
|
|
||
| #endif // PhysicsTools_PyTorch_interface_Model_h |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just
portabletest?