diff --git a/.github/workflows/linux_openvino_ci.yml b/.github/workflows/linux_openvino_ci.yml
new file mode 100644
index 0000000000000..12495b1f26c65
--- /dev/null
+++ b/.github/workflows/linux_openvino_ci.yml
@@ -0,0 +1,45 @@
+name: Linux OpenVINO CI
+
+on:
+ push:
+ branches: [ main, 'rel-*' ]
+ pull_request:
+ branches: [ main, 'rel-*' ]
+ workflow_dispatch:
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+permissions:
+ contents: read
+ packages: write # Needed if the reusable workflow pushes images
+ attestations: write # Optional: for artifact attestations if enabled
+ id-token: write # Optional: may be needed for OIDC authentication (e.g., ACR)
+
+jobs:
+ build_test_openvino:
+ name: Build and Test OpenVINO EP (AlmaLinux8, Py3.12)
+ # Use the same reusable workflow as the other Linux CI pipelines
+ uses: ./.github/workflows/reusable_linux_build.yml
+ with:
+ pool_name: "onnxruntime-github-Ubuntu2204-AMD-CPU"
+ build_config: Release
+ # Architecture: OpenVINO only supports Intel x64
+ architecture: x64
+ dockerfile_path: tools/ci_build/github/linux/docker/inference/x86_64/python/openvino/Dockerfile
+ docker_image_repo: onnxruntimeopenvino
+
+ execution_providers: 'openvino'
+
+ extra_build_flags: '--use_openvino CPU --enable_generic_interface --build_shared_lib'
+
+ # Python Path Prefix: Set the correct Python 3.12 path inside the manylinux container
+ python_path_prefix: 'PATH=/opt/python/cp312-cp312/bin:$PATH'
+
+ run_tests: true
+ upload_build_output: false
+
+ # Secrets: Pass the necessary GitHub token
+ secrets:
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/publish-python-apidocs.yml b/.github/workflows/publish-python-apidocs.yml
index adc2346d1bf1b..d03c9a407d54f 100644
--- a/.github/workflows/publish-python-apidocs.yml
+++ b/.github/workflows/publish-python-apidocs.yml
@@ -40,6 +40,7 @@ jobs:
- name: Generate Python docs with Sphinx
run: |
cd tools/doc
+ chmod +x *
./builddoc.sh /usr/bin ../.. ../../build
- name: Log source commit
run: git rev-parse --short HEAD > build/docs/html/source-version.txt
diff --git a/cgmanifests/cgmanifest.json b/cgmanifests/cgmanifest.json
index d883f89dfdc56..f29857a231eb9 100644
--- a/cgmanifests/cgmanifest.json
+++ b/cgmanifests/cgmanifest.json
@@ -18,7 +18,7 @@
"maven": {
"GroupId": "com.google.protobuf",
"ArtifactId": "protobuf-java",
- "Version": "3.21.7"
+ "Version": "3.25.5"
},
"DevelopmentDependency": true
}
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index a841b17a2a571..5ab1605dd3a99 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -62,7 +62,7 @@ option(onnxruntime_GENERATE_TEST_REPORTS "Enable test report generation" OFF)
option(onnxruntime_ENABLE_STATIC_ANALYSIS "Enable static analysis" OFF)
option(onnxruntime_USE_CUSTOM_STATIC_ANALYSIS_RULES "Use a custom SDL Rule. It is mainly for our CI build" OFF)
option(onnxruntime_REDIRECT_STATIC_ANALYSIS_OUTPUTS_TO_FILE "Use a custom SDL Rule. It is mainly for our CI build" OFF)
-option(onnxruntime_ENABLE_PYTHON "Enable python buildings" OFF)
+option(onnxruntime_ENABLE_PYTHON "Enable python bindings" OFF)
# Enable it may cause LNK1169 error
option(onnxruntime_ENABLE_MEMLEAK_CHECKER "Experimental: Enable memory leak checker in Windows debug build" OFF)
option(onnxruntime_USE_CUDA "Build with CUDA support" OFF)
diff --git a/docs/python/ReadMeOV.rst b/docs/python/ReadMeOV.rst
index 845f79cf8257c..fefef421158f8 100644
--- a/docs/python/ReadMeOV.rst
+++ b/docs/python/ReadMeOV.rst
@@ -16,7 +16,7 @@ Requirements
^^^^^^^^^^^^
- Ubuntu 18.04, 20.04 or Windows 10 - 64 bit
-- Python 3.11, 3.12 and 3.13 for Windows and Linux
+- Python 3.10, 3.11, 3.12 and 3.13 for Windows and Linux
This package supports:
- Intel® CPUs
@@ -29,7 +29,7 @@ This package supports:
Please install OpenVINO™ PyPi Package separately for Windows.
For installation instructions on Windows please refer to `OpenVINO™ Execution Provider for ONNX Runtime for Windows `_.
-**OpenVINO™ Execution Provider for ONNX Runtime** Linux Wheels comes with pre-built libraries of OpenVINO™ version 2025.0.0 eliminating the need to install OpenVINO™ separately.
+**OpenVINO™ Execution Provider for ONNX Runtime** Linux Wheels come with pre-built libraries of OpenVINO™ version 2025.1.0, eliminating the need to install OpenVINO™ separately.
For more details on build and installation please refer to `Build `_.
diff --git a/include/onnxruntime/core/providers/providers.h b/include/onnxruntime/core/providers/providers.h
index 2cfd5acf66293..8097be287df82 100644
--- a/include/onnxruntime/core/providers/providers.h
+++ b/include/onnxruntime/core/providers/providers.h
@@ -2,6 +2,10 @@
// Licensed under the MIT License.
#pragma once
+#include <memory>
+
+struct OrtSessionOptions;
+struct OrtLogger;
namespace onnxruntime {
class IExecutionProvider;
@@ -9,5 +13,20 @@ class IExecutionProvider;
struct IExecutionProviderFactory {
virtual ~IExecutionProviderFactory() = default;
virtual std::unique_ptr<IExecutionProvider> CreateProvider() = 0;
+
+ /// <summary>
+ /// Creates an IExecutionProvider instance. Enables initialization of an EP instance using session-level options
+ /// such as session configs (string key/value pairs), graph optimization level, etc.
+ ///
+ /// The default implementation ignores the arguments and calls the above CreateProvider() function,
+ /// which does not take in any arguments.
+ ///
+ /// This version of CreateProvider() is used by InferenceSession when registering EPs.
+ /// </summary>
+ /// <param name="session_options">Options for the session in which the IExecutionProvider is used</param>
+ /// <param name="session_logger">Session logger that should be used by the IExecutionProvider.</param>
+ /// <returns>An IExecutionProvider</returns>
+ virtual std::unique_ptr<IExecutionProvider> CreateProvider(const OrtSessionOptions& session_options,
+                                                            const OrtLogger& session_logger);
};
} // namespace onnxruntime
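
// A minimal sketch, not part of this diff, of how a factory might override the new
// session-aware CreateProvider overload while keeping the legacy one. MyEpFactory is
// a hypothetical name, and the include path assumes ORT's internal source layout.
#include <memory>
#include "core/providers/providers.h"  // assumed include path

namespace onnxruntime {

struct MyEpFactory : IExecutionProviderFactory {
  // Legacy overload: no session context is available.
  std::unique_ptr<IExecutionProvider> CreateProvider() override {
    return nullptr;  // construct the EP with default settings here
  }

  // New overload: session-level options and the session logger are available,
  // so the EP can be initialized from string key/value config entries.
  std::unique_ptr<IExecutionProvider> CreateProvider(const OrtSessionOptions& session_options,
                                                     const OrtLogger& session_logger) override {
    (void)session_options;  // e.g., read session config entries here
    (void)session_logger;   // e.g., adopt the session logger for EP logging
    return CreateProvider();  // fall back to the default-constructed EP in this sketch
  }
};

}  // namespace onnxruntime
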
diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index 3bf0d5e19c525..d557ee7443306 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -310,6 +310,7 @@ ORT_RUNTIME_CLASS(ValueInfo);
ORT_RUNTIME_CLASS(Node);
ORT_RUNTIME_CLASS(Graph);
ORT_RUNTIME_CLASS(Model);
+ORT_RUNTIME_CLASS(ModelCompilationOptions);
#ifdef _MSC_VER
typedef _Return_type_success_(return == 0) OrtStatus* OrtStatusPtr;
@@ -673,6 +674,9 @@ typedef struct OrtTrainingApi OrtTrainingApi;
struct OrtModelEditorApi;
typedef struct OrtModelEditorApi OrtModelEditorApi;
+struct OrtCompileApi;
+typedef struct OrtCompileApi OrtCompileApi;
+
/** \brief The helper interface to get the right version of OrtApi
*
* Get a pointer to this structure through ::OrtGetApiBase
@@ -3638,10 +3642,16 @@ struct OrtApi {
* \param[in] provider_options_values - values to configure the provider options
* \param[in] num_keys - number of keys passed in
*
- * Currently supported providers:
- * QNN
- * SNPE
- * XNNPACK
+ * Currently supported provider names:
+ * QNNExecutionProvider (or QNN)
+ * OpenVINOExecutionProvider (or OpenVINO)
+ * XnnpackExecutionProvider (or XNNPACK)
+ * WebNNExecutionProvider (or WEBNN)
+ * WebGpuExecutionProvider (or WebGPU)
+ * AzureExecutionProvider (or AZURE)
+ * JsExecutionProvider (or JS)
+ * VitisAIExecutionProvider (or VitisAI)
+ * CoreMLExecutionProvider (or CoreML)
*
* Note: If an execution provider has a dedicated SessionOptionsAppendExecutionProvider_ function
* that should be used to add it.
@@ -3651,93 +3661,78 @@ struct OrtApi {
* name. E.g., given backend type "htp", on Windows, the backend path would be "QnnHtp.dll", and on other
* platforms, it would be "libQnnHtp.so". Mutually exclusive with "backend_path".
* Available options:
- * - "cpu"
- * - "gpu"
- * - "htp": Default.
- * - "saver"
+ * -# "cpu"
+ * -# "gpu"
+ * -# "htp": Default.
+ * -# "saver"
* "backend_path": File path to QNN backend library. Mutually exclusive with "backend_type".
* "profiling_level": QNN profiling level.
* Available options:
- * - "off": Default.
- * - "basic"
- * - "detailed"
+ * -# "off": Default.
+ * -# "basic"
+ * -# "detailed"
* "profiling_file_path": QNN profiling file path if ETW not enabled.
* "rpc_control_latency": QNN RPC control latency.
* "vtcm_mb": QNN VTCM size in MB. default to 0(not set).
* "htp_performance_mode": QNN performance mode.
* Available options:
- * - "burst"
- * - "balanced"
- * - "default": Default.
- * - "high_performance"
- * - "high_power_saver"
- * - "low_balanced"
- * - "extreme_power_saver"
- * - "low_power_saver"
- * - "power_saver"
- * - "sustained_high_performance"
+ * -# "burst"
+ * -# "balanced"
+ * -# "default": Default.
+ * -# "high_performance"
+ * -# "high_power_saver"
+ * -# "low_balanced"
+ * -# "extreme_power_saver"
+ * -# "low_power_saver"
+ * -# "power_saver"
+ * -# "sustained_high_performance"
* "qnn_saver_path": File path to the QNN Saver backend library. If specified, QNN Saver will be enabled and will
* dump QNN API calls to disk for replay/debugging. QNN Saver produces incorrect model inference results and
* may alter model/EP partitioning. Use only for debugging.
* "qnn_context_priority": QNN context priority.
* Available options:
- * - "low"
- * - "normal": Default.
- * - "normal_high"
- * - "high"
+ * -# "low"
+ * -# "normal": Default.
+ * -# "normal_high"
+ * -# "high"
* "htp_graph_finalization_optimization_mode": Set the optimization mode for graph finalization on the HTP backend.
* Available options:
- * - "0": Default.
- * - "1": Faster preparation time, less optimal graph.
- * - "2": Longer preparation time, more optimal graph.
- * - "3": Longest preparation time, most likely even more optimal graph. See QNN SDK documentation for specific
+ * -# "0": Default.
+ * -# "1": Faster preparation time, less optimal graph.
+ * -# "2": Longer preparation time, more optimal graph.
+ * -# "3": Longest preparation time, most likely even more optimal graph. See QNN SDK documentation for specific
* details.
* "soc_model": The SoC model number. Refer to the QNN SDK documentation for valid values.
* Defaults to "0" (unknown).
* "htp_arch": The minimum HTP architecture the driver will use to select compatible QNN operators.
* Available options:
- * - "0": Default (none).
- * - "68"
- * - "69"
- * - "73"
- * - "75"
+ * -# "0": Default (none).
+ * -# "68"
+ * -# "69"
+ * -# "73"
+ * -# "75"
* "device_id": The ID of the device to use when setting 'htp_arch'. Defaults to "0" (for single device).
* "enable_htp_fp16_precision": Used for float32 model for HTP backend.
* Enable the float32 model to be inferenced with fp16 precision. Otherwise, it will be fp32 precision.
- * - "0": With fp32 precision.
- * - "1": Default. With fp16 precision.
+ * -# "0": With fp32 precision.
+ * -# "1": Default. With fp16 precision.
* "offload_graph_io_quantization": Offload graph input quantization and graph output dequantization to another
* execution provider (typically CPU EP).
- * - "0": Disabled. QNN EP will handle quantization and dequantization of graph I/O.
- * - "1": Enabled. This is the default value.
+ * -# "0": Disabled. QNN EP will handle quantization and dequantization of graph I/O.
+ * -# "1": Enabled. This is the default value.
* "enable_htp_spill_fill_buffer": Enable HTP spill fill buffer setting. The flag is used while generating context
* binary.
- * - "0": Default. Disabled.
- * - "1": Enabled.
+ * -# "0": Default. Disabled.
+ * -# "1": Enabled.
* "enable_htp_shared_memory_allocator": Enable the QNN HTP shared memory allocator. Requires libcdsprpc.so/dll to
* be available.
- * - "0": Default. Disabled.
- * - "1": Enabled.
+ * -# "0": Default. Disabled.
+ * -# "1": Enabled.
* "dump_json_qnn_graph": Set to "1" to dump QNN graphs generated by QNN EP as JSON files. Each graph partition
* assigned to QNN EP is dumped to a separate file.
* "json_qnn_graph_dir": Directory in which to dump QNN JSON graphs. If not specified, QNN graphs are dumped in the
* program's current working directory. Ignored if "dump_json_qnn_graph" is not set.
*
- * SNPE supported keys:
- * "runtime": SNPE runtime engine, options: "CPU", "CPU_FLOAT32", "GPU", "GPU_FLOAT32_16_HYBRID", "GPU_FLOAT16",
- * "DSP", "DSP_FIXED8_TF", "AIP_FIXED_TF", "AIP_FIXED8_TF".
- * Mapping to SNPE Runtime_t definition:
- * CPU, CPU_FLOAT32 => zdl::DlSystem::Runtime_t::CPU;
- * GPU, GPU_FLOAT32_16_HYBRID => zdl::DlSystem::Runtime_t::GPU;
- * GPU_FLOAT16 => zdl::DlSystem::Runtime_t::GPU_FLOAT16;
- * DSP, DSP_FIXED8_TF => zdl::DlSystem::Runtime_t::DSP.
- * AIP_FIXED_TF, AIP_FIXED8_TF => zdl::DlSystem::Runtime_t::AIP_FIXED_TF.
- * "priority": execution priority, options: "low", "normal".
- * "buffer_type": ITensor or user buffers, options: "ITENSOR", user buffer with different types - "TF8", "TF16", "UINT8", "FLOAT".
- * "ITENSOR" -- default, ITensor which is float only.
- * "TF8" -- quantized model required, "FLOAT" -- for both quantized or non-quantized model
- * "enable_init_cache": enable SNPE init caching feature, set to 1 to enabled it. Disabled by default.
- *
* XNNPACK supported keys:
* "intra_op_num_threads": number of thread-pool size to use for XNNPACK execution provider.
* default value is 0, which means to use the session thread-pool size.
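
// A hedged sketch of passing the provider options documented above through
// SessionOptionsAppendExecutionProvider (via its C++ wrapper). The option keys
// come from this header; the values chosen here are illustrative.
#include <string>
#include <unordered_map>
#include "onnxruntime_cxx_api.h"  // assumed include path

void AppendQnn(Ort::SessionOptions& session_options) {
  std::unordered_map<std::string, std::string> provider_options{
      {"backend_type", "htp"},            // default backend
      {"htp_performance_mode", "burst"},
      {"profiling_level", "off"},
  };
  session_options.AppendExecutionProvider("QNN", provider_options);
}
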
@@ -4855,6 +4850,7 @@ struct OrtApi {
/** \brief Get the value name from an OrtValueInfo instance.
* \param[in] value_info The OrtValueInfo instance.
+ * \param[out] name The name of the OrtValueInfo
* \snippet{doc} snippets.dox OrtStatus Return Value
* \since Version 1.21.
*/
@@ -4862,6 +4858,7 @@ struct OrtApi {
/** \brief Get the type information from an OrtValueInfo instance.
* \param[in] value_info The OrtValueInfo instance.
+ * \param[out] type_info The type info of the OrtValueInfo
* \snippet{doc} snippets.dox OrtStatus Return Value
* \since Version 1.21.
*/
@@ -4889,6 +4886,7 @@ struct OrtApi {
* \param[in] shape Dimensions of the Tensor. All values should be > 0.
* \param[in] shape_len Number of dimensions in the shape array.
* \param[in] type Data type of the Tensor.
+ * \param[out] out Newly created ::OrtValue. Must be freed with OrtApi::ReleaseValue
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
@@ -4912,11 +4910,27 @@ struct OrtApi {
* is not guaranteed. The session may have already been created and initialized
* before the cancellation request was issued.
*
- * \snippet{doc} snippets.dox OrtStatus
+ * \snippet{doc} snippets.dox OrtStatus Return Value
*
+ * \since Version 1.21.
*/
ORT_API2_STATUS(SessionOptionsSetLoadCancellationFlag, _Inout_ OrtSessionOptions* options,
_In_ bool cancel);
+
+ /** \brief Get the Compile API instance.
+ *
+ * Get the Compile API instance to compile ONNX models. Execution providers that support compilation fuse a subgraph
+ * into an EPContext node that wraps a provider-specific binary representation of the subgraph.
+ * For more details about the EPContext design, refer to:
+ * \htmlonly
+ * EPContext design document.
+ * \endhtmlonly
+ *
+ * \return Compile API struct instance.
+ *
+ * \since Version 1.22.
+ */
+ const OrtCompileApi*(ORT_API_CALL* GetCompileApi)();
};
/*
@@ -5056,7 +5070,7 @@ struct OrtModelEditorApi {
* User can release `tensor_info` after creating the OrtTypeInfo.
*
* \param[in] tensor_info Tensor type and shape information.
- * \param[out] TypeInfo instance for the tensor.
+ * \param[out] type_info TypeInfo instance for the tensor.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
@@ -5072,7 +5086,7 @@ struct OrtModelEditorApi {
* User can release `tensor_info` after creating the OrtTypeInfo.
*
* \param[in] tensor_info SparseTensor type and shape information.
- * \param[out] TypeInfo instance for the tensor.
+ * \param[out] type_info TypeInfo instance for the tensor.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
@@ -5089,7 +5103,7 @@ struct OrtModelEditorApi {
*
* \param[in] map_key_type Key type for the map.
* \param[in] map_value_type Value type for the map.
- * \param[out] TypeInfo instance for the map.
+ * \param[out] type_info TypeInfo instance for the map.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
@@ -5105,7 +5119,7 @@ struct OrtModelEditorApi {
* User can release `sequence_type` after creating the OrtTypeInfo.
*
* \param[in] sequence_type Sequence type and shape information.
- * \param[out] TypeInfo instance for the sequence.
+ * \param[out] type_info TypeInfo instance for the sequence.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
@@ -5119,8 +5133,8 @@ struct OrtModelEditorApi {
*
* User can release `contained_type` after creating the OrtTypeInfo.
*
- * \param[in] tensor_info Tensor type and shape information.
- * \param[out] TypeInfo instance for the tensor.
+ * \param[in] contained_type Tensor type and shape information.
+ * \param[out] type_info TypeInfo instance for the tensor.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
@@ -5132,6 +5146,7 @@ struct OrtModelEditorApi {
*
* \param[in] name The name of the input or output.
* \param[in] type_info The type information for the input or output. The provided value is copied.
+ * \param[out] value_info The OrtValueInfo instance.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
@@ -5271,6 +5286,7 @@ struct OrtModelEditorApi {
* If augmenting an existing model add additional opset versions if needed.
* \param[in] opset_entries_len The number of domain_names and opset_versions entries.
* Domain and opset entries should be 1:1
+ * \param[out] model The OrtModel instance.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
@@ -5362,6 +5378,7 @@ struct OrtModelEditorApi {
* \param{in} model_data The model data for the existing model to augment.
* \param{in} model_data_length The length of the model data.
* \param{in} options The OrtSessionOptions instance.
+ * \param{out} out The created OrtSession instance.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
@@ -5376,12 +5393,13 @@ struct OrtModelEditorApi {
*
* When using the Model Editor API to augment a model, any new nodes must conform to the opset version of the
* original model. To do that the user must be able to discover that opset version.
+ * Returns an error if the domain is not used in the model.
*
* \param[in] session OrtSession to query
* \param[in] domain Domain to query. The ONNX domain is an empty string.
* \param[out] opset The opset version of the domain.
*
- * \snippet{doc} snippets.dox OrtStatus Return Value. Returns an error if the domain is not used in the model.
+ * \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.21.
*/
@@ -5414,7 +5432,7 @@ struct OrtModelEditorApi {
*
* \param[in] session OrtSession to finalize. Session must have been created using CreateModelEditorSession[FromArray].
* \param[in] options OrtSessionOptions to use for the session.
- * \param[in] Optional prepacked_weights_container OrtPrepackedWeightsContainer to use for the session.
+ * \param[in] prepacked_weights_container Optional OrtPrepackedWeightsContainer to use for the session.
Set to nullptr if not used.
* \snippet{doc} snippets.dox OrtStatus Return Value
*
@@ -5425,6 +5443,193 @@ struct OrtModelEditorApi {
#endif // !defined(ORT_MINIMAL_BUILD)
};
+/**
+ * ORT Compile API
+ */
+
+/**
+ * \brief The OrtCompileApi struct provides functions to compile ONNX models.
+ *
+ * Execution providers that support compilation fuse a subgraph into an EPContext node that wraps a provider-specific
+ * binary representation of the subgraph.
+ * For more details about the EPContext design, refer to:
+ * \htmlonly
+ * EPContext design document.
+ * \endhtmlonly
+ *
+ * Example (error handling not shown):
+ * OrtStatus* status = NULL;
+ * const OrtCompileApi* compile_api = ort_api->GetCompileApi();
+ * OrtModelCompilationOptions* compile_options = NULL;
+ *
+ * status = compile_api->CreateModelCompilationOptionsFromSessionOptions(env, session_options, &compile_options);
+ * status = compile_api->ModelCompilationOptions_SetInputModelPath(compile_options, ORT_TSTR("model.onnx"));
+ * status = compile_api->ModelCompilationOptions_SetOutputModelPath(compile_options, ORT_TSTR("model.compiled.onnx"));
+ * status = compile_api->CompileModel(env, compile_options);
+ * compile_api->ReleaseModelCompilationOptions(compile_options);
+ *
+ * \since Version 1.22.
+ */
+struct OrtCompileApi {
+ /// @}
+ /// \name OrtModelCompilationOptions
+ /// @{
+ ORT_CLASS_RELEASE(ModelCompilationOptions);
+
+ /** \brief Creates an OrtModelCompilationOptions object from an existing OrtSessionOptions object.
+ *
+ * An OrtModelCompilationOptions object contains the settings used to generate a compiled ONNX model.
+ * The OrtSessionOptions object has the execution providers with which the model will be compiled.
+ *
+ * ReleaseModelCompilationOptions must be called to free the OrtModelCompilationOptions after calling
+ * CompileModel.
+ *
+ * \param[in] env OrtEnv object.
+ * \param[in] session_options The OrtSessionOptions instance from which to create the OrtModelCompilationOptions.
+ * \param[out] out The created OrtModelCompilationOptions instance.
+ * \snippet{doc} snippets.dox OrtStatus Return Value
+ *
+ * \since Version 1.22.
+ */
+ ORT_API2_STATUS(CreateModelCompilationOptionsFromSessionOptions, _In_ const OrtEnv* env,
+ _In_ const OrtSessionOptions* session_options, _Outptr_ OrtModelCompilationOptions** out);
+
+ /** \brief Sets the file path to the input ONNX model to compile.
+ *
+ * The input model's location (e.g., file path or memory buffer) must be set with either
+ * ModelCompilationOptions_SetInputModelPath or ModelCompilationOptions_SetInputModelFromBuffer.
+ *
+ * \param[in] model_compile_options The OrtModelCompilationOptions instance.
+ * \param[in] input_model_path Null terminated string of the path (wchar on Windows, char otherwise).
+ *
+ * \snippet{doc} snippets.dox OrtStatus Return Value
+ *
+ * \since Version 1.22.
+ */
+ ORT_API2_STATUS(ModelCompilationOptions_SetInputModelPath, _In_ OrtModelCompilationOptions* model_compile_options,
+ _In_ const ORTCHAR_T* input_model_path);
+
+ /** \brief Sets the buffer that stores the bytes of the loaded ONNX model to compile.
+ *
+ * The input model's location (e.g., file path or memory buffer) must be set with either
+ * ModelCompilationOptions_SetInputModelPath or ModelCompilationOptions_SetInputModelFromBuffer.
+ *
+ * \param[in] model_compile_options The OrtModelCompilationOptions instance.
+ * \param[in] input_model_data Buffer containing the loaded ONNX model bytes.
+ * \param[in] input_model_data_size The number of bytes in the `input_model_data` buffer.
+ *
+ * \snippet{doc} snippets.dox OrtStatus Return Value
+ *
+ * \since Version 1.22.
+ */
+ ORT_API2_STATUS(ModelCompilationOptions_SetInputModelFromBuffer,
+ _In_ OrtModelCompilationOptions* model_compile_options,
+ _In_ const void* input_model_data,
+ size_t input_model_data_size);
+
+ /** \brief Sets the file path for the output ONNX model generated by CompileModel.
+ *
+ * The output model's location (e.g., file path or memory buffer) can be set with either
+ * ModelCompilationOptions_SetOutputModelPath or ModelCompilationOptions_SetOutputModelBuffer.
+ *
+ * If the output model's location is not set, ONNX Runtime will generate an output file with a path based on
+ * the input model's file path. Examples:
+ * /Path/my_model.onnx -> /Path/my_model_ctx.onnx
+ * /Path/my_model -> /Path/my_model_ctx.onnx
+ *
+ * \param[in] model_compile_options The OrtModelCompilationOptions instance.
+ * \param[in] output_model_path Null terminated string of the path (wchar on Windows, char otherwise).
+ *
+ * \snippet{doc} snippets.dox OrtStatus Return Value
+ *
+ * \since Version 1.22.
+ */
+ ORT_API2_STATUS(ModelCompilationOptions_SetOutputModelPath, _In_ OrtModelCompilationOptions* model_compile_options,
+ _In_ const ORTCHAR_T* output_model_path);
+
+ /** \brief Optionally sets the file that should store external initializers for the compiled ONNX model.
+ * If not set, initializers are stored within the model.
+ *
+ * Only initializers for nodes that were not compiled are stored in the external initializers file.
+ * Compiled nodes contain their initializer data within the `ep_cache_context` attribute of EPContext nodes.
+ * Refer to ModelCompilationOptions_SetEpContextEmbedMode.
+ *
+ * \param[in] model_compile_options The OrtModelCompilationOptions instance.
+ * \param[in] external_initializers_file_path Null terminated string of the path to the file.
+ * \param[in] external_initializers_size_threshold Initializers larger than this threshold are stored in the file.
+ *
+ * \snippet{doc} snippets.dox OrtStatus Return Value
+ *
+ * \since Version 1.22.
+ */
+ ORT_API2_STATUS(ModelCompilationOptions_SetOutputModelExternalInitializersFile,
+ _In_ OrtModelCompilationOptions* model_compile_options,
+ _In_ const ORTCHAR_T* external_initializers_file_path,
+ size_t external_initializers_size_threshold);
+
+ /** \brief Configures model compilation to store the output compiled ONNX model in a buffer.
+ *
+ * The caller passes an OrtAllocator that ONNX Runtime uses to allocate memory for the buffer.
+ *
+ * The output model's location (e.g., file path or memory buffer) can be set with either
+ * ModelCompilationOptions_SetOutputModelPath or ModelCompilationOptions_SetOutputModelBuffer.
+ *
+ * If the output model's location is not set, ONNX Runtime will generate an output file with a path based on
+ * the input model's file path. Examples:
+ * /Path/my_model.onnx -> /Path/my_model_ctx.onnx
+ * /Path/my_model -> /Path/my_model_ctx.onnx
+ *
+ * \param[in] model_compile_options The OrtModelCompilationOptions instance.
+ * \param[in] allocator The allocator used to allocate the buffer for the compiled model.
+ * \param[out] output_model_buffer_ptr Pointer to the buffer that stores the compiled model.
+ * \param[out] output_model_buffer_size_ptr Pointer set to the size of output model in bytes.
+ *
+ * \snippet{doc} snippets.dox OrtStatus Return Value
+ *
+ * \since Version 1.22.
+ */
+ ORT_API2_STATUS(ModelCompilationOptions_SetOutputModelBuffer,
+ _In_ OrtModelCompilationOptions* model_compile_options,
+ _Inout_ OrtAllocator* allocator,
+ _Outptr_ void** output_model_buffer_ptr,
+ _Out_ size_t* output_model_buffer_size_ptr);
+
+ /** \brief Enables or disables the embedding of EPContext binary data into the `ep_cache_context` attribute
+ * of EPContext nodes. Defaults to false.
+ *
+ * If enabled, the `ep_cache_context` attribute of EPContext nodes will store the context binary data, which may
+ * include weights for compiled subgraphs.
+ *
+ * If disabled, the `ep_cache_context` attribute of EPContext nodes will contain the path to the file containing the
+ * context binary data. The path is set by the execution provider creating the EPContext node.
+ *
+ * For more details about the EPContext design, refer to:
+ * \htmlonly
+ * EPContext design document.
+ * \endhtmlonly
+ *
+ * \param[in] model_compile_options The OrtModelCompilationOptions instance.
+ * \param[in] embed_ep_context_in_model True to embed EPContext binary data into the EPContext node
+ * `ep_cache_context` attributes.
+ *
+ * \snippet{doc} snippets.dox OrtStatus Return Value
+ *
+ * \since Version 1.22.
+ */
+ ORT_API2_STATUS(ModelCompilationOptions_SetEpContextEmbedMode, _In_ OrtModelCompilationOptions* model_compile_options,
+ bool embed_ep_context_in_model);
+
+ /** \brief Compiles an input ONNX model with the given compilation options.
+ *
+ * \param[in] env OrtEnv object.
+ * \param[in] model_options The compilation options for the model.
+ *
+ * \snippet{doc} snippets.dox OrtStatus Return Value
+ *
+ * \since Version 1.22.
+ */
+ ORT_API2_STATUS(CompileModel, _In_ const OrtEnv* env, _In_ const OrtModelCompilationOptions* model_options);
+};
/*
* This is the old way to add the CUDA provider to the session, please use SessionOptionsAppendExecutionProvider_CUDA above to access the latest functionality
* This function always exists, but will only succeed if Onnxruntime was built with CUDA support and the CUDA provider shared library exists
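
// A hedged sketch of the new Compile API used buffer-to-buffer, complementing the
// path-based example in the OrtCompileApi doc comment above. Error handling is
// elided; `env`, `session_options`, and the model bytes are assumed to already exist.
#include <stddef.h>
#include "onnxruntime_c_api.h"  // assumed include path

void CompileFromBuffer(const OrtEnv* env, const OrtSessionOptions* session_options,
                       const void* model_bytes, size_t model_size) {
  const OrtApi* ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);
  const OrtCompileApi* compile_api = ort->GetCompileApi();

  OrtModelCompilationOptions* opts = NULL;
  compile_api->CreateModelCompilationOptionsFromSessionOptions(env, session_options, &opts);
  compile_api->ModelCompilationOptions_SetInputModelFromBuffer(opts, model_bytes, model_size);

  // Write the compiled model into an allocator-owned buffer instead of a file.
  OrtAllocator* allocator = NULL;
  ort->GetAllocatorWithDefaultOptions(&allocator);
  void* out_bytes = NULL;
  size_t out_size = 0;
  compile_api->ModelCompilationOptions_SetOutputModelBuffer(opts, allocator, &out_bytes, &out_size);
  compile_api->ModelCompilationOptions_SetEpContextEmbedMode(opts, true);  // embed context binaries

  compile_api->CompileModel(env, opts);  // on success, out_bytes/out_size hold the compiled model
  compile_api->ReleaseModelCompilationOptions(opts);
  // The caller eventually frees out_bytes with allocator->Free(allocator, out_bytes).
}
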
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
index ce7dc1c45b05e..a2937b6e82a27 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
@@ -158,6 +158,20 @@ inline const OrtModelEditorApi& GetModelEditorApi() {
return *api;
}
+/// <summary>
+/// This returns a reference to the ORT C Compile API. Used if compiling a model at runtime.
+/// </summary>
+/// <returns>ORT C Compile API reference</returns>
+inline const OrtCompileApi& GetCompileApi() {
+ auto* api = GetApi().GetCompileApi();
+ if (api == nullptr) {
+ // minimal build
+ ORT_CXX_API_THROW("Compile API is not available in this build", ORT_FAIL);
+ }
+
+ return *api;
+}
+
/** \brief IEEE 754 half-precision floating point data type
*
* \details This struct is used for converting float to float16 and back
@@ -517,6 +531,9 @@ namespace detail {
#define ORT_DEFINE_RELEASE(NAME) \
inline void OrtRelease(Ort##NAME* ptr) { GetApi().Release##NAME(ptr); }
+#define ORT_DEFINE_RELEASE_FROM_API_STRUCT(NAME, API_GETTER) \
+ inline void OrtRelease(Ort##NAME* ptr) { API_GETTER().Release##NAME(ptr); }
+
ORT_DEFINE_RELEASE(Allocator);
ORT_DEFINE_RELEASE(MemoryInfo);
ORT_DEFINE_RELEASE(CustomOpDomain);
@@ -542,8 +559,10 @@ ORT_DEFINE_RELEASE(ValueInfo);
ORT_DEFINE_RELEASE(Node);
ORT_DEFINE_RELEASE(Graph);
ORT_DEFINE_RELEASE(Model);
+ORT_DEFINE_RELEASE_FROM_API_STRUCT(ModelCompilationOptions, GetCompileApi);
#undef ORT_DEFINE_RELEASE
+#undef ORT_DEFINE_RELEASE_FROM_API_STRUCT
/** \brief This is a tagging template type. Use it with Base<T> to indicate that the C++ interface object
* has no ownership of the underlying C object.
@@ -992,6 +1011,38 @@ struct SessionOptions : detail::SessionOptionsImpl {
ConstSessionOptions GetConst() const { return ConstSessionOptions{this->p_}; }
};
+/** \brief Options object used when compiling a model.
+ *
+ * Wraps ::OrtModelCompilationOptions object and methods
+ */
+struct ModelCompilationOptions : detail::Base<OrtModelCompilationOptions> {
+ using Base = detail::Base<OrtModelCompilationOptions>;
+ using Base::Base;
+
+ explicit ModelCompilationOptions(std::nullptr_t) {} ///< Create an empty ModelCompilationOptions object, must be assigned a valid one to be used.
+
+ ModelCompilationOptions(const Env& env, const SessionOptions& session_options); ///< Wraps OrtApi::CreateModelCompilationOptionsFromSessionOptions
+ ModelCompilationOptions(const Env& env, ConstSessionOptions session_options); ///< Wraps OrtApi::CreateModelCompilationOptionsFromSessionOptions
+
+ ModelCompilationOptions& SetInputModelPath(const ORTCHAR_T* input_model_path); ///< Wraps OrtApi::ModelCompilationOptions_SetInputModelPath
+ ModelCompilationOptions& SetInputModelFromBuffer(const void* input_model_data,
+ size_t input_model_data_size); ///< Wraps OrtApi::ModelCompilationOptions_SetInputModelFromBuffer
+ ModelCompilationOptions& SetEpContextEmbedMode(bool embed_ep_context_in_model); ///< Wraps OrtApi::ModelCompilationOptions_SetEpContextEmbedMode
+ ModelCompilationOptions& SetOutputModelPath(const ORTCHAR_T* output_model_path); ///< Wraps OrtApi::ModelCompilationOptions_SetOutputModelPath
+ ModelCompilationOptions& SetOutputModelExternalInitializersFile(const ORTCHAR_T* file_path,
+ size_t initializer_size_threshold); ///< Wraps OrtApi::ModelCompilationOptions_SetOutputModelExternalInitializersFile
+ ModelCompilationOptions& SetOutputModelBuffer(OrtAllocator* allocator, void** output_model_buffer_ptr,
+ size_t* output_model_buffer_size_ptr); ///< Wraps OrtApi::ModelCompilationOptions_SetOutputModelBuffer
+};
+
+/** \brief Compiles an input model to generate a model with EPContext nodes that execute EP-specific kernels. Wraps OrtApi::CompileModel.
+ *
+ * \param env: ORT environment object.
+ * \param model_compilation_options: Compilation options for a model.
+ * \return A Status indicating success or failure.
+ */
+Status CompileModel(const Env& env, const ModelCompilationOptions& model_compilation_options);
+
/** \brief Wrapper around ::OrtModelMetadata
*
*/
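
// A minimal sketch of the C++ wrapper declared above (file names are illustrative):
#include "onnxruntime_cxx_api.h"  // assumed include path

int main() {
  Ort::Env env;
  Ort::SessionOptions session_options;
  // Append the compiling EP(s) to session_options before compiling, e.g. with
  // session_options.AppendExecutionProvider(...).

  Ort::ModelCompilationOptions compile_options(env, session_options);
  compile_options.SetInputModelPath(ORT_TSTR("model.onnx"))
      .SetOutputModelPath(ORT_TSTR("model_ctx.onnx"))
      .SetEpContextEmbedMode(false);  // context binaries stay in side files

  Ort::Status status = Ort::CompileModel(env, compile_options);
  return status.IsOK() ? 0 : 1;
}
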
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
index 524e3ecc92936..e41ef005349ac 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@@ -630,6 +630,62 @@ inline RunOptions& RunOptions::AddActiveLoraAdapter(const LoraAdapter& adapter)
return *this;
}
+inline ModelCompilationOptions::ModelCompilationOptions(const Env& env, const SessionOptions& session_options) {
+ ThrowOnError(GetCompileApi().CreateModelCompilationOptionsFromSessionOptions(env, session_options, &this->p_));
+}
+
+inline ModelCompilationOptions::ModelCompilationOptions(const Env& env, ConstSessionOptions session_options) {
+ ThrowOnError(GetCompileApi().CreateModelCompilationOptionsFromSessionOptions(env, session_options, &this->p_));
+}
+
+inline Status CompileModel(const Env& env, const ModelCompilationOptions& model_compilation_options) {
+ return Ort::Status(GetCompileApi().CompileModel(env, model_compilation_options));
+}
+
+inline ModelCompilationOptions& ModelCompilationOptions::SetInputModelPath(
+ const ORTCHAR_T* input_model_path) {
+ Ort::ThrowOnError(GetCompileApi().ModelCompilationOptions_SetInputModelPath(this->p_, input_model_path));
+ return *this;
+}
+
+inline ModelCompilationOptions& ModelCompilationOptions::SetInputModelFromBuffer(
+ const void* input_model_data, size_t input_model_data_size) {
+ Ort::ThrowOnError(GetCompileApi().ModelCompilationOptions_SetInputModelFromBuffer(this->p_, input_model_data,
+ input_model_data_size));
+ return *this;
+}
+
+inline ModelCompilationOptions& ModelCompilationOptions::SetOutputModelPath(
+ const ORTCHAR_T* output_model_path) {
+ Ort::ThrowOnError(GetCompileApi().ModelCompilationOptions_SetOutputModelPath(this->p_, output_model_path));
+ return *this;
+}
+
+inline ModelCompilationOptions& ModelCompilationOptions::SetOutputModelExternalInitializersFile(
+ const ORTCHAR_T* file_path, size_t initializer_size_threshold) {
+ Ort::ThrowOnError(GetCompileApi().ModelCompilationOptions_SetOutputModelExternalInitializersFile(
+ this->p_,
+ file_path,
+ initializer_size_threshold));
+ return *this;
+}
+
+inline ModelCompilationOptions& ModelCompilationOptions::SetOutputModelBuffer(
+ OrtAllocator* allocator, void** output_model_buffer_ptr, size_t* output_model_buffer_size_ptr) {
+ Ort::ThrowOnError(GetCompileApi().ModelCompilationOptions_SetOutputModelBuffer(this->p_, allocator,
+ output_model_buffer_ptr,
+ output_model_buffer_size_ptr));
+ return *this;
+}
+
+inline ModelCompilationOptions& ModelCompilationOptions::SetEpContextEmbedMode(
+ bool embed_ep_context_in_model) {
+ Ort::ThrowOnError(GetCompileApi().ModelCompilationOptions_SetEpContextEmbedMode(
+ this->p_,
+ embed_ep_context_in_model));
+ return *this;
+}
+
namespace detail {
template <typename T>
diff --git a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
index af1f9c04b2831..379c74e011d6e 100644
--- a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
+++ b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
@@ -10,10 +10,10 @@
* "[Area][.[SubArea1].[SubArea2]...].[Keyname]"
* Such as "ep.cuda.use_arena"
* The Config Key cannot be empty
- * The maximum length of the Config Key is 128
+ * The maximum length of the Config Key is 1024
*
* The string format of a SessionOptions Config Value is defined individually for each Config.
- * The maximum length of the Config Value is 1024
+ * The maximum length of the Config Value is 2048
*/
// Key for disable PrePacking,
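
// A short sketch of how these limits apply: config entries are plain string
// key/value pairs validated against the maximum lengths above when they are added.
// "ep.cuda.use_arena" is the example key quoted in this header's own comment.
#include "onnxruntime_cxx_api.h"  // assumed include path

void Configure(Ort::SessionOptions& session_options) {
  // Keys longer than 1024 characters or values longer than 2048 are rejected.
  session_options.AddConfigEntry("ep.cuda.use_arena", "1");
}
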
diff --git a/java/build-android.gradle b/java/build-android.gradle
index 9c4275b74f626..610625cf02e54 100644
--- a/java/build-android.gradle
+++ b/java/build-android.gradle
@@ -147,7 +147,7 @@ artifacts {
dependencies {
testImplementation 'org.junit.jupiter:junit-jupiter-api:5.7.0'
testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.7.0'
- testImplementation 'com.google.protobuf:protobuf-java:3.21.7'
+ testImplementation 'com.google.protobuf:protobuf-java:3.25.5'
}
publishing {
diff --git a/java/build.gradle b/java/build.gradle
index 8452daab72872..2d43d1ead13f0 100644
--- a/java/build.gradle
+++ b/java/build.gradle
@@ -179,7 +179,7 @@ if (cmakeBuildDir != null) {
dependencies {
testImplementation 'org.junit.jupiter:junit-jupiter-api:5.9.2'
testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.9.2'
- testImplementation 'com.google.protobuf:protobuf-java:3.21.7'
+ testImplementation 'com.google.protobuf:protobuf-java:3.25.5'
}
processTestResources {
diff --git a/js/.vscode/launch.json b/js/.vscode/launch.json
index 5fd79872cf07b..6fd4c855054bc 100644
--- a/js/.vscode/launch.json
+++ b/js/.vscode/launch.json
@@ -16,6 +16,15 @@
"sourceMaps": true,
"preLaunchTask": "tsc: build - common/test/tsconfig.json"
},
+ {
+ "name": "[node] Launch installation script",
+ "program": "${workspaceFolder}/node/script/install.js",
+ "request": "launch",
+ "skipFiles": ["/**"],
+ "type": "node",
+ "cwd": "${workspaceFolder}/node",
+ "args": ["--onnxruntime-node-install"]
+ },
{
"name": "[web] Launch Build script in Node.js",
"program": "${workspaceFolder}/web/script/build.js",
diff --git a/js/README.md b/js/README.md
index eb95c9224c081..dbc58f3a75ebd 100644
--- a/js/README.md
+++ b/js/README.md
@@ -24,9 +24,9 @@ Please follow the steps described below to setup development environment.
### Prerequisites
-- Node.js (16.0+): https://nodejs.org/ - (Optional) Use nvm ([Windows](https://github.com/coreybutler/nvm-windows) / [Mac/Linux](https://github.com/creationix/nvm)) to install Node.js
+- Node.js (20.0+): https://nodejs.org/ - (Optional) Use nvm ([Windows](https://github.com/coreybutler/nvm-windows) / [Mac/Linux](https://github.com/creationix/nvm)) to install Node.js
-- Python (2.7 or 3.6+): https://www.python.org/downloads/
+- Python (3.9+): https://www.python.org/downloads/
- python should be added to the PATH environment variable
@@ -72,7 +72,7 @@ This project is designed to include all "common" code, which are pure javascript
### Requirements
-Node.js v12+ (recommended v14+)
+Node.js v20+
### Build
@@ -108,7 +108,7 @@ Document will be generated in folder `/js/common/docs`.
> language: typescript/C++
-> dependency: onnxruntime-common, ONNXRuntime.dll
+> dependency: onnxruntime-common, ONNXRuntime shared library (.so/.dll/.dylib)
> folder: /js/node
@@ -116,7 +116,7 @@ This project is designed to be used as a NPM package to enable Node.js users to
### Requirements
-Node.js v12+ (recommended v14+)
+Node.js v20+
### Build
diff --git a/js/node/CMakeLists.txt b/js/node/CMakeLists.txt
index c78b40a3e7429..2bd6f22e5f901 100644
--- a/js/node/CMakeLists.txt
+++ b/js/node/CMakeLists.txt
@@ -12,7 +12,7 @@ execute_process(COMMAND node -e "console.log(process.platform)"
OUTPUT_VARIABLE node_platform OUTPUT_STRIP_TRAILING_WHITESPACE)
file(READ ${CMAKE_SOURCE_DIR}/../../VERSION_NUMBER ort_version)
string(STRIP "${ort_version}" ort_version)
-set(dist_folder "${CMAKE_SOURCE_DIR}/bin/napi-v3/${node_platform}/${NODE_ARCH}/")
+set(dist_folder "${CMAKE_SOURCE_DIR}/bin/napi-v6/${node_platform}/${NODE_ARCH}/")
# onnxruntime.dll dir
if(NOT ONNXRUNTIME_BUILD_DIR)
diff --git a/js/node/README.md b/js/node/README.md
index abb91bf05ddf1..c271d8daccc8b 100644
--- a/js/node/README.md
+++ b/js/node/README.md
@@ -10,6 +10,12 @@ Install the latest stable version:
npm install onnxruntime-node
```
+Install the nightly version:
+
+```
+npm install onnxruntime-node@dev
+```
+
Refer to [ONNX Runtime JavaScript examples](https://github.com/microsoft/onnxruntime-inference-examples/tree/main/js) for samples and tutorials.
## Requirements
@@ -18,33 +24,32 @@ ONNXRuntime works on Node.js v16.x+ (recommend v20.x+) or Electron v15.x+ (recom
The following table lists the supported versions of ONNX Runtime Node.js binding provided with pre-built binaries.
-| EPs/Platforms | Windows x64 | Windows arm64 | Linux x64 | Linux arm64 | MacOS x64 | MacOS arm64 |
-| ------------- | ----------- | ------------- | ----------------- | ----------- | --------- | ----------- |
-| CPU | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |
-| DirectML | ✔️ | ✔️ | ❌ | ❌ | ❌ | ❌ |
-| CUDA | ❌ | ❌ | ✔️\[1] | ❌ | ❌ | ❌ |
+| EPs/Platforms | Windows x64 | Windows arm64 | Linux x64 | Linux arm64 | MacOS x64 | MacOS arm64 |
+| ------------- | ------------------ | ------------------ | ------------------ | ------------------ | ------------------ | ------------------ |
+| CPU | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |
+| WebGPU | ✔️ \[1] | ✔️ \[1] | ✔️ \[1] | ✔️ \[1] | ✔️ \[1] | ✔️ \[1] |
+| DirectML | ✔️ | ✔️ | ❌ | ❌ | ❌ | ❌ |
+| CUDA | ❌ | ❌ | ✔️\[2] | ❌ | ❌ | ❌ |
+| CoreML | ❌ | ❌ | ❌ | ❌ | ✔️ | ✔️ |
-- \[1]: CUDA v11.8.
+- \[1]: WebGPU support is currently experimental.
+- \[2]: CUDA v12. See [CUDA EP Installation](#cuda-ep-installation) for details.
To use on platforms without pre-built binaries, you can build Node.js binding from source and consume it by `npm install /js/node/`. See also [instructions](https://onnxruntime.ai/docs/build/inferencing.html#apis-and-language-bindings) for building ONNX Runtime Node.js binding locally.
# GPU Support
-Right now, the Windows version supports only the DML provider. Linux x64 can use CUDA and TensorRT.
+Right now, the Windows version supports the WebGPU and DirectML execution providers. Linux x64 can use CUDA and TensorRT.
## CUDA EP Installation
-To use CUDA EP, you need to install the CUDA EP binaries. By default, the CUDA EP binaries are installed automatically when you install the package. If you want to skip the installation, you can pass the `--onnxruntime-node-install-cuda=skip` flag to the installation command.
+To use CUDA EP, you need to install the CUDA EP binaries. By default, the CUDA EP binaries are installed automatically when you install the package. If you want to skip the installation, you can pass the `--onnxruntime-node-install=skip` flag to the installation command.
```
-npm install onnxruntime-node --onnxruntime-node-install-cuda=skip
+npm install onnxruntime-node --onnxruntime-node-install=skip
```
-You can also use this flag to specify the version of the CUDA: (v11 or v12)
-
-```
-npm install onnxruntime-node --onnxruntime-node-install-cuda=v12
-```
+~~You can also use this flag to specify the CUDA version (v11 or v12).~~ CUDA v11 is no longer supported since v1.22.
## License
diff --git a/js/node/lib/binding.ts b/js/node/lib/binding.ts
index ed133734ce66a..ab4a72a4e60a5 100644
--- a/js/node/lib/binding.ts
+++ b/js/node/lib/binding.ts
@@ -53,7 +53,7 @@ export declare namespace Binding {
// export native binding
export const binding =
// eslint-disable-next-line @typescript-eslint/no-require-imports, @typescript-eslint/no-var-requires
- require(`../bin/napi-v3/${process.platform}/${process.arch}/onnxruntime_binding.node`) as {
+ require(`../bin/napi-v6/${process.platform}/${process.arch}/onnxruntime_binding.node`) as {
// eslint-disable-next-line @typescript-eslint/naming-convention
InferenceSession: Binding.InferenceSessionConstructor;
listSupportedBackends: () => Binding.SupportedBackend[];
diff --git a/js/node/package-lock.json b/js/node/package-lock.json
index 41ffb071b9ced..b445ce9e8c5c6 100644
--- a/js/node/package-lock.json
+++ b/js/node/package-lock.json
@@ -15,9 +15,9 @@
"linux"
],
"dependencies": {
+ "adm-zip": "^0.5.16",
"global-agent": "^3.0.0",
- "onnxruntime-common": "file:../common",
- "tar": "^7.0.1"
+ "onnxruntime-common": "file:../common"
},
"devDependencies": {
"@types/minimist": "^1.2.2",
@@ -36,123 +36,6 @@
"typedoc": "^0.25.7"
}
},
- "node_modules/@isaacs/cliui": {
- "version": "8.0.2",
- "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz",
- "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==",
- "dependencies": {
- "string-width": "^5.1.2",
- "string-width-cjs": "npm:string-width@^4.2.0",
- "strip-ansi": "^7.0.1",
- "strip-ansi-cjs": "npm:strip-ansi@^6.0.1",
- "wrap-ansi": "^8.1.0",
- "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0"
- },
- "engines": {
- "node": ">=12"
- }
- },
- "node_modules/@isaacs/cliui/node_modules/ansi-regex": {
- "version": "6.0.1",
- "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz",
- "integrity": "sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==",
- "engines": {
- "node": ">=12"
- },
- "funding": {
- "url": "https://github.com/chalk/ansi-regex?sponsor=1"
- }
- },
- "node_modules/@isaacs/cliui/node_modules/ansi-styles": {
- "version": "6.2.1",
- "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz",
- "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==",
- "engines": {
- "node": ">=12"
- },
- "funding": {
- "url": "https://github.com/chalk/ansi-styles?sponsor=1"
- }
- },
- "node_modules/@isaacs/cliui/node_modules/emoji-regex": {
- "version": "9.2.2",
- "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz",
- "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg=="
- },
- "node_modules/@isaacs/cliui/node_modules/string-width": {
- "version": "5.1.2",
- "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz",
- "integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==",
- "dependencies": {
- "eastasianwidth": "^0.2.0",
- "emoji-regex": "^9.2.2",
- "strip-ansi": "^7.0.1"
- },
- "engines": {
- "node": ">=12"
- },
- "funding": {
- "url": "https://github.com/sponsors/sindresorhus"
- }
- },
- "node_modules/@isaacs/cliui/node_modules/strip-ansi": {
- "version": "7.1.0",
- "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz",
- "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==",
- "dependencies": {
- "ansi-regex": "^6.0.1"
- },
- "engines": {
- "node": ">=12"
- },
- "funding": {
- "url": "https://github.com/chalk/strip-ansi?sponsor=1"
- }
- },
- "node_modules/@isaacs/cliui/node_modules/wrap-ansi": {
- "version": "8.1.0",
- "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz",
- "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==",
- "dependencies": {
- "ansi-styles": "^6.1.0",
- "string-width": "^5.0.1",
- "strip-ansi": "^7.0.1"
- },
- "engines": {
- "node": ">=12"
- },
- "funding": {
- "url": "https://github.com/chalk/wrap-ansi?sponsor=1"
- }
- },
- "node_modules/@isaacs/fs-minipass": {
- "version": "4.0.0",
- "resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.0.tgz",
- "integrity": "sha512-S00nN1Qt3z3dSP6Db45fj/mksrAq5XWNIJ/SWXGP8XPT2jrzEuYRCSEx08JpJwBcG2F1xgiOtBMGDU0AZHmxew==",
- "dependencies": {
- "minipass": "^7.0.4"
- },
- "engines": {
- "node": ">=18.0.0"
- }
- },
- "node_modules/@isaacs/fs-minipass/node_modules/minipass": {
- "version": "7.0.4",
- "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.0.4.tgz",
- "integrity": "sha512-jYofLM5Dam9279rdkWzqHozUo4ybjdZmCsDHePy5V/PbBcVMiSZR97gmAy45aqi8CK1lG2ECd356FU86avfwUQ==",
- "engines": {
- "node": ">=16 || 14 >=14.17"
- }
- },
- "node_modules/@pkgjs/parseargs": {
- "version": "0.11.0",
- "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz",
- "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==",
- "optional": true,
- "engines": {
- "node": ">=14"
- }
- },
"node_modules/@protobufjs/aspromise": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
@@ -229,10 +112,20 @@
"integrity": "sha512-93+VvleD3mXwlLI/xASjw0FzKcwzl3OdTCzm1LaRfqgS21gfFtK3zDXM5Op9TeeMsJVOaJ2VRDpT9q4Y3d0AvA==",
"dev": true
},
+ "node_modules/adm-zip": {
+ "version": "0.5.16",
+ "resolved": "https://registry.npmjs.org/adm-zip/-/adm-zip-0.5.16.tgz",
+ "integrity": "sha512-TGw5yVi4saajsSEgz25grObGHEUaDrniwvA2qwSC060KfqGPdglhvPMA2lPIoxs3PQIItj2iag35fONcQqgUaQ==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=12.0"
+ }
+ },
"node_modules/ansi-regex": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
"integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
+ "dev": true,
"engines": {
"node": ">=8"
}
@@ -241,6 +134,7 @@
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
"integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
+ "dev": true,
"dependencies": {
"color-convert": "^2.0.1"
},
@@ -288,33 +182,12 @@
"proxy-from-env": "^1.1.0"
}
},
- "node_modules/balanced-match": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
- "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="
- },
"node_modules/boolean": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/boolean/-/boolean-3.2.0.tgz",
"integrity": "sha512-d0II/GO9uf9lfUHH2BQsjxzRJZBdsjgsBiW4BvhWk/3qoKwQFjIDVN19PfX8F2D/r9PCMTtLWjYVCFrpeYUzsw==",
"deprecated": "Package no longer supported. Contact Support at https://www.npmjs.com/support for more info."
},
- "node_modules/brace-expansion": {
- "version": "2.0.1",
- "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
- "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
- "dependencies": {
- "balanced-match": "^1.0.0"
- }
- },
- "node_modules/chownr": {
- "version": "3.0.0",
- "resolved": "https://registry.npmjs.org/chownr/-/chownr-3.0.0.tgz",
- "integrity": "sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==",
- "engines": {
- "node": ">=18"
- }
- },
"node_modules/cliui": {
"version": "8.0.1",
"resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz",
@@ -423,6 +296,7 @@
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
"integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==",
+ "dev": true,
"dependencies": {
"color-name": "~1.1.4"
},
@@ -433,7 +307,8 @@
"node_modules/color-name": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
- "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA=="
+ "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==",
+ "dev": true
},
"node_modules/color-support": {
"version": "1.1.3",
@@ -462,19 +337,6 @@
"integrity": "sha512-ty/fTekppD2fIwRvnZAVdeOiGd1c7YXEixbgJTNzqcxJWKQnjJ/V1bNEEE6hygpM3WjwHFUVK6HTjWSzV4a8sQ==",
"dev": true
},
- "node_modules/cross-spawn": {
- "version": "7.0.6",
- "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
- "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
- "dependencies": {
- "path-key": "^3.1.0",
- "shebang-command": "^2.0.0",
- "which": "^2.0.1"
- },
- "engines": {
- "node": ">= 8"
- }
- },
"node_modules/debug": {
"version": "4.3.4",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz",
@@ -553,15 +415,11 @@
"resolved": "https://registry.npmjs.org/detect-node/-/detect-node-2.1.0.tgz",
"integrity": "sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g=="
},
- "node_modules/eastasianwidth": {
- "version": "0.2.0",
- "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz",
- "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA=="
- },
"node_modules/emoji-regex": {
"version": "8.0.0",
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
- "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="
+ "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
+ "dev": true
},
"node_modules/error-ex": {
"version": "1.3.2",
@@ -639,32 +497,6 @@
}
}
},
- "node_modules/foreground-child": {
- "version": "3.1.1",
- "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.1.1.tgz",
- "integrity": "sha512-TMKDUnIte6bfb5nWv7V/caI169OHgvwjb7V4WkeUvbQQdjr5rWKqHFiKWb/fcOwB+CzBT+qbWjvj+DVwRskpIg==",
- "dependencies": {
- "cross-spawn": "^7.0.0",
- "signal-exit": "^4.0.1"
- },
- "engines": {
- "node": ">=14"
- },
- "funding": {
- "url": "https://github.com/sponsors/isaacs"
- }
- },
- "node_modules/foreground-child/node_modules/signal-exit": {
- "version": "4.1.0",
- "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz",
- "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==",
- "engines": {
- "node": ">=14"
- },
- "funding": {
- "url": "https://github.com/sponsors/isaacs"
- }
- },
"node_modules/form-data": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
@@ -745,35 +577,6 @@
"node": "6.* || 8.* || >= 10.*"
}
},
- "node_modules/glob": {
- "version": "10.3.12",
- "resolved": "https://registry.npmjs.org/glob/-/glob-10.3.12.tgz",
- "integrity": "sha512-TCNv8vJ+xz4QiqTpfOJA7HvYv+tNIRHKfUWw/q+v2jdgN4ebz+KY9tGx5J4rHP0o84mNP+ApH66HRX8us3Khqg==",
- "dependencies": {
- "foreground-child": "^3.1.0",
- "jackspeak": "^2.3.6",
- "minimatch": "^9.0.1",
- "minipass": "^7.0.4",
- "path-scurry": "^1.10.2"
- },
- "bin": {
- "glob": "dist/esm/bin.mjs"
- },
- "engines": {
- "node": ">=16 || 14 >=14.17"
- },
- "funding": {
- "url": "https://github.com/sponsors/isaacs"
- }
- },
- "node_modules/glob/node_modules/minipass": {
- "version": "7.0.4",
- "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.0.4.tgz",
- "integrity": "sha512-jYofLM5Dam9279rdkWzqHozUo4ybjdZmCsDHePy5V/PbBcVMiSZR97gmAy45aqi8CK1lG2ECd356FU86avfwUQ==",
- "engines": {
- "node": ">=16 || 14 >=14.17"
- }
- },
"node_modules/global-agent": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/global-agent/-/global-agent-3.0.0.tgz",
@@ -861,6 +664,7 @@
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
"integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
+ "dev": true,
"engines": {
"node": ">=8"
}
@@ -868,24 +672,8 @@
"node_modules/isexe": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
- "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw=="
- },
- "node_modules/jackspeak": {
- "version": "2.3.6",
- "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-2.3.6.tgz",
- "integrity": "sha512-N3yCS/NegsOBokc8GAdM8UcmfsKiSS8cipheD/nivzr700H+nsMOxJjQnvwOcRYVuFkdH0wGUvW2WbXGmrZGbQ==",
- "dependencies": {
- "@isaacs/cliui": "^8.0.2"
- },
- "engines": {
- "node": ">=14"
- },
- "funding": {
- "url": "https://github.com/sponsors/isaacs"
- },
- "optionalDependencies": {
- "@pkgjs/parseargs": "^0.11.0"
- }
+ "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
+ "dev": true
},
"node_modules/json-parse-better-errors": {
"version": "1.0.2",
@@ -991,20 +779,6 @@
"node": ">= 0.6"
}
},
- "node_modules/minimatch": {
- "version": "9.0.4",
- "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.4.tgz",
- "integrity": "sha512-KqWh+VchfxcMNRAJjj2tnsSJdNbHsVgnkBhTNrW7AjVo6OvLtxw8zfT9oLw1JSohlFzJ8jCoTgaoXvJ+kHt6fw==",
- "dependencies": {
- "brace-expansion": "^2.0.1"
- },
- "engines": {
- "node": ">=16 || 14 >=14.17"
- },
- "funding": {
- "url": "https://github.com/sponsors/isaacs"
- }
- },
"node_modules/minimist": {
"version": "1.2.8",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
@@ -1018,30 +792,11 @@
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/minipass/-/minipass-5.0.0.tgz",
"integrity": "sha512-3FnjYuehv9k6ovOEbyOswadCDPX1piCfhV8ncmYtHOjuPwylVWsghTLo7rabjC3Rx5xD4HDx8Wm1xnMF7S5qFQ==",
+ "dev": true,
"engines": {
"node": ">=8"
}
},
- "node_modules/minizlib": {
- "version": "3.0.1",
- "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.0.1.tgz",
- "integrity": "sha512-umcy022ILvb5/3Djuu8LWeqUa8D68JaBzlttKeMWen48SjabqS3iY5w/vzeMzMUNhLDifyhbOwKDSznB1vvrwg==",
- "dependencies": {
- "minipass": "^7.0.4",
- "rimraf": "^5.0.5"
- },
- "engines": {
- "node": ">= 18"
- }
- },
- "node_modules/minizlib/node_modules/minipass": {
- "version": "7.0.4",
- "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.0.4.tgz",
- "integrity": "sha512-jYofLM5Dam9279rdkWzqHozUo4ybjdZmCsDHePy5V/PbBcVMiSZR97gmAy45aqi8CK1lG2ECd356FU86avfwUQ==",
- "engines": {
- "node": ">=16 || 14 >=14.17"
- }
- },
"node_modules/mkdirp": {
"version": "0.5.6",
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.6.tgz",
@@ -1112,37 +867,6 @@
"node": ">=4"
}
},
- "node_modules/path-key": {
- "version": "3.1.1",
- "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz",
- "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==",
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/path-scurry": {
- "version": "1.10.2",
- "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.10.2.tgz",
- "integrity": "sha512-7xTavNy5RQXnsjANvVvMkEjvloOinkAjv/Z6Ildz9v2RinZ4SBKTWFOVRbaF8p0vpHnyjV/UwNDdKuUv6M5qcA==",
- "dependencies": {
- "lru-cache": "^10.2.0",
- "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0"
- },
- "engines": {
- "node": ">=16 || 14 >=14.17"
- },
- "funding": {
- "url": "https://github.com/sponsors/isaacs"
- }
- },
- "node_modules/path-scurry/node_modules/lru-cache": {
- "version": "10.2.0",
- "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.2.0.tgz",
- "integrity": "sha512-2bIM8x+VAf6JT4bKAljS1qUWgMsqZRPGJS6FSahIMPVvctcNhyVp7AJu7quxOW9jwkryBReKZY5tY5JYv2n/7Q==",
- "engines": {
- "node": "14 || >=16.14"
- }
- },
"node_modules/protobufjs": {
"version": "7.2.5",
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.2.5.tgz",
@@ -1220,23 +944,6 @@
"node": ">=0.10.0"
}
},
- "node_modules/rimraf": {
- "version": "5.0.5",
- "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-5.0.5.tgz",
- "integrity": "sha512-CqDakW+hMe/Bz202FPEymy68P+G50RfMQK+Qo5YUqc9SPipvbGjCGKd0RSKEelbsfQuw3g5NZDSrlZZAJurH1A==",
- "dependencies": {
- "glob": "^10.3.7"
- },
- "bin": {
- "rimraf": "dist/esm/bin.mjs"
- },
- "engines": {
- "node": ">=14"
- },
- "funding": {
- "url": "https://github.com/sponsors/isaacs"
- }
- },
"node_modules/roarr": {
"version": "2.15.4",
"resolved": "https://registry.npmjs.org/roarr/-/roarr-2.15.4.tgz",
@@ -1312,25 +1019,6 @@
"integrity": "sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==",
"dev": true
},
- "node_modules/shebang-command": {
- "version": "2.0.0",
- "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
- "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==",
- "dependencies": {
- "shebang-regex": "^3.0.0"
- },
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/shebang-regex": {
- "version": "3.0.0",
- "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz",
- "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==",
- "engines": {
- "node": ">=8"
- }
- },
"node_modules/signal-exit": {
"version": "3.0.7",
"resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz",
@@ -1355,20 +1043,7 @@
"version": "4.2.3",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
- "dependencies": {
- "emoji-regex": "^8.0.0",
- "is-fullwidth-code-point": "^3.0.0",
- "strip-ansi": "^6.0.1"
- },
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/string-width-cjs": {
- "name": "string-width",
- "version": "4.2.3",
- "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
- "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
+ "dev": true,
"dependencies": {
"emoji-regex": "^8.0.0",
"is-fullwidth-code-point": "^3.0.0",
@@ -1382,18 +1057,7 @@
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
"integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
- "dependencies": {
- "ansi-regex": "^5.0.1"
- },
- "engines": {
- "node": ">=8"
- }
- },
- "node_modules/strip-ansi-cjs": {
- "name": "strip-ansi",
- "version": "6.0.1",
- "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
- "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
+ "dev": true,
"dependencies": {
"ansi-regex": "^5.0.1"
},
@@ -1422,44 +1086,6 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
- "node_modules/tar": {
- "version": "7.0.1",
- "resolved": "https://registry.npmjs.org/tar/-/tar-7.0.1.tgz",
- "integrity": "sha512-IjMhdQMZFpKsHEQT3woZVxBtCQY+0wk3CVxdRkGXEgyGa0dNS/ehPvOMr2nmfC7x5Zj2N+l6yZUpmICjLGS35w==",
- "dependencies": {
- "@isaacs/fs-minipass": "^4.0.0",
- "chownr": "^3.0.0",
- "minipass": "^5.0.0",
- "minizlib": "^3.0.1",
- "mkdirp": "^3.0.1",
- "yallist": "^5.0.0"
- },
- "engines": {
- "node": ">=18"
- }
- },
- "node_modules/tar/node_modules/mkdirp": {
- "version": "3.0.1",
- "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz",
- "integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==",
- "bin": {
- "mkdirp": "dist/cjs/src/bin.js"
- },
- "engines": {
- "node": ">=10"
- },
- "funding": {
- "url": "https://github.com/sponsors/isaacs"
- }
- },
- "node_modules/tar/node_modules/yallist": {
- "version": "5.0.0",
- "resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz",
- "integrity": "sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==",
- "engines": {
- "node": ">=18"
- }
- },
"node_modules/type-fest": {
"version": "0.13.1",
"resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.13.1.tgz",
@@ -1496,6 +1122,7 @@
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
"integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==",
+ "dev": true,
"dependencies": {
"isexe": "^2.0.0"
},
@@ -1532,23 +1159,6 @@
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
}
},
- "node_modules/wrap-ansi-cjs": {
- "name": "wrap-ansi",
- "version": "7.0.0",
- "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
- "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==",
- "dependencies": {
- "ansi-styles": "^4.0.0",
- "string-width": "^4.1.0",
- "strip-ansi": "^6.0.0"
- },
- "engines": {
- "node": ">=10"
- },
- "funding": {
- "url": "https://github.com/chalk/wrap-ansi?sponsor=1"
- }
- },
"node_modules/y18n": {
"version": "5.0.8",
"resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
@@ -1592,85 +1202,6 @@
}
},
"dependencies": {
- "@isaacs/cliui": {
- "version": "8.0.2",
- "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz",
- "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==",
- "requires": {
- "string-width": "^5.1.2",
- "string-width-cjs": "npm:string-width@^4.2.0",
- "strip-ansi": "^7.0.1",
- "strip-ansi-cjs": "npm:strip-ansi@^6.0.1",
- "wrap-ansi": "^8.1.0",
- "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0"
- },
- "dependencies": {
- "ansi-regex": {
- "version": "6.0.1",
- "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz",
- "integrity": "sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA=="
- },
- "ansi-styles": {
- "version": "6.2.1",
- "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz",
- "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug=="
- },
- "emoji-regex": {
- "version": "9.2.2",
- "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz",
- "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg=="
- },
- "string-width": {
- "version": "5.1.2",
- "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz",
- "integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==",
- "requires": {
- "eastasianwidth": "^0.2.0",
- "emoji-regex": "^9.2.2",
- "strip-ansi": "^7.0.1"
- }
- },
- "strip-ansi": {
- "version": "7.1.0",
- "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz",
- "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==",
- "requires": {
- "ansi-regex": "^6.0.1"
- }
- },
- "wrap-ansi": {
- "version": "8.1.0",
- "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz",
- "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==",
- "requires": {
- "ansi-styles": "^6.1.0",
- "string-width": "^5.0.1",
- "strip-ansi": "^7.0.1"
- }
- }
- }
- },
- "@isaacs/fs-minipass": {
- "version": "4.0.0",
- "resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.0.tgz",
- "integrity": "sha512-S00nN1Qt3z3dSP6Db45fj/mksrAq5XWNIJ/SWXGP8XPT2jrzEuYRCSEx08JpJwBcG2F1xgiOtBMGDU0AZHmxew==",
- "requires": {
- "minipass": "^7.0.4"
- },
- "dependencies": {
- "minipass": {
- "version": "7.0.4",
- "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.0.4.tgz",
- "integrity": "sha512-jYofLM5Dam9279rdkWzqHozUo4ybjdZmCsDHePy5V/PbBcVMiSZR97gmAy45aqi8CK1lG2ECd356FU86avfwUQ=="
- }
- }
- },
- "@pkgjs/parseargs": {
- "version": "0.11.0",
- "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz",
- "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==",
- "optional": true
- },
"@protobufjs/aspromise": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
@@ -1747,15 +1278,22 @@
"integrity": "sha512-93+VvleD3mXwlLI/xASjw0FzKcwzl3OdTCzm1LaRfqgS21gfFtK3zDXM5Op9TeeMsJVOaJ2VRDpT9q4Y3d0AvA==",
"dev": true
},
+ "adm-zip": {
+ "version": "0.5.16",
+ "resolved": "https://registry.npmjs.org/adm-zip/-/adm-zip-0.5.16.tgz",
+ "integrity": "sha512-TGw5yVi4saajsSEgz25grObGHEUaDrniwvA2qwSC060KfqGPdglhvPMA2lPIoxs3PQIItj2iag35fONcQqgUaQ=="
+ },
"ansi-regex": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
- "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ=="
+ "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
+ "dev": true
},
"ansi-styles": {
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
"integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
+ "dev": true,
"requires": {
"color-convert": "^2.0.1"
}
@@ -1793,29 +1331,11 @@
"proxy-from-env": "^1.1.0"
}
},
- "balanced-match": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
- "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="
- },
"boolean": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/boolean/-/boolean-3.2.0.tgz",
"integrity": "sha512-d0II/GO9uf9lfUHH2BQsjxzRJZBdsjgsBiW4BvhWk/3qoKwQFjIDVN19PfX8F2D/r9PCMTtLWjYVCFrpeYUzsw=="
},
- "brace-expansion": {
- "version": "2.0.1",
- "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
- "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
- "requires": {
- "balanced-match": "^1.0.0"
- }
- },
- "chownr": {
- "version": "3.0.0",
- "resolved": "https://registry.npmjs.org/chownr/-/chownr-3.0.0.tgz",
- "integrity": "sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g=="
- },
"cliui": {
"version": "8.0.1",
"resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz",
@@ -1901,6 +1421,7 @@
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
"integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==",
+ "dev": true,
"requires": {
"color-name": "~1.1.4"
}
@@ -1908,7 +1429,8 @@
"color-name": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
- "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA=="
+ "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==",
+ "dev": true
},
"color-support": {
"version": "1.1.3",
@@ -1931,16 +1453,6 @@
"integrity": "sha512-ty/fTekppD2fIwRvnZAVdeOiGd1c7YXEixbgJTNzqcxJWKQnjJ/V1bNEEE6hygpM3WjwHFUVK6HTjWSzV4a8sQ==",
"dev": true
},
- "cross-spawn": {
- "version": "7.0.6",
- "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
- "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
- "requires": {
- "path-key": "^3.1.0",
- "shebang-command": "^2.0.0",
- "which": "^2.0.1"
- }
- },
"debug": {
"version": "4.3.4",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz",
@@ -1993,15 +1505,11 @@
"resolved": "https://registry.npmjs.org/detect-node/-/detect-node-2.1.0.tgz",
"integrity": "sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g=="
},
- "eastasianwidth": {
- "version": "0.2.0",
- "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz",
- "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA=="
- },
"emoji-regex": {
"version": "8.0.0",
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
- "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="
+ "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
+ "dev": true
},
"error-ex": {
"version": "1.3.2",
@@ -2050,22 +1558,6 @@
"integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==",
"dev": true
},
- "foreground-child": {
- "version": "3.1.1",
- "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.1.1.tgz",
- "integrity": "sha512-TMKDUnIte6bfb5nWv7V/caI169OHgvwjb7V4WkeUvbQQdjr5rWKqHFiKWb/fcOwB+CzBT+qbWjvj+DVwRskpIg==",
- "requires": {
- "cross-spawn": "^7.0.0",
- "signal-exit": "^4.0.1"
- },
- "dependencies": {
- "signal-exit": {
- "version": "4.1.0",
- "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz",
- "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw=="
- }
- }
- },
"form-data": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
@@ -2130,25 +1622,6 @@
"integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==",
"dev": true
},
- "glob": {
- "version": "10.3.12",
- "resolved": "https://registry.npmjs.org/glob/-/glob-10.3.12.tgz",
- "integrity": "sha512-TCNv8vJ+xz4QiqTpfOJA7HvYv+tNIRHKfUWw/q+v2jdgN4ebz+KY9tGx5J4rHP0o84mNP+ApH66HRX8us3Khqg==",
- "requires": {
- "foreground-child": "^3.1.0",
- "jackspeak": "^2.3.6",
- "minimatch": "^9.0.1",
- "minipass": "^7.0.4",
- "path-scurry": "^1.10.2"
- },
- "dependencies": {
- "minipass": {
- "version": "7.0.4",
- "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.0.4.tgz",
- "integrity": "sha512-jYofLM5Dam9279rdkWzqHozUo4ybjdZmCsDHePy5V/PbBcVMiSZR97gmAy45aqi8CK1lG2ECd356FU86avfwUQ=="
- }
- }
- },
"global-agent": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/global-agent/-/global-agent-3.0.0.tgz",
@@ -2217,21 +1690,14 @@
"is-fullwidth-code-point": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
- "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg=="
+ "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
+ "dev": true
},
"isexe": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
- "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw=="
- },
- "jackspeak": {
- "version": "2.3.6",
- "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-2.3.6.tgz",
- "integrity": "sha512-N3yCS/NegsOBokc8GAdM8UcmfsKiSS8cipheD/nivzr700H+nsMOxJjQnvwOcRYVuFkdH0wGUvW2WbXGmrZGbQ==",
- "requires": {
- "@isaacs/cliui": "^8.0.2",
- "@pkgjs/parseargs": "^0.11.0"
- }
+ "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
+ "dev": true
},
"json-parse-better-errors": {
"version": "1.0.2",
@@ -2320,14 +1786,6 @@
"mime-db": "1.52.0"
}
},
- "minimatch": {
- "version": "9.0.4",
- "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.4.tgz",
- "integrity": "sha512-KqWh+VchfxcMNRAJjj2tnsSJdNbHsVgnkBhTNrW7AjVo6OvLtxw8zfT9oLw1JSohlFzJ8jCoTgaoXvJ+kHt6fw==",
- "requires": {
- "brace-expansion": "^2.0.1"
- }
- },
"minimist": {
"version": "1.2.8",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
@@ -2337,23 +1795,8 @@
"minipass": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/minipass/-/minipass-5.0.0.tgz",
- "integrity": "sha512-3FnjYuehv9k6ovOEbyOswadCDPX1piCfhV8ncmYtHOjuPwylVWsghTLo7rabjC3Rx5xD4HDx8Wm1xnMF7S5qFQ=="
- },
- "minizlib": {
- "version": "3.0.1",
- "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.0.1.tgz",
- "integrity": "sha512-umcy022ILvb5/3Djuu8LWeqUa8D68JaBzlttKeMWen48SjabqS3iY5w/vzeMzMUNhLDifyhbOwKDSznB1vvrwg==",
- "requires": {
- "minipass": "^7.0.4",
- "rimraf": "^5.0.5"
- },
- "dependencies": {
- "minipass": {
- "version": "7.0.4",
- "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.0.4.tgz",
- "integrity": "sha512-jYofLM5Dam9279rdkWzqHozUo4ybjdZmCsDHePy5V/PbBcVMiSZR97gmAy45aqi8CK1lG2ECd356FU86avfwUQ=="
- }
- }
+ "integrity": "sha512-3FnjYuehv9k6ovOEbyOswadCDPX1piCfhV8ncmYtHOjuPwylVWsghTLo7rabjC3Rx5xD4HDx8Wm1xnMF7S5qFQ==",
+ "dev": true
},
"mkdirp": {
"version": "0.5.6",
@@ -2415,27 +1858,6 @@
"json-parse-better-errors": "^1.0.1"
}
},
- "path-key": {
- "version": "3.1.1",
- "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz",
- "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q=="
- },
- "path-scurry": {
- "version": "1.10.2",
- "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.10.2.tgz",
- "integrity": "sha512-7xTavNy5RQXnsjANvVvMkEjvloOinkAjv/Z6Ildz9v2RinZ4SBKTWFOVRbaF8p0vpHnyjV/UwNDdKuUv6M5qcA==",
- "requires": {
- "lru-cache": "^10.2.0",
- "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0"
- },
- "dependencies": {
- "lru-cache": {
- "version": "10.2.0",
- "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.2.0.tgz",
- "integrity": "sha512-2bIM8x+VAf6JT4bKAljS1qUWgMsqZRPGJS6FSahIMPVvctcNhyVp7AJu7quxOW9jwkryBReKZY5tY5JYv2n/7Q=="
- }
- }
- },
"protobufjs": {
"version": "7.2.5",
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.2.5.tgz",
@@ -2499,14 +1921,6 @@
"integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==",
"dev": true
},
- "rimraf": {
- "version": "5.0.5",
- "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-5.0.5.tgz",
- "integrity": "sha512-CqDakW+hMe/Bz202FPEymy68P+G50RfMQK+Qo5YUqc9SPipvbGjCGKd0RSKEelbsfQuw3g5NZDSrlZZAJurH1A==",
- "requires": {
- "glob": "^10.3.7"
- }
- },
"roarr": {
"version": "2.15.4",
"resolved": "https://registry.npmjs.org/roarr/-/roarr-2.15.4.tgz",
@@ -2553,19 +1967,6 @@
"integrity": "sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==",
"dev": true
},
- "shebang-command": {
- "version": "2.0.0",
- "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
- "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==",
- "requires": {
- "shebang-regex": "^3.0.0"
- }
- },
- "shebang-regex": {
- "version": "3.0.0",
- "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz",
- "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A=="
- },
"signal-exit": {
"version": "3.0.7",
"resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz",
@@ -2590,16 +1991,7 @@
"version": "4.2.3",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
- "requires": {
- "emoji-regex": "^8.0.0",
- "is-fullwidth-code-point": "^3.0.0",
- "strip-ansi": "^6.0.1"
- }
- },
- "string-width-cjs": {
- "version": "npm:string-width@4.2.3",
- "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
- "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
+ "dev": true,
"requires": {
"emoji-regex": "^8.0.0",
"is-fullwidth-code-point": "^3.0.0",
@@ -2610,14 +2002,7 @@
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
"integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
- "requires": {
- "ansi-regex": "^5.0.1"
- }
- },
- "strip-ansi-cjs": {
- "version": "npm:strip-ansi@6.0.1",
- "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
- "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
+ "dev": true,
"requires": {
"ansi-regex": "^5.0.1"
}
@@ -2634,31 +2019,6 @@
"integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==",
"dev": true
},
- "tar": {
- "version": "7.0.1",
- "resolved": "https://registry.npmjs.org/tar/-/tar-7.0.1.tgz",
- "integrity": "sha512-IjMhdQMZFpKsHEQT3woZVxBtCQY+0wk3CVxdRkGXEgyGa0dNS/ehPvOMr2nmfC7x5Zj2N+l6yZUpmICjLGS35w==",
- "requires": {
- "@isaacs/fs-minipass": "^4.0.0",
- "chownr": "^3.0.0",
- "minipass": "^5.0.0",
- "minizlib": "^3.0.1",
- "mkdirp": "^3.0.1",
- "yallist": "^5.0.0"
- },
- "dependencies": {
- "mkdirp": {
- "version": "3.0.1",
- "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz",
- "integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg=="
- },
- "yallist": {
- "version": "5.0.0",
- "resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz",
- "integrity": "sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw=="
- }
- }
- },
"type-fest": {
"version": "0.13.1",
"resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.13.1.tgz",
@@ -2686,6 +2046,7 @@
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
"integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==",
+ "dev": true,
"requires": {
"isexe": "^2.0.0"
}
@@ -2710,16 +2071,6 @@
"strip-ansi": "^6.0.0"
}
},
- "wrap-ansi-cjs": {
- "version": "npm:wrap-ansi@7.0.0",
- "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
- "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==",
- "requires": {
- "ansi-styles": "^4.0.0",
- "string-width": "^4.1.0",
- "strip-ansi": "^6.0.0"
- }
- },
"y18n": {
"version": "5.0.8",
"resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
diff --git a/js/node/package.json b/js/node/package.json
index 195e252f1064b..22af4b7876d37 100644
--- a/js/node/package.json
+++ b/js/node/package.json
@@ -7,17 +7,15 @@
},
"author": "fs-eire",
"binary": {
- "module_path": "./bin",
- "host": "https://onnxruntimetestdata.blob.core.windows.net/onnxruntime-node-prebuild/",
"napi_versions": [
- 3
+ 6
]
},
"version": "1.22.0",
"dependencies": {
+ "adm-zip": "^0.5.16",
"global-agent": "^3.0.0",
- "onnxruntime-common": "file:../common",
- "tar": "^7.0.1"
+ "onnxruntime-common": "file:../common"
},
"scripts": {
"postinstall": "node ./script/install",
diff --git a/js/node/script/install-metadata-versions.js b/js/node/script/install-metadata-versions.js
new file mode 100644
index 0000000000000..1261a36994300
--- /dev/null
+++ b/js/node/script/install-metadata-versions.js
@@ -0,0 +1,7 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// This file is generated by /js/scripts/update-version.ts
+// Do not modify file content manually.
+
+module.exports = { nuget: [{ feed: 'nuget', version: '1.22.0' }] };
diff --git a/js/node/script/install-metadata.js b/js/node/script/install-metadata.js
new file mode 100644
index 0000000000000..e0186ec45d1b4
--- /dev/null
+++ b/js/node/script/install-metadata.js
@@ -0,0 +1,58 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+'use strict';
+
+const metadataVersions = require('./install-metadata-versions.js');
+
+const metadata = {
+ // Requirements defines a list of manifests to install for a specific platform/architecture combination.
+ requirements: {
+ 'win32/x64': [],
+ 'win32/arm64': [],
+ 'linux/x64': ['cuda12'],
+ 'linux/arm64': [],
+ 'darwin/x64': [],
+ 'darwin/arm64': [],
+ },
+ // Each manifest defines a list of files to install
+ manifests: {
+ 'linux/x64:cuda12': {
+ './libonnxruntime_providers_cuda.so': {
+ package: 'nuget:linux/x64:cuda12',
+ path: 'runtimes/linux-x64/native/libonnxruntime_providers_cuda.so',
+ },
+ './libonnxruntime_providers_shared.so': {
+ package: 'nuget:linux/x64:cuda12',
+ path: 'runtimes/linux-x64/native/libonnxruntime_providers_shared.so',
+ },
+ './libonnxruntime_providers_tensorrt.so': {
+ package: 'nuget:linux/x64:cuda12',
+ path: 'runtimes/linux-x64/native/libonnxruntime_providers_tensorrt.so',
+ },
+ },
+ },
+ // Each package entry defines a list of candidate versions. The first available candidate will be used.
+ packages: {
+ 'nuget:win32/x64:cuda12': {
+ name: 'Microsoft.ML.OnnxRuntime.Gpu.Windows',
+ versions: metadataVersions.nuget,
+ },
+ 'nuget:linux/x64:cuda12': {
+ name: 'Microsoft.ML.OnnxRuntime.Gpu.Linux',
+ versions: metadataVersions.nuget,
+ },
+ },
+ feeds: {
+ nuget: {
+ type: 'nuget',
+ index: 'https://api.nuget.org/v3/index.json',
+ },
+ nuget_nightly: {
+ type: 'nuget',
+ index: 'https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json',
+ },
+ },
+};
+
+module.exports = metadata;
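
The three layers above resolve as: platform key -> manifest names -> per-file package references -> feed candidates. A minimal sketch of that lookup (illustrative only, not part of the patch; it assumes the shapes defined in this file):

    // Illustrative walkthrough of the metadata resolution (assumes the shapes above).
    const metadata = require('./install-metadata.js');

    const platform = 'linux/x64'; // `${os.platform()}/${os.arch()}`
    for (const name of metadata.requirements[platform] ?? []) {
      const manifest = metadata.manifests[`${platform}:${name}`]; // e.g. 'linux/x64:cuda12'
      for (const [file, { package: pkg, path: pathInPackage }] of Object.entries(manifest)) {
        const { name: nugetName, versions } = metadata.packages[pkg];
        // Each entry of `versions` names a key in metadata.feeds; the first
        // (feed, version) pair that resolves wins (see installPackages() below).
        console.log(`${file} <- ${nugetName}@${versions[0].version} from ${pathInPackage}`);
      }
    }
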
diff --git a/js/node/script/install-utils.js b/js/node/script/install-utils.js
new file mode 100644
index 0000000000000..abfacce881600
--- /dev/null
+++ b/js/node/script/install-utils.js
@@ -0,0 +1,306 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+'use strict';
+
+const fs = require('fs');
+const https = require('https');
+const { execFileSync } = require('child_process');
+const path = require('path');
+const os = require('os');
+const AdmZip = require('adm-zip'); // Use adm-zip instead of spawn
+
+async function downloadFile(url, dest) {
+ return new Promise((resolve, reject) => {
+ const file = fs.createWriteStream(dest);
+ https
+ .get(url, (res) => {
+ if (res.statusCode !== 200) {
+ file.close();
+ fs.unlinkSync(dest);
+ reject(new Error(`Failed to download from ${url}. HTTP status code = ${res.statusCode}`));
+ return;
+ }
+
+ res.pipe(file);
+ file.on('finish', () => {
+ file.close();
+ resolve();
+ });
+ file.on('error', (err) => {
+ fs.unlinkSync(dest);
+ reject(err);
+ });
+ })
+ .on('error', (err) => {
+ fs.unlinkSync(dest);
+ reject(err);
+ });
+ });
+}
+
+async function downloadJson(url) {
+ return new Promise((resolve, reject) => {
+ https
+ .get(url, (res) => {
+ const { statusCode } = res;
+ const contentType = res.headers['content-type'];
+
+ if (!statusCode) {
+ reject(new Error('No response status code from server.'));
+ return;
+ }
+ if (statusCode >= 400 && statusCode < 500) {
+ resolve(null);
+ return;
+ } else if (statusCode !== 200) {
+ reject(new Error(`Failed to download build list. HTTP status code = ${statusCode}`));
+ return;
+ }
+ if (!contentType || !/^application\/json/.test(contentType)) {
+ reject(new Error(`unexpected content type: ${contentType}`));
+ return;
+ }
+ res.setEncoding('utf8');
+ let rawData = '';
+ res.on('data', (chunk) => {
+ rawData += chunk;
+ });
+ res.on('end', () => {
+ try {
+ resolve(JSON.parse(rawData));
+ } catch (e) {
+ reject(e);
+ }
+ });
+ res.on('error', (err) => {
+ reject(err);
+ });
+ })
+ .on('error', (err) => {
+ reject(err);
+ });
+ });
+}
+
+async function installPackages(packages, manifests, feeds) {
+ // Step.1: resolve packages
+ const resolvedPackages = new Map();
+ for (const packageCandidates of packages) {
+ // iterate all candidates from packagesInfo and try to find the first one that exists
+ for (const { feed, version } of packageCandidates.versions) {
+ const { type, index } = feeds[feed];
+ const pkg = await resolvePackage(type, index, packageCandidates.name, version);
+ if (pkg) {
+ resolvedPackages.set(packageCandidates, pkg);
+ break;
+ }
+ }
+ if (!resolvedPackages.has(packageCandidates)) {
+ throw new Error(`Failed to resolve package. No package exists for: ${JSON.stringify(packageCandidates)}`);
+ }
+ }
+
+ // Step.2: download packages
+ for (const [pkgInfo, pkg] of resolvedPackages) {
+ const manifestsForPackage = manifests.filter((x) => x.packagesInfo === pkgInfo);
+ await pkg.download(manifestsForPackage);
+ }
+}
+
+async function resolvePackage(type, index, packageName, version) {
+ // https://learn.microsoft.com/en-us/nuget/api/overview
+ const nugetPackageUrlResolver = async (index, packageName, version) => {
+ // STEP.1 - get NuGet package index
+ const nugetIndex = await downloadJson(index);
+ if (!nugetIndex) {
+ throw new Error(`Failed to download NuGet index from ${index}`);
+ }
+
+ // STEP.2 - get the base url of "PackageBaseAddress/3.0.0"
+ const packageBaseUrl = nugetIndex.resources.find((x) => x['@type'] === 'PackageBaseAddress/3.0.0')?.['@id'];
+ if (!packageBaseUrl) {
+ throw new Error(`Failed to find PackageBaseAddress in NuGet index`);
+ }
+
+ // STEP.3 - get the package version info
+ const packageInfo = await downloadJson(`${packageBaseUrl}${packageName.toLowerCase()}/index.json`);
+ if (!packageInfo || !packageInfo.versions.includes(version.toLowerCase())) {
+ throw new Error(`Failed to find specific package versions for ${packageName} in ${index}`);
+ }
+
+ // STEP.4 - generate the package URL
+ const packageUrl = `${packageBaseUrl}${packageName.toLowerCase()}/${version.toLowerCase()}/${packageName.toLowerCase()}.${version.toLowerCase()}.nupkg`;
+ const packageFileName = `${packageName.toLowerCase()}.${version.toLowerCase()}.nupkg`;
+
+ return {
+ download: async (manifests) => {
+ if (manifests.length === 0) {
+ return;
+ }
+
+ // Create a temporary directory
+ const tempDir = path.join(os.tmpdir(), `onnxruntime-node-pkgs_${Date.now()}`);
+ fs.mkdirSync(tempDir, { recursive: true });
+
+ try {
+ const packageFilePath = path.join(tempDir, packageFileName);
+
+ // Download the NuGet package
+ console.log(`Downloading ${packageUrl}`);
+ await downloadFile(packageUrl, packageFilePath);
+
+ // Load the NuGet package (which is a ZIP file)
+ let zip;
+ try {
+ zip = new AdmZip(packageFilePath);
+ } catch (err) {
+ throw new Error(`Failed to open NuGet package: ${err.message}`);
+ }
+
+ // Extract only the needed files from the package
+ const extractDir = path.join(tempDir, 'extracted');
+ fs.mkdirSync(extractDir, { recursive: true });
+
+ // Process each manifest and extract/copy files to their destinations
+ for (const manifest of manifests) {
+ const { filepath, pathInPackage } = manifest;
+
+ // Create directory for the target file
+ const targetDir = path.dirname(filepath);
+ fs.mkdirSync(targetDir, { recursive: true });
+
+ // Check if the file exists directly in the zip
+ const zipEntry = zip.getEntry(pathInPackage);
+ if (!zipEntry) {
+ throw new Error(`Failed to find ${pathInPackage} in NuGet package`);
+ }
+
+ console.log(`Extracting ${pathInPackage} to ${filepath}`);
+
+ // Extract just this entry to a temporary location
+ const extractedFilePath = path.join(extractDir, path.basename(pathInPackage));
+ zip.extractEntryTo(zipEntry, extractDir, false, true);
+
+ // Copy to the final destination
+ fs.copyFileSync(extractedFilePath, filepath);
+ }
+ } finally {
+ // Clean up the temporary directory - always runs even if an error occurs
+ try {
+ fs.rmSync(tempDir, { recursive: true });
+ } catch (e) {
+ console.warn(`Failed to clean up temporary directory: ${tempDir}`, e);
+ // Don't rethrow this error as it would mask the original error
+ }
+ }
+ },
+ };
+ };
+
+ switch (type) {
+ case 'nuget':
+ return await nugetPackageUrlResolver(index, packageName, version);
+ default:
+ throw new Error(`Unsupported package type: ${type}`);
+ }
+}
+
+function tryGetCudaVersion() {
+ // Should only return 11 or 12.
+
+ // try to get the CUDA version from the system ( `nvcc --version` )
+ let ver = 12;
+ try {
+ const nvccVersion = execFileSync('nvcc', ['--version'], { encoding: 'utf8' });
+ const match = nvccVersion.match(/release (\d+)/);
+ if (match) {
+ ver = parseInt(match[1]);
+ if (ver !== 11 && ver !== 12) {
+ throw new Error(`Unsupported CUDA version: ${ver}`);
+ }
+ }
+ } catch (e) {
+ if (e?.code === 'ENOENT') {
+ console.warn('`nvcc` not found. Assuming CUDA 12.');
+ } else {
+ console.warn('Failed to detect CUDA version from `nvcc --version`:', e.message);
+ }
+ }
+
+ // assume CUDA 12 if failed to detect
+ return ver;
+}
+
+function parseInstallFlag() {
+ let flag = process.env.ONNXRUNTIME_NODE_INSTALL || process.env.npm_config_onnxruntime_node_install;
+ if (!flag) {
+ for (let i = 0; i < process.argv.length; i++) {
+ if (process.argv[i].startsWith('--onnxruntime-node-install=')) {
+ flag = process.argv[i].split('=')[1];
+ break;
+ } else if (process.argv[i] === '--onnxruntime-node-install') {
+ flag = 'true';
+ }
+ }
+ }
+ switch (flag) {
+ case 'true':
+ case '1':
+ case 'ON':
+ return true;
+ case 'skip':
+ return false;
+ case undefined: {
+ flag = parseInstallCudaFlag();
+ if (flag === 'skip') {
+ return false;
+ }
+ if (flag === 11) {
+ throw new Error('CUDA 11 is no longer supported. Please consider using CPU or upgrade to CUDA 12.');
+ }
+ if (flag === 12) {
+ return 'cuda12';
+ }
+ return undefined;
+ }
+ default:
+ if (!flag || typeof flag !== 'string') {
+ throw new Error(`Invalid value for --onnxruntime-node-install: ${flag}`);
+ }
+ return flag;
+ }
+}
+
+function parseInstallCudaFlag() {
+ let flag = process.env.ONNXRUNTIME_NODE_INSTALL_CUDA || process.env.npm_config_onnxruntime_node_install_cuda;
+ if (!flag) {
+ for (let i = 0; i < process.argv.length; i++) {
+ if (process.argv[i].startsWith('--onnxruntime-node-install-cuda=')) {
+ flag = process.argv[i].split('=')[1];
+ break;
+ } else if (process.argv[i] === '--onnxruntime-node-install-cuda') {
+ flag = 'true';
+ }
+ }
+ }
+ switch (flag) {
+ case 'true':
+ case '1':
+ case 'ON':
+ return tryGetCudaVersion();
+ case 'v11':
+ return 11;
+ case 'v12':
+ return 12;
+ case 'skip':
+ case undefined:
+ return flag;
+ default:
+ throw new Error(`Invalid value for --onnxruntime-node-install-cuda: ${flag}`);
+ }
+}
+
+module.exports = {
+ installPackages,
+ parseInstallFlag,
+};
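
The exports above can also be driven by hand; a hedged sketch of the resolve-and-download flow (the platform key, manifest name, and output folder are illustrative, mirroring what install.js below assembles):

    // Illustrative manual use of installPackages() (assumes install-metadata.js as defined earlier).
    const path = require('path');
    const { installPackages } = require('./install-utils.js');
    const metadata = require('./install-metadata.js');

    const platform = 'linux/x64';
    const binFolder = path.join(__dirname, '..', 'bin/napi-v6', platform);

    const packages = new Set();
    const manifests = [];
    for (const [filename, { package: pkg, path: pathInPackage }] of Object.entries(
      metadata.manifests[`${platform}:cuda12`],
    )) {
      const candidates = metadata.packages[pkg];
      packages.add(candidates); // Set de-duplicates shared candidate lists
      manifests.push({ filepath: path.join(binFolder, filename), packagesInfo: candidates, pathInPackage });
    }

    installPackages(packages, manifests, metadata.feeds).catch((err) => {
      console.error(err);
      process.exitCode = 1;
    });
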
diff --git a/js/node/script/install.js b/js/node/script/install.js
index d406da3591eec..b278b4ade6e3c 100644
--- a/js/node/script/install.js
+++ b/js/node/script/install.js
@@ -8,21 +8,20 @@
// not always available.
// The purpose of this script is to download the required binaries for the platform and architecture.
-// Currently, most of the binaries are already bundled in the package, except for the following:
-// - Linux/x64/CUDA 12
+// Currently, most of the binaries are already bundled in the package, except for the files described in
+// install-metadata.js.
//
-// The CUDA binaries are not bundled because they are too large to be allowed in the npm registry. Instead, they are
-// downloaded from the GitHub release page of ONNX Runtime. The script will download the binaries if they are not
-// already present in the package.
+// Some files (e.g. the CUDA EP binaries) are not bundled because they are too large to be allowed in the npm registry.
+// Instead, they are downloaded from the NuGet feed. The script will download the binaries if they are not already
+// present in the npm package.
// Step.1: Check if we should exit early
const os = require('os');
-const fs = require('fs');
-const https = require('https');
const path = require('path');
-const tar = require('tar');
-const { execFileSync } = require('child_process');
const { bootstrap: globalAgentBootstrap } = require('global-agent');
+const { installPackages, parseInstallFlag } = require('./install-utils.js');
+
+const INSTALL_METADATA = require('./install-metadata.js');
// Bootstrap global-agent to honor the proxy settings in
// environment variables, e.g. GLOBAL_AGENT_HTTPS_PROXY.
@@ -30,169 +29,106 @@ const { bootstrap: globalAgentBootstrap } = require('global-agent');
globalAgentBootstrap();
// commandline flag:
-// --onnxruntime-node-install-cuda Force install the CUDA EP binaries. Try to detect the CUDA version.
-// --onnxruntime-node-install-cuda=v11 Force install the CUDA EP binaries for CUDA 11.
-// --onnxruntime-node-install-cuda=v12 Force install the CUDA EP binaries for CUDA 12.
+//
+// --onnxruntime-node-install Force install the files that are not bundled in the package.
+//
+// --onnxruntime-node-install=skip Skip the installation of the files that are not bundled in the package.
+//
+// --onnxruntime-node-install=cuda12 Force install the CUDA EP binaries for CUDA 12.
+//
+// --onnxruntime-node-install-cuda Force install the CUDA EP binaries.
+// (deprecated, use --onnxruntime-node-install=cuda12)
+//
// --onnxruntime-node-install-cuda=skip Skip the installation of the CUDA EP binaries.
+// (deprecated, use --onnxruntime-node-install=skip)
//
-// Alternatively, use environment variable "ONNXRUNTIME_NODE_INSTALL_CUDA"
//
-// If the flag is not provided, the script will only install the CUDA EP binaries when:
-// - The platform is Linux/x64.
-// - The binaries are not already present in the package.
-// - The installation is not a local install (when used inside ONNX Runtime repo).
+// Alternatively, use environment variable "ONNXRUNTIME_NODE_INSTALL" or "ONNXRUNTIME_NODE_INSTALL_CUDA" (deprecated).
//
-const INSTALL_CUDA_FLAG = parseInstallCudaFlag();
-const NO_INSTALL = INSTALL_CUDA_FLAG === 'skip';
-const FORCE_INSTALL = !NO_INSTALL && INSTALL_CUDA_FLAG;
-
-const IS_LINUX_X64 = os.platform() === 'linux' && os.arch() === 'x64';
-const BIN_FOLDER = path.join(__dirname, '..', 'bin/napi-v3/linux/x64');
-const BIN_FOLDER_EXISTS = fs.existsSync(BIN_FOLDER);
-const CUDA_DLL_EXISTS = fs.existsSync(path.join(BIN_FOLDER, 'libonnxruntime_providers_cuda.so'));
-const ORT_VERSION = require('../package.json').version;
-
-const npm_config_local_prefix = process.env.npm_config_local_prefix;
-const npm_package_json = process.env.npm_package_json;
-const SKIP_LOCAL_INSTALL =
- npm_config_local_prefix && npm_package_json && path.dirname(npm_package_json) === npm_config_local_prefix;
-
-const shouldInstall = FORCE_INSTALL || (!SKIP_LOCAL_INSTALL && IS_LINUX_X64 && BIN_FOLDER_EXISTS && !CUDA_DLL_EXISTS);
-if (NO_INSTALL || !shouldInstall) {
+// If the flag is not provided, the script will look up the metadata file to determine the manifest.
+//
+
+/**
+ * Possible values:
+ * - undefined: the default behavior. This is the value when no installation flag is specified.
+ *
+ * - false: skip installation. This is the value when the installation flag is set to "skip":
+ * --onnxruntime-node-install=skip
+ *
+ * - true: force installation. This is the value when the installation flag is set with no value:
+ * --onnxruntime-node-install
+ *
+ * - string: the installation flag is set to a specific value:
+ * --onnxruntime-node-install=cuda12
+ */
+const INSTALL_FLAG = parseInstallFlag();
+
+// if installation is skipped, exit early
+if (INSTALL_FLAG === false) {
process.exit(0);
}
-
-// Step.2: Download the required binaries
-const artifactUrl = {
- get 11() {
- // TODO: support ORT Cuda v11 binaries
- throw new Error(`CUDA 11 binaries are not supported by this script yet.
-
-To use ONNX Runtime Node.js binding with CUDA v11 support, please follow the manual steps:
-
-1. Use "--onnxruntime-node-install-cuda=skip" to skip the auto installation.
-2. Navigate to https://aiinfra.visualstudio.com/PublicPackages/_artifacts/feed/onnxruntime-cuda-11
-3. Download the binaries for your platform and architecture
-4. Extract the following binaries to "node_modules/onnxruntime-node/bin/napi-v3/linux/x64:
- - libonnxruntime_providers_tensorrt.so
- - libonnxruntime_providers_shared.so
- - libonnxruntime.so.${ORT_VERSION}
- - libonnxruntime_providers_cuda.so
-`);
- },
- 12: `https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-gpu-${
- ORT_VERSION
- }.tgz`,
-}[INSTALL_CUDA_FLAG || tryGetCudaVersion()];
-console.log(`Downloading "${artifactUrl}"...`);
-
-const FILES = new Set([
- 'libonnxruntime_providers_tensorrt.so',
- 'libonnxruntime_providers_shared.so',
- `libonnxruntime.so.${ORT_VERSION}`,
- 'libonnxruntime_providers_cuda.so',
-]);
-
-downloadAndExtract(artifactUrl, BIN_FOLDER, FILES);
-
-async function downloadAndExtract(url, dest, files) {
- return new Promise((resolve, reject) => {
- https.get(url, (res) => {
- const { statusCode } = res;
- const contentType = res.headers['content-type'];
-
- if (statusCode === 301 || statusCode === 302) {
- downloadAndExtract(res.headers.location, dest, files).then(
- (value) => resolve(value),
- (reason) => reject(reason),
- );
- return;
- } else if (statusCode !== 200) {
- throw new Error(`Failed to download the binaries: ${res.statusCode} ${res.statusMessage}.
-
-Use "--onnxruntime-node-install-cuda=skip" to skip the installation. You will still be able to use ONNX Runtime, but the CUDA EP will not be available.`);
- }
-
- if (!contentType || !/^application\/octet-stream/.test(contentType)) {
- throw new Error(`unexpected content type: ${contentType}`);
- }
-
- res
- .pipe(
- tar.t({
- strict: true,
- onentry: (entry) => {
- const filename = path.basename(entry.path);
- if (entry.type === 'File' && files.has(filename)) {
- console.log(`Extracting "${filename}" to "${dest}"...`);
- entry.pipe(fs.createWriteStream(path.join(dest, filename)));
- entry.on('finish', () => {
- console.log(`Finished extracting "${filename}".`);
- });
- }
- },
- }),
- )
- .on('error', (err) => {
- throw new Error(`Failed to extract the binaries: ${err.message}.
-
-Use "--onnxruntime-node-install-cuda=skip" to skip the installation. You will still be able to use ONNX Runtime, but the CUDA EP will not be available.`);
- });
- });
- });
+// if installation is not specified, exit early when the installation is local (e.g. `npm ci` in /js/node/)
+if (INSTALL_FLAG === undefined) {
+ const npm_config_local_prefix = process.env.npm_config_local_prefix;
+ const npm_package_json = process.env.npm_package_json;
+ const IS_LOCAL_INSTALL =
+ npm_config_local_prefix && npm_package_json && path.dirname(npm_package_json) === npm_config_local_prefix;
+ if (IS_LOCAL_INSTALL) {
+ process.exit(0);
+ }
}
-function tryGetCudaVersion() {
- // Should only return 11 or 12.
-
- // try to get the CUDA version from the system ( `nvcc --version` )
- let ver = 12;
- try {
- const nvccVersion = execFileSync('nvcc', ['--version'], { encoding: 'utf8' });
- const match = nvccVersion.match(/release (\d+)/);
- if (match) {
- ver = parseInt(match[1]);
- if (ver !== 11 && ver !== 12) {
- throw new Error(`Unsupported CUDA version: ${ver}`);
- }
- }
- } catch (e) {
- if (e?.code === 'ENOENT') {
- console.warn('`nvcc` not found. Assuming CUDA 12.');
- } else {
- console.warn('Failed to detect CUDA version from `nvcc --version`:', e.message);
+const PLATFORM = `${os.platform()}/${os.arch()}`;
+let INSTALL_MANIFEST_NAMES = INSTALL_METADATA.requirements[PLATFORM] ?? [];
+
+// if installation is specified explicitly, validate the manifest
+if (typeof INSTALL_FLAG === 'string') {
+ const installations = INSTALL_FLAG.split(',').map((x) => x.trim());
+ for (const installation of installations) {
+ if (INSTALL_MANIFEST_NAMES.indexOf(installation) === -1) {
+ throw new Error(`Invalid installation: ${installation} for platform: ${PLATFORM}`);
}
}
+ INSTALL_MANIFEST_NAMES = installations;
+}
+
+const BIN_FOLDER = path.join(__dirname, '..', 'bin/napi-v6', PLATFORM);
+const INSTALL_MANIFESTS = [];
+
+const PACKAGES = new Set();
+for (const name of INSTALL_MANIFEST_NAMES) {
+ const manifest = INSTALL_METADATA.manifests[`${PLATFORM}:${name}`];
+ if (!manifest) {
+ throw new Error(`Manifest not found: ${name} for platform: ${PLATFORM}`);
+ }
- // assume CUDA 12 if failed to detect
- return ver;
+ for (const [filename, { package: pkg, path: pathInPackage }] of Object.entries(manifest)) {
+ const packageCandidates = INSTALL_METADATA.packages[pkg];
+ if (!packageCandidates) {
+ throw new Error(`Package information not found: ${pkg}`);
+ }
+ PACKAGES.add(packageCandidates);
+
+ INSTALL_MANIFESTS.push({
+ filepath: path.normalize(path.join(BIN_FOLDER, filename)),
+ packagesInfo: packageCandidates,
+ pathInPackage,
+ });
+ }
}
-function parseInstallCudaFlag() {
- let flag = process.env.ONNXRUNTIME_NODE_INSTALL_CUDA || process.env.npm_config_onnxruntime_node_install_cuda;
- if (!flag) {
- for (let i = 0; i < process.argv.length; i++) {
- if (process.argv[i].startsWith('--onnxruntime-node-install-cuda=')) {
- flag = process.argv[i].split('=')[1];
- break;
- } else if (process.argv[i] === '--onnxruntime-node-install-cuda') {
- flag = 'true';
- }
+// If the installation flag is not specified, check whether the required files are already installed.
+if (INSTALL_FLAG === undefined) {
+ let hasMissingFiles = false;
+ for (const { filepath } of INSTALL_MANIFESTS) {
+ if (!require('fs').existsSync(filepath)) {
+ hasMissingFiles = true;
+ break;
}
}
- switch (flag) {
- case 'true':
- case '1':
- case 'ON':
- return tryGetCudaVersion();
- case 'v11':
- return 11;
- case 'v12':
- return 12;
- case 'skip':
- case undefined:
- return flag;
- default:
- throw new Error(`Invalid value for --onnxruntime-node-install-cuda: ${flag}`);
+ if (!hasMissingFiles) {
+ process.exit(0);
}
}
+
+void installPackages(PACKAGES, INSTALL_MANIFESTS, INSTALL_METADATA.feeds);
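
End to end, the postinstall behavior above can be exercised like this (a hedged summary; commands shown as comments, with onnxruntime-node as the published package name):

    // Illustrative invocations of the install script above:
    //
    //   npm install onnxruntime-node
    //     default: consult install-metadata.js and download only missing files
    //
    //   npm install onnxruntime-node --onnxruntime-node-install=skip
    //     skip all extra downloads (the CUDA EP will be unavailable)
    //
    //   npm install onnxruntime-node --onnxruntime-node-install=cuda12
    //     force-install the CUDA 12 EP binaries
    //
    //   ONNXRUNTIME_NODE_INSTALL=cuda12 npm install onnxruntime-node
    //     same as above, via environment variable
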
diff --git a/js/node/src/inference_session_wrap.cc b/js/node/src/inference_session_wrap.cc
index 5512b418b5cfb..84ed3457a488b 100644
--- a/js/node/src/inference_session_wrap.cc
+++ b/js/node/src/inference_session_wrap.cc
@@ -5,18 +5,12 @@
#include "common.h"
#include "inference_session_wrap.h"
+#include "ort_instance_data.h"
#include "run_options_helper.h"
#include "session_options_helper.h"
#include "tensor_helper.h"
#include
-Napi::FunctionReference InferenceSessionWrap::wrappedSessionConstructor;
-Napi::FunctionReference InferenceSessionWrap::ortTensorConstructor;
-
-Napi::FunctionReference& InferenceSessionWrap::GetTensorConstructor() {
- return InferenceSessionWrap::ortTensorConstructor;
-}
-
Napi::Object InferenceSessionWrap::Init(Napi::Env env, Napi::Object exports) {
// create ONNX runtime env
Ort::InitApi();
@@ -37,8 +31,8 @@ Napi::Object InferenceSessionWrap::Init(Napi::Env env, Napi::Object exports) {
InstanceAccessor("inputMetadata", &InferenceSessionWrap::GetMetadata, nullptr, napi_default, reinterpret_cast(true)),
InstanceAccessor("outputMetadata", &InferenceSessionWrap::GetMetadata, nullptr, napi_default, reinterpret_cast(false))});
- wrappedSessionConstructor = Napi::Persistent(func);
- wrappedSessionConstructor.SuppressDestruct();
+ OrtInstanceData::Create(env, func);
+
exports.Set("InferenceSession", func);
Napi::Function listSupportedBackends = Napi::Function::New(env, InferenceSessionWrap::ListSupportedBackends);
@@ -55,22 +49,15 @@ Napi::Value InferenceSessionWrap::InitOrtOnce(const Napi::CallbackInfo& info) {
Napi::HandleScope scope(env);
int log_level = info[0].As<Napi::Number>().Int32Value();
-
- Ort::Env* ortEnv = env.GetInstanceData<Ort::Env>();
- if (ortEnv == nullptr) {
- ortEnv = new Ort::Env{OrtLoggingLevel(log_level), "onnxruntime-node"};
- env.SetInstanceData(ortEnv);
- }
-
Napi::Function tensorConstructor = info[1].As<Napi::Function>();
- ortTensorConstructor = Napi::Persistent(tensorConstructor);
- ortTensorConstructor.SuppressDestruct();
+
+ OrtInstanceData::InitOrt(env, log_level, tensorConstructor);
return env.Undefined();
}
InferenceSessionWrap::InferenceSessionWrap(const Napi::CallbackInfo& info)
- : Napi::ObjectWrap<InferenceSessionWrap>(info), initialized_(false), disposed_(false), session_(nullptr), defaultRunOptions_(nullptr) {}
+ : Napi::ObjectWrap<InferenceSessionWrap>(info), initialized_(false), disposed_(false), session_(nullptr) {}
Napi::Value InferenceSessionWrap::LoadModel(const Napi::CallbackInfo& info) {
Napi::Env env = info.Env();
@@ -83,14 +70,13 @@ Napi::Value InferenceSessionWrap::LoadModel(const Napi::CallbackInfo& info) {
ORT_NAPI_THROW_TYPEERROR_IF(argsLength == 0, env, "Expect argument: model file path or buffer.");
try {
- defaultRunOptions_.reset(new Ort::RunOptions{});
Ort::SessionOptions sessionOptions;
if (argsLength == 2 && info[0].IsString() && info[1].IsObject()) {
Napi::String value = info[0].As<Napi::String>();
ParseSessionOptions(info[1].As(), sessionOptions);
- this->session_.reset(new Ort::Session(*env.GetInstanceData<Ort::Env>(),
+ this->session_.reset(new Ort::Session(*OrtInstanceData::OrtEnv(),
#ifdef _WIN32
reinterpret_cast<const wchar_t*>(value.Utf16Value().c_str()),
#else
@@ -105,7 +91,7 @@ Napi::Value InferenceSessionWrap::LoadModel(const Napi::CallbackInfo& info) {
int64_t bytesLength = info[2].As<Napi::Number>().Int64Value();
ParseSessionOptions(info[3].As(), sessionOptions);
- this->session_.reset(new Ort::Session(*env.GetInstanceData(),
+ this->session_.reset(new Ort::Session(*OrtInstanceData::OrtEnv(),
reinterpret_cast<char*>(buffer) + bytesOffset, bytesLength,
sessionOptions));
} else {
@@ -225,7 +211,7 @@ Napi::Value InferenceSessionWrap::Run(const Napi::CallbackInfo& info) {
ParseRunOptions(info[2].As<Napi::Object>(), runOptions);
}
if (preferredOutputLocations_.size() == 0) {
- session_->Run(runOptions == nullptr ? *defaultRunOptions_.get() : runOptions,
+ session_->Run(runOptions == nullptr ? *OrtInstanceData::OrtDefaultRunOptions() : runOptions,
inputIndex == 0 ? nullptr : &inputNames_cstr[0], inputIndex == 0 ? nullptr : &inputValues[0],
inputIndex, outputIndex == 0 ? nullptr : &outputNames_cstr[0],
outputIndex == 0 ? nullptr : &outputValues[0], outputIndex);
@@ -254,7 +240,7 @@ Napi::Value InferenceSessionWrap::Run(const Napi::CallbackInfo& info) {
}
}
- session_->Run(runOptions == nullptr ? *defaultRunOptions_.get() : runOptions, *ioBinding_);
+ session_->Run(runOptions == nullptr ? *OrtInstanceData::OrtDefaultRunOptions() : runOptions, *ioBinding_);
auto outputs = ioBinding_->GetOutputValues();
ORT_NAPI_THROW_ERROR_IF(outputs.size() != outputIndex, env, "Output count mismatch.");
@@ -278,8 +264,6 @@ Napi::Value InferenceSessionWrap::Dispose(const Napi::CallbackInfo& info) {
ORT_NAPI_THROW_ERROR_IF(this->disposed_, env, "Session already disposed.");
this->ioBinding_.reset(nullptr);
-
- this->defaultRunOptions_.reset(nullptr);
this->session_.reset(nullptr);
this->disposed_ = true;
diff --git a/js/node/src/inference_session_wrap.h b/js/node/src/inference_session_wrap.h
index 776cdc0d3b51e..7a6b1232400ec 100644
--- a/js/node/src/inference_session_wrap.h
+++ b/js/node/src/inference_session_wrap.h
@@ -12,7 +12,6 @@
class InferenceSessionWrap : public Napi::ObjectWrap<InferenceSessionWrap> {
public:
static Napi::Object Init(Napi::Env env, Napi::Object exports);
- static Napi::FunctionReference& GetTensorConstructor();
InferenceSessionWrap(const Napi::CallbackInfo& info);
@@ -79,15 +78,10 @@ class InferenceSessionWrap : public Napi::ObjectWrap {
// private members
- // persistent constructor
- static Napi::FunctionReference wrappedSessionConstructor;
- static Napi::FunctionReference ortTensorConstructor;
-
// session objects
bool initialized_;
bool disposed_;
std::unique_ptr<Ort::Session> session_;
- std::unique_ptr<Ort::RunOptions> defaultRunOptions_;
// input/output metadata
std::vector<std::string> inputNames_;
diff --git a/js/node/src/ort_instance_data.cc b/js/node/src/ort_instance_data.cc
new file mode 100644
index 0000000000000..d9b66909f1291
--- /dev/null
+++ b/js/node/src/ort_instance_data.cc
@@ -0,0 +1,60 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <atomic>
+#include <mutex>
+
+#include "common.h"
+#include "ort_instance_data.h"
+#include "onnxruntime_cxx_api.h"
+
+std::unique_ptr<Ort::Env> OrtInstanceData::ortEnv;
+std::unique_ptr<Ort::RunOptions> OrtInstanceData::ortDefaultRunOptions;
+std::mutex OrtInstanceData::ortEnvMutex;
+std::atomic<uint64_t> OrtInstanceData::ortEnvRefCount;
+std::atomic<bool> OrtInstanceData::ortEnvDestroyed;
+
+OrtInstanceData::OrtInstanceData() {
+ ++ortEnvRefCount;
+}
+
+OrtInstanceData::~OrtInstanceData() {
+ if (--ortEnvRefCount == 0) {
+ std::lock_guard<std::mutex> lock(ortEnvMutex);
+ if (ortEnv) {
+ ortDefaultRunOptions.reset(nullptr);
+ ortEnv.reset();
+ ortEnvDestroyed = true;
+ }
+ }
+}
+
+void OrtInstanceData::Create(Napi::Env env, Napi::Function inferenceSessionWrapperFunction) {
+ ORT_NAPI_THROW_ERROR_IF(env.GetInstanceData<OrtInstanceData>() != nullptr, env, "OrtInstanceData already created.");
+ auto data = new OrtInstanceData{};
+ data->wrappedSessionConstructor = Napi::Persistent(inferenceSessionWrapperFunction);
+ env.SetInstanceData(data);
+}
+
+void OrtInstanceData::InitOrt(Napi::Env env, int log_level, Napi::Function tensorConstructor) {
+ auto data = env.GetInstanceData<OrtInstanceData>();
+ ORT_NAPI_THROW_ERROR_IF(data == nullptr, env, "OrtInstanceData not created.");
+
+ data->ortTensorConstructor = Napi::Persistent(tensorConstructor);
+
+ if (!ortEnv) {
+ std::lock_guard<std::mutex> lock(ortEnvMutex);
+ if (!ortEnv) {
+ ORT_NAPI_THROW_ERROR_IF(ortEnvDestroyed, env, "OrtEnv already destroyed.");
+ ortEnv.reset(new Ort::Env{OrtLoggingLevel(log_level), "onnxruntime-node"});
+ ortDefaultRunOptions.reset(new Ort::RunOptions{});
+ }
+ }
+}
+
+const Napi::FunctionReference& OrtInstanceData::TensorConstructor(Napi::Env env) {
+ auto data = env.GetInstanceData<OrtInstanceData>();
+ ORT_NAPI_THROW_ERROR_IF(data == nullptr, env, "OrtInstanceData not created.");
+
+ return data->ortTensorConstructor;
+}
diff --git a/js/node/src/ort_instance_data.h b/js/node/src/ort_instance_data.h
new file mode 100644
index 0000000000000..bb70ac8e87d3a
--- /dev/null
+++ b/js/node/src/ort_instance_data.h
@@ -0,0 +1,50 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include <napi.h>
+#include "onnxruntime_cxx_api.h"
+
+/**
+ * The OrtInstanceData class is designed to manage the lifecycle of necessary instance data, including:
+ * - The Ort::Env singleton instance.
+ * This is a global singleton that is shared across all InferenceSessionWrap instances. It is created the first
+ * time `InferenceSession.initOrtOnce()` is called. It is destroyed when the last active NAPI Env is destroyed.
+ * Once destroyed, it cannot be created again.
+ *
+ * - The Object reference of the InferenceSessionWrap class and the Tensor constructor.
+ * This is per-env data that has the same lifecycle as the Napi::Env. If there are worker threads, each thread will
+ * have its own handle to the InferenceSessionWrap class and the Tensor constructor.
+ *
+ * The OrtInstanceData class is bound to the Napi::Env using environment life cycle APIs.
+ * see https://nodejs.org/api/n-api.html#environment-life-cycle-apis
+ */
+struct OrtInstanceData {
+ // Create a new OrtInstanceData object related to the Napi::Env
+ static void Create(Napi::Env env, Napi::Function inferenceSessionWrapperFunction);
+ // Initialize Ort for the Napi::Env
+ static void InitOrt(Napi::Env env, int log_level, Napi::Function tensorConstructor);
+ // Get the Tensor constructor reference for the Napi::Env
+ static const Napi::FunctionReference& TensorConstructor(Napi::Env env);
+ // Get the global Ort::Env
+ static const Ort::Env* OrtEnv() { return ortEnv.get(); }
+ // Get the default Ort::RunOptions
+ static Ort::RunOptions* OrtDefaultRunOptions() { return ortDefaultRunOptions.get(); }
+
+ ~OrtInstanceData();
+
+ private:
+ OrtInstanceData();
+
+ // per env persistent constructors
+ Napi::FunctionReference wrappedSessionConstructor;
+ Napi::FunctionReference ortTensorConstructor;
+
+ // ORT env (global singleton)
+ static std::unique_ptr<Ort::Env> ortEnv;
+ static std::unique_ptr<Ort::RunOptions> ortDefaultRunOptions;
+ static std::mutex ortEnvMutex;
+ static std::atomic<uint64_t> ortEnvRefCount;
+ static std::atomic<bool> ortEnvDestroyed;
+};
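Taken together, the two files above implement a ref-counted, create-once/destroy-once singleton guarded by double-checked locking. A minimal sketch of that lifecycle pattern, with hypothetical stand-in types (`Env`, `InstanceData`) in place of the N-API and ORT types:

```cpp
#include <atomic>
#include <memory>
#include <mutex>
#include <stdexcept>

// Stand-in for the Ort::Env payload owned by the singleton.
struct Env {};

struct InstanceData {
  InstanceData() { ++refCount; }

  ~InstanceData() {
    // The last owner tears the singleton down; it can never come back.
    if (--refCount == 0) {
      std::lock_guard<std::mutex> lock(envMutex);
      if (env) {
        env.reset();
        envDestroyed = true;
      }
    }
  }

  static void Init() {
    // Double-checked locking: cheap unsynchronized test first,
    // then re-test under the mutex before constructing.
    if (!env) {
      std::lock_guard<std::mutex> lock(envMutex);
      if (!env) {
        if (envDestroyed) throw std::runtime_error("Env already destroyed.");
        env = std::make_unique<Env>();
      }
    }
  }

  static std::unique_ptr<Env> env;
  static std::mutex envMutex;
  static std::atomic<uint64_t> refCount;
  static std::atomic<bool> envDestroyed;
};

std::unique_ptr<Env> InstanceData::env;
std::mutex InstanceData::envMutex;
std::atomic<uint64_t> InstanceData::refCount{0};
std::atomic<bool> InstanceData::envDestroyed{false};
```

As in the diff, the unsynchronized `if (!env)` fast path is benign only because N-API serializes instance-data creation and destruction per environment; a fully general singleton would want an atomic pointer or `std::call_once`.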
diff --git a/js/node/src/session_options_helper.cc b/js/node/src/session_options_helper.cc
index 3c607d88e5402..b189b45556306 100644
--- a/js/node/src/session_options_helper.cc
+++ b/js/node/src/session_options_helper.cc
@@ -321,44 +321,46 @@ void ParseSessionOptions(const Napi::Object options, Ort::SessionOptions& sessio
// external data
if (options.Has("externalData")) {
auto externalDataValue = options.Get("externalData");
- ORT_NAPI_THROW_TYPEERROR_IF(!externalDataValue.IsArray(), options.Env(),
- "Invalid argument: sessionOptions.externalData must be an array.");
- auto externalData = externalDataValue.As<Napi::Array>();
- std::vector<std::basic_string<ORTCHAR_T>> paths;
- std::vector<char*> buffs;
- std::vector<size_t> sizes;
+ if (!externalDataValue.IsNull() && !externalDataValue.IsUndefined()) {
+ ORT_NAPI_THROW_TYPEERROR_IF(!externalDataValue.IsArray(), options.Env(),
+ "Invalid argument: sessionOptions.externalData must be an array.");
+ auto externalData = externalDataValue.As<Napi::Array>();
+ std::vector<std::basic_string<ORTCHAR_T>> paths;
+ std::vector<char*> buffs;
+ std::vector<size_t> sizes;
- for (const auto& kvp : externalData) {
- Napi::Value value = kvp.second;
- ORT_NAPI_THROW_TYPEERROR_IF(!value.IsObject(), options.Env(),
- "Invalid argument: sessionOptions.externalData value must be an object in Node.js binding.");
- Napi::Object obj = value.As<Napi::Object>();
- ORT_NAPI_THROW_TYPEERROR_IF(!obj.Has("path") || !obj.Get("path").IsString(), options.Env(),
- "Invalid argument: sessionOptions.externalData value must have a 'path' property of type string in Node.js binding.");
+ for (const auto& kvp : externalData) {
+ Napi::Value value = kvp.second;
+ ORT_NAPI_THROW_TYPEERROR_IF(!value.IsObject(), options.Env(),
+ "Invalid argument: sessionOptions.externalData value must be an object in Node.js binding.");
+ Napi::Object obj = value.As<Napi::Object>();
+ ORT_NAPI_THROW_TYPEERROR_IF(!obj.Has("path") || !obj.Get("path").IsString(), options.Env(),
+ "Invalid argument: sessionOptions.externalData value must have a 'path' property of type string in Node.js binding.");
 #ifdef _WIN32
- auto path = obj.Get("path").As<Napi::String>().Utf16Value();
- paths.push_back(std::wstring{path.begin(), path.end()});
+ auto path = obj.Get("path").As<Napi::String>().Utf16Value();
+ paths.push_back(std::wstring{path.begin(), path.end()});
 #else
- auto path = obj.Get("path").As<Napi::String>().Utf8Value();
- paths.push_back(path);
+ auto path = obj.Get("path").As<Napi::String>().Utf8Value();
+ paths.push_back(path);
 #endif
- ORT_NAPI_THROW_TYPEERROR_IF(!obj.Has("data") ||
- !obj.Get("data").IsBuffer() ||
- !(obj.Get("data").IsTypedArray() && obj.Get("data").As<Napi::TypedArray>().TypedArrayType() == napi_uint8_array),
- options.Env(),
- "Invalid argument: sessionOptions.externalData value must have an 'data' property of type buffer or typed array in Node.js binding.");
+ ORT_NAPI_THROW_TYPEERROR_IF(!obj.Has("data") ||
+ !obj.Get("data").IsBuffer() ||
+ !(obj.Get("data").IsTypedArray() && obj.Get("data").As<Napi::TypedArray>().TypedArrayType() == napi_uint8_array),
+ options.Env(),
+ "Invalid argument: sessionOptions.externalData value must have an 'data' property of type buffer or typed array in Node.js binding.");
- auto data = obj.Get("data");
- if (data.IsBuffer()) {
- buffs.push_back(data.As<Napi::Buffer<char>>().Data());
- sizes.push_back(data.As<Napi::Buffer<char>>().Length());
- } else {
- auto typedArray = data.As<Napi::TypedArray>();
- buffs.push_back(reinterpret_cast<char*>(typedArray.ArrayBuffer().Data()) + typedArray.ByteOffset());
- sizes.push_back(typedArray.ByteLength());
+ auto data = obj.Get("data");
+ if (data.IsBuffer()) {
+ buffs.push_back(data.As<Napi::Buffer<char>>().Data());
+ sizes.push_back(data.As<Napi::Buffer<char>>().Length());
+ } else {
+ auto typedArray = data.As<Napi::TypedArray>();
+ buffs.push_back(reinterpret_cast<char*>(typedArray.ArrayBuffer().Data()) + typedArray.ByteOffset());
+ sizes.push_back(typedArray.ByteLength());
+ sizes.push_back(typedArray.ByteLength());
+ }
}
+ sessionOptions.AddExternalInitializersFromFilesInMemory(paths, buffs, sizes);
}
- sessionOptions.AddExternalInitializersFromFilesInMemory(paths, buffs, sizes);
}
}
diff --git a/js/node/src/tensor_helper.cc b/js/node/src/tensor_helper.cc
index 12b1a79793ff3..0630386cfc645 100644
--- a/js/node/src/tensor_helper.cc
+++ b/js/node/src/tensor_helper.cc
@@ -7,6 +7,7 @@
#include
#include "common.h"
+#include "ort_instance_data.h"
#include "tensor_helper.h"
#include "inference_session_wrap.h"
@@ -275,12 +276,18 @@ Napi::Value OrtValueToNapiValue(Napi::Env env, Ort::Value&& value) {
}
// new Tensor("string", stringArray /* string[] */, dims /* number[] */)
- return scope.Escape(InferenceSessionWrap::GetTensorConstructor().New({Napi::String::New(env, "string"), stringArray, dims}));
+ return scope.Escape(OrtInstanceData::TensorConstructor(env)
+ .New({Napi::String::New(env, "string"),
+ stringArray,
+ dims}));
} else {
// number data
if (isGpuBuffer) {
// Tensor.fromGpuBuffer(buffer, options)
- Napi::Function tensorFromGpuBuffer = InferenceSessionWrap::GetTensorConstructor().Value().Get("fromGpuBuffer").As<Napi::Function>();
+ Napi::Function tensorFromGpuBuffer = OrtInstanceData::TensorConstructor(env)
+ .Value()
+ .Get("fromGpuBuffer")
+ .As<Napi::Function>();
OrtValue* underlyingOrtValue = value.release();
auto options = Napi::Object::New(env);
@@ -311,10 +318,10 @@ Napi::Value OrtValueToNapiValue(Napi::Env env, Ort::Value&& value) {
NAPI_THROW_IF_FAILED(env, status, Napi::Value);
// new Tensor(type, typedArrayData, dims)
- return scope.Escape(InferenceSessionWrap::GetTensorConstructor().New(
- {type,
- Napi::Value(env, typedArrayData),
- dims}));
+ return scope.Escape(OrtInstanceData::TensorConstructor(env)
+ .New({type,
+ Napi::Value(env, typedArrayData),
+ dims}));
}
}
}
diff --git a/js/node/test/e2e/worker-test.ts b/js/node/test/e2e/worker-test.ts
new file mode 100644
index 0000000000000..3b803c86d8b5b
--- /dev/null
+++ b/js/node/test/e2e/worker-test.ts
@@ -0,0 +1,51 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+import { Worker, isMainThread, parentPort } from 'node:worker_threads';
+import { InferenceSession, Tensor } from 'onnxruntime-common';
+import { assertTensorEqual, SQUEEZENET_INPUT0_DATA, SQUEEZENET_OUTPUT0_DATA, TEST_DATA_ROOT } from '../test-utils';
+import * as path from 'path';
+
+if (isMainThread) {
+ describe('E2E Tests - worker test', () => {
+ it('should run in worker', (done) => {
+ const worker = new Worker(__filename, {
+ stdout: true,
+ stderr: true,
+ });
+ worker.on('message', (msg) => {
+ if (msg.result === 'success') {
+ done();
+ } else {
+ done(new Error(`Worker failed: ${msg.error}`));
+ }
+ });
+ worker.on('error', (err) => {
+ console.error(`Worker error: ${err}`);
+ done(err);
+ });
+ });
+ });
+} else {
+ const workerMain = async () => {
+ // require onnxruntime-node.
+ require('../..');
+
+ const input0 = new Tensor('float32', SQUEEZENET_INPUT0_DATA, [1, 3, 224, 224]);
+ const expectedOutput0 = new Tensor('float32', SQUEEZENET_OUTPUT0_DATA, [1, 1000, 1, 1]);
+
+ const session = await InferenceSession.create(path.join(TEST_DATA_ROOT, 'squeezenet.onnx'));
+
+ const result = await session!.run({ data_0: input0 }, ['softmaxout_1']);
+ console.log('result:', result);
+ assertTensorEqual(result.softmaxout_1, expectedOutput0);
+ };
+ workerMain().then(
+ () => {
+ parentPort?.postMessage({ result: 'success' });
+ },
+ (err) => {
+ parentPort?.postMessage({ result: 'failed', error: err });
+ },
+ );
+}
diff --git a/js/node/test/test-main.ts b/js/node/test/test-main.ts
index 6e7905a24711a..ec7d4e2fc12d0 100644
--- a/js/node/test/test-main.ts
+++ b/js/node/test/test-main.ts
@@ -21,6 +21,7 @@ require('./unittests/lib/tensor');
// E2E tests
require('./e2e/simple-e2e-tests');
require('./e2e/inference-session-run');
+require('./e2e/worker-test');
// Test ONNX spec tests
import { run as runTestRunner } from './test-runner';
diff --git a/js/scripts/update-version.ts b/js/scripts/update-version.ts
index df6a9ea334db5..ef7c5bfbbf0a6 100644
--- a/js/scripts/update-version.ts
+++ b/js/scripts/update-version.ts
@@ -1,8 +1,9 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
-// This script update source file "version.ts" under the following folders:
-// /js/${arg0}/lib/version.ts
+// This script updates the following source files:
+// - /js/${arg0}/lib/version.ts
+// - /js/node/script/install-metadata-versions.js (only for arg0=="node")
//
// version data is read from file /js/${arg0}/package.json
@@ -21,7 +22,9 @@ if (typeof version !== 'string') {
throw new Error(`failed to parse "version" from file: ${PACKAGE_JSON_FILE}`);
}
-const FILE_CONTENT = `// Copyright (c) Microsoft Corporation. All rights reserved.
+{
+ // update /js/${arg0}/lib/version.ts
+ const FILE_CONTENT = `// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// This file is generated by /js/scripts/update-version.ts
@@ -30,4 +33,45 @@ const FILE_CONTENT = `// Copyright (c) Microsoft Corporation. All rights reserve
export const version = ${JSON.stringify(version)};
`;
-fs.writeFileSync(path.join(__dirname, '..', packageName, 'lib', 'version.ts'), FILE_CONTENT);
+ fs.writeFileSync(path.join(__dirname, '..', packageName, 'lib', 'version.ts'), FILE_CONTENT);
+}
+
+if (packageName === 'node') {
+ // update /js/node/script/install-metadata-versions.js
+
+ // If there is a second argument, use it as the version candidates. Otherwise, use the version from package.json.
+ // ";" will be used as the separator.
+ const versionCandidates = (process.argv[3] ?? '')
+ .split(';')
+ .map((v) => v.trim())
+ .filter((v) => !!v);
+
+ type NodeInstallMetadataVersions = Record<string, Array<{ feed: string; version: string }>>;
+ const versions: NodeInstallMetadataVersions = { nuget: [] };
+
+ if (versionCandidates.length > 0) {
+ // append dev versions
+ for (const v of versionCandidates) {
+ versions.nuget.push({
+ feed: 'nuget_nightly',
+ version: v,
+ });
+ }
+ } else {
+ // append release version
+ versions.nuget.push({
+ feed: 'nuget',
+ version,
+ });
+ }
+
+ const FILE_CONTENT = `// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// This file is generated by /js/scripts/update-version.ts
+// Do not modify file content manually.
+
+module.exports = ${JSON.stringify(versions)};
+`;
+ fs.writeFileSync(path.join(__dirname, '..', 'node', 'script', 'install-metadata-versions.js'), FILE_CONTENT);
+}
diff --git a/js/web/lib/wasm/jsep/backend-webnn.ts b/js/web/lib/wasm/jsep/backend-webnn.ts
index c2a855bedca22..4de02983d068d 100644
--- a/js/web/lib/wasm/jsep/backend-webnn.ts
+++ b/js/web/lib/wasm/jsep/backend-webnn.ts
@@ -79,11 +79,20 @@ export class WebNNBackend {
* Maps from session id to list of graph inputs.
*/
private sessionGraphInputs: Map<number, string[]> = new Map();
+ /**
+ * Maps from session id to list of graph outputs.
+ */
+ private sessionGraphOutputs: Map<number, string[]> = new Map();
/**
* Temporary graph inputs for the current session.
* These inputs will be registered when the session is created.
*/
private temporaryGraphInputs: string[] = [];
+ /**
+ * Temporary graph outputs for the current session.
+ * These outputs will be registered when the session is created.
+ */
+ private temporaryGraphOutputs: string[] = [];
/**
* Temporary tensors for the current session.
*/
@@ -167,10 +176,15 @@ export class WebNNBackend {
this.sessionGraphInputs.set(sessionId, this.temporaryGraphInputs);
this.temporaryGraphInputs = [];
}
+ if (this.temporaryGraphOutputs.length > 0) {
+ this.sessionGraphOutputs.set(sessionId, this.temporaryGraphOutputs);
+ this.temporaryGraphOutputs = [];
+ }
}
public onReleaseSession(sessionId: number): void {
this.sessionGraphInputs.delete(sessionId);
+ this.sessionGraphOutputs.delete(sessionId);
const mlContext = this.mlContextBySessionId.get(sessionId)!;
if (!mlContext) {
// Current session is not a WebNN session.
@@ -363,6 +377,10 @@ export class WebNNBackend {
this.temporaryGraphInputs.push(inputName);
}
+ public registerGraphOutput(outputName: string): void {
+ this.temporaryGraphOutputs.push(outputName);
+ }
+
public isGraphInput(sessionId: number, inputName: string): boolean {
const inputNames = this.sessionGraphInputs.get(sessionId);
if (!inputNames) {
@@ -371,6 +389,14 @@ export class WebNNBackend {
return inputNames.includes(inputName);
}
+ public isGraphOutput(sessionId: number, outputName: string): boolean {
+ const outputNames = this.sessionGraphOutputs.get(sessionId);
+ if (!outputNames) {
+ return false;
+ }
+ return outputNames.includes(outputName);
+ }
+
public isInt64Supported(sessionId: number): boolean {
const context = this.mlContextBySessionId.get(sessionId);
return !!context?.opSupportLimits().input.dataTypes.includes('int64');
diff --git a/js/web/lib/wasm/wasm-core-impl.ts b/js/web/lib/wasm/wasm-core-impl.ts
index 8dd643293937b..227c89a53afc6 100644
--- a/js/web/lib/wasm/wasm-core-impl.ts
+++ b/js/web/lib/wasm/wasm-core-impl.ts
export const initEp = async (env: Env, epName: string): Promise<void> => {
/**
* valid data locations for input/output tensors.
*/
-type SupportedTensorDataLocationForInputOutput = 'cpu' | 'cpu-pinned' | 'gpu-buffer' | 'ml-tensor';
+type SupportedTensorDataLocationForInputOutput =
+ | 'cpu'
+ | 'cpu-pinned'
+ | 'gpu-buffer'
+ | 'ml-tensor'
+ // Use 'ml-tensor' during inference, but output a tensor located on the CPU.
+ | 'ml-tensor-cpu-output';
type IOBindingState = {
/**
@@ -424,6 +430,11 @@ export const createSession = async (
typeof options?.preferredOutputLocation === 'string'
? options.preferredOutputLocation
: (options?.preferredOutputLocation?.[nameString] ?? 'cpu');
+ const isGraphOutput = wasm.webnnIsGraphOutput;
+ if (location === 'cpu' && isGraphOutput && isGraphOutput(sessionHandle, nameString)) {
+ outputPreferredLocations.push('ml-tensor-cpu-output');
+ continue;
+ }
if (location !== 'cpu' && location !== 'cpu-pinned' && location !== 'gpu-buffer' && location !== 'ml-tensor') {
throw new Error(`Not supported preferred output location: ${location}.`);
}
@@ -438,7 +449,10 @@ export const createSession = async (
// use IO binding only when at least one output is preferred to be on GPU.
let bindingState: IOBindingState | null = null;
- if (!BUILD_DEFS.DISABLE_JSEP && outputPreferredLocations.some((l) => l === 'gpu-buffer' || l === 'ml-tensor')) {
+ if (
+ !BUILD_DEFS.DISABLE_JSEP &&
+ outputPreferredLocations.some((l) => l === 'gpu-buffer' || l === 'ml-tensor' || l === 'ml-tensor-cpu-output')
+ ) {
ioBindingHandle = wasm._OrtCreateBinding(sessionHandle);
if (ioBindingHandle === 0) {
checkLastError("Can't create IO binding.");
@@ -447,7 +461,10 @@ export const createSession = async (
bindingState = {
handle: ioBindingHandle,
outputPreferredLocations,
- outputPreferredLocationsEncoded: outputPreferredLocations.map((l) => dataLocationStringToEnum(l)),
+ outputPreferredLocationsEncoded: outputPreferredLocations
+ // 'ml-tensor-cpu-output' is treated as 'ml-tensor' for the purpose of IO binding.
+ .map((l) => (l === 'ml-tensor-cpu-output' ? 'ml-tensor' : l))
+ .map((l) => dataLocationStringToEnum(l)),
};
}
@@ -599,10 +616,11 @@ export const prepareInputOutputTensor = async (
}
} else {
const isGraphInput = wasm.webnnIsGraphInput;
- if (dataType !== 'string' && isGraphInput) {
+ const isGraphOutput = wasm.webnnIsGraphOutput;
+ if (dataType !== 'string' && isGraphInput && isGraphOutput) {
const tensorName = wasm.UTF8ToString(tensorNameUTF8Encoded);
// Promote the tensor to 'ml-tensor' if it is a graph input.
- if (isGraphInput(sessionId, tensorName)) {
+ if (isGraphInput(sessionId, tensorName) || isGraphOutput(sessionId, tensorName)) {
const dataTypeEnum = tensorDataTypeStringToEnum(dataType);
dataByteLength = calculateTensorSizeInBytes(dataTypeEnum, dims)!;
actualLocation = 'ml-tensor';
@@ -810,6 +828,7 @@ export const run = async (
}
const output: TensorMetadata[] = [];
+ const outputPromises: Array<Promise<[number, Tensor.DataType]>> = [];
for (let i = 0; i < outputCount; i++) {
const tensor = Number(wasm.getValue(outputValuesOffset + i * ptrSize, '*'));
@@ -958,6 +977,20 @@ export const run = async (
},
'ml-tensor',
]);
+ } else if (preferredLocation === 'ml-tensor-cpu-output' && size > 0) {
+ const data = wasm.webnnCreateMLTensorDownloader!(dataOffset, type as Tensor.MLTensorDataTypes)();
+ const index = output.length;
+ // Delay the data download and the tensor release until all output tensor downloads can be awaited together.
+ keepOutputTensor = true;
+ outputPromises.push(
+ (async () => {
+ const result: [number, Tensor.DataType] = [index, await data];
+ wasm.webnnReleaseTensorId!(dataOffset);
+ wasm._OrtReleaseTensor(tensor);
+ return result;
+ })(),
+ );
+ output.push([type, dims, [], 'cpu']);
} else {
const typedArrayConstructor = tensorTypeToTypedArrayConstructor(type);
const data = new typedArrayConstructor(size);
@@ -975,7 +1008,6 @@ export const run = async (
if (!keepOutputTensor) {
wasm._OrtReleaseTensor(tensor);
}
- wasm.webnnOnRunEnd?.(sessionHandle);
}
}
@@ -992,8 +1024,14 @@ export const run = async (
false,
]);
}
+ // Wait for all output tensor data to be downloaded.
+ for (const [index, data] of await Promise.all(outputPromises)) {
+ output[index][2] = data;
+ }
return output;
} finally {
+ wasm.webnnOnRunEnd?.(sessionHandle);
+
wasm.stackRestore(beforeRunStack);
if (BUILD_DEFS.USE_WEBGPU_EP) {
diff --git a/js/web/lib/wasm/wasm-types.ts b/js/web/lib/wasm/wasm-types.ts
index b2ca8480f1546..22af02b2790f4 100644
--- a/js/web/lib/wasm/wasm-types.ts
+++ b/js/web/lib/wasm/wasm-types.ts
@@ -287,6 +287,19 @@ export declare namespace JSEP {
* @returns whether the input is a WebNN graph input.
*/
webnnIsGraphInput: (sessionId: number, inputName: string) => boolean;
+ /**
+ * [exported from pre-jsep.js] Register a WebNN graph output.
+ * @param outputName - specify the output name.
+ */
+ webnnRegisterGraphOutput: (outputName: string) => void;
+ /**
+ * [exported from pre-jsep.js] Check if a graph output is a WebNN graph output.
+ * @param sessionId - specify the session ID.
+ * @param outputName - specify the output name.
+ * @returns whether the output is a WebNN graph output.
+ */
+ webnnIsGraphOutput: (sessionId: number, outputName: string) => boolean;
+
/**
* [exported from pre-jsep.js] Create a temporary MLTensor for a session.
* @param sessionId - specify the session ID.
diff --git a/js/web/script/pull-prebuilt-wasm-artifacts.ts b/js/web/script/pull-prebuilt-wasm-artifacts.ts
index e5eace8d80dcf..89c57c191de0e 100644
--- a/js/web/script/pull-prebuilt-wasm-artifacts.ts
+++ b/js/web/script/pull-prebuilt-wasm-artifacts.ts
@@ -189,7 +189,7 @@ async function main() {
if (!run) {
// API reference: https://docs.github.com/en/rest/actions/workflow-runs?apiVersion=2022-11-28#list-workflow-runs-for-a-workflow
const mainRunData = await downloadJson(
- `https://api.github.com/repos/Microsoft/onnxruntime/actions/workflows/152051496/runs?branch=main${allowImcomplete ? '' : '&status=completed'}&per_page=1&exclude_pull_requests=1`,
+ `https://api.github.com/repos/Microsoft/onnxruntime/actions/workflows/152051496/runs?branch=main${allowImcomplete ? '' : '&status=success'}&per_page=1&exclude_pull_requests=1`,
);
if (mainRunData.workflow_runs.length === 0) {
throw new Error('No build found');
diff --git a/js/web/script/test-runner-cli-args.ts b/js/web/script/test-runner-cli-args.ts
index 088a66b24f7bd..f546f58a28bfa 100644
--- a/js/web/script/test-runner-cli-args.ts
+++ b/js/web/script/test-runner-cli-args.ts
@@ -405,7 +405,7 @@ export function parseTestRunnerCliArgs(cmdlineArgs: string[]): TestRunnerCliArgs
// and ChromeCanary is not in CI.
const defaultBrowserBackends = ['webgl', 'webgpu', 'wasm' /*, 'webnn'*/];
- const nodejsBackends = ['cpu', 'wasm'];
+ const nodejsBackends = ['cpu', 'wasm', 'webgpu'];
const backendArgs = args.backend || args.b;
const backend =
typeof backendArgs !== 'string'
diff --git a/js/web/test/e2e/exports/testcases/vite-default/package-lock.json b/js/web/test/e2e/exports/testcases/vite-default/package-lock.json
index 708e458748b3a..c9da59b4b0021 100644
--- a/js/web/test/e2e/exports/testcases/vite-default/package-lock.json
+++ b/js/web/test/e2e/exports/testcases/vite-default/package-lock.json
@@ -12,7 +12,7 @@
},
"devDependencies": {
"@vitejs/plugin-vue": "^5.2.1",
- "vite": "^6.2.5"
+ "vite": "^6.2.6"
}
},
"node_modules/@babel/helper-string-parser": {
@@ -1069,9 +1069,9 @@
}
},
"node_modules/vite": {
- "version": "6.2.5",
- "resolved": "https://registry.npmjs.org/vite/-/vite-6.2.5.tgz",
- "integrity": "sha512-j023J/hCAa4pRIUH6J9HemwYfjB5llR2Ps0CWeikOtdR8+pAURAk0DoJC5/mm9kd+UgdnIy7d6HE4EAvlYhPhA==",
+ "version": "6.2.6",
+ "resolved": "https://registry.npmjs.org/vite/-/vite-6.2.6.tgz",
+ "integrity": "sha512-9xpjNl3kR4rVDZgPNdTL0/c6ao4km69a/2ihNQbcANz8RuCOK3hQBmLSJf3bRKVQjVMda+YvizNE8AwvogcPbw==",
"dev": true,
"license": "MIT",
"dependencies": {
diff --git a/js/web/test/e2e/exports/testcases/vite-default/package.json b/js/web/test/e2e/exports/testcases/vite-default/package.json
index 904db7a41de9c..5169734074299 100644
--- a/js/web/test/e2e/exports/testcases/vite-default/package.json
+++ b/js/web/test/e2e/exports/testcases/vite-default/package.json
@@ -13,6 +13,6 @@
},
"devDependencies": {
"@vitejs/plugin-vue": "^5.2.1",
- "vite": "^6.2.5"
+ "vite": "^6.2.6"
}
}
diff --git a/js/web/test/test-main.ts b/js/web/test/test-main.ts
index 96ff3a16a716c..f32107e4a0c65 100644
--- a/js/web/test/test-main.ts
+++ b/js/web/test/test-main.ts
@@ -13,7 +13,16 @@ import { Logger } from '../lib/onnxjs/instrument';
import { Test } from './test-types';
-if (ORT_WEB_TEST_CONFIG.model.some((testGroup) => testGroup.tests.some((test) => test.backend === 'cpu'))) {
+if (
+ // when NPM test is launched with `-e=node` and (`-b=cpu` or `-b=webgpu`), load the ONNX Runtime Node.js binding.
+ platform.name === 'Node.js' &&
+ (ORT_WEB_TEST_CONFIG.model.some((testGroup) =>
+ testGroup.tests.some((test) => test.backend === 'cpu' || test.backend === 'webgpu'),
+ ) ||
+ ORT_WEB_TEST_CONFIG.op.some((testGroup) =>
+ testGroup.tests.some((test) => test.backend === 'cpu' || test.backend === 'webgpu'),
+ ))
+) {
// require onnxruntime-node
require('../../node');
}
diff --git a/onnxruntime/contrib_ops/cuda/bert/group_query_attention.cc b/onnxruntime/contrib_ops/cuda/bert/group_query_attention.cc
index 9f1bc46ee297d..68c4b01d2db20 100644
--- a/onnxruntime/contrib_ops/cuda/bert/group_query_attention.cc
+++ b/onnxruntime/contrib_ops/cuda/bert/group_query_attention.cc
@@ -63,6 +63,7 @@ GroupQueryAttention::GroupQueryAttention(const OpKernelInfo& info)
if (!disable_flash_attention_) {
zeros_ = this->GetScratchBuffer<int>(kZerosCount, nullptr);
+ CUDA_CALL_THROW(cudaMemset(zeros_.get(), 0, kZerosCount * sizeof(int)));
}
}
diff --git a/onnxruntime/contrib_ops/webgpu/bert/flash_attention.cc b/onnxruntime/contrib_ops/webgpu/bert/flash_attention.cc
index bb5de40eb27c5..eb5de4634a4d8 100644
--- a/onnxruntime/contrib_ops/webgpu/bert/flash_attention.cc
+++ b/onnxruntime/contrib_ops/webgpu/bert/flash_attention.cc
@@ -146,12 +146,12 @@ Status FlashAttentionProgram::GenerateShaderCode(ShaderHelper& shader) const {
const min_value : q_element_t = q_element_t(-65504.0);
// Default SHM usage limit is 16KB in Dawn.
- var<workgroup> k_tile : array<array<q_value_t, qkv_head_size_vec>, max_k_step>; // 96 * 2 * 16 = 3KB.
- var<workgroup> v_tile : array<array<q_value_t, qkv_head_size_vec>, max_k_step>; // 96 * 2 * 16 = 3KB.
+ // vec4<f16> * qkv_head_size_vec * max_k_step = 8 * (128/4) * 16 = 4KB. 128 is head_size for phi4.
+ var<workgroup> k_tile : array<array<q_value_t, qkv_head_size_vec>, max_k_step>;
+ var<workgroup> v_tile : array<array<q_value_t, qkv_head_size_vec>, max_k_step>;
 // Private memory per lane.
 var<private> q_tile : array<q_value_t, qkv_head_size_vec>;
- var<private> o_tile : array<q_value_t, qkv_head_size_vec>;
fn loadq(q_idx_global : u32, head_idx: u32)
{
// Stored as float16[batch_size,sequence_length,3072] the inputs as per onnx MHA
@@ -186,6 +186,34 @@ Status FlashAttentionProgram::GenerateShaderCode(ShaderHelper& shader) const {
v_tile[slot][idx%qkv_head_size_vec] = val;
}
}
+)HELPER_FN";
+
+ if (is_qualcomm_) {
+ shader.AdditionalImplementation() << R"HELPER_FN(
+ const half_qkv_head_size_vec = qkv_head_size_vec / 2u;
+
+ // Move half of o_tile from private memory into workgroup memory to reduce register pressure.
+ // Note that register spill was observed on Qualcomm if the whole o_tile is in private memory.
+ // vec4<f16> * half_qkv_head_size_vec * workgroup_size_x = 8 * (128/4/2) * 64 = 8KB.
+ var<workgroup> o_tile_r : array<array<q_value_t, half_qkv_head_size_vec>, workgroup_size_x>;
+
+ // Private memory per lane.
+ var<private> o_tile : array<q_value_t, half_qkv_head_size_vec>;
+ fn writeo(o_idx_global: u32, head_idx: u32, local_idx: u32)
+ {
+ // Stored as float16[batch_size,sequence_length,3072]
+ let offset = o_idx_global * num_heads * qkv_head_size_vec + head_idx * qkv_head_size_vec;
+ for (var idx:u32 = 0; idx < half_qkv_head_size_vec; idx ++)
+ {
+ output[offset+idx] = o_tile[idx];
+ output[offset+idx+half_qkv_head_size_vec] = o_tile_r[local_idx][idx];
+ }
+ }
+ )HELPER_FN";
+ } else {
+ shader.AdditionalImplementation() << R"HELPER_FN(
+ // Private memory per lane.
+ var<private> o_tile : array<q_value_t, qkv_head_size_vec>;
fn writeo(o_idx_global: u32, head_idx: u32)
{
// Stored as float16[batch_size,sequence_length,3072]
@@ -195,7 +223,8 @@ Status FlashAttentionProgram::GenerateShaderCode(ShaderHelper& shader) const {
output[offset+idx] = o_tile[idx];
}
}
-)HELPER_FN";
+ )HELPER_FN";
+ }
if (has_attention_bias_) {
shader.AdditionalImplementation() << R"HELPER_FN(
@@ -228,7 +257,7 @@ Status FlashAttentionProgram::GenerateShaderCode(ShaderHelper& shader) const {
// Each lane/thread is responsible for a single q.
shader.MainFunctionBody() << R"MAIN_FN(
let head_idx = u32(workgroup_idx / uniforms.num_seq_tile);
- let capped_sg_id = min(sg_id, max_k_step);
+ let capped_sg_id = min(sg_id, max_k_step - 1u);
let capped_sg_size = min(sg_size, max_k_step);
// Load Q
@@ -324,30 +353,31 @@ Status FlashAttentionProgram::GenerateShaderCode(ShaderHelper& shader) const {
qk_4[2] = select(min_value, qk_4[2], k_start+14 < seq_causal_length);
qk_4[3] = select(min_value, qk_4[3], k_start+15 < seq_causal_length);
}
-
- //
- // Compute SoftMax as per Flash Attention technique.
- //
- // Crux of Flash Attention is here, that allows for partial softmax computation,
- // direct update of output and merging with previous results.
- // https://courses.cs.washington.edu/courses/cse599m/23sp/notes/flashattn.pdf
- // Where b is the block size of the tile. Xi is storing QKtranspose for the ith tile.
- // mi_local is the max of Xi. Note: _ in this notation means what follows is a
- // subscript. max_j=1:b (Xi[j]) is the max of Xi[j] for j=1 to b.
- //
- // for i = 1, #tiles do
- // Xi = Q[k,:] Kt[:, (i-1) b : i b]
- // mi_local= max_j=1:b (Xi[j])
- // Mi = max(M_(i-1), mi_local)
- // d'_i = d'_(i-1) * e^(M_(i-1)-M_i) + Σ_j=1:b e^(Xi[j]-Mi)
- // o'_i = o'_(i-1) * d'_(i-1) * e^(M_(i-1)-M_i) / d'_i + Σ_j=1:b (e^(Xi[j]-Mi) / d'_i) V[j + (i - 1)b,:]
- // end
- //
- // In the code below:
- // dleft is the first term of d'_i expression above : d'_(i-1) * e^(M_(i-1)-M_i).
- // sum is the second term of the same expression : Σ_j=1:b e^(Xi[j]-Mi)
- // o_ratio is the part of the first term of o'_i expression above : d'_(i-1) * e^(M_(i-1)-M_i) / d'_i
- //
+)MAIN_FN";
+ //
+ // Compute SoftMax as per Flash Attention technique.
+ //
+ // Crux of Flash Attention is here, that allows for partial softmax computation,
+ // direct update of output and merging with previous results.
+ // https://courses.cs.washington.edu/courses/cse599m/23sp/notes/flashattn.pdf
+ // Where b is the block size of the tile. Xi is storing QKtranspose for the ith tile.
+ // mi_local is the max of Xi. Note: _ in this notation means what follows is a
+ // subscript. max_j=1:b (Xi[j]) is the max of Xi[j] for j=1 to b.
+ //
+ // for i = 1, #tiles do
+ // Xi = Q[k,:] Kt[:, (i-1) b : i b]
+ // mi_local= max_j=1:b (Xi[j])
+ // Mi = max(M_(i-1), mi_local)
+ // d'_i = d'_(i-1) * e^(M_(i-1)-M_i) + Σ_j=1:b e^(Xi[j]-Mi)
+ // o'_i = o'_(i-1) * d'_(i-1) * e^(M_(i-1)-M_i) / d'_i + Σ_j=1:b (e^(Xi[j]-Mi) / d'_i) V[j + (i - 1)b,:]
+ // end
+ //
+ // In the code below:
+ // dleft is the first term of d'_i expression above : d'_(i-1) * e^(M_(i-1)-M_i).
+ // sum is the second term of the same expression : Σ_j=1:b e^(Xi[j]-Mi)
+ // o_ratio is the part of the first term of o'_i expression above : d'_(i-1) * e^(M_(i-1)-M_i) / d'_i
+ //
+ shader.MainFunctionBody() << R"MAIN_FN(
var local_max_temp = max(qk_1, qk_2);
if (sg_size > 8)
{
@@ -379,6 +409,87 @@ Status FlashAttentionProgram::GenerateShaderCode(ShaderHelper& shader) const {
previous_denom = d;
let o_ratio = dleft / d;
+)MAIN_FN";
+
+ if (is_qualcomm_) {
+ shader.MainFunctionBody() << R"MAIN_FN(
+ if (sg_size > 8) {
+ for (var i:u32 = 0; i < half_qkv_head_size_vec; i++)
+ {
+ var val = v_tile[capped_sg_id][i];
+ var sum = subgroupShuffle(val, 0) * qk_1[0];
+ sum += subgroupShuffle(val, 1) * qk_1[1];
+ sum += subgroupShuffle(val, 2) * qk_1[2];
+ sum += subgroupShuffle(val, 3) * qk_1[3];
+ sum += subgroupShuffle(val, 4) * qk_2[0];
+ sum += subgroupShuffle(val, 5) * qk_2[1];
+ sum += subgroupShuffle(val, 6) * qk_2[2];
+ sum += subgroupShuffle(val, 7) * qk_2[3];
+ sum += subgroupShuffle(val, 8) * qk_3[0];
+ sum += subgroupShuffle(val, 9) * qk_3[1];
+ sum += subgroupShuffle(val, 10) * qk_3[2];
+ sum += subgroupShuffle(val, 11) * qk_3[3];
+ sum += subgroupShuffle(val, 12) * qk_4[0];
+ sum += subgroupShuffle(val, 13) * qk_4[1];
+ sum += subgroupShuffle(val, 14) * qk_4[2];
+ sum += subgroupShuffle(val, 15) * qk_4[3];
+ o_tile[i] = o_tile[i] * o_ratio + sum;
+
+ val = v_tile[capped_sg_id][half_qkv_head_size_vec + i];
+ sum = subgroupShuffle(val, 0) * qk_1[0];
+ sum += subgroupShuffle(val, 1) * qk_1[1];
+ sum += subgroupShuffle(val, 2) * qk_1[2];
+ sum += subgroupShuffle(val, 3) * qk_1[3];
+ sum += subgroupShuffle(val, 4) * qk_2[0];
+ sum += subgroupShuffle(val, 5) * qk_2[1];
+ sum += subgroupShuffle(val, 6) * qk_2[2];
+ sum += subgroupShuffle(val, 7) * qk_2[3];
+ sum += subgroupShuffle(val, 8) * qk_3[0];
+ sum += subgroupShuffle(val, 9) * qk_3[1];
+ sum += subgroupShuffle(val, 10) * qk_3[2];
+ sum += subgroupShuffle(val, 11) * qk_3[3];
+ sum += subgroupShuffle(val, 12) * qk_4[0];
+ sum += subgroupShuffle(val, 13) * qk_4[1];
+ sum += subgroupShuffle(val, 14) * qk_4[2];
+ sum += subgroupShuffle(val, 15) * qk_4[3];
+ o_tile_r[local_idx][i] = o_tile_r[local_idx][i] * o_ratio + sum;
+ }
+ }
+ else
+ {
+ for (var i:u32 = 0; i < half_qkv_head_size_vec; i++)
+ {
+ var val = v_tile[capped_sg_id][i];
+ var sum = subgroupShuffle(val, 0) * qk_1[0];
+ sum += subgroupShuffle(val, 1) * qk_1[1];
+ sum += subgroupShuffle(val, 2) * qk_1[2];
+ sum += subgroupShuffle(val, 3) * qk_1[3];
+ sum += subgroupShuffle(val, 4) * qk_2[0];
+ sum += subgroupShuffle(val, 5) * qk_2[1];
+ sum += subgroupShuffle(val, 6) * qk_2[2];
+ sum += subgroupShuffle(val, 7) * qk_2[3];
+ o_tile[i] = o_tile[i] * o_ratio + sum;
+
+ val = v_tile[capped_sg_id][half_qkv_head_size_vec + i];
+ sum = subgroupShuffle(val, 0) * qk_1[0];
+ sum += subgroupShuffle(val, 1) * qk_1[1];
+ sum += subgroupShuffle(val, 2) * qk_1[2];
+ sum += subgroupShuffle(val, 3) * qk_1[3];
+ sum += subgroupShuffle(val, 4) * qk_2[0];
+ sum += subgroupShuffle(val, 5) * qk_2[1];
+ sum += subgroupShuffle(val, 6) * qk_2[2];
+ sum += subgroupShuffle(val, 7) * qk_2[3];
+ o_tile_r[local_idx][i] = o_tile_r[local_idx][i] * o_ratio + sum;
+ }
+ }
+ }
+
+ if (valid_q) {
+ writeo(q_idx_global, head_idx, local_idx);
+ }
+)MAIN_FN";
+ } else {
+ shader.MainFunctionBody() << R"MAIN_FN(
if (sg_size > 8) {
for (var i:u32 = 0; i < qkv_head_size_vec; i++)
{
@@ -424,6 +535,7 @@ Status FlashAttentionProgram::GenerateShaderCode(ShaderHelper& shader) const {
writeo(q_idx_global, head_idx);
}
)MAIN_FN";
+ }
return Status::OK();
}
@@ -761,7 +873,8 @@ Status ApplyFlashAttention(const Tensor* Q, const Tensor* K, const Tensor* V, co
if (parameters.sequence_length_ > 1) {
const uint32_t tile_size = 64;
bool has_attention_bias = attention_bias != nullptr;
- FlashAttentionProgram program{"FlashAttention", has_attention_bias, parameters.head_size_, parameters.num_heads_};
+ bool is_qualcomm = context.AdapterInfo().vendor == std::string_view{"qualcomm"};
+ FlashAttentionProgram program{"FlashAttention", has_attention_bias, is_qualcomm, parameters.head_size_, parameters.num_heads_};
program.AddInputs({{Q, ProgramTensorMetadataDependency::TypeAndRank, 4},
{present_key, ProgramTensorMetadataDependency::TypeAndRank, 4},
{present_value, ProgramTensorMetadataDependency::TypeAndRank, 4}});
@@ -771,13 +884,10 @@ Status ApplyFlashAttention(const Tensor* Q, const Tensor* K, const Tensor* V, co
program.AddOutputs({{output, ProgramTensorMetadataDependency::TypeAndRank, 4}});
const float alpha = parameters.scale_ == 0.0f ? 1.f / sqrt(static_cast<float>(parameters.head_size_))
: parameters.scale_;
- std::string cache_hint = std::to_string(has_attention_bias) +
- std::to_string(parameters.head_size_) +
- std::to_string(parameters.num_heads_);
const uint32_t num_seq_tile = (parameters.sequence_length_ + tile_size - 1) / tile_size;
program.SetDispatchGroupSize(parameters.num_heads_ * num_seq_tile)
.SetWorkgroupSize(tile_size)
- .CacheHint(cache_hint)
+ .CacheHint(has_attention_bias, parameters.head_size_, parameters.num_heads_, is_qualcomm)
+ .AddUniformVariables({{static_cast<uint32_t>(parameters.sequence_length_)},
+ {static_cast<uint32_t>(parameters.total_sequence_length_)},
+ {static_cast<uint32_t>(parameters.past_present_share_buffer_ ? parameters.past_sequence_length_ : parameters.total_sequence_length_)},
@@ -821,7 +931,8 @@ bool CanApplyFlashAttention(const Tensor* bias, const Tensor* present_key, const
bias == nullptr &&
context.HasFeature(wgpu::FeatureName::Subgroups) &&
present_key != nullptr && present_value != nullptr && present_key->SizeInBytes() > 0 &&
- present_value->SizeInBytes() > 0 && parameters.head_size_ % 4 == 0;
+ present_value->SizeInBytes() > 0 &&
+ ((context.AdapterInfo().vendor == std::string_view{"qualcomm"} && parameters.head_size_ % 8 == 0) || parameters.head_size_ % 4 == 0);
}
} // namespace webgpu
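The relocated comment block above (now emitted as C++ comments rather than WGSL source) describes the streaming-softmax recurrence at the heart of flash attention. Restated in LaTeX, with $X_i$ the tile's $QK^\top$ scores and $b$ the tile size:

$$
\begin{aligned}
M_i &= \max\bigl(M_{i-1},\ \max_{j=1:b} X_i[j]\bigr),\\
d'_i &= d'_{i-1}\,e^{M_{i-1}-M_i} + \sum_{j=1}^{b} e^{X_i[j]-M_i},\\
o'_i &= o'_{i-1}\,\frac{d'_{i-1}\,e^{M_{i-1}-M_i}}{d'_i} + \sum_{j=1}^{b} \frac{e^{X_i[j]-M_i}}{d'_i}\,V[j+(i-1)b,:].
\end{aligned}
$$

In the shader, `dleft` is the first term of $d'_i$, `sum` is the second, and `o_ratio` is the factor $d'_{i-1}e^{M_{i-1}-M_i}/d'_i$ applied to the running output.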
diff --git a/onnxruntime/contrib_ops/webgpu/bert/flash_attention.h b/onnxruntime/contrib_ops/webgpu/bert/flash_attention.h
index c066d6249c8b2..181e411cdc91f 100644
--- a/onnxruntime/contrib_ops/webgpu/bert/flash_attention.h
+++ b/onnxruntime/contrib_ops/webgpu/bert/flash_attention.h
@@ -36,10 +36,12 @@ class FlashAttentionProgram final : public Program {
public:
FlashAttentionProgram(const std::string& kernel_name,
bool has_attention_bias,
+ bool is_qualcomm,
int qkv_head_size,
int qkv_num_heads)
: Program{kernel_name},
has_attention_bias_(has_attention_bias),
+ is_qualcomm_(is_qualcomm),
qkv_head_size_(qkv_head_size),
qkv_num_heads_(qkv_num_heads) {
}
@@ -57,6 +59,7 @@ class FlashAttentionProgram final : public Program {
private:
bool has_attention_bias_;
+ bool is_qualcomm_;
int qkv_head_size_;
int qkv_num_heads_;
};
diff --git a/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.cc b/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.cc
index 0e75990045b4a..22a0034ed8013 100644
--- a/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.cc
+++ b/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.cc
@@ -684,7 +684,9 @@ Status MatMulNBits::ComputeInternal(onnxruntime::webgpu::ComputeContext& context
}
// On FP32 only GPUs, integer math is faster than FP32 therefore always use DP4A independent of length of M.
- if ((M >= kMinMForTileOptimization || y->DataType() == DataTypeImpl::GetType<float>()) && CanApplyDP4AMatrixMatMulNBits(context, accuracy_level_, block_size, batch_count, N, K, components_a, has_zero_points)) {
+ if ((M >= kMinMForTileOptimization || y->DataType() == DataTypeImpl::GetType<float>() ||
+ context.AdapterInfo().vendor == std::string_view{"qualcomm"}) &&
+ CanApplyDP4AMatrixMatMulNBits(context, accuracy_level_, block_size, batch_count, N, K, components_a, has_zero_points)) {
return ApplyDP4AMatrixMatMulNBits(a, b, scales, M, N, K, block_size, kMinMForTileOptimization, context, y);
}
diff --git a/onnxruntime/core/common/string_utils.h b/onnxruntime/core/common/string_utils.h
index 716eed1afec51..c2e26f629330f 100644
--- a/onnxruntime/core/common/string_utils.h
+++ b/onnxruntime/core/common/string_utils.h
@@ -3,6 +3,8 @@
#pragma once
+#include <algorithm>
+#include <cctype>
#include <sstream>
#include <string>
#include <string_view>
@@ -84,5 +86,21 @@ inline uint32_t GetHashFromString(const std::string& str_value) {
return hash;
}
+/**
+ * Returns a lowercase version of the input string.
+ * @param str The string to lowercase.
+ * @return The lowercased string.
+ */
+inline std::string GetLowercaseString(std::string str) {
+ // https://en.cppreference.com/w/cpp/string/byte/tolower
+ // The behavior of tolower from is undefined if the argument is neither representable as unsigned char
+ // nor equal to EOF. To use tolower safely with a plain char (or signed char), the argument must be converted to
+ // unsigned char.
+ std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) {
+ return static_cast(std::tolower(c));
+ });
+ return str;
+}
+
} // namespace utils
} // namespace onnxruntime
diff --git a/onnxruntime/core/framework/config_options.cc b/onnxruntime/core/framework/config_options.cc
index 9fe5beafd6e7e..a638660de262e 100644
--- a/onnxruntime/core/framework/config_options.cc
+++ b/onnxruntime/core/framework/config_options.cc
@@ -31,14 +31,14 @@ std::string ConfigOptions::GetConfigOrDefault(const std::string& config_key,
Status ConfigOptions::AddConfigEntry(const char* config_key, const char* config_value) noexcept {
std::string key = config_key;
- if (key.empty() || key.length() > 128)
- return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Config key is empty or longer than maximum length 128");
+ if (key.empty() || key.length() > kMaxKeyLength)
+ return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Config key is empty or longer than maximum length ",
+ kMaxKeyLength);
std::string val = config_value;
- if (val.length() > onnxruntime::kMaxStrLen)
+ if (val.length() > kMaxValueLength)
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
- "Config value is longer than maximum length: ",
- onnxruntime::kMaxStrLen);
+ "Config value is longer than maximum length: ", kMaxValueLength);
auto iter = configurations.find(config_key);
if (iter != configurations.cend()) {
@@ -52,6 +52,10 @@ Status ConfigOptions::AddConfigEntry(const char* config_key, const char* config_
return Status::OK();
}
+const std::unordered_map<std::string, std::string>& ConfigOptions::GetConfigOptionsMap() const noexcept {
+ return configurations;
+}
+
std::ostream& operator<<(std::ostream& os, const ConfigOptions& config_options) {
for (const auto& [key, value] : config_options.configurations) {
os << " " << key << ": " << value;
diff --git a/onnxruntime/core/framework/config_options.h b/onnxruntime/core/framework/config_options.h
index efdfdb45abbaa..028220d15fc8a 100644
--- a/onnxruntime/core/framework/config_options.h
+++ b/onnxruntime/core/framework/config_options.h
@@ -15,6 +15,11 @@ namespace onnxruntime {
* Provides infrastructure to add/get config entries
*/
struct ConfigOptions {
+ // Maximum key/value string lengths specified in
+ // core/session/onnxruntime_session_options_config_keys.h
+ static constexpr size_t kMaxKeyLength = 1024;
+ static constexpr size_t kMaxValueLength = 2048;
+
std::unordered_map<std::string, std::string> configurations;
// Gets the config string associated with the given config_key.
@@ -33,6 +38,9 @@ struct ConfigOptions {
// Add a config pair (config_key, config_value) to this instance of ConfigOptions
Status AddConfigEntry(const char* config_key, const char* config_value) noexcept;
+ // Gets a constant reference to the map of all configurations.
+ const std::unordered_map<std::string, std::string>& GetConfigOptionsMap() const noexcept;
+
friend std::ostream& operator<<(std::ostream& os, const ConfigOptions& config_options);
};
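The new accessor makes it possible to enumerate all entries without going through GetConfigOrDefault key by key. A small illustrative consumer (the function name is hypothetical):

```cpp
#include <iostream>
#include "core/framework/config_options.h"

// Print every session config entry via the accessor added above.
void DumpConfigOptions(const onnxruntime::ConfigOptions& config) {
  for (const auto& [key, value] : config.GetConfigOptionsMap()) {
    std::cout << key << " = " << value << "\n";
  }
}
```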
diff --git a/onnxruntime/core/framework/graph_partitioner.cc b/onnxruntime/core/framework/graph_partitioner.cc
index 50f14104cfd7a..7f6cb03936be1 100644
--- a/onnxruntime/core/framework/graph_partitioner.cc
+++ b/onnxruntime/core/framework/graph_partitioner.cc
@@ -748,7 +748,8 @@ static Status InlineFunctionsAOTImpl(const ExecutionProviders& execution_provide
// Validate the ep_context_path to make sure it is file path and check whether the file exist already
static Status GetValidatedEpContextPath(const std::filesystem::path& ep_context_path,
const std::filesystem::path& model_path,
- std::filesystem::path& context_cache_path) {
+ std::filesystem::path& context_cache_path,
+ bool allow_overwrite_output_model = false) {
if (!ep_context_path.empty()) {
context_cache_path = ep_context_path;
if (!context_cache_path.has_filename()) {
@@ -765,7 +766,7 @@ static Status GetValidatedEpContextPath(const std::filesystem::path& ep_context_
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Both ep_context_path and model_path are empty.");
}
- if (std::filesystem::exists(context_cache_path)) {
+ if (std::filesystem::exists(context_cache_path) && !allow_overwrite_output_model) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to generate EP context model since the file '",
context_cache_path, "' exist already. Please remove the EP context model if you want to re-generate it.");
}
@@ -775,8 +776,7 @@ static Status GetValidatedEpContextPath(const std::filesystem::path& ep_context_
static Status CreateEpContextModel(const ExecutionProviders& execution_providers,
const Graph& graph,
- const std::filesystem::path& ep_context_path,
- const std::filesystem::path& ep_context_ext_ini_path,
+ const EpContextModelGenerationOptions& ep_context_gen_options,
const logging::Logger& logger) {
InlinedVector<const Node*> all_ep_context_nodes;
for (const auto& ep : execution_providers) {
@@ -785,6 +785,9 @@ static Status CreateEpContextModel(const ExecutionProviders& execution_providers
}
if (all_ep_context_nodes.size() < 1) {
+ ORT_RETURN_IF(ep_context_gen_options.error_if_no_compiled_nodes,
+ "Compiled model does not contain any EPContext nodes. "
+ "Check that the session EPs support compilation and can execute at least one model subgraph.");
return Status::OK();
}
@@ -798,7 +801,10 @@ static Status CreateEpContextModel(const ExecutionProviders& execution_providers
};
std::filesystem::path context_cache_path;
- ORT_RETURN_IF_ERROR(GetValidatedEpContextPath(ep_context_path, graph.ModelPath(), context_cache_path));
+ ORT_RETURN_IF_ERROR(GetValidatedEpContextPath(ep_context_gen_options.output_model_file_path,
+ graph.ModelPath(),
+ context_cache_path,
+ ep_context_gen_options.overwrite_existing_output_file));
Model ep_context_model(graph.Name(), false, graph.GetModel().MetaData(),
graph.GetModel().ModelPath(), // use source model path so that external initializers can find the data file path
@@ -848,20 +854,39 @@ static Status CreateEpContextModel(const ExecutionProviders& execution_providers
}
}
- size_t ini_size_threshold = 0;
- std::filesystem::path external_ini_path;
- if (ep_context_ext_ini_path.empty()) {
+ size_t ini_size_threshold = ep_context_gen_options.output_external_initializer_size_threshold;
+ std::filesystem::path external_ini_path = ep_context_gen_options.output_external_initializers_file_path;
+ if (external_ini_path.empty()) {
// Set the threshold to the max so all initializers are forced into the Onnx file
ini_size_threshold = SIZE_MAX;
external_ini_path = "./model_ext_ini.bin";
- } else {
- // Set the theshold to 0 so all initializers are forced into the external file
- ini_size_threshold = 0;
- external_ini_path = ep_context_ext_ini_path;
}
+
ModelSavingOptions model_saving_options{ini_size_threshold};
- ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(ep_context_model, context_cache_path,
- external_ini_path, model_saving_options));
+
+ if (ep_context_gen_options.output_model_buffer_ptr != nullptr &&
+ ep_context_gen_options.output_model_buffer_size_ptr != nullptr &&
+ ep_context_gen_options.output_model_buffer_allocator != nullptr) {
+ ORT_RETURN_IF_ERROR(ep_context_model.MainGraph().Resolve());
+ // TODO(adrianlizarraga): Investigate if we can make this more memory efficient.
+ // May be able to use allocator to directly allocate the ModelProto to avoid a copy.
+ ONNX_NAMESPACE::ModelProto model_proto = ep_context_model.ToGraphProtoWithExternalInitializers(external_ini_path,
+ context_cache_path,
+ model_saving_options);
+ size_t buffer_size = model_proto.ByteSizeLong();
+ ORT_RETURN_IF(buffer_size > static_cast<size_t>(std::numeric_limits<int>::max()),
+ "Cannot serialize ONNX ModelProto larger than 2GB");
+
+ AllocatorPtr allocator = ep_context_gen_options.output_model_buffer_allocator;
+ IAllocatorUniquePtr<void> buffer = IAllocator::MakeUniquePtr<void>(allocator, buffer_size);
+ model_proto.SerializeToArray(buffer.get(), static_cast<int>(buffer_size));
+
+ *ep_context_gen_options.output_model_buffer_size_ptr = buffer_size;
+ *ep_context_gen_options.output_model_buffer_ptr = buffer.release();
+ } else {
+ ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(ep_context_model, context_cache_path,
+ external_ini_path, model_saving_options));
+ }
return Status::OK();
}
@@ -1110,6 +1135,7 @@ Status GraphPartitioner::Partition(Graph& graph, FuncManager& func_mgr,
const ConfigOptions& config_options,
const logging::Logger& logger,
Mode mode,
+ const EpContextModelGenerationOptions& ep_context_gen_options,
const layout_transformation::DebugGraphFn& debug_graph_fn) const {
// It is a greedy partitioning algorithm per provider preferences user provided when calling ONNX RUNTIME right now.
// 1. Execution providers' capabilities are checked one by one.
@@ -1156,12 +1182,12 @@ Status GraphPartitioner::Partition(Graph& graph, FuncManager& func_mgr,
if (mode == Mode::kNormal || mode == Mode::kAssignOnly) {
#if !defined(ORT_MINIMAL_BUILD)
- bool ep_context_enabled = config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") == "1";
- if (ep_context_enabled) {
- std::string ep_context_path = config_options.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, "");
+ if (ep_context_gen_options.enable && ep_context_gen_options.output_model_buffer_ptr == nullptr) {
// Check before EP compile graphs
std::filesystem::path context_cache_path;
- ORT_RETURN_IF_ERROR(GetValidatedEpContextPath(ep_context_path, graph.ModelPath(), context_cache_path));
+ ORT_RETURN_IF_ERROR(GetValidatedEpContextPath(ep_context_gen_options.output_model_file_path, graph.ModelPath(),
+ context_cache_path,
+ ep_context_gen_options.overwrite_existing_output_file));
}
// We use this only if Resource Aware Partitioning is enabled for any of the EPs
@@ -1172,15 +1198,13 @@ Status GraphPartitioner::Partition(Graph& graph, FuncManager& func_mgr,
ORT_RETURN_IF_ERROR(PartitionOnnxFormatModel(partition_params, mode, providers_, kernel_registry_mgr_,
ep_acc_map, *graph_optimizer_registry_, logger));
- if (ep_context_enabled) {
- std::string ep_context_path = config_options.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, "");
- std::string external_ini_file_name = config_options.GetConfigOrDefault(
- kOrtSessionOptionsEpContextModelExternalInitializersFileName, "");
- ORT_RETURN_IF_ERROR(CreateEpContextModel(providers_, graph, ep_context_path, external_ini_file_name, logger));
+ if (ep_context_gen_options.enable) {
+ ORT_RETURN_IF_ERROR(CreateEpContextModel(providers_, graph, ep_context_gen_options, logger));
}
#else
ORT_UNUSED_PARAMETER(config_options);
ORT_UNUSED_PARAMETER(logger);
+ ORT_UNUSED_PARAMETER(ep_context_gen_options);
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "ONNX models are not supported in this build.");
#endif //! defined(ORT_MINIMAL_BUILD)
} else {
diff --git a/onnxruntime/core/framework/graph_partitioner.h b/onnxruntime/core/framework/graph_partitioner.h
index 87edc7a64c6b5..6e36d79701fd7 100644
--- a/onnxruntime/core/framework/graph_partitioner.h
+++ b/onnxruntime/core/framework/graph_partitioner.h
@@ -15,6 +15,7 @@ class ExecutionProviders;
class KernelRegistryManager;
class Model;
struct ConfigOptions;
+struct EpContextModelGenerationOptions;
class GraphPartitioner {
public:
@@ -49,6 +50,7 @@ class GraphPartitioner {
const ConfigOptions& config_options,
const logging::Logger& logger,
Mode mode = Mode::kNormal,
+ const EpContextModelGenerationOptions& ep_context_gen_options = {},
const layout_transformation::DebugGraphFn& debug_graph_fn = {}) const;
bool IsLoadCancellationFlagSet() const {
diff --git a/onnxruntime/core/framework/session_options.cc b/onnxruntime/core/framework/session_options.cc
index 9d6cd3e58225e..a56383034686c 100644
--- a/onnxruntime/core/framework/session_options.cc
+++ b/onnxruntime/core/framework/session_options.cc
@@ -4,6 +4,7 @@
#include "core/framework/session_options.h"
#include "core/common/logging/logging.h"
#include "core/framework/ort_value.h"
+#include "core/session/onnxruntime_session_options_config_keys.h"
namespace onnxruntime {
@@ -96,4 +97,21 @@ void SessionOptions::AddCustomOpLibraryHandle(PathString library_name, void* lib
}
#endif // !defined(ORT_MINIMAL_BUILD) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS)
+EpContextModelGenerationOptions::EpContextModelGenerationOptions(const ConfigOptions& config_options) {
+ enable = config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") == "1";
+ output_model_file_path = config_options.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, "");
+ output_external_initializers_file_path = config_options.GetConfigOrDefault(
+ kOrtSessionOptionsEpContextModelExternalInitializersFileName, "");
+ output_external_initializer_size_threshold = 0;
+ embed_ep_context_in_model = config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEmbedMode, "0") == "1";
+}
+
+EpContextModelGenerationOptions SessionOptions::GetEpContextGenerationOptions() const {
+ if (this->has_explicit_ep_context_gen_options) {
+ return this->ep_context_gen_options;
+ }
+
+ return EpContextModelGenerationOptions(this->config_options);
+}
+
} // namespace onnxruntime
diff --git a/onnxruntime/core/framework/session_options.h b/onnxruntime/core/framework/session_options.h
index ef323b99b006c..4cf7829fef549 100644
--- a/onnxruntime/core/framework/session_options.h
+++ b/onnxruntime/core/framework/session_options.h
@@ -11,6 +11,7 @@
#include <unordered_map>
#include <vector>
#include "core/common/inlined_containers.h"
+#include "core/framework/allocator.h"
#include "core/framework/config_options.h"
#include "core/framework/ort_value.h"
#include "core/session/onnxruntime_c_api.h"
@@ -69,6 +70,26 @@ struct FreeDimensionOverride {
using CheckLoadCancellationFn = std::function<bool()>;
+struct EpContextModelGenerationOptions {
+ EpContextModelGenerationOptions() = default;
+
+ // Initializes from string key/value pairs in session config options.
+ explicit EpContextModelGenerationOptions(const ConfigOptions& config_options);
+
+ bool enable = false;
+ bool overwrite_existing_output_file = false;
+ bool error_if_no_compiled_nodes = false;
+ bool embed_ep_context_in_model = false;
+
+ std::string output_model_file_path;
+ void** output_model_buffer_ptr = nullptr;
+ size_t* output_model_buffer_size_ptr = nullptr;
+ AllocatorPtr output_model_buffer_allocator = nullptr;
+
+ std::string output_external_initializers_file_path;
+ size_t output_external_initializer_size_threshold = 0;
+};
+
/**
* Configuration information for a session.
*/
@@ -199,6 +220,15 @@ struct SessionOptions {
// Load cancellation flag is necessary to be within shared memory as session_options are
// copied internally and the flag needs to be accessible across all copies.
std::shared_ptr<std::atomic_bool> load_cancellation_flag = std::make_shared<std::atomic_bool>(false);
+
+ // Options for generating compiled EPContext models were previously stored in session_options.config_options as
+ // string key/value pairs. To support more advanced options, such as setting input/output buffers, we
+ // now have to store EPContext options in a struct of type EpContextModelGenerationOptions.
+ // The function GetEpContextGenerationOptions() handles conversion of string key/value pairs to the new
+ // struct type.
+ bool has_explicit_ep_context_gen_options = false;
+ EpContextModelGenerationOptions ep_context_gen_options = {};
+ EpContextModelGenerationOptions GetEpContextGenerationOptions() const;
};
inline std::ostream& operator<<(std::ostream& os, const SessionOptions& session_options) {
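For context, the struct mirrors what the string-keyed path produces: GetEpContextGenerationOptions() falls back to parsing config_options unless has_explicit_ep_context_gen_options is set. A sketch of the two equivalent configuration paths (file names and values are illustrative only):

```cpp
#include "core/common/common.h"
#include "core/framework/session_options.h"
#include "core/session/onnxruntime_session_options_config_keys.h"

void ConfigureEpContext(onnxruntime::SessionOptions& so) {
  // Legacy path: string key/value pairs, parsed later by the
  // EpContextModelGenerationOptions(const ConfigOptions&) constructor.
  ORT_THROW_IF_ERROR(so.config_options.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1"));
  ORT_THROW_IF_ERROR(so.config_options.AddConfigEntry(kOrtSessionOptionEpContextFilePath, "model_ctx.onnx"));

  // New path: populate the struct directly and mark it explicit so that
  // GetEpContextGenerationOptions() returns it instead of re-parsing strings.
  so.ep_context_gen_options.enable = true;
  so.ep_context_gen_options.output_model_file_path = "model_ctx.onnx";
  so.has_explicit_ep_context_gen_options = true;
}
```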
diff --git a/onnxruntime/core/framework/transpose_helper.cc b/onnxruntime/core/framework/transpose_helper.cc
index 32d15bdf9060b..75f9492fb071d 100644
--- a/onnxruntime/core/framework/transpose_helper.cc
+++ b/onnxruntime/core/framework/transpose_helper.cc
@@ -22,7 +22,8 @@ struct has_mlas_transpose : std::true_type {};
template <typename T>
typename std::enable_if<!has_mlas_transpose<T>::value, void>::type SimpleTransposeSingleAxisOutwards(
const T* input_data, T* output_data, int64_t num_loops, int64_t num_writers, int64_t writes_per_loop,
- int64_t writes_per_writer_per_loop) {
+ int64_t writes_per_writer_per_loop, concurrency::ThreadPool* tp = nullptr) {
+ ORT_UNUSED_PARAMETER(tp);
const T* end;
for (int64_t l = 0; l < num_loops; ++l) {
T* output_for_first_writer = output_data;
@@ -48,10 +49,10 @@ typename std::enable_if::value, void>::type SimpleTranspo
template <typename T>
typename std::enable_if<has_mlas_transpose<T>::value, void>::type SimpleTransposeSingleAxisOutwards(
const T* input_data, T* output_data, int64_t num_loops, int64_t num_writers, int64_t writes_per_loop,
- int64_t writes_per_writer_per_loop) {
+ int64_t writes_per_writer_per_loop, concurrency::ThreadPool* tp = nullptr) {
for (int64_t l = 0; l < num_loops; ++l) {
MlasTranspose(input_data, output_data, static_cast<size_t>(writes_per_writer_per_loop),
- static_cast<size_t>(num_writers));
+ static_cast<size_t>(num_writers), tp);
input_data += writes_per_loop;
output_data += writes_per_loop;
}
@@ -82,25 +83,25 @@ void TransposeSingleAxisOutwards(gsl::span permutations, const Ten
switch (bytes_per_write) {
case (sizeof(uint8_t)): {
SimpleTransposeSingleAxisOutwards(input_data, output_data, num_loops, num_writers, writes_per_loop,
- writes_per_writer_per_loop);
+ writes_per_writer_per_loop, tp);
break;
}
case (sizeof(uint16_t)): {
SimpleTransposeSingleAxisOutwards(reinterpret_cast<const uint16_t*>(input_data),
reinterpret_cast<uint16_t*>(output_data), num_loops, num_writers,
- writes_per_loop, writes_per_writer_per_loop);
+ writes_per_loop, writes_per_writer_per_loop, tp);
break;
}
case (sizeof(uint32_t)): {
SimpleTransposeSingleAxisOutwards(reinterpret_cast<const uint32_t*>(input_data),
reinterpret_cast<uint32_t*>(output_data), num_loops, num_writers,
- writes_per_loop, writes_per_writer_per_loop);
+ writes_per_loop, writes_per_writer_per_loop, tp);
break;
}
case (sizeof(uint64_t)): {
SimpleTransposeSingleAxisOutwards(reinterpret_cast<const uint64_t*>(input_data),
reinterpret_cast<uint64_t*>(output_data), num_loops, num_writers,
- writes_per_loop, writes_per_writer_per_loop);
+ writes_per_loop, writes_per_writer_per_loop, tp);
break;
}
default: {
@@ -125,7 +126,8 @@ void TransposeSingleAxisOutwards(gsl::span permutations, const Ten
template <typename T>
typename std::enable_if<!has_mlas_transpose<T>::value, void>::type SimpleTransposeSingleAxisInwards(
const T* input_data, T* output_data, int64_t num_loops, int64_t num_readers, int64_t reads_per_loop,
- int64_t reads_per_reader_per_loop) {
+ int64_t reads_per_reader_per_loop, concurrency::ThreadPool* tp = nullptr) {
+ ORT_UNUSED_PARAMETER(tp);
T* end;
for (int64_t l = 0; l < num_loops; ++l) {
const T* input_for_first_reader = input_data;
@@ -150,10 +152,10 @@ typename std::enable_if::value, void>::type SimpleTranspo
template <typename T>
typename std::enable_if<has_mlas_transpose<T>::value, void>::type SimpleTransposeSingleAxisInwards(
const T* input_data, T* output_data, int64_t num_loops, int64_t num_readers, int64_t reads_per_loop,
- int64_t reads_per_reader_per_loop) {
+ int64_t reads_per_reader_per_loop, concurrency::ThreadPool* tp = nullptr) {
for (int64_t l = 0; l < num_loops; ++l) {
MlasTranspose(input_data, output_data, static_cast<size_t>(num_readers),
- static_cast<size_t>(reads_per_reader_per_loop));
+ static_cast<size_t>(reads_per_reader_per_loop), tp);
input_data += reads_per_loop;
output_data += reads_per_loop;
}
@@ -162,7 +164,8 @@ typename std::enable_if::value, void>::type SimpleTranspos
// moving a single axis inwards where the read/write size is a power of 2 and between 8 and 64 bits.
// `input_shape_override` overrides the shape of `input` for compute purposes.
void TransposeSingleAxisInwards(gsl::span permutations, const Tensor& input, Tensor& output,
- size_t from, size_t to, const TensorShape* input_shape_override = nullptr) {
+ size_t from, size_t to, const TensorShape* input_shape_override = nullptr,
+ concurrency::ThreadPool* tp = nullptr) {
ORT_UNUSED_PARAMETER(permutations);
const auto& input_shape = input_shape_override ? *input_shape_override : input.Shape();
@@ -184,25 +187,25 @@ void TransposeSingleAxisInwards(gsl::span permutations, const Tens
switch (bytes_per_read) {
case (sizeof(uint8_t)): {
SimpleTransposeSingleAxisInwards(input_data, output_data, num_loops, num_readers, reads_per_loop,
- reads_per_reader_per_loop);
+ reads_per_reader_per_loop, tp);
break;
}
case (sizeof(uint16_t)): {
SimpleTransposeSingleAxisInwards(reinterpret_cast<const uint16_t*>(input_data),
reinterpret_cast<uint16_t*>(output_data), num_loops, num_readers, reads_per_loop,
- reads_per_reader_per_loop);
+ reads_per_reader_per_loop, tp);
break;
}
case (sizeof(uint32_t)): {
SimpleTransposeSingleAxisInwards(reinterpret_cast<const uint32_t*>(input_data),
reinterpret_cast<uint32_t*>(output_data), num_loops, num_readers, reads_per_loop,
- reads_per_reader_per_loop);
+ reads_per_reader_per_loop, tp);
break;
}
case (sizeof(uint64_t)): {
SimpleTransposeSingleAxisInwards(reinterpret_cast<const uint64_t*>(input_data),
reinterpret_cast<uint64_t*>(output_data), num_loops, num_readers, reads_per_loop,
- reads_per_reader_per_loop);
+ reads_per_reader_per_loop, tp);
break;
}
default: {
@@ -236,7 +239,7 @@ void SingleAxisTranspose(gsl::span<const size_t> permutations, const Tensor& inp
if (from > to) {
TransposeSingleAxisOutwards(permutations, input, output, from, to, input_shape_override, tp);
} else {
- TransposeSingleAxisInwards(permutations, input, output, from, to, input_shape_override);
+ TransposeSingleAxisInwards(permutations, input, output, from, to, input_shape_override, tp);
}
}
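Taken together, the transpose.cc changes above thread the optional pool through both single-axis paths. A minimal caller sketch, assuming the existing IsTransposeMovingSingleAxis helper and a kernel-owned pool (the wiring here is illustrative, not part of this patch):

    // Sketch: forward the kernel's thread pool so both directions can parallelize.
    void RunSingleAxisTranspose(gsl::span<const size_t> perms, const Tensor& in,
                                Tensor& out, concurrency::ThreadPool* tp) {
      size_t from = 0, to = 0;
      if (IsTransposeMovingSingleAxis(perms, from, to)) {
        SingleAxisTranspose(perms, in, out, from, to, /*input_shape_override*/ nullptr, tp);
      }
    }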
diff --git a/onnxruntime/core/mlas/inc/mlas.h b/onnxruntime/core/mlas/inc/mlas.h
index db21157d2fdce..266370997fd46 100644
--- a/onnxruntime/core/mlas/inc/mlas.h
+++ b/onnxruntime/core/mlas/inc/mlas.h
@@ -1056,49 +1056,15 @@ MlasComputeTanh(
// Transpose routines.
//
+template <typename DataType>
void
MLASCALL
MlasTranspose(
- const uint8_t* Input,
- uint8_t* Output,
- size_t M,
- size_t N
- );
-
-void
-MLASCALL
-MlasTranspose(
- const int8_t* Input,
- int8_t* Output,
- size_t M,
- size_t N
- );
-
-void
-MLASCALL
-MlasTranspose(
- const uint16_t* Input,
- uint16_t* Output,
- size_t M,
- size_t N
- );
-
-void
-MLASCALL
-MlasTranspose(
- const uint32_t* Input,
- uint32_t* Output,
+ const DataType* Input,
+ DataType* Output,
size_t M,
- size_t N
- );
-
-void
-MLASCALL
-MlasTranspose(
- const float* Input,
- float* Output,
- size_t M,
- size_t N
+ size_t N,
+ MLAS_THREADPOOL* ThreadPool
);
//
@@ -1940,20 +1906,22 @@ MlasConvDepthwise(
MLAS_HALF_GEMM_POSTPROCESSOR* PostProc
);
-
inline
void
MlasTranspose(
const MLAS_FP16* Input,
MLAS_FP16* Output,
size_t M,
- size_t N
+ size_t N,
+ MLAS_THREADPOOL* ThreadPool
)
{
MlasTranspose(
reinterpret_cast<const uint16_t*>(Input),
reinterpret_cast<uint16_t*>(Output),
- M, N);
+ M,
+ N,
+ ThreadPool);
}
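With the per-type overloads collapsed into a single template, call sites deduce the element type and pass the pool explicitly. A usage sketch, assuming mlas.h is included and that a null pool falls back to single-threaded execution:

    #include <vector>

    // Transpose a 3x4 uint32_t matrix into a 4x3 destination.
    std::vector<uint32_t> src(3 * 4), dst(4 * 3);
    MlasTranspose(src.data(), dst.data(), /*M=*/3, /*N=*/4, /*ThreadPool=*/nullptr);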
diff --git a/onnxruntime/core/mlas/inc/mlas_q4.h b/onnxruntime/core/mlas/inc/mlas_q4.h
index aec14070ffd55..c5f846fc7ffed 100644
--- a/onnxruntime/core/mlas/inc/mlas_q4.h
+++ b/onnxruntime/core/mlas/inc/mlas_q4.h
@@ -266,7 +266,7 @@ MlasBlockwiseQuantizedShape(
/**
* @brief Compute the sizes of the quantized data and quantization parameter buffers.
*
- * @param qbits The bit width of each quantized value.
+ * @tparam qbits The bit width of each quantized value.
* @param block_size The number of quantized values in a block.
* @param columnwise Whether a block contains values from a matrix column (true) or row (false).
* @param rows Number of matrix rows.
@@ -277,9 +277,9 @@ MlasBlockwiseQuantizedShape(
*
* If the qbits or block_size values are unsupported the output sizes will be zero.
*/
+template <int qbits>
void MLASCALL
MlasBlockwiseQuantizedBufferSizes(
- int qbits,
int block_size,
bool columnwise,
int rows,
diff --git a/onnxruntime/core/mlas/lib/q4_dq.cpp b/onnxruntime/core/mlas/lib/q4_dq.cpp
index 015d69de68766..c543770ee22d8 100644
--- a/onnxruntime/core/mlas/lib/q4_dq.cpp
+++ b/onnxruntime/core/mlas/lib/q4_dq.cpp
@@ -328,7 +328,7 @@ struct BitsTraits {
static constexpr float halfRange = static_cast<float>(kMid - kMin);
// number of qbit elements to pack into whole bytes
- static constexpr int kPackSize = (qbits == 8) ? 1 : (qbits == 4) ? 2 : (qbits == 2) ? 4 : 0;
+ static constexpr int kPackSize = (qbits == 8) ? 1 : ((qbits == 4) ? 2 : ((qbits == 2) ? 4 : 0));
static_assert(kPackSize != 0, "Packing to whole bytes not supported for this qbits!");
};
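kPackSize is the number of qbits-wide values that fill one byte, and the static_assert pins it to the supported widths. A quick check of the mapping, assuming the single-parameter BitsTraits<qbits> used above:

    static_assert(BitsTraits<2>::kPackSize == 4, "four 2-bit values per byte");
    static_assert(BitsTraits<4>::kPackSize == 2, "two 4-bit values per byte");
    static_assert(BitsTraits<8>::kPackSize == 1, "one 8-bit value per byte");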
@@ -387,12 +387,14 @@ range2scale(float min, float max, ScaleT& scale)
/**
- * @brief Blockwise quantization methods
+ * TODO(fajin): use int4/8 for symmetric quantization so the (vq - zp) operation in MatMulNBits can be saved.
+ * @brief Blockwise quantization methods. Source is row major. Dest, scale and zp are column major.
+ * Always quantize to unsigned int.
* @tparam ElementT source data type, e.g. fp32/fp16
* @tparam block_size number of elements quantized together
* @tparam qbits number of bits in each quantized element
- * @tparam Columnwise true: elements in a block come from one single column
- * false: elements in a block come from one single row
+ * @tparam Columnwise true: quantize along src column, pack along src column.
+ * false: quantize along src row, pack along src column.
*/
template <
typename ElementT,
@@ -402,11 +404,18 @@ template <
struct BlockwiseQuantizer {
// To support other qbits, need to add bit packing code for
// storing to dst and zero points
- static_assert(qbits == 4, "Only 4b block quantization is supported!");
+ static_assert(qbits == 2 || qbits == 4 || qbits == 8, "Only 2b, 4b and 8b block quantization is supported!");
using QuantBlk = std::conditional_t<Columnwise, Shape2D<block_size, 1>, Shape2D<1, block_size>>;
using ThreadBlk = Shape2D<QuantBlk::kRow * BitsTraits<qbits>::kPackSize, QuantBlk::kColumn>;
+ static
+ MLAS_FORCEINLINE
+ int GetElem(int val, int idx)
+ {
+ return (val >> (qbits * idx)) & ((1 << qbits) - 1);
+ }
+
static
MLAS_FORCEINLINE
void quantizeMetaShape(int rows, int columns, int& meta_rows, int& meta_cols)
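GetElem, added above, shifts the packed word right by idx slots of qbits bits each and masks off a single value. A standalone mirror of the logic for qbits == 2 (names here are illustrative):

    #include <cassert>

    int get_elem(int val, int idx, int qbits) {  // mirrors BlockwiseQuantizer::GetElem
      return (val >> (qbits * idx)) & ((1 << qbits) - 1);
    }

    int main() {
      const int packed = 0b10010011;  // 2-bit values {3, 0, 1, 2}, index 0 in the low bits
      assert(get_elem(packed, 0, 2) == 3);
      assert(get_elem(packed, 3, 2) == 2);
      return 0;
    }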
@@ -440,14 +449,14 @@ struct BlockwiseQuantizer {
scale_num_elements = meta_rows * meta_cols;
if (zero_point_bytes) {
- // this works for qbits == 4 but may need to be updated for other qbits values
+ // this works for qbits == 2, 4 or 8 but may need to be updated for other qbits values
*zero_point_bytes = ((meta_rows * qbits + 7) / 8) * meta_cols;
}
}
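The generalized expression bit-packs one column of zero points and rounds up to whole bytes. As a worked check: qbits == 2 with meta_rows == 5 needs ceil(5 * 2 / 8) == 2 bytes per column, and for qbits == 4 the expression reduces to the old ((meta_rows + 1) / 2) * meta_cols:

    static_assert((5 * 2 + 7) / 8 == 2, "five 2-bit zero points pack into 2 bytes");
    static_assert((5 * 4 + 7) / 8 == (5 + 1) / 2, "qbits == 4 matches the old formula");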
/**
* @brief Quantize a matrix of shape [rows, columns]; the resulting quantized
- * and packed data are stored in column major (transposed)
+ * and packed data are stored in column major (transposed).
* @param[out] dst pointer to the quantized weights, column major: [columns, rows]
* @param[out] scale pointer to the scales, column major: [columns/QuantBlk::kColumn, rows/QuantBlk::kRow]
* @param[out] zero_points pointer to the zero points, same shape as scale
@@ -479,8 +488,10 @@ struct BlockwiseQuantizer {
MlasTryBatchParallel(
thread_pool, total_thrd_blks,
[&](ptrdiff_t block_idx) {
- uint8_t zp_bytes[BitsTraits<qbits>::kPackSize];
- std::fill_n(zp_bytes, BitsTraits<qbits>::kPackSize, (uint8_t)8);
+ constexpr int kPackSize = BitsTraits<qbits>::kPackSize;
+ uint8_t zp_bytes[kPackSize], vi[kPackSize];
+ std::fill_n(zp_bytes, kPackSize, (uint8_t)BitsTraits<qbits>::kMid);
+ std::fill_n(vi, kPackSize, 0);
const int32_t r_blk_idx = static_cast<int32_t>(block_idx / thrd_col_blks);
const int32_t c_blk_idx = static_cast<int32_t>(block_idx % thrd_col_blks);
@@ -495,7 +506,7 @@ struct BlockwiseQuantizer {
const int meta_col = c / QuantBlk::kColumn;
// compute scale and zero point
- for (int kpack = 0; kpack < BitsTraits<qbits>::kPackSize; kpack++) {
+ for (int kpack = 0; kpack < kPackSize; kpack++) {
// scan a single block to extract range [min, max]
float min = std::numeric_limits<float>::max();
@@ -521,40 +532,42 @@ struct BlockwiseQuantizer {
}
}
- // !! 4b specific code as we need to pack 2 4b numbers into one byte
if (zero_points != nullptr) {
- const int32_t meta_idx = meta_col * ((row_blks + 1) / 2) + meta_row / 2;
- zero_points[meta_idx] = (zp_bytes[0] & 0xf) | (zp_bytes[1] << 4);
+ const int32_t meta_idx = meta_col * ((row_blks + kPackSize - 1) / kPackSize) + meta_row / kPackSize;
+ if constexpr (qbits == 8) {
+ zero_points[meta_idx] = zp_bytes[0];
+ } else if constexpr (qbits == 4) {
+ zero_points[meta_idx] = (zp_bytes[0] & 0xf) | (zp_bytes[1] << 4);
+ } else if constexpr (qbits == 2) {
+ zero_points[meta_idx] = (zp_bytes[0] & 0x3) | (zp_bytes[1] << 2) | (zp_bytes[2] << 4) | (zp_bytes[3] << 6);
+ } else {
+ MLAS_THROW_EX(std::runtime_error, "Unsupported qbits");
+ }
}
for (int32_t j = c; j < c_end; ++j) {
const int32_t meta_c = j / QuantBlk::kColumn;
- for (int32_t i = r; i < r_end; i += 2) {
- const int32_t meta_r = i / QuantBlk::kRow;
- const float scale = static_cast<float>(scales[meta_c * row_blks + meta_r]);
- const float reciprocal_scale = scale ? 1.0f / scale : 0.0f;
- const int8_t zp = zp_bytes[meta_r & 1];
- const int8_t zp1 = zp_bytes[((i + 1) / QuantBlk::kRow) & 1];
-
- const float v0 = static_cast<float>(src[i * leadingDimension + j]);
- const uint8_t vi0 = (uint8_t)std::clamp(roundf(v0 * reciprocal_scale + zp),
- 0.0f, BitsTraits<qbits>::kMaxFp);
-
- uint8_t vi1 = (uint8_t)zp;
- if (i + 1 < r_end) {
- float reciprocal_scale1 = reciprocal_scale;
- if constexpr (QuantBlk::kRow == 1) {
- const float scale1 =
- static_cast<float>(scales[meta_c * row_blks + meta_r + 1]);
- reciprocal_scale1 = scale1 ? 1.0f / scale1 : 0.0f;
- }
- const float v1 = static_cast<float>(src[(i + 1) * leadingDimension + j]);
- vi1 = (uint8_t)std::clamp(roundf(v1 * reciprocal_scale1 + zp1), 0.0f,
- BitsTraits<qbits>::kMaxFp);
+ for (int32_t i = r; i < r_end; i += kPackSize) {
+ for (int l = 0; l < kPackSize && i + l < r_end; l++) {
+ const int32_t meta_r = (i + l) / QuantBlk::kRow;
+ const float scale = static_cast<float>(scales[meta_c * row_blks + meta_r]);
+ const float reciprocal_scale = scale ? 1.0f / scale : 0.0f;
+ const int32_t zp = zp_bytes[meta_r % kPackSize];
+
+ const float v = static_cast<float>(src[(i + l) * leadingDimension + j]);
+ vi[l] = (uint8_t)std::clamp(roundf(v * reciprocal_scale + zp),
+ 0.0f, BitsTraits<qbits>::kMaxFp);
}
- // !! 4b specific code
- dst[j * q_rows + i / 2] = (vi0 & 0xf) | (vi1 << 4);
+ if constexpr (qbits == 8) {
+ dst[j * q_rows + i / kPackSize] = vi[0];
+ } else if constexpr (qbits == 4) {
+ dst[j * q_rows + i / kPackSize] = (vi[0] & 0xf) | (vi[1] << 4);
+ } else if constexpr (qbits == 2) {
+ dst[j * q_rows + i / kPackSize] = (vi[0] & 0x3) | (vi[1] << 2) | (vi[2] << 4) | (vi[3] << 6);
+ } else {
+ MLAS_THROW_EX(std::runtime_error, "Unsupported qbits");
+ }
}
}
});
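The qbits-specific branches above write kPackSize quantized values into each output byte, index 0 in the least-significant bits. A self-contained roundtrip for the qbits == 2 case (this mirrors, rather than calls, the patched code):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t v[4] = {3, 0, 1, 2};
      // Pack as the qbits == 2 branch does.
      const uint8_t byte = (v[0] & 0x3) | (v[1] << 2) | (v[2] << 4) | (v[3] << 6);
      // Unpack as GetElem does and verify the roundtrip.
      for (int idx = 0; idx < 4; ++idx) {
        assert(((byte >> (2 * idx)) & 0x3) == v[idx]);
      }
      return 0;
    }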
@@ -589,6 +602,7 @@ struct BlockwiseQuantizer {
int q_rows, q_cols;
quantizedShape(rows, columns, q_rows, q_cols);
+ constexpr int32_t kPackSize = BitsTraits<qbits>::kPackSize;
MlasTryBatchParallel(
thread_pool, total_thrd_blks,
@@ -605,38 +619,22 @@ struct BlockwiseQuantizer {
for (int32_t j = c; j < c_end; ++j) {
const int32_t meta_col = j / QuantBlk::kColumn;
- // !! 4b specific code
- // the whole loop is 4b specific due to sub 8 bit packing
- // and unpacking. We can potentially make this qbits generic
- // by wraping the packing/unpacking code like cutlass::Array
- for (int32_t i = r; i < r_end; i += 2) {
+ for (int32_t i = r; i < r_end; ++i) {
const int32_t meta_row = i / QuantBlk::kRow;
-
- const float scale0 =
- static_cast<float>(scales[meta_col * row_blks + meta_row]);
-
+ const float scale = static_cast<float>(scales[meta_col * row_blks + meta_row]);
const int zp_pair =
- (zero_points == nullptr)
- ? 0x88
- : zero_points[meta_col * ((row_blks + 1) / 2) + meta_row / 2];
- const int zp0 = (meta_row & 1) ? (zp_pair >> 4) : (zp_pair & 0xf);
-
- const uint8_t vi0 = weights[j * q_rows + i / 2] & 0xf;
- const float v0 = (static_cast<float>(vi0) - zp0) * scale0;
-
- dst[j * rows + i] = static_cast<ElementT>(v0);
- if ((i + 1) < r_end) {
- float scale1 = scale0;
- int zp1 = zp0;
- if constexpr (QuantBlk::kRow == 1) {
- scale1 =
- static_cast<float>(scales[meta_col * row_blks + meta_row + 1]);
- zp1 = (zp_pair >> 4) & 0xf;
- }
- const uint8_t vi1 = weights[j * q_rows + i / 2] >> 4;
- const float v1 = (static_cast<float>(vi1) - zp1) * scale1;
- dst[j * rows + (i + 1)] = static_cast<ElementT>(v1);
- }
+ zero_points
+ ? zero_points[meta_col * ((row_blks + kPackSize - 1) / kPackSize) + meta_row / kPackSize]
+ : 0;
+ const int vi_pair = weights[j * q_rows + i / kPackSize];
+
+ const int zp =
+ zero_points
+ ? GetElem(zp_pair, meta_row % kPackSize)
+ : BitsTraits<qbits>::kMid;
+ const int vi = GetElem(vi_pair, i % kPackSize);
+ const float v = (vi - zp) * scale;
+ dst[j * rows + i] = ElementT(v);
}
}
});
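The rewritten inner loop applies v = (vi - zp) * scale one element at a time, using BitsTraits<qbits>::kMid as the implicit zero point when none is stored. A worked qbits == 2 example:

    // vi = 3, no stored zero point so zp = kMid = 2, scale = 0.5
    const float v = (3 - 2) * 0.5f;  // dequantizes to 0.5f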
@@ -1416,6 +1414,27 @@ MlasBlockwiseQuantizedShape(
}
}
+template
+void
+MlasBlockwiseQuantMetaShape<float, 2>(
+ int block_size,
+ bool columnwise,
+ int rows,
+ int columns,
+ int& meta_rows,
+ int& meta_cols
+ );
+
+template
+void
+MlasBlockwiseQuantMetaShape<MLAS_FP16, 2>(
+ int block_size,
+ bool columnwise,
+ int rows,
+ int columns,
+ int& meta_rows,
+ int& meta_cols
+ );
template
void
@@ -1439,6 +1458,50 @@ MlasBlockwiseQuantMetaShape(
int& meta_cols
);
+template
+void
+MlasBlockwiseQuantMetaShape<float, 8>(
+ int block_size,
+ bool columnwise,
+ int rows,
+ int columns,
+ int& meta_rows,
+ int& meta_cols
+ );
+
+template
+void
+MlasBlockwiseQuantMetaShape<MLAS_FP16, 8>(
+ int block_size,
+ bool columnwise,
+ int rows,
+ int columns,
+ int& meta_rows,
+ int& meta_cols
+ );
+
+template
+void
+MlasBlockwiseQuantizedShape<float, 2>(
+ int block_size,
+ bool columnwise,
+ int rows,
+ int columns,
+ int& q_rows,
+ int& q_cols
+ );
+
+template
+void
+MlasBlockwiseQuantizedShape<MLAS_FP16, 2>(
+ int block_size,
+ bool columnwise,
+ int rows,
+ int columns,
+ int& q_rows,
+ int& q_cols
+ );
+
template
void
MlasBlockwiseQuantizedShape<float, 4>(
@@ -1461,9 +1524,31 @@ MlasBlockwiseQuantizedShape(
int& q_cols
);
+template
+void
+MlasBlockwiseQuantizedShape<float, 8>(
+ int block_size,
+ bool columnwise,
+ int rows,
+ int columns,
+ int& q_rows,
+ int& q_cols
+ );
+
+template
+void
+MlasBlockwiseQuantizedShape<MLAS_FP16, 8>(
+ int block_size,
+ bool columnwise,
+ int rows,
+ int columns,
+ int& q_rows,
+ int& q_cols
+ );
+
+template <int qbits>
void MLASCALL
MlasBlockwiseQuantizedBufferSizes(
- int qbits,
int block_size,
bool columnwise,
int rows,
@@ -1478,75 +1563,108 @@ MlasBlockwiseQuantizedBufferSizes(
*q_zero_point_size_in_bytes = 0;
}
- if (qbits == 4) {
- switch (block_size) {
- case 16:
- if (columnwise) {
- BlockwiseQuantizer<float, 16, 4, true>::quantizedBufferSizes(
- rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
- );
- } else {
- BlockwiseQuantizer<float, 16, 4, false>::quantizedBufferSizes(
- rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
- );
- }
- break;
-
- case 32:
- if (columnwise) {
- BlockwiseQuantizer<float, 32, 4, true>::quantizedBufferSizes(
- rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
- );
- } else {
- BlockwiseQuantizer<float, 32, 4, false>::quantizedBufferSizes(
- rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
- );
- }
- break;
-
- case 64:
- if (columnwise) {
- BlockwiseQuantizer<float, 64, 4, true>::quantizedBufferSizes(
- rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
- );
- } else {
- BlockwiseQuantizer<float, 64, 4, false>::quantizedBufferSizes(
- rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
- );
- }
- break;
-
- case 128:
- if (columnwise) {
- BlockwiseQuantizer<float, 128, 4, true>::quantizedBufferSizes(
- rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
- );
- } else {
- BlockwiseQuantizer<float, 128, 4, false>::quantizedBufferSizes(
- rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
- );
- }
- break;
-
- case 256:
- if (columnwise) {
- BlockwiseQuantizer<float, 256, 4, true>::quantizedBufferSizes(
- rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
- );
- } else {
- BlockwiseQuantizer<float, 256, 4, false>::quantizedBufferSizes(
- rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
- );
- }
- break;
+ switch (block_size) {
+ case 16:
+ if (columnwise) {
+ BlockwiseQuantizer<float, 16, qbits, true>::quantizedBufferSizes(
+ rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
+ );
+ } else {
+ BlockwiseQuantizer<float, 16, qbits, false>::quantizedBufferSizes(
+ rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
+ );
+ }
+ break;
- default:
- // Only block size 16, 32, 64, 128, 256 are supported.
- break;
- }
+ case 32:
+ if (columnwise) {
+ BlockwiseQuantizer<float, 32, qbits, true>::quantizedBufferSizes(
+ rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
+ );
+ } else {
+ BlockwiseQuantizer<float, 32, qbits, false>::quantizedBufferSizes(
+ rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
+ );
+ }
+ break;
+
+ case 64:
+ if (columnwise) {
+ BlockwiseQuantizer<float, 64, qbits, true>::quantizedBufferSizes(
+ rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
+ );
+ } else {
+ BlockwiseQuantizer<float, 64, qbits, false>::quantizedBufferSizes(
+ rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
+ );
+ }
+ break;
+
+ case 128:
+ if (columnwise) {
+ BlockwiseQuantizer<float, 128, qbits, true>::quantizedBufferSizes(
+ rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
+ );
+ } else {
+ BlockwiseQuantizer<float, 128, qbits, false>::quantizedBufferSizes(
+ rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
+ );
+ }
+ break;
+
+ case 256:
+ if (columnwise) {
+ BlockwiseQuantizer<float, 256, qbits, true>::quantizedBufferSizes(
+ rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
+ );
+ } else {
+ BlockwiseQuantizer<float, 256, qbits, false>::quantizedBufferSizes(
+ rows, columns, q_data_size_in_bytes, q_scale_num_elements, q_zero_point_size_in_bytes
+ );
+ }
+ break;
+
+ default:
+ // Only block size 16, 32, 64, 128, 256 are supported.
+ break;
}
}
+template
+void MLASCALL
+MlasBlockwiseQuantizedBufferSizes<2>(
+ int block_size,
+ bool columnwise,
+ int rows,
+ int columns,
+ size_t& q_data_size_in_bytes,
+ size_t& q_scale_num_elements,
+ size_t* q_zero_point_size_in_bytes
+);
+
+template
+void MLASCALL
+MlasBlockwiseQuantizedBufferSizes<4>(
+ int block_size,
+ bool columnwise,
+ int rows,
+ int columns,
+ size_t& q_data_size_in_bytes,
+ size_t& q_scale_num_elements,
+ size_t* q_zero_point_size_in_bytes
+);
+
+template
+void MLASCALL
+MlasBlockwiseQuantizedBufferSizes<8>(
+ int block_size,
+ bool columnwise,
+ int rows,
+ int columns,
+ size_t& q_data_size_in_bytes,
+ size_t& q_scale_num_elements,
+ size_t* q_zero_point_size_in_bytes
+);
template <typename T, int qbits>
void
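For callers, the bit width moves from a runtime argument to a template argument, matching the explicit instantiations above. A migration sketch (buffer dimensions are illustrative):

    size_t data_bytes = 0, scale_elems = 0, zp_bytes = 0;
    // Before: MlasBlockwiseQuantizedBufferSizes(4, 32, true, rows, cols, ...);
    MlasBlockwiseQuantizedBufferSizes<4>(/*block_size=*/32, /*columnwise=*/true,
                                         /*rows=*/1024, /*columns=*/4096,
                                         data_bytes, scale_elems, &zp_bytes);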
@@ -1620,6 +1738,36 @@ MlasQuantizeBlockwise(
}
}
+template
+void
+MlasQuantizeBlockwise<float, 2>(
+ uint8_t* dst,
+ float* scales,
+ uint8_t* zero_points,
+ const float* src,
+ int block_size,
+ bool columnwise,
+ int rows,
+ int columns,
+ int leading_dimension,
+ MLAS_THREADPOOL* thread_pool
+ );
+
+template
+void
+MlasQuantizeBlockwise<MLAS_FP16, 2>(
+ uint8_t* dst,
+ MLAS_FP16* scales,
+ uint8_t* zero_points,
+ const MLAS_FP16* src,
+ int block_size,
+ bool columnwise,
+ int rows,
+ int columns,
+ int leading_dimension,
+ MLAS_THREADPOOL* thread_pool
+ );
+
template
void
MlasQuantizeBlockwise<float, 4>(
@@ -1650,6 +1798,35 @@ MlasQuantizeBlockwise(
MLAS_THREADPOOL* thread_pool
);
+template
+void
+MlasQuantizeBlockwise<float, 8>(
+ uint8_t* dst,
+ float* scales,
+ uint8_t* zero_points,
+ const float* src,
+ int block_size,
+ bool columnwise,
+ int rows,
+ int columns,
+ int leading_dimension,
+ MLAS_THREADPOOL* thread_pool
+ );
+
+template
+void
+MlasQuantizeBlockwise<MLAS_FP16, 8>(
+ uint8_t* dst,
+ MLAS_FP16* scales,
+ uint8_t* zero_points,
+ const MLAS_FP16* src,
+ int block_size,
+ bool columnwise,
+ int rows,
+ int columns,
+ int leading_dimension,
+ MLAS_THREADPOOL* thread_pool
+ );
template <typename T, int qbits>
void
@@ -1717,6 +1894,32 @@ MlasDequantizeBlockwise(
}
}
+template void
+MlasDequantizeBlockwise<float, 2>(
+ float* dst,
+ const uint8_t* src,
+ const float* scales,
+ const uint8_t* zero_points,
+ int block_size,
+ bool columnwise,
+ int rows,
+ int columns,
+ MLAS_THREADPOOL* thread_pool
+);
+
+template void
+MlasDequantizeBlockwise<MLAS_FP16, 2>(
+ MLAS_FP16* dst,
+ const uint8_t* src,
+ const MLAS_FP16* scales,
+ const uint8_t* zero_points,
+ int block_size,
+ bool columnwise,
+ int rows,
+ int columns,
+ MLAS_THREADPOOL* thread_pool
+);
+
template void
MlasDequantizeBlockwise<float, 4>(
float* dst,
@@ -1730,6 +1933,45 @@ MlasDequantizeBlockwise(
MLAS_THREADPOOL* thread_pool
);
+template void
+MlasDequantizeBlockwise<MLAS_FP16, 4>(
+ MLAS_FP16* dst,
+ const uint8_t* src,
+ const MLAS_FP16* scales,
+ const uint8_t* zero_points,
+ int block_size,
+ bool columnwise,
+ int rows,
+ int columns,
+ MLAS_THREADPOOL* thread_pool
+);
+
+template void
+MlasDequantizeBlockwise<float, 8>(
+ float* dst,
+ const uint8_t* src,
+ const float* scales,
+ const uint8_t* zero_points,
+ int block_size,
+ bool columnwise,
+ int rows,
+ int columns,
+ MLAS_THREADPOOL* thread_pool
+);
+
+template void
+MlasDequantizeBlockwise<MLAS_FP16, 8>(
+ MLAS_FP16* dst,
+ const uint8_t* src,
+ const MLAS_FP16* scales,
+ const uint8_t* zero_points,
+ int block_size,
+ bool columnwise,
+ int rows,
+ int columns,
+ MLAS_THREADPOOL* thread_pool
+);
+
template <typename Tin, int qbits>
bool
MlasQDQQuantizeBlockwise(
diff --git a/onnxruntime/core/mlas/lib/transpose.cpp b/onnxruntime/core/mlas/lib/transpose.cpp
index a758a0e59fb4f..1ee2f90357e9e 100644
--- a/onnxruntime/core/mlas/lib/transpose.cpp
+++ b/onnxruntime/core/mlas/lib/transpose.cpp
@@ -16,6 +16,20 @@ Module Name:
#include "mlasi.h"
+//
+// Define the parameters to execute segments of a transpose operation on worker
+// threads.
+//
+
+template <typename ElementType>
+struct MLAS_TRANPOSE_WORK_BLOCK {
+ ptrdiff_t ThreadCountM;
+ const ElementType* Input;
+ ElementType* Output;
+ size_t M;
+ size_t N;
+};
+
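A sketch of the intended dispatch around this work block: fill one block, size the thread count, and let each worker claim a contiguous row range via MlasPartitionWork. The helper names follow existing MLAS conventions, but the exact wiring is an assumption, not this patch's code:

    template <typename ElementType>
    void DispatchTranspose(const ElementType* Input, ElementType* Output,
                           size_t M, size_t N, MLAS_THREADPOOL* ThreadPool) {
      MLAS_TRANPOSE_WORK_BLOCK<ElementType> WorkBlock;
      WorkBlock.ThreadCountM = MlasGetMaximumThreadCount(ThreadPool);  // assumed helper
      WorkBlock.Input = Input;
      WorkBlock.Output = Output;
      WorkBlock.M = M;
      WorkBlock.N = N;
      MlasExecuteThreaded(MlasTransposeThreaded<ElementType>, &WorkBlock,
                          WorkBlock.ThreadCountM, ThreadPool);
    }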
#if defined(MLAS_SSE2_INTRINSICS)
MLAS_FORCEINLINE
@@ -541,51 +555,69 @@ MlasTranspose8xNVector(
MlasTranspose4xNVector(&Input[InputStride * 4], InputStride, &Output[OutputStride * 4], OutputStride);
}
+template <typename ElementType>
void
-MLASCALL
-MlasTranspose(
- const uint32_t* Input,
- uint32_t* Output,
- size_t M,
- size_t N
- )
+MlasTransposeThreaded(
+ void* Context,
+ ptrdiff_t ThreadId
+);
/*++
Routine Description:
- This routine transposes the input matrix (M rows by N columns) to the
- output matrix (N rows by M columns).
+ This routine is invoked from a worker thread to execute a segment of a transpose operation.
Arguments:
- Input - Supplies the input buffer.
-
- Output - Supplies the output buffer.
-
- M - Supplies the number of rows for the input matrix and the number of
- columns for the output matrix.
+ Context - Supplies the pointer to the context for the threaded operation.
- N - Supplies the number of columns for the input matrix and the number of
- rows for the output matrix.
+ ThreadId - Supplies the current index of the threaded operation.
Return Value:
None.
--*/
+
+template<>
+void
+MlasTransposeThreaded<uint32_t>(
+ void* Context,
+ ptrdiff_t ThreadId
+ )
{
- size_t n = N;
+ const auto* WorkBlock = (MLAS_TRANPOSE_WORK_BLOCK<uint32_t>*)Context;
+
+ //
+ // Partition the operation along the M dimension.
+ //
+
+ size_t IndexM;
+ size_t CountM;
+ MlasPartitionWork(ThreadId, WorkBlock->ThreadCountM, WorkBlock->M, &IndexM, &CountM);
+
+ //
+ // Set transpose parameters.
+ //
+
+ const size_t M = WorkBlock->M;
+ const size_t N = WorkBlock->N;
+
+ const uint32_t* Input = WorkBlock->Input + IndexM * N;
+ uint32_t* Output = WorkBlock->Output + IndexM;
//
// Transpose elements from the input matrix to the output matrix 4 columns
// at a time.
//
+ size_t n = N;
+
while (n >= 4) {
const uint32_t* s = Input;
uint32_t* d = Output;
- size_t m = M;
+ size_t m = CountM;
#if defined(MLAS_SSE2_INTRINSICS) || defined(MLAS_NEON_INTRINSICS) || defined(MLAS_TARGET_POWER) || \
defined(MLAS_LSX_INTRINSICS)
@@ -624,7 +656,7 @@ Return Value:
const uint32_t* s = Input;
uint32_t* d = Output;
- size_t m = M;
+ size_t m = CountM;
while (m >= 4) {
@@ -650,68 +682,45 @@ Return Value:
}
}
+template<>
void
-MLASCALL
-MlasTranspose(
- const float* Input,
- float* Output,
- size_t M,
- size_t N
+MlasTransposeThreaded<float>(
+ void* Context,
+ ptrdiff_t ThreadId
)
{
- MlasTranspose(
- reinterpret_cast<const uint32_t*>(Input),
- reinterpret_cast