diff --git a/CMakeLists.txt b/CMakeLists.txt index 38d2ad5..60dc7d0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,7 @@ if (${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.24") cmake_policy(SET CMP0135 NEW) endif() -set(STACK_VERSION 1.23.0 CACHE STRING "Main project version") +set(STACK_VERSION 1.24.0 CACHE STRING "Main project version") project(npu-linux-driver VERSION ${STACK_VERSION}) set(BUILD_NUMBER "dev-0" CACHE STRING "Build number composed of name and unique number used as driver version") diff --git a/cmake/compiler_flags.cmake b/cmake/compiler_flags.cmake index 149cbce..454a2b7 100644 --- a/cmake/compiler_flags.cmake +++ b/cmake/compiler_flags.cmake @@ -1,14 +1,6 @@ -# Copyright 2022-2024 Intel Corporation. +# Copyright (C) 2022-2025 Intel Corporation # -# This software and the related documents are Intel copyrighted materials, and -# your use of them is governed by the express license under which they were -# provided to you ("License"). Unless the License provides otherwise, you may -# not use, modify, copy, publish, distribute, disclose or transmit this -# software or the related documents without Intel's prior written permission. -# -# This software and the related documents are provided as is, with no express -# or implied warranties, other than those that are expressly stated in -# the License. +# SPDX-License-Identifier: MIT add_compile_options( # Compiler warnings diff --git a/cmake/detect_linux_system.cmake b/cmake/detect_linux_system.cmake index d0bc419..2f5deb3 100644 --- a/cmake/detect_linux_system.cmake +++ b/cmake/detect_linux_system.cmake @@ -1,14 +1,6 @@ -# Copyright 2022-2024 Intel Corporation. +# Copyright (C) 2022-2025 Intel Corporation # -# This software and the related documents are Intel copyrighted materials, and -# your use of them is governed by the express license under which they were -# provided to you ("License"). Unless the License provides otherwise, you may -# not use, modify, copy, publish, distribute, disclose or transmit this -# software or the related documents without Intel's prior written permission. -# -# This software and the related documents are provided as is, with no express -# or implied warranties, other than those that are expressly stated in -# the License. +# SPDX-License-Identifier: MIT function(read_os_release ENTRY VAR_OUTPUT) if (ANDROID) diff --git a/cmake/packaging/postinst b/cmake/packaging/postinst index 8ce1cac..8c22836 100755 --- a/cmake/packaging/postinst +++ b/cmake/packaging/postinst @@ -1,16 +1,8 @@ #!/bin/bash -# Copyright 2024 Intel Corporation. +# Copyright (C) 2024-2025 Intel Corporation # -# This software and the related documents are Intel copyrighted materials, and -# your use of them is governed by the express license under which they were -# provided to you ("License"). Unless the License provides otherwise, you may -# not use, modify, copy, publish, distribute, disclose or transmit this -# software or the related documents without Intel's prior written permission. -# -# This software and the related documents are provided as is, with no express -# or implied warranties, other than those that are expressly stated in -# the License. +# SPDX-License-Identifier: MIT set -e -o pipefail diff --git a/cmake/sanitizer.cmake b/cmake/sanitizer.cmake index 932c77b..7a7d199 100644 --- a/cmake/sanitizer.cmake +++ b/cmake/sanitizer.cmake @@ -1,14 +1,6 @@ -# Copyright 2022-2023 Intel Corporation. +# Copyright (C) 2022-2025 Intel Corporation # -# This software and the related documents are Intel copyrighted materials, and -# your use of them is governed by the express license under which they were -# provided to you ("License"). Unless the License provides otherwise, you may -# not use, modify, copy, publish, distribute, disclose or transmit this -# software or the related documents without Intel's prior written permission. -# -# This software and the related documents are provided as is, with no express -# or implied warranties, other than those that are expressly stated in -# the License. +# SPDX-License-Identifier: MIT # Add options for building with sanitizers if(NOT ENABLE_SANITIZER) diff --git a/compiler/CMakeLists.txt b/compiler/CMakeLists.txt index 639a87a..1311c73 100644 --- a/compiler/CMakeLists.txt +++ b/compiler/CMakeLists.txt @@ -1,14 +1,6 @@ -# Copyright 2022-2024 Intel Corporation. +# Copyright (C) 2022-2025 Intel Corporation # -# This software and the related documents are Intel copyrighted materials, and -# your use of them is governed by the express license under which they were -# provided to you ("License"). Unless the License provides otherwise, you may -# not use, modify, copy, publish, distribute, disclose or transmit this -# software or the related documents without Intel's prior written permission. -# -# This software and the related documents are provided as is, with no express -# or implied warranties, other than those that are expressly stated in -# the License. +# SPDX-License-Identifier: MIT include(openvino.cmake) include(npu_compiler.cmake) diff --git a/compiler/compiler_source.cmake b/compiler/compiler_source.cmake index 109a9be..966f39b 100644 --- a/compiler/compiler_source.cmake +++ b/compiler/compiler_source.cmake @@ -1,14 +1,6 @@ -# Copyright 2022-2025 Intel Corporation. +# Copyright (C) 2022-2025 Intel Corporation # -# This software and the related documents are Intel copyrighted materials, and -# your use of them is governed by the express license under which they were -# provided to you ("License"). Unless the License provides otherwise, you may -# not use, modify, copy, publish, distribute, disclose or transmit this -# software or the related documents without Intel's prior written permission. -# -# This software and the related documents are provided as is, with no express -# or implied warranties, other than those that are expressly stated in -# the License. +# SPDX-License-Identifier: MIT if(TARGET npu_compiler_source) return() @@ -23,16 +15,16 @@ endif() include(ExternalProject) set(OPENVINO_REPOSITORY https://github.com/openvinotoolkit/openvino.git) -set(OPENVINO_REVISION c01cd93e24d1cd78bfbb401eed51c08fb93e0816) -set(OPENCV_REVISION 4d6d6fb18fb859f176e5ce2ad3295097a42cd8af) -set(GENAI_REVISION 01f0fe1eded5934871fef866ed217a60fa2c6049) -set(ONNXRUNTIME_TAG microsoft:9001123f6813409bce2d8ec24888ac73e348c26e) -set(ONNXRUNTIME_REVISION 9001123f6813409bce2d8ec24888ac73e348c26e) +set(OPENVINO_REVISION 44526285f241251e9543276572676365fbe542a4) +set(OPENCV_REVISION 252403bbf2fc560007c2c9057db5a9a151e99dd7) +set(GENAI_REVISION 3c0e2d3e7e13fa5e1dd5ea9ef1df59ce9fa852b5) +set(ONNXRUNTIME_TAG microsoft:7a919c693692d50f7c222660b76fb5b0c9926738) +set(ONNXRUNTIME_REVISION 7a919c693692d50f7c222660b76fb5b0c9926738) -set(NPU_COMPILER_TAG npu_ud_2025_32_rc1) -set(NPU_COMPILER_REVISION df25c7815507db20f903ce585f3976ff927890d6) +set(NPU_COMPILER_TAG npu_ud_2025_38_rc1) +set(NPU_COMPILER_REVISION 5aa47b4f67f9eec535316adf449a6aca58c635b2) # Compiler might use different OpenVINO revision -set(NPU_COMPILER_OPENVINO_REVISION d97acfdce00ea5229e4c2d0ab03256ce0dff0a68) +set(NPU_COMPILER_OPENVINO_REVISION 7ced823330831da23d1985ee27e32b96ebfcf110) set(OPENVINO_SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/src/openvino") file(MAKE_DIRECTORY ${OPENVINO_SOURCE_DIR}) diff --git a/compiler/npu_compiler.cmake b/compiler/npu_compiler.cmake index d1f98d6..bbcf381 100644 --- a/compiler/npu_compiler.cmake +++ b/compiler/npu_compiler.cmake @@ -1,13 +1,6 @@ -# Copyright 2022-2025 Intel Corporation. +# Copyright (C) 2022-2025 Intel Corporation # -# This software and the related documents are Intel copyrighted materials, and -# your use of them is governed by the express license under which they were -# provided to you ("License"). Unless the License provides otherwise, you may -# not use, modify, copy, publish, distribute, disclose or transmit this software -# or the related documents without Intel's prior written permission. -# -# This software and the related documents are provided as is, with no express or -# implied warranties, other than those that are expressly stated in the License. +# SPDX-License-Identifier: MIT add_library(npu_compiler INTERFACE) diff --git a/compiler/npu_compiler_build.cmake b/compiler/npu_compiler_build.cmake index 787838c..9904791 100644 --- a/compiler/npu_compiler_build.cmake +++ b/compiler/npu_compiler_build.cmake @@ -1,14 +1,6 @@ -# Copyright 2022-2025 Intel Corporation. +# Copyright (C) 2022-2025 Intel Corporation # -# This software and the related documents are Intel copyrighted materials, and -# your use of them is governed by the express license under which they were -# provided to you ("License"). Unless the License provides otherwise, you may -# not use, modify, copy, publish, distribute, disclose or transmit this -# software or the related documents without Intel's prior written permission. -# -# This software and the related documents are provided as is, with no express -# or implied warranties, other than those that are expressly stated in -# the License. +# SPDX-License-Identifier: MIT include(compiler_source.cmake) @@ -25,54 +17,91 @@ set(NPU_COMPILER_PACKAGE_DIR ${NPU_COMPILER_INSTALL_PREFIX}/cid) include(ExternalProject) +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D CMAKE_BUILD_TYPE=Release") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D CMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D BUILD_COMPILER_FOR_DRIVER=ON") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D BUILD_SHARED_LIBS=OFF") +# CLANG_FORMAT and NCC_STYLE is set to OFF to avoid LLVMDemangle doubled target issue +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_CLANG_FORMAT=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_CLANG_TIDY=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_NCC_STYLE=OFF") +# Copied from "how_to_build_driver_compiler" document +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_AUTO=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_AUTO_BATCH=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_BLOB_DUMP=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_FUNCTIONAL_TESTS=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_HETERO=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_INTEL_CPU=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_INTEL_GPU=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_JS=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_MULTI=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_INTEL_NPU=ON") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_INTEL_NPU_INTERNAL=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_INTEL_NPU_PROTOPIPE=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_OV_IR_FRONTEND=ON") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_OV_JAX_FRONTEND=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_OV_ONNX_FRONTEND=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_OV_PADDLE_FRONTEND=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_OV_PYTORCH_FRONTEND=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_OV_TF_FRONTEND=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_OV_TF_LITE_FRONTEND=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_PROXY=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_SAMPLES=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_TBBBIND_2_5=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_TEMPLATE=OFF") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ENABLE_TESTS=OFF") +# WA in case libgflags is installed in system +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D gflags_DIR=${CMAKE_CURRENT_SOURCE_DIR}/openvino_modules") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D OPENVINO_EXTRA_MODULES=${NPU_COMPILER_SOURCE_DIR}") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D OUTPUT_ROOT=${NPU_COMPILER_BINARY_DIR}") +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D THREADING=${THREADING}") + +if (ANDROID) + # First build native tools required for NPU compiler + set(NPU_COMPILER_NATIVE_TOOLS_BUILD npu_compiler_native_tools_build) + set(NPU_COMPILER_NATIVE_TOOLS_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/build_npu_compiler_native_tools) + + ExternalProject_Add( + ${NPU_COMPILER_NATIVE_TOOLS_BUILD} + DOWNLOAD_COMMAND "" + DEPENDS npu_compiler_source ${NPU_COMPILER_BUILD_DEPENDS} + SOURCE_DIR ${NPU_COMPILER_OPENVINO_SOURCE_DIR} + BINARY_DIR ${NPU_COMPILER_NATIVE_TOOLS_BINARY_DIR} + CMAKE_ARGS + ${NPU_COMPILER_CMAKE_ARGS} + BUILD_COMMAND + ${CMAKE_COMMAND} + --build ${NPU_COMPILER_NATIVE_TOOLS_BINARY_DIR} + --target npureg-tblgen mlir-headers mlir-generic-headers mlir-linalg-ods-yaml-gen flatc + --parallel ${PARALLEL_PROCESSES} + INSTALL_COMMAND + mkdir -p ${NPU_COMPILER_BINARY_DIR}/build-modules/npu_compiler/thirdparty/llvm-project/llvm/NATIVE && + cp -r ${NPU_COMPILER_NATIVE_TOOLS_BINARY_DIR}/build-modules/npu_compiler/thirdparty/llvm-project/llvm/bin ${NPU_COMPILER_BINARY_DIR}/build-modules/npu_compiler/thirdparty/llvm-project/llvm/NATIVE/ + BYPRODUCTS + ${NPU_COMPILER_BINARY_DIR}/bin/intel64/Release/flatc + ${NPU_COMPILER_BINARY_DIR}/bin/intel64/Release/npureg-tblgen + ) + + # Android specific settings + list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ANDROID_ABI=${ANDROID_ABI}") + list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ANDROID_PLATFORM=${ANDROID_PLATFORM}") + list(APPEND NPU_COMPILER_CMAKE_ARGS "-D ANDROID_STL=${ANDROID_STL}") + list(APPEND NPU_COMPILER_CMAKE_ARGS "-D CMAKE_CXX_FLAGS_INIT='-frtti'") +endif() + +list(APPEND NPU_COMPILER_CMAKE_ARGS "-D CMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") + ExternalProject_Add( npu_compiler_build DOWNLOAD_COMMAND "" - DEPENDS npu_compiler_source ${NPU_COMPILER_BUILD_DEPENDS} + DEPENDS npu_compiler_source ${NPU_COMPILER_BUILD_DEPENDS} ${NPU_COMPILER_NATIVE_TOOLS_BUILD} SOURCE_DIR ${NPU_COMPILER_OPENVINO_SOURCE_DIR} BINARY_DIR ${NPU_COMPILER_BINARY_DIR} CMAKE_ARGS - -D CMAKE_BUILD_TYPE=Release - -D CMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} - -D CMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM} - -D BUILD_COMPILER_FOR_DRIVER=ON - -D BUILD_SHARED_LIBS=OFF - # CLANG_FORMAT and NCC_STYLE is set to OFF to avoid LLVMDemangle doubled target issue - -D ENABLE_CLANG_FORMAT=OFF - -D ENABLE_CLANG_TIDY=OFF - -D ENABLE_NCC_STYLE=OFF - # Copied from "how_to_build_driver_compiler" document - -D ENABLE_AUTO=OFF - -D ENABLE_AUTO_BATCH=OFF - -D ENABLE_BLOB_DUMP=OFF - -D ENABLE_FUNCTIONAL_TESTS=OFF - -D ENABLE_HETERO=OFF - -D ENABLE_INTEL_CPU=OFF - -D ENABLE_INTEL_GPU=OFF - -D ENABLE_JS=OFF - -D ENABLE_MULTI=OFF - -D ENABLE_INTEL_NPU_PROTOPIPE=OFF - -D ENABLE_OV_IR_FRONTEND=ON - -D ENABLE_OV_JAX_FRONTEND=OFF - -D ENABLE_OV_ONNX_FRONTEND=OFF - -D ENABLE_OV_PADDLE_FRONTEND=OFF - -D ENABLE_OV_PYTORCH_FRONTEND=OFF - -D ENABLE_OV_TF_FRONTEND=OFF - -D ENABLE_OV_TF_LITE_FRONTEND=OFF - -D ENABLE_PROXY=OFF - -D ENABLE_SAMPLES=OFF - -D ENABLE_TBBBIND_2_5=OFF - -D ENABLE_TEMPLATE=OFF - -D ENABLE_TESTS=OFF - # WA in case libgflags is installed in system - -D gflags_DIR=${CMAKE_CURRENT_SOURCE_DIR}/openvino_modules - -D OPENVINO_EXTRA_MODULES=${NPU_COMPILER_SOURCE_DIR} - -D OUTPUT_ROOT=${NPU_COMPILER_BINARY_DIR} - -D THREADING=${THREADING} + ${NPU_COMPILER_CMAKE_ARGS} BUILD_COMMAND ${CMAKE_COMMAND} --build ${NPU_COMPILER_BINARY_DIR} - --config Release --target compilerTest profilingTest vpuxCompilerL0Test loaderTest --parallel ${PARALLEL_PROCESSES} INSTALL_COMMAND diff --git a/compiler/openvino.cmake b/compiler/openvino.cmake index 6427783..bf75f09 100644 --- a/compiler/openvino.cmake +++ b/compiler/openvino.cmake @@ -1,8 +1,6 @@ -# # Copyright (C) 2024-2025 Intel Corporation # # SPDX-License-Identifier: MIT -# if (ENABLE_OPENVINO_PACKAGE) include(openvino_build.cmake) diff --git a/compiler/openvino_build.cmake b/compiler/openvino_build.cmake index 6bacd5e..aa29168 100644 --- a/compiler/openvino_build.cmake +++ b/compiler/openvino_build.cmake @@ -1,14 +1,6 @@ -# Copyright 2022-2025 Intel Corporation. +# Copyright (C) 2022-2025 Intel Corporation # -# This software and the related documents are Intel copyrighted materials, and -# your use of them is governed by the express license under which they were -# provided to you ("License"). Unless the License provides otherwise, you may -# not use, modify, copy, publish, distribute, disclose or transmit this -# software or the related documents without Intel's prior written permission. -# -# This software and the related documents are provided as is, with no express -# or implied warranties, other than those that are expressly stated in -# the License. +# SPDX-License-Identifier: MIT if(NOT ENABLE_OPENVINO_PACKAGE) return() diff --git a/compiler/openvino_modules/gflags-config.cmake b/compiler/openvino_modules/gflags-config.cmake index 4920e0e..e9e20a4 100644 --- a/compiler/openvino_modules/gflags-config.cmake +++ b/compiler/openvino_modules/gflags-config.cmake @@ -1,13 +1,5 @@ -# Copyright 2022-2023 Intel Corporation. +# Copyright (C) 2022-2025 Intel Corporation # -# This software and the related documents are Intel copyrighted materials, and -# your use of them is governed by the express license under which they were -# provided to you ("License"). Unless the License provides otherwise, you may -# not use, modify, copy, publish, distribute, disclose or transmit this -# software or the related documents without Intel's prior written permission. -# -# This software and the related documents are provided as is, with no express -# or implied warranties, other than those that are expressly stated in -# the License. +# SPDX-License-Identifier: MIT message(WARNING "Ignoring system gflags") diff --git a/firmware/bin/vpu_37xx_v1.bin b/firmware/bin/vpu_37xx_v1.bin index 65a297f..b67f984 100644 Binary files a/firmware/bin/vpu_37xx_v1.bin and b/firmware/bin/vpu_37xx_v1.bin differ diff --git a/firmware/bin/vpu_40xx_v1.bin b/firmware/bin/vpu_40xx_v1.bin index e633b1d..79efcc0 100755 Binary files a/firmware/bin/vpu_40xx_v1.bin and b/firmware/bin/vpu_40xx_v1.bin differ diff --git a/firmware/include/api/vpu_jsm_api.h b/firmware/include/api/vpu_jsm_api.h index ec5f692..f755636 100644 --- a/firmware/include/api/vpu_jsm_api.h +++ b/firmware/include/api/vpu_jsm_api.h @@ -23,12 +23,12 @@ /* * Minor version changes when API backward compatibility is preserved. */ -#define VPU_JSM_API_VER_MINOR 32 +#define VPU_JSM_API_VER_MINOR 33 /* * API header changed (field names, documentation, formatting) but API itself has not been changed */ -#define VPU_JSM_API_VER_PATCH 8 +#define VPU_JSM_API_VER_PATCH 0 /* * Index in the API version table @@ -76,8 +76,11 @@ #define VPU_JSM_STATUS_PREEMPTED_MID_INFERENCE 0xDU /* Job status returned when the job was preempted mid-command */ #define VPU_JSM_STATUS_PREEMPTED_MID_COMMAND 0xDU +/* Range of status codes that require engine reset */ +#define VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MIN 0xEU #define VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW 0xEU #define VPU_JSM_STATUS_MVNCI_PREEMPTION_TIMED_OUT 0xFU +#define VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MAX 0x1FU /* * Host <-> VPU IPC channels. @@ -406,7 +409,10 @@ struct vpu_hws_native_fence_log_header { struct { /** Index of the first free entry in buffer. */ uint32_t first_free_entry_idx; - /** Incremented each time NPU wraps around the buffer to write next entry. */ + /** + * Incremented whenever the NPU wraps around the buffer and writes + * to the first entry again. + */ uint32_t wraparound_count; }; /** Field allowing atomic update of both fields above. */ diff --git a/firmware/include/api/vpu_nce_hw_40xx.h b/firmware/include/api/vpu_nce_hw_40xx.h index 13e5a8e..23266f0 100644 --- a/firmware/include/api/vpu_nce_hw_40xx.h +++ b/firmware/include/api/vpu_nce_hw_40xx.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: MIT */ /* - * Copyright (c) 2022-2023, Intel Corporation. + * Copyright (c) 2022-2025, Intel Corporation. */ #ifndef VPU_NCE_HW_40XX_H @@ -434,7 +434,7 @@ typedef struct { uint32_t ppe_clk_en : 1; uint32_t odu_stat_en : 1; uint32_t idu_stat_en : 1; - uint32_t reserved_1 : 1; + uint32_t noc_clk_en : 1; uint32_t odu_stat_clr_mode : 1; uint32_t idu_stat_clr_mode : 1; uint32_t se_only_en : 1; diff --git a/firmware/include/api/vpu_nnrt_api_40xx.h b/firmware/include/api/vpu_nnrt_api_40xx.h index 59917cd..cc7b607 100644 --- a/firmware/include/api/vpu_nnrt_api_40xx.h +++ b/firmware/include/api/vpu_nnrt_api_40xx.h @@ -44,6 +44,17 @@ * * API changelog * ------------- + * 11.13: + * - Accept CMX Shave stack frames from the blob + * 11.12: + * - 1KB Shave scratch region is moved to the end of CMX, + * default CMX Shave stacks increased to 7.5kB per Shave + * 11.11: + * - Add the ManagedMappedInference directly to the VpuHostParsedInference. + * + * 11.10.3: + * - update Field name from reserved_1 to noc_clk_en in DPU Descriptor. + * * 11.10: * - Increase the minor version number to uniquely identify the UD24 release from earlier versions * @@ -72,9 +83,11 @@ * barrier_programming_mode and barrier_configuration_stride in VpuManagedMappedInference) * to allow runtime to efficiently fill barrier FIFOs. */ + #define VPU_NNRT_40XX_API_VER_MAJOR 11 -#define VPU_NNRT_40XX_API_VER_MINOR 10 -#define VPU_NNRT_40XX_API_VER_PATCH 2 +#define VPU_NNRT_40XX_API_VER_MINOR 13 +#define VPU_NNRT_40XX_API_VER_PATCH 0 + #define VPU_NNRT_40XX_API_VER ((VPU_NNRT_40XX_API_VER_MAJOR << 16) | VPU_NNRT_40XX_API_VER_MINOR) /* Index in the API version table, same for all HW generations */ @@ -90,6 +103,9 @@ * * Act Runtime changelog: * ---------------------- + * 1.13: + * - Window 1F reset to the beginning of CMX tile + * * 1.12: * - Cache operation fix * @@ -100,6 +116,9 @@ * - Support for executing shave tasks directly from DDR (expects two FIFO pushes * with the full 32 bit AKI address and NW_PAGE is already correct) * + * 1.9.1 (NPU4 only): + * - Window 1F reset to the beginning of CMX tile + * * 1.9: * - Add clock gating support * @@ -107,9 +126,11 @@ * - Support Shave Shutdown control message * */ + #define VPU_ACT_RT_VER_MAJOR 1 #define VPU_ACT_RT_VER_MINOR 9 -#define VPU_ACT_RT_VER_PATCH 0 +#define VPU_ACT_RT_VER_PATCH 1 + #define VPU_ACT_RT_VER ((VPU_ACT_RT_VER_MAJOR << 16) | VPU_ACT_RT_VER_MINOR) /* @@ -353,9 +374,25 @@ static_assert(offsetof(VpuMappedInference, managed_inference) % 8 == 0, "Alignme struct VPU_ALIGNED_STRUCT(32) VpuHostParsedInference { uint64_t reserved_; VpuResourceRequirements resource_requirements_; - uint8_t pad_[4]; + + /** + * @brief Determines whether the access to the VpuManagedMappedInference is direct or indirect. + */ + enum VpuMmiAccessMode : uint8_t { + INDIRECT = 0, /**< The managed inference is accessed indirectly, through the managed_inference member of the + VpuMappedInference struct. */ + DIRECT, /**< The managed inference is accessed directly from the managed_inference_ member of the + VpuHostParsedInference struct. */ + UNKNOWN = 255 + }; + + VpuMmiAccessMode mmi_access_; + uint8_t pad_[3]; struct VpuPerformanceMetrics performance_metrics_; - VpuTaskReference mapped_; + union VPU_ALIGNED_STRUCT(8) { + VpuTaskReference mapped_; + VpuTaskReference managed_inference_; + }; }; static_assert(sizeof(VpuHostParsedInference) == 384, "VpuHostParsedInference size != 384"); diff --git a/firmware/include/api/vpu_nnrt_common.h b/firmware/include/api/vpu_nnrt_common.h index 275b9a1..00a91cd 100644 --- a/firmware/include/api/vpu_nnrt_common.h +++ b/firmware/include/api/vpu_nnrt_common.h @@ -52,8 +52,9 @@ constexpr uint32_t VPU_BARRIERS_PER_GROUP = 16; constexpr uint32_t VPU_DPU_PER_TILE = 1; constexpr uint32_t VPU_SNN_PER_TILE = VPU_DPU_PER_TILE; constexpr uint32_t VPU_SNN_TOTAL = VPU_SNN_PER_TILE * VPU_MAX_TILES; -constexpr uint32_t VPU_MAX_DMA_ENGINES = 2; constexpr uint32_t VPU_AS_PER_TILE = 2; +// On NPU4, there is only one physical DMA engine, but it is logically split into two interfaces. +constexpr uint32_t VPU_MAX_DMA_ENGINES = 2; constexpr uint32_t VPU_AS_TOTAL = VPU_AS_PER_TILE * VPU_MAX_TILES; #pragma pack(push, 1) diff --git a/firmware/include/api/vpu_nnrt_wlm.h b/firmware/include/api/vpu_nnrt_wlm.h index 2b4ea67..2c7dbae 100644 --- a/firmware/include/api/vpu_nnrt_wlm.h +++ b/firmware/include/api/vpu_nnrt_wlm.h @@ -323,7 +323,7 @@ struct VPU_ALIGNED_STRUCT(8) VpuManagedMappedInferenceInfo { uint64_t ref_info_base_media; }; -static_assert(sizeof(VpuManagedMappedInferenceInfo) == 392, "BarrierReferenceMap size != 392"); +static_assert(sizeof(VpuManagedMappedInferenceInfo) == 392, "VpuManagedMappedInferenceInfo size != 392"); static_assert(offsetof(VpuManagedMappedInferenceInfo, ref_info_base_vars) % 8 == 0, "Alignment error"); /** diff --git a/third_party/cmake/level-zero-npu-extensions.cmake b/third_party/cmake/level-zero-npu-extensions.cmake index 67faf56..c26ce9f 100644 --- a/third_party/cmake/level-zero-npu-extensions.cmake +++ b/third_party/cmake/level-zero-npu-extensions.cmake @@ -1,13 +1,6 @@ -# Copyright 2022-2023 Intel Corporation. +# Copyright (C) 2022-2025 Intel Corporation # -# This software and the related documents are Intel copyrighted materials, and -# your use of them is governed by the express license under which they were -# provided to you ("License"). Unless the License provides otherwise, you may -# not use, modify, copy, publish, distribute, disclose or transmit this software -# or the related documents without Intel's prior written permission. -# -# This software and the related documents are provided as is, with no express or -# implied warranties, other than those that are expressly stated in the License. +# SPDX-License-Identifier: MIT set(LEVEL_ZERO_EXT_HEADERS_DIR "${CMAKE_BINARY_DIR}/include/level_zero") file(MAKE_DIRECTORY ${LEVEL_ZERO_EXT_HEADERS_DIR}) diff --git a/third_party/cmake/level-zero.cmake b/third_party/cmake/level-zero.cmake index a8f7b1e..2b9d571 100644 --- a/third_party/cmake/level-zero.cmake +++ b/third_party/cmake/level-zero.cmake @@ -1,13 +1,6 @@ -# Copyright 2022-2024 Intel Corporation. +# Copyright (C) 2022-2025 Intel Corporation # -# This software and the related documents are Intel copyrighted materials, and -# your use of them is governed by the express license under which they were -# provided to you ("License"). Unless the License provides otherwise, you may -# not use, modify, copy, publish, distribute, disclose or transmit this software -# or the related documents without Intel's prior written permission. -# -# This software and the related documents are provided as is, with no express or -# implied warranties, other than those that are expressly stated in the License. +# SPDX-License-Identifier: MIT list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") diff --git a/third_party/cmake/perfetto.cmake b/third_party/cmake/perfetto.cmake index a736043..61e329e 100644 --- a/third_party/cmake/perfetto.cmake +++ b/third_party/cmake/perfetto.cmake @@ -1,14 +1,6 @@ -# Copyright 2024 Intel Corporation. +# Copyright (C) 2024-2025 Intel Corporation # -# This software and the related documents are Intel copyrighted materials, and -# your use of them is governed by the express license under which they were -# provided to you ("License"). Unless the License provides otherwise, you may -# not use, modify, copy, publish, distribute, disclose or transmit this software -# or the related documents without Intel's prior written permission. -# -# This software and the related documents are provided as is, with no express or -# implied warranties, other than those that are expressly stated in the License. - +# SPDX-License-Identifier: MIT project(perfetto) find_package(Threads) diff --git a/third_party/cmake/vpux_elf.cmake b/third_party/cmake/vpux_elf.cmake index 167ddcf..ad72fe7 100644 --- a/third_party/cmake/vpux_elf.cmake +++ b/third_party/cmake/vpux_elf.cmake @@ -1,14 +1,6 @@ -# Copyright 2022-2024 Intel Corporation. +# Copyright (C) 2022-2025 Intel Corporation # -# This software and the related documents are Intel copyrighted materials, and -# your use of them is governed by the express license under which they were -# provided to you ("License"). Unless the License provides otherwise, you may -# not use, modify, copy, publish, distribute, disclose or transmit this -# software or the related documents without Intel's prior written permission. -# -# This software and the related documents are provided as is, with no express -# or implied warranties, other than those that are expressly stated in -# the License. +# SPDX-License-Identifier: MIT # The libnpu_elf.a is picked up from compiler package, nothing to do if (TARGET vpux_elf) diff --git a/third_party/level-zero b/third_party/level-zero index e3b6efd..ff8c99d 160000 --- a/third_party/level-zero +++ b/third_party/level-zero @@ -1 +1 @@ -Subproject commit e3b6efdd91d67bb03024b266094afabd39e213bf +Subproject commit ff8c99d4abda00fba6d92548a9cb2f721764d9d0 diff --git a/third_party/vpux_elf b/third_party/vpux_elf index 4b0a4a0..ad07093 160000 --- a/third_party/vpux_elf +++ b/third_party/vpux_elf @@ -1 +1 @@ -Subproject commit 4b0a4a06ae09c0c3a973f8f18761c549ec2309eb +Subproject commit ad0709314e78449da87a6807665a615d469bac76 diff --git a/umd/level_zero_driver/api/ext/ze_queue.cpp b/umd/level_zero_driver/api/ext/ze_queue.cpp index dde7eb8..54456c6 100644 --- a/umd/level_zero_driver/api/ext/ze_queue.cpp +++ b/umd/level_zero_driver/api/ext/ze_queue.cpp @@ -28,7 +28,7 @@ zeCommandQueueSetWorkloadType(ze_command_queue_handle_t hCommandQueue, goto exit; } - ret = translateHandle(ZEL_HANDLE_COMMAND_LIST, hCommandQueue); + ret = translateHandle(ZEL_HANDLE_COMMAND_QUEUE, hCommandQueue); if (ret != ZE_RESULT_SUCCESS) { goto exit; } diff --git a/umd/level_zero_driver/source/cmdlist.cpp b/umd/level_zero_driver/source/cmdlist.cpp index 486d476..72950d6 100644 --- a/umd/level_zero_driver/source/cmdlist.cpp +++ b/umd/level_zero_driver/source/cmdlist.cpp @@ -24,6 +24,7 @@ #include "vpu_driver/source/device/vpu_device_context.hpp" #include "vpu_driver/source/memory/vpu_buffer_object.hpp" +#include #include #include #include @@ -237,13 +238,34 @@ ze_result_t CommandList::appendMemoryFillCmd(void *ptr, return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } - return appendCommandWithEvents(hSignalEvent, - numWaitEvents, - phWaitEvents, - ptr, - std::move(ptrBo), - size, - fill_pattern); + // Because fill operation can not be interrupted + // for efficiency reason we limit single operation to 8 MB + // If size is larger than FILL_SIZE_LIMIT, split it into multiple operations + static constexpr size_t FILL_SIZE_LIMIT = (8 << 20); + + size_t sizeLeft = size; + size_t offset = 0; + ze_result_t result = ZE_RESULT_SUCCESS; + while (sizeLeft > 0) { + size_t fillSize = std::min(sizeLeft, FILL_SIZE_LIMIT); + result = appendCommandWithEvents( + sizeLeft <= FILL_SIZE_LIMIT ? hSignalEvent : nullptr, + numWaitEvents, + phWaitEvents, + static_cast(ptr) + offset, + ptrBo, + fillSize, + fill_pattern); + if (result != ZE_RESULT_SUCCESS) { + LOG_E("Failed to append fill command to list"); + break; + } + sizeLeft -= fillSize; + offset += fillSize; + numWaitEvents = 0; + phWaitEvents = nullptr; + } + return result; } ze_result_t CommandList::appendMemoryFillAsCopyCmd(void *ptr, diff --git a/umd/level_zero_driver/source/device.cpp b/umd/level_zero_driver/source/device.cpp index c1226ee..6ad0baa 100644 --- a/umd/level_zero_driver/source/device.cpp +++ b/umd/level_zero_driver/source/device.cpp @@ -576,15 +576,11 @@ void Device::loadMetricGroupsInfo(std::vector &metricGroupsInfo) counter.metricDescription.c_str(), ZET_MAX_METRIC_DESCRIPTION - 1); properties.description[ZET_MAX_METRIC_DESCRIPTION - 1] = '\0'; - strncpy(properties.component, - counter.component.c_str(), - ZET_MAX_METRIC_COMPONENT - 1); + strncpy(properties.component, counter.component.c_str(), ZET_MAX_METRIC_COMPONENT - 1); properties.component[ZET_MAX_METRIC_COMPONENT - 1] = '\0'; properties.metricType = Metric::getMetricType(counter.metricType); properties.resultType = Metric::getValueType(counter.valueType); - strncpy(properties.resultUnits, - counter.units.c_str(), - ZET_MAX_METRIC_RESULT_UNITS - 1); + strncpy(properties.resultUnits, counter.units.c_str(), ZET_MAX_METRIC_RESULT_UNITS - 1); properties.resultUnits[ZET_MAX_METRIC_RESULT_UNITS - 1] = '\0'; allocationSize += Metric::getMetricValueSize(counter.valueType); diff --git a/umd/level_zero_driver/source/ext/elf_parser.cpp b/umd/level_zero_driver/source/ext/elf_parser.cpp index b59f07f..a13ff27 100644 --- a/umd/level_zero_driver/source/ext/elf_parser.cpp +++ b/umd/level_zero_driver/source/ext/elf_parser.cpp @@ -77,26 +77,29 @@ class DriverBufferManager : public elf::BufferManager { buffSpecs.procFlags); auto range = getBufferType(buffSpecs.procFlags); - if (buffSpecs.isSharable() && range == VPU::VPUBufferObject::Type::WriteCombineDma && - buffSpecs.size > 0) { + // If zero-sized buffer is requested, return only NPU address from required range. Use + // unmappable allocation to quickly deallocate buffer at function exit + if (buffSpecs.size == 0) { + constexpr size_t allocSize = 1; + auto bo = + ctx->createUntrackedBufferObject(allocSize, + VPU::VPUBufferObject::convertToUnmappable(range)); + VPUX_ELF_THROW_WHEN(bo == nullptr, elf::AllocError, "Failed to get zero-sized buffer"); + + LOG(GRAPH, "Zero-sized buffer, returning only vpu address: %#lx", bo->getVPUAddr()); + return elf::DeviceBuffer(nullptr, bo->getVPUAddr(), buffSpecs.size); + } + + // If buffer is sharable, return empty DeviceBuffer. The buffer will be added on submission + if (buffSpecs.isSharable() && range == VPU::VPUBufferObject::Type::WriteCombineDma) { LOG(GRAPH, "Shared scratch buffer size: %lu", buffSpecs.size); sharedScratchSize = buffSpecs.size; ctx->scratchCachePreload(buffSpecs.size); - // Return empty scratch buffer, it will be added in updateSharedScratchBuffers return elf::DeviceBuffer(); } - size_t size = buffSpecs.size; - if (size == 0) { - LOG(GRAPH, "WA for buffSpecs.size == 0 -> set size to 1"); - size = 1; - } - - auto bo = ctx->createUntrackedBufferObject(size, range); - if (bo == nullptr) { - LOG_E("Failed to allocate the memory"); - return elf::DeviceBuffer(); - } + auto bo = ctx->createUntrackedBufferObject(buffSpecs.size, range); + VPUX_ELF_THROW_WHEN(bo == nullptr, elf::AllocError, "Failed to allocate device buffer"); LOG(GRAPH, "Allocated: cpu_addr: %p, vpu_addr: %#lx, size: %#lx", @@ -107,10 +110,7 @@ class DriverBufferManager : public elf::BufferManager { void *ptr = bo->getBasePointer(); const std::lock_guard lock(mtx); auto [it, success] = tracedElfParserBuffers.emplace(ptr, std::move(bo)); - if (!success) { - LOG_E("Failed to trace elf parser buffer"); - return elf::DeviceBuffer(); - } + VPUX_ELF_THROW_WHEN(!success, elf::AllocError, "Failed to trace new device buffer"); return elf::DeviceBuffer(it->second->getBasePointer(), it->second->getVPUAddr(), @@ -646,7 +646,6 @@ bool ElfParser::getArgumentProperties(std::vector &inputP bos.push_back(std::move(bo)); for (const auto &buffer : cmdHpi->getAllocatedBuffers()) { + // Skip not allocated buffers if (buffer.size() == 0) { continue; } + // SharedScratchBuffer does not set the cpu address + if (buffer.cpu_addr() == nullptr && buffer.size() == getSharedScratchSize()) { + continue; + } + auto bo = findBuffer(buffer.cpu_addr()); if (bo == nullptr) { - // TODO: Shared scratch buffer is not allocated by DriverBufferManager. - // It is possible that there are two buffers with same size. Consider to - // add a better check for shared scratch buffer - if (buffer.size() == getSharedScratchSize()) - continue; - LOG_E("Failed to find a buffer in tracked memory"); return nullptr; } diff --git a/umd/level_zero_driver/source/ext/sha1/README.md b/umd/level_zero_driver/source/ext/sha1/README.md index 75a475d..41b377c 100644 --- a/umd/level_zero_driver/source/ext/sha1/README.md +++ b/umd/level_zero_driver/source/ext/sha1/README.md @@ -1,14 +1,5 @@ - + This SHA1 implementation comes from OpenBSD repository: diff --git a/umd/vpu_driver/source/command/copy_command.hpp b/umd/vpu_driver/source/command/copy_command.hpp index 3fd54bc..53502bd 100644 --- a/umd/vpu_driver/source/command/copy_command.hpp +++ b/umd/vpu_driver/source/command/copy_command.hpp @@ -49,8 +49,10 @@ class VPUCopyCommand : public VPUCommand { template static bool fillDescriptor(uint64_t srcAddr, uint64_t dstAddr, size_t size, VPUDescriptor &descriptor) { - // The hardware limits the DMA descriptor copy size to 16 MB - static constexpr uint32_t COPY_SIZE_LIMIT = (16 << 20) - 1; + // The some hardware limits the DMA descriptor copy size 16MB + // because copy operation can not be interrupted + // for efficiency reason we limit single operation to 8 MB + static constexpr uint32_t COPY_SIZE_LIMIT = (8 << 20); if (srcAddr == 0 || dstAddr == 0) { LOG_E("Failed to get vpu address for copy descriptor"); diff --git a/umd/vpu_driver/source/device/vpu_device_context.cpp b/umd/vpu_driver/source/device/vpu_device_context.cpp index 96f7d9f..99028b0 100644 --- a/umd/vpu_driver/source/device/vpu_device_context.cpp +++ b/umd/vpu_driver/source/device/vpu_device_context.cpp @@ -25,20 +25,6 @@ VPUDeviceContext::VPUDeviceContext(std::unique_ptr drvApi, VPUHwIn LOG(DEVICE, "VPUDeviceContext is created"); } -VPUBufferObject::Type convertDmaToShaveRange(VPUBufferObject::Type type) { - switch (type) { - case VPUBufferObject::Type::WriteCombineDma: - return VPUBufferObject::Type::WriteCombineShave; - case VPUBufferObject::Type::UncachedDma: - return VPUBufferObject::Type::UncachedShave; - case VPUBufferObject::Type::CachedDma: - return VPUBufferObject::Type::CachedShave; - default: - break; - } - return type; -} - std::shared_ptr VPUDeviceContext::importBufferObject(VPUBufferObject::Location type, int32_t fd) { auto bo = VPUBufferObject::importFromFd(*drvApi, type, fd); @@ -63,7 +49,7 @@ VPUDeviceContext::createBufferObject(size_t size, VPUBufferObject::Type type, VPUBufferObject::Location loc) { if (!hwInfo->dmaMemoryRangeCapability && (static_cast(type) & DRM_IVPU_BO_DMA_MEM)) - type = convertDmaToShaveRange(type); + type = VPUBufferObject::convertDmaToShaveRange(type); auto bo = VPUBufferObject::create(*drvApi, loc, type, size); if (bo == nullptr) { @@ -155,7 +141,7 @@ VPUDeviceContext::createUntrackedBufferObject(size_t size, VPUBufferObject::Type } if (!hwInfo->dmaMemoryRangeCapability && (static_cast(range) & DRM_IVPU_BO_DMA_MEM)) - range = convertDmaToShaveRange(range); + range = VPUBufferObject::convertDmaToShaveRange(range); auto bo = VPUBufferObject::create(*drvApi, VPUBufferObject::Location::Internal, range, size); if (bo == nullptr) { diff --git a/umd/vpu_driver/source/memory/vpu_buffer_object.hpp b/umd/vpu_driver/source/memory/vpu_buffer_object.hpp index b511c62..e0e1a37 100644 --- a/umd/vpu_driver/source/memory/vpu_buffer_object.hpp +++ b/umd/vpu_driver/source/memory/vpu_buffer_object.hpp @@ -28,6 +28,7 @@ class VPUBufferObject { WriteCombineFw = DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE, WriteCombineFwUnmappable = DRM_IVPU_BO_WC, WriteCombineShave = DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE | DRM_IVPU_BO_HIGH_MEM, + WriteCombineShaveUnmappable = DRM_IVPU_BO_WC | DRM_IVPU_BO_HIGH_MEM, WriteCombineDma = DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE | DRM_IVPU_BO_DMA_MEM, WriteCombineDmaUnmappable = DRM_IVPU_BO_WC | DRM_IVPU_BO_DMA_MEM, ImportedMemory = 0, @@ -159,6 +160,21 @@ class VPUBufferObject { bool exportToFd(int32_t &fd); uint64_t getId() const { return id; } + static VPUBufferObject::Type convertDmaToShaveRange(VPUBufferObject::Type type) { + auto t = static_cast(type); + if (!(t & DRM_IVPU_BO_DMA_MEM)) { + return type; + } + + t &= ~DRM_IVPU_BO_DMA_MEM; + t |= DRM_IVPU_BO_HIGH_MEM; + return static_cast(t); + } + + static VPUBufferObject::Type convertToUnmappable(VPUBufferObject::Type type) { + return static_cast(static_cast(type) & ~DRM_IVPU_BO_MAPPABLE); + } + private: const VPUDriverApi &drvApi; Location location; diff --git a/validation/umd-test/configs/README.md b/validation/umd-test/configs/README.md index 1c9db02..1e3071b 100644 --- a/validation/umd-test/configs/README.md +++ b/validation/umd-test/configs/README.md @@ -1,14 +1,5 @@ - + # NPU UMD test configuration overview diff --git a/validation/umd-test/configs/basic.yaml b/validation/umd-test/configs/basic.yaml index ebe79a5..af14736 100644 --- a/validation/umd-test/configs/basic.yaml +++ b/validation/umd-test/configs/basic.yaml @@ -1,14 +1,8 @@ -# Copyright 2023-2024 Intel Corporation. # -# This software and the related documents are Intel copyrighted materials, and -# your use of them is governed by the express license under which they were -# provided to you ("License"). Unless the License provides otherwise, you may -# not use, modify, copy, publish, distribute, disclose or transmit this -# software or the related documents without Intel's prior written permission. +# Copyright (C) 2024 Intel Corporation +# +# SPDX-License-Identifier: MIT # -# This software and the related documents are provided as is, with no express -# or implied warranties, other than those that are expressly stated in -# the License. log_level: ERROR # supported levels:QUIET ERROR, WARNING, INFO, VERBOSE model_dir: models/ diff --git a/validation/umd-test/graph_utilities.hpp b/validation/umd-test/graph_utilities.hpp index 6da98d3..b42f368 100644 --- a/validation/umd-test/graph_utilities.hpp +++ b/validation/umd-test/graph_utilities.hpp @@ -103,11 +103,44 @@ class InferenceRequest { return ret; } + bool validateOutput(const std::vector &reference) { + if (associatedOutput.empty()) + return true; + + if (reference.size() != + static_cast( + std::accumulate(associatedOutput.begin(), + associatedOutput.end(), + 0, + [](size_t sum, const auto &pair) { return sum + pair.second; }))) { + return false; + } + + const char *referenceData = reference.data(); + for (const auto &output : associatedOutput) { + if (memcmp(output.first, referenceData, output.second) != 0) { + return false; + } + referenceData += output.second; + } + + return true; + } + + void clearOutput() { + for (const auto &output : associatedOutput) { + if (output.first != nullptr) { + memset(output.first, 0, output.second); + } + } + } + public: ze_command_queue_handle_t queue = nullptr; ze_command_list_handle_t list = nullptr; ze_fence_handle_t fence = nullptr; double latencyMs = 0.f; + std::vector> associatedOutput; private: zeScope::SharedPtr scopedQueue; @@ -398,6 +431,35 @@ class Graph { } } + void setInput(const std::vector &networkInput) { + ASSERT_EQ(networkInput.size(), std::accumulate(inputSize.begin(), inputSize.end(), 0u)); + for (size_t i = 0; i < inputSize.size(); ++i) { + ASSERT_GT(inputSize[i], 0u); + memcpy(inArgs[i], + networkInput.data() + + std::accumulate(inputSize.begin(), inputSize.begin() + i, 0u), + inputSize[i]); + } + } + + void getCopyOfInput(std::vector &networkInput) { + networkInput.clear(); + for (size_t i = 0; i < inputSize.size(); ++i) { + networkInput.insert(networkInput.end(), + static_cast(inArgs[i]), + static_cast(inArgs[i]) + inputSize[i]); + } + } + + void getCopyOfOutput(std::vector &networkOutput) { + networkOutput.clear(); + for (size_t i = 0; i < outputSize.size(); ++i) { + networkOutput.insert(networkOutput.end(), + static_cast(outArgs[i]), + static_cast(outArgs[i]) + outputSize[i]); + } + } + void copyImageToInputArgument(void *dst) { if (dst == nullptr) { FAIL() << "Destination pointer is null"; @@ -528,6 +590,9 @@ class Graph { // Initialization is done in setUp to take advantage of ASSERT_* infer->setUpCommandQueue(hContext, hDevice); infer->setUpCommandList(hContext, hDevice, handle, graphDDI); + for (size_t i = 0; i < outArgs.size(); i++) { + infer->associatedOutput.push_back({outArgs[i], outputSize[i]}); + } return infer; } @@ -538,6 +603,10 @@ class Graph { // Initialization is done in setUp to take advantage of ASSERT_* infer->setUpCommandList(hContext, hDevice, handle, graphDDI); infer->setUpFence(hContext, hDevice); + for (size_t i = 0; i < outArgs.size(); i++) { + infer->associatedOutput.push_back({outArgs[i], outputSize[i]}); + } + return infer; } diff --git a/validation/umd-test/test_commands.cpp b/validation/umd-test/test_commands.cpp index 8397847..b17e9c2 100644 --- a/validation/umd-test/test_commands.cpp +++ b/validation/umd-test/test_commands.cpp @@ -857,7 +857,8 @@ using PatternType = std::variant; class CommandMemoryFill : public Command, public ::testing::WithParamInterface { public: - const size_t size = 12345u; + /* The driver split memfill commands into multiple operations if size is larger than 8MB */ + const size_t size = (8 << 20) * 2 + 12345u; }; INSTANTIATE_TEST_SUITE_P(, diff --git a/validation/umd-test/test_graph_cid.cpp b/validation/umd-test/test_graph_cid.cpp index 96fa756..ce1518d 100644 --- a/validation/umd-test/test_graph_cid.cpp +++ b/validation/umd-test/test_graph_cid.cpp @@ -483,17 +483,34 @@ class CompilerInDriverMultiInference : public CompilerInDriverLongT, BREAK_ON_FAIL(ret, stats); auto queue = scopedQueue.get(); + std::vector> inferReqs; inference.graph->allocateArguments(MemType::SHARED_MEMORY); inference.graph->copyInputData(); + std::vector commonInputData; + inference.graph->getCopyOfInput(commonInputData); + if (commonInputData.empty()) { + BREAK_ON_FAIL(ZE_RESULT_ERROR_UNKNOWN, stats); + } ret = zeGraphDDITableExt->pfnGraphInitialize(inference.graph->handle); BREAK_ON_FAIL(ret, stats); + inferReqs.push_back(inference.graph->newInferRequestUsingQueue(queue)); - std::vector> inferReqs; - for (size_t i = 0; i < inference.parallelReqs; i++) { - if (i > 0) { - inference.graph->allocateArguments(MemType::SHARED_MEMORY); - inference.graph->copyInputData(); - } + ret = inferReqs.back()->runAsync(); + BREAK_ON_FAIL(ret, stats); + + ret = inferReqs.back()->wait(UINT64_MAX); + BREAK_ON_FAIL(ret, stats); + /* Warm up and collect reference output */ + std::vector referenceOutput; + inference.graph->getCopyOfOutput(referenceOutput); + if (referenceOutput.empty()) { + BREAK_ON_FAIL(ZE_RESULT_ERROR_UNKNOWN, stats); + } + inference.graph->clearOutput(); + + for (size_t i = 1; i < inference.parallelReqs; i++) { + inference.graph->allocateArguments(MemType::SHARED_MEMORY); + inference.graph->setInput(commonInputData); inferReqs.push_back(inference.graph->newInferRequestUsingQueue(queue)); } @@ -504,12 +521,6 @@ class CompilerInDriverMultiInference : public CompilerInDriverLongT, if (workloadType != ZE_WORKLOAD_TYPE_FORCE_UINT32) { ret = zeCommandQueueDDITableExt->pfnSetWorkloadType(queue, ZE_WORKLOAD_TYPE_BACKGROUND); BREAK_ON_FAIL(ret, stats); - - ret = inferReqs[0]->runAsync(); - BREAK_ON_FAIL(ret, stats); - - ret = inferReqs[0]->wait(UINT64_MAX); - BREAK_ON_FAIL(ret, stats); } std::this_thread::sleep_for(std::chrono::microseconds(inference.delayInUs)); @@ -520,6 +531,11 @@ class CompilerInDriverMultiInference : public CompilerInDriverLongT, BREAK_ON_FAIL(ret, stats); } + if (workloadType != ZE_WORKLOAD_TYPE_FORCE_UINT32) { + ret = zeCommandQueueDDITableExt->pfnSetWorkloadType(queue, workloadType); + BREAK_ON_FAIL(ret, stats); + } + size_t inferReqIndex = 0; while (!stats.counter.isTimeout()) { stats.counter.delayNextFrame(); @@ -533,21 +549,22 @@ class CompilerInDriverMultiInference : public CompilerInDriverLongT, ret = inferReqs[inferReqIndex]->wait(UINT64_MAX); BREAK_ON_FAIL(ret, stats); - if (workloadType != ZE_WORKLOAD_TYPE_FORCE_UINT32) { - ret = zeCommandQueueDDITableExt->pfnSetWorkloadType(queue, workloadType); - BREAK_ON_FAIL(ret, stats); - workloadType = ZE_WORKLOAD_TYPE_FORCE_UINT32; - } - - // TODO: Add multiple inference request output validation if (inference.parallelReqs == 1 && inference.graph->classIndexes.size()) { inference.graph->checkResults(); inference.graph->clearOutput(); + } else { + // Only accuracy verification + if (!inferReqs[inferReqIndex]->validateOutput(referenceOutput)) { + TRACE("Output validation failed for inference request %s (%ld)", + inference.modelName.c_str(), + inferReqIndex); + BREAK_ON_FAIL(ZE_RESULT_ERROR_UNKNOWN, stats); + } + inferReqs[inferReqIndex]->clearOutput(); } stats.counter.recordFrame(inferReqs[inferReqIndex]->latencyMs); } - stats.counter.stopTimer(); return stats; }; diff --git a/validation/umd-test/test_graph_commands.cpp b/validation/umd-test/test_graph_commands.cpp index bff42b7..a8fdd08 100644 --- a/validation/umd-test/test_graph_commands.cpp +++ b/validation/umd-test/test_graph_commands.cpp @@ -476,56 +476,6 @@ TEST_P(CommandGraphLong, GraphInitAndExecWith200msDelay) { graph->checkResults(); } -static void resetDevice() { - std::string path = getDeviceSysFsDirectory() + "/reset"; - int fd = open(path.c_str(), O_WRONLY); - ASSERT_NE(fd, -1); - - ASSERT_EQ(write(fd, "1", 1), 1); - ASSERT_EQ(close(fd), 0); -} - -TEST_P(CommandGraphLong, InferenceDeviceResetInference) { - SKIP_NEEDS_ROOT(); - - ze_result_t result = zeGraphDDITableExt->pfnAppendGraphExecute(list, - graph->handle, - nullptr, - nullptr, - 0, - nullptr); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - result = zeCommandListClose(list); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - result = zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - result = zeCommandQueueSynchronize(queue, graphSyncTimeout); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - graph->checkResults(); - - resetDevice(); - - result = zeContextGetStatus(zeContext); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - result = zeDeviceGetStatus(zeDevice); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - graph->clearOutput(); - - result = zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - result = zeCommandQueueSynchronize(queue, graphSyncTimeout); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - graph->checkResults(); -} - TEST_P(CommandGraphLong, GetNativeBinaryAndReleaseAfterAppendGraphInitializeRunInference) { // Convert model to native format, zeGraphGetNativeBinary function is used auto nativeGraphBuffer = graph->getNativeBinaryAsNewBuffer(); diff --git a/validation/umd-test/test_graph_query.cpp b/validation/umd-test/test_graph_query.cpp index 6df7ceb..3b3f02c 100644 --- a/validation/umd-test/test_graph_query.cpp +++ b/validation/umd-test/test_graph_query.cpp @@ -21,8 +21,6 @@ class GraphQueryNetwork : public UmdTest, public ::testing::WithParamInterface().size(), 0); - graphBuffer = GraphBuffer::get(zeDevice, zeGraphDDITableExt, globalConfig, node); ASSERT_NE(graphBuffer, nullptr); } diff --git a/validation/umd-test/test_metric.cpp b/validation/umd-test/test_metric.cpp index 0595c1c..39b4953 100644 --- a/validation/umd-test/test_metric.cpp +++ b/validation/umd-test/test_metric.cpp @@ -278,51 +278,6 @@ TEST_F(MetricQuery, RunMetricQueryOnEmptyCommandList) { ASSERT_EQ(zeCommandQueueSynchronize(queue, syncTimeout), ZE_RESULT_SUCCESS); } -TEST_F(MetricQuery, MetricGroupCalculateEmptyMetricQuery) { - size_t groupIndex = 1; - MetricInitialize(groupIndex, 0); - - size_t queryDataSize = 0u; - EXPECT_EQ(zetMetricQueryGetData(query, &queryDataSize, nullptr), ZE_RESULT_SUCCESS); - EXPECT_GT(queryDataSize, 0u); - - std::vector queryRawData(queryDataSize, 0u); - EXPECT_EQ(zetMetricQueryGetData(query, - &queryDataSize, - reinterpret_cast(queryRawData.data())), - ZE_RESULT_SUCCESS); - - TRACE_BUF(queryRawData.data(), queryDataSize); - EXPECT_EQ(queryRawData[0], 0u); - - uint32_t metricValueCount = 0; - EXPECT_EQ(zetMetricGroupCalculateMetricValues(metricGroups[groupIndex], - ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, - queryRawData.size(), - queryRawData.data(), - &metricValueCount, - nullptr), - ZE_RESULT_SUCCESS); - - EXPECT_GT(metricValueCount, 0); - - std::vector metricValues(metricValueCount); - EXPECT_EQ(zetMetricGroupCalculateMetricValues(metricGroups[groupIndex], - ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, - queryRawData.size(), - queryRawData.data(), - &metricValueCount, - metricValues.data()), - ZE_RESULT_SUCCESS); - - for (uint32_t i = 0; i < metricValueCount; i++) { - EXPECT_EQ(metricValues[i].type, metricsPropertiesAll[groupIndex][i].resultType); - EXPECT_EQ(metricValues[i].value.ui64, 0llu); - } - - TRACE_BUF(metricValues.data(), metricValues.size() * sizeof(zet_typed_value_t)); -} - std::vector queryIndexesComputeEngine = {0, 1}; GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MetricQuery); diff --git a/validation/umd-test/umd_test.h b/validation/umd-test/umd_test.h index 1457e1e..1fdfa6e 100644 --- a/validation/umd-test/umd_test.h +++ b/validation/umd-test/umd_test.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -57,9 +58,10 @@ void PrintTo(const ze_result_t &result, std::ostream *os); SKIP_("Test is not supported in ChromeOS") \ } -#define SKIP_NEEDS_SYSFS_FILE(x) \ - if (!isFileAvailableInSysFs(x)) { \ - SKIP_("Test is not supported because " x " is missing in SysFs"); \ +#define SKIP_NEEDS_SYSFS_FILE(x) \ + if (!canReadFromSysFsFile(x)) { \ + SKIP_("Test is not supported because " x \ + " is missing in SysFs or does not have the appropriate permission"); \ } #define SKIP_HARDENING(msg) \ @@ -113,15 +115,27 @@ inline std::string getDeviceSysFsDirectory() { return path; } -inline bool isFileAvailableInSysFs(const std::string &filename) { +inline bool canReadFromSysFsFile(const std::string &filename) { std::filesystem::path deviceSysFs = getDeviceSysFsDirectory(); if (deviceSysFs.empty()) { TRACE("WARNING: No SysFs available in system\n"); return false; } + std::filesystem::path filePath = deviceSysFs / filename; std::error_code ec; - return std::filesystem::exists(deviceSysFs / filename, ec); + + if (!std::filesystem::exists(filePath, ec)) { + return false; + } + + std::string file = filePath.generic_string(); + int fd = access(file.c_str(), R_OK); + if (fd == -1) { + return false; + } + + return true; } class UmdTest : public ::testing::Test {