From 15d903960dff911f5ebaf28b467507f62876a5eb Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Thu, 14 Mar 2024 15:28:29 -0700 Subject: [PATCH] Embedding executable source contents in binaries for tracing. (#16757) This adds a new `CaptureExecutableSourcesPass` that allows for capture of individual `hal.executable.variant` ops at any number of compilation stages. Unlike the `DumpExecutableSourcesPass` this does not change the original source locations in the IR and instead captures both the textual IR and the remapped locations within it of each executable export at the time the pass is run. The textual IR is stored as resources and associated with the variants through linking and made available to serialization for embedding in target-specific formats. Because this increases compilation time (generating all of the sources multiple times per executable is expensive) and bloats binaries the capture is only enabled with the `--iree-hal-executable-debug-level=3` or greater flag set (default is `=2`). As part of this PR the CPU, Vulkan, and legacy ROCM formats have been updated to store the new information and source it at runtime. This is a breaking change to the executable library binary format. I'm not quite happy with it, but it's probably good enough for the next 6mo-1yr. To make this more usable a copy button has been added to the tracy source view: https://github.com/wolfpld/tracy/pull/750 Now clicking on a dispatch in the CPU or GPU timeline will show the source and the copy button can be used to get it in the clipboard. The source can then be run through `iree-compile --compile-mode=hal-executable` to generate binaries. ![image](https://github.com/openxla/iree/assets/75337/e690e1f8-52a7-40db-a3aa-5fd7e781791b) Fixes #15699. Closes #16223. --- compiler/plugins/target/LLVMCPU/BUILD.bazel | 2 + .../plugins/target/LLVMCPU/CMakeLists.txt | 2 + .../plugins/target/LLVMCPU/LLVMCPUTarget.cpp | 44 +- .../plugins/target/LLVMCPU/LibraryBuilder.cpp | 387 +++++++++++++----- .../plugins/target/LLVMCPU/LibraryBuilder.h | 38 +- compiler/plugins/target/ROCM/BUILD.bazel | 1 + compiler/plugins/target/ROCM/CMakeLists.txt | 1 + compiler/plugins/target/ROCM/ROCMTarget.cpp | 106 ++++- .../target/VulkanSPIRV/VulkanSPIRVTarget.cpp | 66 ++- .../compiler/Codegen/Utils/LinkingUtils.cpp | 16 +- .../iree/compiler/Dialect/HAL/IR/HALOps.td | 10 +- .../Dialect/HAL/Target/TargetBackend.cpp | 4 +- .../Dialect/HAL/Target/TargetBackend.h | 2 +- .../Dialect/HAL/Transforms/BUILD.bazel | 1 + .../Dialect/HAL/Transforms/CMakeLists.txt | 1 + .../Transforms/CaptureExecutableSources.cpp | 129 ++++++ .../Dialect/HAL/Transforms/Passes.cpp | 26 ++ .../compiler/Dialect/HAL/Transforms/Passes.td | 17 + .../Dialect/HAL/Transforms/test/BUILD.bazel | 1 + .../HAL/Transforms/test/CMakeLists.txt | 1 + .../test/capture_executable_sources.mlir | 66 +++ experimental/hip/stream_command_buffer.c | 11 +- experimental/rocm/direct_command_buffer.c | 17 +- experimental/rocm/native_executable.c | 54 ++- experimental/rocm/native_executable.h | 10 +- runtime/src/iree/base/tracing.h | 5 + runtime/src/iree/base/tracing/console.h | 3 + runtime/src/iree/base/tracing/tracy.cc | 97 ++++- runtime/src/iree/base/tracing/tracy.h | 10 + .../hal/drivers/cuda/stream_command_buffer.c | 10 +- .../drivers/vulkan/direct_command_buffer.cc | 13 +- .../hal/drivers/vulkan/native_executable.cc | 100 +++-- .../local/elf/testdata/elementwise_mul.mlir | 6 +- .../elf/testdata/elementwise_mul_arm_32.so | Bin 1860 -> 1944 bytes .../elf/testdata/elementwise_mul_arm_64.so | Bin 2408 -> 2512 bytes .../elf/testdata/elementwise_mul_riscv_32.so | Bin 1820 -> 2476 bytes .../elf/testdata/elementwise_mul_riscv_64.so | Bin 2512 -> 2912 bytes .../elf/testdata/elementwise_mul_x86_32.so | Bin 2032 -> 2308 bytes .../elf/testdata/elementwise_mul_x86_64.so | Bin 2744 -> 3008 bytes .../src/iree/hal/local/executable_library.h | 55 ++- .../iree/hal/local/executable_library_util.c | 50 ++- .../iree/hal/local/executable_library_util.h | 11 + .../hal/local/loaders/embedded_elf_loader.c | 5 + .../hal/local/loaders/static_library_loader.c | 5 + .../hal/local/loaders/system_library_loader.c | 5 + .../src/iree/schemas/bytecode_module_def.fbs | 2 +- .../src/iree/schemas/rocm_executable_def.fbs | 24 ++ .../src/iree/schemas/spirv_executable_def.fbs | 24 ++ 48 files changed, 1217 insertions(+), 221 deletions(-) create mode 100644 compiler/src/iree/compiler/Dialect/HAL/Transforms/CaptureExecutableSources.cpp create mode 100644 compiler/src/iree/compiler/Dialect/HAL/Transforms/test/capture_executable_sources.mlir diff --git a/compiler/plugins/target/LLVMCPU/BUILD.bazel b/compiler/plugins/target/LLVMCPU/BUILD.bazel index 877b666fa052..92c817f8e3ad 100644 --- a/compiler/plugins/target/LLVMCPU/BUILD.bazel +++ b/compiler/plugins/target/LLVMCPU/BUILD.bazel @@ -39,6 +39,7 @@ iree_compiler_cc_library( "//compiler/src/iree/compiler/Dialect/HAL/Target", "//compiler/src/iree/compiler/Dialect/HAL/Target:LLVMLinkerUtils", "//compiler/src/iree/compiler/Dialect/LinalgExt/IR", + "//compiler/src/iree/compiler/Dialect/Util/IR", "//compiler/src/iree/compiler/PluginAPI", "//compiler/src/iree/compiler/Utils", "//llvm-external-projects/iree-dialects:IREELinalgTransformDialect", @@ -63,6 +64,7 @@ iree_compiler_cc_library( "@llvm-project//mlir:ArmSMEDialect", "@llvm-project//mlir:ArmSMEToLLVMIRTranslation", "@llvm-project//mlir:BuiltinToLLVMIRTranslation", + "@llvm-project//mlir:IR", "@llvm-project//mlir:LLVMDialect", "@llvm-project//mlir:LLVMToLLVMIRTranslation", "@llvm-project//mlir:PDLDialect", diff --git a/compiler/plugins/target/LLVMCPU/CMakeLists.txt b/compiler/plugins/target/LLVMCPU/CMakeLists.txt index c8e22fd0e949..badb46d20e10 100644 --- a/compiler/plugins/target/LLVMCPU/CMakeLists.txt +++ b/compiler/plugins/target/LLVMCPU/CMakeLists.txt @@ -43,6 +43,7 @@ iree_cc_library( MLIRArmSMEDialect MLIRArmSMEToLLVMIRTranslation MLIRBuiltinToLLVMIRTranslation + MLIRIR MLIRLLVMDialect MLIRLLVMToLLVMIRTranslation MLIRPDLDialect @@ -57,6 +58,7 @@ iree_cc_library( iree::compiler::Dialect::HAL::Target iree::compiler::Dialect::HAL::Target::LLVMLinkerUtils iree::compiler::Dialect::LinalgExt::IR + iree::compiler::Dialect::Util::IR iree::compiler::PluginAPI iree::compiler::Utils iree::compiler::plugins::target::LLVMCPU::Builtins diff --git a/compiler/plugins/target/LLVMCPU/LLVMCPUTarget.cpp b/compiler/plugins/target/LLVMCPU/LLVMCPUTarget.cpp index 4245421edfcc..e81362470c76 100644 --- a/compiler/plugins/target/LLVMCPU/LLVMCPUTarget.cpp +++ b/compiler/plugins/target/LLVMCPU/LLVMCPUTarget.cpp @@ -37,6 +37,8 @@ #include "mlir/Dialect/PDL/IR/PDL.h" #include "mlir/Dialect/PDLInterp/IR/PDLInterp.h" #include "mlir/Dialect/Transform/IR/TransformDialect.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/DialectResourceBlobManager.h" #include "mlir/Target/LLVMIR/Dialect/ArmSME/ArmSMEToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" @@ -420,17 +422,45 @@ class LLVMCPUTargetBackend final : public TargetBackend { .value_or(APInt(64, 0)) .getSExtValue(); - std::string sourceFile = ""; - int sourceLine = 0; + LibraryBuilder::SourceLocation sourceLocation; if (options.debugLevel >= 1) { if (auto loc = findFirstFileLoc(exportOp.getLoc())) { - sourceFile = loc->getFilename().str(); - sourceLine = loc->getLine(); + sourceLocation = {"", loc->getFilename().str(), loc->getLine()}; + } + } + SmallVector stageLocations; + if (options.debugLevel >= 3) { + if (auto locsAttr = exportOp.getSourceLocsAttr()) { + for (auto locAttr : locsAttr.getValue()) { + if (auto loc = + findFirstFileLoc(cast(locAttr.getValue()))) { + stageLocations.push_back({ + locAttr.getName().str(), + loc->getFilename().str(), + loc->getLine(), + }); + } + } + } + } + libraryBuilder.addExport(exportOp.getName(), std::move(sourceLocation), + std::move(stageLocations), /*tag=*/"", + LibraryBuilder::DispatchAttrs{localMemorySize}, + llvmFunc); + } + + // Embed source files (if present). + if (auto sourcesAttr = variantOp.getSourcesAttr()) { + for (auto sourceAttr : sourcesAttr.getValue()) { + if (auto resourceAttr = dyn_cast_if_present( + sourceAttr.getValue())) { + auto handle = resourceAttr.getRawHandle(); + SmallVector rawData; + llvm::append_range(rawData, handle.getBlob()->getData()); + libraryBuilder.addSourceFile(sourceAttr.getName(), + std::move(rawData)); } } - libraryBuilder.addExport( - exportOp.getName(), sourceFile, sourceLine, /*tag=*/"", - LibraryBuilder::DispatchAttrs{localMemorySize}, llvmFunc); } auto queryFunctionName = std::string(kQueryFunctionName); diff --git a/compiler/plugins/target/LLVMCPU/LibraryBuilder.cpp b/compiler/plugins/target/LLVMCPU/LibraryBuilder.cpp index 7348f95fb851..3c39849d8c54 100644 --- a/compiler/plugins/target/LLVMCPU/LibraryBuilder.cpp +++ b/compiler/plugins/target/LLVMCPU/LibraryBuilder.cpp @@ -21,7 +21,7 @@ namespace mlir::iree_compiler::IREE::HAL { -static inline int64_t RoundUpToAlignment(int64_t value, int64_t alignment) { +static inline int64_t roundUpToAlignment(int64_t value, int64_t alignment) { return (value + (alignment - 1)) & ~(alignment - 1); } @@ -130,26 +130,53 @@ static llvm::StructType *makeDispatchAttrsType(llvm::LLVMContext &context) { return type; } -// %struct.iree_hal_executable_src_loc_v0_t = type { +// %struct.iree_hal_executable_source_location_v0_t = type { // i32, // i32, // i8* // } -static llvm::StructType *makeSrcLocType(llvm::LLVMContext &context) { +static llvm::StructType *makeSourceLocationType(llvm::LLVMContext &context) { if (auto *existingType = llvm::StructType::getTypeByName( - context, "iree_hal_executable_src_loc_v0_t")) { + context, "iree_hal_executable_source_location_v0_t")) { return existingType; } auto *i32Type = llvm::IntegerType::getInt32Ty(context); auto *i8PtrType = llvm::PointerType::getUnqual(context); - auto *type = llvm::StructType::create(context, - { - i32Type, - i32Type, - i8PtrType, - }, - "iree_hal_executable_src_loc_v0_t", - /*isPacked=*/false); + auto *type = + llvm::StructType::create(context, + { + i32Type, + i32Type, + i8PtrType, + }, + "iree_hal_executable_source_location_v0_t", + /*isPacked=*/false); + return type; +} + +// %struct.iree_hal_executable_stage_location_table_v0_t = type { +// i32, +// i8**, +// %struct.iree_hal_executable_source_location_v0_t*, +// } +static llvm::StructType * +makeStageLocationTableType(llvm::LLVMContext &context) { + if (auto *existingType = llvm::StructType::getTypeByName( + context, "iree_hal_executable_stage_location_table_v0_t")) { + return existingType; + } + auto *i32Type = llvm::IntegerType::getInt32Ty(context); + auto *i8PtrType = llvm::PointerType::getUnqual(context); + auto *sourceLocationType = makeSourceLocationType(context); + auto *type = + llvm::StructType::create(context, + { + i32Type, + i8PtrType->getPointerTo(), + sourceLocationType->getPointerTo(), + }, + "iree_hal_executable_stage_location_table_v0_t", + /*isPacked=*/false); return type; } @@ -159,7 +186,8 @@ static llvm::StructType *makeSrcLocType(llvm::LLVMContext &context) { // %struct.iree_hal_executable_dispatch_attrs_v0_t*, // i8**, // i8**, -// %struct.iree_hal_executable_src_loc_v0_t*, +// %struct.iree_hal_executable_source_location_v0_t*, +// %struct.iree_hal_executable_stage_location_table_v0_t*, // } static llvm::StructType *makeExportTableType(llvm::LLVMContext &context) { if (auto *existingType = llvm::StructType::getTypeByName( @@ -170,7 +198,8 @@ static llvm::StructType *makeExportTableType(llvm::LLVMContext &context) { auto *dispatchFunctionType = makeDispatchFunctionType(context); auto *dispatchAttrsType = makeDispatchAttrsType(context); auto *i8PtrType = llvm::PointerType::getUnqual(context); - auto *srcLocType = makeSrcLocType(context); + auto *sourceLocationType = makeSourceLocationType(context); + auto *stageLocationTableType = makeStageLocationTableType(context); auto *type = llvm::StructType::create( context, { @@ -179,7 +208,8 @@ static llvm::StructType *makeExportTableType(llvm::LLVMContext &context) { dispatchAttrsType->getPointerTo(), i8PtrType->getPointerTo(), i8PtrType->getPointerTo(), - srcLocType->getPointerTo(), + sourceLocationType->getPointerTo(), + stageLocationTableType->getPointerTo(), }, "iree_hal_executable_export_table_v0_t", /*isPacked=*/false); @@ -205,6 +235,53 @@ static llvm::StructType *makeConstantTableType(llvm::LLVMContext &context) { return type; } +// %struct.iree_hal_executable_source_file_v0_t = type { +// i32, +// i8*, +// i32, +// i8* +// } +static llvm::StructType *makeSourceFileType(llvm::LLVMContext &context) { + if (auto *existingType = llvm::StructType::getTypeByName( + context, "iree_hal_executable_source_file_v0_t")) { + return existingType; + } + auto *i32Type = llvm::IntegerType::getInt32Ty(context); + auto *i8PtrType = llvm::PointerType::getUnqual(context); + auto *type = llvm::StructType::create(context, + { + i32Type, + i8PtrType, + i32Type, + i8PtrType, + }, + "iree_hal_executable_source_file_v0_t", + /*isPacked=*/false); + return type; +} + +// %struct.iree_hal_executable_source_file_table_v0_t = type { +// i32, +// %struct.iree_hal_executable_source_file_v0_t*, +// } +static llvm::StructType *makeSourceTableType(llvm::LLVMContext &context) { + if (auto *existingType = llvm::StructType::getTypeByName( + context, "iree_hal_executable_source_file_table_v0_t")) { + return existingType; + } + auto *i32Type = llvm::IntegerType::getInt32Ty(context); + auto *sourceFileType = makeSourceFileType(context); + auto *type = + llvm::StructType::create(context, + { + i32Type, + sourceFileType->getPointerTo(), + }, + "iree_hal_executable_source_file_table_v0_t", + /*isPacked=*/false); + return type; +} + // %struct.iree_hal_executable_library_header_t = type { // i32, // i8*, @@ -244,12 +321,14 @@ static llvm::StructType *makeLibraryType(llvm::StructType *libraryHeaderType) { auto *importTableType = makeImportTableType(context); auto *exportTableType = makeExportTableType(context); auto *constantTableType = makeConstantTableType(context); + auto *sourceTableType = makeSourceTableType(context); auto *type = llvm::StructType::create(context, { libraryHeaderType->getPointerTo(), importTableType, exportTableType, constantTableType, + sourceTableType, }, "iree_hal_executable_library_v0_t", /*isPacked=*/false); @@ -264,8 +343,8 @@ static llvm::StructType *makeLibraryType(llvm::StructType *libraryHeaderType) { // // Example: // @.str.2 = private unnamed_addr constant [6 x i8] c"lib_a\00", align 1 -static llvm::Constant *getStringConstant(StringRef value, - llvm::Module *module) { +static llvm::Constant *createStringConstant(StringRef value, + llvm::Module *module) { auto i8Type = llvm::IntegerType::getInt8Ty(module->getContext()); auto i32Type = llvm::IntegerType::getInt32Ty(module->getContext()); auto *stringType = llvm::ArrayType::get(i8Type, value.size() + /*NUL*/ 1); @@ -281,6 +360,53 @@ static llvm::Constant *getStringConstant(StringRef value, stringType, global, ArrayRef{zero, zero}); } +// Creates a global NUL-terminated string constant or NULL if the string is +// empty. +static llvm::Constant *createStringConstantOrNull(StringRef value, + llvm::Module *module) { + if (value.empty()) { + auto i8Type = llvm::IntegerType::getInt8Ty(module->getContext()); + return llvm::ConstantPointerNull::get(i8Type->getPointerTo()); + } + return createStringConstant(value, module); +} + +// Creates a global serialized buffer constant (or string without NUL). +// +// Example: +// @.data = private unnamed_addr constant [5 x i8] c"lib_a", align 1 +static llvm::Constant *createBufferConstant(StringRef name, + ArrayRef value, + llvm::Module *module) { + auto i8Type = llvm::IntegerType::getInt8Ty(module->getContext()); + auto i32Type = llvm::IntegerType::getInt32Ty(module->getContext()); + auto *bufferType = llvm::ArrayType::get(i8Type, value.size()); + auto *literal = llvm::ConstantDataArray::get(module->getContext(), value); + auto *global = new llvm::GlobalVariable( + *module, bufferType, + /*isConstant=*/true, llvm::GlobalVariable::PrivateLinkage, literal, name); + global->setAlignment(llvm::MaybeAlign(1)); + llvm::Constant *zero = llvm::ConstantInt::get(i32Type, 0); + return llvm::ConstantExpr::getInBoundsGetElementPtr( + bufferType, global, ArrayRef{zero, zero}); +} + +// Creates a global constant with the given elements. +static llvm::Constant *createArrayConstant(StringRef name, + llvm::Type *elementType, + ArrayRef elements, + llvm::Module *module) { + auto *i32Type = llvm::IntegerType::getInt32Ty(module->getContext()); + llvm::Constant *zero = llvm::ConstantInt::get(i32Type, 0); + auto *arrayType = llvm::ArrayType::get(elementType, elements.size()); + auto *global = new llvm::GlobalVariable( + *module, arrayType, /*isConstant=*/true, + llvm::GlobalVariable::PrivateLinkage, + llvm::ConstantArray::get(arrayType, elements), name); + return llvm::ConstantExpr::getInBoundsGetElementPtr( + arrayType, global, ArrayRef{zero, zero}); +} + //===----------------------------------------------------------------------===// // Builder interface //===----------------------------------------------------------------------===// @@ -327,30 +453,20 @@ LibraryBuilder::buildLibraryV0ImportTable(std::string libraryName) { auto *importTableType = makeImportTableType(context); auto *i8Type = llvm::IntegerType::getInt8Ty(context); auto *i32Type = llvm::IntegerType::getInt32Ty(context); - llvm::Constant *zero = llvm::ConstantInt::get(i32Type, 0); - llvm::Constant *symbolNames = llvm::Constant::getNullValue(i8Type->getPointerTo()); if (!imports.empty()) { SmallVector symbolNameValues; for (auto &import : imports) { auto symbolName = import.symbol_name; - if (import.weak) { + if (import.weak) symbolName = "?" + symbolName; - } - symbolNameValues.push_back(getStringConstant(symbolName, module)); + symbolNameValues.push_back(createStringConstant(symbolName, module)); } - auto *symbolNamesType = - llvm::ArrayType::get(i8Type->getPointerTo(), symbolNameValues.size()); - auto *global = new llvm::GlobalVariable( - *module, symbolNamesType, /*isConstant=*/true, - llvm::GlobalVariable::PrivateLinkage, - llvm::ConstantArray::get(symbolNamesType, symbolNameValues), - /*Name=*/libraryName + "_import_names"); - symbolNames = llvm::ConstantExpr::getInBoundsGetElementPtr( - symbolNamesType, global, ArrayRef{zero, zero}); + symbolNames = + createArrayConstant(libraryName + "_import_names", + i8Type->getPointerTo(), symbolNameValues, module); } - return llvm::ConstantStruct::get( importTableType, { // count= @@ -366,35 +482,26 @@ LibraryBuilder::buildLibraryV0ExportTable(std::string libraryName) { auto *exportTableType = makeExportTableType(context); auto *dispatchFunctionType = makeDispatchFunctionType(context); auto *dispatchAttrsType = makeDispatchAttrsType(context); - auto *srcLocType = makeSrcLocType(context); + auto *sourceLocationType = makeSourceLocationType(context); + auto *stageLocationTableType = makeStageLocationTableType(context); auto *i8Type = llvm::IntegerType::getInt8Ty(context); auto *i16Type = llvm::IntegerType::getInt16Ty(context); auto *i32Type = llvm::IntegerType::getInt32Ty(context); - llvm::Constant *zero = llvm::ConstantInt::get(i32Type, 0); // iree_hal_executable_export_table_v0_t::ptrs SmallVector exportPtrValues; - for (auto dispatch : exports) { + for (auto dispatch : exports) exportPtrValues.push_back(dispatch.func); - } - auto *exportPtrsType = llvm::ArrayType::get( - dispatchFunctionType->getPointerTo(), exportPtrValues.size()); - llvm::Constant *exportPtrs = new llvm::GlobalVariable( - *module, exportPtrsType, /*isConstant=*/true, - llvm::GlobalVariable::PrivateLinkage, - llvm::ConstantArray::get(exportPtrsType, exportPtrValues), - /*Name=*/libraryName + "_funcs"); - // TODO(benvanik): force alignment (16? natural pointer width *2?) - exportPtrs = llvm::ConstantExpr::getInBoundsGetElementPtr( - exportPtrsType, exportPtrs, ArrayRef{zero, zero}); + llvm::Constant *exportPtrs = createArrayConstant( + libraryName + "_funcs", dispatchFunctionType->getPointerTo(), + exportPtrValues, module); // iree_hal_executable_export_table_v0_t::attrs llvm::Constant *exportAttrs = llvm::Constant::getNullValue(i32Type->getPointerTo()); - bool hasNonDefaultAttrs = - llvm::find_if(exports, [](const Dispatch &dispatch) { - return !dispatch.attrs.isDefault(); - }) != exports.end(); + bool hasNonDefaultAttrs = llvm::any_of(exports, [](const auto &dispatch) { + return !dispatch.attrs.isDefault(); + }); if (!hasNonDefaultAttrs) { SmallVector exportAttrValues; for (auto dispatch : exports) { @@ -403,23 +510,15 @@ LibraryBuilder::buildLibraryV0ExportTable(std::string libraryName) { { // local_memory_pages= llvm::ConstantInt::get( - i16Type, RoundUpToAlignment(dispatch.attrs.localMemorySize, + i16Type, roundUpToAlignment(dispatch.attrs.localMemorySize, kWorkgroupLocalMemoryPageSize) / kWorkgroupLocalMemoryPageSize), // reserved= llvm::ConstantInt::get(i16Type, 0), })); } - auto *exportAttrsType = - llvm::ArrayType::get(dispatchAttrsType, exportAttrValues.size()); - auto *global = new llvm::GlobalVariable( - *module, exportAttrsType, /*isConstant=*/true, - llvm::GlobalVariable::PrivateLinkage, - llvm::ConstantArray::get(exportAttrsType, exportAttrValues), - /*Name=*/libraryName + "_attrs"); - // TODO(benvanik): force alignment (16? natural pointer width?) - exportAttrs = llvm::ConstantExpr::getInBoundsGetElementPtr( - exportAttrsType, global, ArrayRef{zero, zero}); + exportAttrs = createArrayConstant(libraryName + "_attrs", dispatchAttrsType, + exportAttrValues, module); } // iree_hal_executable_export_table_v0_t::names @@ -427,68 +526,94 @@ LibraryBuilder::buildLibraryV0ExportTable(std::string libraryName) { llvm::Constant::getNullValue(i8Type->getPointerTo()->getPointerTo()); if (mode == Mode::INCLUDE_REFLECTION_ATTRS) { SmallVector exportNameValues; - for (auto dispatch : exports) { - exportNameValues.push_back(getStringConstant(dispatch.name, module)); - } - auto *exportNamesType = - llvm::ArrayType::get(i8Type->getPointerTo(), exportNameValues.size()); - auto *global = new llvm::GlobalVariable( - *module, exportNamesType, /*isConstant=*/true, - llvm::GlobalVariable::PrivateLinkage, - llvm::ConstantArray::get(exportNamesType, exportNameValues), - /*Name=*/libraryName + "_names"); - // TODO(benvanik): force alignment (16? natural pointer width *2?) - exportNames = llvm::ConstantExpr::getInBoundsGetElementPtr( - exportNamesType, global, ArrayRef{zero, zero}); + for (auto dispatch : exports) + exportNameValues.push_back(createStringConstant(dispatch.name, module)); + exportNames = + createArrayConstant(libraryName + "_names", i8Type->getPointerTo(), + exportNameValues, module); } // iree_hal_executable_export_table_v0_t::tags llvm::Constant *exportTags = llvm::Constant::getNullValue(i8Type->getPointerTo()->getPointerTo()); - if (mode == Mode::INCLUDE_REFLECTION_ATTRS) { + bool hasAnyTags = llvm::any_of( + exports, [](auto &dispatch) { return !dispatch.tag.empty(); }); + if (mode == Mode::INCLUDE_REFLECTION_ATTRS && hasAnyTags) { SmallVector exportTagValues; - for (auto dispatch : exports) { - exportTagValues.push_back(getStringConstant(dispatch.tag, module)); - } - auto *exportTagsType = - llvm::ArrayType::get(i8Type->getPointerTo(), exportTagValues.size()); - auto *global = new llvm::GlobalVariable( - *module, exportTagsType, /*isConstant=*/true, - llvm::GlobalVariable::PrivateLinkage, - llvm::ConstantArray::get(exportTagsType, exportTagValues), - /*Name=*/libraryName + "_tags"); - // TODO(benvanik): force alignment (16? natural pointer width *2?) - exportTags = llvm::ConstantExpr::getInBoundsGetElementPtr( - exportTagsType, global, ArrayRef{zero, zero}); + for (auto dispatch : exports) + exportTagValues.push_back( + createStringConstantOrNull(dispatch.tag, module)); + exportTags = createArrayConstant( + libraryName + "_tags", i8Type->getPointerTo(), exportTagValues, module); } - // iree_hal_executable_export_table_v0_t::src_locs - llvm::Constant *exportSrcLocs = - llvm::Constant::getNullValue(srcLocType->getPointerTo()); + // iree_hal_executable_export_table_v0_t::source_locations + llvm::Constant *exportSourceLocations = + llvm::Constant::getNullValue(sourceLocationType->getPointerTo()); if (mode == Mode::INCLUDE_REFLECTION_ATTRS) { - SmallVector exportSrcLocValues; + SmallVector exportSourceLocationValues; for (auto dispatch : exports) { - exportSrcLocValues.push_back(llvm::ConstantStruct::get( - srcLocType, + exportSourceLocationValues.push_back(llvm::ConstantStruct::get( + sourceLocationType, { // line= - llvm::ConstantInt::get(i32Type, dispatch.sourceLoc), + llvm::ConstantInt::get(i32Type, dispatch.sourceLocation.line), // path_length= - llvm::ConstantInt::get(i32Type, dispatch.sourceFile.length()), + llvm::ConstantInt::get(i32Type, + dispatch.sourceLocation.path.size()), // path= - getStringConstant(dispatch.sourceFile, module), + createStringConstant(dispatch.sourceLocation.path, module), })); } - auto *exportSrcLocsType = - llvm::ArrayType::get(srcLocType, exportSrcLocValues.size()); - auto *global = new llvm::GlobalVariable( - *module, exportSrcLocsType, /*isConstant=*/true, - llvm::GlobalVariable::PrivateLinkage, - llvm::ConstantArray::get(exportSrcLocsType, exportSrcLocValues), - /*Name=*/libraryName + "_src_locs"); - // TODO(benvanik): force alignment (16? natural pointer width?) - exportSrcLocs = llvm::ConstantExpr::getInBoundsGetElementPtr( - exportSrcLocsType, global, ArrayRef{zero, zero}); + exportSourceLocations = createArrayConstant( + libraryName + "_source_locations", sourceLocationType, + exportSourceLocationValues, module); + } + + // iree_hal_executable_export_table_v0_t::stage_locations + llvm::Constant *exportStageLocations = + llvm::Constant::getNullValue(stageLocationTableType->getPointerTo()); + if (mode == Mode::INCLUDE_REFLECTION_ATTRS) { + SmallVector exportStageTableValues; + for (auto dispatch : exports) { + SmallVector exportStageNameValues; + SmallVector exportSourceLocationValues; + for (auto &stageLocation : dispatch.stageLocations) { + exportStageNameValues.push_back( + createStringConstant(stageLocation.stage, module)); + exportSourceLocationValues.push_back(llvm::ConstantStruct::get( + sourceLocationType, + { + // line= + llvm::ConstantInt::get(i32Type, stageLocation.line), + // path_length= + llvm::ConstantInt::get(i32Type, stageLocation.path.size()), + // path= + createStringConstant(stageLocation.path, module), + })); + } + llvm::Constant *stageNamesPtr = createArrayConstant( + libraryName + "_" + dispatch.name + "_stage_names", + i8Type->getPointerTo(), exportStageNameValues, module); + llvm::Constant *sourceLocationsPtr = createArrayConstant( + libraryName + "_" + dispatch.name + "_stage_source_locations", + sourceLocationType, exportSourceLocationValues, module); + exportStageTableValues.push_back(llvm::ConstantStruct::get( + stageLocationTableType, + { + // count= + llvm::ConstantInt::get(i32Type, exportStageNameValues.size()), + // names= + stageNamesPtr, + // locations= + sourceLocationsPtr, + })); + } + if (!exportStageTableValues.empty()) { + exportStageLocations = createArrayConstant( + libraryName + "_stage_location_tables", stageLocationTableType, + exportStageTableValues, module); + } } return llvm::ConstantStruct::get( @@ -503,8 +628,10 @@ LibraryBuilder::buildLibraryV0ExportTable(std::string libraryName) { exportNames, // tags= exportTags, - // src_locs= - exportSrcLocs, + // source_locations= + exportSourceLocations, + // stage_locations= + exportStageLocations, }); } @@ -513,7 +640,6 @@ LibraryBuilder::buildLibraryV0ConstantTable(std::string libraryName) { auto &context = module->getContext(); auto *constantTableType = makeConstantTableType(context); auto *i32Type = llvm::IntegerType::getInt32Ty(context); - return llvm::ConstantStruct::get( constantTableType, { // count= @@ -521,6 +647,43 @@ LibraryBuilder::buildLibraryV0ConstantTable(std::string libraryName) { }); } +llvm::Constant * +LibraryBuilder::buildLibraryV0SourceTable(std::string libraryName) { + auto &context = module->getContext(); + auto *sourceFileType = makeSourceFileType(context); + auto *sourceTableType = makeSourceTableType(context); + auto *i32Type = llvm::IntegerType::getInt32Ty(context); + llvm::Constant *sourceFilesValue = + llvm::Constant::getNullValue(sourceFileType->getPointerTo()); + if (!sourceFiles.empty()) { + SmallVector sourceFileValues; + for (auto &sourceFile : sourceFiles) { + sourceFileValues.push_back(llvm::ConstantStruct::get( + sourceFileType, + { + // path_length= + llvm::ConstantInt::get(i32Type, sourceFile.path.size()), + // path= + createStringConstant(sourceFile.path, module), + // content_length= + llvm::ConstantInt::get(i32Type, sourceFile.contents.size()), + // content= + createBufferConstant(sourceFile.path, sourceFile.contents, + module), + })); + } + sourceFilesValue = createArrayConstant( + libraryName + "_sources", sourceFileType, sourceFileValues, module); + } + return llvm::ConstantStruct::get( + sourceTableType, { + // count= + llvm::ConstantInt::get(i32Type, sourceFiles.size()), + // files= + sourceFilesValue, + }); +} + llvm::Constant *LibraryBuilder::buildLibraryV0(std::string libraryName) { auto &context = module->getContext(); auto *libraryHeaderType = makeLibraryHeaderType(context); @@ -539,7 +702,7 @@ llvm::Constant *LibraryBuilder::buildLibraryV0(std::string libraryName) { llvm::ConstantInt::get(i32Type, static_cast(Version::LATEST)), // name= - getStringConstant(module->getName(), module), + createStringConstant(module->getName(), module), // features= llvm::ConstantInt::get(i32Type, static_cast(features)), // sanitizer= @@ -564,6 +727,8 @@ llvm::Constant *LibraryBuilder::buildLibraryV0(std::string libraryName) { buildLibraryV0ExportTable(libraryName), // constants= buildLibraryV0ConstantTable(libraryName), + // sources= + buildLibraryV0SourceTable(libraryName), }), /*Name=*/libraryName); // TODO(benvanik): force alignment (8? natural pointer width?) diff --git a/compiler/plugins/target/LLVMCPU/LibraryBuilder.h b/compiler/plugins/target/LLVMCPU/LibraryBuilder.h index 0869979f574e..fd3416b7e73b 100644 --- a/compiler/plugins/target/LLVMCPU/LibraryBuilder.h +++ b/compiler/plugins/target/LLVMCPU/LibraryBuilder.h @@ -11,6 +11,7 @@ #include #include "compiler/plugins/target/LLVMCPU/LLVMTargetOptions.h" +#include "compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.h" #include "llvm/IR/Module.h" #include "llvm/TargetParser/Triple.h" #include "mlir/Support/LogicalResult.h" @@ -46,10 +47,11 @@ class LibraryBuilder { // We may want to make this major release number, date codes (0x20220307), // or some semantic versioning we track in whatever spec we end up having. V_0_3 = 0x0000'0003u, // v0.3 - ~2022-08-08 + V_0_4 = 0x0000'0004u, // v0.4 - ~2024-03-12 // Pinned to the latest version. // Requires that the runtime be compiled with the same version. - LATEST = V_0_3, + LATEST = V_0_4, }; // iree_hal_executable_library_features_t @@ -84,6 +86,13 @@ class LibraryBuilder { constexpr bool isDefault() const { return localMemorySize == 0; } }; + // iree_hal_executable_source_location_v0_t + struct SourceLocation { + std::string stage; + std::string path; + uint32_t line; + }; + LibraryBuilder(llvm::Module *module, Mode mode, Version version = Version::LATEST) : module(module), mode(mode), version(version) {} @@ -111,10 +120,16 @@ class LibraryBuilder { // |name| will be used as the library export // |sourceFile| and |sourceLoc| are optional source information // |tag| is an optional attachment - void addExport(StringRef name, StringRef sourceFile, uint32_t sourceLoc, - StringRef tag, DispatchAttrs attrs, llvm::Function *func) { - exports.push_back( - {name.str(), sourceFile.str(), sourceLoc, tag.str(), attrs, func}); + void addExport(StringRef name, SourceLocation sourceLocation, + SmallVector stageLocations, StringRef tag, + DispatchAttrs attrs, llvm::Function *func) { + exports.push_back({name.str(), std::move(sourceLocation), + std::move(stageLocations), tag.str(), attrs, func}); + } + + // Defines a source file embedded in the library. + void addSourceFile(StringRef path, SmallVector contents) { + sourceFiles.push_back({path.str(), std::move(contents)}); } // Builds a `iree_hal_executable_library_query_fn_t` with the given @@ -133,6 +148,7 @@ class LibraryBuilder { llvm::Constant *buildLibraryV0ImportTable(std::string libraryName); llvm::Constant *buildLibraryV0ExportTable(std::string libraryName); llvm::Constant *buildLibraryV0ConstantTable(std::string libraryName); + llvm::Constant *buildLibraryV0SourceTable(std::string libraryName); llvm::Module *module = nullptr; Mode mode = Mode::INCLUDE_REFLECTION_ATTRS; @@ -148,15 +164,21 @@ class LibraryBuilder { struct Dispatch { std::string name; - std::string sourceFile; - uint32_t sourceLoc; + SourceLocation sourceLocation; + SmallVector stageLocations; std::string tag; DispatchAttrs attrs; llvm::Function *func; }; - SmallVector exports; + std::vector exports; size_t constantCount = 0; + + struct SourceFile { + std::string path; + SmallVector contents; + }; + SmallVector sourceFiles; }; } // namespace mlir::iree_compiler::IREE::HAL diff --git a/compiler/plugins/target/ROCM/BUILD.bazel b/compiler/plugins/target/ROCM/BUILD.bazel index 8c001d0316e8..cd62c873e786 100644 --- a/compiler/plugins/target/ROCM/BUILD.bazel +++ b/compiler/plugins/target/ROCM/BUILD.bazel @@ -56,6 +56,7 @@ iree_compiler_cc_library( "@llvm-project//mlir:AMDGPUTransforms", "@llvm-project//mlir:AMDGPUUtils", "@llvm-project//mlir:BuiltinToLLVMIRTranslation", + "@llvm-project//mlir:IR", "@llvm-project//mlir:LLVMDialect", "@llvm-project//mlir:LLVMToLLVMIRTranslation", "@llvm-project//mlir:Pass", diff --git a/compiler/plugins/target/ROCM/CMakeLists.txt b/compiler/plugins/target/ROCM/CMakeLists.txt index d1be43492046..2cae4e4dc62b 100644 --- a/compiler/plugins/target/ROCM/CMakeLists.txt +++ b/compiler/plugins/target/ROCM/CMakeLists.txt @@ -46,6 +46,7 @@ iree_cc_library( MLIRAMDGPUTransforms MLIRAMDGPUUtils MLIRBuiltinToLLVMIRTranslation + MLIRIR MLIRLLVMDialect MLIRLLVMToLLVMIRTranslation MLIRPass diff --git a/compiler/plugins/target/ROCM/ROCMTarget.cpp b/compiler/plugins/target/ROCM/ROCMTarget.cpp index a1e6818de0a6..deed5ec42361 100644 --- a/compiler/plugins/target/ROCM/ROCMTarget.cpp +++ b/compiler/plugins/target/ROCM/ROCMTarget.cpp @@ -19,6 +19,7 @@ #include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h" #include "iree/compiler/PluginAPI/Client.h" #include "iree/compiler/Utils/FlatbufferUtils.h" +#include "iree/compiler/Utils/ModuleUtils.h" #include "iree/compiler/Utils/ToolUtils.h" #include "iree/schemas/rocm_executable_def_builder.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -37,6 +38,8 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/DialectResourceBlobManager.h" #include "mlir/Pass/PassManager.h" #include "mlir/Support/LogicalResult.h" #include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h" @@ -291,9 +294,11 @@ class ROCMTargetBackend final : public TargetBackend { } // Collect all the entry point names. - llvm::StringMap exportOps; + SmallVector exportOps; + llvm::StringMap exportOpMap; for (auto op : variantOp.getExportOps()) { - exportOps[op.getSymName()] = op; + exportOps.push_back(op); + exportOpMap[op.getSymName()] = op; } std::vector> workgroupSizes; SmallVector workgroupLocalMemories; @@ -306,7 +311,7 @@ class ROCMTargetBackend final : public TargetBackend { if (llvmFunc->isDeclaration()) continue; std::array workgroupSize; - auto exportOp = exportOps[func.getName()]; + auto exportOp = exportOpMap[func.getName()]; if (std::optional workgroupSizeAttr = exportOp.getWorkgroupSize()) { for (auto it : llvm::enumerate(workgroupSizeAttr.value())) { @@ -395,9 +400,6 @@ class ROCMTargetBackend final : public TargetBackend { for (llvm::Function &f : llvmModule->functions()) f.addFnAttr(llvm::Attribute::AlwaysInline); - iree_compiler::FlatbufferBuilder builder; - iree_hal_rocm_ExecutableDef_start_as_root(builder); - // Link user modules and libdevice (if required). // Note that linking order matters: llvm::Linker linker(*llvmModule); @@ -459,15 +461,81 @@ class ROCMTargetBackend final : public TargetBackend { variantOp.getName(), ".hsaco", targetHSACO); } + iree_compiler::FlatbufferBuilder builder; + iree_hal_rocm_ExecutableDef_start_as_root(builder); + + // Attach embedded source file contents. + SmallVector sourceFileRefs; + if (auto sourcesAttr = variantOp.getSourcesAttr()) { + for (auto sourceAttr : llvm::reverse(sourcesAttr.getValue())) { + if (auto resourceAttr = dyn_cast_if_present( + sourceAttr.getValue())) { + auto filenameRef = builder.createString(sourceAttr.getName()); + auto contentRef = builder.streamUint8Vec([&](llvm::raw_ostream &os) { + auto blobData = resourceAttr.getRawHandle().getBlob()->getData(); + os.write(blobData.data(), blobData.size()); + return true; + }); + sourceFileRefs.push_back(iree_hal_rocm_SourceFileDef_create( + builder, filenameRef, contentRef)); + } + } + std::reverse(sourceFileRefs.begin(), sourceFileRefs.end()); + } + + SmallVector entryPointNames; + SmallVector sourceLocationRefs; + entryPointNames.resize(exportOps.size()); + for (auto exportOp : exportOps) { + int64_t ordinal = exportOp.getOrdinalAttr().getInt(); + entryPointNames[ordinal] = exportOp.getName(); + + // Optional source location information for debugging/profiling. + if (serOptions.debugLevel >= 1) { + // We only ever resize to the maximum -- so all previous data will + // be kept as-is. + sourceLocationRefs.resize(exportOps.size()); + if (auto loc = findFirstFileLoc(exportOp.getLoc())) { + auto filenameRef = builder.createString(loc->getFilename()); + sourceLocationRefs[ordinal] = iree_hal_rocm_FileLineLocDef_create( + builder, filenameRef, loc->getLine()); + } + } + } + + // Optional compilation stage source files. + SmallVector stageLocationsRefs; + if (serOptions.debugLevel >= 3) { + for (auto exportOp : exportOps) { + SmallVector stageLocationRefs; + if (auto locsAttr = exportOp.getSourceLocsAttr()) { + for (auto locAttr : locsAttr.getValue()) { + if (auto loc = + findFirstFileLoc(cast(locAttr.getValue()))) { + auto stageNameRef = builder.createString(locAttr.getName()); + auto filenameRef = builder.createString(loc->getFilename()); + stageLocationRefs.push_back(iree_hal_rocm_StageLocationDef_create( + builder, stageNameRef, + iree_hal_rocm_FileLineLocDef_create(builder, filenameRef, + loc->getLine()))); + } + } + } + if (!stageLocationRefs.empty()) { + // We only ever resize to the maximum -- so all previous data will + // be kept as-is. + stageLocationsRefs.resize(exportOps.size()); + int64_t ordinal = exportOp.getOrdinalAttr().getInt(); + stageLocationsRefs[ordinal] = iree_hal_rocm_StageLocationsDef_create( + builder, builder.createOffsetVecDestructive(stageLocationRefs)); + } + } + } + auto hsacoRef = flatbuffers_string_create(builder, targetHSACO.c_str(), targetHSACO.size()); - auto entryPointNames = llvm::map_to_vector<8>( - variantOp.getBlock() - .getOps(), - [&](auto op) { return op.getName(); }); auto entryPointsRef = builder.createStringVec(entryPointNames); - iree_hal_rocm_BlockSizeDef_vec_start(builder); auto blockSizes = workgroupSizes.begin(); for (int i = 0, e = entryPointNames.size(); i < e; ++i) { @@ -483,6 +551,22 @@ class ROCMTargetBackend final : public TargetBackend { iree_hal_rocm_ExecutableDef_shared_memory_sizes_add( builder, workgroupLocalMemoriesRef); iree_hal_rocm_ExecutableDef_hsaco_image_add(builder, hsacoRef); + if (!sourceLocationRefs.empty()) { + auto sourceLocationsRef = + builder.createOffsetVecDestructive(sourceLocationRefs); + iree_hal_rocm_ExecutableDef_source_locations_add(builder, + sourceLocationsRef); + } + if (!stageLocationsRefs.empty()) { + auto stageLocationsRef = + builder.createOffsetVecDestructive(stageLocationsRefs); + iree_hal_rocm_ExecutableDef_stage_locations_add(builder, + stageLocationsRef); + } + if (!sourceFileRefs.empty()) { + auto sourceFilesRef = builder.createOffsetVecDestructive(sourceFileRefs); + iree_hal_rocm_ExecutableDef_source_files_add(builder, sourceFilesRef); + } iree_hal_rocm_ExecutableDef_end_as_root(builder); // Add the binary data to the target executable. diff --git a/compiler/plugins/target/VulkanSPIRV/VulkanSPIRVTarget.cpp b/compiler/plugins/target/VulkanSPIRV/VulkanSPIRVTarget.cpp index 1731b3d91034..73810f63e359 100644 --- a/compiler/plugins/target/VulkanSPIRV/VulkanSPIRVTarget.cpp +++ b/compiler/plugins/target/VulkanSPIRV/VulkanSPIRVTarget.cpp @@ -25,7 +25,9 @@ #include "mlir/Dialect/SPIRV/IR/TargetAndABI.h" #include "mlir/Dialect/SPIRV/Linking/ModuleCombiner.h" #include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/DialectResourceBlobManager.h" #include "mlir/Target/SPIRV/Serialization.h" namespace mlir::iree_compiler::IREE::HAL { @@ -222,6 +224,25 @@ class VulkanSPIRVTargetBackend : public TargetBackend { FlatbufferBuilder builder; iree_hal_spirv_ExecutableDef_start_as_root(builder); + // Attach embedded source file contents. + SmallVector sourceFileRefs; + if (auto sourcesAttr = variantOp.getSourcesAttr()) { + for (auto sourceAttr : llvm::reverse(sourcesAttr.getValue())) { + if (auto resourceAttr = dyn_cast_if_present( + sourceAttr.getValue())) { + auto filenameRef = builder.createString(sourceAttr.getName()); + auto contentRef = builder.streamUint8Vec([&](llvm::raw_ostream &os) { + auto blobData = resourceAttr.getRawHandle().getBlob()->getData(); + os.write(blobData.data(), blobData.size()); + return true; + }); + sourceFileRefs.push_back(iree_hal_spirv_SourceFileDef_create( + builder, filenameRef, contentRef)); + } + } + std::reverse(sourceFileRefs.begin(), sourceFileRefs.end()); + } + // The list of shader modules. SmallVector shaderModuleRefs; @@ -236,10 +257,9 @@ class VulkanSPIRVTargetBackend : public TargetBackend { subgroupSizes.resize(ordinalCount); shaderModuleIndices.resize(ordinalCount); - bool hasAnySubgroupSizes = false; - // Iterate over all spirv.module ops and encode them into the FlatBuffer // data structure. + bool hasAnySubgroupSizes = false; for (spirv::ModuleOp spvModuleOp : spirvModuleOps) { // Currently the spirv.module op should only have one entry point. Get it. auto spirvEntryPoints = spvModuleOp.getOps(); @@ -299,7 +319,37 @@ class VulkanSPIRVTargetBackend : public TargetBackend { sourceLocationRefs[ordinal] = iree_hal_spirv_FileLineLocDef_create( builder, filenameRef, loc->getLine()); } - }; + } + } + + // Optional compilation stage source files. + SmallVector stageLocationsRefs; + if (options.debugLevel >= 3) { + for (auto exportOp : exportOps) { + SmallVector stageLocationRefs; + if (auto locsAttr = exportOp.getSourceLocsAttr()) { + for (auto locAttr : locsAttr.getValue()) { + if (auto loc = + findFirstFileLoc(cast(locAttr.getValue()))) { + auto stageNameRef = builder.createString(locAttr.getName()); + auto filenameRef = builder.createString(loc->getFilename()); + stageLocationRefs.push_back( + iree_hal_spirv_StageLocationDef_create( + builder, stageNameRef, + iree_hal_spirv_FileLineLocDef_create(builder, filenameRef, + loc->getLine()))); + } + } + } + if (!stageLocationRefs.empty()) { + // We only ever resize to the maximum -- so all previous data will + // be kept as-is. + stageLocationsRefs.resize(ordinalCount); + int64_t ordinal = exportOp.getOrdinalAttr().getInt(); + stageLocationsRefs[ordinal] = iree_hal_spirv_StageLocationsDef_create( + builder, builder.createOffsetVecDestructive(stageLocationRefs)); + } + } } // Add top-level executable fields following their order of definition. @@ -324,6 +374,16 @@ class VulkanSPIRVTargetBackend : public TargetBackend { iree_hal_spirv_ExecutableDef_source_locations_add(builder, sourceLocationsRef); } + if (!stageLocationsRefs.empty()) { + auto stageLocationsRef = + builder.createOffsetVecDestructive(stageLocationsRefs); + iree_hal_spirv_ExecutableDef_stage_locations_add(builder, + stageLocationsRef); + } + if (!sourceFileRefs.empty()) { + auto sourceFilesRef = builder.createOffsetVecDestructive(sourceFileRefs); + iree_hal_spirv_ExecutableDef_source_files_add(builder, sourceFilesRef); + } iree_hal_spirv_ExecutableDef_end_as_root(builder); diff --git a/compiler/src/iree/compiler/Codegen/Utils/LinkingUtils.cpp b/compiler/src/iree/compiler/Codegen/Utils/LinkingUtils.cpp index 774536dcf54e..ad4e543d70b3 100644 --- a/compiler/src/iree/compiler/Codegen/Utils/LinkingUtils.cpp +++ b/compiler/src/iree/compiler/Codegen/Utils/LinkingUtils.cpp @@ -204,8 +204,9 @@ LogicalResult linkExecutablesInto( auto linkedTargetBuilder = OpBuilder::atBlockBegin(&linkedTargetOp.getBlock()); - // Aggregation of all external objects specified on variants used. + // Aggregation of all external objects and sources specified on variants used. SetVector objectAttrs; + NamedAttrList linkedSourceAttrs; // Iterate over all source executable ops, linking as many as we can. for (auto sourceExecutableOp : sourceExecutableOps) { @@ -228,6 +229,12 @@ LogicalResult linkExecutablesInto( objectAttrs.insert(objectsAttr.begin(), objectsAttr.end()); } + // Merge sources into the linked source listing. + if (auto sourcesAttr = variantOp.getSourcesAttr()) { + for (auto sourceAttr : sourcesAttr.getValue()) + linkedSourceAttrs.set(sourceAttr.getName(), sourceAttr.getValue()); + } + // Remap variant refs. auto oldVariantRefAttr = SymbolRefAttr::get(context, sourceExecutableOp.getName(), @@ -266,7 +273,6 @@ LogicalResult linkExecutablesInto( for (auto constantBlockOp : llvm::make_early_inc_range(variantOp.getConstantBlockOps())) { constantBlockOp->moveBefore(&*linkedTargetBuilder.getInsertionPoint()); - // linkedTargetBuilder.clone(constantBlockOp); } // Clone export ops and queue remapping ordinals and updating @@ -320,6 +326,12 @@ LogicalResult linkExecutablesInto( linkedTargetBuilder.getArrayAttr(objectAttrs.takeVector())); } + // Attach all source files from the source variants. + if (!linkedSourceAttrs.empty()) { + linkedTargetOp.setSourcesAttr( + linkedTargetBuilder.getDictionaryAttr(linkedSourceAttrs)); + } + // Update references to @executable::@target::@entry symbols. replaceEntryPointUses(moduleOp, symbolReplacements); diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td index 9d20cf3db611..3d1916f282fe 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td +++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td @@ -2054,7 +2054,8 @@ def HAL_ExecutableExportOp : HAL_Op<"executable.export", [ HAL_PipelineLayoutAttr:$layout, OptionalAttr:$workgroup_size, OptionalAttr:$subgroup_size, - OptionalAttr:$workgroup_local_memory + OptionalAttr:$workgroup_local_memory, + OptionalAttr:$source_locs ); let regions = (region AnyRegion:$workgroup_count); @@ -2069,7 +2070,8 @@ def HAL_ExecutableExportOp : HAL_Op<"executable.export", [ "::mlir::IntegerAttr":$workgroup_local_memory ), [{ build($_builder, $_state, nullptr, sym_name, ordinal, layout, - workgroup_size, subgroup_size, workgroup_local_memory); + workgroup_size, subgroup_size, workgroup_local_memory, + DictionaryAttr{}); }]>, ]; @@ -2119,7 +2121,8 @@ def HAL_ExecutableVariantOp : HAL_Op<"executable.variant", [ OptionalAttr:$sym_visibility, SymbolNameAttr:$sym_name, HAL_ExecutableTargetAttr:$target, - OptionalAttr:$objects + OptionalAttr:$objects, + OptionalAttr:$sources ); let regions = (region @@ -2131,6 +2134,7 @@ def HAL_ExecutableVariantOp : HAL_Op<"executable.variant", [ $sym_name `target` `(` $target `)` (`objects` `(` $objects^ `)` )? + (`sources` `(` $sources^ `)` )? attr-dict-with-keyword $body }]; diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.cpp index 0fae79fdf8ca..581ac6fef9eb 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.cpp +++ b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.cpp @@ -35,8 +35,8 @@ void TargetOptions::bindOptions(OptionsBinder &binder) { binder.opt( "iree-hal-executable-debug-level", debugLevel, - llvm::cl::desc("Debug level for executable translation (0-3)"), - llvm::cl::init(2), llvm::cl::cat(halTargetOptionsCategory)); + llvm::cl::desc("Debug level for executable translation (0-3)."), + llvm::cl::cat(halTargetOptionsCategory)); binder.opt( "iree-hal-dump-executable-files-to", executableFilesPath, diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.h b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.h index 4efdeb48b7ed..bff153c4e911 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.h +++ b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.h @@ -35,7 +35,7 @@ struct TargetOptions { // 1: minimal debug information // 2: default debug information // 3: maximal debug information - int debugLevel; + int debugLevel = 2; // Default path to write executable files into. std::string executableFilesPath; diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/BUILD.bazel b/compiler/src/iree/compiler/Dialect/HAL/Transforms/BUILD.bazel index 2472db205bf8..e6f514d43040 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/BUILD.bazel +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/BUILD.bazel @@ -16,6 +16,7 @@ iree_compiler_cc_library( name = "Transforms", srcs = [ "AssignTargetDevices.cpp", + "CaptureExecutableSources.cpp", "ConfigureExecutables.cpp", "ConvertToHAL.cpp", "DumpExecutableBenchmarks.cpp", diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/CMakeLists.txt b/compiler/src/iree/compiler/Dialect/HAL/Transforms/CMakeLists.txt index 616d145d0ce9..4201d373b1a3 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/CMakeLists.txt +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/CMakeLists.txt @@ -17,6 +17,7 @@ iree_cc_library( "Passes.h" SRCS "AssignTargetDevices.cpp" + "CaptureExecutableSources.cpp" "ConfigureExecutables.cpp" "ConvertToHAL.cpp" "DumpExecutableBenchmarks.cpp" diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/CaptureExecutableSources.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/CaptureExecutableSources.cpp new file mode 100644 index 000000000000..60d8ddc4fd8d --- /dev/null +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/CaptureExecutableSources.cpp @@ -0,0 +1,129 @@ +// Copyright 2024 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include + +#include "iree/compiler/Dialect/HAL/IR/HALDialect.h" +#include "iree/compiler/Dialect/HAL/IR/HALOps.h" +#include "iree/compiler/Dialect/HAL/Transforms/Passes.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/LocationSnapshot.h" + +namespace mlir::iree_compiler::IREE::HAL { + +#define GEN_PASS_DEF_CAPTUREEXECUTABLESOURCESPASS +#include "iree/compiler/Dialect/HAL/Transforms/Passes.h.inc" + +namespace { + +//===----------------------------------------------------------------------===// +// --iree-hal-capture-executable-sources +//===----------------------------------------------------------------------===// + +static bool hasDictionaryAttrEntry(Operation *op, StringRef dictionaryName, + StringRef key) { + auto dictionaryAttr = op->getAttrOfType(dictionaryName); + return dictionaryAttr && dictionaryAttr.get(key); +} + +static void insertDictionaryAttrEntry(Operation *op, StringRef dictionaryName, + StringRef key, Attribute value) { + NamedAttrList attrs; + auto dictionaryAttr = op->getAttrOfType(dictionaryName); + if (dictionaryAttr) + attrs.assign(dictionaryAttr.getValue()); + attrs.set(key, value); + op->setAttr(dictionaryName, DictionaryAttr::get(op->getContext(), attrs)); +} + +static Attribute getSourceAttr(MLIRContext *context, StringRef fileName, + StringRef source) { + // TODO(benvanik): use our own resource attribute that allows us to store the + // source string verbatim (and out-of-band) in the file. Today only element + // attrs have resource equivalents upstream (no string resource attr). + Builder b(context); + auto blob = HeapAsmResourceBlob::allocateAndCopyInferAlign( + ArrayRef(source.data(), source.size())); + return DenseI8ResourceElementsAttr::get( + VectorType::get({static_cast(source.size())}, b.getI8Type()), + fileName, std::move(blob)); +} + +struct CaptureExecutableSourcesPass + : public IREE::HAL::impl::CaptureExecutableSourcesPassBase< + CaptureExecutableSourcesPass> { + using IREE::HAL::impl::CaptureExecutableSourcesPassBase< + CaptureExecutableSourcesPass>::CaptureExecutableSourcesPassBase; + void runOnOperation() override { + auto moduleOp = getOperation(); + auto moduleName = moduleOp.getName().value_or("module"); + + for (auto executableOp : moduleOp.getOps()) { + for (auto variantOp : + executableOp.getOps()) { + // Skip externally defined variants as there's no source to capture. + if (variantOp.isExternal()) + continue; + + // Ignore if there is already source assigned. + auto fileName = (moduleName + "_" + executableOp.getName() + "_" + + variantOp.getName() + "." + stage + ".mlir") + .str(); + if (hasDictionaryAttrEntry(variantOp, "sources", fileName)) + continue; + + // Create a standalone executable with just the variant being captured. + // This allows the source to be passed to iree-compile in the + // hal-executable compilation mode. + auto clonedExecutableOp = executableOp.cloneWithoutRegions(); + clonedExecutableOp.setVisibility(SymbolTable::Visibility::Public); + OpBuilder clonedBuilder = OpBuilder::atBlockBegin( + &clonedExecutableOp.getBody().emplaceBlock()); + auto clonedVariantOp = cast( + clonedBuilder.clone(*variantOp)); + clonedBuilder.create( + clonedBuilder.getUnknownLoc()); + + // Capture the source contents and update the locations in the IR to + // reference it. + std::string source; + llvm::raw_string_ostream os(source); + OpPrintingFlags flags; + flags.useLocalScope(); + mlir::generateLocationsFromIR(os, fileName, clonedExecutableOp, flags); + os << "\n"; // newline at end of file + + // Wrap up the contents and attach them to the variant. + auto sourceAttr = + getSourceAttr(variantOp.getContext(), fileName, source); + insertDictionaryAttrEntry(variantOp, "sources", fileName, sourceAttr); + + // Extract the new locations of the exported functions and attach them + // to the original. + SymbolTable symbolTable(variantOp.getInnerModule()); + SymbolTable clonedSymbolTable(clonedVariantOp.getInnerModule()); + for (auto [exportOp, clonedExportOp] : llvm::zip_equal( + variantOp.getExportOps(), clonedVariantOp.getExportOps())) { + // Attach the cloned function location that was updated to point into + // the source file and attach it to the original function. + auto clonedFuncOp = + clonedSymbolTable.lookup(clonedExportOp.getSymName()); + if (clonedFuncOp) { + insertDictionaryAttrEntry(exportOp, "source_locs", stage, + clonedFuncOp->getLoc()); + } + } + + clonedExecutableOp.erase(); + } + } + } +}; + +} // namespace + +} // namespace mlir::iree_compiler::IREE::HAL diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp index 273b2db10e0e..159ba7d21bee 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp @@ -243,7 +243,17 @@ void buildHALConfigurationPassPipeline(OpPassManager &passManager, // benchmarks using the substituted executables. addExecutableSubstitutionPasses(passManager, clSubstituteExecutableSource, clSubstituteExecutableSourcesFrom); + + // If debug information is requested capture the MLIR source text of each + // executable variant and associate it with the entry points. This allows us + // to preserve this information after translation and the original input IR + // has been erased. + if (targetOptions.debugLevel >= 3) { + passManager.addPass( + IREE::HAL::createCaptureExecutableSourcesPass({"0_source"})); + } } + //===----------------------------------------------------------------------===// // --iree-hal-transformation-pipeline //===----------------------------------------------------------------------===// @@ -297,6 +307,13 @@ void buildHALTransformPassPipeline(OpPassManager &passManager, {targetOptions.executableConfigurationsPath, "configured"})); } + // If debug information is requested capture the MLIR source text of each + // configured executable variant and associate it with the entry points. + if (targetOptions.debugLevel >= 3) { + passManager.addPass( + IREE::HAL::createCaptureExecutableSourcesPass({"1_configured"})); + } + // Substitute hal.executables we've configured with those specified on the // command line. This developer feature allows for hand editing the // configured executable with different lowering parameters. @@ -337,6 +354,15 @@ void buildHALTransformPassPipeline(OpPassManager &passManager, if (compileTo == PipelinePhase::ExecutableTargets) return; + // If debug information is requested capture the translated MLIR source text + // of each executable variant and associate it with the entry points. This + // allows us to compare the input IR with the translated IR before + // serialization (LLVM dialect, SPIR-V dialect, etc). + if (targetOptions.debugLevel >= 3) { + passManager.addPass( + IREE::HAL::createCaptureExecutableSourcesPass({"2_translated"})); + } + // Substitute hal.executables we've translated with those specified on the // command line. This developer feature allows for splicing in hand-authored // or hand-modified executables in various forms without modifying the diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.td b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.td index 5ef5ee342c06..188340b53659 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.td +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.td @@ -473,6 +473,23 @@ def ElideRedundantCommandsPass : // Benchmarking and debugging utilities //===----------------------------------------------------------------------===// +def CaptureExecutableSourcesPass : + Pass<"iree-hal-capture-executable-sources", "mlir::ModuleOp"> { + let summary = "Captures individual hal.executable.variant source listings and embeds them in the IR."; + let description = [{ + Captures a source listing of each hal.executable.variant and attaches the + source to the variant embedded in the IR. Entry points are assigned + locations in the IR relative to the captured source. + }]; + let options = [ + Option< + "stage", "stage", + "std::string", "\"source\"", + "Name used to indicate what stage of compilation is captured." + >, + ]; +} + def DumpExecutableSourcesPass : Pass<"iree-hal-dump-executable-sources", "mlir::ModuleOp"> { let summary = "Dumps individual hal.executable source listings to the provided path."; diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/BUILD.bazel b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/BUILD.bazel index 830b398fdea1..db9a2d4987e9 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/BUILD.bazel +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/BUILD.bazel @@ -17,6 +17,7 @@ iree_lit_test_suite( srcs = enforce_glob( [ "assign_target_devices.mlir", + "capture_executable_sources.mlir", "convert_to_hal.mlir", "dump_executable_benchmarks.mlir", "dump_executable_sources.mlir", diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/CMakeLists.txt b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/CMakeLists.txt index 3b1b907f0944..ae4322d53213 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/CMakeLists.txt +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/CMakeLists.txt @@ -15,6 +15,7 @@ iree_lit_test_suite( lit SRCS "assign_target_devices.mlir" + "capture_executable_sources.mlir" "convert_to_hal.mlir" "dump_executable_benchmarks.mlir" "dump_executable_sources.mlir" diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/capture_executable_sources.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/capture_executable_sources.mlir new file mode 100644 index 000000000000..e7838d63c207 --- /dev/null +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/capture_executable_sources.mlir @@ -0,0 +1,66 @@ +// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-hal-capture-executable-sources{stage=configured})' %s | FileCheck %s + +#executable_target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64"> +#pipeline_layout = #hal.pipeline.layout, + #hal.descriptor_set.binding<1, storage_buffer>, + #hal.descriptor_set.binding<2, storage_buffer> + ]> +]> + +// CHECK-DAG: #[[EX0_VARIANT0_LOC:.+]] = loc("module_ex0_variant0.configured.mlir" +// CHECK-DAG: #[[EX1_VARIANT1_LOC:.+]] = loc("module_ex1_variant1.configured.mlir" + +// CHECK: hal.executable private @ex0 +hal.executable private @ex0 { + // CHECK-NEXT: hal.executable.variant public @variant0 + // CHECK-SAME: sources({module_ex0_variant0.configured.mlir = dense_resource + } { + ^bb0(%device: !hal.device, %arg0: index, %arg1: index, %arg2: index): // no predecessors + %c1 = arith.constant 1 : index + hal.return %c1, %c1, %c1 : index, index, index + } + builtin.module { + func.func @dispatch0() { + func.return + } + } + } +} + +// CHECK: hal.executable private @ex1 +hal.executable private @ex1 { + // CHECK-NEXT: hal.executable.variant public @variant1 + // CHECK-SAME: sources({module_ex1_variant1.configured.mlir = dense_resource + } { + ^bb0(%device: !hal.device, %arg0: index, %arg1: index, %arg2: index): // no predecessors + %c1 = arith.constant 1 : index + hal.return %c1, %c1, %c1 : index, index, index + } + builtin.module { + func.func @dispatch1() { + func.return + } + } + } +} + +// CHECK: {-# +// CHECK-NEXT: dialect_resources: { +// CHECK-NEXT: builtin: { +// CHECK-NEXT: module_ex0_variant0.configured.mlir: +// CHECK-NEXT: module_ex1_variant1.configured.mlir: +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: #-} diff --git a/experimental/hip/stream_command_buffer.c b/experimental/hip/stream_command_buffer.c index 036393469671..4d8db3866a58 100644 --- a/experimental/hip/stream_command_buffer.c +++ b/experimental/hip/stream_command_buffer.c @@ -124,9 +124,9 @@ static iree_status_t iree_hal_hip_stream_command_buffer_begin( IREE_HIP_TRACE_ZONE_BEGIN_EXTERNAL( command_buffer->tracing_context, command_buffer->hip_stream, - /*file_name=*/NULL, 0, /*line=*/0, /*func_name=*/NULL, 0, - "iree_hal_hip_stream_command_buffer", - strlen("iree_hal_hip_stream_command_buffer")); + /*file_name=*/NULL, 0, /*line=*/0, "iree_hal_hip_stream_command_buffer", + strlen("iree_hal_hip_stream_command_buffer"), + /*name=*/NULL, 0); return iree_ok_status(); } @@ -442,8 +442,9 @@ static iree_status_t iree_hal_hip_stream_command_buffer_dispatch( IREE_HIP_TRACE_ZONE_BEGIN_EXTERNAL( command_buffer->tracing_context, command_buffer->hip_stream, kernel_info.source_filename.data, kernel_info.source_filename.size, - kernel_info.source_line, /*func_name=*/NULL, 0, - kernel_info.function_name.data, kernel_info.function_name.size); + kernel_info.source_line, kernel_info.function_name.data, + kernel_info.function_name.size, + /*name=*/NULL, 0); IREE_RETURN_AND_END_ZONE_IF_ERROR( z0, iree_hal_resource_set_insert(command_buffer->resource_set, 1, diff --git a/experimental/rocm/direct_command_buffer.c b/experimental/rocm/direct_command_buffer.c index dfe00a2431bc..89868782bdf2 100644 --- a/experimental/rocm/direct_command_buffer.c +++ b/experimental/rocm/direct_command_buffer.c @@ -125,9 +125,9 @@ static iree_status_t iree_hal_rocm_direct_command_buffer_begin( IREE_ROCM_TRACE_ZONE_BEGIN_EXTERNAL( command_buffer->tracing_context, 0, - /*file_name=*/NULL, 0, /*line=*/0, /*func_name=*/NULL, 0, - "iree_hal_rocm_direct_command_buffer", - strlen("iree_hal_rocm_direct_command_buffer")); + /*file_name=*/NULL, 0, /*line=*/0, "iree_hal_rocm_direct_command_buffer", + strlen("iree_hal_rocm_direct_command_buffer"), + /*name=*/NULL, 0); return iree_ok_status(); } @@ -420,10 +420,13 @@ static iree_status_t iree_hal_rocm_direct_command_buffer_dispatch( executable, entry_point, &kernel_params)); IREE_ROCM_TRACE_ZONE_BEGIN_EXTERNAL( - command_buffer->tracing_context, 0, kernel_params.function_name.data, - kernel_params.function_name.size, - /*line=*/0, /*func_name=*/NULL, 0, kernel_params.function_name.data, - kernel_params.function_name.size); + command_buffer->tracing_context, 0, + kernel_params.source_location.file_name.data, + kernel_params.source_location.file_name.size, + kernel_params.source_location.line, + kernel_params.source_location.func_name.data, + kernel_params.source_location.func_name.size, + /*name=*/NULL, 0); // Patch the push constants in the kernel arguments. iree_host_size_t num_constants = diff --git a/experimental/rocm/native_executable.c b/experimental/rocm/native_executable.c index d758493e5bbd..217e32e8f520 100644 --- a/experimental/rocm/native_executable.c +++ b/experimental/rocm/native_executable.c @@ -154,17 +154,69 @@ iree_status_t iree_hal_rocm_native_executable_create( params->block_size[1] = block_sizes_vec[i].y; params->block_size[2] = block_sizes_vec[i].z; params->shared_memory_size = shared_memory_sizes[i]; + // Stash the entry point name in the string table for use when tracing. IREE_TRACE({ iree_host_size_t entry_name_length = flatbuffers_string_len(entry_name); memcpy(string_table_buffer, entry_name, entry_name_length); - params->function_name = + params->source_location.func_name = iree_make_string_view(string_table_buffer, entry_name_length); string_table_buffer += entry_name_length; }); } } + +#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION + iree_hal_rocm_FileLineLocDef_vec_t source_locations_vec = + iree_hal_rocm_ExecutableDef_source_locations_get(executable_def); + iree_hal_rocm_StageLocationsDef_vec_t stage_locations_vec = + iree_hal_rocm_ExecutableDef_stage_locations_get(executable_def); + for (iree_host_size_t i = 0; i < entry_count; ++i) { + iree_hal_rocm_StageLocationDef_vec_t stage_locations = + iree_hal_rocm_StageLocationsDef_locations_get( + iree_hal_rocm_StageLocationsDef_vec_at(stage_locations_vec, i)); + iree_hal_rocm_FileLineLocDef_table_t source_location = + iree_hal_rocm_FileLineLocDef_vec_at(source_locations_vec, i); + if (stage_locations) { + for (size_t j = 0; + j < iree_hal_rocm_StageLocationDef_vec_len(stage_locations); ++j) { + iree_hal_rocm_StageLocationDef_table_t stage_location = + iree_hal_rocm_StageLocationDef_vec_at(stage_locations, j); + // TODO(benvanik): a way to select what location is chosen. For now + // we just pick the first one. + source_location = + iree_hal_rocm_StageLocationDef_location_get(stage_location); + break; + } + } + iree_hal_rocm_kernel_params_t* params = &executable->entry_points[i]; + flatbuffers_string_t filename = + iree_hal_rocm_FileLineLocDef_filename_get(source_location); + params->source_location.file_name = + iree_make_string_view(filename, flatbuffers_string_len(filename)); + params->source_location.line = + iree_hal_rocm_FileLineLocDef_line_get(source_location); + } + + // Publish any embedded source files to the tracing infrastructure. + if (iree_hal_rocm_ExecutableDef_source_files_is_present(executable_def)) { + iree_hal_rocm_SourceFileDef_vec_t source_files_vec = + iree_hal_rocm_ExecutableDef_source_files_get(executable_def); + for (size_t i = 0; + i < iree_hal_rocm_SourceFileDef_vec_len(source_files_vec); ++i) { + iree_hal_rocm_SourceFileDef_table_t source_file = + iree_hal_rocm_SourceFileDef_vec_at(source_files_vec, i); + flatbuffers_string_t path = + iree_hal_rocm_SourceFileDef_path_get(source_file); + flatbuffers_uint8_vec_t content = + iree_hal_rocm_SourceFileDef_content_get(source_file); + IREE_TRACE_PUBLISH_SOURCE_FILE(path, flatbuffers_string_len(path), + content, + flatbuffers_uint8_vec_len(content)); + } + } +#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION } if (iree_status_is_ok(status)) { diff --git a/experimental/rocm/native_executable.h b/experimental/rocm/native_executable.h index 0c229a03b525..b23ab510cf17 100644 --- a/experimental/rocm/native_executable.h +++ b/experimental/rocm/native_executable.h @@ -18,14 +18,18 @@ extern "C" { #endif // __cplusplus +typedef struct iree_hal_rocm_source_location_t { + iree_string_view_t file_name; + int line; + iree_string_view_t func_name; +} iree_hal_rocm_source_location_t; + typedef struct iree_hal_rocm_kernel_params_t { iree_hal_pipeline_layout_t* layout; hipFunction_t function; uint32_t block_size[3]; uint32_t shared_memory_size; - IREE_TRACE(iree_string_view_t function_name;) - IREE_TRACE(iree_string_view_t source_filename;) - IREE_TRACE(uint32_t source_line;) + IREE_TRACE(iree_hal_rocm_source_location_t source_location;) } iree_hal_rocm_kernel_params_t; // Creates an executable from a HSACO module. The module may contain several diff --git a/runtime/src/iree/base/tracing.h b/runtime/src/iree/base/tracing.h index ae5b120b904e..e0c339a4228e 100644 --- a/runtime/src/iree/base/tracing.h +++ b/runtime/src/iree/base/tracing.h @@ -223,6 +223,11 @@ enum { // Exits a fiber context. #define IREE_TRACE_FIBER_LEAVE() +// Publishes a source file to the tracing infrastructure. +// The filename and contents are copied and need not live longer than the call. +#define IREE_TRACE_PUBLISH_SOURCE_FILE(filename, filename_length, content, \ + content_length) + // Begins a new zone with the parent function name. #define IREE_TRACE_ZONE_BEGIN(zone_id) \ iree_zone_id_t zone_id = 0; \ diff --git a/runtime/src/iree/base/tracing/console.h b/runtime/src/iree/base/tracing/console.h index cfba339528c9..65fcf89882e8 100644 --- a/runtime/src/iree/base/tracing/console.h +++ b/runtime/src/iree/base/tracing/console.h @@ -125,6 +125,9 @@ void iree_tracing_memory_free(const char* name, size_t name_length, void* ptr); #define IREE_TRACE_SET_APP_INFO(value, value_length) #define IREE_TRACE_SET_THREAD_NAME(name) iree_tracing_set_thread_name(name) +#define IREE_TRACE_PUBLISH_SOURCE_FILE(filename, filename_length, content, \ + content_length) + #if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_FIBERS // TODO(benvanik): console tracing fiber markers. #define IREE_TRACE_FIBER_ENTER(fiber) diff --git a/runtime/src/iree/base/tracing/tracy.cc b/runtime/src/iree/base/tracing/tracy.cc index 722ade897b70..333ebe3f58f3 100644 --- a/runtime/src/iree/base/tracing/tracy.cc +++ b/runtime/src/iree/base/tracing/tracy.cc @@ -30,8 +30,64 @@ void IREEDbgHelpUnlock(void) { ReleaseMutex(iree_dbghelp_mutex); } #if IREE_TRACING_FEATURES != 0 +typedef struct iree_tracing_source_file_t { + uint8_t* filename; + size_t filename_length; + uint8_t* content; + size_t content_length; +} iree_tracing_source_file_t; + +// Global registry of published source files allocated using the Tracy allocator +// and live for the entire lifetime of the program as Tracy will request the +// contents long past tear-down. +typedef struct iree_tracing_source_file_storage_t { + tracy::TracyMutex mutex; + iree_host_size_t capacity; + iree_host_size_t count; + iree_tracing_source_file_t** files; +} iree_tracing_source_file_storage_t; +static iree_tracing_source_file_storage_t iree_tracing_source_file_storage; + +static char* iree_tracing_tracy_source_file_callback(void* user_data, + const char* filename, + size_t& out_size) { + iree_tracing_source_file_storage_t* storage = + (iree_tracing_source_file_storage_t*)user_data; + + const iree_host_size_t filename_length = strlen(filename); + char* content_copy = NULL; + iree_host_size_t content_length = 0; + + storage->mutex.lock(); + + for (iree_host_size_t i = 0; i < storage->count; ++i) { + iree_tracing_source_file_t* source_file = storage->files[i]; + if (filename_length != source_file->filename_length) continue; + // NOTE: no case-insensitive/fuzzy comparison (yet). The paths are + // generated by the compiler in the same place and they should always line + // up but if we start embedding arbitrary user files we may need to + // normalize paths. + if (memcmp(filename, source_file->filename, filename_length) == 0) { + content_copy = + (char*)tracy::tracy_malloc_fast(source_file->content_length); + memcpy(content_copy, source_file->content, source_file->content_length); + content_length = source_file->content_length; + break; + } + } + + storage->mutex.unlock(); + + out_size = content_length; + return content_copy; +} + void iree_tracing_tracy_initialize() { - // No-op. + // Register a single source provider callback with Tracy. Tracy only supports + // one at a time and the callback must remain valid until program exit. + tracy::Profiler::SourceCallbackRegister( + iree_tracing_tracy_source_file_callback, + &iree_tracing_source_file_storage); } void iree_tracing_tracy_deinitialize() { @@ -48,6 +104,45 @@ void iree_tracing_tracy_deinitialize() { #endif // IREE_PLATFORM_* } +void iree_tracing_publish_source_file(const void* filename, + size_t filename_length, + const void* content, + size_t content_length) { + iree_tracing_source_file_storage_t* storage = + &iree_tracing_source_file_storage; + + // NOTE: this does not currently check to see whether the file has already + // been published. We could but in most valid usage we don't need to. + + // Allocate storage for the file - we do this as a single alloc of the entry + // with the filename and content tacked on. + size_t total_size = + sizeof(iree_tracing_source_file_t) + filename_length + content_length; + uint8_t* entry_ptr = (uint8_t*)tracy::tracy_malloc_fast(total_size); + iree_tracing_source_file_t* source_file = + (iree_tracing_source_file_t*)entry_ptr; + source_file->filename = entry_ptr + sizeof(*source_file); + source_file->filename_length = filename_length; + memcpy(source_file->filename, filename, filename_length); + source_file->content = source_file->filename + filename_length; + source_file->content_length = content_length; + memcpy(source_file->content, content, content_length); + + storage->mutex.lock(); + + // Grow capacity of the storage index if needed. + if (storage->count + 1 >= storage->capacity) { + storage->capacity = std::max((iree_host_size_t)32, storage->capacity * 2); + storage->files = (iree_tracing_source_file_t**)tracy::tracy_realloc( + storage->files, storage->capacity * sizeof(*storage->files)); + } + + // Append the file. + storage->files[storage->count++] = source_file; + + storage->mutex.unlock(); +} + iree_zone_id_t iree_tracing_zone_begin_impl( const iree_tracing_location_t* src_loc, const char* name, size_t name_length) { diff --git a/runtime/src/iree/base/tracing/tracy.h b/runtime/src/iree/base/tracing/tracy.h index fcc15854a5b7..97229b7aad5e 100644 --- a/runtime/src/iree/base/tracing/tracy.h +++ b/runtime/src/iree/base/tracing/tracy.h @@ -107,6 +107,11 @@ typedef struct ___tracy_source_location_data iree_tracing_location_t; void iree_tracing_tracy_initialize(); void iree_tracing_tracy_deinitialize(); +void iree_tracing_publish_source_file(const void* filename, + size_t filename_length, + const void* content, + size_t content_length); + IREE_MUST_USE_RESULT iree_zone_id_t iree_tracing_zone_begin_impl(const iree_tracing_location_t* src_loc, const char* name, size_t name_length); @@ -202,6 +207,11 @@ void* iree_tracing_obscure_ptr(void* ptr); #define IREE_TRACE_FIBER_LEAVE() #endif // IREE_TRACING_FEATURE_FIBERS +#define IREE_TRACE_PUBLISH_SOURCE_FILE(filename, filename_length, content, \ + content_length) \ + iree_tracing_publish_source_file(filename, filename_length, content, \ + content_length) + #define IREE_TRACE_ZONE_BEGIN(zone_id) \ IREE_TRACE_ZONE_BEGIN_NAMED(zone_id, NULL) diff --git a/runtime/src/iree/hal/drivers/cuda/stream_command_buffer.c b/runtime/src/iree/hal/drivers/cuda/stream_command_buffer.c index 26680a4957eb..c67b8c3dfe39 100644 --- a/runtime/src/iree/hal/drivers/cuda/stream_command_buffer.c +++ b/runtime/src/iree/hal/drivers/cuda/stream_command_buffer.c @@ -161,9 +161,8 @@ static iree_status_t iree_hal_cuda_stream_command_buffer_begin( IREE_CUDA_TRACE_ZONE_BEGIN_EXTERNAL( command_buffer->tracing_context, command_buffer->cu_stream, - /*file_name=*/NULL, 0, /*line=*/0, /*func_name=*/NULL, 0, - "iree_hal_cuda_stream_command_buffer", - strlen("iree_hal_cuda_stream_command_buffer")); + /*file_name=*/NULL, 0, /*line=*/0, "iree_hal_cuda_stream_command_buffer", + strlen("iree_hal_cuda_stream_command_buffer"), /*name=*/NULL, 0); return iree_ok_status(); } @@ -520,8 +519,9 @@ static iree_status_t iree_hal_cuda_stream_command_buffer_dispatch( IREE_CUDA_TRACE_ZONE_BEGIN_EXTERNAL( command_buffer->tracing_context, command_buffer->cu_stream, kernel_info.source_filename.data, kernel_info.source_filename.size, - kernel_info.source_line, /*func_name=*/NULL, 0, - kernel_info.function_name.data, kernel_info.function_name.size); + kernel_info.source_line, kernel_info.function_name.data, + kernel_info.function_name.size, + /*name=*/NULL, 0); IREE_RETURN_AND_END_ZONE_IF_ERROR( z0, iree_hal_resource_set_insert(command_buffer->resource_set, 1, diff --git a/runtime/src/iree/hal/drivers/vulkan/direct_command_buffer.cc b/runtime/src/iree/hal/drivers/vulkan/direct_command_buffer.cc index a445760b0136..c177e9e88d44 100644 --- a/runtime/src/iree/hal/drivers/vulkan/direct_command_buffer.cc +++ b/runtime/src/iree/hal/drivers/vulkan/direct_command_buffer.cc @@ -201,9 +201,8 @@ static iree_status_t iree_hal_vulkan_direct_command_buffer_begin( IREE_VULKAN_TRACE_ZONE_BEGIN_EXTERNAL( command_buffer->tracing_context, command_buffer->handle, /*file_name=*/NULL, 0, - /*line=*/0, /*func_name=*/NULL, 0, - "iree_hal_vulkan_direct_command_buffer", - strlen("iree_hal_vulkan_direct_command_buffer")); + /*line=*/0, "iree_hal_vulkan_direct_command_buffer", + strlen("iree_hal_vulkan_direct_command_buffer"), /*name=*/NULL, 0); return iree_ok_status(); } @@ -726,8 +725,8 @@ static iree_status_t iree_hal_vulkan_direct_command_buffer_dispatch( IREE_VULKAN_TRACE_ZONE_BEGIN_EXTERNAL( command_buffer->tracing_context, command_buffer->handle, source_location.file_name.data, source_location.file_name.size, - source_location.line, /*func_name=*/NULL, 0, - source_location.func_name.data, source_location.func_name.size); + source_location.line, source_location.func_name.data, + source_location.func_name.size, /*name=*/NULL, 0); }); IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert( @@ -769,8 +768,8 @@ static iree_status_t iree_hal_vulkan_direct_command_buffer_dispatch_indirect( IREE_VULKAN_TRACE_ZONE_BEGIN_EXTERNAL( command_buffer->tracing_context, command_buffer->handle, source_location.file_name.data, source_location.file_name.size, - source_location.line, /*func_name=*/NULL, 0, - source_location.func_name.data, source_location.func_name.size); + source_location.line, source_location.func_name.data, + source_location.func_name.size, /*name=*/NULL, 0); // Get the compiled and linked pipeline for the specified entry point and // bind it to the command buffer. diff --git a/runtime/src/iree/hal/drivers/vulkan/native_executable.cc b/runtime/src/iree/hal/drivers/vulkan/native_executable.cc index eb9d0d9ed7e4..b6d8dc67f7b4 100644 --- a/runtime/src/iree/hal/drivers/vulkan/native_executable.cc +++ b/runtime/src/iree/hal/drivers/vulkan/native_executable.cc @@ -29,8 +29,8 @@ typedef struct iree_hal_vulkan_entry_point_t { iree_string_view_t name; // Optional debug information. - IREE_TRACE(iree_string_view_t source_filename;) - IREE_TRACE(uint32_t source_line;) + IREE_TRACE(iree_hal_spirv_FileLineLocDef_table_t source_location;) + IREE_TRACE(iree_hal_spirv_StageLocationDef_vec_t stage_locations;) } iree_hal_vulkan_entry_point_t; static iree_status_t iree_hal_vulkan_create_shader_module( @@ -293,6 +293,8 @@ static iree_status_t iree_hal_spirv_executable_flatbuffer_verify( } } + // TODO: verify source locations, stage locations, and source files. + return iree_ok_status(); } @@ -406,24 +408,48 @@ iree_status_t iree_hal_vulkan_native_executable_create( } } - IREE_TRACE({ - if (iree_status_is_ok(status) && - iree_hal_spirv_ExecutableDef_source_locations_is_present( +#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION + if (iree_status_is_ok(status)) { + if (iree_hal_spirv_ExecutableDef_source_locations_is_present( executable_def)) { - iree_hal_spirv_FileLineLocDef_vec_t source_locs_vec = + iree_hal_spirv_FileLineLocDef_vec_t source_locations_vec = iree_hal_spirv_ExecutableDef_source_locations_get(executable_def); for (iree_host_size_t i = 0; i < entry_point_count; ++i) { - iree_hal_spirv_FileLineLocDef_table_t source_loc = - iree_hal_spirv_FileLineLocDef_vec_at(source_locs_vec, i); - flatbuffers_string_t filename = - iree_hal_spirv_FileLineLocDef_filename_get(source_loc); - uint32_t line = iree_hal_spirv_FileLineLocDef_line_get(source_loc); - executable->entry_points[i].source_filename = - iree_make_string_view(filename, flatbuffers_string_len(filename)); - executable->entry_points[i].source_line = line; + executable->entry_points[i].source_location = + iree_hal_spirv_FileLineLocDef_vec_at(source_locations_vec, i); + } + } + if (iree_hal_spirv_ExecutableDef_stage_locations_is_present( + executable_def)) { + iree_hal_spirv_StageLocationsDef_vec_t stage_locations_vec = + iree_hal_spirv_ExecutableDef_stage_locations_get(executable_def); + for (iree_host_size_t i = 0; i < entry_point_count; ++i) { + iree_hal_spirv_StageLocationsDef_table_t stage_locations = + iree_hal_spirv_StageLocationsDef_vec_at(stage_locations_vec, i); + executable->entry_points[i].stage_locations = + iree_hal_spirv_StageLocationsDef_locations_get(stage_locations); + } + } + + // Publish any embedded source files to the tracing infrastructure. + if (iree_hal_spirv_ExecutableDef_source_files_is_present(executable_def)) { + iree_hal_spirv_SourceFileDef_vec_t source_files_vec = + iree_hal_spirv_ExecutableDef_source_files_get(executable_def); + for (iree_host_size_t i = 0; + i < iree_hal_spirv_SourceFileDef_vec_len(source_files_vec); ++i) { + iree_hal_spirv_SourceFileDef_table_t source_file = + iree_hal_spirv_SourceFileDef_vec_at(source_files_vec, i); + flatbuffers_string_t path = + iree_hal_spirv_SourceFileDef_path_get(source_file); + flatbuffers_uint8_vec_t content = + iree_hal_spirv_SourceFileDef_content_get(source_file); + IREE_TRACE_PUBLISH_SOURCE_FILE(path, flatbuffers_string_len(path), + content, + flatbuffers_uint8_vec_len(content)); } } - }); + } +#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION if (iree_status_is_ok(status)) { *out_executable = (iree_hal_executable_t*)executable; @@ -461,17 +487,43 @@ void iree_hal_vulkan_native_executable_entry_point_source_location( if (entry_ordinal >= executable->entry_point_count) { return; } - iree_hal_vulkan_entry_point_t entry_point = - executable->entry_points[entry_ordinal]; - - out_source_location->func_name = entry_point.name; - + const iree_hal_vulkan_entry_point_t* entry_point = + &executable->entry_points[entry_ordinal]; + + out_source_location->func_name = entry_point->name; + +#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION + iree_hal_spirv_FileLineLocDef_table_t source_location = + entry_point->source_location; + if (entry_point->stage_locations) { + for (size_t i = 0; i < iree_hal_spirv_StageLocationDef_vec_len( + entry_point->stage_locations); + ++i) { + iree_hal_spirv_StageLocationDef_table_t stage_location = + iree_hal_spirv_StageLocationDef_vec_at(entry_point->stage_locations, + i); + // TODO(benvanik): a way to select what location is chosen. For now we + // just pick the first one. + source_location = + iree_hal_spirv_StageLocationDef_location_get(stage_location); + break; + } + } + if (source_location) { + flatbuffers_string_t filename = + iree_hal_spirv_FileLineLocDef_filename_get(source_location); + out_source_location->file_name = + iree_make_string_view(filename, flatbuffers_string_len(filename)); + out_source_location->line = + iree_hal_spirv_FileLineLocDef_line_get(source_location); + } else { + out_source_location->file_name = out_source_location->func_name; + out_source_location->line = 0; + } +#else out_source_location->file_name = out_source_location->func_name; out_source_location->line = 0; - IREE_TRACE({ - out_source_location->file_name = entry_point.source_filename; - out_source_location->line = entry_point.source_line; - }); +#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION } iree_status_t iree_hal_vulkan_native_executable_pipeline_for_entry_point( diff --git a/runtime/src/iree/hal/local/elf/testdata/elementwise_mul.mlir b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul.mlir index 1ae713ccd80f..ed717d53baa4 100644 --- a/runtime/src/iree/hal/local/elf/testdata/elementwise_mul.mlir +++ b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul.mlir @@ -36,7 +36,11 @@ hal.executable.source public @ex { // // The ordinal is used to specify the entry point on command line tools and // must be unique across all entry points within the same executable. - hal.executable.export public @elementwise_mul ordinal(0) layout(#pipeline_layout) + hal.executable.export public @elementwise_mul ordinal(0) layout(#pipeline_layout) { + ^bb0(%arg0: !hal.device): + %c1 = arith.constant 1 : index + hal.return %c1, %c1, %c1 : index, index, index + } // The inner module defining the executable. This may have any number of // private functions and only those with declared entry points will be diff --git a/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_arm_32.so b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_arm_32.so index ff99373bac6dd7c3979616fe4d1b49f05275be72..ef4b031e466647428369f12dfb449203fb1cce80 100644 GIT binary patch delta 851 zcmZWnO=uHA6#iy2n{L9IjwF({6qD+qJrtT^uR;WW@SvvBA3?BdNl`)S(u>Emnu84n zv)zlAz^J$0N8xf*KXDAv4h7mFTYu#-^z%*S zldbYO+O-hH7aqLZ9;c$?z_vjN+*%HR1s|4t$FR`pM$sn~&s3x@TI~>J*)b*gsQ9SJ z9b-@W^km1pTn|KRGZ3;7h`rYVuUrd~nDf6y(Kg3?N4M1lL#6YJ z3ZzQXPYdarkUmTAbeX*vu#w#^(qlhaT6X_L(a3lAx0W5|w1bH^9)!#?%03@W`IGdj z+#O34s70!4eNp`_yR(V>mTtO_6Ys~1F>$IclBwF;6D>u@06BM>-#@dq@^tOllKrq+ zeX{z%=hMl=ES1SqH3)ZNt>TAN=#Y$2YR`$N zf=V6`h_ZVQEr)J1nO|8Yt2d=__yQfPVK0V>eFDF6Tf delta 763 zcmZ8fJ!lj`6#gbVw|9FacN0>cc__A-SLM6Y5meX)HrlO%qb%Hy17K$mVSCgKiQ_7R zaNn72s2usuL`@-Rww>*|;&rL%Tpi$TJ6B#-xtUd!+gej-y;hubE&-pod=eEfuM1MX z-zko(t&-GZ#hMD3)H_wbsu=FY*+r@Iz$OA2TfWp8SL#7X-OMj`d9V`MnA6Si$KAuB z8-JSlp!+vTHf<7BNwSxHCN?zX>%Qy0Yg?F!nY*sw$`M8f zJeyqB#rkmJh&}n9qowI%&jy@SM@;hv;tPNEImmFv zro>IV5!z>l=-x|5azBK&z3KwFy>0unO;eYSR!W$dL4|N~P+*Yq l%ET4=yct%PVB~3+W4lIy#<7=b8TUDX@~DY@`zGpH?;j=?xl;fD diff --git a/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_arm_64.so b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_arm_64.so index 022d36169f3977d093dd21440bef30e2930fffc2..d2f011a0de7533e80ba95a1975b2ad9fcef53a7e 100644 GIT binary patch delta 723 zcmX|9O=uHQ5T3U`$!`9-n+Q>aZ1x~j6REwa6n6C#qhOE~dr;a!gH>yzy@=E{2`B_| zu*>rhQla#g28E)hKtU^BY(Okgp&2!(niP4gv6)nklA;qomCBzWzjqY;6EVfxNV?Mp+nXb;oC4KuU>Sj$}JpF5?F-!FA%g7(# zd!6(Y4>CQ)0OBs>&bR?~?I)h%72+#CL})U!USdwVz}O^WU15Z#{5MObM};yla(0#Y zz*GQQ(WP8D>dCFjKW7>W2dyFj<0Gq7Ke2)8XVzccV7*nHTNzL#Fh?+JAUENiI+5e@ z11BxZ&={$i{9_r+_gjr&!W0(}&k*(x^#{Sk6EszX?K_V7+i%NYnR9YIi^IIwU;~HU z!Qr<#Fz117>SO6;6#;@8fw6tdG4%YftZ;j3rSgGQ4m@Rt-YODYSEB5j0udx2^JTkc zpEE~gE<=~dZkpqslPFyvd+9JQ?CI{o_Sj)>D~OxgqIv$Te=mh-x6SCmnLC zJI}7oMb}7(-WPoQ1=JWbACiYV9TFY0pRNgEK8=h;S*&#A8~?vN#rgP5S5i7xH{O;0 E0dCE_fdBvi delta 635 zcmYL`L1+^}6o%j1-DGE*q-J9w9>i_1holkG^rTSO!PDBJ(71X{!5(VCsz)g%=}kz% zZ04fzpx$htQ1w(OXuwkoMTiQ5hpLxKP`aXG#kjt8R~`7~@&5PEo8is#gS)9lHl2;a zzs2D=sK6lsF~@W?3xkT~ye<^S!sKrR2n2~dev6tCm}`o!P%ciOiDTj#UdBD~0lU@y z;**uW3{DpK`Y9rJo!v1L;4U+;^GUv6_*&_X#`*fChkRv{Y75Bsr$}4H1NCKMo45(J zk_s@C!vWG_^;Kef67I}e`9?KUfPs|kgN-Pn>Kh{Dzf`?4(4g>Kri%XV9+ zHM(_Ms$!sKOfW)BBX050xahNufeJ_JkZ1+M1gv~J2OxsQBHb#&pqFF z=6>hS?7gpa@7OAYAlp>>mF!suCwtr`(m{Um(0Zz(hseb#V7AKwE^=@VZr5`t?J_6e zz{qFfGPjEJ?6a!r&SiL>C;3Lx&vHJ$g5cKMc0D-D4gKzUgs6_khgk*m!D098MJk(2#`_b4@#Nv8K9Wo9 z9ZbdtQ+u{$HZNHRM{G|Bz&`t%JK&RDVBBP+(9d}PJI&sy=f)mcq@O?kq$b3zK+`HE&@iC{9(zj(`0Ld`Xrlmk6v+h(Oap!MI>t zXbzzk=FW*i@f@iix=1~9gbHW2QfaEF7}ZTwa;+8GiAE|-uN7fWwJj7dNF=WD=0EozHs`s z!JP7Qvp2O$BBRnn+Hp4-$6bWyYLz=Vt`b^Rl`xJ~3QhfdsoeYWpN1ns+6tBQE`!Gi z89jeR!@(KSHg%Fd=}-)>myAtMlm50t(Jt1I(b+2F@aBjxF4pq1Ninu_o;l^bYv z`OHCVAU+u+t!@jI23``zsX7V|oD|`Kabb+FBmFh6GCfyGvFYo6V?0RuJ6IE_!XfSJ#Y4*&!C4| z!f5QMk$LDZaWCabQ#(fJ=6*^0vWB(SQR(VY*4{?iy3MBks{^L~W4wmHief~IF(;Wl zyyunsy?|Wa=Z{VK%>1O6-2P{!ULAGy$0;U?jBhmc)}k&Vkdr;Les}%Xcl2v3SQB)A zVd~em-PIrNH1!+Ik$qI$)o;98OQpVEq1CjQ`cpws`msT1H&&DGt5CEX4WzHe*hPAE zg<||RN#!>jmsyLt@W&co^Xv}M z(bqh84d3N=1^>LF@^7Zl5LMI`p%hx?h1xnH3awG028AdD=Sgjwph8=e_&26%PNMyJ zBF%YjDg4ANo#6Aqn404J?@Qs4Pb7vPDbAD!wR_Hz|0sHtF%SFSOG|0g;0T}L0J$yf z73?e7Z!pd><+<6xxR+x-UJ1EDp7W7Q0d{EY!GkP|hB(NkhV@Yr4IDar&g^KoJ=LDs z7$0iSY>4k`&$P#1OzFc}p3%15-Q9kx^qv9F#`!Hx@yN4s`8i6VzOe(tV?z|`PYm~) zvs{)!+2mjX5J&00L@q(0T=H;^agG6UT9Ok(DP6L&>3Am5*Oxl5pQq3rO5}3c)ZUR? za+pGTdT1zlfa`fFK9hOuAI@dX$QybJ0a?mStEAhBcUzkrVz09<cixSh;sLFvuj- k@HHj#=oh^lVuv={PBHIwd%~l=?gYond5!#oZMzTeABXX$g#Z8m literal 1820 zcma)6OKcle6ur+Aj|~)$+bJXxsD!#rKtgU*XNiQZBvmVwl*WjUgs>fFz?S0J9Z#x+ z4FyBpRlJx5+q5ESmq;TJ3xtZ=AjBdpvIiuKHVcFb?FK3o&Kb|dr9~=NI`huG=f3yu zyx;p%CyyzmBs5KKOX!Uw`0GGV8JA&+$#asBJ)$E-EyT~@1XGEyKd5hCAwc*;C40Ib z+lx5x5i}yo>_E?IBH2_AR}dyRFZBqT3(zk>CyMY1>YQWd7iaGN{#W<(YdZ}cX%Cn` z{sYloy@yPZr=jh`ES#U9q3pg@ZObZDO7jJ4$to{8rP+C_FkhXuOZIZ%%%Wv4i%gTbtm9o1`}CJ{pn1lc2XmIC3WZnF$a{=)}o?~=#tUbBd$Is zS2@y9D7qb%+x1&pJbyM1_D-ydnE$xd5i5)bgeejgTQXOfhevqx-bIJUf)5)z{ z#bmzGNRF->GWzK=GJ5HVjIO>UW?~xS7|JvZ<*p=D_xQqJ-M8|0Tz3LBi!!wdaa)?| z&gAc8R%Wx^H?HousKM-L;u+g2wrYsixvAP{^NnQE>)cSI;O@!+F^yBw-Q4`Ue(R^Z zCJte1_#4r&op~{! z0m9gez#{;Tu>1*~_k?G4KlI1Y;!5~;=Op|`!BYV5!8ivBEO!{ht~SeOMLh*Nkc`@A zGe@gO8)JnUKkwipCuXLm!XEvWAB^F=5kHal~S|P^Bh~!wl!a( z1XiCbIVDLu){+AqoFc-0a;a7=BZtMMOO9h#XBQo-Dd}>(RyT^L1 zJp9HLCf}USy?kOKDF6R7TsE#N?>N_(7zWn>YFt~cIroHn!91=tF$B&W8uJJPU|;5O zkBD$N5!%fk1ZO_;xc3D29*u9*cm|#Y7-Jsa5rXm1cxGtN1I*)D;Tyv@m+x$79FKMc zV3;*#QCJq_ICyCMR~l>KSj_X^UXWtoqw#C!+&n^%8jR7l;f(aC{|!rc9Ul2upe0Zc w*E2L~L=oV>hPEGh?DIIlIRAq+Rsc-3Ym*S(+x2>5(Y1kl^S(yj3;&V#4{Xc)h5!Hn diff --git a/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_riscv_64.so b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_riscv_64.so index 71d6863bdbee1d36248dad3680af469dfd0d3fc7..deb3e0a99fe624541d8ab3229b9a42c42326582f 100644 GIT binary patch literal 2912 zcmbtWZ%kWN6hE(BTZT?vhLq_CS5(SeOvmW9xWp04Xk|E;Nu2?|G>_Le=|Z7>eK0VY zvSBHZ%-J;PSR(6)48qsO&!UT4;yyNuF)=Zr7SWkWA`!`^^PawYI`$lH#+$Ui^Y3@g zz4x7-zW2O`eI`KwT4wkS?r8uG+>y5Ff`vf?yaXley9~B~mFb%bHr?N#LyaUS~Y4_g|M8AcFPUHXzsYgoIRYqdMr{%1(akNj<{O1Mxoi z72nq<5KP$hLB5bage0X@Mset&Ggg|)JrJomMFtlI+vtk8z*)h`=uiTX1zCUWc^Xwnam6qirzqBsB`NJTL zM2GTU!BD*ghTImxTptJb$T3rS#V(jjm6z{;os)Bu$>z+tB~R7{%bRB#FJ4GJJ$Yd= zbFR!I`?o*!Ze0F00PfKxkNj3U$gettwa@Kw)N?KEf7wTKZAb5%Zs|*?!;^7gHd*?G z#bfo@q7}LCcHU0<=H8bN-hgCN>hc~)UA4oQH8s-&scV(+fi3gXH;{3D2&tLpq5SJg zu$E>PeL`k&pO9L$!}yhi(+2*;sw1`Pgz=nz7$qN*vX&nmm&V2y52UlqMiKd}&G`yo z1e2;0fKh(iG>e@0GEsf4P-%VsiR%f675o6pehU<8Ox&!R&7wXiI{a}_w_UM0e1)vZ z>xrsi@C(z-x0_{6F78jGhWTw(SO1y+ChmLUSKQGjxl3cYA?vNZ>Fn=zu_k?kWpu)y z*=(OxteHa12W#K^PtJ%npXPzZp0)n$U#%8vF6ZIYXA6-=ZAvo@EQA_QOr5Bb1n-$a zp*mXO-kXMxD*#gQY4fRy@Yyqi_d4U>XVUsOqx`VnlQI5b`ZGL$=E?25cYDx8b@(`j z2=8x-qc-2n`P(?IalVD?#WBJ8j~U`Au9^F3EI!BMXm;0fKQ|Y_bAX9zC?@xT;AY|` z(W3V4+Y_pb><)zMBDDbtrQ>yxxnP@WkWko%sRU>aQ_Mzl z3tYj0?$|&WT%A&^Q`a;VT&mI~p^Hg9K}nOqr78Uy^D`Np%v=bT!Xdd3t@Z>WQZN|m zew(RyK9{6vYN(@EQ)1wfd%|I*oAFsGHUk-U#xzxzbbN$C8E9%h;(6`RK|0g_LA(!q zw#COp0>@gO9Op5{QXf&@U>{*G5RINUA;nQ&QeP03F)rpNKJ^n};W;utEy_cBB@-|g z@u}Ylso#lb+R#*IYs?tCSh&jQ@_*u$UKiEKkc9Lc$pt<Y$-q5)b z(g}%A?-OJ<@{cnI+NjSc?jwGo9LQXVMTPR<&@(p eiN&?V`J+XPE>K-EpJu{?_)$K9U!*{eM*iOzIe10@ literal 2512 zcmb_dU2GIp6h5;*GmB6sE+pEKM;Q;_h^I%x==9+u12; zeX*3Z+ZQp=h=~ud6l07}Bn{OV9@>~H&k~+dS9~-zMhhWny{B`&MJ5oM;7#U!=bm%E zbMLw5-a8*IM~NoDX;s~>TT1m?VCiyx{t647OeiP zGwj$yp4HdyWc!PKuH}wo2ZtPY#1C*+_D{Bxu1c_&><{#)j~UQo|C0ed`0WAxw-VnO zmIM@}TlC*{{Ysd*|CO ztecqWRNPCu$v#nzXkA~Ae^*_eFlq}_t&Nj$IYZUUQ8K0ts!qR7#+52nulz_i|4f@b zgWcxM#hlqwE}QW$b&7wnh2qmMP`uVgE&9AZZ_G#QM}NOxKXH5Me0XoIrFlMFKYn{@ zX8ezI{k^Npr@JmzQ~KXz>{Qw5OH@_ZQ#Q@i#0tfgJg%QJ>T|QpzkH)*rgNcuS{obH z_I#YNBiBgFjE!#HOw=0P6CNAIuAwd%uN84V*Xj?y9U|af++SH4U=`vSVsFR!kul1= zPR82=?+KsZ0B*P|HzeSCUrq3bpDFR55xX4U#v61O3lcjZeNahmv0|Sh?Ik5f%&2>+ z?O^dhIhieSXL0udcX{q?)zOh-nG6Cv23O|~#R%^R+;|y}y04VTkDRQGlqgZKDg}S! zxs-67ev3^?8pvCoMG4Ot_LN=8Y*gx1XRB1Sl@BM9usqK#_6>PXg%b8aspOng1QlCd zc7=tC=lYV5Yj_c|uf6+L`pqMqNa87w^XD2$xd?RyCM7nXWA??lLcRICaLvI-U4igJ zxg*M#8?af)92>;tFy5*Jjs-r>HE=k<3$Fh;W#^dS<6Q;H??R$H$RCQm z!pFV9`-}f&ymH~>=nL1!wNnyr!2ueVdtC>5A#5^CiSr&){k>lP&(wM e#76r{_#ZYjzJPh5c9rlb{w^u#J~#>n`Tqjlc>rVp diff --git a/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_x86_32.so b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_x86_32.so index 0b22ffc1e7683f82ab0fd1eb4e1899a97f956fdd..7f9eee1be7b676b1358099da0855562cc4c1c2ba 100644 GIT binary patch literal 2308 zcmah~UrbwN6#v>@^iHc~lSCm-Nt|w&SS4wqjV7+Ri;{3%+`vC*?K)_Hq)=9FBP8H7 zbq_Z;jXvzbEyVEPgAXR#n}e6N1>)La24>NIb464zOb9_A%0H!^-)+CrIg_2_ch5QB z`SYD~?sva?T4`>ySS(DCjoFycT(tQ0Jq3%4)v|lpA?9EYFf73)Fi$cd6(QnDHR(%Q z0AOd8%*=n0^ox57c*e#UP*G-|C+GPC1;lB+_I_sWi+C&PE^>A~S-=U>0n~*(jxA~( z{}&G%YI}q1#O|PCt9Ps;CSJwZe&YLR7wiv!U@K8~I1uo4`GdZ|K%hOM`A-G|zF_ys zus=NLI~56p2N_$So@5GGD{@9XK(8$Fsv-{zl(OC?wCan;-+D8;;xg1qL$Mn#7}bso zKS#H1eAV&k4XP@eFv!B=1l`)K>j`LMr zxfzqrU6!0y)$*(y-7=(XHp-2Q~#5$PA^wRCnTda9!>sjsMGGN!=w6-I;mUI zP{n1`r0*nl9v{PS-J4_dWYvE!F5#XV9kDMb$QR4ys=6!%_yx0RVF zO^RQgOVfa9TIv5z9i?6}OWn9^j(FWMVov>*JZrQr#H!Bc5|+rbS)NN+BhP0uxrAh% ziO5TkWydFbck=n=mrIi@2sG##zRFW(n3m#g5?|eo)h2nHTv(t4GymlC)Cq61-HZ+{%WM)yK==zt_?w)*%)?4w{lpPwKT4%O#nA1Usfa>5-W z2yXZ;N6Y+$t(-qb3clIxd_DwMr0gSMAMrzkhX}B5k-dUA?v*OC(-ns6%e?2BNxzTe zCkUv~g+xWdy(XyXn4;7QrHmu)tnjn$VQ$h(aaP&9m&)%f=Ij{k?H}x6&Mtp{S3%Rl z%oz>@VId*Z;n)1ksRaf!GLvlD!XeYw-_zaBoSh-f*%u@;MTJAYK7U6?cW)?WQr7xi zMT`=&#A1K4~M!)$N_qf0Qos6MDQ#B z*TMz_2$+eWugLE(DGw5`2lhoTJw=_ENlWB+S}4TDz8{c`I68s#5$ delta 1115 zcmZ8gO=uHQ5PqAyq${i@+M)=-7A?gnh{0R}1wC}NLKn0U3D{H*3gXE_78DYrtW{r! zjgdWj^WdS-3Kdc)l9_&W5tVbWgpJU-l=tJDe{^0o2VwsA;*+4f1(0%S*ICLOuZvH1xiWxP0h*+{1>--THR4A=E&fuxJX@C44uila>$p=GI+&P#~)n zt_b3cT;D~8OY}KNB1!*k1nV2g?1j~%ibb>0A0pG*AXM=y6NRB z-lEHcvW!dF(Q^VVnU=$`kAzVF8|V=28^U$pa77H+oibM3>wbP>Ey7W3I)j{db0X|> z!?ox)thP=!FBYLjqwRQ-_sO6`vQzdLsx~Red4aQgDk-3&dF8f+`#7#4{^?H^Fzy1{ l_9J+6UXKqo+|*InB)Kr{o>3Fq58Xin4{cH0WqcP8{{gmQU^f5& diff --git a/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_x86_64.so b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul_x86_64.so index 8bb5760fb84dd5e48f07dddfad366c5770d87905..21b7a0150e72a697950254906b62bb49fd9e0e2a 100644 GIT binary patch literal 3008 zcmbtW-%lJ>6ut{nSnJXQBxqw3i)pox7z(B_B_W21JMByY1>4fvXx(gKrjWocE?a~L zeYRkyZ4;) zefOSwX79|qM%$4RpHE3ERWGT3XG*oQqt!1FWIL?NvEQWvstnB<{#w~xj0qMK6d5uT z@4wn|iRTeimEN3qJMg#4{?%&FF6b-eIL6I9+N--^!sn>N#%M~?J3X`TXc^)qTrwiZ z3r=ykpyYRaA>QNW$2?g_uy{RL{uSg2g{2<}NVgg@$klisY`8lg{rj{&A?lj?(tU0z`3Lo4=dY3u5pr6)H=_gMlPX)~6 zaBW+1S6lLkABU9~`h%G|^uC#D`G{27Q(B@u6`0h&)Np3iUvVJ1Be9atBUTK0So5lj zE3->`oDWUk4b%6)@~v6Eb*ug+E~`Q61TJeee`8I~v_sp^Uy+nqf7NWBF`@k@b7J6-BQMY&YXTP?SqMHD2}5Nn7HAM zG?-ZMM*eu^T>~nQ27-`D&!b>&KQ4L^GQZCmaC#~Tb>BG#3};F9dy>75vcGz=mkP31 z3bJ!sWIF-|WU}+vP4Cy?Ozv2!<#PvymmHW~abW3>AapKxT9*r2*9%&+tOes&ALR3k zw{hyg-XQimL=LzKjUUWonGA>=)o^qDY0Bc34t12c2IG6s>!#-?M{ z8at{7g_L`GFEXP;GvJ9V?f#Ylb<>{QK5_cV(MV~r9FW{ot{m>~t`1eazZb|>w+xx( zF0w{7Pw83D>_E-3YxcXl8ozxXqcih)vYihs{)p4p^w;$_^w8O+shMlinx@9`nwhFN zg!iWj!RZ-1nazzJriW>AUp)+WJI>Q=l>1~OpO2DB)+uNIVZH|>PX6AHv41z?I`ipW zL9NUKm1V)lc`or1SIPSF8ra8ia!zZqA+`%x#|@)aCflH^Q{peYKO}_Yw!5EIL(gbr zXmmg|^oECey;(e_8e(>Tm|QrD_JreM)eyHw;^@aQInnF}hX?w)-RM{}G#Kvb>5H6K z4c*a!0Xu@xD2z8dV((Bq=8Zi3AJ=~6P zsT=ydshS#VrMMo)Bb2};mdcC zd@oHgPyCX<=osiHoA}KG9>L^4{1-xC9XUsys*^Y%Tca2lg)iqKZs7}_MTgW*VhB5V teUpt9h!iK=k>|=ro9vxKUHF*!Z*4Z>Sk@IYU2@y;ffssS0vU_>{{Rr%$qoPj literal 2744 zcmb_eQD|Fb6#j2>x7DrVremTHVy7Y)7+C{-SfFSxCgWVWsXFbXXw0(qjuhH;rYWT0 zW-;r`^>W+#B7GV3Md+J-5h^O&G}xwEFr}zeUwm+g^s(YLI~LE${ZGw8MTh>7d(MB( z`M&@B=lu7-c_(h3X;BoxBOsm?f6avGk&gT!jv(7v;l`eENRbDf48>IHDt5RpP1= z$I|m@1WVrE*_*jR+TTB|H-FeCkjsmg5l8F&UxRp$;SPEp_8HqHG3`s1>b`!9p{!-4 zGU?%zmA3}QbLk7iR%&?YLN=Y9NWC>~WhZ=sjI0CicTtk%o9x_|$tFA3f4<4i_=P6> zlH|AYasyKIfqhedO+Tmi>94-x9CQwY9j}1)aRux|6$*C(+RO_Gb$Z*tIdcM>PH^(A z;Jg&Xm_!AfcopovNpO$93+`hsk%FCE0Q-x4tyVMLuJ^$0zWy0?5<+FLKe>!%aJ#NU zVMPUd+>1v%!(PV4=C^yajvfI{V%~K677gVq(}~X-H4oQV0H<#Wlx0I%0q4pbD68Nk zixB!6l(=UU{!lgdBWx5#Rv?P2tY8jB(99jvMheWnjI4p%AAqSf(2Ol8?Y2OqY`Vv9AVk2_k5^Ab*0he85L!^3 z(Y0XPwANqI+S}fTLP<5F8*l%DWmy9I%3LD4VcN+>ZTd+nh`nltzBlbnxV332Wi#~M zi*EOan77yKsYRkC5*zd3Rr}~%3Y(d8Z-(X{=N(%t2SM|M~rJ` zXan=S`YpI^Q+0^FYhY<^ZGo~iu0__i&5IYqdwlk9M&ajsl>HI^>H zV{YB17ReNwQGJ_=`)V*L?(zn2y5nni82f50{@n#v|F1kMd=?GDz%L7lq4gOsQz zCwH`VRP4=PPoXv9y{^BZC-pb=RIsS$^W9XDgh!gl5uoq zY24<7{OR7)I2(`eP4-dhJT0w)?_Ulfy8Z4s5gwelG&V6J!kP3~#-HV~BAm5`)8s;J zbTFMui*U}$=g^NjIni7XPLB)?i15JZ$cS|b-N=MjBFfBIF6(Q){1^9TD%RhtCyeJg z<^LpUzf^l!X2Jbn*(urS{823Tnfpq8rd~0g`@xdqxJ}$QmKtJ-oAKOlmi0cNp9k~s z{1H@$i}8FOS@L;gTyy?UAeMZL=kE{8HuNsources.count; ++i) { + const iree_hal_executable_source_file_v0_t* source_file = + &library->sources.files[i]; + IREE_TRACE_PUBLISH_SOURCE_FILE(source_file->path, source_file->path_length, + source_file->content, + source_file->content_length); + } +} + iree_zone_id_t iree_hal_executable_library_call_zone_begin( iree_string_view_t executable_identifier, const iree_hal_executable_library_v0_t* library, iree_host_size_t ordinal) { @@ -113,17 +124,34 @@ iree_zone_id_t iree_hal_executable_library_call_zone_begin( const char* source_file = NULL; size_t source_file_length = 0; - uint32_t source_line; - if (library->exports.src_locs != NULL) { - // We have source location data, so use it. - source_file = library->exports.src_locs[ordinal].path; - source_file_length = library->exports.src_locs[ordinal].path_length; - source_line = library->exports.src_locs[ordinal].line; - } else { - // No source location data, so make do with what we have. - source_file = executable_identifier.data; - source_file_length = executable_identifier.size; - source_line = ordinal; + uint32_t source_line = 0; + if (library->exports.stage_locations != NULL) { + for (uint32_t i = 0; i < library->exports.stage_locations->count; ++i) { + // TODO(benvanik): a way to select what location is chosen. For now we + // just pick the first one. + // const char* name = library->exports.stage_locations->names[i]; + const iree_hal_executable_source_location_v0_t* location = + &library->exports.stage_locations->locations[i]; + source_file = location->path; + source_file_length = location->path_length; + source_line = location->line; + break; + } + } + if (source_file == NULL) { + if (library->exports.source_locations != NULL) { + // We have source location data, so use it. + const iree_hal_executable_source_location_v0_t* location = + &library->exports.source_locations[ordinal]; + source_file = location->path; + source_file_length = location->path_length; + source_line = location->line; + } else { + // No source location data, so make do with what we have. + source_file = executable_identifier.data; + source_file_length = executable_identifier.size; + source_line = ordinal; + } } IREE_TRACE_ZONE_BEGIN_EXTERNAL(z0, source_file, source_file_length, diff --git a/runtime/src/iree/hal/local/executable_library_util.h b/runtime/src/iree/hal/local/executable_library_util.h index 931ddf242b02..9145eac4be83 100644 --- a/runtime/src/iree/hal/local/executable_library_util.h +++ b/runtime/src/iree/hal/local/executable_library_util.h @@ -34,6 +34,11 @@ void iree_hal_executable_library_deinitialize_imports( iree_allocator_t host_allocator); #if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION + +// Publishes all source files in the library to the tracing provider, if any. +void iree_hal_executable_library_publish_source_files( + const iree_hal_executable_library_v0_t* library); + iree_zone_id_t iree_hal_executable_library_call_zone_begin( iree_string_view_t executable_identifier, const iree_hal_executable_library_v0_t* library, iree_host_size_t ordinal); @@ -41,11 +46,17 @@ iree_zone_id_t iree_hal_executable_library_call_zone_begin( zone_id, executable_identifier, library, ordinal) \ iree_zone_id_t zone_id = iree_hal_executable_library_call_zone_begin( \ executable_identifier, library, ordinal) + #else + +static inline void iree_hal_executable_library_publish_source_files( + const iree_hal_executable_library_v0_t* library) {} + #define IREE_HAL_EXECUTABLE_LIBRARY_CALL_TRACE_ZONE_BEGIN( \ zone_id, executable_identifier, library, ordinal) \ iree_zone_id_t zone_id = 0; \ (void)zone_id; + #endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION #endif // IREE_HAL_LOCAL_EXECUTABLE_LIBRARY_UTIL_H_ diff --git a/runtime/src/iree/hal/local/loaders/embedded_elf_loader.c b/runtime/src/iree/hal/local/loaders/embedded_elf_loader.c index e87b42842b3a..6e9efc184760 100644 --- a/runtime/src/iree/hal/local/loaders/embedded_elf_loader.c +++ b/runtime/src/iree/hal/local/loaders/embedded_elf_loader.c @@ -156,6 +156,11 @@ static iree_status_t iree_hal_elf_executable_create( executable->library.v0); } + // Publish the executable sources with the tracing infrastructure. + if (iree_status_is_ok(status)) { + iree_hal_executable_library_publish_source_files(executable->library.v0); + } + if (iree_status_is_ok(status)) { *out_executable = (iree_hal_executable_t*)executable; } else { diff --git a/runtime/src/iree/hal/local/loaders/static_library_loader.c b/runtime/src/iree/hal/local/loaders/static_library_loader.c index bfce0c3ecc52..5dc7b2949a94 100644 --- a/runtime/src/iree/hal/local/loaders/static_library_loader.c +++ b/runtime/src/iree/hal/local/loaders/static_library_loader.c @@ -102,6 +102,11 @@ static iree_status_t iree_hal_static_executable_create( executable->library.v0); } + // Publish the executable sources with the tracing infrastructure. + if (iree_status_is_ok(status)) { + iree_hal_executable_library_publish_source_files(executable->library.v0); + } + if (iree_status_is_ok(status)) { *out_executable = (iree_hal_executable_t*)executable; } else { diff --git a/runtime/src/iree/hal/local/loaders/system_library_loader.c b/runtime/src/iree/hal/local/loaders/system_library_loader.c index 5082093b129a..755cb025bb6f 100644 --- a/runtime/src/iree/hal/local/loaders/system_library_loader.c +++ b/runtime/src/iree/hal/local/loaders/system_library_loader.c @@ -277,6 +277,11 @@ static iree_status_t iree_hal_system_executable_create( executable->library.v0); } + // Publish the executable sources with the tracing infrastructure. + if (iree_status_is_ok(status)) { + iree_hal_executable_library_publish_source_files(executable->library.v0); + } + if (iree_status_is_ok(status)) { *out_executable = (iree_hal_executable_t*)executable; } else { diff --git a/runtime/src/iree/schemas/bytecode_module_def.fbs b/runtime/src/iree/schemas/bytecode_module_def.fbs index 3ff1d502e51a..e264ba7c27d8 100644 --- a/runtime/src/iree/schemas/bytecode_module_def.fbs +++ b/runtime/src/iree/schemas/bytecode_module_def.fbs @@ -176,7 +176,7 @@ table FusedLocDef { locations:[int32]; } -// mlir/IR/BuiltinLocationAttributes.td : FusedLoc +// mlir/IR/BuiltinLocationAttributes.td : NameLoc table NameLocDef { name:string; child_location:int32; diff --git a/runtime/src/iree/schemas/rocm_executable_def.fbs b/runtime/src/iree/schemas/rocm_executable_def.fbs index 4cdcc650ea62..6df6d022de33 100644 --- a/runtime/src/iree/schemas/rocm_executable_def.fbs +++ b/runtime/src/iree/schemas/rocm_executable_def.fbs @@ -24,6 +24,23 @@ table FileLineLocDef { line:int32; } +// Source location keyed by a string compilation stage name. +table StageLocationDef { + stage:string; + location:FileLineLocDef; +} + +// Table of stage locations sorted in ascending order by stage name. +table StageLocationsDef { + locations:[StageLocationDef]; +} + +// An embedded source file referenced by locations in the file. +table SourceFileDef { + path:string; + content:[uint8]; +} + table ExecutableDef { // A map of entry point ordinals to string names as used in the shader // library. @@ -44,6 +61,13 @@ table ExecutableDef { // This information is optional and may be used by debuggers and profilers to // associate executable entry points with the source that generated them. source_locations:[FileLineLocDef]; + + // Table of source locations per entry point keyed by a string compilation + // stage name. Sorted ascending by name. + stage_locations:[StageLocationsDef]; + + // Embedded source files sorted ascending by path. + source_files:[SourceFileDef]; } root_type ExecutableDef; diff --git a/runtime/src/iree/schemas/spirv_executable_def.fbs b/runtime/src/iree/schemas/spirv_executable_def.fbs index e6ead6b083af..4eaea8fc426b 100644 --- a/runtime/src/iree/schemas/spirv_executable_def.fbs +++ b/runtime/src/iree/schemas/spirv_executable_def.fbs @@ -21,6 +21,23 @@ table FileLineLocDef { line:int32; } +// Source location keyed by a string compilation stage name. +table StageLocationDef { + stage:string; + location:FileLineLocDef; +} + +// Table of stage locations sorted in ascending order by stage name. +table StageLocationsDef { + locations:[StageLocationDef]; +} + +// An embedded source file referenced by locations in the file. +table SourceFileDef { + path:string; + content:[uint8]; +} + // A SPIR-V shader module and runtime pipeline layout description. // This information is used to create the VkShaderModule, VkPipelineLayout, and // any required VkDescriptorSetLayouts. @@ -47,6 +64,13 @@ table ExecutableDef { // This information is optional and may be used by debuggers and profilers to // associate executable entry points with the source that generated them. source_locations:[FileLineLocDef]; + + // Table of source locations per entry point keyed by a string compilation + // stage name. Sorted ascending by name. + stage_locations:[StageLocationsDef]; + + // Embedded source files sorted ascending by path. + source_files:[SourceFileDef]; } root_type ExecutableDef;