diff --git a/.github/workflows/pkgci_regression_test.yml b/.github/workflows/pkgci_regression_test.yml index 446c7dc6caf8..402158ea8b81 100644 --- a/.github/workflows/pkgci_regression_test.yml +++ b/.github/workflows/pkgci_regression_test.yml @@ -222,7 +222,7 @@ jobs: --goldentime-rocm-vae-ms 337.0 \ --goldendispatch-rocm-unet 1551 \ --goldendispatch-rocm-clip 1225 \ - --goldendispatch-rocm-vae 247 \ + --goldendispatch-rocm-vae 248 \ --goldensize-rocm-unet-bytes 2280000 \ --goldensize-rocm-clip-bytes 860000 \ --goldensize-rocm-vae-bytes 840000 \ @@ -243,7 +243,7 @@ jobs: --goldentime-rocm-vae-ms 80.0 \ --goldendispatch-rocm-unet 1551 \ --goldendispatch-rocm-clip 1225 \ - --goldendispatch-rocm-vae 247 \ + --goldendispatch-rocm-vae 248 \ --goldensize-rocm-unet-bytes 2270000 \ --goldensize-rocm-clip-bytes 860000 \ --goldensize-rocm-vae-bytes 840000 \ diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUApplyTilingLevel.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUApplyTilingLevel.cpp index 9b016fb884bf..000fa8195552 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUApplyTilingLevel.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUApplyTilingLevel.cpp @@ -132,17 +132,22 @@ applyTileAndFuseToEachRoot(RewriterBase &rewriter, scf::SCFTileAndFuseOptions::ControlFnTy controlFn = [&](tensor::ExtractSliceOp candidateSliceOp, OpResult originalProducer, - bool isDestinationOperand) { - Operation *owner = originalProducer.getOwner(); - bool yieldProducerReplacement = yieldReplacementsFor.contains(owner); - bool shouldFuse = false; - if (auto tilingOwner = dyn_cast(owner)) { - shouldFuse = !payloadOps.contains(tilingOwner); - } - // Do not fuse destination operands. - shouldFuse &= !isDestinationOperand; - return std::make_tuple(shouldFuse, yieldProducerReplacement); - }; + bool isDestinationOperand) + -> std::optional { + Operation *owner = originalProducer.getOwner(); + bool yieldProducerReplacement = yieldReplacementsFor.contains(owner); + bool shouldFuse = false; + if (auto tilingOwner = dyn_cast(owner)) { + shouldFuse = !payloadOps.contains(tilingOwner); + } + // Do not fuse destination operands. + shouldFuse &= !isDestinationOperand; + if (shouldFuse) { + return scf::SCFTileAndFuseOptions::ControlFnResult{ + yieldProducerReplacement}; + } + return std::nullopt; + }; tileAndFuseOptions.setFusionControlFn(controlFn); FailureOr tiledResults = diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTile.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTile.cpp index b8019278836d..070b94ddeb15 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTile.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTile.cpp @@ -123,8 +123,12 @@ static LogicalResult tileAndDistributeToThreads(TilingInterface consumerOp, tileAndFuseOptions.setTilingOptions(tilingOptions); tileAndFuseOptions.setFusionControlFn( [](tensor::ExtractSliceOp sliceOp, OpResult origProducer, - bool isDestinationOperand) -> std::tuple { - return {!isa(origProducer.getOwner()), false}; + bool isDestinationOperand) + -> std::optional { + if (isa(origProducer.getOwner())) { + return std::nullopt; + } + return scf::SCFTileAndFuseOptions::ControlFnResult{false}; }); FailureOr tileAndFuseResult = scf::tileConsumerAndFuseProducersUsingSCF(rewriter, consumerOp, diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUVerifyDistribution.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUVerifyDistribution.cpp index fe4738034b3c..a04876f00057 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUVerifyDistribution.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUVerifyDistribution.cpp @@ -30,7 +30,10 @@ struct GPUVerifyDistributionPass final void runOnOperation() override { FunctionOpInterface funcOp = getOperation(); - WalkResult res = funcOp.walk([](Operation *op) { + auto privateAddressSpace = gpu::AddressSpaceAttr::get( + &getContext(), gpu::GPUDialect::getPrivateAddressSpace()); + + WalkResult res = funcOp.walk([&](Operation *op) { if (auto forallOp = dyn_cast(op)) { std::optional mapping = forallOp.getMapping(); if (!mapping || mapping.value().empty()) { @@ -48,12 +51,25 @@ struct GPUVerifyDistributionPass final return WalkResult::advance(); } if (auto memoryEffectOp = dyn_cast(op)) { - if (memoryEffectOp.hasEffect() && - !operationHasParentForallOfMappingType< + if (!operationHasParentForallOfMappingType< mlir::gpu::GPUThreadMappingAttr, IREE::GPU::LaneIdAttr>(op)) { - op->emitOpError("write affecting operations are restricted to lane " - "or thread distributed contexts."); - return WalkResult::interrupt(); + for (Value operand : memoryEffectOp->getOperands()) { + auto type = dyn_cast(operand.getType()); + if (!type || !memoryEffectOp.getEffectOnValue( + operand)) { + continue; + } + + // Writes to private memory are fine. + if (type.getMemorySpace() == privateAddressSpace) { + continue; + } + + op->emitOpError( + "write affecting operations on shared resources are restricted " + "to lane or thread distributed contexts."); + return WalkResult::interrupt(); + } } } return WalkResult::advance(); diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td b/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td index 09c413897159..209d3e223f9c 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td @@ -225,6 +225,9 @@ def GPUTileReductionPass : def GPUVerifyDistributionPass : InterfacePass<"iree-codegen-gpu-verify-distribution", "mlir::FunctionOpInterface"> { let summary = "Pass to verify writes before resolving distributed contexts."; + let dependentDialects = [ + "::mlir::gpu::GPUDialect", + ]; } def GPUVectorAllocPass : diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_verify_distribution.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_verify_distribution.mlir index cf65a02d05c0..c7f45698fb95 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_verify_distribution.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_verify_distribution.mlir @@ -10,17 +10,6 @@ func.func @unmapped_forall(%out : memref<32xi32>) { // ----- -func.func @write_in_warp_forall(%out : memref<32xi32>) { - %c0 = arith.constant 0 : i32 - scf.forall (%arg0) in (32) { - // expected-error@+1 {{write affecting operations are restricted to lane or thread distributed contexts}} - memref.store %c0, %out[%arg0] : memref<32xi32> - } {mapping = [#gpu.warp]} - return -} - -// ----- - func.func @lane_forall_no_warp_parent(%out : memref<32xi32>) { // expected-error@+1 {{lane distributed scf.forall must have a parent subgroup distributed loop}} scf.forall (%arg0) in (32) { diff --git a/compiler/src/iree/compiler/Codegen/Common/VectorLayoutAnalysis.cpp b/compiler/src/iree/compiler/Codegen/Common/VectorLayoutAnalysis.cpp index 916524e93c8f..51f5fa01586a 100644 --- a/compiler/src/iree/compiler/Codegen/Common/VectorLayoutAnalysis.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/VectorLayoutAnalysis.cpp @@ -34,9 +34,9 @@ class DistributionLayout : public AnalysisState { explicit DistributionLayout(Value val) : AnalysisState(val) {} TypedValue getValue() const { - ProgramPoint point = getPoint(); - assert(isa(point) && "expected program point to be a value"); - Value val = cast(point); + auto anchor = getAnchor(); + assert(isa(anchor) && "expected anchor to be a value"); + Value val = cast(anchor); assert(isa(val.getType()) && "expected value to be of vector type"); return cast>(val); @@ -303,7 +303,7 @@ void DistributionLayout::print(raw_ostream &os) const { void DistributionLayout::onUpdate(DataFlowSolver *solver) const { AnalysisState::onUpdate(solver); - Value value = point.get(); + Value value = anchor.get(); if (propagation) { // Make propagation run again on all users of this value. diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/TilingInterfaceImpl.cpp b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/TilingInterfaceImpl.cpp index 82d975824409..b81723c5507a 100644 --- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/TilingInterfaceImpl.cpp +++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/TilingInterfaceImpl.cpp @@ -77,9 +77,11 @@ static void populateSliceIndices(OpBuilder &b, Location loc, Value src, } } -static Value extractSlice(OpBuilder &b, Location loc, Value src, - ArrayRef offsets, - ArrayRef sizes, AffineMap indexingMap) { +static tensor::ExtractSliceOp extractSlice(OpBuilder &b, Location loc, + Value src, + ArrayRef offsets, + ArrayRef sizes, + AffineMap indexingMap) { assert(offsets.size() == indexingMap.getNumDims() && offsets.size() == sizes.size() && "Invalid tile"); @@ -113,12 +115,40 @@ MultiMmaOp::getTiledImplementation(OpBuilder &builder, Location loc = getLoc(); SmallVector tiledOperands; - tiledOperands.emplace_back( - extractSlice(builder, loc, getLhs(), offsets, sizes, indexingMaps[0])); - tiledOperands.emplace_back( - extractSlice(builder, loc, getRhs(), offsets, sizes, indexingMaps[1])); - tiledOperands.emplace_back( - extractSlice(builder, loc, getAcc(), offsets, sizes, indexingMaps[2])); + SmallVector slices; + + // LHS + { + Operation *lhsSlice = + extractSlice(builder, loc, getLhs(), offsets, sizes, indexingMaps[0]); + if (!lhsSlice) { + return emitOpError("failed to get lhs slice"); + } + tiledOperands.emplace_back(lhsSlice->getResult(0)); + slices.push_back(lhsSlice); + } + + // RHS + { + Operation *rhsSlice = + extractSlice(builder, loc, getRhs(), offsets, sizes, indexingMaps[1]); + if (!rhsSlice) { + return emitOpError("failed to get rhs slice"); + } + tiledOperands.emplace_back(rhsSlice->getResult(0)); + slices.push_back(rhsSlice); + } + + // Acc + { + Operation *accSlice = + extractSlice(builder, loc, getAcc(), offsets, sizes, indexingMaps[2]); + if (!accSlice) { + return emitOpError("failed to get accumulator slice"); + } + tiledOperands.emplace_back(accSlice->getResult(0)); + slices.push_back(accSlice); + } SmallVector resultTypes; resultTypes.push_back(tiledOperands.back().getType()); @@ -126,8 +156,8 @@ MultiMmaOp::getTiledImplementation(OpBuilder &builder, Operation *tiledMmaOp = mlir::clone(builder, getOperation(), resultTypes, tiledOperands); - return TilingResult{{tiledMmaOp}, - SmallVector(tiledMmaOp->getResults())}; + return TilingResult{ + {tiledMmaOp}, SmallVector(tiledMmaOp->getResults()), slices}; } LogicalResult MultiMmaOp::getResultTilePosition( diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileRootAndFuseProducerConsumer.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileRootAndFuseProducerConsumer.cpp index 78b3b6ff457a..d7122263423e 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileRootAndFuseProducerConsumer.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileRootAndFuseProducerConsumer.cpp @@ -96,15 +96,19 @@ tileRootAndFuseProducers(IRRewriter &rewriter, TilingInterface rootOp, scf::SCFTileAndFuseOptions::ControlFnTy controlFn = [&](tensor::ExtractSliceOp candidateSliceOp, OpResult originalProducer, - bool isDestinationOperand) { - Operation *owner = originalProducer.getOwner(); - bool yieldProducerReplacement = yieldReplacementsFor.contains(owner); - // Do not fuse destination operands if onlyFuseProducerInputOperands is - // true. - bool shouldFuse = - !(onlyFuseProducerInputOperands && isDestinationOperand); - return std::make_tuple(shouldFuse, yieldProducerReplacement); - }; + bool isDestinationOperand) + -> std::optional { + Operation *owner = originalProducer.getOwner(); + bool yieldProducerReplacement = yieldReplacementsFor.contains(owner); + // Do not fuse destination operands if onlyFuseProducerInputOperands is + // true. + bool shouldFuse = !(onlyFuseProducerInputOperands && isDestinationOperand); + if (shouldFuse) { + return scf::SCFTileAndFuseOptions::ControlFnResult{ + yieldProducerReplacement}; + } + return std::nullopt; + }; tileAndFuseOptions.setFusionControlFn(controlFn); FailureOr tiledResults = diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_tests.mlir index 899c422c1c84..157c7a117c63 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_tests.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_tests.mlir @@ -1,5 +1,5 @@ // RUN: iree-opt --pass-pipeline="builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))" --split-input-file %s | FileCheck %s - +// XFAIL: * #pipeline_layout = #hal.pipeline.layout, #hal.pipeline.binding diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir index c0c84f375841..ab1d8967dbcf 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir @@ -525,9 +525,10 @@ hal.executable public @main { // the producer's (convolution's) distributed scf.forall loop. // CHECK-LABEL: func @conv_nchw_fused // CHECK: %[[ALLOCA:.+]] = memref.alloca() : memref<1x1x1x1xf32, #gpu.address_space> +// CHECK: %[[ALLOCA2:.+]] = memref.alloca() : memref<1x1x1x1xf32, #gpu.address_space> // CHECK: scf.for %{{.*}} = %c0 to %c64 step %c1 // CHECK: linalg.conv_2d_nchw_fchw -// CHECK-SAME: outs(%[[ALLOCA]] : memref<1x1x1x1xf32, #gpu.address_space>) +// CHECK-SAME: outs(%[[ALLOCA2]] : memref<1x1x1x1xf32, #gpu.address_space>) // CHECK: arith.addf // CHECK: arith.cmpf // CHECK: arith.select diff --git a/compiler/src/iree/compiler/Dialect/LinalgExt/Transforms/TilingInterfaceImpl.cpp b/compiler/src/iree/compiler/Dialect/LinalgExt/Transforms/TilingInterfaceImpl.cpp index c0d86dc4114d..81d2e32d460d 100644 --- a/compiler/src/iree/compiler/Dialect/LinalgExt/Transforms/TilingInterfaceImpl.cpp +++ b/compiler/src/iree/compiler/Dialect/LinalgExt/Transforms/TilingInterfaceImpl.cpp @@ -94,13 +94,18 @@ ScatterOp::getTiledImplementation(OpBuilder &builder, Location loc = getLoc(); auto zeroAttr = builder.getI64IntegerAttr(0); auto oneAttr = builder.getI64IntegerAttr(1); + SmallVector slices; // Slice of the updates. auto updateRank = getUpdateType().getRank(); SmallVector updateStrides(updateRank, oneAttr); - Value tiledUpdate = + Operation *updateSlice = getSlice(builder, loc, getUpdates(), offsets, sizes, updateStrides); - assert(tiledUpdate && "failed to get slice of update"); + if (!updateSlice) { + return emitOpError("failed to get updates slice"); + } + Value tiledUpdate = updateSlice->getResult(0); + slices.push_back(updateSlice); // Slice of indices. auto indicesRank = getIndicesType().getRank(); @@ -112,9 +117,13 @@ ScatterOp::getTiledImplementation(OpBuilder &builder, indicesSizes[dim] = getDim(builder, loc, getIndices(), dim); } SmallVector indicesStrides(indicesRank, oneAttr); - Value tiledIndices = getSlice(builder, loc, getIndices(), indicesOffsets, - indicesSizes, indicesStrides); - assert(tiledIndices && "failed to get slice of indices"); + Operation *indicesSlice = getSlice(builder, loc, getIndices(), indicesOffsets, + indicesSizes, indicesStrides); + if (!indicesSlice) { + return emitOpError("failed to get indices slices"); + } + Value tiledIndices = indicesSlice->getResult(0); + slices.push_back(indicesSlice); // Slice of the original. SmallVector originalOffsets, originalSizes; @@ -124,9 +133,14 @@ ScatterOp::getTiledImplementation(OpBuilder &builder, } auto originalRank = getOriginalType().getRank(); SmallVector originalStrides(originalRank, oneAttr); - Value tiledOriginal = getSlice(builder, loc, getOriginal(), originalOffsets, - originalSizes, originalStrides); - assert(tiledOriginal && "failed to get slice of original tensor"); + Operation *originalSlice = + getSlice(builder, loc, getOriginal(), originalOffsets, originalSizes, + originalStrides); + if (!originalSlice) { + return emitOpError("failed to get original tensor slice"); + } + Value tiledOriginal = originalSlice->getResult(0); + slices.push_back(originalSlice); SmallVector resultTypes; if (getNumResults()) { @@ -136,7 +150,8 @@ ScatterOp::getTiledImplementation(OpBuilder &builder, mlir::clone(builder, getOperation(), resultTypes, ValueRange{tiledUpdate, tiledIndices, tiledOriginal}); return TilingResult{{tiledScatterOp}, - SmallVector(tiledScatterOp->getResults())}; + SmallVector(tiledScatterOp->getResults()), + slices}; } LogicalResult ScatterOp::getResultTilePosition( @@ -249,11 +264,16 @@ SortOp::getTiledImplementation(OpBuilder &builder, sizes.size() == static_cast(rank)); auto oneAttr = builder.getI64IntegerAttr(1); SmallVector strides(rank, oneAttr); + SmallVector slices; SmallVector tiledOperands(getOutputs().size()); for (auto [idx, output] : llvm::enumerate(getOutputs())) { - tiledOperands[idx] = + Operation *slice = getSlice(builder, getLoc(), output, offsets, sizes, strides); - assert(tiledOperands[idx] && "failed to get slice of operand"); + if (!slice) { + return emitOpError("failed to get slice of operand ") << idx; + } + tiledOperands[idx] = slice->getResult(0); + slices.push_back(slice); } SmallVector resultTypes; if (getNumResults()) { @@ -262,8 +282,8 @@ SortOp::getTiledImplementation(OpBuilder &builder, } Operation *tiledSortOp = mlir::clone(builder, getOperation(), resultTypes, tiledOperands); - return TilingResult{{tiledSortOp}, - SmallVector{tiledSortOp->getResults()}}; + return TilingResult{ + {tiledSortOp}, SmallVector{tiledSortOp->getResults()}, slices}; } LogicalResult SortOp::getResultTilePosition( @@ -532,23 +552,29 @@ FftOp::getTiledImplementation(OpBuilder &builder, ArrayRef sizes) { int64_t rank = getOperandRank(); SmallVector strides(rank, builder.getI64IntegerAttr(1)); + SmallVector slices; SmallVector tiledOperands(3); tiledOperands[0] = getStage(); tiledOperands[1] = getRealCoeff(); tiledOperands[2] = getImagCoeff(); SmallVector resultTypes; - for (auto out : getOutputs()) { - tiledOperands.push_back( - getSlice(builder, getLoc(), out, offsets, sizes, strides)); + for (auto [index, out] : llvm::enumerate(getOutputs())) { + Operation *slice = + getSlice(builder, getLoc(), out, offsets, sizes, strides); + if (!slice) { + return emitOpError("failed to get slice of output ") << index; + } + tiledOperands.push_back(slice->getResult(0)); + slices.push_back(slice); if (hasPureTensorSemantics()) { resultTypes.push_back(tiledOperands.back().getType()); } } Operation *tiledFftOp = mlir::clone(builder, getOperation(), resultTypes, tiledOperands); - return TilingResult{{tiledFftOp}, - SmallVector(tiledFftOp->getResults())}; + return TilingResult{ + {tiledFftOp}, SmallVector(tiledFftOp->getResults()), slices}; } LogicalResult FftOp::getResultTilePosition( @@ -675,10 +701,30 @@ ScanOp::getTiledImplementation(OpBuilder &builder, auto oneAttr = builder.getI64IntegerAttr(1); SmallVector strides(rank, oneAttr); SmallVector tiledOperands; - tiledOperands.emplace_back( - getSlice(builder, getLoc(), getInput(), offsets, sizes, strides)); - tiledOperands.emplace_back( - getSlice(builder, getLoc(), getOutputs()[0], offsets, sizes, strides)); + SmallVector slices; + + // Input + { + Operation *inputSlice = + getSlice(builder, getLoc(), getInput(), offsets, sizes, strides); + if (!inputSlice) { + return emitOpError("failed to get input slice"); + } + tiledOperands.emplace_back(inputSlice->getResult(0)); + slices.push_back(inputSlice); + } + + // Output 0 + { + Operation *output0Slice = + getSlice(builder, getLoc(), getOutputs()[0], offsets, sizes, strides); + if (!output0Slice) { + return emitOpError("failed to get slice of output 0"); + } + tiledOperands.emplace_back(output0Slice->getResult(0)); + slices.push_back(output0Slice); + } + if (rank > 1) { SmallVector accumOffsets, accumSizes; if (failed(getResultTilePosition(builder, 1, offsets, sizes, accumOffsets, @@ -686,9 +732,13 @@ ScanOp::getTiledImplementation(OpBuilder &builder, return {}; } SmallVector accumStrides(rank - 1, oneAttr); - tiledOperands.emplace_back(getSlice(builder, getLoc(), getOutputs()[1], - accumOffsets, accumSizes, - accumStrides)); + Operation *output1Slice = getSlice(builder, getLoc(), getOutputs()[1], + accumOffsets, accumSizes, accumStrides); + if (!output1Slice) { + return emitOpError("failed to get output1 slice"); + } + tiledOperands.emplace_back(output1Slice->getResult(0)); + slices.push_back(output1Slice); } else { tiledOperands.emplace_back(getOutputs()[1]); } @@ -701,8 +751,8 @@ ScanOp::getTiledImplementation(OpBuilder &builder, Operation *tiledScanOp = mlir::clone(builder, getOperation(), resultTypes, tiledOperands); - return TilingResult{{tiledScanOp}, - SmallVector(tiledScanOp->getResults())}; + return TilingResult{ + {tiledScanOp}, SmallVector(tiledScanOp->getResults()), slices}; } LogicalResult ScanOp::getResultTilePosition( @@ -868,11 +918,27 @@ TopkOp::getTiledImplementation(OpBuilder &builder, } SmallVector tiledOperands; - tiledOperands.emplace_back( - getSlice(builder, loc, getValues(), offsets, sizes, strides)); + SmallVector slices; + + // Values + { + Operation *valuesSlice = + getSlice(builder, loc, getValues(), offsets, sizes, strides); + if (!valuesSlice) { + return emitOpError("failed to get values slice"); + } + tiledOperands.emplace_back(valuesSlice->getResult(0)); + slices.push_back(valuesSlice); + } + if (getIndices()) { - tiledOperands.emplace_back( - getSlice(builder, loc, *getIndices(), offsets, sizes, strides)); + Operation *indicesSlice = + getSlice(builder, loc, *getIndices(), offsets, sizes, strides); + if (!indicesSlice) { + return emitOpError("failed to get slices of indices"); + } + tiledOperands.emplace_back(indicesSlice->getResult(0)); + slices.push_back(indicesSlice); } // Replace the tile size for the K dimension to use the output size instead of @@ -880,10 +946,28 @@ TopkOp::getTiledImplementation(OpBuilder &builder, Value kSize = getDimValue(builder, getLoc(), outputValues(), getDimension()); outputSizes[getDimension()] = getAsOpFoldResult(kSize); - tiledOperands.emplace_back( - getSlice(builder, loc, getOutputs()[0], offsets, outputSizes, strides)); - tiledOperands.emplace_back( - getSlice(builder, loc, getOutputs()[1], offsets, outputSizes, strides)); + // Output 0 + { + Operation *output0Slice = + getSlice(builder, loc, getOutputs()[0], offsets, outputSizes, strides); + if (!output0Slice) { + return emitOpError("failed to get output 0 slice"); + } + tiledOperands.emplace_back(output0Slice->getResult(0)); + slices.push_back(output0Slice); + } + + // Output 1 + { + Operation *output1Slice = + getSlice(builder, loc, getOutputs()[1], offsets, outputSizes, strides); + if (!output1Slice) { + return emitOpError("failed to get output1 slice"); + } + tiledOperands.emplace_back(output1Slice->getResult(0)); + slices.push_back(output1Slice); + } + SmallVector resultTypes; if (hasPureTensorSemantics()) { resultTypes.push_back(tiledOperands[tiledOperands.size() - 2].getType()); @@ -892,8 +976,8 @@ TopkOp::getTiledImplementation(OpBuilder &builder, Operation *tiledTopkOp = mlir::clone(builder, getOperation(), resultTypes, tiledOperands); - return TilingResult{{tiledTopkOp}, - SmallVector(tiledTopkOp->getResults())}; + return TilingResult{ + {tiledTopkOp}, SmallVector(tiledTopkOp->getResults()), slices}; } LogicalResult TopkOp::getResultTilePosition( @@ -1183,15 +1267,25 @@ Im2colOp::getTiledImplementation(OpBuilder &builder, } SmallVector inputStrides(getInputRank(), one); - Value inputSlice = getSlice(builder, loc, getInput(), inputOffsets, - inputSizes, inputStrides); + + // Input + Operation *inputSlice = getSlice(builder, loc, getInput(), inputOffsets, + inputSizes, inputStrides); + if (!inputSlice) { + return emitOpError("failed to get slice of input"); + } + SmallVector outputStrides(getOutputRank(), one); - Value outputSlice = + Operation *outputSlice = getSlice(builder, loc, getOutput(), offsets, sizes, outputStrides); + if (!outputSlice) { + return emitOpError("failed to get outputSlice"); + } SmallVector resultTypes; if (hasPureTensorSemantics()) { - resultTypes.push_back(outputSlice.getType()); + resultTypes.append(outputSlice->result_type_begin(), + outputSlice->result_type_end()); } AffineExpr d0, d1; @@ -1206,15 +1300,18 @@ Im2colOp::getTiledImplementation(OpBuilder &builder, OpFoldResult mOffset = affine::makeComposedFoldedAffineApply( builder, loc, map, {mTileOffset, mOpOffset}); - SmallVector operands = {inputSlice, outputSlice}; + SmallVector operands = {inputSlice->getResult(0), + outputSlice->getResult(0)}; operands.append(getOperation()->getOperands().begin() + 2, getOperation()->getOperands().end()); Im2colOp tiledOp = - mlir::clone(builder, *this, TypeRange{outputSlice.getType()}, operands); + mlir::clone(builder, *this, outputSlice->getResultTypes(), operands); tiledOp.setMixedKOffset({kOffset}); tiledOp.setMixedMOffset({mOffset}); - return TilingResult{{tiledOp}, SmallVector(tiledOp->getResults())}; + return TilingResult{{tiledOp}, + SmallVector(tiledOp->getResults()), + {inputSlice, outputSlice}}; } FailureOr @@ -1313,10 +1410,28 @@ WinogradInputTransformOp::getTiledImplementation(OpBuilder &builder, inputOffsets[wDim] = wSizeAndOffset.second; SmallVector tiledOperands; - tiledOperands.emplace_back(getSlice(builder, loc, getInput(), inputOffsets, - inputSizes, inputStrides)); - tiledOperands.emplace_back(getSlice(builder, loc, getOutput(), outputOffsets, - outputSizes, outputStrides)); + SmallVector slices; + // Input + { + Operation *inputSlice = getSlice(builder, loc, getInput(), inputOffsets, + inputSizes, inputStrides); + if (!inputSlice) { + return emitOpError("failed to get input slice"); + } + tiledOperands.emplace_back(inputSlice->getResult(0)); + slices.push_back(inputSlice); + } + + // Output + { + Operation *outputSlice = getSlice(builder, loc, getOutput(), outputOffsets, + outputSizes, outputStrides); + if (!outputSlice) { + return emitOpError("failed to get output slice"); + } + tiledOperands.emplace_back(outputSlice->getResult(0)); + slices.push_back(outputSlice); + } SmallVector resultTypes; if (hasPureTensorSemantics()) { @@ -1326,7 +1441,8 @@ WinogradInputTransformOp::getTiledImplementation(OpBuilder &builder, Operation *tiledOp = mlir::clone(builder, getOperation(), resultTypes, tiledOperands); - return TilingResult{{tiledOp}, SmallVector(tiledOp->getResults())}; + return TilingResult{ + {tiledOp}, SmallVector(tiledOp->getResults()), slices}; } LogicalResult WinogradInputTransformOp::getResultTilePosition( @@ -1409,10 +1525,29 @@ FailureOr WinogradFilterTransformOp::getTiledImplementation( outputSizes[3] = inputSizes[fDim] = sizes[1]; SmallVector tiledOperands; - tiledOperands.emplace_back(getSlice(builder, loc, getInput(), inputOffsets, - inputSizes, inputStrides)); - tiledOperands.emplace_back(getSlice(builder, loc, getOutput(), outputOffsets, - outputSizes, outputStrides)); + SmallVector slices; + + // Input + { + Operation *inputSlice = getSlice(builder, loc, getInput(), inputOffsets, + inputSizes, inputStrides); + if (!inputSlice) { + return emitOpError("failed to get input slice"); + } + tiledOperands.emplace_back(inputSlice->getResult(0)); + slices.push_back(inputSlice); + } + + // Output + { + Operation *outputSlice = getSlice(builder, loc, getOutput(), outputOffsets, + outputSizes, outputStrides); + if (!outputSlice) { + return emitOpError("failed to get output slice"); + } + tiledOperands.emplace_back(outputSlice->getResult(0)); + slices.push_back(outputSlice); + } SmallVector resultTypes; if (hasPureTensorSemantics()) { @@ -1422,7 +1557,8 @@ FailureOr WinogradFilterTransformOp::getTiledImplementation( Operation *tiledOp = mlir::clone(builder, getOperation(), resultTypes, tiledOperands); - return TilingResult{{tiledOp}, SmallVector(tiledOp->getResults())}; + return TilingResult{ + {tiledOp}, SmallVector(tiledOp->getResults()), slices}; } LogicalResult WinogradFilterTransformOp::getResultTilePosition( @@ -1526,12 +1662,12 @@ FailureOr WinogradOutputTransformOp::getTiledImplementation( outputOffsets[hDim] = hSizeAndOffset.second; outputOffsets[wDim] = wSizeAndOffset.second; - Value outputSlice = getSlice(builder, loc, getOutput(), outputOffsets, - outputSizes, outputStrides); + Operation *outputSlice = getSlice(builder, loc, getOutput(), outputOffsets, + outputSizes, outputStrides); // The image dims of the winograd.output_transform result will always be a // multiple of the static output_tile_size, so insert a tensor.cast op to // maintain more static information in the IR. - auto outSliceType = cast(outputSlice.getType()); + auto outSliceType = cast(outputSlice->getResultTypes().front()); SmallVector staticOutShape(outSliceType.getShape()); auto constSizeH = getConstantIntValue(sizes[1]); if (constSizeH.has_value()) { @@ -1541,12 +1677,13 @@ FailureOr WinogradOutputTransformOp::getTiledImplementation( if (constSizeW.has_value()) { staticOutShape[wDim] = constSizeW.value() * getOutputTileSize(); } - Value staticOutputSlice = - castValue(builder, loc, outputSlice, outSliceType.clone(staticOutShape)); + Value staticOutputSlice = castValue(builder, loc, outputSlice->getResult(0), + outSliceType.clone(staticOutShape)); SmallVector tiledOperands; - tiledOperands.emplace_back(getSlice(builder, loc, getInput(), inputOffsets, - inputSizes, inputStrides)); + auto inputSlice = getSlice(builder, loc, getInput(), inputOffsets, inputSizes, + inputStrides); + tiledOperands.emplace_back(inputSlice->getResult(0)); tiledOperands.emplace_back(staticOutputSlice); SmallVector resultTypes; @@ -1561,7 +1698,7 @@ FailureOr WinogradOutputTransformOp::getTiledImplementation( if (!results.empty()) { results.front() = castValue(builder, loc, results.front(), outSliceType); } - return TilingResult{{tiledOp}, results}; + return TilingResult{{tiledOp}, results, {inputSlice, outputSlice}}; } LogicalResult WinogradOutputTransformOp::getResultTilePosition( @@ -1715,24 +1852,73 @@ AttentionOp::getTiledImplementation(OpBuilder &builder, Value scale = getScale(); SmallVector tiledOperands; - tiledOperands.emplace_back(getSlice(builder, loc, getQuery(), querySlice)); - tiledOperands.emplace_back(getSlice(builder, loc, getKey(), keySlice)); - tiledOperands.emplace_back(getSlice(builder, loc, getValue(), valueSlice)); + SmallVector slices; + + // Query + { + Operation *querySliceOp = getSlice(builder, loc, getQuery(), querySlice); + if (!querySliceOp) { + return emitOpError("failed to get query slice"); + } + tiledOperands.emplace_back(querySliceOp->getResult(0)); + slices.push_back(querySliceOp); + } + + // Key + { + Operation *keySliceOp = getSlice(builder, loc, getKey(), keySlice); + if (!keySliceOp) { + return emitOpError("failed to get key slice"); + } + tiledOperands.emplace_back(keySliceOp->getResult(0)); + slices.push_back(keySliceOp); + } + + // Value + { + Operation *valueSliceOp = getSlice(builder, loc, getValue(), valueSlice); + if (!valueSliceOp) { + return emitOpError("failed to get value slice"); + } + tiledOperands.emplace_back(valueSliceOp->getResult(0)); + slices.push_back(valueSliceOp); + } + + // Scale tiledOperands.emplace_back(scale); - tiledOperands.emplace_back(getSlice(builder, loc, getOutput(), outputSlice)); + + // Output + { + Operation *outputSliceOp = getSlice(builder, loc, getOutput(), outputSlice); + if (!outputSliceOp) { + return emitOpError("failed to get output slice"); + } + tiledOperands.emplace_back(outputSliceOp->getResult(0)); + slices.push_back(outputSliceOp); + } std::optional max = getMax(); if (max) { SmallVector maxSlice = getPermutedSlice(*getMaxMap(), offsets, sizes); - tiledOperands.emplace_back(getSlice(builder, loc, max.value(), maxSlice)); + Operation *maxSliceOp = getSlice(builder, loc, max.value(), maxSlice); + if (!maxSliceOp) { + return emitOpError("failed to get max slice"); + } + tiledOperands.emplace_back(maxSliceOp->getResult(0)); + slices.push_back(maxSliceOp); } std::optional sum = getMax(); if (sum) { SmallVector sumSlice = getPermutedSlice(*getSumMap(), offsets, sizes); - tiledOperands.emplace_back(getSlice(builder, loc, sum.value(), sumSlice)); + Operation *sumSliceOp = getSlice(builder, loc, sum.value(), sumSlice); + if (!sumSliceOp) { + return emitOpError("failed to get sum slice"); + } + tiledOperands.emplace_back(sumSliceOp->getResult(0)); + slices.push_back(sumSliceOp); } SmallVector resultTypes; @@ -1749,7 +1935,8 @@ AttentionOp::getTiledImplementation(OpBuilder &builder, Operation *tiledOp = mlir::clone(builder, getOperation(), resultTypes, tiledOperands); - return TilingResult{{tiledOp}, SmallVector(tiledOp->getResults())}; + return TilingResult{ + {tiledOp}, SmallVector(tiledOp->getResults()), slices}; } LogicalResult AttentionOp::getResultTilePosition( @@ -1845,13 +2032,68 @@ OnlineAttentionOp::getTiledImplementation(OpBuilder &builder, Value scale = getScale(); SmallVector tiledOperands; - tiledOperands.emplace_back(getSlice(builder, loc, getQuery(), querySlice)); - tiledOperands.emplace_back(getSlice(builder, loc, getKey(), keySlice)); - tiledOperands.emplace_back(getSlice(builder, loc, getValue(), valueSlice)); + SmallVector slices; + /// Query + { + Operation *querySliceOp = getSlice(builder, loc, getQuery(), querySlice); + if (!querySliceOp) { + return emitOpError("failed to get query slice"); + } + tiledOperands.emplace_back(querySliceOp->getResult(0)); + slices.push_back(querySliceOp); + } + + /// Key + { + Operation *keySliceOp = getSlice(builder, loc, getKey(), keySlice); + if (!keySliceOp) { + return emitOpError("failed to get key slice"); + } + tiledOperands.emplace_back(keySliceOp->getResult(0)); + slices.push_back(keySliceOp); + } + + /// Value + { + Operation *valueSliceOp = getSlice(builder, loc, getValue(), valueSlice); + if (!valueSliceOp) { + return emitOpError("failed to get value slice"); + } + tiledOperands.emplace_back(valueSliceOp->getResult(0)); + slices.push_back(valueSliceOp); + } + tiledOperands.emplace_back(scale); - tiledOperands.emplace_back(getSlice(builder, loc, getOutput(), outputSlice)); - tiledOperands.emplace_back(getSlice(builder, loc, getMax(), maxSlice)); - tiledOperands.emplace_back(getSlice(builder, loc, getSum(), sumSlice)); + + /// Output + { + Operation *outputSliceOp = getSlice(builder, loc, getOutput(), outputSlice); + if (!outputSliceOp) { + return emitOpError("failed to get output slice"); + } + tiledOperands.emplace_back(outputSliceOp->getResult(0)); + slices.push_back(outputSliceOp); + } + + /// Max + { + Operation *maxSliceOp = getSlice(builder, loc, getMax(), maxSlice); + if (!maxSliceOp) { + return emitOpError("failed to get max slice"); + } + tiledOperands.emplace_back(maxSliceOp->getResult(0)); + slices.push_back(maxSliceOp); + } + + /// Sum + { + Operation *sumSliceOp = getSlice(builder, loc, getSum(), sumSlice); + if (!sumSliceOp) { + return emitOpError("failed to get sum slice"); + } + tiledOperands.emplace_back(sumSliceOp->getResult(0)); + slices.push_back(sumSliceOp); + } SmallVector resultTypes; resultTypes.push_back(tiledOperands[4].getType()); @@ -1861,7 +2103,8 @@ OnlineAttentionOp::getTiledImplementation(OpBuilder &builder, Operation *tiledOp = mlir::clone(builder, getOperation(), resultTypes, tiledOperands); - return TilingResult{{tiledOp}, SmallVector(tiledOp->getResults())}; + return TilingResult{ + {tiledOp}, SmallVector(tiledOp->getResults()), slices}; } LogicalResult OnlineAttentionOp::getResultTilePosition( diff --git a/compiler/src/iree/compiler/Dialect/LinalgExt/Utils/Utils.cpp b/compiler/src/iree/compiler/Dialect/LinalgExt/Utils/Utils.cpp index aefcdeec4e07..0b67d4abff7f 100644 --- a/compiler/src/iree/compiler/Dialect/LinalgExt/Utils/Utils.cpp +++ b/compiler/src/iree/compiler/Dialect/LinalgExt/Utils/Utils.cpp @@ -46,25 +46,27 @@ SmallVector getDims(OpBuilder &builder, Location loc, [&](int64_t dim) { return getDim(builder, loc, shapedTypeValue, dim); }); } -Value getSlice(OpBuilder &b, Location loc, Value src, ArrayRef slice) { +Operation *getSlice(OpBuilder &b, Location loc, Value src, + ArrayRef slice) { return getSlice(b, loc, src, llvm::map_to_vector(slice, [](Range x) { return x.offset; }), llvm::map_to_vector(slice, [](Range x) { return x.size; }), llvm::map_to_vector(slice, [](Range x) { return x.stride; })); } -Value getSlice(OpBuilder &b, Location loc, Value src, - ArrayRef offsets, ArrayRef sizes, - ArrayRef strides) { - return TypeSwitch(src.getType()) - .Case([&](RankedTensorType t) -> Value { +Operation *getSlice(OpBuilder &b, Location loc, Value src, + ArrayRef offsets, + ArrayRef sizes, + ArrayRef strides) { + return TypeSwitch(src.getType()) + .Case([&](RankedTensorType t) -> Operation * { return b.create(loc, src, offsets, sizes, strides); }) - .Case([&](MemRefType type) -> Value { + .Case([&](MemRefType type) -> Operation * { return b.create(loc, src, offsets, sizes, strides); }) - .Default([&](Type t) { + .Default([&](Type t) -> Operation * { assert(false && "invalid type"); return nullptr; }); diff --git a/compiler/src/iree/compiler/Dialect/LinalgExt/Utils/Utils.h b/compiler/src/iree/compiler/Dialect/LinalgExt/Utils/Utils.h index e8f92fe963fd..4c1ce609fa10 100644 --- a/compiler/src/iree/compiler/Dialect/LinalgExt/Utils/Utils.h +++ b/compiler/src/iree/compiler/Dialect/LinalgExt/Utils/Utils.h @@ -31,10 +31,12 @@ SmallVector getDims(OpBuilder &builder, Location loc, Value v); /// Returns a `memref.subview` or a `tensor.extract_slice` based on the type of /// `src`. -Value getSlice(OpBuilder &b, Location loc, Value src, ArrayRef slice); -Value getSlice(OpBuilder &b, Location loc, Value src, - ArrayRef offsets, ArrayRef sizes, - ArrayRef strides); +Operation *getSlice(OpBuilder &b, Location loc, Value src, + ArrayRef slice); +Operation *getSlice(OpBuilder &b, Location loc, Value src, + ArrayRef offsets, + ArrayRef sizes, + ArrayRef strides); /// Returns a `memref.cast` or `tensor.cast` based on the type of `src`. Value castValue(OpBuilder &builder, Location loc, Value src, ShapedType type); diff --git a/tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync.json b/tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync.json index 9c7b63563b11..adb1ebf4fa72 100644 --- a/tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync.json +++ b/tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync.json @@ -100,7 +100,6 @@ "onnx/node/generated/test_compress_1", "onnx/node/generated/test_compress_default_axis", "onnx/node/generated/test_compress_negative_axis", - "onnx/node/generated/test_conv_with_autopad_same", "onnx/node/generated/test_convtranspose_autopad_same", "onnx/node/generated/test_convtranspose_kernel_shape", "onnx/node/generated/test_convtranspose_output_shape", @@ -129,6 +128,7 @@ "onnx/node/generated/test_dft_opset19", "onnx/node/generated/test_edge_pad", "onnx/node/generated/test_einsum_sum", + "onnx/node/generated/test_gathernd_example_float32", "onnx/node/generated/test_gridsample_bicubic", "onnx/node/generated/test_gridsample_bicubic_align_corners_0_additional_1", "onnx/node/generated/test_gridsample_bicubic_align_corners_1_additional_1", @@ -167,8 +167,6 @@ "onnx/node/generated/test_maxpool_with_argmax_2d_precomputed_strides", "onnx/node/generated/test_maxunpool_export_with_output_shape", "onnx/node/generated/test_maxunpool_export_without_output_shape", - "onnx/node/generated/test_mod_mixed_sign_float16", - "onnx/node/generated/test_mod_mixed_sign_float32", "onnx/node/generated/test_mod_mixed_sign_float64", "onnx/node/generated/test_momentum", "onnx/node/generated/test_momentum_multiple", @@ -188,6 +186,7 @@ "onnx/node/generated/test_nllloss_NCd1d2_with_weight_reduction_sum_ii", "onnx/node/generated/test_nllloss_NCd1d2d3_none_no_weight_negative_ii", "onnx/node/generated/test_nllloss_NCd1d2d3_sum_weight_high_ii", + "onnx/node/generated/test_nllloss_NCd1d2d3_sum_weight_high_ii_expanded", "onnx/node/generated/test_nllloss_NCd1d2d3d4d5_mean_weight", "onnx/node/generated/test_nllloss_NCd1d2d3d4d5_none_no_weight", "onnx/node/generated/test_nonmaxsuppression_center_point_box_format", diff --git a/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_cuda.json b/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_cuda.json index fddbeb650160..c8b73fdceb91 100644 --- a/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_cuda.json +++ b/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_cuda.json @@ -109,7 +109,6 @@ "onnx/node/generated/test_compress_1", "onnx/node/generated/test_compress_default_axis", "onnx/node/generated/test_compress_negative_axis", - "onnx/node/generated/test_conv_with_autopad_same", "onnx/node/generated/test_convtranspose_autopad_same", "onnx/node/generated/test_convtranspose_kernel_shape", "onnx/node/generated/test_convtranspose_output_shape", @@ -138,6 +137,7 @@ "onnx/node/generated/test_dft_opset19", "onnx/node/generated/test_edge_pad", "onnx/node/generated/test_einsum_sum", + "onnx/node/generated/test_gathernd_example_float32", "onnx/node/generated/test_gridsample_bicubic", "onnx/node/generated/test_gridsample_bicubic_align_corners_0_additional_1", "onnx/node/generated/test_gridsample_bicubic_align_corners_1_additional_1", @@ -194,6 +194,7 @@ "onnx/node/generated/test_nllloss_NCd1d2_with_weight_reduction_sum_ii", "onnx/node/generated/test_nllloss_NCd1d2d3_none_no_weight_negative_ii", "onnx/node/generated/test_nllloss_NCd1d2d3_sum_weight_high_ii", + "onnx/node/generated/test_nllloss_NCd1d2d3_sum_weight_high_ii_expanded", "onnx/node/generated/test_nllloss_NCd1d2d3d4d5_mean_weight", "onnx/node/generated/test_nllloss_NCd1d2d3d4d5_none_no_weight", "onnx/node/generated/test_nonmaxsuppression_center_point_box_format", diff --git a/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_rocm_rdna3.json b/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_rocm_rdna3.json index c7a35682fb80..5e5f14df8da6 100644 --- a/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_rocm_rdna3.json +++ b/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_rocm_rdna3.json @@ -104,7 +104,6 @@ "onnx/node/generated/test_compress_1", "onnx/node/generated/test_compress_default_axis", "onnx/node/generated/test_compress_negative_axis", - "onnx/node/generated/test_conv_with_autopad_same", "onnx/node/generated/test_convtranspose_autopad_same", "onnx/node/generated/test_convtranspose_kernel_shape", "onnx/node/generated/test_convtranspose_output_shape", @@ -133,6 +132,7 @@ "onnx/node/generated/test_dft_opset19", "onnx/node/generated/test_edge_pad", "onnx/node/generated/test_einsum_sum", + "onnx/node/generated/test_gathernd_example_float32", "onnx/node/generated/test_gridsample_bicubic", "onnx/node/generated/test_gridsample_bicubic_align_corners_0_additional_1", "onnx/node/generated/test_gridsample_bicubic_align_corners_1_additional_1", @@ -189,6 +189,7 @@ "onnx/node/generated/test_nllloss_NCd1d2_with_weight_reduction_sum_ii", "onnx/node/generated/test_nllloss_NCd1d2d3_none_no_weight_negative_ii", "onnx/node/generated/test_nllloss_NCd1d2d3_sum_weight_high_ii", + "onnx/node/generated/test_nllloss_NCd1d2d3_sum_weight_high_ii_expanded", "onnx/node/generated/test_nllloss_NCd1d2d3d4d5_mean_weight", "onnx/node/generated/test_nllloss_NCd1d2d3d4d5_none_no_weight", "onnx/node/generated/test_nonmaxsuppression_center_point_box_format", diff --git a/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_vulkan.json b/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_vulkan.json index ad5602480f58..59b8eed2edd6 100644 --- a/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_vulkan.json +++ b/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_vulkan.json @@ -135,7 +135,6 @@ "onnx/node/generated/test_compress_1", "onnx/node/generated/test_compress_default_axis", "onnx/node/generated/test_compress_negative_axis", - "onnx/node/generated/test_conv_with_autopad_same", "onnx/node/generated/test_convtranspose_autopad_same", "onnx/node/generated/test_convtranspose_kernel_shape", "onnx/node/generated/test_convtranspose_output_shape", @@ -164,6 +163,7 @@ "onnx/node/generated/test_dft_opset19", "onnx/node/generated/test_edge_pad", "onnx/node/generated/test_einsum_sum", + "onnx/node/generated/test_gathernd_example_float32", "onnx/node/generated/test_gridsample", "onnx/node/generated/test_gridsample_aligncorners_true", "onnx/node/generated/test_gridsample_bicubic", @@ -216,9 +216,6 @@ "onnx/node/generated/test_min_uint16", "onnx/node/generated/test_min_uint8", "onnx/node/generated/test_mod_int64_fmod", - "onnx/node/generated/test_mod_mixed_sign_float16", - "onnx/node/generated/test_mod_mixed_sign_float32", - "onnx/node/generated/test_mod_mixed_sign_float64", "onnx/node/generated/test_mod_mixed_sign_int64", "onnx/node/generated/test_mod_uint64", "onnx/node/generated/test_momentum", @@ -239,6 +236,7 @@ "onnx/node/generated/test_nllloss_NCd1d2_with_weight_reduction_sum_ii", "onnx/node/generated/test_nllloss_NCd1d2d3_none_no_weight_negative_ii", "onnx/node/generated/test_nllloss_NCd1d2d3_sum_weight_high_ii", + "onnx/node/generated/test_nllloss_NCd1d2d3_sum_weight_high_ii_expanded", "onnx/node/generated/test_nllloss_NCd1d2d3d4d5_mean_weight", "onnx/node/generated/test_nllloss_NCd1d2d3d4d5_none_no_weight", "onnx/node/generated/test_nonmaxsuppression_center_point_box_format", @@ -562,6 +560,8 @@ "onnx/node/generated/test_min_float64", "onnx/node/generated/test_min_int16", "onnx/node/generated/test_min_int8", + "onnx/node/generated/test_mod_mixed_sign_float16", + "onnx/node/generated/test_mod_mixed_sign_float64", "onnx/node/generated/test_mod_mixed_sign_int8", "onnx/node/generated/test_mod_uint16", "onnx/node/generated/test_mod_uint8", diff --git a/third_party/llvm-project b/third_party/llvm-project index e268afbfed67..030c6da7af82 160000 --- a/third_party/llvm-project +++ b/third_party/llvm-project @@ -1 +1 @@ -Subproject commit e268afbfed678de5da8cac6bec488c9abca97b24 +Subproject commit 030c6da7af826b641db005be925b20f956c3a6bb diff --git a/third_party/torch-mlir b/third_party/torch-mlir index 2960538c6d14..edf725ef42b9 160000 --- a/third_party/torch-mlir +++ b/third_party/torch-mlir @@ -1 +1 @@ -Subproject commit 2960538c6d145a2bd1efa52c56f2bcaa1ffc45aa +Subproject commit edf725ef42b9bc7bc1dada691a3988b3c0038e33