|
21 | 21 | #include "mlir/IR/PatternMatch.h" |
22 | 22 | #include "mlir/Interfaces/DestinationStyleOpInterface.h" |
23 | 23 | #include "mlir/Interfaces/TilingInterface.h" |
| 24 | +#include "mlir/Transforms/RegionUtils.h" |
24 | 25 | #include "llvm/ADT/TypeSwitch.h" |
25 | 26 | #include "llvm/Support/Debug.h" |
26 | 27 | #include <optional> |
@@ -255,6 +256,18 @@ SmallVector<LoopLikeOpInterface> mlir::scfX::getOuterNestLoopsWhile( |
255 | 256 | return {nestLoops.rbegin(), nestLoops.rend()}; |
256 | 257 | } |
257 | 258 |
|
| 259 | +/// Listener that remembers every operation passed to `notifyOperationErased`.
| 260 | +struct ErasedOpListener : public RewriterBase::Listener {
| 261 | +private:
| 262 | + /// Erased operations (blocks are not tracked); lookup keys only, never dereferenced.
| 263 | + DenseSet<Operation *> erased;
| 264 | +public:
| 265 | + /// Records `op` as erased.
| 266 | + void notifyOperationErased(Operation *op) override { erased.insert(op); }
| 267 | + /// Returns true if `op` was previously reported erased to this listener.
| 268 | + bool isErased(Operation *op) const { return erased.count(op) != 0; }
| 269 | +};
| 270 | + |
258 | 271 | /// Enhanced version of `tileAndFuseProducerOfSliceImpl`, which can deal with |
259 | 272 | /// multi-level `extractSliceOp`. E.g. |
260 | 273 | /// |
@@ -296,6 +309,57 @@ mlir::scfX::tileAndFuseProducerOfSlice(RewriterBase &rewriter, |
296 | 309 | tileAndFuseProducerOfSliceImpl(rewriter, sliceOp, outerLoops); |
297 | 310 | if (!fuseProducerResult) |
298 | 311 | return std::nullopt; |
| 312 | + |
| 313 | + // Cache old listener. |
| 314 | + OpBuilder::Listener *oldListener = rewriter.getListener(); |
| 315 | + // Set new listener. |
| 316 | + ErasedOpListener *newListener = new ErasedOpListener(); |
| 317 | + rewriter.setListener(newListener); |
| 318 | + |
| 319 | + auto producerOp = |
| 320 | + cast<TilingInterface>(fuseProducerResult->origProducer.getDefiningOp()); |
| 321 | + unsigned resultNumber = fuseProducerResult->origProducer.getResultNumber(); |
| 322 | + // cache candidate slice |
| 323 | + auto extractSliceOp = cast<tensor::ExtractSliceOp>(candidateSliceOp); |
| 324 | + SmallVector<OpFoldResult> offsets = extractSliceOp.getMixedOffsets(), |
| 325 | + sizes = extractSliceOp.getMixedSizes(), |
| 326 | + strides = extractSliceOp.getMixedStrides(); |
| 327 | + // Explicitly execute DCE. |
| 328 | + (void)mlir::simplifyRegions(rewriter, {*producerOp->getParentRegion()}); |
| 329 | + // If fused producer has multiple users. |
| 330 | + bool yieldReplacement = !newListener->isErased(producerOp); |
| 331 | + // Reset to old listener. |
| 332 | + rewriter.setListener(oldListener); |
| 333 | + // Delete new listener. |
| 334 | + delete newListener; |
| 335 | + |
| 336 | + if (yieldReplacement) { |
| 337 | + OpBuilder::InsertionGuard g(rewriter); |
| 338 | + // Set insertPoint right before tiled op. |
| 339 | + rewriter.setInsertionPoint(fuseProducerResult->tiledOps[0]); |
| 340 | + // Manually clone new candidate slice. |
| 341 | + auto clonedExtractSliceOp = rewriter.create<tensor::ExtractSliceOp>( |
| 342 | + producerOp->getLoc(), producerOp->getResult(resultNumber), offsets, |
| 343 | + sizes, strides); |
| 344 | + // Yield a replacement for the fused producer to avoid repeated computation.
| 345 | + if (failed(scf::yieldReplacementForFusedProducer( |
| 346 | + rewriter, clonedExtractSliceOp, fuseProducerResult.value(), |
| 347 | + outerLoops))) |
| 348 | + return std::nullopt; |
| 349 | + // Erase cloned candidate slice. |
| 350 | + rewriter.eraseOp(clonedExtractSliceOp); |
| 351 | + |
| 352 | + unsigned loopNumResults = outerLoops.front()->getNumResults(), |
| 353 | + producerNumResults = producerOp->getNumResults(); |
| 354 | + // Replace other users of fused producer with new loop results. |
| 355 | + for (auto &&[index, result] : llvm::enumerate(producerOp->getResults())) { |
| 356 | + rewriter.replaceAllUsesWith( |
| 357 | + result, outerLoops.front()->getResult(loopNumResults - |
| 358 | + producerNumResults + index)); |
| 359 | + } |
| 360 | + // Erase fused producer op. |
| 361 | + rewriter.eraseOp(producerOp); |
| 362 | + } |
299 | 363 | } |
300 | 364 | return fuseProducerResult; |
301 | 365 | } |
|
0 commit comments