87 changes: 57 additions & 30 deletions include/gc/Analysis/MatmulConfigAnalysis.h
@@ -47,15 +47,67 @@ inline SmallVector<unsigned> extractDimTypeIdx(ArrayRef<DimType> tyList,
return idxList;
}

// Classify every dimension of the op's iteration space: a parallel dim that
// appears in all three operands is Batch, a parallel dim that appears in the
// LHS is M, any other parallel dim is N, and reduction dims are K.
inline void getDimTypeFromIterators(linalg::LinalgOp linalgOp,
SmallVectorImpl<DimType> &dimTypes) {
SmallVector<mlir::utils::IteratorType> iteratorTypes =
linalgOp.getIteratorTypesArray();

for (const auto &&[idx, iterType] : llvm::enumerate(iteratorTypes)) {
if (iterType == mlir::utils::IteratorType::parallel) {
SmallVector<std::pair<Value, unsigned>> operandDimPairs;
linalgOp.mapIterationSpaceDimToAllOperandDims(idx, operandDimPairs);
if (operandDimPairs.size() == 3) {
dimTypes.push_back(DimType::Batch);
} else if (llvm::any_of(operandDimPairs,
[&](std::pair<Value, unsigned> pair) {
return pair.first ==
dyn_cast<linalg::ContractionOpInterface>(
linalgOp.getOperation())
.lhs();
})) {
dimTypes.push_back(DimType::M);
} else {
dimTypes.push_back(DimType::N);
}
} else if (iterType == mlir::utils::IteratorType::reduction) {
dimTypes.push_back(DimType::K);
}
}
}
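As an illustration (an editor-added sketch, not part of the patch), this is how the classification above would play out for a plain linalg.batch_matmul with indexing maps (b, m, k), (b, k, n) -> (b, m, n):

// Hypothetical trace for linalg.batch_matmul:
//   d0 = b: parallel, appears in all three operands -> DimType::Batch
//   d1 = m: parallel, appears in the LHS            -> DimType::M
//   d2 = n: parallel, not in the LHS                -> DimType::N
//   d3 = k: reduction                               -> DimType::K
// dimTypes ends up as {Batch, M, N, K}, indexed by iteration dim.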

// Map each result of `operand`'s indexing map back to the iteration-space dim
// types computed above. Assumes a projected-permutation map, i.e. every
// result is an AffineDimExpr.
inline SmallVector<DimType>
matchOperandToDimTypes(linalg::LinalgOp linalgOp, OpOperand *operand,
ArrayRef<DimType> allDimTypes) {
ArrayRef<AffineExpr> map =
linalgOp.getMatchingIndexingMap(operand).getResults();
SmallVector<DimType> res;
for (const AffineExpr &dim : map) {
AffineDimExpr dimExpr = dyn_cast<AffineDimExpr>(dim);
res.push_back(allDimTypes[dimExpr.getPosition()]);
}
return res;
}

// Compute the per-operand dim types (A, B, C) of a contraction op from its
// iterator types and indexing maps.
inline SmallVector<SmallVector<DimType>>
getContractionOpOperandDimType(linalg::LinalgOp linalgOp) {
SmallVector<DimType> dimTypes;
getDimTypeFromIterators(linalgOp, dimTypes);
SmallVector<DimType> ADimTypes = matchOperandToDimTypes(
linalgOp, linalgOp.getDpsInputOperand(0), dimTypes);
SmallVector<DimType> BDimTypes = matchOperandToDimTypes(
linalgOp, linalgOp.getDpsInputOperand(1), dimTypes);
SmallVector<DimType> CDimTypes =
matchOperandToDimTypes(linalgOp, linalgOp.getDpsInitOperand(0), dimTypes);

return SmallVector<SmallVector<DimType>>{ADimTypes, BDimTypes, CDimTypes};
}
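For example (again an illustrative sketch, not part of the diff), on linalg.batch_matmul the iteration dims classify as {Batch, M, N, K}, and projecting them through the three indexing maps reproduces the per-operand tables that the removed branches below used to hard-code:

//   A with map (b, m, k) -> {Batch, M, K}
//   B with map (b, k, n) -> {Batch, K, N}
//   C with map (b, m, n) -> {Batch, M, N}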

// Get the dim type of each dimension of every operand for the given linalg op
inline FailureOr<SmallVector<SmallVector<DimType>>>
getOprandDimType(linalg::LinalgOp &linalgOp) {
// TODO: replace the linalgx op with generic op
if (llvm::isa<linalg::MatmulOp>(linalgOp)) {
return SmallVector<SmallVector<DimType>>{
SmallVector<DimType>{DimType::M, DimType::K},
SmallVector<DimType>{DimType::K, DimType::N},
SmallVector<DimType>{DimType::M, DimType::N}};
if (llvm::isa<linalg::ContractionOpInterface>(linalgOp.getOperation())) {
return getContractionOpOperandDimType(linalgOp);
} else if (linalgx::isGenericPackedMatmulOp(
linalgOp.getOperation(), linalgx::PackingType::VNNI_MM2D) ||
llvm::isa<linalgx::Mm2DVnniOp>(linalgOp)) {
@@ -72,31 +72,6 @@ getOprandDimType(linalg::LinalgOp &linalgOp) {
SmallVector<DimType>{DimType::N, DimType::K, DimType::K, DimType::N,
DimType::K},
SmallVector<DimType>{DimType::M, DimType::N, DimType::M, DimType::N}};
} else if (llvm::isa<linalg::BatchMatmulOp>(linalgOp)) {
return SmallVector<SmallVector<DimType>>{
SmallVector<DimType>{DimType::Batch, DimType::M, DimType::K},
SmallVector<DimType>{DimType::Batch, DimType::K, DimType::N},
SmallVector<DimType>{DimType::Batch, DimType::M, DimType::N}};
} else if (llvm::isa<linalg::MatmulTransposeAOp>(linalgOp)) {
return SmallVector<SmallVector<DimType>>{
SmallVector<DimType>{DimType::K, DimType::M},
SmallVector<DimType>{DimType::K, DimType::N},
SmallVector<DimType>{DimType::M, DimType::N}};
} else if (llvm::isa<linalg::MatmulTransposeBOp>(linalgOp)) {
return SmallVector<SmallVector<DimType>>{
SmallVector<DimType>{DimType::M, DimType::K},
SmallVector<DimType>{DimType::N, DimType::K},
SmallVector<DimType>{DimType::M, DimType::N}};
} else if (llvm::isa<linalg::BatchMatmulTransposeAOp>(linalgOp)) {
return SmallVector<SmallVector<DimType>>{
SmallVector<DimType>{DimType::Batch, DimType::K, DimType::M},
SmallVector<DimType>{DimType::Batch, DimType::K, DimType::N},
SmallVector<DimType>{DimType::Batch, DimType::M, DimType::N}};
} else if (llvm::isa<linalg::BatchMatmulTransposeBOp>(linalgOp)) {
return SmallVector<SmallVector<DimType>>{
SmallVector<DimType>{DimType::Batch, DimType::M, DimType::K},
SmallVector<DimType>{DimType::Batch, DimType::N, DimType::K},
SmallVector<DimType>{DimType::Batch, DimType::M, DimType::N}};
} else if (linalgx::isGenericPackedMatmulOp(linalgOp.getOperation(),
linalgx::PackingType::MM4D)) {
return SmallVector<SmallVector<DimType>>{
43 changes: 40 additions & 3 deletions lib/gc/Analysis/MatmulConfigAnalysis.cpp
@@ -55,9 +55,11 @@ bool validateConfig(const MatmulConfig &cfg) {
std::vector<uint32_t>
getCandidate(uint32_t num, uint32_t floor,
uint32_t ceil = std::numeric_limits<uint32_t>::max()) {
int defaultBlock = 32;
// factor
std::vector<uint32_t> candidates;
uint32_t upperbound = std::min(num, ceil);
uint32_t upperbound =
std::min(llvm::divideCeil(num, defaultBlock) * defaultBlock, ceil);
for (uint32_t i = floor; i <= upperbound; i++)
if (num % i == 0)
candidates.push_back(i);
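A quick illustration of the new bound (illustrative numbers, not from the patch): with num = 1000 and no explicit ceil, the upper bound is rounded up to the next multiple of defaultBlock before clamping:

//   old: upperbound = min(1000, UINT32_MAX)                      = 1000
//   new: upperbound = min(divideCeil(1000, 32) * 32, UINT32_MAX)
//                   = min(32 * 32, UINT32_MAX)                   = 1024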
@@ -199,6 +201,29 @@ double dynamicBufferizationCost(linalg::LinalgOp &linalgOp,
return cost;
}

// Estimate a relative padding cost: whenever a matmul dim is not evenly
// divisible by its innermost block, every operand touching that dim is
// charged its total number of innermost blocks, approximating the extra
// pad/copy traffic.
double paddingCost(linalg::LinalgOp &linalgOp, ArrayRef<uint32_t> shape,
const MatmulConfig &config,
CPUTargetDescriptionAnalysis &sysDesc) {
double cost = 0;
uint32_t M = shape[0], N = shape[1], K = shape[2];
bool isPadOnM = M % config.innerMostMBlock != 0,
isPadOnK = K % config.innerMostKBlock != 0,
isPadOnN = N % config.innerMostNBlock != 0;
if (isPadOnM || isPadOnK) {
cost += llvm::divideCeil(M, config.innerMostMBlock) *
llvm::divideCeil(K, config.innerMostKBlock);
}
if (isPadOnK || isPadOnN) {
cost += llvm::divideCeil(N, config.innerMostNBlock) *
llvm::divideCeil(K, config.innerMostKBlock);
}
if (isPadOnM || isPadOnN) {
cost += llvm::divideCeil(N, config.innerMostNBlock) *
llvm::divideCeil(M, config.innerMostMBlock);
}
return cost;
}
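A small worked example (numbers chosen for illustration only): with M = 64, N = 64, K = 100 and innermost blocks of 32 for all three dims, only K is indivisible, so the A and B terms are charged while the C term is not:

//   isPadOnM = false, isPadOnN = false, isPadOnK = true (100 % 32 != 0)
//   A term: divideCeil(64, 32) * divideCeil(100, 32) = 2 * 4 = 8
//   B term: divideCeil(64, 32) * divideCeil(100, 32) = 2 * 4 = 8
//   C term: skipped (isPadOnM || isPadOnN is false)
//   paddingCost = 16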

using CostModelFn = std::function<double(
linalg::LinalgOp &linalgOp, ArrayRef<uint32_t> shape, MatmulConfig cfg,
CPUTargetDescriptionAnalysis &sysDesc)>;
@@ -243,6 +268,8 @@ prepareConfigCandidates(Operation *root, CPUTargetDescriptionAnalysis &sysDesc,
ArrayRef<uint32_t> shape,
ArrayRef<uint32_t> givenInnermostBlock,
bool allowIndivisibleInnerblock = false) {
LLVM_DEBUG(llvm::dbgs() << "allowIndivisibleInnerblock: "
<< allowIndivisibleInnerblock << "\n");
assert(shape.size() >= 3 && "shape.size() should >= 3");
std::vector<MatmulConfig> configs;
uint32_t threads = sysDesc.getNumThreads();
Expand Down Expand Up @@ -278,6 +305,13 @@ prepareConfigCandidates(Operation *root, CPUTargetDescriptionAnalysis &sysDesc,
: getCandidate((uint32_t)shape[2],
shape[2] >= noSmallBlockNeedThreshold ? 8U : 1U, 256U);

if (allowIndivisibleInnerblock) {
innerMostKBlockCandidates = {16, 32, 64};
innerMostNBlockCandidates = {16, 32, 64};
NBlockCandidates = innerMostNBlockCandidates;
KBlockCandidates = innerMostKBlockCandidates;
[Inline review thread on the candidate override above]
Contributor: So after the change here, the innermost KBlock will only be one of 16/32/64 if allowIndivisibleInnerblock is true?
Contributor (Author): Yes, your understanding is correct.
}

// TODO: improve via multi threading or add more constraints to restrict the
// candidate size
for (uint32_t MThreads : MThreadsCandidates) {
Expand Down Expand Up @@ -464,14 +498,17 @@ MatmulConfig MatmulConfigAnalysis::getConfig() {
{computationIntensityOnL2Cache, "computationIntensityOnL2Cache",
-1},
{memoryConsumptionOnThreadCost, "memoryConsumptionOnThreadCost",
-1}};
-1},
{paddingCost, "paddingCost", -1}};
SmallVector<uint32_t> shape = {M, N, K};
std::vector<MatmulConfig> configCandidates =
prepareConfigCandidates(root, sysDesc, shape, givenInnermostBlock,
allowIndivisibleInnerBlock);
for (auto &&[fn, name, threshold] : costModelList)
for (auto &&[fn, name, threshold] : costModelList) {
LLVM_DEBUG(llvm::dbgs() << name << "\n");
configCandidates = filterConfigByCostModel(
configCandidates, linalgOp, shape, sysDesc, fn, 0.5, threshold);
}
if (!configCandidates.empty())
config = configCandidates[0];
}