Skip to content

Commit f19e0fb

Browse files
committed
introduce padding cost
1 parent e04ea30 commit f19e0fb

File tree

1 file changed

+30
-3
lines changed

1 file changed

+30
-3
lines changed

lib/gc/Analysis/MatmulConfigAnalysis.cpp

Lines changed: 30 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,8 @@ getCandidate(uint32_t num, uint32_t floor,
5858
int defaultBlock = 32;
5959
// factor
6060
std::vector<uint32_t> candidates;
61-
uint32_t upperbound = std::min(llvm::divideCeil(num, defaultBlock), ceil);
61+
uint32_t upperbound =
62+
std::min(llvm::divideCeil(num, defaultBlock) * defaultBlock, ceil);
6263
for (uint32_t i = floor; i <= upperbound; i++)
6364
if (num % i == 0)
6465
candidates.push_back(i);
@@ -200,6 +201,29 @@ double dynamicBufferizationCost(linalg::LinalgOp &linalgOp,
200201
return cost;
201202
}
202203

204+
/// Cost-model term for padding: sums the innermost-tile counts of every
/// two-dimensional plane of the matmul that involves a padded dimension.
///
/// `shape` is read as {M, N, K}. A dimension counts as padded when it is not
/// an exact multiple of the matching innermost block in `config`. Each of the
/// three planes (M x K, N x K, M x N) adds its tile count -- the product of
/// the ceil-divided extents of its two dimensions -- when at least one of
/// those two dimensions is padded.
///
/// `linalgOp` and `sysDesc` are not read here; presumably they are kept so the
/// function fits the common cost-model callback signature.
///
/// \returns the accumulated tile count as a double; 0 when M, N and K are all
/// multiples of their innermost blocks.
double paddingCost(linalg::LinalgOp &linalgOp, ArrayRef<uint32_t> shape,
                   const MatmulConfig &config,
                   CPUTargetDescriptionAnalysis &sysDesc) {
  const uint32_t M = shape[0];
  const uint32_t N = shape[1];
  const uint32_t K = shape[2];

  // A dimension needs padding when its innermost block does not divide it.
  const bool mNeedsPad = (M % config.innerMostMBlock) != 0;
  const bool kNeedsPad = (K % config.innerMostKBlock) != 0;
  const bool nNeedsPad = (N % config.innerMostNBlock) != 0;

  // Number of innermost tiles along each dimension, rounded up.
  const auto mTiles = llvm::divideCeil(M, config.innerMostMBlock);
  const auto nTiles = llvm::divideCeil(N, config.innerMostNBlock);
  const auto kTiles = llvm::divideCeil(K, config.innerMostKBlock);

  double total = 0;
  // M x K plane.
  if (mNeedsPad || kNeedsPad)
    total += mTiles * kTiles;
  // N x K plane.
  if (kNeedsPad || nNeedsPad)
    total += nTiles * kTiles;
  // M x N plane.
  if (mNeedsPad || nNeedsPad)
    total += nTiles * mTiles;
  return total;
}
226+
203227
// Callable type for a cost-model function: it receives the linalg op, the
// problem shape, a MatmulConfig (by value) and the CPU target description,
// and returns a double. How callers rank the returned value is not visible
// in this excerpt.
using CostModelFn = std::function<double(
204228
linalg::LinalgOp &linalgOp, ArrayRef<uint32_t> shape, MatmulConfig cfg,
205229
CPUTargetDescriptionAnalysis &sysDesc)>;
@@ -474,14 +498,17 @@ MatmulConfig MatmulConfigAnalysis::getConfig() {
474498
{computationIntensityOnL2Cache, "computationIntensityOnL2Cache",
475499
-1},
476500
{memoryConsumptionOnThreadCost, "memoryConsumptionOnThreadCost",
477-
-1}};
501+
-1},
502+
{paddingCost, "paddingCost", -1}};
478503
SmallVector<uint32_t> shape = {M, N, K};
479504
std::vector<MatmulConfig> configCandidates =
480505
prepareConfigCandidates(root, sysDesc, shape, givenInnermostBlock,
481506
allowIndivisibleInnerBlock);
482-
for (auto &&[fn, name, threshold] : costModelList)
507+
for (auto &&[fn, name, threshold] : costModelList) {
508+
LLVM_DEBUG(llvm::dbgs() << name << "\n");
483509
configCandidates = filterConfigByCostModel(
484510
configCandidates, linalgOp, shape, sysDesc, fn, 0.5, threshold);
511+
}
485512
if (!configCandidates.empty())
486513
config = configCandidates[0];
487514
}

0 commit comments

Comments
 (0)