Skip to content

Commit

Permalink
Optimize how gen and kill sets are computed in dead store elimination…
Browse files Browse the repository at this point in the history
…. Existing tests ensure correctness
  • Loading branch information
trentxintong committed Dec 18, 2015
1 parent 4aabe88 commit 4491d86
Showing 1 changed file with 88 additions and 13 deletions.
101 changes: 88 additions & 13 deletions lib/SILOptimizer/Transforms/DeadStoreElimination.cpp
Expand Up @@ -93,12 +93,20 @@ STATISTIC(NumDeadStores, "Number of dead stores removed");
STATISTIC(NumPartialDeadStores, "Number of partial dead stores removed");

/// Are we building the gen/kill sets or actually performing the DSE.
enum class DSEComputeKind { BuildGenKillSet, PerformDSE };
enum class DSEComputeKind : unsigned {
ComputeMaxStoreSet=0,
BuildGenKillSet=1,
PerformDSE=2,
};

//===----------------------------------------------------------------------===//
// Utility Functions
//===----------------------------------------------------------------------===//

static inline bool isComputeMaxStoreSet(DSEComputeKind Kind) {
return Kind == DSEComputeKind::ComputeMaxStoreSet;
}

static inline bool isBuildingGenKillSet(DSEComputeKind Kind) {
return Kind == DSEComputeKind::BuildGenKillSet;
}
Expand Down Expand Up @@ -179,6 +187,12 @@ class BlockState {
/// kills an upward visible store.
llvm::BitVector BBKillSet;

/// A bit vector to keep the maximum number of stores that can reach the
/// beginning of the basic block. If a bit is set, that means there is
/// potentially a upward visible store to the location at the beginning
/// of the basic block.
llvm::BitVector BBMaxStoreSet;

/// The dead stores in the current basic block.
llvm::DenseSet<SILInstruction *> DeadStores;

Expand Down Expand Up @@ -214,6 +228,9 @@ class BlockState {
// GenSet and KillSet initially empty.
BBGenSet.resize(LSLocationNum, false);
BBKillSet.resize(LSLocationNum, false);

// MaxStoreSet is optimistically set to true initially.
BBMaxStoreSet.resize(LSLocationNum, true);
}

/// Check whether the WriteSetIn has changed. If it does, we need to rerun
Expand Down Expand Up @@ -315,6 +332,11 @@ class DSEContext {
return getBlockState(I->getParent());
}

/// LSLocation written has been extracted, expanded and mapped to the bit
/// position in the bitvector. update the gen kill set using the bit
/// position.
void updateMaxStoreSetForWrite(BlockState *S, unsigned bit);

/// LSLocation read has been extracted, expanded and mapped to the bit
/// position in the bitvector. update the gen kill set using the bit
/// position.
Expand Down Expand Up @@ -400,6 +422,9 @@ class DSEContext {
/// Compute the genset and killset for the current basic block.
void processBasicBlockForGenKillSet(SILBasicBlock *BB);

/// Compute the genset and killset for the current basic block.
void processBasicBlockForMaxStoreSet(SILBasicBlock *BB);

/// Compute the WriteSetOut and WriteSetIn for the current basic
/// block with the generated gen and kill set.
bool processBasicBlockWithGenKillSet(SILBasicBlock *BB);
Expand Down Expand Up @@ -435,6 +460,29 @@ void DSEContext::processBasicBlockForGenKillSet(SILBasicBlock *BB) {
}
}

void DSEContext::processBasicBlockForMaxStoreSet(SILBasicBlock *BB) {
// Compute the MaxStoreSet at the end of the basic block.
auto *BBState = getBlockState(BB);
if (BB->succ_empty()) {
BBState->BBMaxStoreSet.reset();
} else {
auto Iter = BB->succ_begin();
BBState->BBMaxStoreSet = getBlockState(*Iter)->BBMaxStoreSet;
Iter = std::next(Iter);
for (auto EndIter = BB->succ_end(); Iter != EndIter; ++Iter) {
BBState->BBMaxStoreSet &= getBlockState(*Iter)->BBMaxStoreSet;
}
}

// Compute the MaxStoreSet at the beginning of the basic block.
for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) {
// Only process store insts.
if (!isa<StoreInst>(*I))
continue;
processInstruction(&(*I), DSEComputeKind::ComputeMaxStoreSet);
}
}

bool DSEContext::processBasicBlockWithGenKillSet(SILBasicBlock *BB) {
// Compute the WriteSetOut at the end of the basic block.
mergeSuccessorStates(BB);
Expand Down Expand Up @@ -526,8 +574,6 @@ void DSEContext::updateWriteSetForRead(BlockState *S, unsigned bit) {
LSLocation &L = LSLocationVault[i];
if (!L.isMayAliasLSLocation(R, AA))
continue;
DEBUG(llvm::dbgs() << "Loc Removal: " << LSLocationVault[i].getBase()
<< "\n");
S->stopTrackingLSLocation(i);
}
}
Expand All @@ -540,6 +586,11 @@ void DSEContext::updateGenKillSetForRead(BlockState *S, unsigned bit) {
// alias analysis to determine whether 2 LSLocations are disjointed.
LSLocation &R = LSLocationVault[bit];
for (unsigned i = 0; i < S->LSLocationNum; ++i) {
// If BBMaxStoreSet is not turned on, then there is no reason to turn
// on the kill set nor the gen set for this store for this basic block.
// as there can NOT be a store that reaches the end of this basic block.
if (!S->BBMaxStoreSet.test(i))
continue;
LSLocation &L = LSLocationVault[i];
if (!L.isMayAliasLSLocation(R, AA))
continue;
Expand Down Expand Up @@ -579,6 +630,10 @@ void DSEContext::updateGenKillSetForWrite(BlockState *S, unsigned bit) {
S->BBGenSet.set(bit);
}

void DSEContext::updateMaxStoreSetForWrite(BlockState *S, unsigned bit) {
S->BBMaxStoreSet.set(bit);
}

void DSEContext::processRead(SILInstruction *I, BlockState *S, SILValue Mem,
DSEComputeKind Kind) {
// Construct a LSLocation to represent the memory read by this instruction.
Expand Down Expand Up @@ -612,6 +667,9 @@ void DSEContext::processRead(SILInstruction *I, BlockState *S, SILValue Mem,
}
#endif

if (isComputeMaxStoreSet(Kind))
return;

// Expand the given Mem into individual fields and process them as
// separate reads.
LSLocationList Locs;
Expand Down Expand Up @@ -653,21 +711,21 @@ void DSEContext::processWrite(SILInstruction *I, BlockState *S, SILValue Val,
if (!L.isValid())
return;

#ifndef NDEBUG
// Make sure that the LSLocation getType() returns the same type as the
// stored type.
if (auto *SI = dyn_cast<StoreInst>(I)) {
assert(SI->getDest().getType().getObjectType() == L.getType() &&
"LSLocation returns different type");
}
#endif

// Expand the given Mem into individual fields and process them as separate
// writes.
bool Dead = true;
LSLocationList Locs;
LSLocation::expand(L, Mod, Locs, TE);
llvm::BitVector V(Locs.size());

if (isComputeMaxStoreSet(Kind)) {
for (auto &E : Locs) {
// Only building the gen and kill sets here.
updateMaxStoreSetForWrite(S, getLSLocationBit(E));
}
return;
}

if (isBuildingGenKillSet(Kind)) {
for (auto &E : Locs) {
// Only building the gen and kill sets here.
Expand Down Expand Up @@ -776,6 +834,11 @@ void DSEContext::processUnknownReadMemInst(SILInstruction *I,
// Update the gen kill set.
if (isBuildingGenKillSet(Kind)) {
for (unsigned i = 0; i < S->LSLocationNum; ++i) {
// If BBMaxStoreSet is not turned on, then there is no reason to turn
// on the kill set nor the gen set for this store for this basic block.
// as there can NOT be a store that reaches the end of this basic block.
if (!S->BBMaxStoreSet.test(i))
continue;
if (!AA->mayReadFromMemory(I, LSLocationVault[i].getBase()))
continue;
S->BBKillSet.set(i);
Expand Down Expand Up @@ -843,6 +906,19 @@ bool DSEContext::run() {
BBToLocState[S.getBB()] = &S;
}

// Compute the max store set at the beginning of the basic block.
//
// This helps generating the genset and killset. If there is no way a
// location can have an upward store at a particular point in the basic block,
// we do not need to turn on the genset and killset for the location.
//
// Turning on the genset and killset can be costly as it involves querying
// the AA interface.
auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(F);
for (SILBasicBlock *B : PO->getPostOrder()) {
processBasicBlockForMaxStoreSet(B);
}

// Generate the genset and killset for each basic block.
for (auto &B : *F) {
processBasicBlockForGenKillSet(&B);
Expand All @@ -851,7 +927,6 @@ bool DSEContext::run() {
// Process each basic block with the gen and kill set. Every time the
// WriteSetIn of a basic block changes, the optimization is rerun on its
// predecessors.
auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(F);
llvm::SmallVector<SILBasicBlock *, 16> WorkList;
for (SILBasicBlock *B : PO->getPostOrder()) {
WorkList.push_back(B);
Expand Down

4 comments on commit 4491d86

@nadavrot
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Xin, what's the impact of this change?

@trentxintong
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

compiling stdlib -O
DSE drops from 1.8% to 1.4% of the overall compilation time.

@nadavrot
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Xin, who do you think should review this commit? @rudkx maybe?

@trentxintong
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.