Better handle clobbered registers in O0 register allocation
https://bugs.webkit.org/show_bug.cgi?id=240205
<rdar://87220688>

Reviewed by Yusuke Suzuki.

This patch makes Air's O0 register allocator better handle clobbered
registers. We now model both early and late clobbers directly, and use
this to perform a basic interference analysis when allocating a register
to a Tmp. An early clobber interferes with any Use in an instruction and
with any early Def; a late clobber interferes with any Def in an
instruction and with any late Use. This enables an early Use to be
allocated to a register that is only late clobbered, and a result to be
allocated to a register that is only early clobbered.

Prior to this, the algorithm had a bug where a Use could be allocated to
a register that is early clobbered.
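To illustrate the rule, here is a minimal standalone sketch (hypothetical
names, not the committed code; Air's real check is the interferesWithClobber
lambda in assignTmp in the diff below):

    // Sketch only. A role is active in the Early phase if it touches the
    // register at the start of the instruction (any Use, early Def); it is
    // active in the Late phase if it touches the register at the end of the
    // instruction (any Def, late Use).
    static bool interferesWithClobber(bool activeEarly, bool activeLate,
                                      bool earlyClobbered, bool lateClobbered)
    {
        // An early clobber kills anything active early; a late clobber kills
        // anything active late. So a register that is only late clobbered
        // remains usable for an early Use, and a register that is only early
        // clobbered remains usable for a result (Def).
        return (activeEarly && earlyClobbered) || (activeLate && lateClobbered);
    }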

* b3/air/AirAllocateRegistersAndStackAndGenerateCode.cpp:
(JSC::B3::Air::GenerateAndAllocateRegisters::buildLiveRanges):
(JSC::B3::Air::GenerateAndAllocateRegisters::alloc):
(JSC::B3::Air::GenerateAndAllocateRegisters::freeDeadTmpsIfNeeded):
(JSC::B3::Air::GenerateAndAllocateRegisters::assignTmp):
(JSC::B3::Air::GenerateAndAllocateRegisters::prepareForGeneration):
(JSC::B3::Air::GenerateAndAllocateRegisters::generate):
* b3/air/AirAllocateRegistersAndStackAndGenerateCode.h:
* b3/air/testair.cpp:
* jit/RegisterSet.h:

Canonical link: https://commits.webkit.org/250477@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@294087 268f45cc-cd09-0410-ab3c-d52691b4dbfc
Saam Barati committed May 12, 2022
1 parent 00b9bb4 commit 31381facf3cc84d7e90ed68b67cce8e6cafacae4
@@ -1,3 +1,34 @@
+2022-05-08  Saam Barati  <sbarati@apple.com>
+
+        Better handle clobbered registers in O0 register allocation
+        https://bugs.webkit.org/show_bug.cgi?id=240205
+        <rdar://87220688>
+
+        Reviewed by Yusuke Suzuki.
+
+        This patch makes Air's O0 register allocator better handle clobbered
+        registers. We now model both early and late clobbers directly, and use
+        this to perform a basic interference analysis when allocating a register
+        to a Tmp. An early clobber interferes with any Use in an instruction and
+        with any early Def; a late clobber interferes with any Def in an
+        instruction and with any late Use. This enables an early Use to be
+        allocated to a register that is only late clobbered, and a result to be
+        allocated to a register that is only early clobbered.
+
+        Prior to this, the algorithm had a bug where a Use could be allocated to
+        a register that is early clobbered.
+
+        * b3/air/AirAllocateRegistersAndStackAndGenerateCode.cpp:
+        (JSC::B3::Air::GenerateAndAllocateRegisters::buildLiveRanges):
+        (JSC::B3::Air::GenerateAndAllocateRegisters::alloc):
+        (JSC::B3::Air::GenerateAndAllocateRegisters::freeDeadTmpsIfNeeded):
+        (JSC::B3::Air::GenerateAndAllocateRegisters::assignTmp):
+        (JSC::B3::Air::GenerateAndAllocateRegisters::prepareForGeneration):
+        (JSC::B3::Air::GenerateAndAllocateRegisters::generate):
+        * b3/air/AirAllocateRegistersAndStackAndGenerateCode.h:
+        * b3/air/testair.cpp:
+        * jit/RegisterSet.h:
+
 2022-05-11  Patrick Angle  <pangle@apple.com>
 
         Web Inspector: Parse InjectedScriptSource as a built-in to get guaranteed non-user-overriden JSC built-ins
@@ -78,7 +78,7 @@ void GenerateAndAllocateRegisters::buildLiveRanges(UnifiedTmpLiveness& liveness)
 {
     m_liveRangeEnd = TmpMap<size_t>(m_code, 0);
 
-    m_globalInstIndex = 0;
+    m_globalInstIndex = 1;
     for (BasicBlock* block : m_code) {
         for (Tmp tmp : liveness.liveAtHead(block)) {
             if (!tmp.isReg())
@@ -180,11 +180,12 @@ ALWAYS_INLINE void GenerateAndAllocateRegisters::spill(Tmp tmp, Reg reg)
 {
     ASSERT(reg);
     ASSERT(m_map[tmp].reg == reg);
+    ASSERT(tmp.isReg() || m_liveRangeEnd[tmp] >= m_globalInstIndex);
     flush(tmp, reg);
     release(tmp, reg);
 }
 
-ALWAYS_INLINE void GenerateAndAllocateRegisters::alloc(Tmp tmp, Reg reg, bool isDef)
+ALWAYS_INLINE void GenerateAndAllocateRegisters::alloc(Tmp tmp, Reg reg, Arg::Role role)
 {
     if (Tmp occupyingTmp = m_currentAllocation->at(reg))
         spill(occupyingTmp, reg);
@@ -197,7 +198,7 @@ ALWAYS_INLINE void GenerateAndAllocateRegisters::alloc(Tmp tmp, Reg reg, bool is
     m_availableRegs[tmp.bank()].clear(reg);
     m_currentAllocation->at(reg) = tmp;
 
-    if (!isDef) {
+    if (Arg::isAnyUse(role)) {
         intptr_t offset = m_map[tmp].spillSlot->offsetFromFP();
         if (tmp.bank() == GP)
             m_jit->load64(callFrameAddr(*m_jit, offset), reg.gpr());
@@ -206,12 +207,29 @@ ALWAYS_INLINE void GenerateAndAllocateRegisters::alloc(Tmp tmp, Reg reg, bool is
     }
 }
 
-ALWAYS_INLINE void GenerateAndAllocateRegisters::freeDeadTmpsIfNeeded()
+// freeDeadTmpsAtCurrentInst and freeDeadTmpsAtCurrentBlock are needed for correctness because
+// we reuse stack slots between Tmps that don't interfere. So we need to make sure we don't
+// spill a dead Tmp to a live Tmp's slot.
+// freeDeadTmpsAtCurrentInst is meant to be called as we walk through each instruction in a basic block
+// because it doesn't consult the entire register file, and is faster than freeDeadTmpsAtCurrentBlock.
+// However, it only prunes things that die at a particular inst index within a block, and doesn't prevent
+// us from propagating a Tmp that is live in one block to the head of a block where it is dead. If
+// something dies within a block, freeDeadTmpsAtCurrentInst will catch it. freeDeadTmpsAtCurrentBlock is
+// meant to ensure we prune away any Tmps that are dead at the head of a block.
+ALWAYS_INLINE void GenerateAndAllocateRegisters::freeDeadTmpsAtCurrentInst()
 {
-    if (m_didAlreadyFreeDeadSlots)
-        return;
+    auto iter = m_tmpsToRelease.find(m_globalInstIndex);
+    if (iter != m_tmpsToRelease.end()) {
+        for (Tmp tmp : iter->value) {
+            ASSERT(m_liveRangeEnd[tmp] < m_globalInstIndex);
+            if (Reg reg = m_map[tmp].reg)
+                release(tmp, reg);
+        }
+    }
+}
 
-    m_didAlreadyFreeDeadSlots = true;
+ALWAYS_INLINE void GenerateAndAllocateRegisters::freeDeadTmpsAtCurrentBlock()
+{
     for (size_t i = 0; i < m_currentAllocation->size(); ++i) {
         Tmp tmp = m_currentAllocation->at(i);
         if (!tmp)
@@ -225,44 +243,73 @@ ALWAYS_INLINE void GenerateAndAllocateRegisters::freeDeadTmpsIfNeeded()
     }
 }
 
-ALWAYS_INLINE bool GenerateAndAllocateRegisters::assignTmp(Tmp& tmp, Bank bank, bool isDef)
+ALWAYS_INLINE bool GenerateAndAllocateRegisters::assignTmp(Tmp& tmp, Bank bank, Arg::Role role)
 {
     ASSERT(!tmp.isReg());
-    if (Reg reg = m_map[tmp].reg) {
-        ASSERT(!m_namedDefdRegs.contains(reg));
-        tmp = Tmp(reg);
+
+    auto markRegisterAsUsed = [&] (Reg reg) {
+        if (Arg::isAnyDef(role))
+            m_clobberedToClear.clear(reg);
+        // At this point, it doesn't matter if we add it to the m_namedUsedRegs or m_namedDefdRegs.
+        // We just need to mark that we can't use it again for another tmp.
         m_namedUsedRegs.set(reg);
-        ASSERT(!m_availableRegs[bank].get(reg));
-        return true;
-    }
+    };
 
+    bool mightInterfere = m_earlyClobber.numberOfSetRegisters() || m_lateClobber.numberOfSetRegisters();
+
+    auto interferesWithClobber = [&] (Reg reg) {
+        if (!mightInterfere)
+            return false;
+        if (Arg::isAnyUse(role) && m_earlyClobber.get(reg))
+            return true;
+        if (Arg::isAnyDef(role) && m_lateClobber.get(reg))
+            return true;
+        if (Arg::activeAt(role, Arg::Phase::Early) && m_earlyClobber.get(reg))
+            return true;
+        if (Arg::activeAt(role, Arg::Phase::Late) && m_lateClobber.get(reg))
+            return true;
+        return false;
+    };
+
+    if (Reg reg = m_map[tmp].reg) {
+        if (!interferesWithClobber(reg)) {
+            ASSERT(!m_namedDefdRegs.contains(reg));
+            tmp = Tmp(reg);
+            markRegisterAsUsed(reg);
+            ASSERT(!m_availableRegs[bank].get(reg));
+            return true;
+        }
+        // This is a rare case when we've already allocated a Tmp in some way, but another
+        // Role of the Tmp imposes some restriction on the register value. E.g, if
+        // we have a program like:
+        // Patch Use:tmp1, LateUse:tmp1, lateClobber:x0
+        // The first use of tmp1 can be allocated to x0, but the second cannot.
+        spill(tmp, reg);
+    }
 
-    if (!m_availableRegs[bank].numberOfSetRegisters())
-        freeDeadTmpsIfNeeded();
-
     if (m_availableRegs[bank].numberOfSetRegisters()) {
         // We first take an available register.
         for (Reg reg : m_registers[bank]) {
-            if (m_namedUsedRegs.contains(reg) || m_namedDefdRegs.contains(reg))
+            if (interferesWithClobber(reg) || m_namedUsedRegs.contains(reg) || m_namedDefdRegs.contains(reg))
                 continue;
             if (!m_availableRegs[bank].contains(reg))
                 continue;
-            m_namedUsedRegs.set(reg); // At this point, it doesn't matter if we add it to the m_namedUsedRegs or m_namedDefdRegs. We just need to mark that we can't use it again.
-            alloc(tmp, reg, isDef);
+
+            markRegisterAsUsed(reg);
+            alloc(tmp, reg, role);
             tmp = Tmp(reg);
             return true;
         }
+
         RELEASE_ASSERT_NOT_REACHED();
     }
 
     // Nothing was available, let's make some room.
     for (Reg reg : m_registers[bank]) {
-        if (m_namedUsedRegs.contains(reg) || m_namedDefdRegs.contains(reg))
+        if (interferesWithClobber(reg) || m_namedUsedRegs.contains(reg) || m_namedDefdRegs.contains(reg))
             continue;
 
-        m_namedUsedRegs.set(reg);
-
-        alloc(tmp, reg, isDef);
+        markRegisterAsUsed(reg);
+        alloc(tmp, reg, role);
         tmp = Tmp(reg);
         return true;
     }
@@ -298,7 +345,7 @@ void GenerateAndAllocateRegisters::prepareForGeneration()

     Vector<StackSlot*, 16> freeSlots;
     Vector<StackSlot*, 4> toFree;
-    m_globalInstIndex = 0;
+    m_globalInstIndex = 1;
     for (BasicBlock* block : m_code) {
         auto assignStackSlotToTmp = [&] (Tmp tmp) {
             if (tmp.isReg())
@@ -424,6 +471,12 @@ void GenerateAndAllocateRegisters::generate(CCallHelpers& jit)

     buildLiveRanges(*m_liveness);
 
+    m_code.forEachTmp([&] (Tmp tmp) {
+        ASSERT(!tmp.isReg());
+        if (size_t liveRangeEnd = m_liveRangeEnd[tmp])
+            m_tmpsToRelease.add(liveRangeEnd + 1, Vector<Tmp, 2>()).iterator->value.append(tmp);
+    });
+
     IndexMap<BasicBlock*, IndexMap<Reg, Tmp>> currentAllocationMap(m_code.size());
     {
         IndexMap<Reg, Tmp> defaultCurrentAllocation(Reg::maxIndex() + 1);
@@ -461,7 +514,7 @@ void GenerateAndAllocateRegisters::generate(CCallHelpers& jit)

     Disassembler* disassembler = m_code.disassembler();
 
-    m_globalInstIndex = 0;
+    m_globalInstIndex = 1;
 
     for (BasicBlock* block : m_code) {
         context.currentBlock = block;
@@ -525,6 +578,8 @@ void GenerateAndAllocateRegisters::generate(CCallHelpers& jit)

         ++m_globalInstIndex;
 
+        freeDeadTmpsAtCurrentBlock();
+
         bool isReplayingSameInst = false;
         for (size_t instIndex = 0; instIndex < block->size(); ++instIndex) {
             checkConsistency();
@@ -536,10 +591,11 @@ void GenerateAndAllocateRegisters::generate(CCallHelpers& jit)

             Inst& inst = block->at(instIndex);
 
-            m_didAlreadyFreeDeadSlots = false;
-
             m_namedUsedRegs = RegisterSet();
             m_namedDefdRegs = RegisterSet();
+            m_earlyClobber = RegisterSet();
+            m_lateClobber = RegisterSet();
+            m_clobberedToClear = RegisterSet();
 
             bool needsToGenerate = ([&] () -> bool {
                 // FIXME: We should consider trying to figure out if we can also elide Mov32s
@@ -614,59 +670,67 @@ void GenerateAndAllocateRegisters::generate(CCallHelpers& jit)
                 }
             });
 
-            RegisterSet clobberedRegisters;
-            RegisterSet earlyNextClobberedRegisters;
-            {
-                Inst* nextInst = block->get(instIndex + 1);
-                if (inst.kind.opcode == Patch || (nextInst && nextInst->kind.opcode == Patch)) {
-                    if (inst.kind.opcode == Patch)
-                        clobberedRegisters.merge(inst.extraClobberedRegs());
-                    if (nextInst && nextInst->kind.opcode == Patch)
-                        earlyNextClobberedRegisters.merge(nextInst->extraEarlyClobberedRegs());
-
-                    clobberedRegisters.filter(m_allowedRegisters);
-                    clobberedRegisters.exclude(m_namedDefdRegs);
-                    earlyNextClobberedRegisters.filter(m_allowedRegisters);
-
-                    m_namedDefdRegs.merge(clobberedRegisters);
-                }
-            }
+            if (inst.kind.opcode == Patch) {
+                m_earlyClobber.merge(inst.extraEarlyClobberedRegs());
+                m_lateClobber.merge(inst.extraClobberedRegs());
+
+                m_earlyClobber.filter(m_allowedRegisters);
+                m_lateClobber.filter(m_allowedRegisters);
+
+                m_clobberedToClear.merge(m_earlyClobber);
+                m_clobberedToClear.merge(m_lateClobber);
+                m_clobberedToClear.exclude(m_namedDefdRegs);
+            }
 
-            auto allocNamed = [&] (const RegisterSet& named, bool isDef) {
+            auto allocNamed = [&] (const RegisterSet& named, Arg::Role role) {
                 for (Reg reg : named) {
                     if (Tmp occupyingTmp = currentAllocation[reg]) {
                         if (occupyingTmp == Tmp(reg))
                             continue;
                     }
 
-                    freeDeadTmpsIfNeeded(); // We don't want to spill a dead tmp.
-                    alloc(Tmp(reg), reg, isDef);
+                    alloc(Tmp(reg), reg, role);
                 }
             };
 
-            allocNamed(m_namedUsedRegs, false); // Must come before the defd registers since we may use and def the same register.
-            allocNamed(m_namedDefdRegs, true);
+            auto spillIfNeeded = [&] (const RegisterSet& set) {
+                for (Reg reg : set) {
+                    if (Tmp tmp = m_currentAllocation->at(reg))
+                        spill(tmp, reg);
+                }
+            };
+
+            freeDeadTmpsAtCurrentInst();
+
+            spillIfNeeded(m_earlyClobber);
+            spillIfNeeded(m_lateClobber);
+
+            allocNamed(m_namedUsedRegs, Arg::Role::Use); // Must come before the defd registers since we may use and def the same register.
+            allocNamed(m_namedDefdRegs, Arg::Role::Def);
 
             if (needsToGenerate) {
                 auto tryAllocate = [&] {
-                    Vector<Tmp*, 8> usesToAlloc;
-                    Vector<Tmp*, 8> defsToAlloc;
+                    Vector<std::pair<Tmp*, Arg::Role>, 8> usesToAlloc;
+                    Vector<std::pair<Tmp*, Arg::Role>, 8> defsToAlloc;
 
                     inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Bank, Width) {
                         if (tmp.isReg())
                             return;
 
                         // We treat Use+Def as a use.
                         if (Arg::isAnyUse(role))
-                            usesToAlloc.append(&tmp);
+                            usesToAlloc.append(std::pair { &tmp, role });
                         else if (Arg::isAnyDef(role))
-                            defsToAlloc.append(&tmp);
+                            defsToAlloc.append(std::pair { &tmp, role });
                     });
 
-                    auto tryAllocateTmps = [&] (auto& vector, bool isDef) {
+                    auto tryAllocateTmps = [&] (auto& vector) {
                         bool success = true;
-                        for (Tmp* tmp : vector)
-                            success &= assignTmp(*tmp, tmp->bank(), isDef);
+                        for (std::pair<Tmp*, Arg::Role> pair : vector) {
+                            Tmp& tmp = *std::get<0>(pair);
+                            Arg::Role role = std::get<1>(pair);
+                            success &= assignTmp(tmp, tmp.bank(), role);
+                        }
                         return success;
                     };
 
@@ -676,8 +740,8 @@ void GenerateAndAllocateRegisters::generate(CCallHelpers& jit)
                     // some tmps may both be used and defd. So we handle uses first since forEachTmp could
                     // walk uses/defs in any order.
                     bool success = true;
-                    success &= tryAllocateTmps(usesToAlloc, false);
-                    success &= tryAllocateTmps(defsToAlloc, true);
+                    success &= tryAllocateTmps(usesToAlloc);
+                    success &= tryAllocateTmps(defsToAlloc);
                     return success;
                 };

@@ -729,7 +793,6 @@ void GenerateAndAllocateRegisters::generate(CCallHelpers& jit)
             }
 
             if (m_code.needsUsedRegisters() && inst.kind.opcode == Patch) {
-                freeDeadTmpsIfNeeded();
                 RegisterSet registerSet;
                 for (size_t i = 0; i < currentAllocation.size(); ++i) {
                     if (currentAllocation[i])
@@ -738,13 +801,10 @@ void GenerateAndAllocateRegisters::generate(CCallHelpers& jit)
                 inst.reportUsedRegisters(registerSet);
             }
 
-            auto clobber = [&] (const RegisterSet& set) {
-                for (Reg reg : set) {
-                    Tmp tmp(reg);
-                    ASSERT(currentAllocation[reg] == tmp);
-                    m_availableRegs[tmp.bank()].set(reg);
-                    m_currentAllocation->at(reg) = Tmp();
-                    m_map[tmp].reg = Reg();
+            auto handleClobber = [&] {
+                for (Reg reg : m_clobberedToClear) {
+                    if (Tmp tmp = m_currentAllocation->at(reg))
+                        release(tmp, reg);
                 }
             };

@@ -754,14 +814,11 @@ void GenerateAndAllocateRegisters::generate(CCallHelpers& jit)
                 jump = inst.generate(*m_jit, context);
                 ASSERT_UNUSED(jump, !jump.isSet());
 
-                allocNamed(earlyNextClobberedRegisters, true);
-                clobberedRegisters.merge(earlyNextClobberedRegisters);
-                clobber(clobberedRegisters);
+                handleClobber();
             } else {
                 ASSERT(needsToGenerate);
 
-                clobber(clobberedRegisters);
-                ASSERT(earlyNextClobberedRegisters.isEmpty());
+                handleClobber();
 
                 if (block->numSuccessors()) {
                     // By default, we spill everything between block boundaries. However, we have a small
