Skip to content
Permalink
Browse files
[SIMD] SIMD functions should support Linear Scan and Graph Colouring register allocators.

https://bugs.webkit.org/show_bug.cgi?id=246348

Reviewed by Yusuke Suzuki.

Today, we disable the linear scan and graph coloring register allocators
when WASM SIMD is enabled. Let's fix that by making them conservatively
treat floats as 128 bits when SIMD is enabled.

* Source/JavaScriptCore/assembler/MacroAssemblerARM64.h:
(JSC::MacroAssemblerARM64::vectorNarrow):
(JSC::MacroAssemblerARM64::vectorMulSat):
* Source/JavaScriptCore/b3/air/AirAllocateRegistersAndStackAndGenerateCode.cpp:
(JSC::B3::Air::GenerateAndAllocateRegisters::generate):
* Source/JavaScriptCore/b3/air/AirAllocateRegistersAndStackByLinearScan.cpp:
(JSC::B3::Air::allocateRegistersAndStackByLinearScan):
* Source/JavaScriptCore/b3/air/AirAllocateRegistersByGraphColoring.cpp:
(JSC::B3::Air::allocateRegistersByGraphColoring):
* Source/JavaScriptCore/b3/air/AirAllocateStackByGraphColoring.cpp:
(JSC::B3::Air::allocateStackByGraphColoring):
* Source/JavaScriptCore/b3/air/AirOpcode.opcodes:
* Source/JavaScriptCore/b3/air/AirTmpWidth.cpp:
(JSC::B3::Air::TmpWidth::recompute):
* Source/JavaScriptCore/jit/RegisterSet.h:
* Source/JavaScriptCore/runtime/Options.cpp:
(JSC::Options::recomputeDependentOptions):
* Source/JavaScriptCore/runtime/OptionsList.h:

Canonical link: https://commits.webkit.org/257519@main
  • Loading branch information
justinmichaud committed Dec 8, 2022
1 parent 7d493da commit 244d75cd9da4e7da65f1612fac1fbf240fa72104
Show file tree
Hide file tree
Showing 15 changed files with 203 additions and 88 deletions.
@@ -4920,28 +4920,31 @@ class MacroAssemblerARM64 : public AbstractMacroAssembler<Assembler> {
}
}

// Lane-wise pseudo-minimum of two floating-point vectors: per the formula in
// the body, a lane yields right when right < left and left otherwise.
// NOTE(review): this is a diff view with the +/- markers stripped. Where a
// line appears twice in near-identical form (dest vs. scratch), the first is
// presumably the pre-commit line and the second its replacement; confirm
// against the commit.
void vectorPmin(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
void vectorPmin(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest, FPRegisterID scratch)
{
// Only floating-point lanes are accepted, and neither input may alias dest
// or scratch.
ASSERT(scalarTypeIsFloatingPoint(simdInfo.lane));
ASSERT(left != dest);
ASSERT(right != dest);
ASSERT(left != scratch);
ASSERT(right != scratch);
// right < left ? right : left <=>
// left > right, dest = right

// each bit in lane is 1 if left > right
m_assembler.fcmgt(dest, left, right, simdInfo.lane);
m_assembler.fcmgt(scratch, left, right, simdInfo.lane);
// 1 means use left
// NOTE(review): the formula above says lanes where left > right should yield
// right; "use left" reads as the opposite. Confirm against bsl's operand order.
m_assembler.bsl(dest, right, left);
m_assembler.bsl(scratch, right, left);
// The mask and the select both go through scratch; presumably this copies the
// finished result from scratch into dest.
moveVector(scratch, dest);

}

// Lane-wise pseudo-maximum of two floating-point vectors, built from a
// greater-than compare (fcmgt) followed by a bitwise select (bsl).
// NOTE(review): this is a diff view with the +/- markers stripped. The
// one-argument-shorter signature and the fcmgt/bsl pair that writes dest
// directly are presumably the pre-commit lines; the scratch-based lines are
// presumably their replacements. Confirm against the commit.
void vectorPmax(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
void vectorPmax(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest, FPRegisterID scratch)
{
// Only floating-point lanes are accepted, and neither input may alias dest
// or scratch.
ASSERT(scalarTypeIsFloatingPoint(simdInfo.lane));
ASSERT(left != dest);
ASSERT(right != dest);
ASSERT(left != scratch);
ASSERT(right != scratch);
// right > left, dest = left
m_assembler.fcmgt(dest, right, left, simdInfo.lane);
m_assembler.bsl(dest, right, left);
// Compare right against left into scratch, then select between right and left
// using that mask, still in scratch.
m_assembler.fcmgt(scratch, right, left, simdInfo.lane);
m_assembler.bsl(scratch, right, left);
// Presumably copies the finished result from scratch into dest.
moveVector(scratch, dest);
}

void vectorBitwiseSelect(FPRegisterID left, FPRegisterID right, FPRegisterID inputBitsAndDest)
@@ -4979,6 +4982,11 @@ class MacroAssemblerARM64 : public AbstractMacroAssembler<Assembler> {
m_assembler.vectorEor(dest, left, right);
}

// Clears dest by XOR-ing the register with itself: vectorXor is called with
// dest as both inputs and as the output, using a v128 lane description with no
// sign mode.
void moveZeroToVector(FPRegisterID dest)
{
vectorXor({ SIMDLane::v128, SIMDSignMode::None }, dest, dest, dest);
}

void vectorAbs(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
{
if (scalarTypeIsFloatingPoint(simdInfo.lane))
@@ -5078,17 +5086,19 @@ class MacroAssemblerARM64 : public AbstractMacroAssembler<Assembler> {
m_assembler.fcvtn(dest, input, simdInfo.lane);
}

// Narrows two integer vectors (lower, upper) into one result using the
// saturating narrow instructions: sqxtn/sqxtn2 when the sign mode is Signed,
// sqxtun/sqxtun2 otherwise.
// NOTE(review): this is a diff view with the +/- markers stripped. The
// signature without scratch and the instruction pairs that write dest directly
// are presumably the pre-commit lines; the scratch-based pairs are presumably
// their replacements. Confirm against the commit.
void vectorNarrow(SIMDInfo simdInfo, FPRegisterID lower, FPRegisterID upper, FPRegisterID dest)
void vectorNarrow(SIMDInfo simdInfo, FPRegisterID lower, FPRegisterID upper, FPRegisterID dest, FPRegisterID scratch)
{
// A sign mode is required and the lane type must be integral.
ASSERT(simdInfo.signMode != SIMDSignMode::None);
ASSERT(scalarTypeIsIntegral(simdInfo.lane));
// The first narrow writes scratch before the second one reads upper, so the
// two must be different registers.
ASSERT(scratch != upper);
if (simdInfo.signMode == SIMDSignMode::Signed) {
m_assembler.sqxtn(dest, lower, simdInfo.lane);
m_assembler.sqxtn2(dest, upper, simdInfo.lane);
m_assembler.sqxtn(scratch, lower, simdInfo.lane);
m_assembler.sqxtn2(scratch, upper, simdInfo.lane);
} else {
m_assembler.sqxtun(dest, lower, simdInfo.lane);
m_assembler.sqxtun2(dest, upper, simdInfo.lane);
m_assembler.sqxtun(scratch, lower, simdInfo.lane);
m_assembler.sqxtun2(scratch, upper, simdInfo.lane);
}
// Presumably copies the narrowed result from scratch into dest.
moveVector(scratch, dest);
}

void vectorConvert(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
@@ -5301,6 +5311,7 @@ class MacroAssemblerARM64 : public AbstractMacroAssembler<Assembler> {

void vectorMulSat(FPRegisterID a, FPRegisterID b, FPRegisterID dest)
{
ASSERT(dest != a && dest != b);
// (i_1 * i_2 + 2^14) >> 15
// <=>
// (i_1 * i_2 * 2 + 2^15) >> 16
@@ -2545,14 +2545,14 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
}
}

// X86_64 variant of vectorPmin. The visible body only asserts that the lane is
// floating point and marks its parameters unused; it emits no instructions.
// NOTE(review): this is a diff view with the +/- markers stripped. The first
// signature is presumably the pre-commit one and the second, which adds an
// unnamed trailing FPRegisterID, its replacement. Confirm against the commit.
void vectorPmin(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
void vectorPmin(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest, FPRegisterID)
{
ASSERT(scalarTypeIsFloatingPoint(simdInfo.lane));
// right > left, dest = left
UNUSED_PARAM(left); UNUSED_PARAM(right); UNUSED_PARAM(dest); UNUSED_PARAM(simdInfo);
}

void vectorPmax(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
void vectorPmax(SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest, FPRegisterID)
{
ASSERT(scalarTypeIsFloatingPoint(simdInfo.lane));
// left > right, dest = left
@@ -2594,6 +2594,11 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
UNUSED_PARAM(left); UNUSED_PARAM(right); UNUSED_PARAM(dest);
}

// Clears dest by XOR-ing the register with itself: vectorXor is called with
// dest as both inputs and as the output, using a v128 lane description with no
// sign mode.
void moveZeroToVector(FPRegisterID dest)
{
vectorXor({ SIMDLane::v128, SIMDSignMode::None }, dest, dest, dest);
}

void vectorAbs(SIMDInfo simdInfo, FPRegisterID input, FPRegisterID dest)
{
switch (simdInfo.lane) {
@@ -2727,7 +2732,7 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
ASSERT(simdInfo.lane == SIMDLane::f64x2);
}

void vectorNarrow(SIMDInfo simdInfo, FPRegisterID lower, FPRegisterID upper, FPRegisterID dest)
void vectorNarrow(SIMDInfo simdInfo, FPRegisterID lower, FPRegisterID upper, FPRegisterID dest, FPRegisterID)
{
ASSERT(simdInfo.signMode != SIMDSignMode::None);
ASSERT(scalarTypeIsIntegral(simdInfo.lane));
@@ -2244,7 +2244,7 @@ class LowerToAir {
Tmp result = op == UDiv ? m_eax : m_edx;

append(Move, tmp(m_value->child(0)), m_eax);
append(Xor64, m_edx, m_edx);
append(Move, Arg::imm(0), m_edx);
append(div, m_eax, m_edx, tmp(m_value->child(1)));
append(Move, result, tmp(m_value));
}
@@ -78,7 +78,7 @@ struct TmpData {

Interval interval;
StackSlot* spilled { nullptr };
RegisterSet possibleRegs;
ScalarRegisterSet possibleRegs;
Reg assigned;
bool isUnspillable { false };
bool didBuildPossibleRegs { false };
@@ -151,10 +151,12 @@ class LinearScan {
{
forEachBank(
[&] (Bank bank) {
m_registers[bank] = m_code.regsInPriorityOrder(bank);
for (Reg r : m_registers[bank])
m_registerSetBuilder[bank].add(r, IgnoreVectors);
m_unifiedRegisterSetBuilder.merge(m_registerSetBuilder[bank]);
m_allowedRegistersInPriorityOrder[bank] = m_code.regsInPriorityOrder(bank);
for (Reg r : m_allowedRegistersInPriorityOrder[bank])
m_allowedRegisters[bank].add(r, IgnoreVectors);
m_allAllowedRegisters = m_allAllowedRegisters.toRegisterSet()
.merge(m_allowedRegisters[bank].toRegisterSet())
.buildScalarRegisterSet();
});
}

@@ -274,21 +276,19 @@ class LinearScan {
RegisterSetBuilder prevRegs = regs;
prev->forEach<Reg>(
[&] (Reg& reg, Arg::Role role, Bank, Width width) {
ASSERT(width <= Width64);
if (Arg::isLateDef(role))
prevRegs.add(reg, width);
});
if (prev->kind.opcode == Patch)
prevRegs.merge(prev->extraClobberedRegs());
prevRegs.filter(m_unifiedRegisterSetBuilder);
prevRegs.filter(m_allAllowedRegisters.toRegisterSet().includeWholeRegisterWidth());
if (!prevRegs.isEmpty())
m_clobbers.append(Clobber(indexOfHead + instIndex * 2 - 1, prevRegs.buildAndValidate()));
}
if (Inst* next = block->get(instIndex)) {
RegisterSetBuilder nextRegs = regs;
next->forEach<Reg>(
[&] (Reg& reg, Arg::Role role, Bank, Width width) {
ASSERT(width <= Width64);
if (Arg::isEarlyDef(role))
nextRegs.add(reg, width);
});
@@ -462,21 +462,21 @@ class LinearScan {
while (clobberIndex < m_clobbers.size() && m_clobbers[clobberIndex].index < index)
clobberIndex++;

RegisterSetBuilder possibleRegs = m_registerSetBuilder[bank];
RegisterSetBuilder possibleRegs = m_allowedRegisters[bank].toRegisterSet();
for (size_t i = clobberIndex; i < m_clobbers.size() && m_clobbers[i].index < entry.interval.end(); ++i)
possibleRegs.exclude(m_clobbers[i].regs);
possibleRegs.exclude(m_clobbers[i].regs.includeWholeRegisterWidth());

entry.possibleRegs = possibleRegs.buildWithLowerBits();
entry.possibleRegs = possibleRegs.buildScalarRegisterSet();
entry.didBuildPossibleRegs = true;
}

if (verbose())
dataLog(" Possible regs: ", entry.possibleRegs, "\n");

// Find a free register that we are allowed to use.
if (m_active.size() != m_registers[bank].size()) {
if (m_active.size() != m_allowedRegistersInPriorityOrder[bank].size()) {
bool didAssign = false;
for (Reg reg : m_registers[bank]) {
for (Reg reg : m_allowedRegistersInPriorityOrder[bank]) {
// FIXME: Could do priority coloring here.
// https://bugs.webkit.org/show_bug.cgi?id=170304
if (!m_activeRegs.contains(reg, IgnoreVectors) && entry.possibleRegs.contains(reg, IgnoreVectors)) {
@@ -542,7 +542,7 @@ class LinearScan {
{
TmpData& entry = m_map[tmp];
RELEASE_ASSERT(!entry.isUnspillable);
entry.spilled = m_code.addStackSlot(conservativeRegisterBytesWithoutVectors(tmp.bank()), StackSlotKind::Spill);
entry.spilled = m_code.addStackSlot(Options::useWebAssemblySIMD() ? conservativeRegisterBytes(tmp.bank()) : conservativeRegisterBytesWithoutVectors(tmp.bank()), StackSlotKind::Spill);
entry.assigned = Reg();
m_didSpill = true;
}
@@ -578,7 +578,7 @@ class LinearScan {
StackSlot* spilled = m_map[tmp].spilled;
if (!spilled)
return;
Opcode move = bank == GP ? Move : MoveDouble;
Opcode move = bank == GP ? Move : (Options::useWebAssemblySIMD() ? MoveVector : MoveDouble);
tmp = addSpillTmpWithInterval(bank, intervalForSpill(indexOfEarly, role));
if (role == Arg::Scratch)
return;
@@ -621,7 +621,9 @@ class LinearScan {
}

entry.spillIndex = m_usedSpillSlots.findBit(0, false);
ptrdiff_t offset = -static_cast<ptrdiff_t>(m_code.frameSize()) - static_cast<ptrdiff_t>(entry.spillIndex) * 8 - 8;
size_t slotSize = Options::useWebAssemblySIMD() ? conservativeRegisterBytes(FP) : conservativeRegisterBytesWithoutVectors(FP);
ASSERT(entry.spilled->byteSize() <= slotSize);
ptrdiff_t offset = -static_cast<ptrdiff_t>(m_code.frameSize()) - static_cast<ptrdiff_t>(entry.spillIndex) * slotSize - slotSize;
if (verbose())
dataLog(" Assigning offset = ", offset, " to spill ", pointerDump(entry.spilled), " for ", tmp, "\n");
entry.spilled->setOffsetFromFP(offset);
@@ -669,9 +671,9 @@ class LinearScan {
}

Code& m_code;
Vector<Reg> m_registers[numBanks];
RegisterSet m_registerSetBuilder[numBanks];
RegisterSet m_unifiedRegisterSetBuilder;
Vector<Reg> m_allowedRegistersInPriorityOrder[numBanks];
ScalarRegisterSet m_allowedRegisters[numBanks];
ScalarRegisterSet m_allAllowedRegisters;
IndexMap<BasicBlock*, size_t> m_startIndex;
TmpMap<TmpData> m_map;
IndexMap<BasicBlock*, PhaseInsertionSet> m_insertionSets;
@@ -687,7 +689,6 @@ class LinearScan {

void allocateRegistersAndStackByLinearScan(Code& code)
{
RELEASE_ASSERT(!Options::useWebAssemblySIMD());
PhaseScope phaseScope(code, "allocateRegistersAndStackByLinearScan");
if (verbose())
dataLog("Air before linear scan:\n", code);
@@ -65,6 +65,13 @@ class AbstractColoringAllocator {
, m_code(code)
{
initializeDegrees(tmpArraySize);

if (traceDebug) {
dataLog("Unspillable tmps: [");
for (size_t i = 0; i < unspillableTmps.size(); ++i)
dataLogIf(unspillableTmps.quickGet(i), TmpMapper::tmpFromAbsoluteIndex(i), ", ");
dataLogLn("]");
}

m_adjacencyList.resize(tmpArraySize);
m_moveList.resize(tmpArraySize);
@@ -330,7 +337,7 @@ class AbstractColoringAllocator {
ASSERT(!m_unspillableTmps.get(victimIndex));
ASSERT(!isPrecolored(victimIndex));
if (traceDebug)
dataLogLn("Selecting spill ", victimIndex);
dataLogLn("Selecting spill ", victimIndex, "(", TmpMapper::tmpFromAbsoluteIndex(victimIndex), ")");
return victimIndex;
}

@@ -634,8 +641,10 @@ class Briggs : public AbstractColoringAllocator<IndexType, InterferenceSet, bank
ASSERT(!m_simplifyWorklist.size());
ASSERT(m_spillWorklist.isEmpty());
IndexType firstNonRegIndex = m_lastPrecoloredRegisterIndex + 1;
for (IndexType i = firstNonRegIndex; i < m_degrees.size(); ++i)
for (IndexType i = firstNonRegIndex; i < m_degrees.size(); ++i) {
dataLogLnIf(!hasBeenSimplified(i), "Tmp ", TmpMapper::tmpFromAbsoluteIndex(i), " was not simplified. Maybe the graph is not colorable?");
ASSERT(hasBeenSimplified(i));
}
}

assignColors();
@@ -738,11 +747,11 @@ class Briggs : public AbstractColoringAllocator<IndexType, InterferenceSet, bank
unsigned degree = m_degrees[i];
if (degree < registerCount) {
if (traceDebug)
dataLogLn("Adding ", TmpMapper::tmpFromAbsoluteIndex(i), " to simplify worklist");
dataLogLn("Adding ", i, "(", TmpMapper::tmpFromAbsoluteIndex(i), ") with degree ", degree, " to simplify worklist");
m_simplifyWorklist.append(i);
} else {
if (traceDebug)
dataLogLn("Adding ", TmpMapper::tmpFromAbsoluteIndex(i), " to spill worklist");
dataLogLn("Adding ", i, "(", TmpMapper::tmpFromAbsoluteIndex(i), ") with degree ", degree, " to spill worklist");
addToSpill(i);
}
}
@@ -763,7 +772,7 @@ class Briggs : public AbstractColoringAllocator<IndexType, InterferenceSet, bank
m_isOnSelectStack.quickSet(lastIndex);

if (traceDebug)
dataLogLn("Simplifying ", lastIndex, " by adding it to select stack");
dataLogLn("Simplifying ", lastIndex, "(", TmpMapper::tmpFromAbsoluteIndex(lastIndex), ") by adding it to select stack");

forEachAdjacent(lastIndex, [this](IndexType adjacentTmpIndex) {
decrementDegreeInSimplification(adjacentTmpIndex);
@@ -1367,6 +1376,10 @@ class ColoringAllocator : public AllocatorType<IndexType, InterferenceSet, bank>
m_regsInPriorityOrder.append(reg);
}
}
if (traceDebug) {
dataLogLn("Registers in priority order: ", listDump(m_regsInPriorityOrder));
dataLogLn("Pinned regs: ", listDump(m_pinnedRegs));
}

m_interferenceEdges.setMaxIndex(AbsoluteTmpMapper<bank>::absoluteIndex(m_code.numTmps(bank)));

@@ -1563,8 +1576,13 @@ class ColoringAllocator : public AllocatorType<IndexType, InterferenceSet, bank>

void build(Inst* prevInst, Inst* nextInst, const typename TmpLiveness<bank>::LocalCalc& localCalc)
{
if (traceDebug)
if (traceDebug) {
dataLog("Building between ", pointerDump(prevInst), " and ", pointerDump(nextInst), ":\n");
dataLog("Live values: [");
for (Tmp liveTmp : localCalc.live())
dataLog(liveTmp, ", ");
dataLogLn("]");
}

Inst::forEachDefWithExtraClobberedRegs<Tmp>(
prevInst, nextInst,
@@ -1710,6 +1728,7 @@ class ColoringAllocator : public AllocatorType<IndexType, InterferenceSet, bank>
switch (inst.kind.opcode) {
case MoveFloat:
case MoveDouble:
case MoveVector:
break;
default:
return false;
@@ -1739,7 +1758,7 @@ class ColoringAllocator : public AllocatorType<IndexType, InterferenceSet, bank>
return false;

if (tmpWidth->defWidth(inst.args[0].tmp()) > Width32
&& tmpWidth->useWidth(inst.args[1].tmp()) > Width32)
|| tmpWidth->useWidth(inst.args[1].tmp()) > Width32)
return false;
}

@@ -1981,7 +2000,12 @@ class GraphColoringRegisterAllocation {

// Maps a Width to a size value: 4 for widths up to Width32, 8 for widths up to
// Width64, and 16 for Width128 (the only remaining case, which is asserted).
// NOTE(review): this is a diff view with the +/- markers stripped. The single
// ternary return is presumably the pre-commit body, and the if-chain after it
// its replacement. Confirm against the commit.
static unsigned stackSlotMinimumWidth(Width width)
{
return width <= Width32 ? 4 : 8;
if (width <= Width32)
return 4;
if (width <= Width64)
return 8;
ASSERT(width == Width128);
return 16;
}

template<Bank bank, typename AllocatorType>
@@ -2156,6 +2180,10 @@ class GraphColoringRegisterAllocation {
case 8:
move = bank == GP ? Move : MoveDouble;
break;
case 16:
ASSERT(bank == FP);
move = MoveVector;
break;
default:
RELEASE_ASSERT_NOT_REACHED();
break;
@@ -2193,7 +2221,6 @@ class GraphColoringRegisterAllocation {

void allocateRegistersByGraphColoring(Code& code)
{
RELEASE_ASSERT(!Options::useWebAssemblySIMD());
PhaseScope phaseScope(code, "allocateRegistersByGraphColoring");

if (traceDebug)

0 comments on commit 244d75c

Please sign in to comment.