Skip to content

Commit

Permalink
[JSC] Add ceil() support for x86 and expose it to B3
Browse files Browse the repository at this point in the history
https://bugs.webkit.org/show_bug.cgi?id=152231

Patch by Benjamin Poulain <bpoulain@apple.com> on 2015-12-14
Reviewed by Geoffrey Garen.

Most x86 CPUs we care about support ceil() natively
with the round instruction.

This patch exposes that behind a runtime flag, uses it
in the Math.ceil() thunk and exposes it to B3.

* assembler/MacroAssemblerARM64.h:
(JSC::MacroAssemblerARM64::supportsFloatingPointCeil):
* assembler/MacroAssemblerARMv7.h:
(JSC::MacroAssemblerARMv7::supportsFloatingPointCeil):
* assembler/MacroAssemblerMIPS.h:
(JSC::MacroAssemblerMIPS::supportsFloatingPointCeil):
* assembler/MacroAssemblerSH4.h:
(JSC::MacroAssemblerSH4::supportsFloatingPointCeil):
* assembler/MacroAssemblerX86Common.cpp:
* assembler/MacroAssemblerX86Common.h:
(JSC::MacroAssemblerX86Common::ceilDouble):
(JSC::MacroAssemblerX86Common::ceilFloat):
(JSC::MacroAssemblerX86Common::supportsFloatingPointCeil):
(JSC::MacroAssemblerX86Common::supportsLZCNT):
* assembler/X86Assembler.h:
(JSC::X86Assembler::roundss_rr):
(JSC::X86Assembler::roundss_mr):
(JSC::X86Assembler::roundsd_rr):
(JSC::X86Assembler::roundsd_mr):
(JSC::X86Assembler::mfence):
(JSC::X86Assembler::X86InstructionFormatter::threeByteOp):
* b3/B3ConstDoubleValue.cpp:
(JSC::B3::ConstDoubleValue::ceilConstant):
* b3/B3ConstDoubleValue.h:
* b3/B3ConstFloatValue.cpp:
(JSC::B3::ConstFloatValue::ceilConstant):
* b3/B3ConstFloatValue.h:
* b3/B3LowerMacrosAfterOptimizations.cpp:
* b3/B3LowerToAir.cpp:
(JSC::B3::Air::LowerToAir::lower):
* b3/B3Opcode.cpp:
(WTF::printInternal):
* b3/B3Opcode.h:
* b3/B3ReduceDoubleToFloat.cpp:
* b3/B3ReduceStrength.cpp:
* b3/B3Validate.cpp:
* b3/B3Value.cpp:
(JSC::B3::Value::ceilConstant):
(JSC::B3::Value::effects):
(JSC::B3::Value::key):
(JSC::B3::Value::typeFor):
* b3/B3Value.h:
* b3/air/AirOpcode.opcodes:
* b3/testb3.cpp:
(JSC::B3::testCeilArg):
(JSC::B3::testCeilImm):
(JSC::B3::testCeilMem):
(JSC::B3::testCeilCeilArg):
(JSC::B3::testCeilIToD64):
(JSC::B3::testCeilIToD32):
(JSC::B3::testCeilArgWithUselessDoubleConversion):
(JSC::B3::testCeilArgWithEffectfulDoubleConversion):
(JSC::B3::populateWithInterestingValues):
(JSC::B3::run):
* ftl/FTLB3Output.h:
(JSC::FTL::Output::ceil64):
* jit/ThunkGenerators.cpp:
(JSC::ceilThunkGenerator):

Canonical link: https://commits.webkit.org/170383@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@194062 268f45cc-cd09-0410-ab3c-d52691b4dbfc
  • Loading branch information
Benjamin Poulain authored and webkit-commit-queue committed Dec 14, 2015
1 parent fe8f821 commit 01738ab
Show file tree
Hide file tree
Showing 25 changed files with 504 additions and 18 deletions.
72 changes: 72 additions & 0 deletions Source/JavaScriptCore/ChangeLog
@@ -1,3 +1,75 @@
2015-12-14 Benjamin Poulain <bpoulain@apple.com>

[JSC] Add ceil() support for x86 and expose it to B3
https://bugs.webkit.org/show_bug.cgi?id=152231

Reviewed by Geoffrey Garen.

Most x86 CPUs we care about support ceil() natively
with the round instruction.

This patch exposes that behind a runtime flag, uses it
in the Math.ceil() thunk and exposes it to B3.

* assembler/MacroAssemblerARM64.h:
(JSC::MacroAssemblerARM64::supportsFloatingPointCeil):
* assembler/MacroAssemblerARMv7.h:
(JSC::MacroAssemblerARMv7::supportsFloatingPointCeil):
* assembler/MacroAssemblerMIPS.h:
(JSC::MacroAssemblerMIPS::supportsFloatingPointCeil):
* assembler/MacroAssemblerSH4.h:
(JSC::MacroAssemblerSH4::supportsFloatingPointCeil):
* assembler/MacroAssemblerX86Common.cpp:
* assembler/MacroAssemblerX86Common.h:
(JSC::MacroAssemblerX86Common::ceilDouble):
(JSC::MacroAssemblerX86Common::ceilFloat):
(JSC::MacroAssemblerX86Common::supportsFloatingPointCeil):
(JSC::MacroAssemblerX86Common::supportsLZCNT):
* assembler/X86Assembler.h:
(JSC::X86Assembler::roundss_rr):
(JSC::X86Assembler::roundss_mr):
(JSC::X86Assembler::roundsd_rr):
(JSC::X86Assembler::roundsd_mr):
(JSC::X86Assembler::mfence):
(JSC::X86Assembler::X86InstructionFormatter::threeByteOp):
* b3/B3ConstDoubleValue.cpp:
(JSC::B3::ConstDoubleValue::ceilConstant):
* b3/B3ConstDoubleValue.h:
* b3/B3ConstFloatValue.cpp:
(JSC::B3::ConstFloatValue::ceilConstant):
* b3/B3ConstFloatValue.h:
* b3/B3LowerMacrosAfterOptimizations.cpp:
* b3/B3LowerToAir.cpp:
(JSC::B3::Air::LowerToAir::lower):
* b3/B3Opcode.cpp:
(WTF::printInternal):
* b3/B3Opcode.h:
* b3/B3ReduceDoubleToFloat.cpp:
* b3/B3ReduceStrength.cpp:
* b3/B3Validate.cpp:
* b3/B3Value.cpp:
(JSC::B3::Value::ceilConstant):
(JSC::B3::Value::effects):
(JSC::B3::Value::key):
(JSC::B3::Value::typeFor):
* b3/B3Value.h:
* b3/air/AirOpcode.opcodes:
* b3/testb3.cpp:
(JSC::B3::testCeilArg):
(JSC::B3::testCeilImm):
(JSC::B3::testCeilMem):
(JSC::B3::testCeilCeilArg):
(JSC::B3::testCeilIToD64):
(JSC::B3::testCeilIToD32):
(JSC::B3::testCeilArgWithUselessDoubleConversion):
(JSC::B3::testCeilArgWithEffectfulDoubleConversion):
(JSC::B3::populateWithInterestingValues):
(JSC::B3::run):
* ftl/FTLB3Output.h:
(JSC::FTL::Output::ceil64):
* jit/ThunkGenerators.cpp:
(JSC::ceilThunkGenerator):

2015-12-14 Andreas Kling <akling@apple.com>

ResourceUsageOverlay should show GC timers.
Expand Down
1 change: 1 addition & 0 deletions Source/JavaScriptCore/assembler/MacroAssemblerARM64.h
Expand Up @@ -1199,6 +1199,7 @@ class MacroAssemblerARM64 : public AbstractMacroAssembler<ARM64Assembler, MacroA
// Static capability queries for this backend. Each returns a fixed answer;
// here truncate, sqrt, abs and ceil are all reported as supported.
static bool supportsFloatingPointTruncate() { return true; }
static bool supportsFloatingPointSqrt() { return true; }
static bool supportsFloatingPointAbs() { return true; }
static bool supportsFloatingPointCeil() { return true; }

enum BranchTruncateType { BranchIfTruncateFailed, BranchIfTruncateSuccessful };

Expand Down
7 changes: 7 additions & 0 deletions Source/JavaScriptCore/assembler/MacroAssemblerARMv7.h
Expand Up @@ -856,6 +856,7 @@ class MacroAssemblerARMv7 : public AbstractMacroAssembler<ARMv7Assembler, MacroA
// Static capability queries for this backend. Truncate, sqrt and abs are
// reported as supported; ceil is not. The ceilDouble() defined for this class
// is a stub that asserts ceil is unsupported and then crashes.
static bool supportsFloatingPointTruncate() { return true; }
static bool supportsFloatingPointSqrt() { return true; }
static bool supportsFloatingPointAbs() { return true; }
static bool supportsFloatingPointCeil() { return false; }

void loadDouble(ImplicitAddress address, FPRegisterID dest)
{
Expand Down Expand Up @@ -1046,6 +1047,12 @@ class MacroAssemblerARMv7 : public AbstractMacroAssembler<ARMv7Assembler, MacroA
m_assembler.vneg(dest, src);
}

// Stub for a backend that reports supportsFloatingPointCeil() == false.
// It emits no code: it asserts that ceil really is unsupported and then
// crashes unconditionally, so it must never be reached at runtime.
// NOTE(review): presumably exists so shared code naming ceilDouble() still
// compiles for this backend — confirm against callers.
NO_RETURN_DUE_TO_CRASH void ceilDouble(FPRegisterID, FPRegisterID)
{
ASSERT(!supportsFloatingPointCeil());
CRASH();
}

void convertInt32ToDouble(RegisterID src, FPRegisterID dest)
{
m_assembler.vmov(fpTempRegister, src, src);
Expand Down
7 changes: 7 additions & 0 deletions Source/JavaScriptCore/assembler/MacroAssemblerMIPS.h
Expand Up @@ -625,6 +625,12 @@ class MacroAssemblerMIPS : public AbstractMacroAssembler<MIPSAssembler, MacroAss
RELEASE_ASSERT_NOT_REACHED();
}

// Stub for a backend that reports supportsFloatingPointCeil() == false.
// It emits no code: it asserts that ceil really is unsupported and then
// crashes unconditionally, so it must never be reached at runtime.
// NOTE(review): presumably exists so shared code naming ceilDouble() still
// compiles for this backend — confirm against callers.
NO_RETURN_DUE_TO_CRASH void ceilDouble(FPRegisterID, FPRegisterID)
{
ASSERT(!supportsFloatingPointCeil());
CRASH();
}

ConvertibleLoadLabel convertibleLoadPtr(Address address, RegisterID dest)
{
ConvertibleLoadLabel result(this);
Expand Down Expand Up @@ -1209,6 +1215,7 @@ class MacroAssemblerMIPS : public AbstractMacroAssembler<MIPSAssembler, MacroAss
#endif
}
// Static capability queries: this backend reports both abs and ceil as
// unsupported. The ceilDouble() defined for this class is a crashing stub.
static bool supportsFloatingPointAbs() { return false; }
static bool supportsFloatingPointCeil() { return false; }

// Stack manipulation operations:
//
Expand Down
7 changes: 7 additions & 0 deletions Source/JavaScriptCore/assembler/MacroAssemblerSH4.h
Expand Up @@ -1091,6 +1091,7 @@ class MacroAssemblerSH4 : public AbstractMacroAssembler<SH4Assembler, MacroAssem
// Static capability queries for this backend. Truncate, sqrt and abs are
// reported as supported; ceil is not. The ceilDouble() defined for this class
// is a stub that asserts ceil is unsupported and then crashes.
static bool supportsFloatingPointTruncate() { return true; }
static bool supportsFloatingPointSqrt() { return true; }
static bool supportsFloatingPointAbs() { return true; }
static bool supportsFloatingPointCeil() { return false; }

void moveDoubleToInts(FPRegisterID src, RegisterID dest1, RegisterID dest2)
{
Expand Down Expand Up @@ -1574,6 +1575,12 @@ class MacroAssemblerSH4 : public AbstractMacroAssembler<SH4Assembler, MacroAssem
m_assembler.dabs(dest);
}

// Stub for a backend that reports supportsFloatingPointCeil() == false.
// It emits no code: it asserts that ceil really is unsupported and then
// crashes unconditionally, so it must never be reached at runtime.
// NOTE(review): presumably exists so shared code naming ceilDouble() still
// compiles for this backend — confirm against callers.
NO_RETURN_DUE_TO_CRASH void ceilDouble(FPRegisterID, FPRegisterID)
{
ASSERT(!supportsFloatingPointCeil());
CRASH();
}

Jump branchTest8(ResultCondition cond, Address address, TrustedImm32 mask = TrustedImm32(-1))
{
RegisterID addressTempRegister = claimScratch();
Expand Down
3 changes: 2 additions & 1 deletion Source/JavaScriptCore/assembler/MacroAssemblerX86Common.cpp
Expand Up @@ -552,7 +552,8 @@ void MacroAssemblerX86Common::probe(MacroAssemblerX86Common::ProbeFunction funct
MacroAssemblerX86Common::SSE2CheckState MacroAssemblerX86Common::s_sse2CheckState = NotCheckedSSE2;
#endif

MacroAssemblerX86Common::LZCNTCheckState MacroAssemblerX86Common::s_lzcntCheckState = LZCNTCheckState::NotChecked;
MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_sse4_1CheckState = CPUIDCheckState::NotChecked;
MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_lzcntCheckState = CPUIDCheckState::NotChecked;

} // namespace JSC

Expand Down
68 changes: 62 additions & 6 deletions Source/JavaScriptCore/assembler/MacroAssemblerX86Common.h
Expand Up @@ -570,6 +570,25 @@ class MacroAssemblerX86Common : public AbstractMacroAssembler<X86Assembler, Macr
m_assembler.xorpd_rr(src, dst);
}

// Emits a register-to-register roundsd that rounds the double in src toward
// +infinity and writes the result to dst. The commit gates this behind the
// runtime check supportsFloatingPointCeil().
void ceilDouble(FPRegisterID src, FPRegisterID dst)
{
    const auto roundUp = X86Assembler::RoundingType::TowardInfiniti;
    m_assembler.roundsd_rr(src, dst, roundUp);
}

// Memory-operand form of ceilDouble: emits roundsd reading the double at
// [src.base + src.offset], rounding toward +infinity into dst.
void ceilDouble(Address src, FPRegisterID dst)
{
    const auto roundUp = X86Assembler::RoundingType::TowardInfiniti;
    m_assembler.roundsd_mr(src.offset, src.base, dst, roundUp);
}

// Emits a register-to-register roundss that rounds the float in src toward
// +infinity and writes the result to dst.
void ceilFloat(FPRegisterID src, FPRegisterID dst)
{
    const auto roundUp = X86Assembler::RoundingType::TowardInfiniti;
    m_assembler.roundss_rr(src, dst, roundUp);
}

// Memory-operand form of ceilFloat: emits roundss reading the float at
// [src.base + src.offset], rounding toward +infinity into dst.
void ceilFloat(Address src, FPRegisterID dst)
{
    const auto roundUp = X86Assembler::RoundingType::TowardInfiniti;
    m_assembler.roundss_mr(src.offset, src.base, dst, roundUp);
}

// Memory access operations:
//
Expand Down Expand Up @@ -1797,6 +1816,42 @@ class MacroAssemblerX86Common : public AbstractMacroAssembler<X86Assembler, Macr
return X86Assembler::maxJumpReplacementSize();
}

// Runtime check for the ceil*() emitters. The answer is bit 19 of the ECX
// value produced by CPUID leaf 1; it is cached in s_sse4_1CheckState, so CPUID
// only executes while that state is still NotChecked. When the compiler is
// neither MSVC nor GCC/Clang no query is made, flags stays 0 and the result
// is false.
static bool supportsFloatingPointCeil()
{
    // A previous call already recorded the answer.
    if (s_sse4_1CheckState != CPUIDCheckState::NotChecked)
        return s_sse4_1CheckState == CPUIDCheckState::Set;

    int flags = 0;
#if COMPILER(MSVC)
    int cpuInfo[4];
    __cpuid(cpuInfo, 0x1);
    flags = cpuInfo[2];
#elif COMPILER(GCC_OR_CLANG)
#if CPU(X86_64)
    asm (
        "movl $0x1, %%eax;"
        "cpuid;"
        "movl %%ecx, %0;"
        : "=g" (flags)
        :
        : "%eax", "%ebx", "%ecx", "%edx"
    );
#else
    // This variant saves and restores %ebx itself around cpuid instead of
    // listing it as a clobber.
    asm (
        "movl $0x1, %%eax;"
        "pushl %%ebx;"
        "cpuid;"
        "popl %%ebx;"
        "movl %%ecx, %0;"
        : "=g" (flags)
        :
        : "%eax", "%ecx", "%edx"
    );
#endif
#endif // COMPILER(GCC_OR_CLANG)

    const int sse4_1FeatureBit = 1 << 19;
    const bool featurePresent = (flags & sse4_1FeatureBit) != 0;
    s_sse4_1CheckState = featurePresent ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
    return featurePresent;
}

#if ENABLE(MASM_PROBE)
void probe(ProbeFunction, void* arg1, void* arg2);
#endif // ENABLE(MASM_PROBE)
Expand Down Expand Up @@ -1837,10 +1892,10 @@ class MacroAssemblerX86Common : public AbstractMacroAssembler<X86Assembler, Macr
m_assembler.cmovl_rr(cond, src, dest);
#endif
}

static bool supportsLZCNT()
{
if (s_lzcntCheckState == LZCNTCheckState::NotChecked) {
if (s_lzcntCheckState == CPUIDCheckState::NotChecked) {
int flags = 0;
#if COMPILER(MSVC)
int cpuInfo[4];
Expand Down Expand Up @@ -1869,9 +1924,9 @@ class MacroAssemblerX86Common : public AbstractMacroAssembler<X86Assembler, Macr
);
#endif
#endif // COMPILER(GCC_OR_CLANG)
s_lzcntCheckState = (flags & 0x20) ? LZCNTCheckState::Set : LZCNTCheckState::Clear;
s_lzcntCheckState = (flags & 0x20) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
}
return s_lzcntCheckState == LZCNTCheckState::Set;
return s_lzcntCheckState == CPUIDCheckState::Set;
}

private:
Expand Down Expand Up @@ -2041,12 +2096,13 @@ class MacroAssemblerX86Common : public AbstractMacroAssembler<X86Assembler, Macr

#endif

enum class LZCNTCheckState {
enum class CPUIDCheckState {
NotChecked,
Clear,
Set
};
static LZCNTCheckState s_lzcntCheckState;
static CPUIDCheckState s_sse4_1CheckState;
static CPUIDCheckState s_lzcntCheckState;
};

} // namespace JSC
Expand Down
68 changes: 63 additions & 5 deletions Source/JavaScriptCore/assembler/X86Assembler.h
Expand Up @@ -264,6 +264,7 @@ class X86Assembler {
OP2_CVTSI2SD_VsdEd = 0x2A,
OP2_CVTTSD2SI_GdWsd = 0x2C,
OP2_UCOMISD_VsdWsd = 0x2E,
OP2_3BYTE_ESCAPE_3A = 0x3A,
OP2_CMOVCC = 0x40,
OP2_ADDSD_VsdWsd = 0x58,
OP2_MULSD_VsdWsd = 0x59,
Expand All @@ -280,7 +281,7 @@ class X86Assembler {
OP2_MOVD_EdVd = 0x7E,
OP2_JCC_rel32 = 0x80,
OP_SETCC = 0x90,
OP2_3BYTE_ESCAPE = 0xAE,
OP2_3BYTE_ESCAPE_AE = 0xAE,
OP2_IMUL_GvEv = 0xAF,
OP2_MOVZX_GvEb = 0xB6,
OP2_BSR = 0xBD,
Expand All @@ -295,7 +296,9 @@ class X86Assembler {
} TwoByteOpcodeID;

typedef enum {
OP3_MFENCE = 0xF0,
OP3_ROUNDSS_VssWssIb = 0x0A,
OP3_ROUNDSD_VsdWsdIb = 0x0B,
OP3_MFENCE = 0xF0,
} ThreeByteOpcodeID;


Expand Down Expand Up @@ -2349,6 +2352,41 @@ class X86Assembler {
m_formatter.twoByteOp(OP2_SQRTSD_VsdWsd, (RegisterID)dst, base, offset);
}

// Rounding modes accepted by the roundss/roundsd emitters. The enumerator's
// numeric value is cast to uint8_t and written as the instruction's 8-bit
// immediate, so the values below are part of the encoding and must not change.
enum class RoundingType : uint8_t {
    ToNearestWithTiesToEven = 0x0,
    TowardNegativeInfiniti = 0x1,
    TowardInfiniti = 0x2, // Mode requested by the ceil*() helpers.
    TowardZero = 0x3,
};

// Emits roundss dst, src, imm8 (register source). Sequence: PRE_SSE_66 prefix,
// the three-byte opcode (escape 0x3A, opcode 0x0A) with a register ModRM, then
// the rounding mode as the trailing immediate byte.
void roundss_rr(XMMRegisterID src, XMMRegisterID dst, RoundingType rounding)
{
    const uint8_t roundingImmediate = static_cast<uint8_t>(rounding);

    m_formatter.prefix(PRE_SSE_66);
    m_formatter.threeByteOp(OP2_3BYTE_ESCAPE_3A, OP3_ROUNDSS_VssWssIb, static_cast<RegisterID>(dst), static_cast<RegisterID>(src));
    m_formatter.immediate8(roundingImmediate);
}

void roundss_mr(int offset, RegisterID base, XMMRegisterID dst, RoundingType rounding)
{
m_formatter.prefix(PRE_SSE_66);
m_formatter.threeByteOp(OP2_3BYTE_ESCAPE_3A, OP3_ROUNDSS_VssWssIb, (RegisterID)dst, base, offset);
m_formatter.immediate8(static_cast<uint8_t>(rounding));
}

// Emits roundsd dst, src, imm8 (register source). Sequence: PRE_SSE_66 prefix,
// the three-byte opcode (escape 0x3A, opcode 0x0B) with a register ModRM, then
// the rounding mode as the trailing immediate byte.
void roundsd_rr(XMMRegisterID src, XMMRegisterID dst, RoundingType rounding)
{
    const uint8_t roundingImmediate = static_cast<uint8_t>(rounding);

    m_formatter.prefix(PRE_SSE_66);
    m_formatter.threeByteOp(OP2_3BYTE_ESCAPE_3A, OP3_ROUNDSD_VsdWsdIb, static_cast<RegisterID>(dst), static_cast<RegisterID>(src));
    m_formatter.immediate8(roundingImmediate);
}

void roundsd_mr(int offset, RegisterID base, XMMRegisterID dst, RoundingType rounding)
{
m_formatter.prefix(PRE_SSE_66);
m_formatter.threeByteOp(OP2_3BYTE_ESCAPE_3A, OP3_ROUNDSD_VsdWsdIb, (RegisterID)dst, base, offset);
m_formatter.immediate8(static_cast<uint8_t>(rounding));
}

// Misc instructions:

void int3()
Expand All @@ -2368,7 +2406,7 @@ class X86Assembler {

void mfence()
{
m_formatter.threeByteOp(OP3_MFENCE);
m_formatter.threeByteOp(OP2_3BYTE_ESCAPE_AE, OP3_MFENCE);
}

// Assembler admin methods:
Expand Down Expand Up @@ -2847,12 +2885,32 @@ class X86Assembler {
}
#endif

void threeByteOp(ThreeByteOpcodeID opcode)
// Emits a three-byte opcode that takes no operands. The caller picks the
// second escape byte via twoBytePrefix (e.g. OP2_3BYTE_ESCAPE_AE for mfence).
// Bytes are written in this exact order: OP_2BYTE_ESCAPE, twoBytePrefix, opcode.
void threeByteOp(TwoByteOpcodeID twoBytePrefix, ThreeByteOpcodeID opcode)
{
// Reserve room once up front; the writes below use the Unchecked variant.
m_buffer.ensureSpace(maxInstructionSize);
m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
m_buffer.putByteUnchecked(twoBytePrefix);
m_buffer.putByteUnchecked(opcode);
}

// Emits a three-byte opcode with a register-direct operand pair. After
// reserving space it calls emitRexIfNeeded(reg, 0, rm), writes
// OP_2BYTE_ESCAPE, twoBytePrefix and opcode in that order, and finishes with
// registerModRM(reg, rm). Any instruction prefix or trailing immediate is the
// caller's job (see the round* emitters, which add PRE_SSE_66 and an imm8).
void threeByteOp(TwoByteOpcodeID twoBytePrefix, ThreeByteOpcodeID opcode, int reg, RegisterID rm)
{
// Reserve room once up front; the opcode writes below use the Unchecked variant.
m_buffer.ensureSpace(maxInstructionSize);
emitRexIfNeeded(reg, 0, rm);
m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
m_buffer.putByteUnchecked(twoBytePrefix);
m_buffer.putByteUnchecked(opcode);
registerModRM(reg, rm);
}

void threeByteOp(TwoByteOpcodeID twoBytePrefix, ThreeByteOpcodeID opcode, int reg, RegisterID base, int displacement)
{
m_buffer.ensureSpace(maxInstructionSize);
emitRexIfNeeded(reg, 0, base);
m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
m_buffer.putByteUnchecked(OP2_3BYTE_ESCAPE);
m_buffer.putByteUnchecked(twoBytePrefix);
m_buffer.putByteUnchecked(opcode);
memoryModRM(reg, base, displacement);
}

#if CPU(X86_64)
Expand Down
5 changes: 5 additions & 0 deletions Source/JavaScriptCore/b3/B3ConstDoubleValue.cpp
Expand Up @@ -92,6 +92,11 @@ Value* ConstDoubleValue::absConstant(Procedure& proc) const
return proc.add<ConstDoubleValue>(origin(), fabs(m_value));
}

// Constant-folds ceil: adds to proc a new ConstDoubleValue, carrying this
// value's origin, whose payload is ceil(m_value), and returns it.
Value* ConstDoubleValue::ceilConstant(Procedure& proc) const
{
    const double roundedUp = ceil(m_value);
    return proc.add<ConstDoubleValue>(origin(), roundedUp);
}

Value* ConstDoubleValue::sqrtConstant(Procedure& proc) const
{
return proc.add<ConstDoubleValue>(origin(), sqrt(m_value));
Expand Down

0 comments on commit 01738ab

Please sign in to comment.