diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index 2902abfc6619e..e76a8a49e0bf4 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -3555,6 +3555,14 @@ void Assembler::movsd(Address dst, XMMRegister src) { emit_operand(src, dst, 0); } +void Assembler::vmovsd(XMMRegister dst, XMMRegister src, XMMRegister src2) { + assert(UseAVX > 0, "Requires some form of AVX"); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(src2->encoding(), src->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int16(0x11, (0xC0 | encode)); +} + void Assembler::movss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); @@ -6531,6 +6539,29 @@ void Assembler::vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) emit_int16((unsigned char)0xB9, (0xC0 | encode)); } +void Assembler::evfnmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2, EvexRoundPrefix rmode) { // Need to add rmode for rounding mode support + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(rmode, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_extended_context(); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16((unsigned char)0xAD, (0xC0 | encode)); +} + +void Assembler::vfnmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + assert(VM_Version::supports_fma(), ""); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16((unsigned char)0xAD, (0xC0 | encode)); +} + +void Assembler::vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { + assert(VM_Version::supports_fma(), ""); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16((unsigned char)0xBD, (0xC0 | encode)); +} + void Assembler::vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { assert(VM_Version::supports_fma(), ""); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); @@ -6892,6 +6923,22 @@ void Assembler::vroundpd(XMMRegister dst, Address src, int32_t rmode, int vecto emit_int8((rmode)); } +void Assembler::vroundsd(XMMRegister dst, XMMRegister src, XMMRegister src2, int32_t rmode) { + assert(VM_Version::supports_avx(), ""); + assert(rmode <= 0x0f, "rmode 0x%x", rmode); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x0B, (0xC0 | encode), (rmode)); +} + +void 
Assembler::vrndscalesd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int32_t rmode) { + assert(VM_Version::supports_evex(), "requires EVEX support"); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x0B, (0xC0 | encode), (rmode)); +} + void Assembler::vrndscalepd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len) { assert(VM_Version::supports_evex(), "requires EVEX support"); InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); @@ -8857,6 +8904,19 @@ void Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) { emit_int8(imm8 & 0x01); } +void Assembler::extractps(Register dst, XMMRegister src, uint8_t imm8) { + assert(VM_Version::supports_sse4_1(), ""); + assert(imm8 <= 0x03, "imm8: %u", imm8); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + // imm8: + // 0x00 - extract from bits 31:0 + // 0x01 - extract from bits 63:32 + // 0x02 - extract from bits 95:64 + // 0x03 - extract from bits 127:96 + emit_int24(0x17, (0xC0 | encode), imm8 & 0x03); +} + // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL void Assembler::vpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx2(), ""); @@ -9531,6 +9591,15 @@ void Assembler::evdivpd(XMMRegister dst, KRegister mask, XMMRegister nds, Addres emit_operand(dst, src, 0); } +void Assembler::evdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src, EvexRoundPrefix rmode) { + assert(VM_Version::supports_evex(), ""); + InstructionAttr attributes(rmode, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_extended_context(); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int16(0x5E, (0xC0 | encode)); +} + void Assembler::evpabsb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { assert(VM_Version::supports_avx512bw() && (vector_len == AVX_512bit || VM_Version::supports_avx512vl()), ""); InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index 5102e2c384925..60883f13f7a79 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -528,6 +528,13 @@ class Assembler : public AbstractAssembler { EVEX_Z = 0x80 }; + enum EvexRoundPrefix { + EVEX_RNE = 0x0, + EVEX_RD = 0x1, + EVEX_RU = 0x2, + EVEX_RZ = 0x3 + }; + enum VexSimdPrefix { VEX_SIMD_NONE = 0x0, VEX_SIMD_66 = 0x1, @@ -886,6 +893,8 @@ class Assembler : public AbstractAssembler { void movsd(Address dst, XMMRegister src); void movlpd(XMMRegister dst, Address src); + void vmovsd(XMMRegister dst, XMMRegister src, XMMRegister src2); + // New cpus require use of movaps and movapd to avoid partial register stall // when moving 
between registers. void movaps(XMMRegister dst, XMMRegister src); @@ -2242,9 +2251,13 @@ class Assembler : public AbstractAssembler { void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src); void vdivsd(XMMRegister dst, XMMRegister nds, Address src); void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src); + void evdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src, EvexRoundPrefix rmode); void vdivss(XMMRegister dst, XMMRegister nds, Address src); void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src); void vfmadd231sd(XMMRegister dst, XMMRegister nds, XMMRegister src); + void vfnmadd213sd(XMMRegister dst, XMMRegister nds, XMMRegister src); + void evfnmadd213sd(XMMRegister dst, XMMRegister nds, XMMRegister src, EvexRoundPrefix rmode); + void vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2); void vfmadd231ss(XMMRegister dst, XMMRegister nds, XMMRegister src); void vmulsd(XMMRegister dst, XMMRegister nds, Address src); void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src); @@ -2334,8 +2347,11 @@ class Assembler : public AbstractAssembler { // Round Packed Double precision value. void vroundpd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len); void vroundpd(XMMRegister dst, Address src, int32_t rmode, int vector_len); + void vrndscalesd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int32_t rmode); void vrndscalepd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len); void vrndscalepd(XMMRegister dst, Address src, int32_t rmode, int vector_len); + void vroundsd(XMMRegister dst, XMMRegister src, XMMRegister src2, int32_t rmode); + void vroundsd(XMMRegister dst, XMMRegister src, Address src2, int32_t rmode); // Bitwise Logical AND of Packed Floating-Point Values void andpd(XMMRegister dst, XMMRegister src); @@ -2719,6 +2735,8 @@ class Assembler : public AbstractAssembler { void vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8); void vextractf64x4(Address dst, XMMRegister src, uint8_t imm8); + void extractps(Register dst, XMMRegister src, uint8_t imm8); + // xmm/mem sourced byte/word/dword/qword replicate void vpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len); void vpbroadcastb(XMMRegister dst, Address src, int vector_len); @@ -2952,6 +2970,8 @@ class InstructionAttr { _embedded_opmask_register_specifier = mask->encoding() & 0x7; } + void set_extended_context(void) { _is_extended_context = true; } + }; #endif // CPU_X86_ASSEMBLER_X86_HPP diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp index db332274a68b6..b6a27abf0f37e 100644 --- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp @@ -968,7 +968,7 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { break; case vmIntrinsics::_dpow: if (StubRoutines::dpow() != nullptr) { - __ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args()); + __ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args()); } else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); } diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86.cpp index 1d13b710430e8..d39cab092f8f3 100644 --- a/src/hotspot/cpu/x86/sharedRuntime_x86.cpp +++ b/src/hotspot/cpu/x86/sharedRuntime_x86.cpp @@ -87,6 +87,8 @@ void SharedRuntime::inline_check_hashcode_from_object_header(MacroAssembler* mas #if defined(TARGET_COMPILER_gcc) && 
!defined(_WIN64) JRT_LEAF(jfloat, SharedRuntime::frem(jfloat x, jfloat y)) jfloat retval; + const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); + if (!is_LP64 || UseAVX < 1 || !UseFMA) { asm ("\ 1: \n\ fprem \n\ @@ -97,11 +99,21 @@ jne 1b \n\ :"=t"(retval) :"0"(x), "u"(y) :"cc", "ax"); + } else { + assert(StubRoutines::fmod() != nullptr, ""); + jdouble (*addr)(jdouble, jdouble) = (double (*)(double, double))StubRoutines::fmod(); + jdouble dx = (jdouble) x; + jdouble dy = (jdouble) y; + + retval = (jfloat) (*addr)(dx, dy); + } return retval; JRT_END JRT_LEAF(jdouble, SharedRuntime::drem(jdouble x, jdouble y)) jdouble retval; + const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); + if (!is_LP64 || UseAVX < 1 || !UseFMA) { asm ("\ 1: \n\ fprem \n\ @@ -112,6 +124,12 @@ jne 1b \n\ :"=t"(retval) :"0"(x), "u"(y) :"cc", "ax"); + } else { + assert(StubRoutines::fmod() != nullptr, ""); + jdouble (*addr)(jdouble, jdouble) = (double (*)(double, double))StubRoutines::fmod(); + + retval = (*addr)(x, y); + } return retval; JRT_END #endif // TARGET_COMPILER_gcc && !_WIN64 diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp index 6cd1765151492..2c100da4a5dbe 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp @@ -3937,6 +3937,10 @@ void StubGenerator::generate_initial_stubs() { } generate_libm_stubs(); + + if ((UseAVX >= 1) && (VM_Version::supports_avx512vlbwdq() || VM_Version::supports_fma())) { + StubRoutines::_fmod = generate_libmFmod(); // from stubGenerator_x86_64_fmod.cpp + } } void StubGenerator::generate_continuation_stubs() { diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp index 86e3b169554d2..e7cc8e3b40a40 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp @@ -486,6 +486,7 @@ class StubGenerator: public StubCodeGenerator { address generate_libmPow(); address generate_libmLog(); address generate_libmLog10(); + address generate_libmFmod(); // Shared constants static address ZERO; diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_fmod.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_fmod.cpp new file mode 100644 index 0000000000000..04ad300ddcd87 --- /dev/null +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_fmod.cpp @@ -0,0 +1,524 @@ +/* + * Copyright (c) 2023, Intel Corporation. All rights reserved. + * Intel Math Library (LIBM) Source Code + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "macroAssembler_x86.hpp" +#include "stubGenerator_x86_64.hpp" + +/******************************************************************************/ +// ALGORITHM DESCRIPTION - FMOD() +// --------------------- +// +// If either value1 or value2 is NaN, the result is NaN. +// +// If neither value1 nor value2 is NaN, the sign of the result equals the sign of the dividend. +// +// If the dividend is an infinity or the divisor is a zero or both, the result is NaN. +// +// If the dividend is finite and the divisor is an infinity, the result equals the dividend. +// +// If the dividend is a zero and the divisor is finite, the result equals the dividend. +// +// In the remaining cases, where neither operand is an infinity, a zero, or NaN, the floating-point +// remainder result from a dividend value1 and a divisor value2 is defined by the mathematical +// relation result = value1 - (value2 * q), where q is an integer that is negative only if +// value1 / value2 is negative, and positive only if value1 / value2 is positive, and whose magnitude +// is as large as possible without exceeding the magnitude of the true mathematical quotient of value1 and value2. +// +/******************************************************************************/ + +#define __ _masm-> + +ATTRIBUTE_ALIGNED(32) static const uint64_t CONST_NaN[] = { + 0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL // NaN vector +}; +ATTRIBUTE_ALIGNED(32) static const uint64_t CONST_1p260[] = { + 0x5030000000000000ULL, // 0x1p+260 +}; + +ATTRIBUTE_ALIGNED(32) static const uint64_t CONST_MAX[] = { + 0x7FEFFFFFFFFFFFFFULL, // Max +}; + +ATTRIBUTE_ALIGNED(32) static const uint64_t CONST_INF[] = { + 0x7FF0000000000000ULL, // Inf +}; + +ATTRIBUTE_ALIGNED(32) static const uint64_t CONST_e307[] = { + 0x7FE0000000000000ULL +}; + +address StubGenerator::generate_libmFmod() { + StubCodeMark mark(this, "StubRoutines", "libmFmod"); + address start = __ pc(); + __ enter(); // required for proper stackwalking of RuntimeStub frame + + if (VM_Version::supports_avx512vlbwdq()) { // AVX512 version + + // Source used to generate the AVX512 fmod assembly below: + // + // #include + // #include + // #pragma float_control(precise, on) + // + // #define UINT32 unsigned int + // #define SINT32 int + // #define UINT64 unsigned __int64 + // #define SINT64 __int64 + // + // #define DP_FMA(a, b, c) __fence(_mm_cvtsd_f64(_mm_fmadd_sd(_mm_set_sd(a), _mm_set_sd(b), _mm_set_sd(c)))) + // #define DP_FMA_RN(a, b, c) _mm_cvtsd_f64(_mm_fmadd_round_sd(_mm_set_sd(a), _mm_set_sd(b), _mm_set_sd(c), (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC))) + // #define DP_FMA_RZ(a, b, c) __fence(_mm_cvtsd_f64(_mm_fmadd_round_sd(_mm_set_sd(a), _mm_set_sd(b), _mm_set_sd(c), (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)))) + // + // #define DP_ROUND_RZ(a) _mm_cvtsd_f64(_mm_roundscale_sd(_mm_setzero_pd(), _mm_set_sd(a), (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC))) + // + // #define DP_CONST(C) _castu64_f64(0x##C##ull) + // #define DP_AND(X, Y) _mm_cvtsd_f64(_mm_and_pd(_mm_set_sd(X), _mm_set_sd(Y))) + // #define DP_XOR(X, Y) _mm_cvtsd_f64(_mm_xor_pd(_mm_set_sd(X), _mm_set_sd(Y))) + // #define DP_OR(X, Y) _mm_cvtsd_f64(_mm_or_pd(_mm_set_sd(X), _mm_set_sd(Y))) + // #define DP_DIV_RZ(a, b) __fence(_mm_cvtsd_f64(_mm_div_round_sd(_mm_set_sd(a), _mm_set_sd(b), (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)))) + // #define DP_FNMA(a, b, c) __fence(_mm_cvtsd_f64(_mm_fnmadd_sd(_mm_set_sd(a), _mm_set_sd(b), _mm_set_sd(c)))) + // #define DP_FNMA_RZ(a, b, c) 
__fence(_mm_cvtsd_f64(_mm_fnmadd_round_sd(_mm_set_sd(a), _mm_set_sd(b), _mm_set_sd(c), (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)))) + // + // #define D2L(x) _mm_castpd_si128(x) + // // transfer highest 32 bits (of low 64b) to GPR + // #define TRANSFER_HIGH_INT32(X) _mm_extract_epi32(D2L(_mm_set_sd(X)), 1) + // + // double fmod(double x, double y) + // { + // double a, b, sgn_a, q, bs, bs2; + // unsigned eq; + + Label L_5280, L_52a0, L_5256, L_5300, L_5320, L_52c0, L_52d0, L_5360, L_5380, L_53b0, L_5390; + Label L_53c0, L_52a6, L_53d0, L_exit; + + __ movdqa(xmm2, xmm0); + // // |x|, |y| + // a = DP_AND(x, DP_CONST(7fffffffffffffff)); + __ movq(xmm0, xmm0); + __ mov64(rax, 0x7FFFFFFFFFFFFFFFULL); + __ evpbroadcastq(xmm3, rax, Assembler::AVX_128bit); + __ vpand(xmm6, xmm0, xmm3, Assembler::AVX_128bit); + // b = DP_AND(y, DP_CONST(7fffffffffffffff)); + __ vpand(xmm4, xmm1, xmm3, Assembler::AVX_128bit); + // // sign(x) + // sgn_a = DP_XOR(x, a); + __ vpxor(xmm3, xmm6, xmm0, Assembler::AVX_128bit); + // q = DP_DIV_RZ(a, b); + __ movq(xmm5, xmm4); + __ evdivsd(xmm0, xmm6, xmm5, Assembler::EVEX_RZ); + // q = DP_ROUND_RZ(q); + __ movq(xmm0, xmm0); + // a = DP_AND(x, DP_CONST(7fffffffffffffff)); + __ vxorpd(xmm7, xmm7, xmm7, Assembler::AVX_128bit); + // q = DP_ROUND_RZ(q); + __ vroundsd(xmm0, xmm7, xmm0, 0xb); + // eq = TRANSFER_HIGH_INT32(q); + __ extractps(rax, xmm0, 1); + // if (!eq) return x + sgn_a; + __ testl(rax, rax); + __ jcc(Assembler::equal, L_5280); + // if (eq >= 0x7fefffffu) goto SPECIAL_FMOD; + __ cmpl(rax, 0x7feffffe); + __ jcc(Assembler::belowEqual, L_52a0); + __ vpxor(xmm2, xmm2, xmm2, Assembler::AVX_128bit); + // SPECIAL_FMOD: + // + // // y==0 or x==Inf? + // if ((b == 0.0) || (!(a <= DP_CONST(7fefffffffffffff)))) + __ ucomisd(xmm4, xmm2); + __ jcc(Assembler::notEqual, L_5256); + __ jcc(Assembler::noParity, L_5300); + __ bind(L_5256); + __ movsd(xmm2, ExternalAddress((address)CONST_MAX), rax); + __ ucomisd(xmm2, xmm6); + __ jcc(Assembler::below, L_5300); + __ movsd(xmm0, ExternalAddress((address)CONST_INF), rax); + // return DP_FNMA(b, q, a); // NaN + // // y is NaN? 
+ // if (!(b <= DP_CONST(7ff0000000000000))) return y + y; + __ ucomisd(xmm0, xmm4); + __ jcc(Assembler::aboveEqual, L_5320); + __ vaddsd(xmm0, xmm1, xmm1); + __ jmp(L_exit); + // if (!eq) return x + sgn_a; + __ align32(); + __ bind(L_5280); + __ vaddsd(xmm0, xmm3, xmm2); + __ jmp(L_exit); + // a = DP_FNMA_RZ(b, q, a); + __ align(8); + __ bind(L_52a0); + __ evfnmadd213sd(xmm0, xmm4, xmm6, Assembler::EVEX_RZ); + // while (b <= a) + __ bind(L_52a6); + __ ucomisd(xmm0, xmm4); + __ jcc(Assembler::aboveEqual, L_52c0); + // a = DP_XOR(a, sgn_a); + __ vpxor(xmm0, xmm3, xmm0, Assembler::AVX_128bit); + __ jmp(L_exit); + __ bind(L_52c0); + __ movq(xmm6, xmm0); + // q = DP_ROUND_RZ(q); + __ vpxor(xmm1, xmm1, xmm1, Assembler::AVX_128bit); + __ align32(); + __ bind(L_52d0); + // q = DP_DIV_RZ(a, b); + __ evdivsd(xmm2, xmm6, xmm5, Assembler::EVEX_RZ); + // q = DP_ROUND_RZ(q); + __ movq(xmm2, xmm2); + __ vroundsd(xmm2, xmm1, xmm2, 0xb); + // a = DP_FNMA_RZ(b, q, a); + __ evfnmadd213sd(xmm2, xmm4, xmm0, Assembler::EVEX_RZ); + // while (b <= a) + __ ucomisd(xmm2, xmm4); + __ movq(xmm6, xmm2); + __ movapd(xmm0, xmm2); + __ jcc(Assembler::aboveEqual, L_52d0); + // a = DP_XOR(a, sgn_a); + __ vpxor(xmm0, xmm3, xmm2, Assembler::AVX_128bit); + __ jmp(L_exit); + // return DP_FNMA(b, q, a); // NaN + __ bind(L_5300); + __ vfnmadd213sd(xmm0, xmm4, xmm6); + __ jmp(L_exit); + // bs = b * DP_CONST(7fe0000000000000); + __ bind(L_5320); + __ vmulsd(xmm1, xmm4, ExternalAddress((address)CONST_e307), rax); + // q = DP_DIV_RZ(a, bs); + __ movq(xmm2, xmm1); + __ evdivsd(xmm0, xmm6, xmm2, Assembler::EVEX_RZ); + // q = DP_ROUND_RZ(q); + __ movq(xmm0, xmm0); + __ vroundsd(xmm7, xmm7, xmm0, 0xb); + // eq = TRANSFER_HIGH_INT32(q); + __ extractps(rax, xmm7, 1); + // if (eq >= 0x7fefffffu) + __ cmpl(rax, 0x7fefffff); + __ jcc(Assembler::below, L_5360); + // // b* 2*1023 * 2^1023 + // bs2 = bs * DP_CONST(7fe0000000000000); + __ vmulsd(xmm0, xmm1, ExternalAddress((address)CONST_e307), rax); + // while (bs2 <= a) + __ ucomisd(xmm6, xmm0); + __ jcc(Assembler::aboveEqual, L_5380); + __ movapd(xmm7, xmm6); + __ jmp(L_53b0); + // a = DP_FNMA_RZ(b, q, a); + __ bind(L_5360); + __ evfnmadd213sd(xmm7, xmm1, xmm6, Assembler::EVEX_RZ); + __ jmp(L_53b0); + // q = DP_ROUND_RZ(q); + __ bind(L_5380); + __ vxorpd(xmm8, xmm8, xmm8, Assembler::AVX_128bit); + // q = DP_DIV_RZ(qa, bs2); + __ align32(); + __ bind(L_5390); + __ evdivsd(xmm7, xmm6, xmm0, Assembler::EVEX_RZ); + // q = DP_ROUND_RZ(q); + __ movq(xmm7, xmm7); + __ vroundsd(xmm7, xmm8, xmm7, 0xb); + // a = DP_FNMA_RZ(bs2, q, a); + __ evfnmadd213sd(xmm7, xmm0, xmm6, Assembler::EVEX_RZ); + // while (bs2 <= a) + __ ucomisd(xmm7, xmm0); + __ movapd(xmm6, xmm7); + __ jcc(Assembler::aboveEqual, L_5390); + // while (bs <= a) + __ bind(L_53b0); + __ ucomisd(xmm7, xmm1); + __ jcc(Assembler::aboveEqual, L_53c0); + __ movapd(xmm0, xmm7); + __ jmp(L_52a6); + // q = DP_ROUND_RZ(q); + __ bind(L_53c0); + __ vxorpd(xmm6, xmm6, xmm6, Assembler::AVX_128bit); + // q = DP_DIV_RZ(a, bs); + __ align32(); + __ bind(L_53d0); + __ evdivsd(xmm0, xmm7, xmm2, Assembler::EVEX_RZ); + // q = DP_ROUND_RZ(q); + __ movq(xmm0, xmm0); + __ vroundsd(xmm0, xmm6, xmm0, 0xb); + // a = DP_FNMA_RZ(bs, q, a); + __ evfnmadd213sd(xmm0, xmm1, xmm7, Assembler::EVEX_RZ); + // while (bs <= a) + __ ucomisd(xmm0, xmm1); + __ movapd(xmm7, xmm0); + __ jcc(Assembler::aboveEqual, L_53d0); + __ jmp(L_52a6); + + __ bind(L_exit); + +//////////////////////////////////////////////////////////////////////////////////////// 
+//////////////////////////////////////////////////////////////////////////////////////// +// AVX2 code +//////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////// + } else if (VM_Version::supports_fma()) { // AVX2 version + + Label L_104a, L_11bd, L_10c1, L_1090, L_11b9, L_10e7, L_11af, L_111c, L_10f3, L_116e, L_112a; + Label L_1173, L_1157, L_117f, L_11a0; + + // double fmod(double x, double y) + // { + // double a, b, sgn_a, q, bs, bs2, corr, res; + // unsigned eq; + // unsigned mxcsr, mxcsr_rz; + + // __asm { stmxcsr DWORD PTR[mxcsr] } + // mxcsr_rz = 0x7f80 | mxcsr; + __ push(rax); + __ stmxcsr(Address(rsp, 0)); + __ movl(rax, Address(rsp, 0)); + __ movl(rcx, rax); + __ orl(rcx, 0x7f80); + __ movl(Address(rsp, 0x04), rcx); + + // // |x|, |y| + // a = DP_AND(x, DP_CONST(7fffffffffffffff)); + __ movq(xmm2, xmm0); + __ vmovdqu(xmm3, ExternalAddress((address)CONST_NaN), rcx); + __ vpand(xmm4, xmm2, xmm3, Assembler::AVX_128bit); + // b = DP_AND(y, DP_CONST(7fffffffffffffff)); + __ vpand(xmm3, xmm1, xmm3, Assembler::AVX_128bit); + // // sign(x) + // sgn_a = DP_XOR(x, a); + __ mov64(rcx, 0x8000000000000000ULL); + __ movq(xmm5, rcx); + __ vpand(xmm2, xmm2, xmm5, Assembler::AVX_128bit); + + // if (a < b) return x + sgn_a; + __ ucomisd(xmm3, xmm4); + __ jcc(Assembler::belowEqual, L_104a); + __ vaddsd(xmm0, xmm2, xmm0); + __ jmp(L_11bd); + + // if (((mxcsr & 0x6000)!=0x2000) && (a < b * 0x1p+260)) + __ bind(L_104a); + __ andl(rax, 0x6000); + __ cmpl(rax, 0x2000); + __ jcc(Assembler::equal, L_10c1); + __ vmulsd(xmm0, xmm3, ExternalAddress((address)CONST_1p260), rax); + __ ucomisd(xmm0, xmm4); + __ jcc(Assembler::belowEqual, L_10c1); + // { + // q = DP_DIV(a, b); + __ vdivpd(xmm0, xmm4, xmm3, Assembler::AVX_128bit); + // corr = DP_SHR(DP_FNMA(b, q, a), 63); + __ movapd(xmm1, xmm0); + __ vfnmadd213sd(xmm1, xmm3, xmm4); + __ movq(xmm5, xmm1); + __ vpxor(xmm1, xmm1, xmm1, Assembler::AVX_128bit); + __ vpcmpgtq(xmm5, xmm1, xmm5, Assembler::AVX_128bit); + // q = DP_PSUBQ(q, corr); + __ vpaddq(xmm0, xmm5, xmm0, Assembler::AVX_128bit); + // q = DP_TRUNC(q); + __ vroundsd(xmm0, xmm0, xmm0, 3); + // a = DP_FNMA(b, q, a); + __ vfnmadd213sd(xmm0, xmm3, xmm4); + __ align32(); + // while (b <= a) + __ bind(L_1090); + __ ucomisd(xmm0, xmm3); + __ jcc(Assembler::below, L_11b9); + // { + // q = DP_DIV(a, b); + __ vdivsd(xmm4, xmm0, xmm3); + // corr = DP_SHR(DP_FNMA(b, q, a), 63); + __ movapd(xmm5, xmm4); + __ vfnmadd213sd(xmm5, xmm3, xmm0); + __ movq(xmm5, xmm5); + __ vpcmpgtq(xmm5, xmm1, xmm5, Assembler::AVX_128bit); + // q = DP_PSUBQ(q, corr); + __ vpaddq(xmm4, xmm5, xmm4, Assembler::AVX_128bit); + // q = DP_TRUNC(q); + __ vroundsd(xmm4, xmm4, xmm4, 3); + // a = DP_FNMA(b, q, a); + __ vfnmadd231sd(xmm0, xmm3, xmm4); + __ jmp(L_1090); + // } + // return DP_XOR(a, sgn_a); + // } + + // __asm { ldmxcsr DWORD PTR [mxcsr_rz] } + __ bind(L_10c1); + __ ldmxcsr(Address(rsp, 0x04)); + + // q = DP_DIV(a, b); + __ vdivpd(xmm0, xmm4, xmm3, Assembler::AVX_128bit); + // q = DP_TRUNC(q); + __ vroundsd(xmm0, xmm0, xmm0, 3); + + // eq = TRANSFER_HIGH_INT32(q); + __ extractps(rax, xmm0, 1); + + // if (__builtin_expect((eq >= 0x7fefffffu), (0==1))) goto SPECIAL_FMOD; + __ cmpl(rax, 0x7feffffe); + __ jcc(Assembler::above, L_10e7); + + // a = DP_FNMA(b, q, a); + __ vfnmadd213sd(xmm0, xmm3, xmm4); + __ jmp(L_11af); + + // SPECIAL_FMOD: + + // // y==0 or x==Inf? 
+ // if ((b == 0.0) || (!(a <= DP_CONST(7fefffffffffffff)))) + __ bind(L_10e7); + __ vpxor(xmm5, xmm5, xmm5, Assembler::AVX_128bit); + __ ucomisd(xmm3, xmm5); + __ jcc(Assembler::notEqual, L_10f3); + __ jcc(Assembler::noParity, L_111c); + + __ bind(L_10f3); + __ movsd(xmm5, ExternalAddress((address)CONST_MAX), rax); + __ ucomisd(xmm5, xmm4); + __ jcc(Assembler::below, L_111c); + // return res; + // } + // // y is NaN? + // if (!(b <= DP_CONST(7ff0000000000000))) { + __ movsd(xmm0, ExternalAddress((address)CONST_INF), rax); + __ ucomisd(xmm0, xmm3); + __ jcc(Assembler::aboveEqual, L_112a); + // res = y + y; + __ vaddsd(xmm0, xmm1, xmm1); + // __asm { ldmxcsr DWORD PTR[mxcsr] } + __ ldmxcsr(Address(rsp, 0)); + __ jmp(L_11bd); + // { + // res = DP_FNMA(b, q, a); // NaN + __ bind(L_111c); + __ vfnmadd213sd(xmm0, xmm3, xmm4); + // __asm { ldmxcsr DWORD PTR[mxcsr] } + __ ldmxcsr(Address(rsp, 0)); + __ jmp(L_11bd); + // return res; + // } + + // // b* 2*1023 + // bs = b * DP_CONST(7fe0000000000000); + __ bind(L_112a); + __ vmulsd(xmm1, xmm3, ExternalAddress((address)CONST_e307), rax); + + // q = DP_DIV(a, bs); + __ vdivsd(xmm0, xmm4, xmm1); + // q = DP_TRUNC(q); + __ vroundsd(xmm0, xmm0, xmm0, 3); + + // eq = TRANSFER_HIGH_INT32(q); + __ extractps(rax, xmm0, 1); + + // if (eq >= 0x7fefffffu) + __ cmpl(rax, 0x7fefffff); + __ jcc(Assembler::below, L_116e); + // { + // // b* 2*1023 * 2^1023 + // bs2 = bs * DP_CONST(7fe0000000000000); + __ vmulsd(xmm0, xmm1, ExternalAddress((address)CONST_e307), rax); + // while (bs2 <= a) + __ ucomisd(xmm4, xmm0); + __ jcc(Assembler::below, L_1173); + // { + // q = DP_DIV(a, bs2); + __ bind(L_1157); + __ vdivsd(xmm5, xmm4, xmm0); + // q = DP_TRUNC(q); + __ vroundsd(xmm5, xmm5, xmm5, 3); + // a = DP_FNMA(bs2, q, a); + __ vfnmadd231sd(xmm4, xmm0, xmm5); + // while (bs2 <= a) + __ ucomisd(xmm4, xmm0); + __ jcc(Assembler::aboveEqual, L_1157); + __ jmp(L_1173); + // } + // } + // else + // a = DP_FNMA(bs, q, a); + __ bind(L_116e); + __ vfnmadd231sd(xmm4, xmm1, xmm0); + + // while (bs <= a) + __ bind(L_1173); + __ ucomisd(xmm4, xmm1); + __ jcc(Assembler::aboveEqual, L_117f); + __ movapd(xmm0, xmm4); + __ jmp(L_11af); + // { + // q = DP_DIV(a, bs); + __ bind(L_117f); + __ vdivsd(xmm0, xmm4, xmm1); + // q = DP_TRUNC(q); + __ vroundsd(xmm0, xmm0, xmm0, 3); + // a = DP_FNMA(bs, q, a); + __ vfnmadd213sd(xmm0, xmm1, xmm4); + + // while (bs <= a) + __ ucomisd(xmm0, xmm1); + __ movapd(xmm4, xmm0); + __ jcc(Assembler::aboveEqual, L_117f); + __ jmp(L_11af); + __ align32(); + // { + // q = DP_DIV(a, b); + __ bind(L_11a0); + __ vdivsd(xmm1, xmm0, xmm3); + // q = DP_TRUNC(q); + __ vroundsd(xmm1, xmm1, xmm1, 3); + // a = DP_FNMA(b, q, a); + __ vfnmadd231sd(xmm0, xmm3, xmm1); + + // FMOD_CONT: + // while (b <= a) + __ bind(L_11af); + __ ucomisd(xmm0, xmm3); + __ jcc(Assembler::aboveEqual, L_11a0); + // } + + // __asm { ldmxcsr DWORD PTR[mxcsr] } + __ ldmxcsr(Address(rsp, 0)); + __ bind(L_11b9); + __ vpxor(xmm0, xmm2, xmm0, Assembler::AVX_128bit); + // } + + // goto FMOD_CONT; + + // } + __ bind(L_11bd); + __ pop(rax); + + } else { // SSE version + assert(false, "SSE not implemented"); + } + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; +} + +#undef __ diff --git a/src/hotspot/share/runtime/stubRoutines.cpp b/src/hotspot/share/runtime/stubRoutines.cpp index 7a6974088ba43..73bc3a003fbbf 100644 --- a/src/hotspot/share/runtime/stubRoutines.cpp +++ b/src/hotspot/share/runtime/stubRoutines.cpp @@ -161,6 +161,7 @@ address 
StubRoutines::_vectorizedMismatch = nullptr; address StubRoutines::_dexp = nullptr; address StubRoutines::_dlog = nullptr; address StubRoutines::_dlog10 = nullptr; +address StubRoutines::_fmod = nullptr; address StubRoutines::_dpow = nullptr; address StubRoutines::_dsin = nullptr; address StubRoutines::_dcos = nullptr; diff --git a/src/hotspot/share/runtime/stubRoutines.hpp b/src/hotspot/share/runtime/stubRoutines.hpp index 5ce9176f08a2f..ae37b9409e90b 100644 --- a/src/hotspot/share/runtime/stubRoutines.hpp +++ b/src/hotspot/share/runtime/stubRoutines.hpp @@ -249,6 +249,7 @@ class StubRoutines: AllStatic { static address _dlibm_reduce_pi04l; static address _dlibm_tan_cot_huge; static address _dtan; + static address _fmod; static address _f2hf; static address _hf2f; @@ -425,6 +426,7 @@ class StubRoutines: AllStatic { static address dlog() { return _dlog; } static address dlog10() { return _dlog10; } static address dpow() { return _dpow; } + static address fmod() { return _fmod; } static address dsin() { return _dsin; } static address dcos() { return _dcos; } static address dlibm_reduce_pi04l() { return _dlibm_reduce_pi04l; } diff --git a/src/hotspot/share/runtime/vmStructs.cpp b/src/hotspot/share/runtime/vmStructs.cpp index 37241534b2b7e..cd3ccb79e3133 100644 --- a/src/hotspot/share/runtime/vmStructs.cpp +++ b/src/hotspot/share/runtime/vmStructs.cpp @@ -556,6 +556,7 @@ static_field(StubRoutines, _dlog, address) \ static_field(StubRoutines, _dlog10, address) \ static_field(StubRoutines, _dpow, address) \ + static_field(StubRoutines, _fmod, address) \ static_field(StubRoutines, _dsin, address) \ static_field(StubRoutines, _dcos, address) \ static_field(StubRoutines, _dtan, address) \ diff --git a/test/hotspot/jtreg/compiler/floatingpoint/DmodTest.java b/test/hotspot/jtreg/compiler/floatingpoint/DmodTest.java new file mode 100644 index 0000000000000..4f386a42de439 --- /dev/null +++ b/test/hotspot/jtreg/compiler/floatingpoint/DmodTest.java @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2023, Intel Corporation. All rights reserved. + * Intel Math Library (LIBM) Source Code + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +/** + * @test + * @bug 8308966 + * @summary Add intrinsic for float/double modulo for x86 AVX2 and AVX512 + * @run main compiler.floatingpoint.DmodTest + */ + + package compiler.floatingpoint; + + import java.lang.Double; + + public class DmodTest { + static double [] op1 = { 1.2345d, 0.0d, -0.0d, 1.0d/0.0d, -1.0d/0.0d, 0.0d/0.0d }; + static double [] op2 = { 1.2345d, 0.0d, -0.0d, 1.0d/0.0d, -1.0d/0.0d, 0.0d/0.0d }; + static double [][] res = { + { + 0.0d, + Double.NaN, + Double.NaN, + 1.2345d, + 1.2345d, + Double.NaN, + }, + { + 0.0d, + Double.NaN, + Double.NaN, + 0.0d, + 0.0d, + Double.NaN, + }, + { + -0.0d, + Double.NaN, + Double.NaN, + -0.0d, + -0.0d, + Double.NaN, + }, + { + Double.NaN, + Double.NaN, + Double.NaN, + Double.NaN, + Double.NaN, + Double.NaN, + }, + { + Double.NaN, + Double.NaN, + Double.NaN, + Double.NaN, + Double.NaN, + Double.NaN, + }, + { + Double.NaN, + Double.NaN, + Double.NaN, + Double.NaN, + Double.NaN, + Double.NaN, + }, + }; + public static void main(String[] args) throws Exception { + double f1, f2, f3; + boolean failure = false; + boolean print_failure = false; + for (int i = 0; i < 100_000; i++) { + for (int j = 0; j < op1.length; j++) { + for (int k = 0; k < op2.length; k++) { + f1 = op1[j]; + f2 = op2[k]; + f3 = f1 % f2; + + if (Double.isNaN(res[j][k])) { + if (!Double.isNaN(f3)) { + failure = true; + print_failure = true; + } + } else if (Double.isNaN(f3)) { + failure = true; + print_failure = true; + } else if (f3 != res[j][k]) { + failure = true; + print_failure = true; + } + + if (print_failure) { + System.out.println( "Actual " + f1 + " % " + f2 + " = " + f3); + System.out.println( "Expected " + f1 + " % " + f2 + " = " + res[j][k]); + print_failure = false; + } + } + } + } + + if (failure) { + throw new RuntimeException("Test Failed"); + } else { + System.out.println("Test passed."); + } + } +} + diff --git a/test/hotspot/jtreg/compiler/floatingpoint/FmodTest.java b/test/hotspot/jtreg/compiler/floatingpoint/FmodTest.java new file mode 100644 index 0000000000000..859fd34650a51 --- /dev/null +++ b/test/hotspot/jtreg/compiler/floatingpoint/FmodTest.java @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2023, Intel Corporation. All rights reserved. + * Intel Math Library (LIBM) Source Code + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +/** + * @test + * @bug 8308966 + * @summary Add intrinsic for float/double modulo for x86 AVX2 and AVX512 + * @run main compiler.floatingpoint.FmodTest + */ + + package compiler.floatingpoint; + + import java.lang.Float; + + public class FmodTest { + static float [] op1 = { 1.2345f, 0.0f, -0.0f, 1.0f/0.0f, -1.0f/0.0f, 0.0f/0.0f }; + static float [] op2 = { 1.2345f, 0.0f, -0.0f, 1.0f/0.0f, -1.0f/0.0f, 0.0f/0.0f }; + static float [][] res = { + { + 0.0f, + Float.NaN, + Float.NaN, + 1.2345f, + 1.2345f, + Float.NaN, + }, + { + 0.0f, + Float.NaN, + Float.NaN, + 0.0f, + 0.0f, + Float.NaN, + }, + { + -0.0f, + Float.NaN, + Float.NaN, + -0.0f, + -0.0f, + Float.NaN, + }, + { + Float.NaN, + Float.NaN, + Float.NaN, + Float.NaN, + Float.NaN, + Float.NaN, + }, + { + Float.NaN, + Float.NaN, + Float.NaN, + Float.NaN, + Float.NaN, + Float.NaN, + }, + { + Float.NaN, + Float.NaN, + Float.NaN, + Float.NaN, + Float.NaN, + Float.NaN, + }, + }; + public static void main(String[] args) throws Exception { + float f1, f2, f3; + boolean failure = false; + boolean print_failure = false; + for (int i = 0; i < 100_000; i++) { + for (int j = 0; j < op1.length; j++) { + for (int k = 0; k < op2.length; k++) { + f1 = op1[j]; + f2 = op2[k]; + f3 = f1 % f2; + + if (Float.isNaN(res[j][k])) { + if (!Float.isNaN(f3)) { + failure = true; + print_failure = true; + } + } else if (Float.isNaN(f3)) { + failure = true; + print_failure = true; + } else if (f3 != res[j][k]) { + failure = true; + print_failure = true; + } + + if (print_failure) { + System.out.println( "Actual " + f1 + " % " + f2 + " = " + f3); + System.out.println( "Expected " + f1 + " % " + f2 + " = " + res[j][k]); + print_failure = false; + } + } + } + } + + if (failure) { + throw new RuntimeException("Test Failed"); + } else { + System.out.println("Test passed."); + } + } + }
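The ALGORITHM DESCRIPTION block in stubGenerator_x86_64_fmod.cpp spells out the special-case behaviour the stub must preserve; it is the same contract Java's % applies to doubles. A minimal behavioural model of those cases, with the general case delegated to the C library, assuming only standard <cmath> (this is a sketch of the contract, not the stub's code):

    #include <cmath>
    #include <limits>

    // Behavioural model of the special cases listed in the algorithm description;
    // the finite/finite case is handed to std::fmod, which has the same contract.
    static double fmod_reference(double x, double y) {
        if (std::isnan(x) || std::isnan(y)) return std::numeric_limits<double>::quiet_NaN();
        if (std::isinf(x) || y == 0.0)      return std::numeric_limits<double>::quiet_NaN(); // Inf % y, x % 0
        if (std::isinf(y) || x == 0.0)      return x;   // result equals the dividend, sign included
        return std::fmod(x, y);                         // |result| < |y|, sign of the dividend
    }

The tables in DmodTest.java and FmodTest.java enumerate exactly these cases (0.0, -0.0, +/-Inf, NaN) against each other, so the model above also describes what the tests expect.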
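Both the AVX512 and AVX2 paths reduce the operands with the same loop shape: q = trunc(a / b), then a = a - b * q via a fused negative-multiply-add, repeated while b <= a. A rough scalar sketch of that loop, assuming ordinary round-to-nearest arithmetic (the stub instead forces round-toward-zero through the EVEX {rz} encodings and MXCSR, and pre-scales b by 2^1023 when a / b would overflow; neither is modelled here, and special operands are assumed to have been filtered out already):

    #include <cmath>

    // One reduction step: mirrors the AVX2 path's q = DP_TRUNC(DP_DIV(a, b));
    // corr; a = DP_FNMA(b, q, a). The correction handles the division rounding
    // up past an integer, which would make the fused a - b*q go negative.
    static double reduce_step(double a, double b) {
        double q = std::trunc(a / b);
        double r = std::fma(-b, q, a);   // a - b*q with a single rounding
        if (r < 0.0) {                   // division rounded up; back q off by one
            q -= 1.0;
            r = std::fma(-b, q, a);
        }
        return r;
    }

    static double fmod_loop(double x, double y) {    // finite, non-zero inputs only
        double a = std::fabs(x), b = std::fabs(y);
        while (a >= b) {
            a = reduce_step(a, b);
        }
        return std::copysign(a, x);      // same effect as the stub's final XOR with sgn_a
    }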
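On the SharedRuntime side, the patched frem widens both float operands to double, calls StubRoutines::fmod(), and narrows the result back to float. That round trip is lossless: every float converts exactly to double, and the true remainder of two floats is itself representable as a float, so the double computation returns it exactly and the final narrowing does not round. A small self-check of that assumption, using hypothetical test values and only standard <cmath>:

    #include <cmath>
    #include <cassert>

    int main() {
        const float xs[] = {1.2345f, -7.5f, 1e30f, 3.0f};
        const float ys[] = {0.25f, 2.0f, 7.0f, 1e-30f};
        for (float x : xs) {
            for (float y : ys) {
                // float remainder computed directly vs. routed through double,
                // mirroring the frem -> StubRoutines::fmod() -> (jfloat) path
                assert(std::fmod(x, y) == (float) std::fmod((double) x, (double) y));
            }
        }
        return 0;
    }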