Skip to content
Permalink
Browse files

Fix issue with atan on x86

Fixed an issue with the new atan function where
passing the argument -0.0 failed to return -0.0.
Fixed both the scalar and vector versions.

Also changed which scalar function we call for
double precision atan when using the relaxed
version. It turns out that the previous version
of the scalar double precision atan function was
slightly faster than our new one, so we decided
to provide a way for it to still be called.
  • Loading branch information...
gklimowicz committed Aug 29, 2019
1 parent c22671b commit 39bd879da96c4f40e6df4c3265b97bf78461520a
@@ -105,6 +105,7 @@ vdouble __attribute__((noinline)) atan_d_vec(vdouble const x) {

vdouble result = vsel_vd_vo_vd_vd(f_big, result_f_big, result_not_f_big);

result = vreinterpret_vd_vm(vreinterpret_vm_vd(result) | vreinterpret_vm_vd(ans_sgn));

return result;
}

@@ -92,6 +92,7 @@ double __attribute__((noinline)) atan_d_scalar(double x) {

double result_d = FMA(x2 * xReduced, poly, xReduced);

result_d = copysign(result_d, x);

return result_d;
}

@@ -72,5 +72,8 @@ vfloat __attribute__((noinline)) atan_vec(vfloat const x) {

vfloat result = vsel_vf_vo_vf_vf(x_big, result_x_big, result_not_x_big);

//Make sure atanf(-0.0f) = -0.0f:
result = vreinterpret_vf_vm(vor_vm_vm_vm(vreinterpret_vm_vf(result), vreinterpret_vm_vf(ans_sgn)));

return result;
}
@@ -76,5 +76,8 @@ float __attribute__((noinline)) atan_scalar(const float x) {

float result_d = FMAF(x2 * xReduced, poly, xReduced);

//Ensures atanf(-0.0f) returns -0.0f; this does not appear to slow the code down
result_d = copysignf(result_d, x);

return result_d;
}
@@ -52,7 +52,7 @@ MTHINTRIN(atan , sv8m , avxfma4 , __fs_atan_8_mn , __rs_atan_8_mn
MTHINTRIN(atan , dv4m , avxfma4 , __fd_atan_4_mn , __rd_atan_4_mn , __pd_atan_4_mn ,__math_dispatch_error)

MTHINTRIN(atan , ss , avx2 , __fs_atan_1_avx2 , __fs_atan_1_avx2 , __mth_i_atan_avx2 ,__math_dispatch_error)
MTHINTRIN(atan , ds , avx2 , __fd_atan_1_avx2 , __fd_atan_1_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
MTHINTRIN(atan , ds , avx2 , __fd_atan_1_avx2 , __mth_i_datan_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
MTHINTRIN(atan , sv4 , avx2 , __fs_atan_4_avx2 , __fs_atan_4_avx2 , __gs_atan_4_p ,__math_dispatch_error)
MTHINTRIN(atan , dv2 , avx2 , __fd_atan_2_avx2 , __fd_atan_2_avx2 , __gd_atan_2_p ,__math_dispatch_error)
MTHINTRIN(atan , sv8 , avx2 , __fs_atan_8_avx2 , __fs_atan_8_avx2 , __gs_atan_8_p ,__math_dispatch_error)
@@ -63,7 +63,7 @@ MTHINTRIN(atan , sv8m , avx2 , __fs_atan_8_mn , __rs_atan_8_mn
MTHINTRIN(atan , dv4m , avx2 , __fd_atan_4_mn , __rd_atan_4_mn , __pd_atan_4_mn ,__math_dispatch_error)

MTHINTRIN(atan , ss , avx512knl , __fs_atan_1_avx2 , __fs_atan_1_avx2 , __mth_i_atan_avx2 ,__math_dispatch_error)
MTHINTRIN(atan , ds , avx512knl , __fd_atan_1_avx2 , __fd_atan_1_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
MTHINTRIN(atan , ds , avx512knl , __fd_atan_1_avx2 , __mth_i_datan_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
MTHINTRIN(atan , sv4 , avx512knl , __fs_atan_4_avx2 , __fs_atan_4_avx2 , __gs_atan_4_p ,__math_dispatch_error)
MTHINTRIN(atan , dv2 , avx512knl , __fd_atan_2_avx2 , __fd_atan_2_avx2 , __gd_atan_2_p ,__math_dispatch_error)
MTHINTRIN(atan , sv8 , avx512knl , __fs_atan_8_avx2 , __fs_atan_8_avx2 , __gs_atan_8_p ,__math_dispatch_error)
@@ -78,7 +78,7 @@ MTHINTRIN(atan , sv16m, avx512knl , __fs_atan_16_mn , __rs_atan_16_mn
MTHINTRIN(atan , dv8m , avx512knl , __fd_atan_8_mn , __rd_atan_8_mn , __pd_atan_8_mn ,__math_dispatch_error)

MTHINTRIN(atan , ss , avx512 , __fs_atan_1_avx2 , __fs_atan_1_avx2 , __mth_i_atan_avx2 ,__math_dispatch_error)
MTHINTRIN(atan , ds , avx512 , __fd_atan_1_avx2 , __fd_atan_1_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
MTHINTRIN(atan , ds , avx512 , __fd_atan_1_avx2 , __mth_i_datan_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
MTHINTRIN(atan , sv4 , avx512 , __fs_atan_4_avx2 , __fs_atan_4_avx2 , __gs_atan_4_p ,__math_dispatch_error)
MTHINTRIN(atan , dv2 , avx512 , __fd_atan_2_avx2 , __fd_atan_2_avx2 , __gd_atan_2_p ,__math_dispatch_error)
MTHINTRIN(atan , sv8 , avx512 , __fs_atan_8_avx2 , __fs_atan_8_avx2 , __gs_atan_8_p ,__math_dispatch_error)

0 comments on commit 39bd879

Please sign in to comment.
You can’t perform that action at this time.