Fix issue with atan on x86

Fixed an issue with the new atan function where passing the argument -0.0 failed to return -0.0. Fixed both the scalar and vector versions. Also changed which scalar function is called for double-precision atan when using the relaxed version. It turns out that the previous scalar double-precision atan function was slightly faster than the new one, so we provided a way for it to still be called.
flang-compiler · Aug 29, 2019 · 39bd879 · 39bd879
1 parent c22671b
commit 39bd879
Show file tree

Hide file tree

Showing 5 changed files with 13 additions and 5 deletions.
diff --git a/runtime/libpgmath/lib/common/atan/atan_d_vec.h b/runtime/libpgmath/lib/common/atan/atan_d_vec.h
@@ -105,6 +105,7 @@ vdouble __attribute__((noinline)) atan_d_vec(vdouble const x) {
 
     vdouble result = vsel_vd_vo_vd_vd(f_big, result_f_big, result_not_f_big);
 
+    result = vreinterpret_vd_vm(vreinterpret_vm_vd(result) | vreinterpret_vm_vd(ans_sgn));
+
     return result;
 }
-
diff --git a/runtime/libpgmath/lib/common/atan/fd_atan_scalar.cpp b/runtime/libpgmath/lib/common/atan/fd_atan_scalar.cpp
@@ -92,6 +92,7 @@ double __attribute__((noinline)) atan_d_scalar(double x) {
 
     double result_d = FMA(x2 * xReduced, poly, xReduced);
 
+    result_d = copysign(result_d, x);
+
     return result_d;
 }
-
diff --git a/runtime/libpgmath/lib/common/atanf/atan_vec.h b/runtime/libpgmath/lib/common/atanf/atan_vec.h
@@ -72,5 +72,8 @@ vfloat __attribute__((noinline)) atan_vec(vfloat const x) {
 
     vfloat result = vsel_vf_vo_vf_vf(x_big, result_x_big, result_not_x_big);
 
+    //Make sure atanf(-0.0f) = -0.0f:
+    result = vreinterpret_vf_vm(vor_vm_vm_vm(vreinterpret_vm_vf(result), vreinterpret_vm_vf(ans_sgn)));
+
     return result;
 }
diff --git a/runtime/libpgmath/lib/common/atanf/fs_atan_scalar.cpp b/runtime/libpgmath/lib/common/atanf/fs_atan_scalar.cpp
@@ -76,5 +76,8 @@ float __attribute__((noinline)) atan_scalar(const float x) {
 
     float result_d = FMAF(x2 * xReduced, poly, xReduced);
 
+    //This fixes atanf(-0.0) = -0.0, but doesn't slow down the code seemingly
+    result_d = copysignf(result_d, x);
+
     return result_d;
 }
diff --git a/runtime/libpgmath/lib/x86_64/math_tables/mth_atandefs.h b/runtime/libpgmath/lib/x86_64/math_tables/mth_atandefs.h
@@ -52,7 +52,7 @@ MTHINTRIN(atan , sv8m , avxfma4    , __fs_atan_8_mn        , __rs_atan_8_mn
 MTHINTRIN(atan , dv4m , avxfma4    , __fd_atan_4_mn        , __rd_atan_4_mn        , __pd_atan_4_mn        ,__math_dispatch_error)
 
 MTHINTRIN(atan , ss   , avx2       , __fs_atan_1_avx2      , __fs_atan_1_avx2      , __mth_i_atan_avx2     ,__math_dispatch_error)
-MTHINTRIN(atan , ds   , avx2       , __fd_atan_1_avx2      , __fd_atan_1_avx2      , __mth_i_datan_avx2    ,__math_dispatch_error)
+MTHINTRIN(atan , ds   , avx2       , __fd_atan_1_avx2      , __mth_i_datan_avx2    , __mth_i_datan_avx2    ,__math_dispatch_error)
 MTHINTRIN(atan , sv4  , avx2       , __fs_atan_4_avx2      , __fs_atan_4_avx2      , __gs_atan_4_p         ,__math_dispatch_error)
 MTHINTRIN(atan , dv2  , avx2       , __fd_atan_2_avx2      , __fd_atan_2_avx2      , __gd_atan_2_p         ,__math_dispatch_error)
 MTHINTRIN(atan , sv8  , avx2       , __fs_atan_8_avx2      , __fs_atan_8_avx2      , __gs_atan_8_p         ,__math_dispatch_error)
@@ -63,7 +63,7 @@ MTHINTRIN(atan , sv8m , avx2       , __fs_atan_8_mn        , __rs_atan_8_mn
 MTHINTRIN(atan , dv4m , avx2       , __fd_atan_4_mn        , __rd_atan_4_mn        , __pd_atan_4_mn        ,__math_dispatch_error)
 
 MTHINTRIN(atan , ss   , avx512knl  , __fs_atan_1_avx2      , __fs_atan_1_avx2      , __mth_i_atan_avx2     ,__math_dispatch_error)
-MTHINTRIN(atan , ds   , avx512knl  , __fd_atan_1_avx2      , __fd_atan_1_avx2      , __mth_i_datan_avx2    ,__math_dispatch_error)
+MTHINTRIN(atan , ds   , avx512knl  , __fd_atan_1_avx2      , __mth_i_datan_avx2    , __mth_i_datan_avx2    ,__math_dispatch_error)
 MTHINTRIN(atan , sv4  , avx512knl  , __fs_atan_4_avx2      , __fs_atan_4_avx2      , __gs_atan_4_p         ,__math_dispatch_error)
 MTHINTRIN(atan , dv2  , avx512knl  , __fd_atan_2_avx2      , __fd_atan_2_avx2      , __gd_atan_2_p         ,__math_dispatch_error)
 MTHINTRIN(atan , sv8  , avx512knl  , __fs_atan_8_avx2      , __fs_atan_8_avx2      , __gs_atan_8_p         ,__math_dispatch_error)
@@ -78,7 +78,7 @@ MTHINTRIN(atan , sv16m, avx512knl  , __fs_atan_16_mn       , __rs_atan_16_mn
 MTHINTRIN(atan , dv8m , avx512knl  , __fd_atan_8_mn        , __rd_atan_8_mn        , __pd_atan_8_mn        ,__math_dispatch_error)
 
 MTHINTRIN(atan , ss   , avx512     , __fs_atan_1_avx2      , __fs_atan_1_avx2      , __mth_i_atan_avx2     ,__math_dispatch_error)
-MTHINTRIN(atan , ds   , avx512     , __fd_atan_1_avx2      , __fd_atan_1_avx2      , __mth_i_datan_avx2    ,__math_dispatch_error)
+MTHINTRIN(atan , ds   , avx512     , __fd_atan_1_avx2      , __mth_i_datan_avx2    , __mth_i_datan_avx2    ,__math_dispatch_error)
 MTHINTRIN(atan , sv4  , avx512     , __fs_atan_4_avx2      , __fs_atan_4_avx2      , __gs_atan_4_p         ,__math_dispatch_error)
 MTHINTRIN(atan , dv2  , avx512     , __fd_atan_2_avx2      , __fd_atan_2_avx2      , __gd_atan_2_p         ,__math_dispatch_error)
 MTHINTRIN(atan , sv8  , avx512     , __fs_atan_8_avx2      , __fs_atan_8_avx2      , __gs_atan_8_p         ,__math_dispatch_error)